From ab1d0b6f9847c7deca72092f8fa3162db854f99b Mon Sep 17 00:00:00 2001
From: Koen Zandberg
Date: Thu, 6 Aug 2020 20:17:20 +0200
Subject: [PATCH] sched: Reverse runqueue order when CLZ is available

This commit reverses the runqueue_bitcache bit order when the
architecture has a CLZ (count leading zeros) instruction. When the
architecture supports CLZ, it is faster to determine the most
significant set bit of a word than the least significant set bit.
When the instruction is not available, it is unfortunately the least
significant set bit that is cheaper to find.

Reversing the bit order shaves off another 4 cycles on the
same54-xpro: from 147 to 143 ticks when testing with
tests/bench_sched_nop. Architectures where no CLZ instruction is
available are not affected.
---
 core/sched.c | 40 +++++++++++++++++++++++++++++++++++++---
 1 file changed, 37 insertions(+), 3 deletions(-)

diff --git a/core/sched.c b/core/sched.c
index d933898e05..ee1dc7ee1d 100644
--- a/core/sched.c
+++ b/core/sched.c
@@ -79,6 +79,40 @@ static void (*sched_cb) (kernel_pid_t active_thread,
                          kernel_pid_t next_thread) = NULL;
 #endif
 
+/* Depending on whether the CLZ instruction is available, the order of the
+ * runqueue_bitcache is reversed. When the instruction is available, it is
+ * faster to determine the MSBit set. When it is not available it is faster to
+ * determine the LSBit set. These functions abstract the runqueue modifications
+ * and readout away, switching between the two orders depending on the CLZ
+ * instruction availability
+ */
+static inline void _set_runqueue_bit(thread_t *process)
+{
+#if defined(BITARITHM_HAS_CLZ)
+    runqueue_bitcache |= BIT31 >> process->priority;
+#else
+    runqueue_bitcache |= 1 << process->priority;
+#endif
+}
+
+static inline void _clear_runqueue_bit(thread_t *process)
+{
+#if defined(BITARITHM_HAS_CLZ)
+    runqueue_bitcache &= ~(BIT31 >> process->priority);
+#else
+    runqueue_bitcache &= ~(1 << process->priority);
+#endif
+}
+
+static inline unsigned _get_prio_queue_from_runqueue(void)
+{
+#if defined(BITARITHM_HAS_CLZ)
+    return 31 - bitarithm_msb(runqueue_bitcache);
+#else
+    return bitarithm_lsb(runqueue_bitcache);
+#endif
+}
+
 static void _unschedule(thread_t *active_thread)
 {
     if (active_thread->status == STATUS_RUNNING) {
@@ -119,7 +153,7 @@ int __attribute__((used)) sched_run(void)
 
     sched_context_switch_request = 0;
 
-    int nextrq = bitarithm_lsb(runqueue_bitcache);
+    unsigned nextrq = _get_prio_queue_from_runqueue();
 
     thread_t *next_thread = container_of(sched_runqueues[nextrq].next->next,
                                          thread_t, rq_entry);
@@ -173,7 +207,7 @@ void sched_set_status(thread_t *process, thread_status_t status)
                   process->pid, process->priority);
             clist_rpush(&sched_runqueues[process->priority],
                         &(process->rq_entry));
-            runqueue_bitcache |= 1 << process->priority;
+            _set_runqueue_bit(process);
         }
     }
     else {
@@ -184,7 +218,7 @@ void sched_set_status(thread_t *process, thread_status_t status)
             clist_lpop(&sched_runqueues[process->priority]);
 
             if (!sched_runqueues[process->priority].next) {
-                runqueue_bitcache &= ~(1 << process->priority);
+                _clear_runqueue_bit(process);
             }
         }
     }
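
---
Note (not part of the patch): a minimal standalone sketch of the trick for
readers outside the RIOT tree. It uses the GCC/Clang __builtin_clz builtin
and a 32-bit unsigned int as stand-ins for RIOT's bitarithm helpers and
BIT31; the helper names below are made up for illustration. With the bit
order reversed so that priority 0 lands in bit 31, picking the next runqueue
reduces to a single count-leading-zeros:

/* sketch.c -- standalone illustration, not RIOT code.
 * Priority 0 (highest) maps to bit 31, so the highest-priority non-empty
 * queue is simply the number of leading zeros of the cache word. Note that
 * __builtin_clz(0) is undefined; this mirrors the scheduler, which only
 * reads the cache while at least one thread (e.g. idle) is runnable. */
#include <stdint.h>
#include <stdio.h>

#define BIT31 (1U << 31)

static uint32_t bitcache;             /* stand-in for runqueue_bitcache */

static void set_prio(unsigned prio)   { bitcache |= BIT31 >> prio; }
static void clear_prio(unsigned prio) { bitcache &= ~(BIT31 >> prio); }

/* 31 - msb(x) == clz(x): a single instruction on e.g. Cortex-M3 and up */
static unsigned next_runqueue(void)   { return (unsigned)__builtin_clz(bitcache); }

int main(void)
{
    set_prio(7);
    set_prio(2);                      /* lower number = higher priority */
    printf("next: %u\n", next_runqueue());   /* prints 2 */
    clear_prio(2);
    printf("next: %u\n", next_runqueue());   /* prints 7 */
    return 0;
}

Without CLZ, finding the most significant set bit takes a loop or a
de Bruijn-style multiply, which is why the patch keeps the LSB-first order
and bitarithm_lsb() on architectures lacking the instruction.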