diff --git a/core/Kconfig b/core/Kconfig index 4f5ec6f4b7..f8682c2daa 100644 --- a/core/Kconfig +++ b/core/Kconfig @@ -39,6 +39,9 @@ config MODULE_CORE_MSG_BUS help Messaging Bus API for inter process message broadcast. +config MODULE_CORE_MUTEX_DEBUG + bool "Aid debugging deadlocks by printing on whom mutex_lock() is waiting" + config MODULE_CORE_MUTEX_PRIORITY_INHERITANCE bool "Use priority inheritance to mitigate priority inversion for mutexes" diff --git a/core/include/mutex.h b/core/include/mutex.h index ac05572c55..1055bae145 100644 --- a/core/include/mutex.h +++ b/core/include/mutex.h @@ -97,6 +97,25 @@ * `MUTEX_LOCK`. * - The scheduler is run, so that if the unblocked waiting thread can * run now, in case it has a higher priority than the running thread. + * + * Debugging deadlocks + * ------------------- + * + * The module `core_mutex_debug` can be used to print on whom `mutex_lock()` + * is waiting. This information includes the thread ID of the owner and the + * program counter (PC) from where `mutex_lock()` was called. Note that the + * information is only valid if: + * + * - The mutex was locked by a thread, and not e.g. by `MUTEX_INIT_LOCKED` + * - The function `cpu_get_caller_pc()` is implemented for the target + * architecture. (The thread ID will remain valid, though.) + * - The caller PC is briefly 0 when the current owner passes over ownership + * to the next thread, but that thread didn't get CPU time yet to write its + * PC into the data structure. Even worse, on architectures where an aligned + * function-pointer-sized write is not atomic, the value may briefly be + * bogus. Chances are close to zero this ever hits and since this only + * effects debug output, the ostrich algorithm was chosen here. + * * @{ * * @file @@ -112,6 +131,7 @@ #include #include +#include "architecture.h" #include "kernel_defines.h" #include "list.h" #include "thread.h" @@ -130,7 +150,8 @@ typedef struct { * @internal */ list_node_t queue; -#if defined(DOXYGEN) || defined(MODULE_CORE_MUTEX_PRIORITY_INHERITANCE) +#if defined(DOXYGEN) || defined(MODULE_CORE_MUTEX_PRIORITY_INHERITANCE) \ + || defined(MODULE_CORE_MUTEX_DEBUG) /** * @brief The current owner of the mutex or `NULL` * @note Only available if module core_mutex_priority_inheritance @@ -141,6 +162,18 @@ typedef struct { * this will have the value of `NULL`. */ kernel_pid_t owner; +#endif +#if defined(DOXYGEN) || defined(MODULE_CORE_MUTEX_DEBUG) + /** + * @brief Program counter of the call to @ref mutex_lock that most + * recently acquired this mutex + * + * This is used when the module `core_mutex_debug` is used to debug + * deadlocks and is non-existing otherwise + */ + uinttxtptr_t owner_calling_pc; +#endif +#if defined(DOXYGEN) || defined(MODULE_CORE_MUTEX_PRIORITY_INHERITANCE) /** * @brief Original priority of the owner * @note Only available if module core_mutex_priority_inheritance diff --git a/core/mutex.c b/core/mutex.c index a2529ac34e..53dd916a93 100644 --- a/core/mutex.c +++ b/core/mutex.c @@ -47,9 +47,18 @@ * function into both @ref mutex_lock and @ref mutex_lock_cancelable is, * therefore, beneficial for the majority of applications. */ -static inline __attribute__((always_inline)) void _block(mutex_t *mutex, - unsigned irq_state) +static inline __attribute__((always_inline)) +void _block(mutex_t *mutex, + unsigned irq_state, + uinttxtptr_t pc) { + /* pc is only used when MODULE_CORE_MUTEX_DEBUG */ + (void)pc; +#if IS_USED(MODULE_CORE_MUTEX_DEBUG) + printf("[mutex] waiting for thread %" PRIkernel_pid " (pc = 0x%" PRIxTXTPTR + ")\n", + mutex->owner, mutex->owner_calling_pc); +#endif thread_t *me = thread_get_active(); /* Fail visibly even if a blocking action is called from somewhere where @@ -80,10 +89,17 @@ static inline __attribute__((always_inline)) void _block(mutex_t *mutex, irq_restore(irq_state); thread_yield_higher(); /* We were woken up by scheduler. Waker removed us from queue. */ +#if IS_USED(MODULE_CORE_MUTEX_DEBUG) + mutex->owner_calling_pc = pc; +#endif } bool mutex_lock_internal(mutex_t *mutex, bool block) { + uinttxtptr_t pc = 0; +#if IS_USED(MODULE_CORE_MUTEX_DEBUG) + pc = cpu_get_caller_pc(); +#endif unsigned irq_state = irq_disable(); DEBUG("PID[%" PRIkernel_pid "] mutex_lock_internal(block=%u).\n", @@ -92,9 +108,15 @@ bool mutex_lock_internal(mutex_t *mutex, bool block) if (mutex->queue.next == NULL) { /* mutex is unlocked. */ mutex->queue.next = MUTEX_LOCKED; -#ifdef MODULE_CORE_MUTEX_PRIORITY_INHERITANCE +#if IS_USED(MODULE_CORE_MUTEX_PRIORITY_INHERITANCE) \ + || IS_USED(MODULE_CORE_MUTEX_DEBUG) thread_t *me = thread_get_active(); mutex->owner = me->pid; +#endif +#if IS_USED(MODULE_CORE_MUTEX_DEBUG) + mutex->owner_calling_pc = pc; +#endif +#if IS_USED(MODULE_CORE_MUTEX_PRIORITY_INHERITANCE) mutex->owner_original_priority = me->priority; #endif DEBUG("PID[%" PRIkernel_pid "] mutex_lock(): early out.\n", @@ -106,7 +128,7 @@ bool mutex_lock_internal(mutex_t *mutex, bool block) irq_restore(irq_state); return false; } - _block(mutex, irq_state); + _block(mutex, irq_state, pc); } return true; @@ -114,6 +136,10 @@ bool mutex_lock_internal(mutex_t *mutex, bool block) int mutex_lock_cancelable(mutex_cancel_t *mc) { + uinttxtptr_t pc = 0; +#if IS_USED(MODULE_CORE_MUTEX_DEBUG) + pc = cpu_get_caller_pc(); +#endif unsigned irq_state = irq_disable(); DEBUG("PID[%" PRIkernel_pid "] mutex_lock_cancelable()\n", @@ -131,9 +157,15 @@ int mutex_lock_cancelable(mutex_cancel_t *mc) if (mutex->queue.next == NULL) { /* mutex is unlocked. */ mutex->queue.next = MUTEX_LOCKED; -#ifdef MODULE_CORE_MUTEX_PRIORITY_INHERITANCE +#if IS_USED(MODULE_CORE_MUTEX_PRIORITY_INHERITANCE) \ + || IS_USED(MODULE_CORE_MUTEX_DEBUG) thread_t *me = thread_get_active(); mutex->owner = me->pid; +#endif +#if IS_USED(MODULE_CORE_MUTEX_DEBUG) + mutex->owner_calling_pc = pc; +#endif +#if IS_USED(MODULE_CORE_MUTEX_PRIORITY_INHERITANCE) mutex->owner_original_priority = me->priority; #endif DEBUG("PID[%" PRIkernel_pid "] mutex_lock_cancelable() early out.\n", @@ -142,7 +174,7 @@ int mutex_lock_cancelable(mutex_cancel_t *mc) return 0; } else { - _block(mutex, irq_state); + _block(mutex, irq_state, pc); if (mc->cancelled) { DEBUG("PID[%" PRIkernel_pid "] mutex_lock_cancelable() " "cancelled.\n", thread_getpid()); @@ -185,7 +217,7 @@ void mutex_unlock(mutex_t *mutex) uint16_t process_priority = process->priority; -#ifdef MODULE_CORE_MUTEX_PRIORITY_INHERITANCE +#if IS_USED(MODULE_CORE_MUTEX_PRIORITY_INHERITANCE) thread_t *owner = thread_get(mutex->owner); if ((owner) && (owner->priority != mutex->owner_original_priority)) { DEBUG("PID[%" PRIkernel_pid "] prio %u --> %u\n", @@ -194,6 +226,9 @@ void mutex_unlock(mutex_t *mutex) sched_change_priority(owner, mutex->owner_original_priority); } #endif +#if IS_USED(MODULE_CORE_MUTEX_DEBUG) + mutex->owner_calling_pc = 0; +#endif irq_restore(irqstate); sched_switch(process_priority);