From e3191b0bfd463d19b0c95b389b00a204ebb01894 Mon Sep 17 00:00:00 2001 From: James Kuszmaul Date: Tue, 24 Nov 2015 16:35:36 -0500 Subject: [PATCH] Prevent PID tests from hanging. For the previous couple of months, the PID tests have been hanging. The reason that the tests have been hanging lies with the Notifier, not the PID controller. Basically, a deadlock was occuring during Notifier destruction when the notifier destructor was called while the notifier interrupt handler was being called. Because the low-level interrupt manager waits for the interrupt handler to finish executing before disabling itself, the notifier destructor would not exit until the ProcessQueue function finished. However, at the same time, the handler was attempting to lock the queueMutex before continuing; the Notifier destructor had locked the queueMutex while wrapping things up, meaning that the last run of the handler would not complete until the destructor did, resulting in a deadlock. In order to repair this, I reduced the scope of the lock on the queueMutex in the destructor so that it only locks when absolutely necessary. This should work now. This bug was likely introduced over the summer when we updated to stl mutexes and locks, which may have messed up the original lock structure. This likely did not affect any teams, as it can only occur if you are actively destroying every* Notifier object present and if the destructor happens to be called while the handler is being run. *Note: the component of the destructor causing issues only ran if the last Notifier object is being destroyed. Change-Id: I38ba4e60816a2a8d523e927c25378390a0755444 --- wpilibc/Athena/src/Notifier.cpp | 51 ++++++++++++++++++++----------- wpilibc/shared/include/Notifier.h | 3 +- 2 files changed, 36 insertions(+), 18 deletions(-) diff --git a/wpilibc/Athena/src/Notifier.cpp b/wpilibc/Athena/src/Notifier.cpp index 02a1a47d80..faee0bc8b0 100644 --- a/wpilibc/Athena/src/Notifier.cpp +++ b/wpilibc/Athena/src/Notifier.cpp @@ -13,8 +13,9 @@ Notifier *Notifier::timerQueueHead = nullptr; priority_recursive_mutex Notifier::queueMutex; +priority_mutex Notifier::halMutex; void *Notifier::m_notifier = nullptr; -int Notifier::refcount = 0; +std::atomic Notifier::refcount{0}; /** * Create a Notifier for timer event notification. @@ -26,15 +27,15 @@ Notifier::Notifier(TimerEventHandler handler, void *param) { wpi_setWPIErrorWithContext(NullParameter, "handler must not be nullptr"); m_handler = handler; m_param = param; - { - std::lock_guard sync(queueMutex); - // do the first time intialization of static variables - if (refcount == 0) { - int32_t status = 0; - m_notifier = initializeNotifier(ProcessQueue, &status); - wpi_setErrorWithContext(status, getHALErrorMessage(status)); + // do the first time intialization of static variables + if (refcount.fetch_add(1) == 0) { + int32_t status = 0; + { + std::lock_guard sync(halMutex); + if (!m_notifier) + m_notifier = initializeNotifier(ProcessQueue, &status); } - refcount++; + wpi_setErrorWithContext(status, getHALErrorMessage(status)); } } @@ -47,13 +48,19 @@ Notifier::~Notifier() { { std::lock_guard sync(queueMutex); DeleteFromQueue(); + } - // Delete the static variables when the last one is going away - if (!(--refcount)) { - int32_t status = 0; - cleanNotifier(m_notifier, &status); - wpi_setErrorWithContext(status, getHALErrorMessage(status)); + // Delete the static variables when the last one is going away + if (refcount.fetch_sub(1) == 1) { + int32_t status = 0; + { + std::lock_guard sync(halMutex); + if (m_notifier) { + cleanNotifier(m_notifier, &status); + m_notifier = nullptr; + } } + wpi_setErrorWithContext(status, getHALErrorMessage(status)); } // Acquire the mutex; this makes certain that the handler is @@ -73,9 +80,19 @@ Notifier::~Notifier() { void Notifier::UpdateAlarm() { if (timerQueueHead != nullptr) { int32_t status = 0; - updateNotifierAlarm(m_notifier, - (uint32_t)(timerQueueHead->m_expirationTime * 1e6), - &status); + // This locking is necessary in order to avoid two things: + // 1) Race condition issues with calling cleanNotifer() and + // updateNotifierAlarm() at the same time. + // 2) Avoid deadlock by making it so that this won't block waiting + // for the mutex to unlock. + // Checking refcount as well is unnecessary, but will not hurt. + if (halMutex.try_lock() && refcount != 0) { + if (m_notifier) + updateNotifierAlarm(m_notifier, + (uint32_t)(timerQueueHead->m_expirationTime * 1e6), + &status); + halMutex.unlock(); + } wpi_setStaticErrorWithContext(timerQueueHead, status, getHALErrorMessage(status)); } diff --git a/wpilibc/shared/include/Notifier.h b/wpilibc/shared/include/Notifier.h index de3f377554..056ed0a3fd 100644 --- a/wpilibc/shared/include/Notifier.h +++ b/wpilibc/shared/include/Notifier.h @@ -28,8 +28,9 @@ class Notifier : public ErrorBase { private: static Notifier *timerQueueHead; static priority_recursive_mutex queueMutex; + static priority_mutex halMutex; static void *m_notifier; - static int refcount; + static std::atomic refcount; static void ProcessQueue( uint32_t mask, void *params); // process the timer queue on a timer event