[hal,wpilib] Fix TimedRobot notifier race (#8445)

It was possible for the alarm to fire between the set-alarm call and the
ack, resulting in a hang on the next wait. It's not possible to ack before
setting the alarm due to a race in sim step timing, so the fix is to provide
an atomic ack-and-set-alarm operation; the easiest way to implement this in
the API was to change ack to optionally also set the alarm again.
This commit is contained in:
Peter Johnson
2025-12-04 09:59:59 -07:00
committed by GitHub
parent d1b1703c86
commit 934f8d9c15
12 changed files with 147 additions and 78 deletions

View File

@@ -88,12 +88,26 @@ public class NotifierJNI extends JNIWrapper {
public static native void cancelNotifierAlarm(int notifierHandle);
/**
* Indicates the notifier alarm has been serviced. This must be called before waiting for the next
* alarm.
* Indicates the notifier alarm has been serviced and optionally sets a new alarm time. This must
* be called before waiting for the next alarm.
*
* <p>The alarmTime is an absolute time (using the WPI now() time base) if absolute is true, or
* relative to the current time if absolute is false.
*
* <p>If intervalTime is non-zero, the notifier will alarm periodically following alarmTime at the
* given interval.
*
* <p>If an absolute alarmTime is in the past, the notifier will alarm immediately.
*
* @param notifierHandle the notifier handle
* @param setAlarm true to set a new alarm time, false to leave the alarm unchanged
* @param alarmTime the first alarm time (in microseconds)
* @param intervalTime the periodic interval time (in microseconds)
* @param absolute true if the alarm time is absolute
* @see "HAL_AcknowledgeNotifierAlarm"
*/
public static native void acknowledgeNotifierAlarm(int notifierHandle);
public static native void acknowledgeNotifierAlarm(
int notifierHandle, boolean setAlarm, long alarmTime, long intervalTime, boolean absolute);
/**
* Gets the overrun count for a notifier.

View File

@@ -113,14 +113,18 @@ Java_org_wpilib_hardware_hal_NotifierJNI_cancelNotifierAlarm
/*
* Class: org_wpilib_hardware_hal_NotifierJNI
* Method: acknowledgeNotifierAlarm
* Signature: (I)V
* Signature: (IZJJZ)V
*/
JNIEXPORT void JNICALL
Java_org_wpilib_hardware_hal_NotifierJNI_acknowledgeNotifierAlarm
(JNIEnv* env, jclass cls, jint notifierHandle)
(JNIEnv* env, jclass cls, jint notifierHandle, jboolean setAlarm,
jlong alarmTime, jlong intervalTime, jboolean absolute)
{
int32_t status = 0;
HAL_AcknowledgeNotifierAlarm((HAL_NotifierHandle)notifierHandle, &status);
HAL_AcknowledgeNotifierAlarm((HAL_NotifierHandle)notifierHandle, setAlarm,
static_cast<uint64_t>(alarmTime),
static_cast<uint64_t>(intervalTime), absolute,
&status);
CheckStatus(env, status);
}

View File

@@ -103,13 +103,28 @@ void HAL_CancelNotifierAlarm(HAL_NotifierHandle notifierHandle,
int32_t* status);
/**
* Indicates the notifier alarm has been serviced. This must be called before
* waiting for the next alarm.
* Indicates the notifier alarm has been serviced and optionally sets a new
* alarm time. This must be called before waiting for the next alarm.
*
* The alarmTime is an absolute time (using the WPI_Now() time base) if
* absolute is true, or relative to the current time if absolute is false.
*
* If intervalTime is non-zero, the notifier will alarm periodically following
* alarmTime at the given interval.
*
* If an absolute alarmTime is in the past, the notifier will alarm immediately.
*
* @param[in] notifierHandle the notifier handle
* @param[in] setAlarm true to set a new alarm time, false to leave the
* alarm unchanged
* @param[in] alarmTime the first alarm time (in microseconds)
* @param[in] intervalTime the periodic interval time (in microseconds)
* @param[in] absolute true if the alarm time is absolute
* @param[out] status Error status variable. 0 on success.
*/
void HAL_AcknowledgeNotifierAlarm(HAL_NotifierHandle notifierHandle,
HAL_Bool setAlarm, uint64_t alarmTime,
uint64_t intervalTime, HAL_Bool absolute,
int32_t* status);
/**

View File

@@ -46,6 +46,10 @@ class NotifierThread : public wpi::util::SafeThread {
public:
void Main() override;
void SetAlarm(HAL_NotifierHandle notifierHandle,
std::shared_ptr<Notifier>& notifier, uint64_t alarmTime,
uint64_t intervalTime, bool absolute, int32_t* status);
void ProcessAlarms(wpi::util::SmallVectorImpl<HAL_NotifierHandle>* signaled);
bool m_paused = false;
@@ -111,6 +115,30 @@ void NotifierThread::Main() {
}
}
/**
 * Schedules (or reschedules) the alarm for a notifier and wakes the notifier
 * thread if the new alarm time precedes the previously queued top entry.
 *
 * @param notifierHandle handle that keys the notifier's alarm queue entry
 * @param notifier       notifier whose alarm state is rewritten
 * @param alarmTime      alarm time; the current HAL_GetFPGATime() value is
 *                       added to it when absolute is false
 * @param intervalTime   value stored as the notifier's interval time
 * @param absolute       true if alarmTime is already an absolute time
 * @param status         forwarded to HAL_GetFPGATime() for relative alarms
 */
void NotifierThread::SetAlarm(HAL_NotifierHandle notifierHandle,
                              std::shared_ptr<Notifier>& notifier,
                              uint64_t alarmTime, uint64_t intervalTime,
                              bool absolute, int32_t* status) {
  // Relative requests are turned into an absolute time up front; the clock is
  // only read when it is actually needed.
  const uint64_t wakeTime =
      absolute ? alarmTime : alarmTime + HAL_GetFPGATime(status);

  // Sample the queue's current top alarm time *before* dropping this
  // notifier's existing entry, so the comparison below is made against what
  // the notifier thread was last waiting on.
  uint64_t queuedWakeup = UINT64_MAX;
  if (!m_alarmQueue.empty()) {
    queuedWakeup = m_alarmQueue.top().notifier->alarmTime;
    m_alarmQueue.remove({notifierHandle, notifier});
  }

  // Reset the notifier's alarm state and (re)queue it.
  Notifier& target = *notifier;
  target.alarmTime = wakeTime;
  target.intervalTime = intervalTime;
  target.overrunCount = 0;
  m_alarmQueue.push({notifierHandle, notifier});

  // Nothing to do if the previously queued wakeup is not later than ours.
  if (wakeTime >= queuedWakeup) {
    return;
  }
  // wake up notifier thread so it picks up the earlier alarm
  m_cond.notify_all();
}
void NotifierThread::ProcessAlarms(
wpi::util::SmallVectorImpl<HAL_NotifierHandle>* signaled) {
int32_t status = 0;
@@ -256,25 +284,8 @@ void HAL_SetNotifierAlarm(HAL_NotifierHandle notifierHandle, uint64_t alarmTime,
if (!notifier) {
return;
}
if (!absolute) {
alarmTime += HAL_GetFPGATime(status);
}
uint64_t prevWakeup = UINT64_MAX;
if (!thr->m_alarmQueue.empty()) {
prevWakeup = thr->m_alarmQueue.top().notifier->alarmTime;
thr->m_alarmQueue.remove({notifierHandle, notifier});
}
notifier->alarmTime = alarmTime;
notifier->intervalTime = intervalTime;
notifier->overrunCount = 0;
thr->m_alarmQueue.push({notifierHandle, notifier});
// wake up notifier thread if needed
if (alarmTime < prevWakeup) {
thr->m_cond.notify_all();
}
thr->SetAlarm(notifierHandle, notifier, alarmTime, intervalTime, absolute,
status);
}
void HAL_CancelNotifierAlarm(HAL_NotifierHandle notifierHandle,
@@ -287,16 +298,23 @@ void HAL_CancelNotifierAlarm(HAL_NotifierHandle notifierHandle,
thr->m_alarmQueue.remove({notifierHandle, notifier});
notifier->alarmTime = UINT64_MAX;
notifier->handlerSignaled.clear();
}
void HAL_AcknowledgeNotifierAlarm(HAL_NotifierHandle notifierHandle,
HAL_Bool setAlarm, uint64_t alarmTime,
uint64_t intervalTime, HAL_Bool absolute,
int32_t* status) {
auto notifier =
notifierInstance->owner.GetThread()->m_handles.Get(notifierHandle);
auto thr = notifierInstance->owner.GetThread();
auto notifier = thr->m_handles.Get(notifierHandle);
if (!notifier) {
return;
}
notifier->handlerSignaled.clear();
if (setAlarm) {
thr->SetAlarm(notifierHandle, notifier, alarmTime, intervalTime, absolute,
status);
}
}
int32_t HAL_GetNotifierOverrun(HAL_NotifierHandle notifierHandle,

View File

@@ -39,6 +39,10 @@ class NotifierThread : public wpi::util::SafeThread {
public:
void Main() override;
void SetAlarm(HAL_NotifierHandle notifierHandle,
std::shared_ptr<Notifier>& notifier, uint64_t alarmTime,
uint64_t intervalTime, bool absolute, int32_t* status);
void ProcessAlarms();
UnlimitedHandleResource<HAL_NotifierHandle, Notifier,
@@ -97,6 +101,30 @@ void NotifierThread::Main() {
}
}
/**
 * Schedules (or reschedules) the alarm for a notifier and wakes the notifier
 * thread if the new alarm time precedes the previously queued top entry.
 *
 * @param notifierHandle handle that keys the notifier's alarm queue entry
 * @param notifier       notifier whose alarm state is rewritten
 * @param alarmTime      alarm time; the current HAL_GetFPGATime() value is
 *                       added to it when absolute is false
 * @param intervalTime   value stored as the notifier's interval time
 * @param absolute       true if alarmTime is already an absolute time
 * @param status         forwarded to HAL_GetFPGATime() for relative alarms
 */
void NotifierThread::SetAlarm(HAL_NotifierHandle notifierHandle,
                              std::shared_ptr<Notifier>& notifier,
                              uint64_t alarmTime, uint64_t intervalTime,
                              bool absolute, int32_t* status) {
  // Relative requests are turned into an absolute time up front; the clock is
  // only read when it is actually needed.
  const uint64_t wakeTime =
      absolute ? alarmTime : alarmTime + HAL_GetFPGATime(status);

  // Sample the queue's current top alarm time *before* dropping this
  // notifier's existing entry, so the comparison below is made against what
  // the notifier thread was last waiting on.
  uint64_t queuedWakeup = UINT64_MAX;
  if (!m_alarmQueue.empty()) {
    queuedWakeup = m_alarmQueue.top().notifier->alarmTime;
    m_alarmQueue.remove({notifierHandle, notifier});
  }

  // Reset the notifier's alarm state and (re)queue it.
  Notifier& target = *notifier;
  target.alarmTime = wakeTime;
  target.intervalTime = intervalTime;
  target.overrunCount = 0;
  m_alarmQueue.push({notifierHandle, notifier});

  // Nothing to do if the previously queued wakeup is not later than ours.
  if (wakeTime >= queuedWakeup) {
    return;
  }
  // wake up notifier thread so it picks up the earlier alarm
  m_cond.notify_all();
}
void NotifierThread::ProcessAlarms() {
int32_t status = 0;
uint64_t curTime = HAL_GetFPGATime(&status);
@@ -182,25 +210,8 @@ void HAL_SetNotifierAlarm(HAL_NotifierHandle notifierHandle, uint64_t alarmTime,
if (!notifier) {
return;
}
if (!absolute) {
alarmTime += HAL_GetFPGATime(status);
}
uint64_t prevWakeup = UINT64_MAX;
if (!thr->m_alarmQueue.empty()) {
prevWakeup = thr->m_alarmQueue.top().notifier->alarmTime;
thr->m_alarmQueue.remove({notifierHandle, notifier});
}
notifier->alarmTime = alarmTime;
notifier->intervalTime = intervalTime;
notifier->overrunCount = 0;
thr->m_alarmQueue.push({notifierHandle, notifier});
// wake up notifier thread if needed
if (alarmTime < prevWakeup) {
thr->m_cond.notify_all();
}
thr->SetAlarm(notifierHandle, notifier, alarmTime, intervalTime, absolute,
status);
}
void HAL_CancelNotifierAlarm(HAL_NotifierHandle notifierHandle,
@@ -213,16 +224,23 @@ void HAL_CancelNotifierAlarm(HAL_NotifierHandle notifierHandle,
thr->m_alarmQueue.remove({notifierHandle, notifier});
notifier->alarmTime = UINT64_MAX;
notifier->handlerSignaled.clear();
}
void HAL_AcknowledgeNotifierAlarm(HAL_NotifierHandle notifierHandle,
HAL_Bool setAlarm, uint64_t alarmTime,
uint64_t intervalTime, HAL_Bool absolute,
int32_t* status) {
auto notifier =
notifierInstance->owner.GetThread()->m_handles.Get(notifierHandle);
auto thr = notifierInstance->owner.GetThread();
auto notifier = thr->m_handles.Get(notifierHandle);
if (!notifier) {
return;
}
notifier->handlerSignaled.clear();
if (setAlarm) {
thr->SetAlarm(notifierHandle, notifier, alarmTime, intervalTime, absolute,
status);
}
}
int32_t HAL_GetNotifierOverrun(HAL_NotifierHandle notifierHandle,

View File

@@ -35,16 +35,14 @@ void TimedRobot::StartCompetition() {
auto callback = m_callbacks.pop();
int32_t status = 0;
HAL_SetNotifierAlarm(m_notifier, callback.expirationTime.count(), 0, true,
&status);
WPILIB_CheckErrorStatus(status, "UpdateNotifierAlarm");
// Acknowledge previous alarm after setting the next one to avoid a race
// against getting the next notifier timeout in HALSIM StepTiming.
if (first) {
first = false;
HAL_SetNotifierAlarm(m_notifier, callback.expirationTime.count(), 0, true,
&status);
WPILIB_CheckErrorStatus(status, "SetNotifierAlarm");
} else {
HAL_AcknowledgeNotifierAlarm(m_notifier, &status);
HAL_AcknowledgeNotifierAlarm(
m_notifier, true, callback.expirationTime.count(), 0, true, &status);
WPILIB_CheckErrorStatus(status, "AcknowledgeNotifierAlarm");
}

View File

@@ -46,7 +46,7 @@ Notifier::Notifier(std::function<void()> callback) {
}
// Ack notifier
HAL_AcknowledgeNotifierAlarm(notifier, &status);
HAL_AcknowledgeNotifierAlarm(notifier, false, 0, 0, false, &status);
WPILIB_CheckErrorStatus(status, "AcknowledgeNotifier");
}
});
@@ -99,7 +99,7 @@ Notifier::Notifier(int priority, std::function<void()> callback) {
}
// Ack notifier
HAL_AcknowledgeNotifierAlarm(notifier, &status);
HAL_AcknowledgeNotifierAlarm(notifier, false, 0, 0, false, &status);
WPILIB_CheckErrorStatus(status, "AcknowledgeNotifier");
}
});

View File

@@ -39,7 +39,7 @@ class Watchdog::Impl {
DerefGreater<Watchdog*>>
m_watchdogs;
void UpdateAlarm();
void UpdateAlarm(bool acknowledge = false);
private:
void Main();
@@ -67,7 +67,7 @@ Watchdog::Impl::~Impl() {
}
}
void Watchdog::Impl::UpdateAlarm() {
void Watchdog::Impl::UpdateAlarm(bool acknowledge) {
int32_t status = 0;
// Return if we are being destructed, or were not created successfully
auto notifier = m_notifier.load();
@@ -76,6 +76,12 @@ void Watchdog::Impl::UpdateAlarm() {
}
if (m_watchdogs.empty()) {
HAL_CancelNotifierAlarm(notifier, &status);
} else if (acknowledge) {
HAL_AcknowledgeNotifierAlarm(
notifier, true,
static_cast<uint64_t>(m_watchdogs.top()->m_expirationTime.value() *
1e6),
0, true, &status);
} else {
HAL_SetNotifierAlarm(notifier,
static_cast<uint64_t>(
@@ -125,9 +131,7 @@ void Watchdog::Impl::Main() {
watchdog->m_callback();
lock.lock();
UpdateAlarm();
HAL_AcknowledgeNotifierAlarm(notifier, &status);
UpdateAlarm(true);
}
}

View File

@@ -71,7 +71,7 @@ PyNotifier::PyNotifier(std::function<void()> handler) {
}
// Ack notifier
HAL_AcknowledgeNotifierAlarm(notifier, &status);
HAL_AcknowledgeNotifierAlarm(notifier, false, 0, 0, false, &status);
WPILIB_CheckErrorStatus(status, "AcknowledgeNotifier");
}
} catch (...) {

View File

@@ -139,14 +139,11 @@ public class TimedRobot extends IterativeRobotBase {
// at the end of the loop.
var callback = m_callbacks.poll();
NotifierJNI.setNotifierAlarm(m_notifier, callback.expirationTime, 0, true);
// Acknowledge previous alarm after setting the next one to avoid a race
// against getting the next notifier timeout in HALSIM StepTiming.
if (first) {
first = false;
NotifierJNI.setNotifierAlarm(m_notifier, callback.expirationTime, 0, true);
} else {
NotifierJNI.acknowledgeNotifierAlarm(m_notifier);
NotifierJNI.acknowledgeNotifierAlarm(m_notifier, true, callback.expirationTime, 0, true);
}
try {

View File

@@ -95,7 +95,7 @@ public class Notifier implements AutoCloseable {
}
// Acknowledge the alarm
NotifierJNI.acknowledgeNotifierAlarm(notifier);
NotifierJNI.acknowledgeNotifierAlarm(notifier, false, 0, 0, false);
}
});
m_thread.setName("Notifier");

View File

@@ -120,7 +120,7 @@ public class Watchdog implements Closeable, Comparable<Watchdog> {
m_watchdogs.remove(this);
m_expirationTime = m_startTime + m_timeout;
m_watchdogs.add(this);
updateAlarm();
updateAlarm(false);
} finally {
m_queueMutex.unlock();
}
@@ -194,7 +194,7 @@ public class Watchdog implements Closeable, Comparable<Watchdog> {
m_watchdogs.remove(this);
m_expirationTime = m_startTime + m_timeout;
m_watchdogs.add(this);
updateAlarm();
updateAlarm(false);
} finally {
m_queueMutex.unlock();
}
@@ -205,7 +205,7 @@ public class Watchdog implements Closeable, Comparable<Watchdog> {
m_queueMutex.lock();
try {
m_watchdogs.remove(this);
updateAlarm();
updateAlarm(false);
} finally {
m_queueMutex.unlock();
}
@@ -223,9 +223,12 @@ public class Watchdog implements Closeable, Comparable<Watchdog> {
}
@SuppressWarnings("resource")
private static void updateAlarm() {
private static void updateAlarm(boolean acknowledge) {
if (m_watchdogs.isEmpty()) {
NotifierJNI.cancelNotifierAlarm(m_notifier);
} else if (acknowledge) {
NotifierJNI.acknowledgeNotifierAlarm(
m_notifier, true, (long) (m_watchdogs.peek().m_expirationTime * 1e6), 0, true);
} else {
NotifierJNI.setNotifierAlarm(
m_notifier, (long) (m_watchdogs.peek().m_expirationTime * 1e6), 0, true);
@@ -277,9 +280,7 @@ public class Watchdog implements Closeable, Comparable<Watchdog> {
watchdog.m_callback.run();
m_queueMutex.lock();
updateAlarm();
NotifierJNI.acknowledgeNotifierAlarm(m_notifier);
updateAlarm(true);
} finally {
m_queueMutex.unlock();
}