Files
allwpilib/wpilibc/src/main/native/cpp/system/Watchdog.cpp

242 lines
5.9 KiB
C++
Raw Normal View History

// Copyright (c) FIRST and other WPILib contributors.
// Open Source Software; you can modify and/or share it under the terms of
// the WPILib BSD license file in the root directory of this project.
2025-11-07 19:56:21 -05:00
#include "wpi/system/Watchdog.hpp"
#include <atomic>
#include <thread>
2020-12-28 10:12:52 -08:00
#include <utility>
2024-09-20 17:43:39 -07:00
#include <vector>
#include <fmt/format.h>
[hal] Revamp notifiers (#8424) This changes the HAL notifier interface to: - Use wpiutil signal objects. This means waiting is done through the `WPI_WaitObject` API instead of a dedicated function and allows for higher level code to simultaneously wait on notifiers and other events. - Interval timers are supported at the HAL layer - Handlers are now required to acknowledge notifications. This is invisible to users unless they're directly using the HAL API. - For interval timers, an overrun count is maintained to detect if the handler didn't acknowledge The underlying implementation still uses condition variables for the actual waiting. In basic testing using this approach seemed to be lower jitter than timerfd. Currently, the simulation and systemcore implementations are nearly identical except for a few additional sim hook bits. This could be refactored, but keeping them separate may make sense to keep the systemcore implementation easy to read and reason about, or if we ever choose to use a different underlying timer implementation on systemcore. The simulation side API is unchanged in form but does change in function--waiting for notifiers now only waits for currently running (or newly signaled) notifiers to acknowledge. To avoid a race condition in sim stepTiming, users of the low level API must make any alarm updates (especially for one-shot alarms) prior to acknowledging the previous alarm. The only current use of the interval timer feature is the `Notifier` class. The `TimedRobot` implementation still uses a single notifier and its own interval timing logic to ensure consistent callback order. Using separate notifiers for each user-level interval would substantially increase complexity. `Watchdog` also doesn't use the interval timer, as it's looking for an amount of time since the last `set` call rather than a recurring interval time. To reduce flicker, the sim GUI uses a fade out when a timeout goes from set to unset. 
This fixes tsan for wpilib and commands, and also fixes some spurious test failures.
2025-11-29 11:00:18 -08:00
#include "wpi/hal/HALBase.h"
2025-11-07 19:57:55 -05:00
#include "wpi/hal/Notifier.h"
2025-11-07 19:56:21 -05:00
#include "wpi/system/Errors.hpp"
#include "wpi/system/Timer.hpp"
[hal] Revamp notifiers (#8424) This changes the HAL notifier interface to: - Use wpiutil signal objects. This means waiting is done through the `WPI_WaitObject` API instead of a dedicated function and allows for higher level code to simultaneously wait on notifiers and other events. - Interval timers are supported at the HAL layer - Handlers are now required to acknowledge notifications. This is invisible to users unless they're directly using the HAL API. - For interval timers, an overrun count is maintained to detect if the handler didn't acknowledge The underlying implementation still uses condition variables for the actual waiting. In basic testing using this approach seemed to be lower jitter than timerfd. Currently, the simulation and systemcore implementations are nearly identical except for a few additional sim hook bits. This could be refactored, but keeping them separate may make sense to keep the systemcore implementation easy to read and reason about, or if we ever choose to use a different underlying timer implementation on systemcore. The simulation side API is unchanged in form but does change in function--waiting for notifiers now only waits for currently running (or newly signaled) notifiers to acknowledge. To avoid a race condition in sim stepTiming, users of the low level API must make any alarm updates (especially for one-shot alarms) prior to acknowledging the previous alarm. The only current use of the interval timer feature is the `Notifier` class. The `TimedRobot` implementation still uses a single notifier and its own interval timing logic to ensure consistent callback order. Using separate notifiers for each user-level interval would substantially increase complexity. `Watchdog` also doesn't use the interval timer, as it's looking for an amount of time since the last `set` call rather than a recurring interval time. To reduce flicker, the sim GUI uses a fade out when a timeout goes from set to unset. 
This fixes tsan for wpilib and commands, and also fixes some spurious test failures.
2025-11-29 11:00:18 -08:00
#include "wpi/util/Synchronization.h"
2025-11-07 19:57:55 -05:00
#include "wpi/util/mutex.hpp"
#include "wpi/util/priority_queue.hpp"
2025-11-07 20:00:05 -05:00
using namespace wpi;
// Shared backend used by every Watchdog instance (see Watchdog::GetImpl()).
//
// Owns the HAL notifier handle, the queue of currently scheduled watchdogs,
// and the thread that runs Main().
class Watchdog::Impl {
 public:
  Impl();
  ~Impl();

  // Comparator that dereferences both operands and compares the pointed-to
  // values with operator>, so a queue of pointers is ordered by the objects
  // they point at rather than by address.
  template <typename T>
  struct DerefGreater {
    constexpr bool operator()(const T& lhs, const T& rhs) const {
      return *lhs > *rhs;
    }
  };

  // Locked by Watchdog methods and by Main() around accesses to m_watchdogs.
  wpi::util::mutex m_mutex;

  // HAL notifier handle. The destructor exchanges it with 0, which
  // UpdateAlarm() and Main() treat as "shutting down / not created".
  std::atomic<HAL_NotifierHandle> m_notifier;

  // Scheduled watchdogs, compared via Watchdog::operator> (expiration time).
  // NOTE(review): presumably top() is the earliest expiration, as with
  // std::priority_queue and a "greater" comparator - confirm against
  // wpi::util::priority_queue.
  wpi::util::priority_queue<Watchdog*, std::vector<Watchdog*>,
                            DerefGreater<Watchdog*>>
      m_watchdogs;

  // Re-arms or cancels the notifier alarm based on m_watchdogs. Every caller
  // in this file holds m_mutex when calling it.
  void UpdateAlarm();

 private:
  // Thread body: waits on the notifier and dispatches timeout callbacks.
  void Main();

  std::thread m_thread;
};
Watchdog::Impl::Impl() {
int32_t status = 0;
[hal] Revamp notifiers (#8424) This changes the HAL notifier interface to: - Use wpiutil signal objects. This means waiting is done through the `WPI_WaitObject` API instead of a dedicated function and allows for higher level code to simultaneously wait on notifiers and other events. - Interval timers are supported at the HAL layer - Handlers are now required to acknowledge notifications. This is invisible to users unless they're directly using the HAL API. - For interval timers, an overrun count is maintained to detect if the handler didn't acknowledge The underlying implementation still uses condition variables for the actual waiting. In basic testing using this approach seemed to be lower jitter than timerfd. Currently, the simulation and systemcore implementations are nearly identical except for a few additional sim hook bits. This could be refactored, but keeping them separate may make sense to keep the systemcore implementation easy to read and reason about, or if we ever choose to use a different underlying timer implementation on systemcore. The simulation side API is unchanged in form but does change in function--waiting for notifiers now only waits for currently running (or newly signaled) notifiers to acknowledge. To avoid a race condition in sim stepTiming, users of the low level API must make any alarm updates (especially for one-shot alarms) prior to acknowledging the previous alarm. The only current use of the interval timer feature is the `Notifier` class. The `TimedRobot` implementation still uses a single notifier and its own interval timing logic to ensure consistent callback order. Using separate notifiers for each user-level interval would substantially increase complexity. `Watchdog` also doesn't use the interval timer, as it's looking for an amount of time since the last `set` call rather than a recurring interval time. To reduce flicker, the sim GUI uses a fade out when a timeout goes from set to unset. 
This fixes tsan for wpilib and commands, and also fixes some spurious test failures.
2025-11-29 11:00:18 -08:00
m_notifier = HAL_CreateNotifier(&status);
2025-11-07 20:00:43 -05:00
WPILIB_CheckErrorStatus(status, "starting watchdog notifier");
HAL_SetNotifierName(m_notifier, "Watchdog", &status);
m_thread = std::thread([=, this] { Main(); });
}
Watchdog::Impl::~Impl() {
// atomically set handle to 0, then clean
HAL_NotifierHandle handle = m_notifier.exchange(0);
[hal] Revamp notifiers (#8424) This changes the HAL notifier interface to: - Use wpiutil signal objects. This means waiting is done through the `WPI_WaitObject` API instead of a dedicated function and allows for higher level code to simultaneously wait on notifiers and other events. - Interval timers are supported at the HAL layer - Handlers are now required to acknowledge notifications. This is invisible to users unless they're directly using the HAL API. - For interval timers, an overrun count is maintained to detect if the handler didn't acknowledge The underlying implementation still uses condition variables for the actual waiting. In basic testing using this approach seemed to be lower jitter than timerfd. Currently, the simulation and systemcore implementations are nearly identical except for a few additional sim hook bits. This could be refactored, but keeping them separate may make sense to keep the systemcore implementation easy to read and reason about, or if we ever choose to use a different underlying timer implementation on systemcore. The simulation side API is unchanged in form but does change in function--waiting for notifiers now only waits for currently running (or newly signaled) notifiers to acknowledge. To avoid a race condition in sim stepTiming, users of the low level API must make any alarm updates (especially for one-shot alarms) prior to acknowledging the previous alarm. The only current use of the interval timer feature is the `Notifier` class. The `TimedRobot` implementation still uses a single notifier and its own interval timing logic to ensure consistent callback order. Using separate notifiers for each user-level interval would substantially increase complexity. `Watchdog` also doesn't use the interval timer, as it's looking for an amount of time since the last `set` call rather than a recurring interval time. To reduce flicker, the sim GUI uses a fade out when a timeout goes from set to unset. 
This fixes tsan for wpilib and commands, and also fixes some spurious test failures.
2025-11-29 11:00:18 -08:00
HAL_DestroyNotifier(handle);
// Join the thread to ensure the handler has exited.
if (m_thread.joinable()) {
m_thread.join();
}
}
void Watchdog::Impl::UpdateAlarm() {
int32_t status = 0;
// Return if we are being destructed, or were not created successfully
auto notifier = m_notifier.load();
if (notifier == 0) {
return;
}
if (m_watchdogs.empty()) {
HAL_CancelNotifierAlarm(notifier, &status);
} else {
[hal] Revamp notifiers (#8424) This changes the HAL notifier interface to: - Use wpiutil signal objects. This means waiting is done through the `WPI_WaitObject` API instead of a dedicated function and allows for higher level code to simultaneously wait on notifiers and other events. - Interval timers are supported at the HAL layer - Handlers are now required to acknowledge notifications. This is invisible to users unless they're directly using the HAL API. - For interval timers, an overrun count is maintained to detect if the handler didn't acknowledge The underlying implementation still uses condition variables for the actual waiting. In basic testing using this approach seemed to be lower jitter than timerfd. Currently, the simulation and systemcore implementations are nearly identical except for a few additional sim hook bits. This could be refactored, but keeping them separate may make sense to keep the systemcore implementation easy to read and reason about, or if we ever choose to use a different underlying timer implementation on systemcore. The simulation side API is unchanged in form but does change in function--waiting for notifiers now only waits for currently running (or newly signaled) notifiers to acknowledge. To avoid a race condition in sim stepTiming, users of the low level API must make any alarm updates (especially for one-shot alarms) prior to acknowledging the previous alarm. The only current use of the interval timer feature is the `Notifier` class. The `TimedRobot` implementation still uses a single notifier and its own interval timing logic to ensure consistent callback order. Using separate notifiers for each user-level interval would substantially increase complexity. `Watchdog` also doesn't use the interval timer, as it's looking for an amount of time since the last `set` call rather than a recurring interval time. To reduce flicker, the sim GUI uses a fade out when a timeout goes from set to unset. 
This fixes tsan for wpilib and commands, and also fixes some spurious test failures.
2025-11-29 11:00:18 -08:00
HAL_SetNotifierAlarm(notifier,
static_cast<uint64_t>(
m_watchdogs.top()->m_expirationTime.value() * 1e6),
0, true, &status);
}
2025-11-07 20:00:43 -05:00
WPILIB_CheckErrorStatus(status, "updating watchdog notifier alarm");
}
void Watchdog::Impl::Main() {
for (;;) {
int32_t status = 0;
HAL_NotifierHandle notifier = m_notifier.load();
if (notifier == 0) {
break;
}
[hal] Revamp notifiers (#8424) This changes the HAL notifier interface to: - Use wpiutil signal objects. This means waiting is done through the `WPI_WaitObject` API instead of a dedicated function and allows for higher level code to simultaneously wait on notifiers and other events. - Interval timers are supported at the HAL layer - Handlers are now required to acknowledge notifications. This is invisible to users unless they're directly using the HAL API. - For interval timers, an overrun count is maintained to detect if the handler didn't acknowledge The underlying implementation still uses condition variables for the actual waiting. In basic testing using this approach seemed to be lower jitter than timerfd. Currently, the simulation and systemcore implementations are nearly identical except for a few additional sim hook bits. This could be refactored, but keeping them separate may make sense to keep the systemcore implementation easy to read and reason about, or if we ever choose to use a different underlying timer implementation on systemcore. The simulation side API is unchanged in form but does change in function--waiting for notifiers now only waits for currently running (or newly signaled) notifiers to acknowledge. To avoid a race condition in sim stepTiming, users of the low level API must make any alarm updates (especially for one-shot alarms) prior to acknowledging the previous alarm. The only current use of the interval timer feature is the `Notifier` class. The `TimedRobot` implementation still uses a single notifier and its own interval timing logic to ensure consistent callback order. Using separate notifiers for each user-level interval would substantially increase complexity. `Watchdog` also doesn't use the interval timer, as it's looking for an amount of time since the last `set` call rather than a recurring interval time. To reduce flicker, the sim GUI uses a fade out when a timeout goes from set to unset. 
This fixes tsan for wpilib and commands, and also fixes some spurious test failures.
2025-11-29 11:00:18 -08:00
if (WPI_WaitForObject(notifier) == 0) {
break;
}
[hal] Revamp notifiers (#8424) This changes the HAL notifier interface to: - Use wpiutil signal objects. This means waiting is done through the `WPI_WaitObject` API instead of a dedicated function and allows for higher level code to simultaneously wait on notifiers and other events. - Interval timers are supported at the HAL layer - Handlers are now required to acknowledge notifications. This is invisible to users unless they're directly using the HAL API. - For interval timers, an overrun count is maintained to detect if the handler didn't acknowledge The underlying implementation still uses condition variables for the actual waiting. In basic testing using this approach seemed to be lower jitter than timerfd. Currently, the simulation and systemcore implementations are nearly identical except for a few additional sim hook bits. This could be refactored, but keeping them separate may make sense to keep the systemcore implementation easy to read and reason about, or if we ever choose to use a different underlying timer implementation on systemcore. The simulation side API is unchanged in form but does change in function--waiting for notifiers now only waits for currently running (or newly signaled) notifiers to acknowledge. To avoid a race condition in sim stepTiming, users of the low level API must make any alarm updates (especially for one-shot alarms) prior to acknowledging the previous alarm. The only current use of the interval timer feature is the `Notifier` class. The `TimedRobot` implementation still uses a single notifier and its own interval timing logic to ensure consistent callback order. Using separate notifiers for each user-level interval would substantially increase complexity. `Watchdog` also doesn't use the interval timer, as it's looking for an amount of time since the last `set` call rather than a recurring interval time. To reduce flicker, the sim GUI uses a fade out when a timeout goes from set to unset. 
This fixes tsan for wpilib and commands, and also fixes some spurious test failures.
2025-11-29 11:00:18 -08:00
uint64_t curTime = HAL_GetFPGATime(&status);
std::unique_lock lock(m_mutex);
if (m_watchdogs.empty()) {
continue;
}
// If the condition variable timed out, that means a Watchdog timeout
// has occurred, so call its timeout function.
auto watchdog = m_watchdogs.pop();
2025-11-07 20:00:05 -05:00
wpi::units::second_t now{curTime * 1e-6};
if (now - watchdog->m_lastTimeoutPrintTime > kMinPrintPeriod) {
watchdog->m_lastTimeoutPrintTime = now;
if (!watchdog->m_suppressTimeoutMessage) {
2025-11-07 20:00:43 -05:00
WPILIB_ReportWarning("Watchdog not fed within {:.6f}s",
2025-11-07 20:01:58 -05:00
watchdog->m_timeout.value());
}
}
// Set expiration flag before calling the callback so any manipulation
// of the flag in the callback (e.g., calling Disable()) isn't
// clobbered.
watchdog->m_isExpired = true;
lock.unlock();
watchdog->m_callback();
lock.lock();
UpdateAlarm();
[hal] Revamp notifiers (#8424) This changes the HAL notifier interface to: - Use wpiutil signal objects. This means waiting is done through the `WPI_WaitObject` API instead of a dedicated function and allows for higher level code to simultaneously wait on notifiers and other events. - Interval timers are supported at the HAL layer - Handlers are now required to acknowledge notifications. This is invisible to users unless they're directly using the HAL API. - For interval timers, an overrun count is maintained to detect if the handler didn't acknowledge The underlying implementation still uses condition variables for the actual waiting. In basic testing using this approach seemed to be lower jitter than timerfd. Currently, the simulation and systemcore implementations are nearly identical except for a few additional sim hook bits. This could be refactored, but keeping them separate may make sense to keep the systemcore implementation easy to read and reason about, or if we ever choose to use a different underlying timer implementation on systemcore. The simulation side API is unchanged in form but does change in function--waiting for notifiers now only waits for currently running (or newly signaled) notifiers to acknowledge. To avoid a race condition in sim stepTiming, users of the low level API must make any alarm updates (especially for one-shot alarms) prior to acknowledging the previous alarm. The only current use of the interval timer feature is the `Notifier` class. The `TimedRobot` implementation still uses a single notifier and its own interval timing logic to ensure consistent callback order. Using separate notifiers for each user-level interval would substantially increase complexity. `Watchdog` also doesn't use the interval timer, as it's looking for an amount of time since the last `set` call rather than a recurring interval time. To reduce flicker, the sim GUI uses a fade out when a timeout goes from set to unset. 
This fixes tsan for wpilib and commands, and also fixes some spurious test failures.
2025-11-29 11:00:18 -08:00
HAL_AcknowledgeNotifierAlarm(notifier, &status);
}
}
2025-11-07 20:00:05 -05:00
// Constructs a watchdog with the given timeout and timeout callback.
//
// The callback is moved into the object and the shared Impl returned by
// GetImpl() is stored. The watchdog is not added to the queue here; that
// happens in Enable() or SetTimeout().
Watchdog::Watchdog(wpi::units::second_t timeout, std::function<void()> callback)
    : m_timeout(timeout), m_callback(std::move(callback)), m_impl(GetImpl()) {}
// Disables the watchdog on destruction. A RuntimeError thrown while disabling
// is reported via Report() instead of escaping the destructor.
Watchdog::~Watchdog() {
  try {
    Disable();
  } catch (const RuntimeError& error) {
    error.Report();
  }
}
// Move constructor: implemented by delegating to the move assignment operator.
Watchdog::Watchdog(Watchdog&& rhs) {
  operator=(std::move(rhs));
}
// Move assignment: takes over rhs's timing state, callback and tracer, and
// replaces rhs with this object in the shared watchdog queue.
//
// Self-assignment is a no-op. If this object was itself scheduled before the
// assignment, its old queue entry is removed first so the queue never holds an
// entry whose expiration time changed underneath it (or a duplicate entry),
// and the notifier alarm is refreshed afterwards.
//
// Returns *this.
Watchdog& Watchdog::operator=(Watchdog&& rhs) {
  // Self-move would move m_callback and m_tracer onto themselves.
  if (this == &rhs) {
    return *this;
  }

  m_impl = rhs.m_impl;

  std::scoped_lock lock(m_impl->m_mutex);

  // A non-zero expiration time means this object may currently be queued
  // (Disable() uses the same convention). Drop that entry before
  // m_expirationTime is overwritten; remove() on an absent element is already
  // relied upon by Enable() and SetTimeout().
  const bool wasScheduled = m_expirationTime != 0_s;
  if (wasScheduled) {
    m_impl->m_watchdogs.remove(this);
  }

  m_startTime = rhs.m_startTime;
  m_timeout = rhs.m_timeout;
  m_expirationTime = rhs.m_expirationTime;
  m_callback = std::move(rhs.m_callback);
  m_lastTimeoutPrintTime = rhs.m_lastTimeoutPrintTime;
  m_suppressTimeoutMessage = rhs.m_suppressTimeoutMessage;
  m_tracer = std::move(rhs.m_tracer);
  m_isExpired = rhs.m_isExpired;

  if (m_expirationTime != 0_s) {
    // Swap the queue entry from the moved-from object to this one.
    m_impl->m_watchdogs.remove(&rhs);
    m_impl->m_watchdogs.emplace(this);

    // Leave the moved-from object in the "not scheduled" state.
    rhs.m_expirationTime = 0_s;
  }

  // Removing this object's old entry may have changed which watchdog expires
  // next, so bring the alarm back in sync with the queue.
  if (wasScheduled) {
    m_impl->UpdateAlarm();
  }

  return *this;
}
2025-11-07 20:00:05 -05:00
// Returns the time elapsed between m_startTime and the current FPGA
// timestamp.
wpi::units::second_t Watchdog::GetTime() const {
  const auto now = Timer::GetFPGATimestamp();
  return now - m_startTime;
}
2025-11-07 20:00:05 -05:00
// Changes the timeout and restarts the watchdog from the current FPGA time.
//
// Clears the tracer's epochs, clears the expired flag, and re-queues this
// watchdog with expiration = start time + new timeout, then refreshes the
// notifier alarm.
void Watchdog::SetTimeout(wpi::units::second_t timeout) {
  m_startTime = Timer::GetFPGATimestamp();
  m_tracer.ClearEpochs();

  std::scoped_lock lock(m_impl->m_mutex);
  auto& queue = m_impl->m_watchdogs;

  // Take any existing entry out before its expiration time is changed.
  queue.remove(this);

  m_isExpired = false;
  m_timeout = timeout;
  m_expirationTime = m_startTime + m_timeout;

  queue.emplace(this);
  m_impl->UpdateAlarm();
}
2025-11-07 20:00:05 -05:00
// Returns the configured timeout, read while holding the shared mutex.
wpi::units::second_t Watchdog::GetTimeout() const {
  std::scoped_lock lock(m_impl->m_mutex);
  const wpi::units::second_t timeout = m_timeout;
  return timeout;
}
// Returns the expired flag, read while holding the shared mutex. The flag is
// set by the Impl thread on timeout and cleared by Enable()/SetTimeout().
bool Watchdog::IsExpired() const {
  std::scoped_lock lock(m_impl->m_mutex);
  const bool expired = m_isExpired;
  return expired;
}
void Watchdog::AddEpoch(std::string_view epochName) {
m_tracer.AddEpoch(epochName);
}
void Watchdog::PrintEpochs() {
m_tracer.PrintEpochs();
}
void Watchdog::Reset() {
Enable();
}
void Watchdog::Enable() {
m_startTime = Timer::GetFPGATimestamp();
m_tracer.ClearEpochs();
std::scoped_lock lock(m_impl->m_mutex);
m_isExpired = false;
m_impl->m_watchdogs.remove(this);
m_expirationTime = m_startTime + m_timeout;
m_impl->m_watchdogs.emplace(this);
m_impl->UpdateAlarm();
}
// Stops the watchdog. Does nothing when the expiration time is already zero;
// otherwise removes this watchdog from the queue, zeroes the expiration time,
// and refreshes the notifier alarm.
void Watchdog::Disable() {
  std::scoped_lock lock(m_impl->m_mutex);

  const bool scheduled = m_expirationTime != 0_s;
  if (!scheduled) {
    return;
  }

  m_impl->m_watchdogs.remove(this);
  m_expirationTime = 0_s;
  m_impl->UpdateAlarm();
}
void Watchdog::SuppressTimeoutMessage(bool suppress) {
m_suppressTimeoutMessage = suppress;
}
// Orders watchdogs by expiration time: true when this watchdog expires later
// than rhs. Used through Impl::DerefGreater to order the watchdog queue.
bool Watchdog::operator>(const Watchdog& rhs) const {
  const auto& mine = m_expirationTime;
  const auto& theirs = rhs.m_expirationTime;
  return mine > theirs;
}
// Returns the single Impl shared by all watchdogs. It is a function-local
// static, so it is constructed on the first call.
Watchdog::Impl* Watchdog::GetImpl() {
  static Impl instance;
  return &instance;
}