namespace wpi {

/**
 * A spinlock mutex.  Wraps std::atomic_flag in a std::mutex compatible way
 * (lock / try_lock / unlock), so it works with std::lock_guard and friends.
 *
 * The lock is not recursive: lock() keeps spinning for as long as the flag is
 * set, no matter which thread set it.
 */
class spinlock {
  // Clear means unlocked.  The flag is initialized in-class with
  // ATOMIC_FLAG_INIT (rather than by calling clear() in the constructor body)
  // so the constructor can be constexpr.  A spinlock with static storage
  // duration is then constant-initialized, and no dynamic initializer can run
  // later and reset a flag that another static initializer already acquired.
  std::atomic_flag lock_flag = ATOMIC_FLAG_INIT;

 public:
  /** Constructs the spinlock in the unlocked state. */
  constexpr spinlock() noexcept = default;

  /**
   * Makes a single attempt to take the lock.
   *
   * @return true if the lock was acquired, false if it was already held.
   */
  LLVM_ATTRIBUTE_ALWAYS_INLINE
  bool try_lock() { return !lock_flag.test_and_set(std::memory_order_acquire); }

  /**
   * Spins until the lock is acquired.  Every 256th failed attempt yields the
   * thread so that the current holder gets a chance to run.
   */
  LLVM_ATTRIBUTE_ALWAYS_INLINE
  void lock() {
    for (unsigned int i = 1; !try_lock(); ++i)
      if ((i & 0xff) == 0) std::this_thread::yield();
  }

  /** Releases the lock by clearing the flag with release ordering. */
  LLVM_ATTRIBUTE_ALWAYS_INLINE
  void unlock() { lock_flag.clear(std::memory_order_release); }
};

}  // namespace wpi
namespace wpi {

/**
 * A recursive spinlock mutex.  This version uses std::atomic_flag for spin,
 * then checks the thread id for recursion.  It is generally faster on desktop
 * platforms compared to recursive_spinlock2.
 *
 * Every successful lock()/try_lock() must be balanced by one unlock() from
 * the owning thread; the lock is released when the count returns to zero.
 */
class recursive_spinlock1 {
  // Id of the thread that currently holds lock_flag; a default-constructed
  // id while the lock is free.
  std::atomic<std::thread::id> owner_thread_id{std::thread::id{}};
  // Number of times the owner has acquired the lock.  Only touched by the
  // owning thread, so it does not need to be atomic.
  int32_t recursive_counter{0};
  std::atomic_flag lock_flag;

 public:
  /** Constructs the lock in the unlocked state. */
  recursive_spinlock1() noexcept { lock_flag.clear(); }

  /**
   * Makes a single attempt to take the lock.
   *
   * @return true if the lock was free or is already owned by the calling
   *         thread (the recursion count is incremented); false if another
   *         thread holds it.
   */
  LLVM_ATTRIBUTE_ALWAYS_INLINE
  bool try_lock() {
    const auto us = std::this_thread::get_id();
    if (!lock_flag.test_and_set(std::memory_order_acquire)) {
      // The flag was clear: we are the new owner, so publish our id.
      owner_thread_id.store(us, std::memory_order_release);
    } else if (owner_thread_id.load(std::memory_order_acquire) != us) {
      // The flag is held by somebody else (or by a thread that has not
      // published its id yet).
      return false;
    }
    ++recursive_counter;
    return true;
  }

  /**
   * Spins until the lock is acquired, yielding the thread on every 65536th
   * failed attempt.
   */
  LLVM_ATTRIBUTE_ALWAYS_INLINE
  void lock() {
    for (unsigned int i = 1; !try_lock(); ++i)
      if ((i & 0xffff) == 0) std::this_thread::yield();
  }

  /**
   * Undoes one lock()/try_lock().  Must be called by the owning thread; this
   * is only checked by assert().
   */
  LLVM_ATTRIBUTE_ALWAYS_INLINE
  void unlock() {
    assert(owner_thread_id.load(std::memory_order_acquire) ==
           std::this_thread::get_id());
    assert(recursive_counter > 0);

    if (--recursive_counter == 0) {
      // Reset the owner before clearing the flag so that the next thread to
      // take the flag never observes a stale owner id.
      owner_thread_id.store(std::thread::id{}, std::memory_order_release);
      lock_flag.clear(std::memory_order_release);
    }
  }
};

}  // namespace wpi
namespace wpi {

// Declared here only so that the recursive_spinlock alias below names a known
// type; the class itself is defined earlier in this header.
class recursive_spinlock1;

/**
 * A recursive spinlock mutex.  This version spins directly on the std::atomic
 * of the thread id.  It is generally faster on embedded ARM platforms such
 * as the RoboRIO and Raspberry Pi, compared to recursive_spinlock1.
 *
 * Every successful lock()/try_lock() must be balanced by one unlock() from
 * the owning thread; the lock is released when the count returns to zero.
 */
class recursive_spinlock2 {
  // Id of the owning thread; a default-constructed id while the lock is free.
  std::atomic<std::thread::id> owner_thread_id{std::thread::id{}};
  // Number of times the owner has acquired the lock.  Only touched by the
  // owning thread, so it does not need to be atomic.
  int32_t recursive_counter{0};

 public:
  /**
   * Makes a single attempt to take the lock.
   *
   * Uses a strong compare-exchange: this is a one-shot attempt, so a spurious
   * failure of the weak form would be reported to the caller as "lock busy"
   * even when the lock is free.
   *
   * @return true if the lock was free or is already owned by the calling
   *         thread (the recursion count is incremented); false if another
   *         thread holds it.
   */
  LLVM_ATTRIBUTE_ALWAYS_INLINE
  bool try_lock() {
    const auto us = std::this_thread::get_id();
    auto owner = std::thread::id{};
    if (!owner_thread_id.compare_exchange_strong(owner, us,
                                                 std::memory_order_acquire) &&
        owner != us) {
      return false;
    }
    ++recursive_counter;
    return true;
  }

  /**
   * Spins until the lock is acquired, yielding the thread on every 65536th
   * failed attempt.
   */
  LLVM_ATTRIBUTE_ALWAYS_INLINE
  void lock() {
    // The calling thread's id cannot change while we spin; read it once.
    const auto us = std::this_thread::get_id();
    for (unsigned int i = 1;; ++i) {
      // compare_exchange overwrites its first argument on failure, so the
      // expected "free" value has to be reset on every attempt.
      auto owner = std::thread::id{};
      // The weak form is fine here: a spurious failure just spins again.
      if (owner_thread_id.compare_exchange_weak(owner, us,
                                                std::memory_order_acquire) ||
          owner == us) {
        break;
      }
      if ((i & 0xffff) == 0) std::this_thread::yield();
    }
    ++recursive_counter;
  }

  /**
   * Undoes one lock()/try_lock().  Must be called by the owning thread; this
   * is only checked by assert().
   */
  LLVM_ATTRIBUTE_ALWAYS_INLINE
  void unlock() {
    assert(owner_thread_id.load(std::memory_order_acquire) ==
           std::this_thread::get_id());
    assert(recursive_counter > 0);

    if (--recursive_counter == 0)
      owner_thread_id.store(std::thread::id{}, std::memory_order_release);
  }
};

#ifdef __arm__
// benchmarking has shown this version to be faster on ARM, but slower on
// windows, mac, and linux
using recursive_spinlock = recursive_spinlock2;
#else
using recursive_spinlock = recursive_spinlock1;
#endif

}  // namespace wpi
*/ +/*----------------------------------------------------------------------------*/ + +#include "wpi/spinlock.h" // NOLINT(build/include_order) + +#include +#include +#include +#include + +#include "gtest/gtest.h" +#include "wpi/mutex.h" + +static std::mutex std_mutex; +static std::recursive_mutex std_recursive_mutex; +static wpi::mutex wpi_mutex; +static wpi::recursive_mutex wpi_recursive_mutex; +static wpi::spinlock spinlock; +static wpi::recursive_spinlock1 recursive_spinlock1; +static wpi::recursive_spinlock2 recursive_spinlock2; +static wpi::recursive_spinlock recursive_spinlock; + +TEST(SpinlockTest, Benchmark) { + using namespace std::chrono; + + // warmup + std::thread thr([]() { + int value = 0; + + auto start = high_resolution_clock::now(); + for (int i = 0; i < 10000000; i++) { + std::lock_guard lock(std_mutex); + ++value; + } + auto stop = high_resolution_clock::now(); + (void)start; + (void)stop; + }); + thr.join(); + + std::thread thrb([]() { + int value = 0; + + auto start = high_resolution_clock::now(); + for (int i = 0; i < 1000000; i++) { + std::lock_guard lock(std_mutex); + ++value; + } + auto stop = high_resolution_clock::now(); + std::cout << "std::mutex sizeof: " << sizeof(std_mutex) + << " time: " << duration_cast(stop - start).count() + << " value: " << value << "\n"; + }); + thrb.join(); + + std::thread thrb2([]() { + int value = 0; + + auto start = high_resolution_clock::now(); + for (int i = 0; i < 1000000; i++) { + std::lock_guard lock(std_recursive_mutex); + ++value; + } + auto stop = high_resolution_clock::now(); + std::cout << "std::recursive_mutex sizeof: " << sizeof(std_recursive_mutex) + << " time: " << duration_cast(stop - start).count() + << " value: " << value << "\n"; + }); + thrb2.join(); + + std::thread thr2([]() { + int value = 0; + + auto start = high_resolution_clock::now(); + for (int i = 0; i < 1000000; i++) { + std::lock_guard lock(wpi_mutex); + ++value; + } + auto stop = high_resolution_clock::now(); + std::cout << 
"wpi::mutex sizeof: " << sizeof(wpi_mutex) + << " time: " << duration_cast(stop - start).count() + << " value: " << value << "\n"; + }); + thr2.join(); + + std::thread thr2b([]() { + int value = 0; + + auto start = high_resolution_clock::now(); + for (int i = 0; i < 1000000; i++) { + std::lock_guard lock(wpi_recursive_mutex); + ++value; + } + auto stop = high_resolution_clock::now(); + std::cout << "wpi::recursive_mutex sizeof: " << sizeof(wpi_recursive_mutex) + << " time: " << duration_cast(stop - start).count() + << " value: " << value << "\n"; + }); + thr2b.join(); + + std::thread thr3([]() { + int value = 0; + + auto start = high_resolution_clock::now(); + for (int i = 0; i < 1000000; i++) { + std::lock_guard lock(spinlock); + ++value; + } + auto stop = high_resolution_clock::now(); + std::cout << "spinlock sizeof: " << sizeof(spinlock) + << " time: " << duration_cast(stop - start).count() + << " value: " << value << "\n"; + }); + thr3.join(); + + std::thread thr4([]() { + int value = 0; + + auto start = high_resolution_clock::now(); + for (int i = 0; i < 1000000; i++) { + std::lock_guard lock(recursive_spinlock1); + ++value; + } + auto stop = high_resolution_clock::now(); + std::cout << "recursive_spinlock1 sizeof: " << sizeof(recursive_spinlock1) + << " time: " << duration_cast(stop - start).count() + << " value: " << value << "\n"; + }); + thr4.join(); + + std::thread thr4b([]() { + int value = 0; + + auto start = high_resolution_clock::now(); + for (int i = 0; i < 1000000; i++) { + std::lock_guard lock(recursive_spinlock2); + ++value; + } + auto stop = high_resolution_clock::now(); + std::cout << "recursive_spinlock2 sizeof: " << sizeof(recursive_spinlock2) + << " time: " << duration_cast(stop - start).count() + << " value: " << value << "\n"; + }); + thr4b.join(); + + std::thread thr4c([]() { + int value = 0; + + auto start = high_resolution_clock::now(); + for (int i = 0; i < 1000000; i++) { + std::lock_guard lock(recursive_spinlock); + ++value; + } + 
auto stop = high_resolution_clock::now(); + std::cout << "recursive_spinlock sizeof: " << sizeof(recursive_spinlock) + << " time: " << duration_cast(stop - start).count() + << " value: " << value << "\n"; + }); + thr4c.join(); +}