diff --git a/wpiutil/src/main/native/include/wpi/spinlock.h b/wpiutil/src/main/native/include/wpi/spinlock.h
new file mode 100644
index 0000000000..6a62818b5a
--- /dev/null
+++ b/wpiutil/src/main/native/include/wpi/spinlock.h
@@ -0,0 +1,133 @@
+/*----------------------------------------------------------------------------*/
+/* Copyright (c) 2018 FIRST. All Rights Reserved.                             */
+/* Open Source Software - may be modified and shared by FRC teams. The code   */
+/* must be accompanied by the FIRST BSD license file in the root directory of */
+/* the project.                                                               */
+/*----------------------------------------------------------------------------*/
+
+#pragma once
+
+#include <atomic>
+#include <cassert>
+#include <thread>
+
+#include "Compiler.h"
+
+namespace wpi {
+
+/**
+ * A spinlock mutex: std::atomic_flag behind a std::mutex-style interface.
+ */
+class spinlock {
+  std::atomic_flag flag_;  // set while some thread holds the lock
+
+ public:
+  spinlock() noexcept { flag_.clear(); }  // start out unlocked
+
+  LLVM_ATTRIBUTE_ALWAYS_INLINE  // one attempt; returns true if acquired
+  bool try_lock() { return !flag_.test_and_set(std::memory_order_acquire); }
+
+  LLVM_ATTRIBUTE_ALWAYS_INLINE  // spin, yielding on every 256th failure
+  void lock() {
+    for (unsigned int misses = 0; !try_lock();)
+      if ((++misses & 0xffu) == 0) std::this_thread::yield();
+  }
+
+  LLVM_ATTRIBUTE_ALWAYS_INLINE  // release the lock
+  void unlock() { flag_.clear(std::memory_order_release); }
+};
+
+/**
+ * A recursive spinlock mutex.  Spins on a std::atomic_flag and consults the
+ * stored owner thread id only to recognise re-entry by the holding thread.
+ * It is generally faster on desktop platforms than recursive_spinlock2.
+ */
+class recursive_spinlock1 {
+  std::atomic<std::thread::id> owner_{std::thread::id{}};  // current holder
+  int32_t depth_{0};       // nesting level of the current holder
+  std::atomic_flag flag_;  // set while some thread holds the lock
+
+ public:
+  recursive_spinlock1() noexcept { flag_.clear(); }  // start out unlocked
+
+  LLVM_ATTRIBUTE_ALWAYS_INLINE  // true if acquired or re-entered by holder
+  bool try_lock() {
+    const auto self = std::this_thread::get_id();
+    if (flag_.test_and_set(std::memory_order_acquire)) {
+      // Already held: succeed only when the holder is this thread.
+      if (owner_.load(std::memory_order_acquire) != self) return false;
+    } else {
+      owner_.store(self, std::memory_order_release);  // fresh acquisition
+    }
+    ++depth_;
+    return true;
+  }
+
+  LLVM_ATTRIBUTE_ALWAYS_INLINE  // spin, yielding on every 65536th failure
+  void lock() {
+    unsigned int misses = 0;
+    while (!try_lock())
+      if ((++misses & 0xffffu) == 0) std::this_thread::yield();
+  }
+
+  LLVM_ATTRIBUTE_ALWAYS_INLINE  // undo one lock(); frees at nesting level 0
+  void unlock() {
+    assert(owner_.load(std::memory_order_acquire) ==
+           std::this_thread::get_id());
+    assert(depth_ > 0);
+
+    if (--depth_ != 0) return;  // still held by an outer lock()
+    // Forget the owner first, then drop the flag so others can acquire.
+    owner_.store(std::thread::id{}, std::memory_order_release);
+    flag_.clear(std::memory_order_release);
+  }
+};
+
+/**
+ * A recursive spinlock mutex.  This version spins directly on the std::atomic
+ * of the thread id.  It is generally faster on embedded ARM platforms such
+ * as the RoboRIO and Raspberry Pi, compared to recursive_spinlock1.
+ */
+class recursive_spinlock2 {
+  std::atomic<std::thread::id> owner_thread_id{std::thread::id{}};
+  int32_t recursive_counter{0};  // nesting level of the current owner
+
+ public:
+  LLVM_ATTRIBUTE_ALWAYS_INLINE  // true if acquired or re-entered by owner
+  bool try_lock() {
+    auto owner = std::thread::id{};  // expected value: nobody owns the lock
+    const auto us = std::this_thread::get_id();
+    // Strong CAS: the weak form may fail spuriously on a free lock.
+    if (!owner_thread_id.compare_exchange_strong(owner, us,
+                                                 std::memory_order_acquire)) {
+      if (owner != us) return false;  // owner now holds the id actually stored
+    }
+    ++recursive_counter;
+    return true;
+  }
+
+  LLVM_ATTRIBUTE_ALWAYS_INLINE  // spin, yielding on every 65536th failure
+  void lock() {
+    for (unsigned int i = 1; !try_lock(); ++i)
+      if ((i & 0xffff) == 0) std::this_thread::yield();
+  }
+
+  LLVM_ATTRIBUTE_ALWAYS_INLINE  // undo one lock(); frees at nesting level 0
+  void unlock() {
+    assert(owner_thread_id.load(std::memory_order_acquire) ==
+           std::this_thread::get_id());
+    assert(recursive_counter > 0);
+    if (--recursive_counter == 0)
+      owner_thread_id.store(std::thread::id{}, std::memory_order_release);
+  }
+};
+
+// Benchmarking has shown recursive_spinlock2 to be faster on ARM, but slower
+// on Windows, Mac, and Linux, so the default alias is chosen per platform.
+#if defined(__arm__)
+using recursive_spinlock = recursive_spinlock2;
+#else
+using recursive_spinlock = recursive_spinlock1;
+#endif
+
+}  // namespace wpi
diff --git a/wpiutil/src/test/native/cpp/spinlock_bench.cpp b/wpiutil/src/test/native/cpp/spinlock_bench.cpp
new file mode 100644
index 0000000000..aa5d11bc60
--- /dev/null
+++ b/wpiutil/src/test/native/cpp/spinlock_bench.cpp
@@ -0,0 +1,164 @@
+/*----------------------------------------------------------------------------*/
+/* Copyright (c) 2018 FIRST. All Rights Reserved.                             */
+/* Open Source Software - may be modified and shared by FRC teams. The code   */
+/* must be accompanied by the FIRST BSD license file in the root directory of */
+/* the project.                                                               */
+/*----------------------------------------------------------------------------*/
+
+#include "wpi/spinlock.h"  // NOLINT(build/include_order)
+
+#include <chrono>
+#include <iostream>
+#include <mutex>
+#include <thread>
+
+#include "gtest/gtest.h"
+#include "wpi/mutex.h"
+
+static std::mutex std_mutex;  // locked by thr (warmup) and thrb
+static std::recursive_mutex std_recursive_mutex;  // locked by thrb2
+static wpi::mutex wpi_mutex;  // locked by thr2
+static wpi::recursive_mutex wpi_recursive_mutex;  // locked by thr2b
+static wpi::spinlock spinlock;  // locked by thr3
+static wpi::recursive_spinlock1 recursive_spinlock1;  // locked by thr4
+static wpi::recursive_spinlock2 recursive_spinlock2;  // locked by thr4b
+static wpi::recursive_spinlock recursive_spinlock;  // locked by thr4c
+
+// Uncontended micro-benchmark.  Each section below runs on a thread of its
+// own, locks and unlocks one lock type kIterations times, then prints the
+// lock's sizeof, the elapsed microseconds and the final counter value.  No
+// assertions are made; the output is meant for manual comparison.
+TEST(SpinlockTest, Benchmark) {
+  using namespace std::chrono;
+  // steady_clock is used for the timings because it is monotonic, whereas
+  // high_resolution_clock may be an alias of the adjustable system clock.
+  static constexpr int kIterations = 1000000;  // lock/unlock pairs per lock
+
+  // Warmup: ten times the measured iteration count, nothing is reported.
+  std::thread thr([] {
+    for (int i = 0; i < 10 * kIterations; ++i) {
+      std::lock_guard<std::mutex> lock(std_mutex);
+    }
+  });
+  thr.join();
+
+  // std::mutex
+  std::thread thrb([] {
+    int value = 0;
+    const auto start = steady_clock::now();
+    for (int i = 0; i < kIterations; ++i) {
+      std::lock_guard<std::mutex> lock(std_mutex);
+      ++value;
+    }
+    const auto stop = steady_clock::now();
+    std::cout << "std::mutex sizeof: " << sizeof(std_mutex)
+              << " time: " << duration_cast<microseconds>(stop - start).count()
+              << " value: " << value << "\n";
+  });
+  thrb.join();
+
+  // std::recursive_mutex
+  std::thread thrb2([] {
+    int value = 0;
+    const auto start = steady_clock::now();
+    for (int i = 0; i < kIterations; ++i) {
+      std::lock_guard<std::recursive_mutex> lock(std_recursive_mutex);
+      ++value;
+    }
+    const auto stop = steady_clock::now();
+    std::cout << "std::recursive_mutex sizeof: " << sizeof(std_recursive_mutex)
+              << " time: " << duration_cast<microseconds>(stop - start).count()
+              << " value: " << value << "\n";
+  });
+  thrb2.join();
+
+  // wpi::mutex
+  std::thread thr2([] {
+    int value = 0;
+    const auto start = steady_clock::now();
+    for (int i = 0; i < kIterations; ++i) {
+      std::lock_guard<wpi::mutex> lock(wpi_mutex);
+      ++value;
+    }
+    const auto stop = steady_clock::now();
+    std::cout << "wpi::mutex sizeof: " << sizeof(wpi_mutex)
+              << " time: " << duration_cast<microseconds>(stop - start).count()
+              << " value: " << value << "\n";
+  });
+  thr2.join();
+
+  // wpi::recursive_mutex
+  std::thread thr2b([] {
+    int value = 0;
+    const auto start = steady_clock::now();
+    for (int i = 0; i < kIterations; ++i) {
+      std::lock_guard<wpi::recursive_mutex> lock(wpi_recursive_mutex);
+      ++value;
+    }
+    const auto stop = steady_clock::now();
+    std::cout << "wpi::recursive_mutex sizeof: " << sizeof(wpi_recursive_mutex)
+              << " time: " << duration_cast<microseconds>(stop - start).count()
+              << " value: " << value << "\n";
+  });
+  thr2b.join();
+
+  // wpi::spinlock
+  std::thread thr3([] {
+    int value = 0;
+    const auto start = steady_clock::now();
+    for (int i = 0; i < kIterations; ++i) {
+      std::lock_guard<wpi::spinlock> lock(spinlock);
+      ++value;
+    }
+    const auto stop = steady_clock::now();
+    std::cout << "spinlock sizeof: " << sizeof(spinlock)
+              << " time: " << duration_cast<microseconds>(stop - start).count()
+              << " value: " << value << "\n";
+  });
+  thr3.join();
+
+  // wpi::recursive_spinlock1
+  std::thread thr4([] {
+    int value = 0;
+    const auto start = steady_clock::now();
+    for (int i = 0; i < kIterations; ++i) {
+      std::lock_guard<wpi::recursive_spinlock1> lock(recursive_spinlock1);
+      ++value;
+    }
+    const auto stop = steady_clock::now();
+    std::cout << "recursive_spinlock1 sizeof: " << sizeof(recursive_spinlock1)
+              << " time: " << duration_cast<microseconds>(stop - start).count()
+              << " value: " << value << "\n";
+  });
+  thr4.join();
+
+  // wpi::recursive_spinlock2
+  std::thread thr4b([] {
+    int value = 0;
+    const auto start = steady_clock::now();
+    for (int i = 0; i < kIterations; ++i) {
+      std::lock_guard<wpi::recursive_spinlock2> lock(recursive_spinlock2);
+      ++value;
+    }
+    const auto stop = steady_clock::now();
+    std::cout << "recursive_spinlock2 sizeof: " << sizeof(recursive_spinlock2)
+              << " time: " << duration_cast<microseconds>(stop - start).count()
+              << " value: " << value << "\n";
+  });
+  thr4b.join();
+
+  // wpi::recursive_spinlock (the platform-selected alias)
+  std::thread thr4c([] {
+    int value = 0;
+    const auto start = steady_clock::now();
+    for (int i = 0; i < kIterations; ++i) {
+      std::lock_guard<wpi::recursive_spinlock> lock(recursive_spinlock);
+      ++value;
+    }
+    const auto stop = steady_clock::now();
+    std::cout << "recursive_spinlock sizeof: " << sizeof(recursive_spinlock)
+              << " time: " << duration_cast<microseconds>(stop - start).count()
+              << " value: " << value << "\n";
+  });
+  thr4c.join();
+}