[upstream_utils] Upgrade Sleipnir to use a small vector type (#6565)

This sped up ArmFeedforward.Calculate() by up to 2x.
This commit is contained in:
Tyler Veness
2024-04-29 22:00:32 -07:00
committed by GitHub
parent a9cfd0d0f9
commit 7601b7250a
15 changed files with 237 additions and 45 deletions

View File

@@ -10,11 +10,11 @@
#include <memory>
#include <numbers>
#include <utility>
#include <vector>
#include "sleipnir/autodiff/ExpressionType.hpp"
#include "sleipnir/util/IntrusiveSharedPtr.hpp"
#include "sleipnir/util/Pool.hpp"
#include "sleipnir/util/SmallVector.hpp"
#include "sleipnir/util/SymbolExports.hpp"
namespace sleipnir::detail {
@@ -425,7 +425,7 @@ inline void IntrusiveSharedPtrDecRefCount(Expression* expr) {
// Expression destructor when expr's refcount reaches zero can cause a stack
// overflow. Instead, we iterate over its children to decrement their
// refcounts and deallocate them.
std::vector<Expression*> stack;
small_vector<Expression*> stack;
stack.emplace_back(expr);
while (!stack.empty()) {

View File

@@ -3,10 +3,10 @@
#pragma once
#include <span>
#include <vector>
#include "sleipnir/autodiff/Expression.hpp"
#include "sleipnir/util/FunctionRef.hpp"
#include "sleipnir/util/SmallVector.hpp"
#include "sleipnir/util/SymbolExports.hpp"
namespace sleipnir::detail {
@@ -36,7 +36,7 @@ class SLEIPNIR_DLLEXPORT ExpressionGraph {
// https://en.wikipedia.org/wiki/Breadth-first_search
// BFS list sorted from parent to child.
std::vector<Expression*> stack;
small_vector<Expression*> stack;
stack.emplace_back(root.Get());
@@ -119,7 +119,7 @@ class SLEIPNIR_DLLEXPORT ExpressionGraph {
*
* @param wrt Variables with respect to which to compute the gradient.
*/
std::vector<ExpressionPtr> GenerateGradientTree(
small_vector<ExpressionPtr> GenerateGradientTree(
std::span<const ExpressionPtr> wrt) const {
// Read docs/algorithms.md#Reverse_accumulation_automatic_differentiation
// for background on reverse accumulation automatic differentiation.
@@ -128,7 +128,7 @@ class SLEIPNIR_DLLEXPORT ExpressionGraph {
wrt[row]->row = row;
}
std::vector<ExpressionPtr> grad;
small_vector<ExpressionPtr> grad;
grad.reserve(wrt.size());
for (size_t row = 0; row < wrt.size(); ++row) {
grad.emplace_back(MakeExpressionPtr());
@@ -231,13 +231,13 @@ class SLEIPNIR_DLLEXPORT ExpressionGraph {
private:
// List that maps nodes to their respective row.
std::vector<int> m_rowList;
small_vector<int> m_rowList;
// List for updating adjoints
std::vector<Expression*> m_adjointList;
small_vector<Expression*> m_adjointList;
// List for updating values
std::vector<Expression*> m_valueList;
small_vector<Expression*> m_valueList;
};
} // namespace sleipnir::detail

View File

@@ -3,7 +3,6 @@
#pragma once
#include <utility>
#include <vector>
#include <Eigen/Core>
#include <Eigen/SparseCore>
@@ -13,6 +12,7 @@
#include "sleipnir/autodiff/Profiler.hpp"
#include "sleipnir/autodiff/Variable.hpp"
#include "sleipnir/autodiff/VariableMatrix.hpp"
#include "sleipnir/util/SmallVector.hpp"
#include "sleipnir/util/SymbolExports.hpp"
namespace sleipnir {
@@ -36,7 +36,7 @@ class SLEIPNIR_DLLEXPORT Hessian {
Hessian(Variable variable, const VariableMatrix& wrt) noexcept
: m_jacobian{
[&] {
std::vector<detail::ExpressionPtr> wrtVec;
small_vector<detail::ExpressionPtr> wrtVec;
wrtVec.reserve(wrt.size());
for (auto& elem : wrt) {
wrtVec.emplace_back(elem.expr);

View File

@@ -3,7 +3,6 @@
#pragma once
#include <utility>
#include <vector>
#include <Eigen/SparseCore>
@@ -11,6 +10,7 @@
#include "sleipnir/autodiff/Profiler.hpp"
#include "sleipnir/autodiff/Variable.hpp"
#include "sleipnir/autodiff/VariableMatrix.hpp"
#include "sleipnir/util/SmallVector.hpp"
#include "sleipnir/util/SymbolExports.hpp"
namespace sleipnir {
@@ -81,7 +81,7 @@ class SLEIPNIR_DLLEXPORT Jacobian {
VariableMatrix Get() const {
VariableMatrix result{m_variables.Rows(), m_wrt.Rows()};
std::vector<detail::ExpressionPtr> wrtVec;
small_vector<detail::ExpressionPtr> wrtVec;
wrtVec.reserve(m_wrt.size());
for (auto& elem : m_wrt) {
wrtVec.emplace_back(elem.expr);
@@ -145,16 +145,16 @@ class SLEIPNIR_DLLEXPORT Jacobian {
VariableMatrix m_variables;
VariableMatrix m_wrt;
std::vector<detail::ExpressionGraph> m_graphs;
small_vector<detail::ExpressionGraph> m_graphs;
Eigen::SparseMatrix<double> m_J{m_variables.Rows(), m_wrt.Rows()};
// Cached triplets for gradients of linear rows
std::vector<Eigen::Triplet<double>> m_cachedTriplets;
small_vector<Eigen::Triplet<double>> m_cachedTriplets;
// List of row indices for nonlinear rows whose gradients will be computed in
// Value()
std::vector<int> m_nonlinearRows;
small_vector<int> m_nonlinearRows;
Profiler m_profiler;
};

View File

@@ -4,11 +4,11 @@
#include <algorithm>
#include <concepts>
#include <vector>
#include "sleipnir/autodiff/Variable.hpp"
#include "sleipnir/util/Assert.hpp"
#include "sleipnir/util/Concepts.hpp"
#include "sleipnir/util/SmallVector.hpp"
#include "sleipnir/util/SymbolExports.hpp"
namespace sleipnir {
@@ -28,8 +28,8 @@ template <typename LHS, typename RHS>
requires(ScalarLike<LHS> || MatrixLike<LHS>) &&
(ScalarLike<RHS> || MatrixLike<RHS>) &&
(!std::same_as<LHS, double> || !std::same_as<RHS, double>)
std::vector<Variable> MakeConstraints(const LHS& lhs, const RHS& rhs) {
std::vector<Variable> constraints;
small_vector<Variable> MakeConstraints(const LHS& lhs, const RHS& rhs) {
small_vector<Variable> constraints;
if constexpr (ScalarLike<LHS> && ScalarLike<RHS>) {
constraints.emplace_back(lhs - rhs);
@@ -113,7 +113,7 @@ std::vector<Variable> MakeConstraints(const LHS& lhs, const RHS& rhs) {
*/
struct SLEIPNIR_DLLEXPORT EqualityConstraints {
/// A vector of scalar equality constraints.
std::vector<Variable> constraints;
small_vector<Variable> constraints;
/**
* Constructs an equality constraint from a left and right side.
@@ -146,7 +146,7 @@ struct SLEIPNIR_DLLEXPORT EqualityConstraints {
*/
struct SLEIPNIR_DLLEXPORT InequalityConstraints {
/// A vector of scalar inequality constraints.
std::vector<Variable> constraints;
small_vector<Variable> constraints;
/**
* Constructs an inequality constraint from a left and right side.

View File

@@ -5,10 +5,10 @@
#include <algorithm>
#include <future>
#include <span>
#include <vector>
#include "sleipnir/optimization/SolverStatus.hpp"
#include "sleipnir/util/FunctionRef.hpp"
#include "sleipnir/util/SmallVector.hpp"
namespace sleipnir {
@@ -43,12 +43,16 @@ MultistartResult<DecisionVariables> Multistart(
function_ref<MultistartResult<DecisionVariables>(const DecisionVariables&)>
solve,
std::span<const DecisionVariables> initialGuesses) {
std::vector<std::future<MultistartResult<DecisionVariables>>> futures;
small_vector<std::future<MultistartResult<DecisionVariables>>> futures;
futures.reserve(initialGuesses.size());
for (const auto& initialGuess : initialGuesses) {
futures.emplace_back(std::async(std::launch::async, solve, initialGuess));
}
std::vector<MultistartResult<DecisionVariables>> results;
small_vector<MultistartResult<DecisionVariables>> results;
results.reserve(futures.size());
for (auto& future : futures) {
results.emplace_back(future.get());
}

View File

@@ -10,7 +10,6 @@
#include <optional>
#include <type_traits>
#include <utility>
#include <vector>
#include <Eigen/Core>
@@ -23,6 +22,7 @@
#include "sleipnir/optimization/SolverStatus.hpp"
#include "sleipnir/optimization/solver/InteriorPoint.hpp"
#include "sleipnir/util/Print.hpp"
#include "sleipnir/util/SmallVector.hpp"
#include "sleipnir/util/SymbolExports.hpp"
namespace sleipnir {
@@ -531,16 +531,16 @@ class SLEIPNIR_DLLEXPORT OptimizationProblem {
private:
// The list of decision variables, which are the root of the problem's
// expression tree
std::vector<Variable> m_decisionVariables;
small_vector<Variable> m_decisionVariables;
// The cost function: f(x)
std::optional<Variable> m_f;
// The list of equality constraints: cₑ(x) = 0
std::vector<Variable> m_equalityConstraints;
small_vector<Variable> m_equalityConstraints;
// The list of inequality constraints: cᵢ(x) ≥ 0
std::vector<Variable> m_inequalityConstraints;
small_vector<Variable> m_inequalityConstraints;
// The user callback
std::function<bool(const SolverIterationInfo&)> m_callback =

View File

@@ -4,8 +4,8 @@
#include <cstddef>
#include <memory>
#include <vector>
#include "sleipnir/util/SmallVector.hpp"
#include "sleipnir/util/SymbolExports.hpp"
namespace sleipnir {
@@ -76,8 +76,8 @@ class SLEIPNIR_DLLEXPORT PoolResource {
}
private:
std::vector<std::unique_ptr<std::byte[]>> m_buffer;
std::vector<void*> m_freeList;
small_vector<std::unique_ptr<std::byte[]>> m_buffer;
small_vector<void*> m_freeList;
size_t blocksPerChunk;
/**

View File

@@ -0,0 +1,163 @@
// Copyright (c) Sleipnir contributors
#pragma once
#include <algorithm>
#include <cstddef>
#include <memory>
#include <type_traits>
#include <utility>
#include <vector>
namespace sleipnir {

/**
 * Allocator for std::vector that serves requests of up to MaxSize elements
 * from a buffer embedded in the allocator object and forwards every other
 * request to std::allocator.
 *
 * Because the buffer lives inside the allocator (and therefore inside the
 * vector that owns it), storage handed out from it must never be transferred
 * to another container. operator==() reports two allocators as unequal while
 * either of them has its buffer handed out, which makes std::vector fall back
 * to element-wise moves in that case.
 *
 * @tparam T Element type.
 * @tparam MaxSize Number of elements that fit into the embedded buffer.
 * @tparam NonReboundT Element type of the original (non-rebound) allocator.
 *   Rebound allocators (T != NonReboundT) never use the embedded buffer.
 */
template <typename T, size_t MaxSize = 8, typename NonReboundT = T>
struct small_buffer_vector_allocator {
  /// Embedded storage for up to MaxSize elements.
  alignas(alignof(T)) std::byte m_smallBuffer[MaxSize * sizeof(T)];

  /// Fallback allocator for requests the embedded buffer cannot serve.
  std::allocator<T> m_alloc;

  /// True while m_smallBuffer of *this* allocator is handed out by allocate()
  /// and has not yet been returned through deallocate().
  bool m_smallBufferUsed = false;

  using value_type = T;

  // we have to set these three values, as they are responsible for the correct
  // handling of the move assignment operator
  using propagate_on_container_move_assignment = std::false_type;
  using propagate_on_container_swap = std::false_type;
  using is_always_equal = std::false_type;

  constexpr small_buffer_vector_allocator() noexcept = default;

  /**
   * Rebinding conversion. The new allocator starts with an unused buffer.
   */
  template <class U>
  constexpr small_buffer_vector_allocator(  // NOLINT
      const small_buffer_vector_allocator<U, MaxSize, NonReboundT>&) noexcept {}

  template <class U>
  struct rebind {
    using other = small_buffer_vector_allocator<U, MaxSize, NonReboundT>;
  };

  // Neither the small buffer nor its "in use" flag is copied or moved: the
  // elements are transferred by the vector, and the flag describes the buffer
  // of one particular allocator object only. Copying the flag would either
  // block the fresh buffer of the copy or mark a buffer that is still in use
  // as free.
  constexpr small_buffer_vector_allocator(
      const small_buffer_vector_allocator&) noexcept {}

  constexpr small_buffer_vector_allocator& operator=(
      const small_buffer_vector_allocator&) noexcept {
    return *this;
  }

  constexpr small_buffer_vector_allocator(
      small_buffer_vector_allocator&&) noexcept {}

  constexpr small_buffer_vector_allocator& operator=(
      const small_buffer_vector_allocator&&) noexcept {
    return *this;
  }

  /**
   * Allocates storage for n elements.
   *
   * The embedded buffer is returned when the request fits into it and it is
   * not already handed out; otherwise the storage comes from std::allocator.
   * Refusing to hand out the buffer twice matters during reallocation: the
   * vector requests the new storage while the old one still holds live
   * elements, so both must be distinct.
   *
   * @param n Number of elements to allocate storage for.
   * @return Pointer to uninitialized storage for n elements.
   */
  [[nodiscard]]
  constexpr T* allocate(const size_t n) {
    // when the allocator was rebound we don't want to use the small buffer
    if constexpr (std::is_same_v<T, NonReboundT>) {
      if (n <= MaxSize && !m_smallBufferUsed) {
        m_smallBufferUsed = true;
        return reinterpret_cast<T*>(&m_smallBuffer);
      }
    }

    // otherwise use the default allocator
    return m_alloc.allocate(n);
  }

  /**
   * Releases storage previously obtained from allocate().
   *
   * @param p Pointer returned by allocate() of this allocator.
   * @param n Number of elements passed to the matching allocate() call.
   */
  constexpr void deallocate(void* p, const size_t n) {
    if (&m_smallBuffer == p) {
      // nothing to free; the buffer merely becomes available again
      m_smallBufferUsed = false;
      return;
    }

    // Heap block: the state of the small buffer is left untouched, because
    // the buffer may have been handed out for the replacement storage already.
    m_alloc.deallocate(static_cast<T*>(p), n);
  }

  // according to the C++ standard when propagate_on_container_move_assignment
  // is set to false, the comparison operators are used to check if two
  // allocators are equal. When they are not, an element wise move is done
  // instead of just taking over the memory. For our implementation this means
  // the comparison has to return false, when the small buffer is active
  friend constexpr bool operator==(const small_buffer_vector_allocator& lhs,
                                   const small_buffer_vector_allocator& rhs) {
    return !lhs.m_smallBufferUsed && !rhs.m_smallBufferUsed;
  }

  friend constexpr bool operator!=(const small_buffer_vector_allocator& lhs,
                                   const small_buffer_vector_allocator& rhs) {
    return !(lhs == rhs);
  }
};

/**
 * std::vector with inline storage for up to N elements.
 *
 * The copy/move operations and swap() defined here keep storage that lives in
 * the embedded buffer from being handed to another vector. They are not
 * virtual, so they only take effect when the object is used as a small_vector
 * (not through a vectorT reference).
 *
 * NOTE(review): standard library implementations may implement some inherited
 * members (for example shrink_to_fit() or assign(count, value)) with a
 * temporary vector and an internal storage swap; those paths are not guarded
 * here. Confirm against the targeted standard library before relying on them.
 *
 * @tparam T Element type.
 * @tparam N Number of elements stored inline.
 */
template <typename T, size_t N = 8>
class small_vector
    : public std::vector<T, small_buffer_vector_allocator<T, N>> {
 public:
  using vectorT = std::vector<T, small_buffer_vector_allocator<T, N>>;

  /// Constructs an empty vector with the inline capacity already reserved.
  constexpr small_vector() noexcept { vectorT::reserve(N); }

  /**
   * Copy constructor.
   *
   * Reserves the inline capacity first (like every other constructor) and
   * then copies the elements, so a copy never starts out with a capacity
   * smaller than N.
   */
  small_vector(const small_vector& other) : small_vector() {
    vectorT::insert(vectorT::end(), other.begin(), other.end());
  }

  small_vector& operator=(const small_vector&) = default;

  /**
   * Move constructor.
   *
   * Heap storage of other is taken over; elements living in other's inline
   * buffer are moved one by one into this vector's inline buffer.
   */
  small_vector(small_vector&& other) noexcept(
      std::is_nothrow_move_constructible_v<T>) {
    if (other.size() <= N) {
      vectorT::reserve(N);
    }
    vectorT::operator=(std::move(other));
  }

  /**
   * Move assignment. Self-assignment leaves the vector unchanged.
   */
  small_vector& operator=(small_vector&& other) noexcept(
      std::is_nothrow_move_constructible_v<T>) {
    if (this == &other) {
      return *this;
    }
    if (other.size() <= N) {
      vectorT::reserve(N);
    }
    vectorT::operator=(std::move(other));
    return *this;
  }

  // use the default constructor first to reserve then construct the values

  /// Constructs a vector with count value-initialized elements.
  explicit small_vector(size_t count) : small_vector() {
    vectorT::resize(count);
  }

  /// Constructs a vector with count copies of value.
  small_vector(size_t count, const T& value) : small_vector() {
    // The vector is empty here, so resize() is equivalent to assign() but
    // grows the storage in place through this vector's own allocator.
    vectorT::resize(count, value);
  }

  /// Constructs a vector from the range [first, last).
  template <class InputIt>
  small_vector(InputIt first, InputIt last) : small_vector() {
    vectorT::insert(vectorT::begin(), first, last);
  }

  /// Constructs a vector from an initializer list.
  small_vector(std::initializer_list<T> init) : small_vector() {  // NOLINT
    vectorT::insert(vectorT::begin(), init);
  }

  /**
   * Exchanges the contents of this vector and other.
   *
   * Implemented with the move operations above instead of
   * std::vector::swap(): the latter exchanges raw storage pointers, which is
   * invalid when either vector currently lives in its inline buffer. When
   * both vectors use heap storage the moves only transfer pointers.
   */
  void swap(small_vector& other) noexcept {
    if (this == &other) {
      return;
    }
    small_vector tmp(std::move(*this));
    *this = std::move(other);
    other = std::move(tmp);
  }

  friend void swap(small_vector& a, small_vector& b) noexcept { a.swap(b); }
};

/**
 * Erases all elements of c for which pred returns true.
 *
 * @param c Vector to erase from.
 * @param pred Unary predicate; elements for which it returns true are removed.
 * @return Number of erased elements.
 */
template <typename T, size_t N, typename Pred>
constexpr typename small_vector<T, N>::size_type erase_if(small_vector<T, N>& c,
                                                          Pred pred) {
  auto it = std::remove_if(c.begin(), c.end(), pred);
  auto r = c.end() - it;
  c.erase(it, c.end());
  return r;
}

}  // namespace sleipnir

View File

@@ -5,10 +5,10 @@
#include <fstream>
#include <string>
#include <string_view>
#include <vector>
#include <Eigen/SparseCore>
#include "sleipnir/util/SmallVector.hpp"
#include "sleipnir/util/SymbolExports.hpp"
namespace sleipnir {
@@ -32,7 +32,7 @@ SLEIPNIR_DLLEXPORT inline void Spy(std::ostream& file,
const int cells_width = mat.cols() + 1;
const int cells_height = mat.rows();
std::vector<uint8_t> cells;
small_vector<uint8_t> cells;
// Allocate space for matrix of characters plus trailing newlines
cells.reserve(cells_width * cells_height);