[upstream_utils] Upgrade to Sleipnir 0.6.1 (#8930)

This fixes a memory leak on Windows.
This commit is contained in:
Tyler Veness
2026-05-29 14:59:39 -07:00
committed by GitHub
parent 50fd29e697
commit e728ee27f2
13 changed files with 253 additions and 251 deletions

View File

@@ -96,22 +96,27 @@ struct Expression {
/// The adjoint of the expression node, used during autodiff.
Scalar adjoint{0};
/// Counts incoming edges for this node.
uint32_t incoming_edges = 0;
/// This expression's column in a Jacobian, or -1 otherwise.
int32_t col = -1;
/// The adjoint of the expression node, used during gradient expression tree
/// generation.
ExpressionPtr<Scalar> adjoint_expr;
/// Reference count for intrusive shared pointer.
uint32_t ref_count = 0;
/// Expression arguments.
std::array<ExpressionPtr<Scalar>, 2> args{nullptr, nullptr};
/// Scratch space for various graph algorithms.
///
/// In expression_graph.hpp's topological_sort(), scratch counts incoming
/// edges for this node, offset by -1 so -1 means no edges.
///
/// In Hessian and Jacobian constructors, scratch represents this expression's
/// column in a Jacobian, or -1 otherwise.
///
/// They share a default state of -1 to avoid extra assignments.
int32_t scratch = -1;
/// Reference count for intrusive shared pointer.
uint32_t ref_count = 0;
/// Constructs a constant expression with a value of zero.
constexpr Expression() = default;
@@ -774,6 +779,8 @@ void dec_ref_count(Expression<Scalar>* expr) {
auto alloc = global_pool_allocator<Expression<Scalar>>();
std::allocator_traits<decltype(alloc)>::deallocate(
alloc, elem, sizeof(Expression<Scalar>));
} else {
operator delete(elem);
}
}
}

View File

@@ -4,12 +4,19 @@
#include <ranges>
#include <Eigen/SparseCore>
#include <gch/small_vector.hpp>
#include "sleipnir/autodiff/expression.hpp"
namespace slp::detail {
/// Typedef for topologically sorted expression graph from parent to child.
///
/// @tparam Scalar Scalar type.
template <typename Scalar>
using ExpressionGraph = gch::small_vector<Expression<Scalar>*>;
/// Generates a topological sort of an expression graph from parent to child.
///
/// https://en.wikipedia.org/wiki/Topological_sorting
@@ -17,9 +24,8 @@ namespace slp::detail {
/// @tparam Scalar Scalar type.
/// @param root The root node of the expression.
template <typename Scalar>
gch::small_vector<Expression<Scalar>*> topological_sort(
const ExpressionPtr<Scalar>& root) {
gch::small_vector<Expression<Scalar>*> list;
ExpressionGraph<Scalar> topological_sort(const ExpressionPtr<Scalar>& root) {
ExpressionGraph<Scalar> list;
// If the root type is constant, updates are a no-op, so return an empty list
if (root == nullptr || root->type() == ExpressionType::CONSTANT) {
@@ -30,6 +36,8 @@ gch::small_vector<Expression<Scalar>*> topological_sort(
gch::small_vector<Expression<Scalar>*> stack;
// Enumerate incoming edges for each node via depth-first search
//
// NOTE: scratch counts incoming edges, offset by -1 so -1 means no edges.
stack.emplace_back(root.get());
while (!stack.empty()) {
auto node = stack.back();
@@ -37,7 +45,7 @@ gch::small_vector<Expression<Scalar>*> topological_sort(
for (auto& arg : node->args) {
// If the node hasn't been explored yet, add it to the stack
if (arg != nullptr && ++arg->incoming_edges == 1) {
if (arg != nullptr && ++arg->scratch == 0) {
stack.push_back(arg.get());
}
}
@@ -46,8 +54,7 @@ gch::small_vector<Expression<Scalar>*> topological_sort(
// Generate topological sort of graph from parent to child.
//
// A node is only added to the stack after all its incoming edges have been
// traversed. Expression::incoming_edges is a decrementing counter for
// tracking this.
// traversed. Expression::scratch is a decrementing counter for tracking this.
//
// https://en.wikipedia.org/wiki/Topological_sorting
stack.emplace_back(root.get());
@@ -59,7 +66,7 @@ gch::small_vector<Expression<Scalar>*> topological_sort(
for (auto& arg : node->args) {
// If we traversed all this node's incoming edges, add it to the stack
if (arg != nullptr && --arg->incoming_edges == 0) {
if (arg != nullptr && --arg->scratch == -1) {
stack.push_back(arg.get());
}
}
@@ -74,7 +81,7 @@ gch::small_vector<Expression<Scalar>*> topological_sort(
/// @tparam Scalar Scalar type.
/// @param list Topological sort of graph from parent to child.
template <typename Scalar>
void update_values(const gch::small_vector<Expression<Scalar>*>& list) {
void update_values(const ExpressionGraph<Scalar>& list) {
// Traverse graph from child to parent and update values
for (auto& node : list | std::views::reverse) {
auto& lhs = node->args[0];
@@ -86,4 +93,61 @@ void update_values(const gch::small_vector<Expression<Scalar>*>& list) {
}
}
/// Updates the adjoints in the expression graph (computes the gradient) then
/// appends the adjoints of wrt to the sparse matrix triplets.
///
/// @tparam Scalar Scalar type.
/// @param top_list Topologically sorted graph from parent to child.
/// @param output_list Output row as column-node pairs.
/// @param triplets The sparse matrix triplets.
/// @param row The row of wrt.
template <typename Scalar>
void append_triplets(
const ExpressionGraph<Scalar>& top_list,
const gch::small_vector<std::pair<int, detail::Expression<Scalar>*>>&
output_list,
gch::small_vector<Eigen::Triplet<Scalar>>& triplets, int row) {
// Read docs/algorithms.md#Reverse_accumulation_automatic_differentiation
// for background on reverse accumulation automatic differentiation.
if (top_list.empty()) {
return;
}
// Set root node's adjoint to 1 since df/df is 1
top_list[0]->adjoint = Scalar(1);
// Zero the rest of the adjoints
for (auto& node : top_list | std::views::drop(1)) {
node->adjoint = Scalar(0);
}
// df/dx = (df/dy)(dy/dx). The adjoint of x is equal to the adjoint of y
// multiplied by dy/dx. If there are multiple "paths" from the root node to
// variable; the variable's adjoint is the sum of each path's adjoint
// contribution.
for (const auto& node : top_list) {
auto& lhs = node->args[0];
auto& rhs = node->args[1];
if (lhs != nullptr) {
if (rhs != nullptr) {
// Binary operator
lhs->adjoint += node->grad_l(lhs->val, rhs->val, node->adjoint);
rhs->adjoint += node->grad_r(lhs->val, rhs->val, node->adjoint);
} else {
// Unary operator
lhs->adjoint += node->grad_l(lhs->val, Scalar(0), node->adjoint);
}
}
}
// Exploit the row's sparsity pattern by only appending wrt adjoints that
// appear in the expression graph
for (const auto& [col, node] : output_list) {
// Append adjoints of wrt to sparse matrix triplets
triplets.emplace_back(row, col, node->adjoint);
}
}
} // namespace slp::detail

View File

@@ -1,161 +0,0 @@
// Copyright (c) Sleipnir contributors
#pragma once
#include <ranges>
#include <utility>
#include <Eigen/SparseCore>
#include <gch/small_vector.hpp>
#include "sleipnir/autodiff/expression_graph.hpp"
#include "sleipnir/autodiff/variable.hpp"
#include "sleipnir/autodiff/variable_matrix.hpp"
#include "sleipnir/util/assert.hpp"
#include "sleipnir/util/empty.hpp"
namespace slp::detail {
/// This class is an adapter type that performs value updates of an expression
/// graph, generates a gradient tree, or appends gradient triplets for creating
/// a sparse matrix of gradients.
///
/// @tparam Scalar Scalar type.
template <typename Scalar>
class GradientExpressionGraph {
public:
/// Generates the gradient graph for the given expression.
///
/// @param root The root node of the expression.
explicit GradientExpressionGraph(const Variable<Scalar>& root)
: m_top_list{topological_sort(root.expr)} {
for (const auto& node : m_top_list) {
m_col_list.emplace_back(node->col);
}
}
/// Updates the values of all nodes in this graph based on the values of their
/// dependent nodes.
void update_values() { detail::update_values(m_top_list); }
/// Returns the variable's gradient tree.
///
/// This function lazily allocates variables, so elements of the returned
/// VariableMatrix will be empty if the corresponding element of wrt had no
/// adjoint. Ensure Variable::expr isn't nullptr before calling member
/// functions.
///
/// @param wrt Variables with respect to which to compute the gradient.
/// @return The variable's gradient tree.
VariableMatrix<Scalar> generate_tree(
const VariableMatrix<Scalar>& wrt) const {
slp_assert(wrt.cols() == 1);
// Read docs/algorithms.md#Reverse_accumulation_automatic_differentiation
// for background on reverse accumulation automatic differentiation.
if (m_top_list.empty()) {
return VariableMatrix<Scalar>{detail::empty, wrt.rows(), 1};
}
// Set root node's adjoint to 1 since df/df is 1
m_top_list[0]->adjoint_expr = constant_ptr(Scalar(1));
// df/dx = (df/dy)(dy/dx). The adjoint of x is equal to the adjoint of y
// multiplied by dy/dx. If there are multiple "paths" from the root node to
// variable; the variable's adjoint is the sum of each path's adjoint
// contribution.
for (auto& node : m_top_list) {
auto& lhs = node->args[0];
auto& rhs = node->args[1];
if (lhs != nullptr) {
if (rhs != nullptr) {
// Binary operator
lhs->adjoint_expr += node->grad_expr_l(lhs, rhs, node->adjoint_expr);
rhs->adjoint_expr += node->grad_expr_r(lhs, rhs, node->adjoint_expr);
} else {
// Unary operator
lhs->adjoint_expr += node->grad_expr_l(lhs, rhs, node->adjoint_expr);
}
}
}
// Move gradient tree to return value
VariableMatrix<Scalar> grad{detail::empty, wrt.rows(), 1};
for (int row = 0; row < grad.rows(); ++row) {
grad[row] = Variable{std::move(wrt[row].expr->adjoint_expr)};
}
// Unlink adjoints to avoid circular references between them and their
// parent expressions. This ensures all expressions are returned to the free
// list.
for (auto& node : m_top_list) {
node->adjoint_expr = nullptr;
}
return grad;
}
/// Updates the adjoints in the expression graph (computes the gradient) then
/// appends the adjoints of wrt to the sparse matrix triplets.
///
/// @param triplets The sparse matrix triplets.
/// @param row The row of wrt.
void append_triplets(gch::small_vector<Eigen::Triplet<Scalar>>& triplets,
int row) const {
// Read docs/algorithms.md#Reverse_accumulation_automatic_differentiation
// for background on reverse accumulation automatic differentiation.
if (m_top_list.empty()) {
return;
}
// Set root node's adjoint to 1 since df/df is 1
m_top_list[0]->adjoint = Scalar(1);
// Zero the rest of the adjoints
for (auto& node : m_top_list | std::views::drop(1)) {
node->adjoint = Scalar(0);
}
// df/dx = (df/dy)(dy/dx). The adjoint of x is equal to the adjoint of y
// multiplied by dy/dx. If there are multiple "paths" from the root node to
// variable; the variable's adjoint is the sum of each path's adjoint
// contribution.
for (const auto& node : m_top_list) {
auto& lhs = node->args[0];
auto& rhs = node->args[1];
if (lhs != nullptr) {
if (rhs != nullptr) {
// Binary operator
lhs->adjoint += node->grad_l(lhs->val, rhs->val, node->adjoint);
rhs->adjoint += node->grad_r(lhs->val, rhs->val, node->adjoint);
} else {
// Unary operator
lhs->adjoint += node->grad_l(lhs->val, Scalar(0), node->adjoint);
}
}
}
for (size_t i = 0; i < m_top_list.size(); ++i) {
const auto& col = m_col_list[i];
const auto& node = m_top_list[i];
// Append adjoints of wrt to sparse matrix triplets
if (col != -1) {
triplets.emplace_back(row, col, node->adjoint);
}
}
}
private:
/// Topological sort of graph from parent to child
gch::small_vector<Expression<Scalar>*> m_top_list;
/// List that maps nodes to their respective column
gch::small_vector<int> m_col_list;
};
} // namespace slp::detail

View File

@@ -7,11 +7,12 @@
#include <Eigen/SparseCore>
#include <gch/small_vector.hpp>
#include "sleipnir/autodiff/gradient_expression_graph.hpp"
#include "sleipnir/autodiff/expression_graph.hpp"
#include "sleipnir/autodiff/variable.hpp"
#include "sleipnir/autodiff/variable_matrix.hpp"
#include "sleipnir/util/assert.hpp"
#include "sleipnir/util/concepts.hpp"
#include "sleipnir/util/symbol_exports.hpp"
namespace slp {
@@ -41,24 +42,33 @@ class Hessian {
/// @param wrt Vector of variables with respect to which to compute the
/// Hessian.
Hessian(Variable<Scalar> variable, SleipnirMatrixLike<Scalar> auto wrt)
: m_variables{
detail::GradientExpressionGraph<Scalar>{variable}.generate_tree(
wrt)},
m_wrt{wrt} {
slp_assert(m_wrt.cols() == 1);
: m_variables{detail::gradient_tree(
detail::topological_sort(variable.expr), wrt)},
m_wrt{std::move(wrt)} {
slp_assert(wrt.cols() == 1);
for (auto& variable : m_variables) {
m_top_lists.emplace_back(detail::topological_sort(variable.expr));
}
// Initialize column each expression's adjoint occupies in the Jacobian
for (size_t col = 0; col < m_wrt.size(); ++col) {
m_wrt[col].expr->col = col;
m_wrt[col].expr->scratch = col;
}
for (auto& variable : m_variables) {
m_graphs.emplace_back(variable);
// Make list of only nodes in output row, and their output columns
for (auto& top_list : m_top_lists) {
m_output_lists.emplace_back();
for (const auto& node : top_list) {
if (node->scratch != -1) {
m_output_lists.back().emplace_back(node->scratch, node);
}
}
}
// Reset col to -1
for (auto& node : m_wrt) {
node.expr->col = -1;
node.expr->scratch = -1;
}
for (int row = 0; row < m_variables.rows(); ++row) {
@@ -70,7 +80,8 @@ class Hessian {
// If the row is linear, compute its gradient once here and cache its
// triplets. Constant rows are ignored because their gradients have no
// nonzero triplets.
m_graphs[row].append_triplets(m_cached_triplets, row);
detail::append_triplets(m_top_lists[row], m_output_lists[row],
m_cached_triplets, row);
} else if (m_variables[row].type() > ExpressionType::LINEAR) {
// If the row is quadratic or nonlinear, add it to the list of nonlinear
// rows to be recomputed in value().
@@ -97,7 +108,7 @@ class Hessian {
m_wrt.rows()};
for (int row = 0; row < m_variables.rows(); ++row) {
auto grad = m_graphs[row].generate_tree(m_wrt);
auto grad = detail::gradient_tree(m_top_lists[row], m_wrt);
for (int col = 0; col < m_wrt.rows(); ++col) {
if (grad[col].expr != nullptr) {
result(row, col) = std::move(grad[col]);
@@ -118,8 +129,8 @@ class Hessian {
return m_H;
}
for (auto& graph : m_graphs) {
graph.update_values();
for (auto& top_list : m_top_lists) {
detail::update_values(top_list);
}
// Copy the cached triplets so triplets added for the nonlinear rows are
@@ -128,7 +139,8 @@ class Hessian {
// Compute each nonlinear row of the Hessian
for (int row : m_nonlinear_rows) {
m_graphs[row].append_triplets(triplets, row);
detail::append_triplets(m_top_lists[row], m_output_lists[row], triplets,
row);
}
m_H.setFromTriplets(triplets.begin(), triplets.end());
@@ -143,7 +155,13 @@ class Hessian {
VariableMatrix<Scalar> m_variables;
VariableMatrix<Scalar> m_wrt;
gch::small_vector<detail::GradientExpressionGraph<Scalar>> m_graphs;
/// List of topologically sorted graphs from parent to child, one for each row
gch::small_vector<detail::ExpressionGraph<Scalar>> m_top_lists;
/// List of output rows as column-node pairs
gch::small_vector<
gch::small_vector<std::pair<int, detail::Expression<Scalar>*>>>
m_output_lists;
Eigen::SparseMatrix<Scalar> m_H{m_variables.rows(), m_wrt.rows()};

View File

@@ -7,12 +7,11 @@
#include <Eigen/SparseCore>
#include <gch/small_vector.hpp>
#include "sleipnir/autodiff/gradient_expression_graph.hpp"
#include "sleipnir/autodiff/expression_graph.hpp"
#include "sleipnir/autodiff/variable.hpp"
#include "sleipnir/autodiff/variable_matrix.hpp"
#include "sleipnir/util/assert.hpp"
#include "sleipnir/util/concepts.hpp"
#include "sleipnir/util/empty.hpp"
#include "sleipnir/util/symbol_exports.hpp"
namespace slp {
@@ -54,18 +53,28 @@ class Jacobian {
slp_assert(m_variables.cols() == 1);
slp_assert(m_wrt.cols() == 1);
// Initialize column each expression's adjoint occupies in the Jacobian
for (size_t col = 0; col < m_wrt.size(); ++col) {
m_wrt[col].expr->col = col;
for (auto& variable : m_variables) {
m_top_lists.emplace_back(detail::topological_sort(variable.expr));
}
for (auto& variable : m_variables) {
m_graphs.emplace_back(variable);
// Initialize column each expression's adjoint occupies in the Jacobian
for (size_t col = 0; col < m_wrt.size(); ++col) {
m_wrt[col].expr->scratch = col;
}
// Make list of output rows as column-node pairs
for (auto& top_list : m_top_lists) {
m_output_lists.emplace_back();
for (const auto& node : top_list) {
if (node->scratch != -1) {
m_output_lists.back().emplace_back(node->scratch, node);
}
}
}
// Reset col to -1
for (auto& node : m_wrt) {
node.expr->col = -1;
node.expr->scratch = -1;
}
for (int row = 0; row < m_variables.rows(); ++row) {
@@ -77,7 +86,8 @@ class Jacobian {
// If the row is linear, compute its gradient once here and cache its
// triplets. Constant rows are ignored because their gradients have no
// nonzero triplets.
m_graphs[row].append_triplets(m_cached_triplets, row);
detail::append_triplets(m_top_lists[row], m_output_lists[row],
m_cached_triplets, row);
} else if (m_variables[row].type() > ExpressionType::LINEAR) {
// If the row is quadratic or nonlinear, add it to the list of nonlinear
// rows to be recomputed in value().
@@ -101,7 +111,7 @@ class Jacobian {
m_wrt.rows()};
for (int row = 0; row < m_variables.rows(); ++row) {
auto grad = m_graphs[row].generate_tree(m_wrt);
auto grad = detail::gradient_tree(m_top_lists[row], m_wrt);
for (int col = 0; col < m_wrt.rows(); ++col) {
if (grad[col].expr != nullptr) {
result(row, col) = std::move(grad[col]);
@@ -122,8 +132,8 @@ class Jacobian {
return m_J;
}
for (auto& graph : m_graphs) {
graph.update_values();
for (auto& top_list : m_top_lists) {
detail::update_values(top_list);
}
// Copy the cached triplets so triplets added for the nonlinear rows are
@@ -132,7 +142,8 @@ class Jacobian {
// Compute each nonlinear row of the Jacobian
for (int row : m_nonlinear_rows) {
m_graphs[row].append_triplets(triplets, row);
detail::append_triplets(m_top_lists[row], m_output_lists[row], triplets,
row);
}
m_J.setFromTriplets(triplets.begin(), triplets.end());
@@ -144,7 +155,13 @@ class Jacobian {
VariableMatrix<Scalar> m_variables;
VariableMatrix<Scalar> m_wrt;
gch::small_vector<detail::GradientExpressionGraph<Scalar>> m_graphs;
/// List of topologically sorted graphs from parent to child, one for each row
gch::small_vector<detail::ExpressionGraph<Scalar>> m_top_lists;
/// List of output rows as column-node pairs
gch::small_vector<
gch::small_vector<std::pair<int, detail::Expression<Scalar>*>>>
m_output_lists;
Eigen::SparseMatrix<Scalar> m_J{m_variables.rows(), m_wrt.rows()};

View File

@@ -26,10 +26,15 @@ namespace slp {
// Forward declarations for friend declarations in Variable
template <typename Scalar>
class VariableMatrix;
namespace detail {
template <typename Scalar>
class GradientExpressionGraph;
VariableMatrix<Scalar> gradient_tree(
const detail::ExpressionGraph<Scalar>& top_list,
const VariableMatrix<Scalar>& wrt);
} // namespace detail
@@ -265,7 +270,7 @@ class Variable : public SleipnirBase {
/// Used to update the value of this variable based on the values of its
/// dependent variables
gch::small_vector<detail::Expression<Scalar>*> m_graph;
detail::ExpressionGraph<Scalar> m_graph;
/// Used for lazy initialization of m_graph
bool m_graph_initialized = false;
@@ -354,7 +359,10 @@ class Variable : public SleipnirBase {
const Variable<Scalar>& y,
const Variable<Scalar>& z);
friend class detail::GradientExpressionGraph<Scalar>;
template <typename Scalar>
friend VariableMatrix<Scalar> detail::gradient_tree(
const detail::ExpressionGraph<Scalar>& top_list,
const VariableMatrix<Scalar>& wrt);
template <typename Scalar, int UpLo>
requires(UpLo == Eigen::Lower) || (UpLo == (Eigen::Lower | Eigen::Upper))
friend class Hessian;

View File

@@ -1647,6 +1647,72 @@ VariableMatrix<Scalar> solve(const VariableMatrix<Scalar>& A,
}
}
namespace detail {
/// Returns the variable's gradient tree.
///
/// This function lazily allocates variables, so elements of the returned
/// VariableMatrix will be empty if the corresponding element of wrt had no
/// adjoint. Ensure Variable::expr isn't nullptr before calling member
/// functions.
///
/// @tparam Scalar Scalar type.
/// @param top_list Topologically sorted graph from parent to child.
/// @param wrt Variables with respect to which to compute the gradient.
/// @return The variable's gradient tree.
template <typename Scalar>
VariableMatrix<Scalar> gradient_tree(const ExpressionGraph<Scalar>& top_list,
const VariableMatrix<Scalar>& wrt) {
slp_assert(wrt.cols() == 1);
// Read docs/algorithms.md#Reverse_accumulation_automatic_differentiation
// for background on reverse accumulation automatic differentiation.
if (top_list.empty()) {
return VariableMatrix<Scalar>{detail::empty, wrt.rows(), 1};
}
// Set root node's adjoint to 1 since df/df is 1
top_list[0]->adjoint_expr = constant_ptr(Scalar(1));
// df/dx = (df/dy)(dy/dx). The adjoint of x is equal to the adjoint of y
// multiplied by dy/dx. If there are multiple "paths" from the root node to
// variable; the variable's adjoint is the sum of each path's adjoint
// contribution.
for (auto& node : top_list) {
auto& lhs = node->args[0];
auto& rhs = node->args[1];
if (lhs != nullptr) {
if (rhs != nullptr) {
// Binary operator
lhs->adjoint_expr += node->grad_expr_l(lhs, rhs, node->adjoint_expr);
rhs->adjoint_expr += node->grad_expr_r(lhs, rhs, node->adjoint_expr);
} else {
// Unary operator
lhs->adjoint_expr += node->grad_expr_l(lhs, rhs, node->adjoint_expr);
}
}
}
// Move gradient tree to return value
VariableMatrix<Scalar> grad{detail::empty, wrt.rows(), 1};
for (int row = 0; row < grad.rows(); ++row) {
grad[row] = Variable{std::move(wrt[row].expr->adjoint_expr)};
}
// Unlink adjoints to avoid circular references between them and their
// parent expressions. This ensures all expressions are returned to the free
// list.
for (auto& node : top_list) {
node->adjoint_expr = nullptr;
}
return grad;
}
} // namespace detail
extern template SLEIPNIR_DLLEXPORT VariableMatrix<double> solve(
const VariableMatrix<double>& A, const VariableMatrix<double>& B);