mirror of
https://github.com/wpilibsuite/allwpilib
synced 2026-06-19 00:41:43 +00:00
[upstream_utils] Upgrade to Sleipnir 0.6.1 (#8930)
This fixes a memory leak on Windows.
This commit is contained in:
@@ -46,7 +46,7 @@ using small_vector = wpi::util::SmallVector<T>;
|
||||
def main():
|
||||
name = "sleipnir"
|
||||
url = "https://github.com/SleipnirGroup/Sleipnir"
|
||||
tag = "v0.6.0"
|
||||
tag = "v0.6.1"
|
||||
|
||||
sleipnir = Lib(name, url, tag, copy_upstream_src)
|
||||
sleipnir.main()
|
||||
|
||||
@@ -9,7 +9,7 @@ Subject: [PATCH 02/10] Use wpi::SmallVector
|
||||
2 files changed, 4 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/include/sleipnir/autodiff/expression.hpp b/include/sleipnir/autodiff/expression.hpp
|
||||
index af10b505ae44ca0eb656f66f3f49172d7fc98c6d..7a9dc089a9e2fb323ecc8725f924ebee1b213af0 100644
|
||||
index f04967c6a84ea76b77cebb7ef8edcb798d03e2a5..361c74702107eadf6f1fbe1682a7ef296abd19a3 100644
|
||||
--- a/include/sleipnir/autodiff/expression.hpp
|
||||
+++ b/include/sleipnir/autodiff/expression.hpp
|
||||
@@ -34,7 +34,7 @@ struct Expression;
|
||||
@@ -21,7 +21,7 @@ index af10b505ae44ca0eb656f66f3f49172d7fc98c6d..7a9dc089a9e2fb323ecc8725f924ebee
|
||||
|
||||
/// Typedef for intrusive shared pointer to Expression.
|
||||
///
|
||||
@@ -744,7 +744,7 @@ constexpr void inc_ref_count(Expression<Scalar>* expr) {
|
||||
@@ -749,7 +749,7 @@ constexpr void inc_ref_count(Expression<Scalar>* expr) {
|
||||
/// @tparam Scalar Scalar type.
|
||||
/// @param expr The shared pointer's managed object.
|
||||
template <typename Scalar>
|
||||
@@ -31,10 +31,10 @@ index af10b505ae44ca0eb656f66f3f49172d7fc98c6d..7a9dc089a9e2fb323ecc8725f924ebee
|
||||
// Expression destructor when expr's refcount reaches zero can cause a stack
|
||||
// overflow. Instead, we iterate over its children to decrement their
|
||||
diff --git a/include/sleipnir/autodiff/variable.hpp b/include/sleipnir/autodiff/variable.hpp
|
||||
index 910ebcf5266bdf76711db7849dd6584549094e3b..3c4f67c1f6224226620e2ffcc597336435943ce8 100644
|
||||
index a6b054494a18eacbcae10c0e88648550a39ed48e..ad8baa3b58047f5257c210b88446324cca121d4a 100644
|
||||
--- a/include/sleipnir/autodiff/variable.hpp
|
||||
+++ b/include/sleipnir/autodiff/variable.hpp
|
||||
@@ -53,7 +53,7 @@ class Variable : public SleipnirBase {
|
||||
@@ -58,7 +58,7 @@ class Variable : public SleipnirBase {
|
||||
Variable() = default;
|
||||
|
||||
/// Constructs an empty Variable.
|
||||
@@ -43,7 +43,7 @@ index 910ebcf5266bdf76711db7849dd6584549094e3b..3c4f67c1f6224226620e2ffcc5973364
|
||||
|
||||
/// Constructs a Variable from a scalar type.
|
||||
///
|
||||
@@ -96,7 +96,7 @@ class Variable : public SleipnirBase {
|
||||
@@ -101,7 +101,7 @@ class Variable : public SleipnirBase {
|
||||
/// Constructs a Variable pointing to the specified expression.
|
||||
///
|
||||
/// @param expr The autodiff variable.
|
||||
|
||||
@@ -4,26 +4,9 @@ Date: Sat, 8 Feb 2025 13:42:36 -0800
|
||||
Subject: [PATCH 05/10] Replace std::views::zip()
|
||||
|
||||
---
|
||||
include/sleipnir/autodiff/gradient_expression_graph.hpp | 5 ++++-
|
||||
include/sleipnir/optimization/problem.hpp | 8 +++++---
|
||||
2 files changed, 9 insertions(+), 4 deletions(-)
|
||||
include/sleipnir/optimization/problem.hpp | 8 +++++---
|
||||
1 file changed, 5 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/include/sleipnir/autodiff/gradient_expression_graph.hpp b/include/sleipnir/autodiff/gradient_expression_graph.hpp
|
||||
index fda7931c686a315209acdbe6f1ac581944e2781c..d3e59068d04b3db01945c5657959b69cd4522cca 100644
|
||||
--- a/include/sleipnir/autodiff/gradient_expression_graph.hpp
|
||||
+++ b/include/sleipnir/autodiff/gradient_expression_graph.hpp
|
||||
@@ -139,7 +139,10 @@ class GradientExpressionGraph {
|
||||
}
|
||||
}
|
||||
|
||||
- for (const auto& [col, node] : std::views::zip(m_col_list, m_top_list)) {
|
||||
+ for (size_t i = 0; i < m_top_list.size(); ++i) {
|
||||
+ const auto& col = m_col_list[i];
|
||||
+ const auto& node = m_top_list[i];
|
||||
+
|
||||
// Append adjoints of wrt to sparse matrix triplets
|
||||
if (col != -1) {
|
||||
triplets.emplace_back(row, col, node->adjoint);
|
||||
diff --git a/include/sleipnir/optimization/problem.hpp b/include/sleipnir/optimization/problem.hpp
|
||||
index 49921b98de3452b6ca9f2e33a86daa254c0d3a01..d3feadd577bd53147a8b07da76910e046ccbf95d 100644
|
||||
--- a/include/sleipnir/optimization/problem.hpp
|
||||
|
||||
@@ -8,10 +8,10 @@ Subject: [PATCH 07/10] Suppress clang-tidy false positives
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/include/sleipnir/autodiff/variable.hpp b/include/sleipnir/autodiff/variable.hpp
|
||||
index 3c4f67c1f6224226620e2ffcc597336435943ce8..3d3b355f9fef7d5b6b57521317f67041e0fb1ad6 100644
|
||||
index ad8baa3b58047f5257c210b88446324cca121d4a..5f32d7ea16864ad9a4fdcc4b067ef7dff2e9027d 100644
|
||||
--- a/include/sleipnir/autodiff/variable.hpp
|
||||
+++ b/include/sleipnir/autodiff/variable.hpp
|
||||
@@ -835,7 +835,7 @@ struct InequalityConstraints {
|
||||
@@ -843,7 +843,7 @@ struct InequalityConstraints {
|
||||
///
|
||||
/// @param inequality_constraints The list of InequalityConstraints to
|
||||
/// concatenate.
|
||||
|
||||
@@ -8,7 +8,7 @@ Subject: [PATCH 08/10] Suppress GCC 12 warning false positive
|
||||
1 file changed, 7 insertions(+)
|
||||
|
||||
diff --git a/include/sleipnir/autodiff/variable_matrix.hpp b/include/sleipnir/autodiff/variable_matrix.hpp
|
||||
index 2cf6e00f16f58a5847a4515f13b3a91fffbb72b8..b5d15edc5b294a2ea50dca15a7612c61e5f2680b 100644
|
||||
index 2bdbf11841b9920111b9cb1426d6fd04764040e4..8b84a04780240cffb801e2c6f84e22c0e5246286 100644
|
||||
--- a/include/sleipnir/autodiff/variable_matrix.hpp
|
||||
+++ b/include/sleipnir/autodiff/variable_matrix.hpp
|
||||
@@ -503,6 +503,10 @@ class VariableMatrix : public SleipnirBase {
|
||||
|
||||
@@ -15,11 +15,11 @@ Subject: [PATCH 10/10] Use operator() instead of multidimensional array
|
||||
7 files changed, 135 insertions(+), 135 deletions(-)
|
||||
|
||||
diff --git a/include/sleipnir/autodiff/hessian.hpp b/include/sleipnir/autodiff/hessian.hpp
|
||||
index 2a8087eda23f49f8c01be35d5289568644a9742d..c48e01c7b89d80db8032f6d2e492a0ec1bfa9f42 100644
|
||||
index d9d25a00251dd31e446bd0419f0c13e8f2456f2d..39cd798160ff272aeac4b12c92edee3f2a213a22 100644
|
||||
--- a/include/sleipnir/autodiff/hessian.hpp
|
||||
+++ b/include/sleipnir/autodiff/hessian.hpp
|
||||
@@ -100,9 +100,9 @@ class Hessian {
|
||||
auto grad = m_graphs[row].generate_tree(m_wrt);
|
||||
@@ -111,9 +111,9 @@ class Hessian {
|
||||
auto grad = detail::gradient_tree(m_top_lists[row], m_wrt);
|
||||
for (int col = 0; col < m_wrt.rows(); ++col) {
|
||||
if (grad[col].expr != nullptr) {
|
||||
- result[row, col] = std::move(grad[col]);
|
||||
@@ -31,11 +31,11 @@ index 2a8087eda23f49f8c01be35d5289568644a9742d..c48e01c7b89d80db8032f6d2e492a0ec
|
||||
}
|
||||
}
|
||||
diff --git a/include/sleipnir/autodiff/jacobian.hpp b/include/sleipnir/autodiff/jacobian.hpp
|
||||
index 7a5c3798dab50f14db35bee430f1d5e8e32196ff..1a5b01de99a7c011ebf8c5ed9fc64942675fcb4b 100644
|
||||
index f97b5737c8fc007299bb4a1dbe5b057c1e3645de..b202f776d00feb5eaa0703796d226ab52f89aad5 100644
|
||||
--- a/include/sleipnir/autodiff/jacobian.hpp
|
||||
+++ b/include/sleipnir/autodiff/jacobian.hpp
|
||||
@@ -104,9 +104,9 @@ class Jacobian {
|
||||
auto grad = m_graphs[row].generate_tree(m_wrt);
|
||||
@@ -114,9 +114,9 @@ class Jacobian {
|
||||
auto grad = detail::gradient_tree(m_top_lists[row], m_wrt);
|
||||
for (int col = 0; col < m_wrt.rows(); ++col) {
|
||||
if (grad[col].expr != nullptr) {
|
||||
- result[row, col] = std::move(grad[col]);
|
||||
@@ -47,10 +47,10 @@ index 7a5c3798dab50f14db35bee430f1d5e8e32196ff..1a5b01de99a7c011ebf8c5ed9fc64942
|
||||
}
|
||||
}
|
||||
diff --git a/include/sleipnir/autodiff/variable.hpp b/include/sleipnir/autodiff/variable.hpp
|
||||
index 3d3b355f9fef7d5b6b57521317f67041e0fb1ad6..130a4f057ea08ab0565b54f8a3556bd81fbc6825 100644
|
||||
index 5f32d7ea16864ad9a4fdcc4b067ef7dff2e9027d..5d7b56de884cdb73fc9f08fcfa1982b252bdd88e 100644
|
||||
--- a/include/sleipnir/autodiff/variable.hpp
|
||||
+++ b/include/sleipnir/autodiff/variable.hpp
|
||||
@@ -68,7 +68,7 @@ class Variable : public SleipnirBase {
|
||||
@@ -73,7 +73,7 @@ class Variable : public SleipnirBase {
|
||||
///
|
||||
/// @param value The value of the Variable.
|
||||
// NOLINTNEXTLINE (google-explicit-constructor)
|
||||
@@ -59,7 +59,7 @@ index 3d3b355f9fef7d5b6b57521317f67041e0fb1ad6..130a4f057ea08ab0565b54f8a3556bd8
|
||||
slp_assert(value.rows() == 1 && value.cols() == 1);
|
||||
}
|
||||
|
||||
@@ -725,7 +725,7 @@ auto make_constraints(LHS&& lhs, RHS&& rhs) {
|
||||
@@ -733,7 +733,7 @@ auto make_constraints(LHS&& lhs, RHS&& rhs) {
|
||||
for (int row = 0; row < rhs.rows(); ++row) {
|
||||
for (int col = 0; col < rhs.cols(); ++col) {
|
||||
// Make right-hand side zero
|
||||
@@ -68,7 +68,7 @@ index 3d3b355f9fef7d5b6b57521317f67041e0fb1ad6..130a4f057ea08ab0565b54f8a3556bd8
|
||||
}
|
||||
}
|
||||
|
||||
@@ -741,7 +741,7 @@ auto make_constraints(LHS&& lhs, RHS&& rhs) {
|
||||
@@ -749,7 +749,7 @@ auto make_constraints(LHS&& lhs, RHS&& rhs) {
|
||||
for (int row = 0; row < lhs.rows(); ++row) {
|
||||
for (int col = 0; col < lhs.cols(); ++col) {
|
||||
// Make right-hand side zero
|
||||
@@ -77,7 +77,7 @@ index 3d3b355f9fef7d5b6b57521317f67041e0fb1ad6..130a4f057ea08ab0565b54f8a3556bd8
|
||||
}
|
||||
}
|
||||
|
||||
@@ -759,7 +759,7 @@ auto make_constraints(LHS&& lhs, RHS&& rhs) {
|
||||
@@ -767,7 +767,7 @@ auto make_constraints(LHS&& lhs, RHS&& rhs) {
|
||||
for (int row = 0; row < lhs.rows(); ++row) {
|
||||
for (int col = 0; col < lhs.cols(); ++col) {
|
||||
// Make right-hand side zero
|
||||
@@ -388,7 +388,7 @@ index 4018606df45941b578c861caf934495f8c9e368e..cf554832b82adb17b4b1d7b56842a77d
|
||||
}
|
||||
|
||||
diff --git a/include/sleipnir/autodiff/variable_matrix.hpp b/include/sleipnir/autodiff/variable_matrix.hpp
|
||||
index b5d15edc5b294a2ea50dca15a7612c61e5f2680b..02bccb660474d9380444f5e20124fb687e372982 100644
|
||||
index 8b84a04780240cffb801e2c6f84e22c0e5246286..d82b90191f6fb3f30460fecb2653655c7449d109 100644
|
||||
--- a/include/sleipnir/autodiff/variable_matrix.hpp
|
||||
+++ b/include/sleipnir/autodiff/variable_matrix.hpp
|
||||
@@ -154,7 +154,7 @@ class VariableMatrix : public SleipnirBase {
|
||||
|
||||
@@ -96,22 +96,27 @@ struct Expression {
|
||||
/// The adjoint of the expression node, used during autodiff.
|
||||
Scalar adjoint{0};
|
||||
|
||||
/// Counts incoming edges for this node.
|
||||
uint32_t incoming_edges = 0;
|
||||
|
||||
/// This expression's column in a Jacobian, or -1 otherwise.
|
||||
int32_t col = -1;
|
||||
|
||||
/// The adjoint of the expression node, used during gradient expression tree
|
||||
/// generation.
|
||||
ExpressionPtr<Scalar> adjoint_expr;
|
||||
|
||||
/// Reference count for intrusive shared pointer.
|
||||
uint32_t ref_count = 0;
|
||||
|
||||
/// Expression arguments.
|
||||
std::array<ExpressionPtr<Scalar>, 2> args{nullptr, nullptr};
|
||||
|
||||
/// Scratch space for various graph algorithms.
|
||||
///
|
||||
/// In expression_graph.hpp's topological_sort(), scratch counts incoming
|
||||
/// edges for this node, offset by -1 so -1 means no edges.
|
||||
///
|
||||
/// In Hessian and Jacobian constructors, scratch represents this expression's
|
||||
/// column in a Jacobian, or -1 otherwise.
|
||||
///
|
||||
/// They share a default state of -1 to avoid extra assignments.
|
||||
int32_t scratch = -1;
|
||||
|
||||
/// Reference count for intrusive shared pointer.
|
||||
uint32_t ref_count = 0;
|
||||
|
||||
/// Constructs a constant expression with a value of zero.
|
||||
constexpr Expression() = default;
|
||||
|
||||
@@ -774,6 +779,8 @@ void dec_ref_count(Expression<Scalar>* expr) {
|
||||
auto alloc = global_pool_allocator<Expression<Scalar>>();
|
||||
std::allocator_traits<decltype(alloc)>::deallocate(
|
||||
alloc, elem, sizeof(Expression<Scalar>));
|
||||
} else {
|
||||
operator delete(elem);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,12 +4,19 @@
|
||||
|
||||
#include <ranges>
|
||||
|
||||
#include <Eigen/SparseCore>
|
||||
#include <gch/small_vector.hpp>
|
||||
|
||||
#include "sleipnir/autodiff/expression.hpp"
|
||||
|
||||
namespace slp::detail {
|
||||
|
||||
/// Typedef for topologically sorted expression graph from parent to child.
|
||||
///
|
||||
/// @tparam Scalar Scalar type.
|
||||
template <typename Scalar>
|
||||
using ExpressionGraph = gch::small_vector<Expression<Scalar>*>;
|
||||
|
||||
/// Generates a topological sort of an expression graph from parent to child.
|
||||
///
|
||||
/// https://en.wikipedia.org/wiki/Topological_sorting
|
||||
@@ -17,9 +24,8 @@ namespace slp::detail {
|
||||
/// @tparam Scalar Scalar type.
|
||||
/// @param root The root node of the expression.
|
||||
template <typename Scalar>
|
||||
gch::small_vector<Expression<Scalar>*> topological_sort(
|
||||
const ExpressionPtr<Scalar>& root) {
|
||||
gch::small_vector<Expression<Scalar>*> list;
|
||||
ExpressionGraph<Scalar> topological_sort(const ExpressionPtr<Scalar>& root) {
|
||||
ExpressionGraph<Scalar> list;
|
||||
|
||||
// If the root type is constant, updates are a no-op, so return an empty list
|
||||
if (root == nullptr || root->type() == ExpressionType::CONSTANT) {
|
||||
@@ -30,6 +36,8 @@ gch::small_vector<Expression<Scalar>*> topological_sort(
|
||||
gch::small_vector<Expression<Scalar>*> stack;
|
||||
|
||||
// Enumerate incoming edges for each node via depth-first search
|
||||
//
|
||||
// NOTE: scratch counts incoming edges, offset by -1 so -1 means no edges.
|
||||
stack.emplace_back(root.get());
|
||||
while (!stack.empty()) {
|
||||
auto node = stack.back();
|
||||
@@ -37,7 +45,7 @@ gch::small_vector<Expression<Scalar>*> topological_sort(
|
||||
|
||||
for (auto& arg : node->args) {
|
||||
// If the node hasn't been explored yet, add it to the stack
|
||||
if (arg != nullptr && ++arg->incoming_edges == 1) {
|
||||
if (arg != nullptr && ++arg->scratch == 0) {
|
||||
stack.push_back(arg.get());
|
||||
}
|
||||
}
|
||||
@@ -46,8 +54,7 @@ gch::small_vector<Expression<Scalar>*> topological_sort(
|
||||
// Generate topological sort of graph from parent to child.
|
||||
//
|
||||
// A node is only added to the stack after all its incoming edges have been
|
||||
// traversed. Expression::incoming_edges is a decrementing counter for
|
||||
// tracking this.
|
||||
// traversed. Expression::scratch is a decrementing counter for tracking this.
|
||||
//
|
||||
// https://en.wikipedia.org/wiki/Topological_sorting
|
||||
stack.emplace_back(root.get());
|
||||
@@ -59,7 +66,7 @@ gch::small_vector<Expression<Scalar>*> topological_sort(
|
||||
|
||||
for (auto& arg : node->args) {
|
||||
// If we traversed all this node's incoming edges, add it to the stack
|
||||
if (arg != nullptr && --arg->incoming_edges == 0) {
|
||||
if (arg != nullptr && --arg->scratch == -1) {
|
||||
stack.push_back(arg.get());
|
||||
}
|
||||
}
|
||||
@@ -74,7 +81,7 @@ gch::small_vector<Expression<Scalar>*> topological_sort(
|
||||
/// @tparam Scalar Scalar type.
|
||||
/// @param list Topological sort of graph from parent to child.
|
||||
template <typename Scalar>
|
||||
void update_values(const gch::small_vector<Expression<Scalar>*>& list) {
|
||||
void update_values(const ExpressionGraph<Scalar>& list) {
|
||||
// Traverse graph from child to parent and update values
|
||||
for (auto& node : list | std::views::reverse) {
|
||||
auto& lhs = node->args[0];
|
||||
@@ -86,4 +93,61 @@ void update_values(const gch::small_vector<Expression<Scalar>*>& list) {
|
||||
}
|
||||
}
|
||||
|
||||
/// Updates the adjoints in the expression graph (computes the gradient) then
|
||||
/// appends the adjoints of wrt to the sparse matrix triplets.
|
||||
///
|
||||
/// @tparam Scalar Scalar type.
|
||||
/// @param top_list Topologically sorted graph from parent to child.
|
||||
/// @param output_list Output row as column-node pairs.
|
||||
/// @param triplets The sparse matrix triplets.
|
||||
/// @param row The row of wrt.
|
||||
template <typename Scalar>
|
||||
void append_triplets(
|
||||
const ExpressionGraph<Scalar>& top_list,
|
||||
const gch::small_vector<std::pair<int, detail::Expression<Scalar>*>>&
|
||||
output_list,
|
||||
gch::small_vector<Eigen::Triplet<Scalar>>& triplets, int row) {
|
||||
// Read docs/algorithms.md#Reverse_accumulation_automatic_differentiation
|
||||
// for background on reverse accumulation automatic differentiation.
|
||||
|
||||
if (top_list.empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Set root node's adjoint to 1 since df/df is 1
|
||||
top_list[0]->adjoint = Scalar(1);
|
||||
|
||||
// Zero the rest of the adjoints
|
||||
for (auto& node : top_list | std::views::drop(1)) {
|
||||
node->adjoint = Scalar(0);
|
||||
}
|
||||
|
||||
// df/dx = (df/dy)(dy/dx). The adjoint of x is equal to the adjoint of y
|
||||
// multiplied by dy/dx. If there are multiple "paths" from the root node to
|
||||
// variable; the variable's adjoint is the sum of each path's adjoint
|
||||
// contribution.
|
||||
for (const auto& node : top_list) {
|
||||
auto& lhs = node->args[0];
|
||||
auto& rhs = node->args[1];
|
||||
|
||||
if (lhs != nullptr) {
|
||||
if (rhs != nullptr) {
|
||||
// Binary operator
|
||||
lhs->adjoint += node->grad_l(lhs->val, rhs->val, node->adjoint);
|
||||
rhs->adjoint += node->grad_r(lhs->val, rhs->val, node->adjoint);
|
||||
} else {
|
||||
// Unary operator
|
||||
lhs->adjoint += node->grad_l(lhs->val, Scalar(0), node->adjoint);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Exploit the row's sparsity pattern by only appending wrt adjoints that
|
||||
// appear in the expression graph
|
||||
for (const auto& [col, node] : output_list) {
|
||||
// Append adjoints of wrt to sparse matrix triplets
|
||||
triplets.emplace_back(row, col, node->adjoint);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace slp::detail
|
||||
|
||||
@@ -1,161 +0,0 @@
|
||||
// Copyright (c) Sleipnir contributors
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <ranges>
|
||||
#include <utility>
|
||||
|
||||
#include <Eigen/SparseCore>
|
||||
#include <gch/small_vector.hpp>
|
||||
|
||||
#include "sleipnir/autodiff/expression_graph.hpp"
|
||||
#include "sleipnir/autodiff/variable.hpp"
|
||||
#include "sleipnir/autodiff/variable_matrix.hpp"
|
||||
#include "sleipnir/util/assert.hpp"
|
||||
#include "sleipnir/util/empty.hpp"
|
||||
|
||||
namespace slp::detail {
|
||||
|
||||
/// This class is an adapter type that performs value updates of an expression
|
||||
/// graph, generates a gradient tree, or appends gradient triplets for creating
|
||||
/// a sparse matrix of gradients.
|
||||
///
|
||||
/// @tparam Scalar Scalar type.
|
||||
template <typename Scalar>
|
||||
class GradientExpressionGraph {
|
||||
public:
|
||||
/// Generates the gradient graph for the given expression.
|
||||
///
|
||||
/// @param root The root node of the expression.
|
||||
explicit GradientExpressionGraph(const Variable<Scalar>& root)
|
||||
: m_top_list{topological_sort(root.expr)} {
|
||||
for (const auto& node : m_top_list) {
|
||||
m_col_list.emplace_back(node->col);
|
||||
}
|
||||
}
|
||||
|
||||
/// Updates the values of all nodes in this graph based on the values of their
|
||||
/// dependent nodes.
|
||||
void update_values() { detail::update_values(m_top_list); }
|
||||
|
||||
/// Returns the variable's gradient tree.
|
||||
///
|
||||
/// This function lazily allocates variables, so elements of the returned
|
||||
/// VariableMatrix will be empty if the corresponding element of wrt had no
|
||||
/// adjoint. Ensure Variable::expr isn't nullptr before calling member
|
||||
/// functions.
|
||||
///
|
||||
/// @param wrt Variables with respect to which to compute the gradient.
|
||||
/// @return The variable's gradient tree.
|
||||
VariableMatrix<Scalar> generate_tree(
|
||||
const VariableMatrix<Scalar>& wrt) const {
|
||||
slp_assert(wrt.cols() == 1);
|
||||
|
||||
// Read docs/algorithms.md#Reverse_accumulation_automatic_differentiation
|
||||
// for background on reverse accumulation automatic differentiation.
|
||||
|
||||
if (m_top_list.empty()) {
|
||||
return VariableMatrix<Scalar>{detail::empty, wrt.rows(), 1};
|
||||
}
|
||||
|
||||
// Set root node's adjoint to 1 since df/df is 1
|
||||
m_top_list[0]->adjoint_expr = constant_ptr(Scalar(1));
|
||||
|
||||
// df/dx = (df/dy)(dy/dx). The adjoint of x is equal to the adjoint of y
|
||||
// multiplied by dy/dx. If there are multiple "paths" from the root node to
|
||||
// variable; the variable's adjoint is the sum of each path's adjoint
|
||||
// contribution.
|
||||
for (auto& node : m_top_list) {
|
||||
auto& lhs = node->args[0];
|
||||
auto& rhs = node->args[1];
|
||||
|
||||
if (lhs != nullptr) {
|
||||
if (rhs != nullptr) {
|
||||
// Binary operator
|
||||
lhs->adjoint_expr += node->grad_expr_l(lhs, rhs, node->adjoint_expr);
|
||||
rhs->adjoint_expr += node->grad_expr_r(lhs, rhs, node->adjoint_expr);
|
||||
} else {
|
||||
// Unary operator
|
||||
lhs->adjoint_expr += node->grad_expr_l(lhs, rhs, node->adjoint_expr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Move gradient tree to return value
|
||||
VariableMatrix<Scalar> grad{detail::empty, wrt.rows(), 1};
|
||||
for (int row = 0; row < grad.rows(); ++row) {
|
||||
grad[row] = Variable{std::move(wrt[row].expr->adjoint_expr)};
|
||||
}
|
||||
|
||||
// Unlink adjoints to avoid circular references between them and their
|
||||
// parent expressions. This ensures all expressions are returned to the free
|
||||
// list.
|
||||
for (auto& node : m_top_list) {
|
||||
node->adjoint_expr = nullptr;
|
||||
}
|
||||
|
||||
return grad;
|
||||
}
|
||||
|
||||
/// Updates the adjoints in the expression graph (computes the gradient) then
|
||||
/// appends the adjoints of wrt to the sparse matrix triplets.
|
||||
///
|
||||
/// @param triplets The sparse matrix triplets.
|
||||
/// @param row The row of wrt.
|
||||
void append_triplets(gch::small_vector<Eigen::Triplet<Scalar>>& triplets,
|
||||
int row) const {
|
||||
// Read docs/algorithms.md#Reverse_accumulation_automatic_differentiation
|
||||
// for background on reverse accumulation automatic differentiation.
|
||||
|
||||
if (m_top_list.empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Set root node's adjoint to 1 since df/df is 1
|
||||
m_top_list[0]->adjoint = Scalar(1);
|
||||
|
||||
// Zero the rest of the adjoints
|
||||
for (auto& node : m_top_list | std::views::drop(1)) {
|
||||
node->adjoint = Scalar(0);
|
||||
}
|
||||
|
||||
// df/dx = (df/dy)(dy/dx). The adjoint of x is equal to the adjoint of y
|
||||
// multiplied by dy/dx. If there are multiple "paths" from the root node to
|
||||
// variable; the variable's adjoint is the sum of each path's adjoint
|
||||
// contribution.
|
||||
for (const auto& node : m_top_list) {
|
||||
auto& lhs = node->args[0];
|
||||
auto& rhs = node->args[1];
|
||||
|
||||
if (lhs != nullptr) {
|
||||
if (rhs != nullptr) {
|
||||
// Binary operator
|
||||
lhs->adjoint += node->grad_l(lhs->val, rhs->val, node->adjoint);
|
||||
rhs->adjoint += node->grad_r(lhs->val, rhs->val, node->adjoint);
|
||||
} else {
|
||||
// Unary operator
|
||||
lhs->adjoint += node->grad_l(lhs->val, Scalar(0), node->adjoint);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < m_top_list.size(); ++i) {
|
||||
const auto& col = m_col_list[i];
|
||||
const auto& node = m_top_list[i];
|
||||
|
||||
// Append adjoints of wrt to sparse matrix triplets
|
||||
if (col != -1) {
|
||||
triplets.emplace_back(row, col, node->adjoint);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
/// Topological sort of graph from parent to child
|
||||
gch::small_vector<Expression<Scalar>*> m_top_list;
|
||||
|
||||
/// List that maps nodes to their respective column
|
||||
gch::small_vector<int> m_col_list;
|
||||
};
|
||||
|
||||
} // namespace slp::detail
|
||||
@@ -7,11 +7,12 @@
|
||||
#include <Eigen/SparseCore>
|
||||
#include <gch/small_vector.hpp>
|
||||
|
||||
#include "sleipnir/autodiff/gradient_expression_graph.hpp"
|
||||
#include "sleipnir/autodiff/expression_graph.hpp"
|
||||
#include "sleipnir/autodiff/variable.hpp"
|
||||
#include "sleipnir/autodiff/variable_matrix.hpp"
|
||||
#include "sleipnir/util/assert.hpp"
|
||||
#include "sleipnir/util/concepts.hpp"
|
||||
#include "sleipnir/util/symbol_exports.hpp"
|
||||
|
||||
namespace slp {
|
||||
|
||||
@@ -41,24 +42,33 @@ class Hessian {
|
||||
/// @param wrt Vector of variables with respect to which to compute the
|
||||
/// Hessian.
|
||||
Hessian(Variable<Scalar> variable, SleipnirMatrixLike<Scalar> auto wrt)
|
||||
: m_variables{
|
||||
detail::GradientExpressionGraph<Scalar>{variable}.generate_tree(
|
||||
wrt)},
|
||||
m_wrt{wrt} {
|
||||
slp_assert(m_wrt.cols() == 1);
|
||||
: m_variables{detail::gradient_tree(
|
||||
detail::topological_sort(variable.expr), wrt)},
|
||||
m_wrt{std::move(wrt)} {
|
||||
slp_assert(wrt.cols() == 1);
|
||||
|
||||
for (auto& variable : m_variables) {
|
||||
m_top_lists.emplace_back(detail::topological_sort(variable.expr));
|
||||
}
|
||||
|
||||
// Initialize column each expression's adjoint occupies in the Jacobian
|
||||
for (size_t col = 0; col < m_wrt.size(); ++col) {
|
||||
m_wrt[col].expr->col = col;
|
||||
m_wrt[col].expr->scratch = col;
|
||||
}
|
||||
|
||||
for (auto& variable : m_variables) {
|
||||
m_graphs.emplace_back(variable);
|
||||
// Make list of only nodes in output row, and their output columns
|
||||
for (auto& top_list : m_top_lists) {
|
||||
m_output_lists.emplace_back();
|
||||
for (const auto& node : top_list) {
|
||||
if (node->scratch != -1) {
|
||||
m_output_lists.back().emplace_back(node->scratch, node);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Reset col to -1
|
||||
for (auto& node : m_wrt) {
|
||||
node.expr->col = -1;
|
||||
node.expr->scratch = -1;
|
||||
}
|
||||
|
||||
for (int row = 0; row < m_variables.rows(); ++row) {
|
||||
@@ -70,7 +80,8 @@ class Hessian {
|
||||
// If the row is linear, compute its gradient once here and cache its
|
||||
// triplets. Constant rows are ignored because their gradients have no
|
||||
// nonzero triplets.
|
||||
m_graphs[row].append_triplets(m_cached_triplets, row);
|
||||
detail::append_triplets(m_top_lists[row], m_output_lists[row],
|
||||
m_cached_triplets, row);
|
||||
} else if (m_variables[row].type() > ExpressionType::LINEAR) {
|
||||
// If the row is quadratic or nonlinear, add it to the list of nonlinear
|
||||
// rows to be recomputed in value().
|
||||
@@ -97,7 +108,7 @@ class Hessian {
|
||||
m_wrt.rows()};
|
||||
|
||||
for (int row = 0; row < m_variables.rows(); ++row) {
|
||||
auto grad = m_graphs[row].generate_tree(m_wrt);
|
||||
auto grad = detail::gradient_tree(m_top_lists[row], m_wrt);
|
||||
for (int col = 0; col < m_wrt.rows(); ++col) {
|
||||
if (grad[col].expr != nullptr) {
|
||||
result(row, col) = std::move(grad[col]);
|
||||
@@ -118,8 +129,8 @@ class Hessian {
|
||||
return m_H;
|
||||
}
|
||||
|
||||
for (auto& graph : m_graphs) {
|
||||
graph.update_values();
|
||||
for (auto& top_list : m_top_lists) {
|
||||
detail::update_values(top_list);
|
||||
}
|
||||
|
||||
// Copy the cached triplets so triplets added for the nonlinear rows are
|
||||
@@ -128,7 +139,8 @@ class Hessian {
|
||||
|
||||
// Compute each nonlinear row of the Hessian
|
||||
for (int row : m_nonlinear_rows) {
|
||||
m_graphs[row].append_triplets(triplets, row);
|
||||
detail::append_triplets(m_top_lists[row], m_output_lists[row], triplets,
|
||||
row);
|
||||
}
|
||||
|
||||
m_H.setFromTriplets(triplets.begin(), triplets.end());
|
||||
@@ -143,7 +155,13 @@ class Hessian {
|
||||
VariableMatrix<Scalar> m_variables;
|
||||
VariableMatrix<Scalar> m_wrt;
|
||||
|
||||
gch::small_vector<detail::GradientExpressionGraph<Scalar>> m_graphs;
|
||||
/// List of topologically sorted graphs from parent to child, one for each row
|
||||
gch::small_vector<detail::ExpressionGraph<Scalar>> m_top_lists;
|
||||
|
||||
/// List of output rows as column-node pairs
|
||||
gch::small_vector<
|
||||
gch::small_vector<std::pair<int, detail::Expression<Scalar>*>>>
|
||||
m_output_lists;
|
||||
|
||||
Eigen::SparseMatrix<Scalar> m_H{m_variables.rows(), m_wrt.rows()};
|
||||
|
||||
|
||||
@@ -7,12 +7,11 @@
|
||||
#include <Eigen/SparseCore>
|
||||
#include <gch/small_vector.hpp>
|
||||
|
||||
#include "sleipnir/autodiff/gradient_expression_graph.hpp"
|
||||
#include "sleipnir/autodiff/expression_graph.hpp"
|
||||
#include "sleipnir/autodiff/variable.hpp"
|
||||
#include "sleipnir/autodiff/variable_matrix.hpp"
|
||||
#include "sleipnir/util/assert.hpp"
|
||||
#include "sleipnir/util/concepts.hpp"
|
||||
#include "sleipnir/util/empty.hpp"
|
||||
#include "sleipnir/util/symbol_exports.hpp"
|
||||
|
||||
namespace slp {
|
||||
@@ -54,18 +53,28 @@ class Jacobian {
|
||||
slp_assert(m_variables.cols() == 1);
|
||||
slp_assert(m_wrt.cols() == 1);
|
||||
|
||||
// Initialize column each expression's adjoint occupies in the Jacobian
|
||||
for (size_t col = 0; col < m_wrt.size(); ++col) {
|
||||
m_wrt[col].expr->col = col;
|
||||
for (auto& variable : m_variables) {
|
||||
m_top_lists.emplace_back(detail::topological_sort(variable.expr));
|
||||
}
|
||||
|
||||
for (auto& variable : m_variables) {
|
||||
m_graphs.emplace_back(variable);
|
||||
// Initialize column each expression's adjoint occupies in the Jacobian
|
||||
for (size_t col = 0; col < m_wrt.size(); ++col) {
|
||||
m_wrt[col].expr->scratch = col;
|
||||
}
|
||||
|
||||
// Make list of output rows as column-node pairs
|
||||
for (auto& top_list : m_top_lists) {
|
||||
m_output_lists.emplace_back();
|
||||
for (const auto& node : top_list) {
|
||||
if (node->scratch != -1) {
|
||||
m_output_lists.back().emplace_back(node->scratch, node);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Reset col to -1
|
||||
for (auto& node : m_wrt) {
|
||||
node.expr->col = -1;
|
||||
node.expr->scratch = -1;
|
||||
}
|
||||
|
||||
for (int row = 0; row < m_variables.rows(); ++row) {
|
||||
@@ -77,7 +86,8 @@ class Jacobian {
|
||||
// If the row is linear, compute its gradient once here and cache its
|
||||
// triplets. Constant rows are ignored because their gradients have no
|
||||
// nonzero triplets.
|
||||
m_graphs[row].append_triplets(m_cached_triplets, row);
|
||||
detail::append_triplets(m_top_lists[row], m_output_lists[row],
|
||||
m_cached_triplets, row);
|
||||
} else if (m_variables[row].type() > ExpressionType::LINEAR) {
|
||||
// If the row is quadratic or nonlinear, add it to the list of nonlinear
|
||||
// rows to be recomputed in value().
|
||||
@@ -101,7 +111,7 @@ class Jacobian {
|
||||
m_wrt.rows()};
|
||||
|
||||
for (int row = 0; row < m_variables.rows(); ++row) {
|
||||
auto grad = m_graphs[row].generate_tree(m_wrt);
|
||||
auto grad = detail::gradient_tree(m_top_lists[row], m_wrt);
|
||||
for (int col = 0; col < m_wrt.rows(); ++col) {
|
||||
if (grad[col].expr != nullptr) {
|
||||
result(row, col) = std::move(grad[col]);
|
||||
@@ -122,8 +132,8 @@ class Jacobian {
|
||||
return m_J;
|
||||
}
|
||||
|
||||
for (auto& graph : m_graphs) {
|
||||
graph.update_values();
|
||||
for (auto& top_list : m_top_lists) {
|
||||
detail::update_values(top_list);
|
||||
}
|
||||
|
||||
// Copy the cached triplets so triplets added for the nonlinear rows are
|
||||
@@ -132,7 +142,8 @@ class Jacobian {
|
||||
|
||||
// Compute each nonlinear row of the Jacobian
|
||||
for (int row : m_nonlinear_rows) {
|
||||
m_graphs[row].append_triplets(triplets, row);
|
||||
detail::append_triplets(m_top_lists[row], m_output_lists[row], triplets,
|
||||
row);
|
||||
}
|
||||
|
||||
m_J.setFromTriplets(triplets.begin(), triplets.end());
|
||||
@@ -144,7 +155,13 @@ class Jacobian {
|
||||
VariableMatrix<Scalar> m_variables;
|
||||
VariableMatrix<Scalar> m_wrt;
|
||||
|
||||
gch::small_vector<detail::GradientExpressionGraph<Scalar>> m_graphs;
|
||||
/// List of topologically sorted graphs from parent to child, one for each row
|
||||
gch::small_vector<detail::ExpressionGraph<Scalar>> m_top_lists;
|
||||
|
||||
/// List of output rows as column-node pairs
|
||||
gch::small_vector<
|
||||
gch::small_vector<std::pair<int, detail::Expression<Scalar>*>>>
|
||||
m_output_lists;
|
||||
|
||||
Eigen::SparseMatrix<Scalar> m_J{m_variables.rows(), m_wrt.rows()};
|
||||
|
||||
|
||||
@@ -26,10 +26,15 @@ namespace slp {
|
||||
|
||||
// Forward declarations for friend declarations in Variable
|
||||
|
||||
template <typename Scalar>
|
||||
class VariableMatrix;
|
||||
|
||||
namespace detail {
|
||||
|
||||
template <typename Scalar>
|
||||
class GradientExpressionGraph;
|
||||
VariableMatrix<Scalar> gradient_tree(
|
||||
const detail::ExpressionGraph<Scalar>& top_list,
|
||||
const VariableMatrix<Scalar>& wrt);
|
||||
|
||||
} // namespace detail
|
||||
|
||||
@@ -265,7 +270,7 @@ class Variable : public SleipnirBase {
|
||||
|
||||
/// Used to update the value of this variable based on the values of its
|
||||
/// dependent variables
|
||||
gch::small_vector<detail::Expression<Scalar>*> m_graph;
|
||||
detail::ExpressionGraph<Scalar> m_graph;
|
||||
|
||||
/// Used for lazy initialization of m_graph
|
||||
bool m_graph_initialized = false;
|
||||
@@ -354,7 +359,10 @@ class Variable : public SleipnirBase {
|
||||
const Variable<Scalar>& y,
|
||||
const Variable<Scalar>& z);
|
||||
|
||||
friend class detail::GradientExpressionGraph<Scalar>;
|
||||
template <typename Scalar>
|
||||
friend VariableMatrix<Scalar> detail::gradient_tree(
|
||||
const detail::ExpressionGraph<Scalar>& top_list,
|
||||
const VariableMatrix<Scalar>& wrt);
|
||||
template <typename Scalar, int UpLo>
|
||||
requires(UpLo == Eigen::Lower) || (UpLo == (Eigen::Lower | Eigen::Upper))
|
||||
friend class Hessian;
|
||||
|
||||
@@ -1647,6 +1647,72 @@ VariableMatrix<Scalar> solve(const VariableMatrix<Scalar>& A,
|
||||
}
|
||||
}
|
||||
|
||||
namespace detail {
|
||||
|
||||
/// Returns the variable's gradient tree.
|
||||
///
|
||||
/// This function lazily allocates variables, so elements of the returned
|
||||
/// VariableMatrix will be empty if the corresponding element of wrt had no
|
||||
/// adjoint. Ensure Variable::expr isn't nullptr before calling member
|
||||
/// functions.
|
||||
///
|
||||
/// @tparam Scalar Scalar type.
|
||||
/// @param top_list Topologically sorted graph from parent to child.
|
||||
/// @param wrt Variables with respect to which to compute the gradient.
|
||||
/// @return The variable's gradient tree.
|
||||
template <typename Scalar>
|
||||
VariableMatrix<Scalar> gradient_tree(const ExpressionGraph<Scalar>& top_list,
|
||||
const VariableMatrix<Scalar>& wrt) {
|
||||
slp_assert(wrt.cols() == 1);
|
||||
|
||||
// Read docs/algorithms.md#Reverse_accumulation_automatic_differentiation
|
||||
// for background on reverse accumulation automatic differentiation.
|
||||
|
||||
if (top_list.empty()) {
|
||||
return VariableMatrix<Scalar>{detail::empty, wrt.rows(), 1};
|
||||
}
|
||||
|
||||
// Set root node's adjoint to 1 since df/df is 1
|
||||
top_list[0]->adjoint_expr = constant_ptr(Scalar(1));
|
||||
|
||||
// df/dx = (df/dy)(dy/dx). The adjoint of x is equal to the adjoint of y
|
||||
// multiplied by dy/dx. If there are multiple "paths" from the root node to
|
||||
// variable; the variable's adjoint is the sum of each path's adjoint
|
||||
// contribution.
|
||||
for (auto& node : top_list) {
|
||||
auto& lhs = node->args[0];
|
||||
auto& rhs = node->args[1];
|
||||
|
||||
if (lhs != nullptr) {
|
||||
if (rhs != nullptr) {
|
||||
// Binary operator
|
||||
lhs->adjoint_expr += node->grad_expr_l(lhs, rhs, node->adjoint_expr);
|
||||
rhs->adjoint_expr += node->grad_expr_r(lhs, rhs, node->adjoint_expr);
|
||||
} else {
|
||||
// Unary operator
|
||||
lhs->adjoint_expr += node->grad_expr_l(lhs, rhs, node->adjoint_expr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Move gradient tree to return value
|
||||
VariableMatrix<Scalar> grad{detail::empty, wrt.rows(), 1};
|
||||
for (int row = 0; row < grad.rows(); ++row) {
|
||||
grad[row] = Variable{std::move(wrt[row].expr->adjoint_expr)};
|
||||
}
|
||||
|
||||
// Unlink adjoints to avoid circular references between them and their
|
||||
// parent expressions. This ensures all expressions are returned to the free
|
||||
// list.
|
||||
for (auto& node : top_list) {
|
||||
node->adjoint_expr = nullptr;
|
||||
}
|
||||
|
||||
return grad;
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
|
||||
extern template SLEIPNIR_DLLEXPORT VariableMatrix<double> solve(
|
||||
const VariableMatrix<double>& A, const VariableMatrix<double>& B);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user