From e728ee27f2918bd21a6c933f3b33c261cc7019d1 Mon Sep 17 00:00:00 2001 From: Tyler Veness Date: Fri, 29 May 2026 14:59:39 -0700 Subject: [PATCH] [upstream_utils] Upgrade to Sleipnir 0.6.1 (#8930) This fixes a memory leak on Windows. --- upstream_utils/sleipnir.py | 2 +- .../0002-Use-wpi-SmallVector.patch | 10 +- .../0005-Replace-std-views-zip.patch | 21 +-- ...-Suppress-clang-tidy-false-positives.patch | 4 +- ...ppress-GCC-12-warning-false-positive.patch | 2 +- ...tead-of-multidimensional-array-subsc.patch | 24 +-- .../include/sleipnir/autodiff/expression.hpp | 25 ++- .../sleipnir/autodiff/expression_graph.hpp | 80 ++++++++- .../autodiff/gradient_expression_graph.hpp | 161 ------------------ .../include/sleipnir/autodiff/hessian.hpp | 50 ++++-- .../include/sleipnir/autodiff/jacobian.hpp | 45 +++-- .../include/sleipnir/autodiff/variable.hpp | 14 +- .../sleipnir/autodiff/variable_matrix.hpp | 66 +++++++ 13 files changed, 253 insertions(+), 251 deletions(-) delete mode 100644 wpimath/src/main/native/thirdparty/sleipnir/include/sleipnir/autodiff/gradient_expression_graph.hpp diff --git a/upstream_utils/sleipnir.py b/upstream_utils/sleipnir.py index a388c21168..d05aa96aea 100755 --- a/upstream_utils/sleipnir.py +++ b/upstream_utils/sleipnir.py @@ -46,7 +46,7 @@ using small_vector = wpi::util::SmallVector; def main(): name = "sleipnir" url = "https://github.com/SleipnirGroup/Sleipnir" - tag = "v0.6.0" + tag = "v0.6.1" sleipnir = Lib(name, url, tag, copy_upstream_src) sleipnir.main() diff --git a/upstream_utils/sleipnir_patches/0002-Use-wpi-SmallVector.patch b/upstream_utils/sleipnir_patches/0002-Use-wpi-SmallVector.patch index 6efb521a55..9c5053bc94 100644 --- a/upstream_utils/sleipnir_patches/0002-Use-wpi-SmallVector.patch +++ b/upstream_utils/sleipnir_patches/0002-Use-wpi-SmallVector.patch @@ -9,7 +9,7 @@ Subject: [PATCH 02/10] Use wpi::SmallVector 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/sleipnir/autodiff/expression.hpp b/include/sleipnir/autodiff/expression.hpp -index af10b505ae44ca0eb656f66f3f49172d7fc98c6d..7a9dc089a9e2fb323ecc8725f924ebee1b213af0 100644 +index f04967c6a84ea76b77cebb7ef8edcb798d03e2a5..361c74702107eadf6f1fbe1682a7ef296abd19a3 100644 --- a/include/sleipnir/autodiff/expression.hpp +++ b/include/sleipnir/autodiff/expression.hpp @@ -34,7 +34,7 @@ struct Expression; @@ -21,7 +21,7 @@ index af10b505ae44ca0eb656f66f3f49172d7fc98c6d..7a9dc089a9e2fb323ecc8725f924ebee /// Typedef for intrusive shared pointer to Expression. /// -@@ -744,7 +744,7 @@ constexpr void inc_ref_count(Expression* expr) { +@@ -749,7 +749,7 @@ constexpr void inc_ref_count(Expression* expr) { /// @tparam Scalar Scalar type. /// @param expr The shared pointer's managed object. template @@ -31,10 +31,10 @@ index af10b505ae44ca0eb656f66f3f49172d7fc98c6d..7a9dc089a9e2fb323ecc8725f924ebee // Expression destructor when expr's refcount reaches zero can cause a stack // overflow. Instead, we iterate over its children to decrement their diff --git a/include/sleipnir/autodiff/variable.hpp b/include/sleipnir/autodiff/variable.hpp -index 910ebcf5266bdf76711db7849dd6584549094e3b..3c4f67c1f6224226620e2ffcc597336435943ce8 100644 +index a6b054494a18eacbcae10c0e88648550a39ed48e..ad8baa3b58047f5257c210b88446324cca121d4a 100644 --- a/include/sleipnir/autodiff/variable.hpp +++ b/include/sleipnir/autodiff/variable.hpp -@@ -53,7 +53,7 @@ class Variable : public SleipnirBase { +@@ -58,7 +58,7 @@ class Variable : public SleipnirBase { Variable() = default; /// Constructs an empty Variable. @@ -43,7 +43,7 @@ index 910ebcf5266bdf76711db7849dd6584549094e3b..3c4f67c1f6224226620e2ffcc5973364 /// Constructs a Variable from a scalar type. /// -@@ -96,7 +96,7 @@ class Variable : public SleipnirBase { +@@ -101,7 +101,7 @@ class Variable : public SleipnirBase { /// Constructs a Variable pointing to the specified expression. /// /// @param expr The autodiff variable. diff --git a/upstream_utils/sleipnir_patches/0005-Replace-std-views-zip.patch b/upstream_utils/sleipnir_patches/0005-Replace-std-views-zip.patch index 17e36fbcf5..e4201f6483 100644 --- a/upstream_utils/sleipnir_patches/0005-Replace-std-views-zip.patch +++ b/upstream_utils/sleipnir_patches/0005-Replace-std-views-zip.patch @@ -4,26 +4,9 @@ Date: Sat, 8 Feb 2025 13:42:36 -0800 Subject: [PATCH 05/10] Replace std::views::zip() --- - include/sleipnir/autodiff/gradient_expression_graph.hpp | 5 ++++- - include/sleipnir/optimization/problem.hpp | 8 +++++--- - 2 files changed, 9 insertions(+), 4 deletions(-) + include/sleipnir/optimization/problem.hpp | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) -diff --git a/include/sleipnir/autodiff/gradient_expression_graph.hpp b/include/sleipnir/autodiff/gradient_expression_graph.hpp -index fda7931c686a315209acdbe6f1ac581944e2781c..d3e59068d04b3db01945c5657959b69cd4522cca 100644 ---- a/include/sleipnir/autodiff/gradient_expression_graph.hpp -+++ b/include/sleipnir/autodiff/gradient_expression_graph.hpp -@@ -139,7 +139,10 @@ class GradientExpressionGraph { - } - } - -- for (const auto& [col, node] : std::views::zip(m_col_list, m_top_list)) { -+ for (size_t i = 0; i < m_top_list.size(); ++i) { -+ const auto& col = m_col_list[i]; -+ const auto& node = m_top_list[i]; -+ - // Append adjoints of wrt to sparse matrix triplets - if (col != -1) { - triplets.emplace_back(row, col, node->adjoint); diff --git a/include/sleipnir/optimization/problem.hpp b/include/sleipnir/optimization/problem.hpp index 49921b98de3452b6ca9f2e33a86daa254c0d3a01..d3feadd577bd53147a8b07da76910e046ccbf95d 100644 --- a/include/sleipnir/optimization/problem.hpp diff --git a/upstream_utils/sleipnir_patches/0007-Suppress-clang-tidy-false-positives.patch b/upstream_utils/sleipnir_patches/0007-Suppress-clang-tidy-false-positives.patch index b95a1f11d0..0042d2f1e8 100644 --- a/upstream_utils/sleipnir_patches/0007-Suppress-clang-tidy-false-positives.patch +++ b/upstream_utils/sleipnir_patches/0007-Suppress-clang-tidy-false-positives.patch @@ -8,10 +8,10 @@ Subject: [PATCH 07/10] Suppress clang-tidy false positives 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/sleipnir/autodiff/variable.hpp b/include/sleipnir/autodiff/variable.hpp -index 3c4f67c1f6224226620e2ffcc597336435943ce8..3d3b355f9fef7d5b6b57521317f67041e0fb1ad6 100644 +index ad8baa3b58047f5257c210b88446324cca121d4a..5f32d7ea16864ad9a4fdcc4b067ef7dff2e9027d 100644 --- a/include/sleipnir/autodiff/variable.hpp +++ b/include/sleipnir/autodiff/variable.hpp -@@ -835,7 +835,7 @@ struct InequalityConstraints { +@@ -843,7 +843,7 @@ struct InequalityConstraints { /// /// @param inequality_constraints The list of InequalityConstraints to /// concatenate. diff --git a/upstream_utils/sleipnir_patches/0008-Suppress-GCC-12-warning-false-positive.patch b/upstream_utils/sleipnir_patches/0008-Suppress-GCC-12-warning-false-positive.patch index 0ec037ffe0..1c74a5f9ce 100644 --- a/upstream_utils/sleipnir_patches/0008-Suppress-GCC-12-warning-false-positive.patch +++ b/upstream_utils/sleipnir_patches/0008-Suppress-GCC-12-warning-false-positive.patch @@ -8,7 +8,7 @@ Subject: [PATCH 08/10] Suppress GCC 12 warning false positive 1 file changed, 7 insertions(+) diff --git a/include/sleipnir/autodiff/variable_matrix.hpp b/include/sleipnir/autodiff/variable_matrix.hpp -index 2cf6e00f16f58a5847a4515f13b3a91fffbb72b8..b5d15edc5b294a2ea50dca15a7612c61e5f2680b 100644 +index 2bdbf11841b9920111b9cb1426d6fd04764040e4..8b84a04780240cffb801e2c6f84e22c0e5246286 100644 --- a/include/sleipnir/autodiff/variable_matrix.hpp +++ b/include/sleipnir/autodiff/variable_matrix.hpp @@ -503,6 +503,10 @@ class VariableMatrix : public SleipnirBase { diff --git a/upstream_utils/sleipnir_patches/0010-Use-operator-instead-of-multidimensional-array-subsc.patch b/upstream_utils/sleipnir_patches/0010-Use-operator-instead-of-multidimensional-array-subsc.patch index a9dc822a34..525922adba 100644 --- a/upstream_utils/sleipnir_patches/0010-Use-operator-instead-of-multidimensional-array-subsc.patch +++ b/upstream_utils/sleipnir_patches/0010-Use-operator-instead-of-multidimensional-array-subsc.patch @@ -15,11 +15,11 @@ Subject: [PATCH 10/10] Use operator() instead of multidimensional array 7 files changed, 135 insertions(+), 135 deletions(-) diff --git a/include/sleipnir/autodiff/hessian.hpp b/include/sleipnir/autodiff/hessian.hpp -index 2a8087eda23f49f8c01be35d5289568644a9742d..c48e01c7b89d80db8032f6d2e492a0ec1bfa9f42 100644 +index d9d25a00251dd31e446bd0419f0c13e8f2456f2d..39cd798160ff272aeac4b12c92edee3f2a213a22 100644 --- a/include/sleipnir/autodiff/hessian.hpp +++ b/include/sleipnir/autodiff/hessian.hpp -@@ -100,9 +100,9 @@ class Hessian { - auto grad = m_graphs[row].generate_tree(m_wrt); +@@ -111,9 +111,9 @@ class Hessian { + auto grad = detail::gradient_tree(m_top_lists[row], m_wrt); for (int col = 0; col < m_wrt.rows(); ++col) { if (grad[col].expr != nullptr) { - result[row, col] = std::move(grad[col]); @@ -31,11 +31,11 @@ index 2a8087eda23f49f8c01be35d5289568644a9742d..c48e01c7b89d80db8032f6d2e492a0ec } } diff --git a/include/sleipnir/autodiff/jacobian.hpp b/include/sleipnir/autodiff/jacobian.hpp -index 7a5c3798dab50f14db35bee430f1d5e8e32196ff..1a5b01de99a7c011ebf8c5ed9fc64942675fcb4b 100644 +index f97b5737c8fc007299bb4a1dbe5b057c1e3645de..b202f776d00feb5eaa0703796d226ab52f89aad5 100644 --- a/include/sleipnir/autodiff/jacobian.hpp +++ b/include/sleipnir/autodiff/jacobian.hpp -@@ -104,9 +104,9 @@ class Jacobian { - auto grad = m_graphs[row].generate_tree(m_wrt); +@@ -114,9 +114,9 @@ class Jacobian { + auto grad = detail::gradient_tree(m_top_lists[row], m_wrt); for (int col = 0; col < m_wrt.rows(); ++col) { if (grad[col].expr != nullptr) { - result[row, col] = std::move(grad[col]); @@ -47,10 +47,10 @@ index 7a5c3798dab50f14db35bee430f1d5e8e32196ff..1a5b01de99a7c011ebf8c5ed9fc64942 } } diff --git a/include/sleipnir/autodiff/variable.hpp b/include/sleipnir/autodiff/variable.hpp -index 3d3b355f9fef7d5b6b57521317f67041e0fb1ad6..130a4f057ea08ab0565b54f8a3556bd81fbc6825 100644 +index 5f32d7ea16864ad9a4fdcc4b067ef7dff2e9027d..5d7b56de884cdb73fc9f08fcfa1982b252bdd88e 100644 --- a/include/sleipnir/autodiff/variable.hpp +++ b/include/sleipnir/autodiff/variable.hpp -@@ -68,7 +68,7 @@ class Variable : public SleipnirBase { +@@ -73,7 +73,7 @@ class Variable : public SleipnirBase { /// /// @param value The value of the Variable. // NOLINTNEXTLINE (google-explicit-constructor) @@ -59,7 +59,7 @@ index 3d3b355f9fef7d5b6b57521317f67041e0fb1ad6..130a4f057ea08ab0565b54f8a3556bd8 slp_assert(value.rows() == 1 && value.cols() == 1); } -@@ -725,7 +725,7 @@ auto make_constraints(LHS&& lhs, RHS&& rhs) { +@@ -733,7 +733,7 @@ auto make_constraints(LHS&& lhs, RHS&& rhs) { for (int row = 0; row < rhs.rows(); ++row) { for (int col = 0; col < rhs.cols(); ++col) { // Make right-hand side zero @@ -68,7 +68,7 @@ index 3d3b355f9fef7d5b6b57521317f67041e0fb1ad6..130a4f057ea08ab0565b54f8a3556bd8 } } -@@ -741,7 +741,7 @@ auto make_constraints(LHS&& lhs, RHS&& rhs) { +@@ -749,7 +749,7 @@ auto make_constraints(LHS&& lhs, RHS&& rhs) { for (int row = 0; row < lhs.rows(); ++row) { for (int col = 0; col < lhs.cols(); ++col) { // Make right-hand side zero @@ -77,7 +77,7 @@ index 3d3b355f9fef7d5b6b57521317f67041e0fb1ad6..130a4f057ea08ab0565b54f8a3556bd8 } } -@@ -759,7 +759,7 @@ auto make_constraints(LHS&& lhs, RHS&& rhs) { +@@ -767,7 +767,7 @@ auto make_constraints(LHS&& lhs, RHS&& rhs) { for (int row = 0; row < lhs.rows(); ++row) { for (int col = 0; col < lhs.cols(); ++col) { // Make right-hand side zero @@ -388,7 +388,7 @@ index 4018606df45941b578c861caf934495f8c9e368e..cf554832b82adb17b4b1d7b56842a77d } diff --git a/include/sleipnir/autodiff/variable_matrix.hpp b/include/sleipnir/autodiff/variable_matrix.hpp -index b5d15edc5b294a2ea50dca15a7612c61e5f2680b..02bccb660474d9380444f5e20124fb687e372982 100644 +index 8b84a04780240cffb801e2c6f84e22c0e5246286..d82b90191f6fb3f30460fecb2653655c7449d109 100644 --- a/include/sleipnir/autodiff/variable_matrix.hpp +++ b/include/sleipnir/autodiff/variable_matrix.hpp @@ -154,7 +154,7 @@ class VariableMatrix : public SleipnirBase { diff --git a/wpimath/src/main/native/thirdparty/sleipnir/include/sleipnir/autodiff/expression.hpp b/wpimath/src/main/native/thirdparty/sleipnir/include/sleipnir/autodiff/expression.hpp index 7a9dc089a9..361c747021 100644 --- a/wpimath/src/main/native/thirdparty/sleipnir/include/sleipnir/autodiff/expression.hpp +++ b/wpimath/src/main/native/thirdparty/sleipnir/include/sleipnir/autodiff/expression.hpp @@ -96,22 +96,27 @@ struct Expression { /// The adjoint of the expression node, used during autodiff. Scalar adjoint{0}; - /// Counts incoming edges for this node. - uint32_t incoming_edges = 0; - - /// This expression's column in a Jacobian, or -1 otherwise. - int32_t col = -1; - /// The adjoint of the expression node, used during gradient expression tree /// generation. ExpressionPtr adjoint_expr; - /// Reference count for intrusive shared pointer. - uint32_t ref_count = 0; - /// Expression arguments. std::array, 2> args{nullptr, nullptr}; + /// Scratch space for various graph algorithms. + /// + /// In expression_graph.hpp's topological_sort(), scratch counts incoming + /// edges for this node, offset by -1 so -1 means no edges. + /// + /// In Hessian and Jacobian constructors, scratch represents this expression's + /// column in a Jacobian, or -1 otherwise. + /// + /// They share a default state of -1 to avoid extra assignments. + int32_t scratch = -1; + + /// Reference count for intrusive shared pointer. + uint32_t ref_count = 0; + /// Constructs a constant expression with a value of zero. constexpr Expression() = default; @@ -774,6 +779,8 @@ void dec_ref_count(Expression* expr) { auto alloc = global_pool_allocator>(); std::allocator_traits::deallocate( alloc, elem, sizeof(Expression)); + } else { + operator delete(elem); } } } diff --git a/wpimath/src/main/native/thirdparty/sleipnir/include/sleipnir/autodiff/expression_graph.hpp b/wpimath/src/main/native/thirdparty/sleipnir/include/sleipnir/autodiff/expression_graph.hpp index 387c53ae3f..d6e1a74908 100644 --- a/wpimath/src/main/native/thirdparty/sleipnir/include/sleipnir/autodiff/expression_graph.hpp +++ b/wpimath/src/main/native/thirdparty/sleipnir/include/sleipnir/autodiff/expression_graph.hpp @@ -4,12 +4,19 @@ #include +#include #include #include "sleipnir/autodiff/expression.hpp" namespace slp::detail { +/// Typedef for topologically sorted expression graph from parent to child. +/// +/// @tparam Scalar Scalar type. +template +using ExpressionGraph = gch::small_vector*>; + /// Generates a topological sort of an expression graph from parent to child. /// /// https://en.wikipedia.org/wiki/Topological_sorting @@ -17,9 +24,8 @@ namespace slp::detail { /// @tparam Scalar Scalar type. /// @param root The root node of the expression. template -gch::small_vector*> topological_sort( - const ExpressionPtr& root) { - gch::small_vector*> list; +ExpressionGraph topological_sort(const ExpressionPtr& root) { + ExpressionGraph list; // If the root type is constant, updates are a no-op, so return an empty list if (root == nullptr || root->type() == ExpressionType::CONSTANT) { @@ -30,6 +36,8 @@ gch::small_vector*> topological_sort( gch::small_vector*> stack; // Enumerate incoming edges for each node via depth-first search + // + // NOTE: scratch counts incoming edges, offset by -1 so -1 means no edges. stack.emplace_back(root.get()); while (!stack.empty()) { auto node = stack.back(); @@ -37,7 +45,7 @@ gch::small_vector*> topological_sort( for (auto& arg : node->args) { // If the node hasn't been explored yet, add it to the stack - if (arg != nullptr && ++arg->incoming_edges == 1) { + if (arg != nullptr && ++arg->scratch == 0) { stack.push_back(arg.get()); } } @@ -46,8 +54,7 @@ gch::small_vector*> topological_sort( // Generate topological sort of graph from parent to child. // // A node is only added to the stack after all its incoming edges have been - // traversed. Expression::incoming_edges is a decrementing counter for - // tracking this. + // traversed. Expression::scratch is a decrementing counter for tracking this. // // https://en.wikipedia.org/wiki/Topological_sorting stack.emplace_back(root.get()); @@ -59,7 +66,7 @@ gch::small_vector*> topological_sort( for (auto& arg : node->args) { // If we traversed all this node's incoming edges, add it to the stack - if (arg != nullptr && --arg->incoming_edges == 0) { + if (arg != nullptr && --arg->scratch == -1) { stack.push_back(arg.get()); } } @@ -74,7 +81,7 @@ gch::small_vector*> topological_sort( /// @tparam Scalar Scalar type. /// @param list Topological sort of graph from parent to child. template -void update_values(const gch::small_vector*>& list) { +void update_values(const ExpressionGraph& list) { // Traverse graph from child to parent and update values for (auto& node : list | std::views::reverse) { auto& lhs = node->args[0]; @@ -86,4 +93,61 @@ void update_values(const gch::small_vector*>& list) { } } +/// Updates the adjoints in the expression graph (computes the gradient) then +/// appends the adjoints of wrt to the sparse matrix triplets. +/// +/// @tparam Scalar Scalar type. +/// @param top_list Topologically sorted graph from parent to child. +/// @param output_list Output row as column-node pairs. +/// @param triplets The sparse matrix triplets. +/// @param row The row of wrt. +template +void append_triplets( + const ExpressionGraph& top_list, + const gch::small_vector*>>& + output_list, + gch::small_vector>& triplets, int row) { + // Read docs/algorithms.md#Reverse_accumulation_automatic_differentiation + // for background on reverse accumulation automatic differentiation. + + if (top_list.empty()) { + return; + } + + // Set root node's adjoint to 1 since df/df is 1 + top_list[0]->adjoint = Scalar(1); + + // Zero the rest of the adjoints + for (auto& node : top_list | std::views::drop(1)) { + node->adjoint = Scalar(0); + } + + // df/dx = (df/dy)(dy/dx). The adjoint of x is equal to the adjoint of y + // multiplied by dy/dx. If there are multiple "paths" from the root node to + // variable; the variable's adjoint is the sum of each path's adjoint + // contribution. + for (const auto& node : top_list) { + auto& lhs = node->args[0]; + auto& rhs = node->args[1]; + + if (lhs != nullptr) { + if (rhs != nullptr) { + // Binary operator + lhs->adjoint += node->grad_l(lhs->val, rhs->val, node->adjoint); + rhs->adjoint += node->grad_r(lhs->val, rhs->val, node->adjoint); + } else { + // Unary operator + lhs->adjoint += node->grad_l(lhs->val, Scalar(0), node->adjoint); + } + } + } + + // Exploit the row's sparsity pattern by only appending wrt adjoints that + // appear in the expression graph + for (const auto& [col, node] : output_list) { + // Append adjoints of wrt to sparse matrix triplets + triplets.emplace_back(row, col, node->adjoint); + } +} + } // namespace slp::detail diff --git a/wpimath/src/main/native/thirdparty/sleipnir/include/sleipnir/autodiff/gradient_expression_graph.hpp b/wpimath/src/main/native/thirdparty/sleipnir/include/sleipnir/autodiff/gradient_expression_graph.hpp deleted file mode 100644 index d3e59068d0..0000000000 --- a/wpimath/src/main/native/thirdparty/sleipnir/include/sleipnir/autodiff/gradient_expression_graph.hpp +++ /dev/null @@ -1,161 +0,0 @@ -// Copyright (c) Sleipnir contributors - -#pragma once - -#include -#include - -#include -#include - -#include "sleipnir/autodiff/expression_graph.hpp" -#include "sleipnir/autodiff/variable.hpp" -#include "sleipnir/autodiff/variable_matrix.hpp" -#include "sleipnir/util/assert.hpp" -#include "sleipnir/util/empty.hpp" - -namespace slp::detail { - -/// This class is an adapter type that performs value updates of an expression -/// graph, generates a gradient tree, or appends gradient triplets for creating -/// a sparse matrix of gradients. -/// -/// @tparam Scalar Scalar type. -template -class GradientExpressionGraph { - public: - /// Generates the gradient graph for the given expression. - /// - /// @param root The root node of the expression. - explicit GradientExpressionGraph(const Variable& root) - : m_top_list{topological_sort(root.expr)} { - for (const auto& node : m_top_list) { - m_col_list.emplace_back(node->col); - } - } - - /// Updates the values of all nodes in this graph based on the values of their - /// dependent nodes. - void update_values() { detail::update_values(m_top_list); } - - /// Returns the variable's gradient tree. - /// - /// This function lazily allocates variables, so elements of the returned - /// VariableMatrix will be empty if the corresponding element of wrt had no - /// adjoint. Ensure Variable::expr isn't nullptr before calling member - /// functions. - /// - /// @param wrt Variables with respect to which to compute the gradient. - /// @return The variable's gradient tree. - VariableMatrix generate_tree( - const VariableMatrix& wrt) const { - slp_assert(wrt.cols() == 1); - - // Read docs/algorithms.md#Reverse_accumulation_automatic_differentiation - // for background on reverse accumulation automatic differentiation. - - if (m_top_list.empty()) { - return VariableMatrix{detail::empty, wrt.rows(), 1}; - } - - // Set root node's adjoint to 1 since df/df is 1 - m_top_list[0]->adjoint_expr = constant_ptr(Scalar(1)); - - // df/dx = (df/dy)(dy/dx). The adjoint of x is equal to the adjoint of y - // multiplied by dy/dx. If there are multiple "paths" from the root node to - // variable; the variable's adjoint is the sum of each path's adjoint - // contribution. - for (auto& node : m_top_list) { - auto& lhs = node->args[0]; - auto& rhs = node->args[1]; - - if (lhs != nullptr) { - if (rhs != nullptr) { - // Binary operator - lhs->adjoint_expr += node->grad_expr_l(lhs, rhs, node->adjoint_expr); - rhs->adjoint_expr += node->grad_expr_r(lhs, rhs, node->adjoint_expr); - } else { - // Unary operator - lhs->adjoint_expr += node->grad_expr_l(lhs, rhs, node->adjoint_expr); - } - } - } - - // Move gradient tree to return value - VariableMatrix grad{detail::empty, wrt.rows(), 1}; - for (int row = 0; row < grad.rows(); ++row) { - grad[row] = Variable{std::move(wrt[row].expr->adjoint_expr)}; - } - - // Unlink adjoints to avoid circular references between them and their - // parent expressions. This ensures all expressions are returned to the free - // list. - for (auto& node : m_top_list) { - node->adjoint_expr = nullptr; - } - - return grad; - } - - /// Updates the adjoints in the expression graph (computes the gradient) then - /// appends the adjoints of wrt to the sparse matrix triplets. - /// - /// @param triplets The sparse matrix triplets. - /// @param row The row of wrt. - void append_triplets(gch::small_vector>& triplets, - int row) const { - // Read docs/algorithms.md#Reverse_accumulation_automatic_differentiation - // for background on reverse accumulation automatic differentiation. - - if (m_top_list.empty()) { - return; - } - - // Set root node's adjoint to 1 since df/df is 1 - m_top_list[0]->adjoint = Scalar(1); - - // Zero the rest of the adjoints - for (auto& node : m_top_list | std::views::drop(1)) { - node->adjoint = Scalar(0); - } - - // df/dx = (df/dy)(dy/dx). The adjoint of x is equal to the adjoint of y - // multiplied by dy/dx. If there are multiple "paths" from the root node to - // variable; the variable's adjoint is the sum of each path's adjoint - // contribution. - for (const auto& node : m_top_list) { - auto& lhs = node->args[0]; - auto& rhs = node->args[1]; - - if (lhs != nullptr) { - if (rhs != nullptr) { - // Binary operator - lhs->adjoint += node->grad_l(lhs->val, rhs->val, node->adjoint); - rhs->adjoint += node->grad_r(lhs->val, rhs->val, node->adjoint); - } else { - // Unary operator - lhs->adjoint += node->grad_l(lhs->val, Scalar(0), node->adjoint); - } - } - } - - for (size_t i = 0; i < m_top_list.size(); ++i) { - const auto& col = m_col_list[i]; - const auto& node = m_top_list[i]; - - // Append adjoints of wrt to sparse matrix triplets - if (col != -1) { - triplets.emplace_back(row, col, node->adjoint); - } - } - } - - private: - /// Topological sort of graph from parent to child - gch::small_vector*> m_top_list; - - /// List that maps nodes to their respective column - gch::small_vector m_col_list; -}; - -} // namespace slp::detail diff --git a/wpimath/src/main/native/thirdparty/sleipnir/include/sleipnir/autodiff/hessian.hpp b/wpimath/src/main/native/thirdparty/sleipnir/include/sleipnir/autodiff/hessian.hpp index c48e01c7b8..39cd798160 100644 --- a/wpimath/src/main/native/thirdparty/sleipnir/include/sleipnir/autodiff/hessian.hpp +++ b/wpimath/src/main/native/thirdparty/sleipnir/include/sleipnir/autodiff/hessian.hpp @@ -7,11 +7,12 @@ #include #include -#include "sleipnir/autodiff/gradient_expression_graph.hpp" +#include "sleipnir/autodiff/expression_graph.hpp" #include "sleipnir/autodiff/variable.hpp" #include "sleipnir/autodiff/variable_matrix.hpp" #include "sleipnir/util/assert.hpp" #include "sleipnir/util/concepts.hpp" +#include "sleipnir/util/symbol_exports.hpp" namespace slp { @@ -41,24 +42,33 @@ class Hessian { /// @param wrt Vector of variables with respect to which to compute the /// Hessian. Hessian(Variable variable, SleipnirMatrixLike auto wrt) - : m_variables{ - detail::GradientExpressionGraph{variable}.generate_tree( - wrt)}, - m_wrt{wrt} { - slp_assert(m_wrt.cols() == 1); + : m_variables{detail::gradient_tree( + detail::topological_sort(variable.expr), wrt)}, + m_wrt{std::move(wrt)} { + slp_assert(wrt.cols() == 1); + + for (auto& variable : m_variables) { + m_top_lists.emplace_back(detail::topological_sort(variable.expr)); + } // Initialize column each expression's adjoint occupies in the Jacobian for (size_t col = 0; col < m_wrt.size(); ++col) { - m_wrt[col].expr->col = col; + m_wrt[col].expr->scratch = col; } - for (auto& variable : m_variables) { - m_graphs.emplace_back(variable); + // Make list of only nodes in output row, and their output columns + for (auto& top_list : m_top_lists) { + m_output_lists.emplace_back(); + for (const auto& node : top_list) { + if (node->scratch != -1) { + m_output_lists.back().emplace_back(node->scratch, node); + } + } } // Reset col to -1 for (auto& node : m_wrt) { - node.expr->col = -1; + node.expr->scratch = -1; } for (int row = 0; row < m_variables.rows(); ++row) { @@ -70,7 +80,8 @@ class Hessian { // If the row is linear, compute its gradient once here and cache its // triplets. Constant rows are ignored because their gradients have no // nonzero triplets. - m_graphs[row].append_triplets(m_cached_triplets, row); + detail::append_triplets(m_top_lists[row], m_output_lists[row], + m_cached_triplets, row); } else if (m_variables[row].type() > ExpressionType::LINEAR) { // If the row is quadratic or nonlinear, add it to the list of nonlinear // rows to be recomputed in value(). @@ -97,7 +108,7 @@ class Hessian { m_wrt.rows()}; for (int row = 0; row < m_variables.rows(); ++row) { - auto grad = m_graphs[row].generate_tree(m_wrt); + auto grad = detail::gradient_tree(m_top_lists[row], m_wrt); for (int col = 0; col < m_wrt.rows(); ++col) { if (grad[col].expr != nullptr) { result(row, col) = std::move(grad[col]); @@ -118,8 +129,8 @@ class Hessian { return m_H; } - for (auto& graph : m_graphs) { - graph.update_values(); + for (auto& top_list : m_top_lists) { + detail::update_values(top_list); } // Copy the cached triplets so triplets added for the nonlinear rows are @@ -128,7 +139,8 @@ class Hessian { // Compute each nonlinear row of the Hessian for (int row : m_nonlinear_rows) { - m_graphs[row].append_triplets(triplets, row); + detail::append_triplets(m_top_lists[row], m_output_lists[row], triplets, + row); } m_H.setFromTriplets(triplets.begin(), triplets.end()); @@ -143,7 +155,13 @@ class Hessian { VariableMatrix m_variables; VariableMatrix m_wrt; - gch::small_vector> m_graphs; + /// List of topologically sorted graphs from parent to child, one for each row + gch::small_vector> m_top_lists; + + /// List of output rows as column-node pairs + gch::small_vector< + gch::small_vector*>>> + m_output_lists; Eigen::SparseMatrix m_H{m_variables.rows(), m_wrt.rows()}; diff --git a/wpimath/src/main/native/thirdparty/sleipnir/include/sleipnir/autodiff/jacobian.hpp b/wpimath/src/main/native/thirdparty/sleipnir/include/sleipnir/autodiff/jacobian.hpp index 1a5b01de99..b202f776d0 100644 --- a/wpimath/src/main/native/thirdparty/sleipnir/include/sleipnir/autodiff/jacobian.hpp +++ b/wpimath/src/main/native/thirdparty/sleipnir/include/sleipnir/autodiff/jacobian.hpp @@ -7,12 +7,11 @@ #include #include -#include "sleipnir/autodiff/gradient_expression_graph.hpp" +#include "sleipnir/autodiff/expression_graph.hpp" #include "sleipnir/autodiff/variable.hpp" #include "sleipnir/autodiff/variable_matrix.hpp" #include "sleipnir/util/assert.hpp" #include "sleipnir/util/concepts.hpp" -#include "sleipnir/util/empty.hpp" #include "sleipnir/util/symbol_exports.hpp" namespace slp { @@ -54,18 +53,28 @@ class Jacobian { slp_assert(m_variables.cols() == 1); slp_assert(m_wrt.cols() == 1); - // Initialize column each expression's adjoint occupies in the Jacobian - for (size_t col = 0; col < m_wrt.size(); ++col) { - m_wrt[col].expr->col = col; + for (auto& variable : m_variables) { + m_top_lists.emplace_back(detail::topological_sort(variable.expr)); } - for (auto& variable : m_variables) { - m_graphs.emplace_back(variable); + // Initialize column each expression's adjoint occupies in the Jacobian + for (size_t col = 0; col < m_wrt.size(); ++col) { + m_wrt[col].expr->scratch = col; + } + + // Make list of output rows as column-node pairs + for (auto& top_list : m_top_lists) { + m_output_lists.emplace_back(); + for (const auto& node : top_list) { + if (node->scratch != -1) { + m_output_lists.back().emplace_back(node->scratch, node); + } + } } // Reset col to -1 for (auto& node : m_wrt) { - node.expr->col = -1; + node.expr->scratch = -1; } for (int row = 0; row < m_variables.rows(); ++row) { @@ -77,7 +86,8 @@ class Jacobian { // If the row is linear, compute its gradient once here and cache its // triplets. Constant rows are ignored because their gradients have no // nonzero triplets. - m_graphs[row].append_triplets(m_cached_triplets, row); + detail::append_triplets(m_top_lists[row], m_output_lists[row], + m_cached_triplets, row); } else if (m_variables[row].type() > ExpressionType::LINEAR) { // If the row is quadratic or nonlinear, add it to the list of nonlinear // rows to be recomputed in value(). @@ -101,7 +111,7 @@ class Jacobian { m_wrt.rows()}; for (int row = 0; row < m_variables.rows(); ++row) { - auto grad = m_graphs[row].generate_tree(m_wrt); + auto grad = detail::gradient_tree(m_top_lists[row], m_wrt); for (int col = 0; col < m_wrt.rows(); ++col) { if (grad[col].expr != nullptr) { result(row, col) = std::move(grad[col]); @@ -122,8 +132,8 @@ class Jacobian { return m_J; } - for (auto& graph : m_graphs) { - graph.update_values(); + for (auto& top_list : m_top_lists) { + detail::update_values(top_list); } // Copy the cached triplets so triplets added for the nonlinear rows are @@ -132,7 +142,8 @@ class Jacobian { // Compute each nonlinear row of the Jacobian for (int row : m_nonlinear_rows) { - m_graphs[row].append_triplets(triplets, row); + detail::append_triplets(m_top_lists[row], m_output_lists[row], triplets, + row); } m_J.setFromTriplets(triplets.begin(), triplets.end()); @@ -144,7 +155,13 @@ class Jacobian { VariableMatrix m_variables; VariableMatrix m_wrt; - gch::small_vector> m_graphs; + /// List of topologically sorted graphs from parent to child, one for each row + gch::small_vector> m_top_lists; + + /// List of output rows as column-node pairs + gch::small_vector< + gch::small_vector*>>> + m_output_lists; Eigen::SparseMatrix m_J{m_variables.rows(), m_wrt.rows()}; diff --git a/wpimath/src/main/native/thirdparty/sleipnir/include/sleipnir/autodiff/variable.hpp b/wpimath/src/main/native/thirdparty/sleipnir/include/sleipnir/autodiff/variable.hpp index 130a4f057e..5d7b56de88 100644 --- a/wpimath/src/main/native/thirdparty/sleipnir/include/sleipnir/autodiff/variable.hpp +++ b/wpimath/src/main/native/thirdparty/sleipnir/include/sleipnir/autodiff/variable.hpp @@ -26,10 +26,15 @@ namespace slp { // Forward declarations for friend declarations in Variable +template +class VariableMatrix; + namespace detail { template -class GradientExpressionGraph; +VariableMatrix gradient_tree( + const detail::ExpressionGraph& top_list, + const VariableMatrix& wrt); } // namespace detail @@ -265,7 +270,7 @@ class Variable : public SleipnirBase { /// Used to update the value of this variable based on the values of its /// dependent variables - gch::small_vector*> m_graph; + detail::ExpressionGraph m_graph; /// Used for lazy initialization of m_graph bool m_graph_initialized = false; @@ -354,7 +359,10 @@ class Variable : public SleipnirBase { const Variable& y, const Variable& z); - friend class detail::GradientExpressionGraph; + template + friend VariableMatrix detail::gradient_tree( + const detail::ExpressionGraph& top_list, + const VariableMatrix& wrt); template requires(UpLo == Eigen::Lower) || (UpLo == (Eigen::Lower | Eigen::Upper)) friend class Hessian; diff --git a/wpimath/src/main/native/thirdparty/sleipnir/include/sleipnir/autodiff/variable_matrix.hpp b/wpimath/src/main/native/thirdparty/sleipnir/include/sleipnir/autodiff/variable_matrix.hpp index 02bccb6604..d82b90191f 100644 --- a/wpimath/src/main/native/thirdparty/sleipnir/include/sleipnir/autodiff/variable_matrix.hpp +++ b/wpimath/src/main/native/thirdparty/sleipnir/include/sleipnir/autodiff/variable_matrix.hpp @@ -1647,6 +1647,72 @@ VariableMatrix solve(const VariableMatrix& A, } } +namespace detail { + +/// Returns the variable's gradient tree. +/// +/// This function lazily allocates variables, so elements of the returned +/// VariableMatrix will be empty if the corresponding element of wrt had no +/// adjoint. Ensure Variable::expr isn't nullptr before calling member +/// functions. +/// +/// @tparam Scalar Scalar type. +/// @param top_list Topologically sorted graph from parent to child. +/// @param wrt Variables with respect to which to compute the gradient. +/// @return The variable's gradient tree. +template +VariableMatrix gradient_tree(const ExpressionGraph& top_list, + const VariableMatrix& wrt) { + slp_assert(wrt.cols() == 1); + + // Read docs/algorithms.md#Reverse_accumulation_automatic_differentiation + // for background on reverse accumulation automatic differentiation. + + if (top_list.empty()) { + return VariableMatrix{detail::empty, wrt.rows(), 1}; + } + + // Set root node's adjoint to 1 since df/df is 1 + top_list[0]->adjoint_expr = constant_ptr(Scalar(1)); + + // df/dx = (df/dy)(dy/dx). The adjoint of x is equal to the adjoint of y + // multiplied by dy/dx. If there are multiple "paths" from the root node to + // variable; the variable's adjoint is the sum of each path's adjoint + // contribution. + for (auto& node : top_list) { + auto& lhs = node->args[0]; + auto& rhs = node->args[1]; + + if (lhs != nullptr) { + if (rhs != nullptr) { + // Binary operator + lhs->adjoint_expr += node->grad_expr_l(lhs, rhs, node->adjoint_expr); + rhs->adjoint_expr += node->grad_expr_r(lhs, rhs, node->adjoint_expr); + } else { + // Unary operator + lhs->adjoint_expr += node->grad_expr_l(lhs, rhs, node->adjoint_expr); + } + } + } + + // Move gradient tree to return value + VariableMatrix grad{detail::empty, wrt.rows(), 1}; + for (int row = 0; row < grad.rows(); ++row) { + grad[row] = Variable{std::move(wrt[row].expr->adjoint_expr)}; + } + + // Unlink adjoints to avoid circular references between them and their + // parent expressions. This ensures all expressions are returned to the free + // list. + for (auto& node : top_list) { + node->adjoint_expr = nullptr; + } + + return grad; +} + +} // namespace detail + extern template SLEIPNIR_DLLEXPORT VariableMatrix solve( const VariableMatrix& A, const VariableMatrix& B);