From bdc7344df194446481da6f7eee51fa919e348ad2 Mon Sep 17 00:00:00 2001 From: Tyler Veness Date: Tue, 7 May 2024 12:47:15 -0700 Subject: [PATCH] [upstream_utils] Upgrade Eigen to get more constexpr support (#6596) --- upstream_utils/update_eigen.py | 4 +- .../thirdparty/eigen/include/Eigen/Core | 5 +- .../Eigen/src/Core/ArithmeticSequence.h | 49 +- .../include/Eigen/src/Core/CoreEvaluators.h | 21 + .../include/Eigen/src/Core/CwiseUnaryView.h | 124 +-- .../include/Eigen/src/Core/DenseCoeffsBase.h | 31 +- .../include/Eigen/src/Core/GeneralProduct.h | 14 +- .../Eigen/src/Core/GenericPacketMath.h | 148 ++-- .../include/Eigen/src/Core/GlobalFunctions.h | 7 +- .../include/Eigen/src/Core/IndexedView.h | 124 ++- .../include/Eigen/src/Core/MathFunctions.h | 355 ++++++--- .../Eigen/src/Core/MathFunctionsImpl.h | 59 -- .../eigen/include/Eigen/src/Core/Matrix.h | 15 +- .../eigen/include/Eigen/src/Core/NumTraits.h | 10 +- .../include/Eigen/src/Core/PlainObjectBase.h | 37 +- .../eigen/include/Eigen/src/Core/Product.h | 139 +++- .../eigen/include/Eigen/src/Core/RandomImpl.h | 253 ++++++ .../include/Eigen/src/Core/TriangularMatrix.h | 3 +- .../include/Eigen/src/Core/arch/AVX/Complex.h | 18 + .../Eigen/src/Core/arch/AVX/MathFunctions.h | 10 +- .../Eigen/src/Core/arch/AVX/PacketMath.h | 145 ++-- .../Eigen/src/Core/arch/AVX/TypeCasting.h | 79 ++ .../Eigen/src/Core/arch/Default/BFloat16.h | 21 +- .../arch/Default/GenericPacketMathFunctions.h | 451 ++++++++++- .../Default/GenericPacketMathFunctionsFwd.h | 42 +- .../Eigen/src/Core/arch/Default/Half.h | 21 +- .../Eigen/src/Core/arch/NEON/Complex.h | 28 + .../Eigen/src/Core/arch/NEON/PacketMath.h | 130 ++- .../include/Eigen/src/Core/arch/SSE/Complex.h | 18 + .../Eigen/src/Core/arch/SSE/PacketMath.h | 744 +++++++++++------- .../Eigen/src/Core/arch/SSE/TypeCasting.h | 33 + .../Eigen/src/Core/functors/UnaryFunctors.h | 78 +- .../Core/products/GeneralBlockPanelKernel.h | 6 +- .../src/Core/products/GeneralMatrixMatrix.h | 2 +- .../Core/products/TriangularMatrixVector.h | 32 +- .../Core/products/TriangularSolverMatrix.h | 2 +- .../src/Core/util/ConfigureVectorization.h | 10 + .../include/Eigen/src/Core/util/Constants.h | 4 +- .../Eigen/src/Core/util/EmulateArray.h | 18 +- .../Eigen/src/Core/util/ForwardDeclarations.h | 2 + .../Eigen/src/Core/util/IndexedViewHelper.h | 527 ++++++++++--- .../Eigen/src/Core/util/IntegralConstant.h | 35 +- .../include/Eigen/src/Core/util/Macros.h | 6 +- .../include/Eigen/src/Core/util/Memory.h | 18 +- .../eigen/include/Eigen/src/Core/util/Meta.h | 12 +- .../Eigen/src/Core/util/SymbolicIndex.h | 314 ++++++-- .../include/Eigen/src/Core/util/XprHelper.h | 8 +- .../src/Eigenvalues/ComplexEigenSolver.h | 4 +- .../Eigen/src/Eigenvalues/ComplexSchur.h | 4 +- .../Eigen/src/Eigenvalues/EigenSolver.h | 2 +- .../src/Eigenvalues/GeneralizedEigenSolver.h | 2 +- .../src/Eigenvalues/HessenbergDecomposition.h | 2 +- .../include/Eigen/src/Eigenvalues/RealQZ.h | 2 +- .../include/Eigen/src/Eigenvalues/RealSchur.h | 47 +- .../src/Eigenvalues/SelfAdjointEigenSolver.h | 2 +- .../src/Eigenvalues/Tridiagonalization.h | 6 +- .../IncompleteCholesky.h | 44 +- .../eigen/include/Eigen/src/Jacobi/Jacobi.h | 2 +- .../eigen/include/Eigen/src/LU/FullPivLU.h | 2 +- .../Eigen/src/QR/ColPivHouseholderQR.h | 29 +- .../src/QR/CompleteOrthogonalDecomposition.h | 20 + .../Eigen/src/QR/FullPivHouseholderQR.h | 29 +- .../include/Eigen/src/QR/HouseholderQR.h | 33 + .../eigen/include/Eigen/src/SVD/BDCSVD.h | 47 +- .../eigen/include/Eigen/src/SVD/JacobiSVD.h | 84 +- .../eigen/include/Eigen/src/SVD/SVDBase.h | 13 +- .../src/SparseCholesky/SimplicialCholesky.h | 287 ++++++- .../SparseCholesky/SimplicialCholesky_impl.h | 16 +- .../Eigen/src/SparseCore/CompressedStorage.h | 7 +- .../include/Eigen/src/SparseCore/SparseDot.h | 19 +- .../Eigen/src/SparseCore/SparseMatrix.h | 50 +- .../src/SparseCore/SparseSelfAdjointView.h | 20 +- .../Eigen/src/SparseCore/SparseVector.h | 18 + .../include/Eigen/src/SparseLU/SparseLU.h | 145 +++- .../include/Eigen/src/SparseQR/SparseQR.h | 4 +- .../eigen/include/Eigen/src/misc/Image.h | 2 +- .../eigen/include/Eigen/src/misc/Kernel.h | 2 +- .../Eigen/src/plugins/ArrayCwiseUnaryOps.inc | 10 + .../Eigen/src/plugins/CommonCwiseUnaryOps.inc | 17 +- .../Eigen/src/plugins/IndexedViewMethods.inc | 213 +---- .../Eigen/src/plugins/MatrixCwiseUnaryOps.inc | 11 +- 81 files changed, 3825 insertions(+), 1582 deletions(-) create mode 100644 wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/RandomImpl.h diff --git a/upstream_utils/update_eigen.py b/upstream_utils/update_eigen.py index 9ed255c0b5..409380c033 100755 --- a/upstream_utils/update_eigen.py +++ b/upstream_utils/update_eigen.py @@ -97,8 +97,8 @@ def unsupported_inclusions(dp, f): def main(): upstream_root = clone_repo( "https://gitlab.com/libeigen/eigen.git", - # master on 2023-12-01 - "96880810295b65d77057f4a7fb83a99a590122ad", + # master on 2024-05-07 + "99c18bce6eb225fa5a4861af97189ada1bca3103", shallow=False, ) wpilib_root = get_repo_root() diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/Core b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/Core index a30eedaec1..fdd4472c39 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/Core +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/Core @@ -178,6 +178,7 @@ using std::ptrdiff_t; #include "src/Core/NumTraits.h" #include "src/Core/MathFunctions.h" +#include "src/Core/RandomImpl.h" #include "src/Core/GenericPacketMath.h" #include "src/Core/MathFunctionsImpl.h" #include "src/Core/arch/Default/ConjHelper.h" @@ -378,10 +379,6 @@ using std::ptrdiff_t; // #include "src/Core/arch/AVX512/GemmKernel.h" #endif -#if defined(EIGEN_VECTORIZE_HVX) -// #include "src/Core/arch/HVX/GeneralBlockPanelKernel.h" -#endif - #include "src/Core/Select.h" #include "src/Core/VectorwiseOp.h" #include "src/Core/PartialReduxEvaluator.h" diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/ArithmeticSequence.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/ArithmeticSequence.h index 0f45e89ea2..ae6373dda2 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/ArithmeticSequence.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/ArithmeticSequence.h @@ -61,26 +61,28 @@ seqN(FirstType first, SizeType size, IncrType incr); template class ArithmeticSequence { public: - ArithmeticSequence(FirstType first, SizeType size) : m_first(first), m_size(size) {} - ArithmeticSequence(FirstType first, SizeType size, IncrType incr) : m_first(first), m_size(size), m_incr(incr) {} + constexpr ArithmeticSequence() = default; + constexpr ArithmeticSequence(FirstType first, SizeType size) : m_first(first), m_size(size) {} + constexpr ArithmeticSequence(FirstType first, SizeType size, IncrType incr) + : m_first(first), m_size(size), m_incr(incr) {} enum { - SizeAtCompileTime = internal::get_fixed_value::value, + // SizeAtCompileTime = internal::get_fixed_value::value, IncrAtCompileTime = internal::get_fixed_value::value }; /** \returns the size, i.e., number of elements, of the sequence */ - Index size() const { return m_size; } + constexpr Index size() const { return m_size; } /** \returns the first element \f$ a_0 \f$ in the sequence */ - Index first() const { return m_first; } + constexpr Index first() const { return m_first; } /** \returns the value \f$ a_i \f$ at index \a i in the sequence. */ - Index operator[](Index i) const { return m_first + i * m_incr; } + constexpr Index operator[](Index i) const { return m_first + i * m_incr; } - const FirstType& firstObject() const { return m_first; } - const SizeType& sizeObject() const { return m_size; } - const IncrType& incrObject() const { return m_incr; } + constexpr const FirstType& firstObject() const { return m_first; } + constexpr const SizeType& sizeObject() const { return m_size; } + constexpr const IncrType& incrObject() const { return m_incr; } protected: FirstType m_first; @@ -88,7 +90,7 @@ class ArithmeticSequence { IncrType m_incr; public: - auto reverse() const -> decltype(Eigen::seqN(m_first + (m_size + fix<-1>()) * m_incr, m_size, -m_incr)) { + constexpr auto reverse() const -> decltype(Eigen::seqN(m_first + (m_size + fix<-1>()) * m_incr, m_size, -m_incr)) { return seqN(m_first + (m_size + fix<-1>()) * m_incr, m_size, -m_incr); } }; @@ -201,33 +203,6 @@ auto lastN(SizeType size) -> decltype(seqN(Eigen::placeholders::last + fix<1>() } // namespace placeholders -namespace internal { - -// Convert a symbolic span into a usable one (i.e., remove last/end "keywords") -template -struct make_size_type { - typedef std::conditional_t::value, Index, T> type; -}; - -template -struct IndexedViewCompatibleType, XprSize> { - typedef ArithmeticSequence::type, IncrType> type; -}; - -template -ArithmeticSequence::type, IncrType> makeIndexedViewCompatible( - const ArithmeticSequence& ids, Index size, SpecializedType) { - return ArithmeticSequence::type, IncrType>( - eval_expr_given_size(ids.firstObject(), size), eval_expr_given_size(ids.sizeObject(), size), ids.incrObject()); -} - -template -struct get_compile_time_incr > { - enum { value = get_fixed_value::value }; -}; - -} // end namespace internal - /** \namespace Eigen::indexing * \ingroup Core_Module * diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/CoreEvaluators.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/CoreEvaluators.h index c6206005ee..5e1cbf6ecf 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/CoreEvaluators.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/CoreEvaluators.h @@ -840,6 +840,27 @@ struct ternary_evaluator, IndexBased Data m_d; }; +// specialization for expresions like (a < b).select(c, d) to enable full vectorization +template +struct evaluator, Arg1, Arg2, + CwiseBinaryOp, CmpLhsType, CmpRhsType>>> + : public ternary_evaluator< + CwiseTernaryOp, Arg1, Arg2, + CwiseBinaryOp, CmpLhsType, CmpRhsType>>> { + using DummyTernaryOp = scalar_boolean_select_op; + using DummyArg3 = CwiseBinaryOp, CmpLhsType, CmpRhsType>; + using DummyXprType = CwiseTernaryOp; + + using TernaryOp = scalar_boolean_select_op; + using Arg3 = CwiseBinaryOp, CmpLhsType, CmpRhsType>; + using XprType = CwiseTernaryOp; + + using Base = ternary_evaluator; + + EIGEN_DEVICE_FUNC explicit evaluator(const DummyXprType& xpr) + : Base(XprType(xpr.arg1(), xpr.arg2(), Arg3(xpr.arg3().lhs(), xpr.arg3().rhs()))) {} +}; + // -------------------- CwiseBinaryOp -------------------- // this is a binary expression diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/CwiseUnaryView.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/CwiseUnaryView.h index 725b337105..49b1410111 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/CwiseUnaryView.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/CwiseUnaryView.h @@ -18,7 +18,9 @@ namespace Eigen { namespace internal { template struct traits > : traits { - typedef typename result_of::Scalar&)>::type Scalar; + typedef typename result_of::Scalar&)>::type1 ScalarRef; + static_assert(std::is_reference::value, "Views must return a reference type."); + typedef remove_all_t Scalar; typedef typename MatrixType::Nested MatrixTypeNested; typedef remove_all_t MatrixTypeNested_; enum { @@ -44,10 +46,76 @@ struct traits > : traits -class CwiseUnaryViewImpl; +// Generic API dispatcher +template ::value> +class CwiseUnaryViewImpl : public generic_xpr_base >::type { + public: + typedef typename generic_xpr_base >::type Base; +}; + +template +class CwiseUnaryViewImpl + : public dense_xpr_base >::type { + public: + typedef CwiseUnaryView Derived; + typedef typename dense_xpr_base >::type Base; + EIGEN_DENSE_PUBLIC_INTERFACE(Derived) + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(CwiseUnaryViewImpl) + + EIGEN_DEVICE_FUNC inline const Scalar* data() const { return &(this->coeffRef(0)); } + + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index innerStride() const { + return StrideType::InnerStrideAtCompileTime != 0 ? int(StrideType::InnerStrideAtCompileTime) + : derived().nestedExpression().innerStride() * + sizeof(typename traits::Scalar) / sizeof(Scalar); + } + + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index outerStride() const { + return StrideType::OuterStrideAtCompileTime != 0 ? int(StrideType::OuterStrideAtCompileTime) + : derived().nestedExpression().outerStride() * + sizeof(typename traits::Scalar) / sizeof(Scalar); + } + + protected: + EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(CwiseUnaryViewImpl) + + // Allow const access to coeffRef for the case of direct access being enabled. + EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index index) const { + return internal::evaluator(derived()).coeffRef(index); + } + + EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index row, Index col) const { + return internal::evaluator(derived()).coeffRef(row, col); + } +}; + +template +class CwiseUnaryViewImpl + : public CwiseUnaryViewImpl { + public: + typedef CwiseUnaryViewImpl Base; + typedef CwiseUnaryView Derived; + EIGEN_DENSE_PUBLIC_INTERFACE(Derived) + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(CwiseUnaryViewImpl) + + using Base::data; + EIGEN_DEVICE_FUNC inline Scalar* data() { return &(this->coeffRef(0)); } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col) { + return internal::evaluator(derived()).coeffRef(row, col); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { + return internal::evaluator(derived()).coeffRef(index); + } + + protected: + EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(CwiseUnaryViewImpl) +}; + +} // namespace internal /** \class CwiseUnaryView * \ingroup Core_Module @@ -63,11 +131,11 @@ class CwiseUnaryViewImpl; * \sa MatrixBase::unaryViewExpr(const CustomUnaryOp &) const, class CwiseUnaryOp */ template -class CwiseUnaryView - : public CwiseUnaryViewImpl::StorageKind> { +class CwiseUnaryView : public internal::CwiseUnaryViewImpl::StorageKind> { public: - typedef typename CwiseUnaryViewImpl::StorageKind>::Base Base; + typedef typename internal::CwiseUnaryViewImpl::StorageKind>::Base Base; EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryView) typedef typename internal::ref_selector::non_const_type MatrixTypeNested; typedef internal::remove_all_t NestedExpression; @@ -94,44 +162,6 @@ class CwiseUnaryView ViewOp m_functor; }; -// Generic API dispatcher -template -class CwiseUnaryViewImpl : public internal::generic_xpr_base >::type { - public: - typedef typename internal::generic_xpr_base >::type Base; -}; - -template -class CwiseUnaryViewImpl - : public internal::dense_xpr_base >::type { - public: - typedef CwiseUnaryView Derived; - typedef typename internal::dense_xpr_base >::type Base; - - EIGEN_DENSE_PUBLIC_INTERFACE(Derived) - EIGEN_INHERIT_ASSIGNMENT_OPERATORS(CwiseUnaryViewImpl) - - EIGEN_DEVICE_FUNC inline Scalar* data() { return &(this->coeffRef(0)); } - EIGEN_DEVICE_FUNC inline const Scalar* data() const { return &(this->coeff(0)); } - - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index innerStride() const { - return StrideType::InnerStrideAtCompileTime != 0 - ? int(StrideType::InnerStrideAtCompileTime) - : derived().nestedExpression().innerStride() * sizeof(typename internal::traits::Scalar) / - sizeof(Scalar); - } - - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index outerStride() const { - return StrideType::OuterStrideAtCompileTime != 0 - ? int(StrideType::OuterStrideAtCompileTime) - : derived().nestedExpression().outerStride() * sizeof(typename internal::traits::Scalar) / - sizeof(Scalar); - } - - protected: - EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(CwiseUnaryViewImpl) -}; - -} // end namespace Eigen +} // namespace Eigen #endif // EIGEN_CWISE_UNARY_VIEW_H diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/DenseCoeffsBase.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/DenseCoeffsBase.h index 48c6d7308d..30e0aa38a7 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/DenseCoeffsBase.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/DenseCoeffsBase.h @@ -89,12 +89,13 @@ class DenseCoeffsBase : public EigenBase { * * \sa operator()(Index,Index) const, coeffRef(Index,Index), coeff(Index) const */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR CoeffReturnType coeff(Index row, Index col) const { eigen_internal_assert(row >= 0 && row < rows() && col >= 0 && col < cols()); return internal::evaluator(derived()).coeff(row, col); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeffByOuterInner(Index outer, Index inner) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR CoeffReturnType coeffByOuterInner(Index outer, + Index inner) const { return coeff(rowIndexByOuterInner(outer, inner), colIndexByOuterInner(outer, inner)); } @@ -102,7 +103,7 @@ class DenseCoeffsBase : public EigenBase { * * \sa operator()(Index,Index), operator[](Index) */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType operator()(Index row, Index col) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR CoeffReturnType operator()(Index row, Index col) const { eigen_assert(row >= 0 && row < rows() && col >= 0 && col < cols()); return coeff(row, col); } @@ -122,7 +123,7 @@ class DenseCoeffsBase : public EigenBase { * \sa operator[](Index) const, coeffRef(Index), coeff(Index,Index) const */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR CoeffReturnType coeff(Index index) const { EIGEN_STATIC_ASSERT(internal::evaluator::Flags & LinearAccessBit, THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS) eigen_internal_assert(index >= 0 && index < size()); @@ -137,7 +138,7 @@ class DenseCoeffsBase : public EigenBase { * z() const, w() const */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType operator[](Index index) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR CoeffReturnType operator[](Index index) const { EIGEN_STATIC_ASSERT(Derived::IsVectorAtCompileTime, THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD) eigen_assert(index >= 0 && index < size()); @@ -154,32 +155,32 @@ class DenseCoeffsBase : public EigenBase { * z() const, w() const */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType operator()(Index index) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR CoeffReturnType operator()(Index index) const { eigen_assert(index >= 0 && index < size()); return coeff(index); } /** equivalent to operator[](0). */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType x() const { return (*this)[0]; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR CoeffReturnType x() const { return (*this)[0]; } /** equivalent to operator[](1). */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType y() const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR CoeffReturnType y() const { EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime == -1 || Derived::SizeAtCompileTime >= 2, OUT_OF_RANGE_ACCESS); return (*this)[1]; } /** equivalent to operator[](2). */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType z() const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR CoeffReturnType z() const { EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime == -1 || Derived::SizeAtCompileTime >= 3, OUT_OF_RANGE_ACCESS); return (*this)[2]; } /** equivalent to operator[](3). */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType w() const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR CoeffReturnType w() const { EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime == -1 || Derived::SizeAtCompileTime >= 4, OUT_OF_RANGE_ACCESS); return (*this)[3]; } @@ -361,32 +362,32 @@ class DenseCoeffsBase : public DenseCoeffsBase= 0 && index < size()); return coeffRef(index); } /** equivalent to operator[](0). */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& x() { return (*this)[0]; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR Scalar& x() { return (*this)[0]; } /** equivalent to operator[](1). */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& y() { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR Scalar& y() { EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime == -1 || Derived::SizeAtCompileTime >= 2, OUT_OF_RANGE_ACCESS); return (*this)[1]; } /** equivalent to operator[](2). */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& z() { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR Scalar& z() { EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime == -1 || Derived::SizeAtCompileTime >= 3, OUT_OF_RANGE_ACCESS); return (*this)[2]; } /** equivalent to operator[](3). */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& w() { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR Scalar& w() { EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime == -1 || Derived::SizeAtCompileTime >= 4, OUT_OF_RANGE_ACCESS); return (*this)[3]; } diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/GeneralProduct.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/GeneralProduct.h index 3ec685274c..1220073a8c 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/GeneralProduct.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/GeneralProduct.h @@ -242,26 +242,18 @@ struct gemv_static_vector_if { template struct gemv_static_vector_if { - enum { - ForceAlignment = internal::packet_traits::Vectorizable, - PacketSize = internal::packet_traits::size - }; #if EIGEN_MAX_STATIC_ALIGN_BYTES != 0 - internal::plain_array + internal::plain_array m_data; EIGEN_STRONG_INLINE Scalar* data() { return m_data.array; } #else // Some architectures cannot align on the stack, // => let's manually enforce alignment by allocating more data and return the address of the first aligned element. internal::plain_array< - Scalar, internal::min_size_prefer_fixed(Size, MaxSize) + (ForceAlignment ? EIGEN_MAX_ALIGN_BYTES : 0), 0> + Scalar, internal::min_size_prefer_fixed(Size, MaxSize) + EIGEN_MAX_ALIGN_BYTES, 0> m_data; EIGEN_STRONG_INLINE Scalar* data() { - return ForceAlignment - ? reinterpret_cast((std::uintptr_t(m_data.array) & ~(std::size_t(EIGEN_MAX_ALIGN_BYTES - 1))) + - EIGEN_MAX_ALIGN_BYTES) - : m_data.array; + return reinterpret_cast((std::uintptr_t(m_data.array) & ~(std::size_t(EIGEN_MAX_ALIGN_BYTES - 1))) + EIGEN_MAX_ALIGN_BYTES); } #endif }; diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/GenericPacketMath.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/GenericPacketMath.h index 593633665d..8a07d50fe3 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/GenericPacketMath.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/GenericPacketMath.h @@ -44,20 +44,25 @@ namespace internal { struct default_packet_traits { enum { + // Ops that are implemented for most types. HasAdd = 1, HasSub = 1, HasShift = 1, HasMul = 1, HasNegate = 1, HasAbs = 1, - HasArg = 0, HasAbs2 = 1, - HasAbsDiff = 0, HasMin = 1, HasMax = 1, HasConj = 1, HasSetLinear = 1, HasSign = 1, + // By default, the nearest integer functions (rint, round, floor, ceil, trunc) are enabled for all scalar and packet + // types + HasRound = 1, + + HasArg = 0, + HasAbsDiff = 0, HasBlend = 0, // This flag is used to indicate whether packet comparison is supported. // pcmp_eq, pcmp_lt and pcmp_le should be defined for it to be true. @@ -73,7 +78,6 @@ struct default_packet_traits { HasLog1p = 0, HasLog10 = 0, HasPow = 0, - HasSin = 0, HasCos = 0, HasTan = 0, @@ -96,12 +100,7 @@ struct default_packet_traits { HasIGammaDerA = 0, HasGammaSampleDerAlpha = 0, HasIGammac = 0, - HasBetaInc = 0, - - HasRound = 0, - HasRint = 0, - HasFloor = 0, - HasCeil = 0 + HasBetaInc = 0 }; }; @@ -135,7 +134,14 @@ template struct unpacket_traits { typedef T type; typedef T half; - enum { size = 1, alignment = 1, vectorizable = false, masked_load_available = false, masked_store_available = false }; + typedef typename numext::get_integer_by_size::signed_type integer_packet; + enum { + size = 1, + alignment = alignof(T), + vectorizable = false, + masked_load_available = false, + masked_store_available = false + }; }; template @@ -335,12 +341,9 @@ EIGEN_DEVICE_FUNC inline Packet psub(const Packet& a, const Packet& b) { /** \internal \returns -a (coeff-wise) */ template EIGEN_DEVICE_FUNC inline Packet pnegate(const Packet& a) { - return -a; -} - -template <> -EIGEN_DEVICE_FUNC inline bool pnegate(const bool& a) { - return !a; + EIGEN_STATIC_ASSERT((!is_same::type, bool>::value), + NEGATE IS NOT DEFINED FOR BOOLEAN TYPES) + return numext::negate(a); } /** \internal \returns conj(a) (coeff-wise) */ @@ -376,6 +379,12 @@ struct ptrue_impl { } }; +// For booleans, we can only directly set a valid `bool` value to avoid UB. +template <> +struct ptrue_impl { + static EIGEN_DEVICE_FUNC inline bool run(const bool& /*a*/) { return true; } +}; + // For non-trivial scalars, set to Scalar(1) (i.e. a non-zero value). // Although this is technically not a valid bitmask, the scalar path for pselect // uses a comparison to zero, so this should still work in most cases. We don't @@ -458,6 +467,32 @@ struct bit_not { EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE T operator()(const T& a) const { return ~a; } }; +template <> +struct bit_and { + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE bool operator()(const bool& a, const bool& b) const { + return a && b; + } +}; + +template <> +struct bit_or { + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE bool operator()(const bool& a, const bool& b) const { + return a || b; + } +}; + +template <> +struct bit_xor { + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE bool operator()(const bool& a, const bool& b) const { + return a != b; + } +}; + +template <> +struct bit_not { + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE bool operator()(const bool& a) const { return !a; } +}; + // Use operators &, |, ^, ~. template struct operator_bitwise_helper { @@ -612,11 +647,7 @@ struct pminmax_impl { } }; -#ifndef SYCL_DEVICE_ONLY -#define EIGEN_BINARY_OP_NAN_PROPAGATION(Type, Func) Func -#else #define EIGEN_BINARY_OP_NAN_PROPAGATION(Type, Func) [](const Type& a, const Type& b) { return Func(a, b); } -#endif /** \internal \returns the min of \a a and \a b (coeff-wise). If \a a or \b b is NaN, the return value is implementation defined. */ @@ -678,33 +709,21 @@ EIGEN_DEVICE_FUNC inline Packet parg(const Packet& a) { } /** \internal \returns \a a arithmetically shifted by N bits to the right */ -template -EIGEN_DEVICE_FUNC inline int parithmetic_shift_right(const int& a) { - return a >> N; -} -template -EIGEN_DEVICE_FUNC inline long int parithmetic_shift_right(const long int& a) { - return a >> N; +template +EIGEN_DEVICE_FUNC inline T parithmetic_shift_right(const T& a) { + return numext::arithmetic_shift_right(a, N); } /** \internal \returns \a a logically shifted by N bits to the right */ -template -EIGEN_DEVICE_FUNC inline int plogical_shift_right(const int& a) { - return static_cast(static_cast(a) >> N); -} -template -EIGEN_DEVICE_FUNC inline long int plogical_shift_right(const long int& a) { - return static_cast(static_cast(a) >> N); +template +EIGEN_DEVICE_FUNC inline T plogical_shift_right(const T& a) { + return numext::logical_shift_right(a, N); } /** \internal \returns \a a shifted by N bits to the left */ -template -EIGEN_DEVICE_FUNC inline int plogical_shift_left(const int& a) { - return a << N; -} -template -EIGEN_DEVICE_FUNC inline long int plogical_shift_left(const long int& a) { - return a << N; +template +EIGEN_DEVICE_FUNC inline T plogical_shift_left(const T& a) { + return numext::logical_shift_left(a, N); } /** \internal \returns the significant and exponent of the underlying floating point numbers @@ -1089,8 +1108,9 @@ EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog10(const Packet& /** \internal \returns the log10 of \a a (coeff-wise) */ template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog2(const Packet& a) { - typedef typename internal::unpacket_traits::type Scalar; - return pmul(pset1(Scalar(EIGEN_LOG2E)), plog(a)); + using Scalar = typename internal::unpacket_traits::type; + using RealScalar = typename NumTraits::Real; + return pmul(pset1(Scalar(RealScalar(EIGEN_LOG2E))), plog(a)); } /** \internal \returns the square-root of \a a (coeff-wise) */ @@ -1105,33 +1125,45 @@ EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pcbrt(const Packet& return numext::cbrt(a); } +template ::value, + bool IsInteger = NumTraits::type>::IsInteger> +struct nearest_integer_packetop_impl { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_floor(const Packet& x) { return numext::floor(x); } + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_ceil(const Packet& x) { return numext::ceil(x); } + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_rint(const Packet& x) { return numext::rint(x); } + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_round(const Packet& x) { return numext::round(x); } + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_trunc(const Packet& x) { return numext::trunc(x); } +}; + /** \internal \returns the rounded value of \a a (coeff-wise) */ template -EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pround(const Packet& a) { - using numext::round; - return round(a); +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pround(const Packet& a) { + return nearest_integer_packetop_impl::run_round(a); } /** \internal \returns the floor of \a a (coeff-wise) */ template -EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pfloor(const Packet& a) { - using numext::floor; - return floor(a); +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pfloor(const Packet& a) { + return nearest_integer_packetop_impl::run_floor(a); } /** \internal \returns the rounded value of \a a (coeff-wise) with current * rounding mode */ template -EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet print(const Packet& a) { - using numext::rint; - return rint(a); +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet print(const Packet& a) { + return nearest_integer_packetop_impl::run_rint(a); } /** \internal \returns the ceil of \a a (coeff-wise) */ template -EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pceil(const Packet& a) { - using numext::ceil; - return ceil(a); +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pceil(const Packet& a) { + return nearest_integer_packetop_impl::run_ceil(a); +} + +/** \internal \returns the truncation of \a a (coeff-wise) */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet ptrunc(const Packet& a) { + return nearest_integer_packetop_impl::run_trunc(a); } template @@ -1265,13 +1297,13 @@ EIGEN_DEVICE_FUNC inline Packet pmsub(const Packet& a, const Packet& b, const Pa /** \internal \returns -(a * b) + c (coeff-wise) */ template EIGEN_DEVICE_FUNC inline Packet pnmadd(const Packet& a, const Packet& b, const Packet& c) { - return padd(pnegate(pmul(a, b)), c); + return psub(c, pmul(a, b)); } -/** \internal \returns -(a * b) - c (coeff-wise) */ +/** \internal \returns -((a * b + c) (coeff-wise) */ template EIGEN_DEVICE_FUNC inline Packet pnmsub(const Packet& a, const Packet& b, const Packet& c) { - return psub(pnegate(pmul(a, b)), c); + return pnegate(pmadd(a, b, c)); } /** \internal copy a packet with constant coefficient \a a (e.g., [a,a,a,a]) to \a *to. \a to must be 16 bytes aligned diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/GlobalFunctions.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/GlobalFunctions.h index f0ae5a8567..3f147b8f6f 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/GlobalFunctions.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/GlobalFunctions.h @@ -98,9 +98,12 @@ EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(rint, scalar_rint_op, EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(round, scalar_round_op, nearest integer,\sa Eigen::floor DOXCOMMA Eigen::ceil DOXCOMMA ArrayBase::round) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY( - floor, scalar_floor_op, nearest integer not greater than the giben value,\sa Eigen::ceil DOXCOMMA ArrayBase::floor) + floor, scalar_floor_op, nearest integer not greater than the given value,\sa Eigen::ceil DOXCOMMA ArrayBase::floor) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY( - ceil, scalar_ceil_op, nearest integer not less than the giben value,\sa Eigen::floor DOXCOMMA ArrayBase::ceil) + ceil, scalar_ceil_op, nearest integer not less than the given value,\sa Eigen::floor DOXCOMMA ArrayBase::ceil) +EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(trunc, scalar_trunc_op, + nearest integer not greater in magnitude than the given value,\sa Eigen::trunc DOXCOMMA + ArrayBase::trunc) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY( isnan, scalar_isnan_op, not -a - number test,\sa Eigen::isinf DOXCOMMA Eigen::isfinite DOXCOMMA ArrayBase::isnan) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY( diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/IndexedView.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/IndexedView.h index 0a024170ef..454e560e4b 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/IndexedView.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/IndexedView.h @@ -20,8 +20,8 @@ namespace internal { template struct traits> : traits { enum { - RowsAtCompileTime = int(array_size::value), - ColsAtCompileTime = int(array_size::value), + RowsAtCompileTime = int(IndexedViewHelper::SizeAtCompileTime), + ColsAtCompileTime = int(IndexedViewHelper::SizeAtCompileTime), MaxRowsAtCompileTime = RowsAtCompileTime, MaxColsAtCompileTime = ColsAtCompileTime, @@ -30,8 +30,8 @@ struct traits> : traits { : (MaxColsAtCompileTime == 1 && MaxRowsAtCompileTime != 1) ? 0 : XprTypeIsRowMajor, - RowIncr = int(get_compile_time_incr::value), - ColIncr = int(get_compile_time_incr::value), + RowIncr = int(IndexedViewHelper::IncrAtCompileTime), + ColIncr = int(IndexedViewHelper::IncrAtCompileTime), InnerIncr = IsRowMajor ? ColIncr : RowIncr, OuterIncr = IsRowMajor ? RowIncr : ColIncr, @@ -47,24 +47,23 @@ struct traits> : traits { is_same, std::conditional_t>::value, InnerStrideAtCompileTime = - InnerIncr < 0 || InnerIncr == DynamicIndex || XprInnerStride == Dynamic || InnerIncr == UndefinedIncr + InnerIncr < 0 || InnerIncr == DynamicIndex || XprInnerStride == Dynamic || InnerIncr == Undefined ? Dynamic : XprInnerStride * InnerIncr, OuterStrideAtCompileTime = - OuterIncr < 0 || OuterIncr == DynamicIndex || XprOuterstride == Dynamic || OuterIncr == UndefinedIncr + OuterIncr < 0 || OuterIncr == DynamicIndex || XprOuterstride == Dynamic || OuterIncr == Undefined ? Dynamic : XprOuterstride * OuterIncr, - ReturnAsScalar = is_same::value && is_same::value, + ReturnAsScalar = is_single_range::value && is_single_range::value, ReturnAsBlock = (!ReturnAsScalar) && IsBlockAlike, ReturnAsIndexedView = (!ReturnAsScalar) && (!ReturnAsBlock), // FIXME we deal with compile-time strides if and only if we have DirectAccessBit flag, // but this is too strict regarding negative strides... - DirectAccessMask = - (int(InnerIncr) != UndefinedIncr && int(OuterIncr) != UndefinedIncr && InnerIncr >= 0 && OuterIncr >= 0) - ? DirectAccessBit - : 0, + DirectAccessMask = (int(InnerIncr) != Undefined && int(OuterIncr) != Undefined && InnerIncr >= 0 && OuterIncr >= 0) + ? DirectAccessBit + : 0, FlagsRowMajorBit = IsRowMajor ? RowMajorBit : 0, FlagsLvalueBit = is_lvalue::value ? LvalueBit : 0, FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1) ? LinearAccessBit : 0, @@ -75,11 +74,11 @@ struct traits> : traits { typedef Block BlockType; }; -} // namespace internal - -template +template class IndexedViewImpl; +} // namespace internal + /** \class IndexedView * \ingroup Core_Module * @@ -120,26 +119,43 @@ class IndexedViewImpl; */ template class IndexedView - : public IndexedViewImpl::StorageKind> { + : public internal::IndexedViewImpl::StorageKind, + (internal::traits>::Flags & + DirectAccessBit) != 0> { public: - typedef - typename IndexedViewImpl::StorageKind>::Base - Base; + typedef typename internal::IndexedViewImpl< + XprType, RowIndices, ColIndices, typename internal::traits::StorageKind, + (internal::traits>::Flags & DirectAccessBit) != 0> + Base; EIGEN_GENERIC_PUBLIC_INTERFACE(IndexedView) EIGEN_INHERIT_ASSIGNMENT_OPERATORS(IndexedView) + template + IndexedView(XprType& xpr, const T0& rowIndices, const T1& colIndices) : Base(xpr, rowIndices, colIndices) {} +}; + +namespace internal { + +// Generic API dispatcher +template +class IndexedViewImpl : public internal::generic_xpr_base>::type { + public: + typedef typename internal::generic_xpr_base>::type Base; typedef typename internal::ref_selector::non_const_type MatrixTypeNested; typedef internal::remove_all_t NestedExpression; + typedef typename XprType::Scalar Scalar; + + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(IndexedViewImpl) template - IndexedView(XprType& xpr, const T0& rowIndices, const T1& colIndices) + IndexedViewImpl(XprType& xpr, const T0& rowIndices, const T1& colIndices) : m_xpr(xpr), m_rowIndices(rowIndices), m_colIndices(colIndices) {} /** \returns number of rows */ - Index rows() const { return internal::index_list_size(m_rowIndices); } + Index rows() const { return IndexedViewHelper::size(m_rowIndices); } /** \returns number of columns */ - Index cols() const { return internal::index_list_size(m_colIndices); } + Index cols() const { return IndexedViewHelper::size(m_colIndices); } /** \returns the nested expression */ const internal::remove_all_t& nestedExpression() const { return m_xpr; } @@ -153,20 +169,76 @@ class IndexedView /** \returns a const reference to the object storing/generating the column indices */ const ColIndices& colIndices() const { return m_colIndices; } + constexpr Scalar& coeffRef(Index rowId, Index colId) { + return nestedExpression().coeffRef(m_rowIndices[rowId], m_colIndices[colId]); + } + + constexpr const Scalar& coeffRef(Index rowId, Index colId) const { + return nestedExpression().coeffRef(m_rowIndices[rowId], m_colIndices[colId]); + } + protected: MatrixTypeNested m_xpr; RowIndices m_rowIndices; ColIndices m_colIndices; }; -// Generic API dispatcher template -class IndexedViewImpl : public internal::generic_xpr_base>::type { +class IndexedViewImpl + : public IndexedViewImpl { public: - typedef typename internal::generic_xpr_base>::type Base; -}; + using Base = internal::IndexedViewImpl::StorageKind, false>; + using Derived = IndexedView; -namespace internal { + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(IndexedViewImpl) + + template + IndexedViewImpl(XprType& xpr, const T0& rowIndices, const T1& colIndices) : Base(xpr, rowIndices, colIndices) {} + + Index rowIncrement() const { + if (traits::RowIncr != DynamicIndex && traits::RowIncr != Undefined) { + return traits::RowIncr; + } + return IndexedViewHelper::incr(this->rowIndices()); + } + Index colIncrement() const { + if (traits::ColIncr != DynamicIndex && traits::ColIncr != Undefined) { + return traits::ColIncr; + } + return IndexedViewHelper::incr(this->colIndices()); + } + + Index innerIncrement() const { return traits::IsRowMajor ? colIncrement() : rowIncrement(); } + + Index outerIncrement() const { return traits::IsRowMajor ? rowIncrement() : colIncrement(); } + + std::decay_t* data() { + Index row_offset = this->rowIndices()[0] * this->nestedExpression().rowStride(); + Index col_offset = this->colIndices()[0] * this->nestedExpression().colStride(); + return this->nestedExpression().data() + row_offset + col_offset; + } + + const std::decay_t* data() const { + Index row_offset = this->rowIndices()[0] * this->nestedExpression().rowStride(); + Index col_offset = this->colIndices()[0] * this->nestedExpression().colStride(); + return this->nestedExpression().data() + row_offset + col_offset; + } + + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index innerStride() const EIGEN_NOEXCEPT { + if (traits::InnerStrideAtCompileTime != Dynamic) { + return traits::InnerStrideAtCompileTime; + } + return innerIncrement() * this->nestedExpression().innerStride(); + } + + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index outerStride() const EIGEN_NOEXCEPT { + if (traits::OuterStrideAtCompileTime != Dynamic) { + return traits::OuterStrideAtCompileTime; + } + return outerIncrement() * this->nestedExpression().outerStride(); + } +}; template struct unary_evaluator, IndexBased> diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/MathFunctions.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/MathFunctions.h index 95f9b97234..d42fc93cc9 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/MathFunctions.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/MathFunctions.h @@ -164,7 +164,7 @@ struct imag_ref_default_impl { typedef typename NumTraits::Real RealScalar; EIGEN_DEVICE_FUNC static inline RealScalar& run(Scalar& x) { return reinterpret_cast(&x)[1]; } EIGEN_DEVICE_FUNC static inline const RealScalar& run(const Scalar& x) { - return reinterpret_cast(&x)[1]; + return reinterpret_cast(&x)[1]; } }; @@ -563,34 +563,6 @@ struct pow_impl { } }; -/**************************************************************************** - * Implementation of random * - ****************************************************************************/ - -template -struct random_default_impl {}; - -template -struct random_impl : random_default_impl::IsComplex, NumTraits::IsInteger> {}; - -template -struct random_retval { - typedef Scalar type; -}; - -template -inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random(const Scalar& x, const Scalar& y); -template -inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random(); - -template -struct random_default_impl { - static inline Scalar run(const Scalar& x, const Scalar& y) { - return x + (y - x) * Scalar(std::rand()) / Scalar(RAND_MAX); - } - static inline Scalar run() { return run(Scalar(NumTraits::IsSigned ? -1 : 0), Scalar(1)); } -}; - enum { meta_floor_log2_terminate, meta_floor_log2_move_up, meta_floor_log2_move_down, meta_floor_log2_bogus }; template @@ -628,69 +600,168 @@ struct meta_floor_log2 { // no value, error at compile time }; -template -struct random_default_impl { - static inline Scalar run(const Scalar& x, const Scalar& y) { - if (y <= x) return x; - // ScalarU is the unsigned counterpart of Scalar, possibly Scalar itself. - typedef typename make_unsigned::type ScalarU; - // ScalarX is the widest of ScalarU and unsigned int. - // We'll deal only with ScalarX and unsigned int below thus avoiding signed - // types and arithmetic and signed overflows (which are undefined behavior). - typedef std::conditional_t<(ScalarU(-1) > unsigned(-1)), ScalarU, unsigned> ScalarX; - // The following difference doesn't overflow, provided our integer types are two's - // complement and have the same number of padding bits in signed and unsigned variants. - // This is the case in most modern implementations of C++. - ScalarX range = ScalarX(y) - ScalarX(x); - ScalarX offset = 0; - ScalarX divisor = 1; - ScalarX multiplier = 1; - const unsigned rand_max = RAND_MAX; - if (range <= rand_max) - divisor = (rand_max + 1) / (range + 1); - else - multiplier = 1 + range / (rand_max + 1); - // Rejection sampling. - do { - offset = (unsigned(std::rand()) * multiplier) / divisor; - } while (offset > range); - return Scalar(ScalarX(x) + offset); +template +struct count_bits_impl { + static_assert(std::is_integral::value && std::is_unsigned::value, + "BitsType must be an unsigned integer"); + static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) { + int n = CHAR_BIT * sizeof(BitsType); + int shift = n / 2; + while (bits > 0 && shift > 0) { + BitsType y = bits >> shift; + if (y > 0) { + n -= shift; + bits = y; + } + shift /= 2; + } + if (shift == 0) { + --n; + } + return n; } - static inline Scalar run() { -#ifdef EIGEN_MAKING_DOCS - return run(Scalar(NumTraits::IsSigned ? -10 : 0), Scalar(10)); -#else - enum { - rand_bits = meta_floor_log2<(unsigned int)(RAND_MAX) + 1>::value, - scalar_bits = sizeof(Scalar) * CHAR_BIT, - shift = plain_enum_max(0, int(rand_bits) - int(scalar_bits)), - offset = NumTraits::IsSigned ? (1 << (plain_enum_min(rand_bits, scalar_bits) - 1)) : 0 - }; - return Scalar((std::rand() >> shift) - offset); -#endif + static EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) { + int n = CHAR_BIT * sizeof(BitsType); + int shift = n / 2; + while (bits > 0 && shift > 0) { + BitsType y = bits << shift; + if (y > 0) { + n -= shift; + bits = y; + } + shift /= 2; + } + if (shift == 0) { + --n; + } + return n; } }; -template -struct random_default_impl { - static inline Scalar run(const Scalar& x, const Scalar& y) { - return Scalar(random(x.real(), y.real()), random(x.imag(), y.imag())); - } - static inline Scalar run() { - typedef typename NumTraits::Real RealScalar; - return Scalar(random(), random()); - } -}; - -template -inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random(const Scalar& x, const Scalar& y) { - return EIGEN_MATHFUNC_IMPL(random, Scalar)::run(x, y); +// Count leading zeros. +template +EIGEN_DEVICE_FUNC inline int clz(BitsType bits) { + return count_bits_impl::clz(bits); } -template -inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random() { - return EIGEN_MATHFUNC_IMPL(random, Scalar)::run(); +// Count trailing zeros. +template +EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) { + return count_bits_impl::ctz(bits); +} + +#if EIGEN_COMP_GNUC || EIGEN_COMP_CLANG + +template +struct count_bits_impl< + BitsType, std::enable_if_t::value && sizeof(BitsType) <= sizeof(unsigned int)>> { + static constexpr int kNumBits = static_cast(sizeof(BitsType) * CHAR_BIT); + static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) { + static constexpr int kLeadingBitsOffset = (sizeof(unsigned int) - sizeof(BitsType)) * CHAR_BIT; + return bits == 0 ? kNumBits : __builtin_clz(static_cast(bits)) - kLeadingBitsOffset; + } + + static EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) { + return bits == 0 ? kNumBits : __builtin_ctz(static_cast(bits)); + } +}; + +template +struct count_bits_impl::value && sizeof(unsigned int) < sizeof(BitsType) && + sizeof(BitsType) <= sizeof(unsigned long)>> { + static constexpr int kNumBits = static_cast(sizeof(BitsType) * CHAR_BIT); + static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) { + static constexpr int kLeadingBitsOffset = (sizeof(unsigned long) - sizeof(BitsType)) * CHAR_BIT; + return bits == 0 ? kNumBits : __builtin_clzl(static_cast(bits)) - kLeadingBitsOffset; + } + + static EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) { + return bits == 0 ? kNumBits : __builtin_ctzl(static_cast(bits)); + } +}; + +template +struct count_bits_impl::value && sizeof(unsigned long) < sizeof(BitsType) && + sizeof(BitsType) <= sizeof(unsigned long long)>> { + static constexpr int kNumBits = static_cast(sizeof(BitsType) * CHAR_BIT); + static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) { + static constexpr int kLeadingBitsOffset = (sizeof(unsigned long long) - sizeof(BitsType)) * CHAR_BIT; + return bits == 0 ? kNumBits : __builtin_clzll(static_cast(bits)) - kLeadingBitsOffset; + } + + static EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) { + return bits == 0 ? kNumBits : __builtin_ctzll(static_cast(bits)); + } +}; + +#elif EIGEN_COMP_MSVC + +template +struct count_bits_impl< + BitsType, std::enable_if_t::value && sizeof(BitsType) <= sizeof(unsigned long)>> { + static constexpr int kNumBits = static_cast(sizeof(BitsType) * CHAR_BIT); + static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) { + unsigned long out; + _BitScanReverse(&out, static_cast(bits)); + return bits == 0 ? kNumBits : (kNumBits - 1) - static_cast(out); + } + + static EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) { + unsigned long out; + _BitScanForward(&out, static_cast(bits)); + return bits == 0 ? kNumBits : static_cast(out); + } +}; + +#ifdef _WIN64 + +template +struct count_bits_impl::value && sizeof(unsigned long) < sizeof(BitsType) && + sizeof(BitsType) <= sizeof(__int64)>> { + static constexpr int kNumBits = static_cast(sizeof(BitsType) * CHAR_BIT); + static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) { + unsigned long out; + _BitScanReverse64(&out, static_cast(bits)); + return bits == 0 ? kNumBits : (kNumBits - 1) - static_cast(out); + } + + static EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) { + unsigned long out; + _BitScanForward64(&out, static_cast(bits)); + return bits == 0 ? kNumBits : static_cast(out); + } +}; + +#endif // _WIN64 + +#endif // EIGEN_COMP_GNUC || EIGEN_COMP_CLANG + +template +struct log_2_impl { + static constexpr int kTotalBits = sizeof(BitsType) * CHAR_BIT; + static EIGEN_DEVICE_FUNC inline int run_ceil(const BitsType& x) { + const int n = kTotalBits - clz(x); + bool power_of_two = (x & (x - 1)) == 0; + return x == 0 ? 0 : power_of_two ? (n - 1) : n; + } + static EIGEN_DEVICE_FUNC inline int run_floor(const BitsType& x) { + const int n = kTotalBits - clz(x); + return x == 0 ? 0 : n - 1; + } +}; + +template +int log2_ceil(const BitsType& x) { + return log_2_impl::run_ceil(x); +} + +template +int log2_floor(const BitsType& x) { + return log_2_impl::run_floor(x); } // Implementation of is* functions @@ -749,7 +820,7 @@ EIGEN_DEVICE_FUNC bool isnan_impl(const std::complex& x); template EIGEN_DEVICE_FUNC bool isinf_impl(const std::complex& x); template -T generic_fast_tanh_float(const T& a_x); +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS T ptanh_float(const T& a_x); /**************************************************************************** * Implementation of sign * @@ -790,6 +861,25 @@ struct sign_retval { typedef Scalar type; }; +// suppress "unary minus operator applied to unsigned type, result still unsigned" warnings on MSVC +// note: `0 - a` is distinct from `-a` when Scalar is a floating point type and `a` is zero + +template ::IsInteger> +struct negate_impl { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar run(const Scalar& a) { return -a; } +}; + +template +struct negate_impl { + EIGEN_STATIC_ASSERT((!is_same::value), NEGATE IS NOT DEFINED FOR BOOLEAN TYPES) + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar run(const Scalar& a) { return Scalar(0) - a; } +}; + +template +struct negate_retval { + typedef Scalar type; +}; + template ::type>::IsInteger> struct nearest_integer_impl { static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run_floor(const Scalar& x) { @@ -804,6 +894,9 @@ struct nearest_integer_impl { static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run_round(const Scalar& x) { EIGEN_USING_STD(round) return round(x); } + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run_trunc(const Scalar& x) { + EIGEN_USING_STD(trunc) return trunc(x); + } }; template struct nearest_integer_impl { @@ -811,6 +904,7 @@ struct nearest_integer_impl { static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run_ceil(const Scalar& x) { return x; } static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run_rint(const Scalar& x) { return x; } static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run_round(const Scalar& x) { return x; } + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run_trunc(const Scalar& x) { return x; } }; } // end namespace internal @@ -995,6 +1089,11 @@ EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(sign, Scalar) sign(const Scalar& return EIGEN_MATHFUNC_IMPL(sign, Scalar)::run(x); } +template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(negate, Scalar) negate(const Scalar& x) { + return EIGEN_MATHFUNC_IMPL(negate, Scalar)::run(x); +} + template EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(abs2, Scalar) abs2(const Scalar& x) { return EIGEN_MATHFUNC_IMPL(abs2, Scalar)::run(x); @@ -1097,17 +1196,26 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar round(const Scalar& x) { return internal::nearest_integer_impl::run_round(x); } -#if defined(SYCL_DEVICE_ONLY) -SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(round, round) -#endif - template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar(floor)(const Scalar& x) { return internal::nearest_integer_impl::run_floor(x); } +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar(ceil)(const Scalar& x) { + return internal::nearest_integer_impl::run_ceil(x); +} + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar(trunc)(const Scalar& x) { + return internal::nearest_integer_impl::run_trunc(x); +} + #if defined(SYCL_DEVICE_ONLY) +SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(round, round) SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(floor, floor) +SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(ceil, ceil) +SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(trunc, trunc) #endif #if defined(EIGEN_GPUCC) @@ -1115,32 +1223,26 @@ template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float floor(const float& x) { return ::floorf(x); } - template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double floor(const double& x) { return ::floor(x); } -#endif - -template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar(ceil)(const Scalar& x) { - return internal::nearest_integer_impl::run_ceil(x); -} - -#if defined(SYCL_DEVICE_ONLY) -SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(ceil, ceil) -#endif - -#if defined(EIGEN_GPUCC) template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float ceil(const float& x) { return ::ceilf(x); } - template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double ceil(const double& x) { return ::ceil(x); } +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float trunc(const float& x) { + return ::truncf(x); +} +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double trunc(const double& x) { + return ::trunc(x); +} #endif // Integer division with rounding up. @@ -1304,6 +1406,25 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T exp(const T& x) { return exp(x); } +// MSVC screws up some edge-cases for std::exp(complex). +#ifdef EIGEN_COMP_MSVC +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE std::complex exp(const std::complex& x) { + EIGEN_USING_STD(exp); + // If z is (x,±∞) (for any finite x), the result is (NaN,NaN) and FE_INVALID is raised. + // If z is (x,NaN) (for any finite x), the result is (NaN,NaN) and FE_INVALID may be raised. + if ((isfinite)(real_ref(x)) && !(isfinite)(imag_ref(x))) { + return std::complex(NumTraits::quiet_NaN(), NumTraits::quiet_NaN()); + } + // If z is (+∞,±∞), the result is (±∞,NaN) and FE_INVALID is raised (the sign of the real part is unspecified) + // If z is (+∞,NaN), the result is (±∞,NaN) (the sign of the real part is unspecified) + if ((real_ref(x) == NumTraits::infinity() && !(isfinite)(imag_ref(x)))) { + return std::complex(NumTraits::infinity(), NumTraits::quiet_NaN()); + } + return exp(x); +} +#endif + #if defined(SYCL_DEVICE_ONLY) SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(exp, exp) #endif @@ -1567,7 +1688,7 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T tanh(const T& x) { } #if (!defined(EIGEN_GPUCC)) && EIGEN_FAST_MATH && !defined(SYCL_DEVICE_ONLY) -EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float tanh(float x) { return internal::generic_fast_tanh_float(x); } +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float tanh(float x) { return internal::ptanh_float(x); } #endif #if defined(SYCL_DEVICE_ONLY) @@ -1625,6 +1746,23 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double fmod(const double& a, const double& #undef SYCL_SPECIALIZE_BINARY_FUNC #endif +template ::value>> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar logical_shift_left(const Scalar& a, int n) { + return a << n; +} + +template ::value>> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar logical_shift_right(const Scalar& a, int n) { + using UnsignedScalar = typename numext::get_integer_by_size::unsigned_type; + return bit_cast(bit_cast(a) >> n); +} + +template ::value>> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar arithmetic_shift_right(const Scalar& a, int n) { + using SignedScalar = typename numext::get_integer_by_size::signed_type; + return bit_cast(bit_cast(a) >> n); +} + } // end namespace numext namespace internal { @@ -1722,13 +1860,6 @@ EIGEN_DEVICE_FUNC inline bool isApproxOrLessThan( *** The special case of the bool type *** ******************************************/ -template <> -struct random_impl { - static inline bool run() { return random(0, 1) == 0 ? false : true; } - - static inline bool run(const bool& a, const bool& b) { return random(a, b) == 0 ? false : true; } -}; - template <> struct scalar_fuzzy_impl { typedef bool RealScalar; diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/MathFunctionsImpl.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/MathFunctionsImpl.h index ed44089eaa..689c6d8276 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/MathFunctionsImpl.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/MathFunctionsImpl.h @@ -146,65 +146,6 @@ struct generic_sqrt_newton_step { } }; -/** \internal \returns the hyperbolic tan of \a a (coeff-wise) - Doesn't do anything fancy, just a 13/6-degree rational interpolant which - is accurate up to a couple of ulps in the (approximate) range [-8, 8], - outside of which tanh(x) = +/-1 in single precision. The input is clamped - to the range [-c, c]. The value c is chosen as the smallest value where - the approximation evaluates to exactly 1. In the reange [-0.0004, 0.0004] - the approximation tanh(x) ~= x is used for better accuracy as x tends to zero. - - This implementation works on both scalars and packets. -*/ -template -T generic_fast_tanh_float(const T& a_x) { - // Clamp the inputs to the range [-c, c] -#ifdef EIGEN_VECTORIZE_FMA - const T plus_clamp = pset1(7.99881172180175781f); - const T minus_clamp = pset1(-7.99881172180175781f); -#else - const T plus_clamp = pset1(7.90531110763549805f); - const T minus_clamp = pset1(-7.90531110763549805f); -#endif - const T tiny = pset1(0.0004f); - const T x = pmax(pmin(a_x, plus_clamp), minus_clamp); - const T tiny_mask = pcmp_lt(pabs(a_x), tiny); - // The monomial coefficients of the numerator polynomial (odd). - const T alpha_1 = pset1(4.89352455891786e-03f); - const T alpha_3 = pset1(6.37261928875436e-04f); - const T alpha_5 = pset1(1.48572235717979e-05f); - const T alpha_7 = pset1(5.12229709037114e-08f); - const T alpha_9 = pset1(-8.60467152213735e-11f); - const T alpha_11 = pset1(2.00018790482477e-13f); - const T alpha_13 = pset1(-2.76076847742355e-16f); - - // The monomial coefficients of the denominator polynomial (even). - const T beta_0 = pset1(4.89352518554385e-03f); - const T beta_2 = pset1(2.26843463243900e-03f); - const T beta_4 = pset1(1.18534705686654e-04f); - const T beta_6 = pset1(1.19825839466702e-06f); - - // Since the polynomials are odd/even, we need x^2. - const T x2 = pmul(x, x); - - // Evaluate the numerator polynomial p. - T p = pmadd(x2, alpha_13, alpha_11); - p = pmadd(x2, p, alpha_9); - p = pmadd(x2, p, alpha_7); - p = pmadd(x2, p, alpha_5); - p = pmadd(x2, p, alpha_3); - p = pmadd(x2, p, alpha_1); - p = pmul(x, p); - - // Evaluate the denominator polynomial q. - T q = pmadd(x2, beta_6, beta_4); - q = pmadd(x2, q, beta_2); - q = pmadd(x2, q, beta_0); - - // Divide the numerator by the denominator. - return pselect(tiny_mask, x, pdiv(p, q)); -} - template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE RealScalar positive_real_hypot(const RealScalar& x, const RealScalar& y) { // IEEE IEC 6059 special cases. diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/Matrix.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/Matrix.h index ce0e4e6a2f..af6afafc0c 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/Matrix.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/Matrix.h @@ -30,7 +30,7 @@ struct traits> { actual_alignment = ((Options_ & DontAlign) == 0) ? default_alignment : 0, required_alignment = unpacket_traits::alignment, packet_access_bit = (packet_traits::Vectorizable && - (EIGEN_UNALIGNED_VECTORIZE || (actual_alignment >= required_alignment))) + (EIGEN_UNALIGNED_VECTORIZE || (int(actual_alignment) >= int(required_alignment)))) ? PacketAccessBit : 0 }; @@ -48,7 +48,7 @@ struct traits> { Flags = compute_matrix_flags(Options_), Options = Options_, InnerStrideAtCompileTime = 1, - OuterStrideAtCompileTime = (Options & RowMajor) ? ColsAtCompileTime : RowsAtCompileTime, + OuterStrideAtCompileTime = (int(Options) & int(RowMajor)) ? ColsAtCompileTime : RowsAtCompileTime, // FIXME, the following flag in only used to define NeedsToAlign in PlainObjectBase EvaluatorFlags = LinearAccessBit | DirectAccessBit | packet_access_bit | row_major_bit, @@ -207,7 +207,7 @@ class Matrix : public PlainObjectBase::value) : Base(std::move(other)) {} - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Matrix& operator=(Matrix&& other) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Matrix& operator=(Matrix&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_assignable::value) { Base::operator=(std::move(other)); return *this; diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/NumTraits.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/NumTraits.h index 80f74e92c3..a6e2de4774 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/NumTraits.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/NumTraits.h @@ -101,10 +101,10 @@ namespace numext { template EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Tgt bit_cast(const Src& src) { // The behaviour of memcpy is not specified for non-trivially copyable types - EIGEN_STATIC_ASSERT(std::is_trivially_copyable::value, THIS_TYPE_IS_NOT_SUPPORTED); + EIGEN_STATIC_ASSERT(std::is_trivially_copyable::value, THIS_TYPE_IS_NOT_SUPPORTED) EIGEN_STATIC_ASSERT(std::is_trivially_copyable::value && std::is_default_constructible::value, - THIS_TYPE_IS_NOT_SUPPORTED); - EIGEN_STATIC_ASSERT(sizeof(Src) == sizeof(Tgt), THIS_TYPE_IS_NOT_SUPPORTED); + THIS_TYPE_IS_NOT_SUPPORTED) + EIGEN_STATIC_ASSERT(sizeof(Src) == sizeof(Tgt), THIS_TYPE_IS_NOT_SUPPORTED) Tgt tgt; // Load src into registers first. This allows the memcpy to be elided by CUDA. @@ -206,9 +206,7 @@ struct GenericNumTraits { EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static inline T highest() { return (numext::numeric_limits::max)(); } - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static inline T lowest() { - return IsInteger ? (numext::numeric_limits::min)() : static_cast(-(numext::numeric_limits::max)()); - } + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static inline T lowest() { return (numext::numeric_limits::lowest)(); } EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static inline T infinity() { return numext::numeric_limits::infinity(); } diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/PlainObjectBase.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/PlainObjectBase.h index a8307c7aeb..5f846a0f10 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/PlainObjectBase.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/PlainObjectBase.h @@ -31,6 +31,7 @@ namespace Eigen { namespace internal { +#ifndef EIGEN_NO_DEBUG template struct check_rows_cols_for_overflow { EIGEN_STATIC_ASSERT(MaxRowsAtCompileTime* MaxColsAtCompileTime == MaxSizeAtCompileTime, @@ -44,7 +45,7 @@ struct check_rows_cols_for_overflow { template EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE constexpr void run(Index, Index cols) { constexpr Index MaxIndex = NumTraits::highest(); - bool error = cols > MaxIndex / MaxRowsAtCompileTime; + bool error = cols > (MaxIndex / MaxRowsAtCompileTime); if (error) throw_std_bad_alloc(); } }; @@ -54,7 +55,7 @@ struct check_rows_cols_for_overflow { template EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE constexpr void run(Index rows, Index) { constexpr Index MaxIndex = NumTraits::highest(); - bool error = rows > MaxIndex / MaxColsAtCompileTime; + bool error = rows > (MaxIndex / MaxColsAtCompileTime); if (error) throw_std_bad_alloc(); } }; @@ -64,10 +65,11 @@ struct check_rows_cols_for_overflow { template EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE constexpr void run(Index rows, Index cols) { constexpr Index MaxIndex = NumTraits::highest(); - bool error = cols == 0 ? false : (rows > MaxIndex / cols); + bool error = cols == 0 ? false : (rows > (MaxIndex / cols)); if (error) throw_std_bad_alloc(); } }; +#endif template @@ -204,7 +206,9 @@ class PlainObjectBase : public internal::dense_xpr_base::type * provided to by-pass the creation of an evaluator of the expression, thus saving compilation efforts. * * See DenseCoeffsBase::coeff(Index) const for details. */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(Index index) const { return m_storage.data()[index]; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const Scalar& coeff(Index index) const { + return m_storage.data()[index]; + } /** This is an overloaded version of DenseCoeffsBase::coeffRef(Index,Index) const * provided to by-pass the creation of an evaluator of the expression, thus saving compilation efforts. @@ -295,8 +299,10 @@ class PlainObjectBase : public internal::dense_xpr_base::type internal::check_implication(ColsAtCompileTime == Dynamic && MaxColsAtCompileTime != Dynamic, cols <= MaxColsAtCompileTime) && rows >= 0 && cols >= 0 && "Invalid sizes when resizing a matrix or array."); +#ifndef EIGEN_NO_DEBUG internal::check_rows_cols_for_overflow::run(rows, cols); +#endif #ifdef EIGEN_INITIALIZE_COEFFS Index size = rows * cols; bool size_changed = size != this->size(); @@ -365,8 +371,10 @@ class PlainObjectBase : public internal::dense_xpr_base::type template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resizeLike(const EigenBase& _other) { const OtherDerived& other = _other.derived(); +#ifndef EIGEN_NO_DEBUG internal::check_rows_cols_for_overflow::run( other.rows(), other.cols()); +#endif const Index othersize = other.rows() * other.cols(); if (RowsAtCompileTime == 1) { eigen_assert(other.rows() == 1 || other.cols() == 1); @@ -444,7 +452,9 @@ class PlainObjectBase : public internal::dense_xpr_base::type /** This is a special case of the templated operator=. Its purpose is to * prevent a default operator= from hiding the templated operator=. */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const PlainObjectBase& other) { return _set(other); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Derived& operator=(const PlainObjectBase& other) { + return _set(other); + } /** \sa MatrixBase::lazyAssign() */ template @@ -462,28 +472,29 @@ class PlainObjectBase : public internal::dense_xpr_base::type // Prevent user from trying to instantiate PlainObjectBase objects // by making all its constructor protected. See bug 1074. protected: - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PlainObjectBase() : m_storage() { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr PlainObjectBase() : m_storage() { // EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED } #ifndef EIGEN_PARSED_BY_DOXYGEN // FIXME is it still needed ? /** \internal */ - EIGEN_DEVICE_FUNC explicit PlainObjectBase(internal::constructor_without_unaligned_array_assert) + EIGEN_DEVICE_FUNC constexpr explicit PlainObjectBase(internal::constructor_without_unaligned_array_assert) : m_storage(internal::constructor_without_unaligned_array_assert()) { // EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED } #endif - EIGEN_DEVICE_FUNC PlainObjectBase(PlainObjectBase&& other) EIGEN_NOEXCEPT : m_storage(std::move(other.m_storage)) {} + EIGEN_DEVICE_FUNC constexpr PlainObjectBase(PlainObjectBase&& other) EIGEN_NOEXCEPT + : m_storage(std::move(other.m_storage)) {} - EIGEN_DEVICE_FUNC PlainObjectBase& operator=(PlainObjectBase&& other) EIGEN_NOEXCEPT { + EIGEN_DEVICE_FUNC constexpr PlainObjectBase& operator=(PlainObjectBase&& other) EIGEN_NOEXCEPT { m_storage = std::move(other.m_storage); return *this; } /** Copy constructor */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PlainObjectBase(const PlainObjectBase& other) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr PlainObjectBase(const PlainObjectBase& other) : Base(), m_storage(other.m_storage) {} EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PlainObjectBase(Index size, Index rows, Index cols) : m_storage(size, rows, cols) { @@ -741,7 +752,7 @@ class PlainObjectBase : public internal::dense_xpr_base::type // aliasing is dealt once in internal::call_assignment // so at this stage we have to assume aliasing... and resising has to be done later. template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& _set(const DenseBase& other) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Derived& _set(const DenseBase& other) { internal::call_assignment(this->derived(), other.derived()); return this->derived(); } @@ -752,7 +763,7 @@ class PlainObjectBase : public internal::dense_xpr_base::type * \sa operator=(const MatrixBase&), _set() */ template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& _set_noalias(const DenseBase& other) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Derived& _set_noalias(const DenseBase& other) { // I don't think we need this resize call since the lazyAssign will anyways resize // and lazyAssign will be called by the assign selector. //_resize_to_match(other); @@ -939,8 +950,10 @@ struct conservative_resize_like_impl { ((Derived::IsRowMajor && _this.cols() == cols) || // row-major and we change only the number of rows (!Derived::IsRowMajor && _this.rows() == rows))) // column-major and we change only the number of columns { +#ifndef EIGEN_NO_DEBUG internal::check_rows_cols_for_overflow::run(rows, cols); +#endif _this.derived().m_storage.conservativeResize(rows * cols, rows, cols); } else { // The storage order does not allow us to use reallocation. diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/Product.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/Product.h index 6bad832e0b..37683e3c27 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/Product.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/Product.h @@ -21,7 +21,7 @@ class ProductImpl; namespace internal { template -struct traits > { +struct traits> { typedef remove_all_t LhsCleaned; typedef remove_all_t RhsCleaned; typedef traits LhsTraits; @@ -55,6 +55,129 @@ struct traits > { }; }; +struct TransposeProductEnum { + // convenience enumerations to specialize transposed products + enum : int { + Default = 0x00, + Matrix = 0x01, + Permutation = 0x02, + MatrixMatrix = (Matrix << 8) | Matrix, + MatrixPermutation = (Matrix << 8) | Permutation, + PermutationMatrix = (Permutation << 8) | Matrix + }; +}; +template +struct TransposeKind { + static constexpr int Kind = is_matrix_base_xpr::value ? TransposeProductEnum::Matrix + : is_permutation_base_xpr::value ? TransposeProductEnum::Permutation + : TransposeProductEnum::Default; +}; + +template +struct TransposeProductKind { + static constexpr int Kind = (TransposeKind::Kind << 8) | TransposeKind::Kind; +}; + +template ::Kind> +struct product_transpose_helper { + // by default, don't optimize the transposed product + using Derived = Product; + using Scalar = typename Derived::Scalar; + using TransposeType = Transpose; + using ConjugateTransposeType = CwiseUnaryOp, TransposeType>; + using AdjointType = std::conditional_t::IsComplex, ConjugateTransposeType, TransposeType>; + + // return (lhs * rhs)^T + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TransposeType run_transpose(const Derived& derived) { + return TransposeType(derived); + } + // return (lhs * rhs)^H + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE AdjointType run_adjoint(const Derived& derived) { + return AdjointType(TransposeType(derived)); + } +}; + +template +struct product_transpose_helper { + // expand the transposed matrix-matrix product + using Derived = Product; + + using LhsScalar = typename traits::Scalar; + using LhsTransposeType = typename DenseBase::ConstTransposeReturnType; + using LhsConjugateTransposeType = CwiseUnaryOp, LhsTransposeType>; + using LhsAdjointType = + std::conditional_t::IsComplex, LhsConjugateTransposeType, LhsTransposeType>; + + using RhsScalar = typename traits::Scalar; + using RhsTransposeType = typename DenseBase::ConstTransposeReturnType; + using RhsConjugateTransposeType = CwiseUnaryOp, RhsTransposeType>; + using RhsAdjointType = + std::conditional_t::IsComplex, RhsConjugateTransposeType, RhsTransposeType>; + + using TransposeType = Product; + using AdjointType = Product; + + // return rhs^T * lhs^T + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TransposeType run_transpose(const Derived& derived) { + return TransposeType(RhsTransposeType(derived.rhs()), LhsTransposeType(derived.lhs())); + } + // return rhs^H * lhs^H + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE AdjointType run_adjoint(const Derived& derived) { + return AdjointType(RhsAdjointType(RhsTransposeType(derived.rhs())), + LhsAdjointType(LhsTransposeType(derived.lhs()))); + } +}; +template +struct product_transpose_helper { + // expand the transposed permutation-matrix product + using Derived = Product; + + using LhsInverseType = typename PermutationBase::InverseReturnType; + + using RhsScalar = typename traits::Scalar; + using RhsTransposeType = typename DenseBase::ConstTransposeReturnType; + using RhsConjugateTransposeType = CwiseUnaryOp, RhsTransposeType>; + using RhsAdjointType = + std::conditional_t::IsComplex, RhsConjugateTransposeType, RhsTransposeType>; + + using TransposeType = Product; + using AdjointType = Product; + + // return rhs^T * lhs^-1 + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TransposeType run_transpose(const Derived& derived) { + return TransposeType(RhsTransposeType(derived.rhs()), LhsInverseType(derived.lhs())); + } + // return rhs^H * lhs^-1 + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE AdjointType run_adjoint(const Derived& derived) { + return AdjointType(RhsAdjointType(RhsTransposeType(derived.rhs())), LhsInverseType(derived.lhs())); + } +}; +template +struct product_transpose_helper { + // expand the transposed matrix-permutation product + using Derived = Product; + + using LhsScalar = typename traits::Scalar; + using LhsTransposeType = typename DenseBase::ConstTransposeReturnType; + using LhsConjugateTransposeType = CwiseUnaryOp, LhsTransposeType>; + using LhsAdjointType = + std::conditional_t::IsComplex, LhsConjugateTransposeType, LhsTransposeType>; + + using RhsInverseType = typename PermutationBase::InverseReturnType; + + using TransposeType = Product; + using AdjointType = Product; + + // return rhs^-1 * lhs^T + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TransposeType run_transpose(const Derived& derived) { + return TransposeType(RhsInverseType(derived.rhs()), LhsTransposeType(derived.lhs())); + } + // return rhs^-1 * lhs^H + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE AdjointType run_adjoint(const Derived& derived) { + return AdjointType(RhsInverseType(derived.rhs()), LhsAdjointType(LhsTransposeType(derived.lhs()))); + } +}; + } // end namespace internal /** \class Product @@ -93,6 +216,9 @@ class Product typedef internal::remove_all_t LhsNestedCleaned; typedef internal::remove_all_t RhsNestedCleaned; + using TransposeReturnType = typename internal::product_transpose_helper::TransposeType; + using AdjointReturnType = typename internal::product_transpose_helper::AdjointType; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Product(const Lhs& lhs, const Rhs& rhs) : m_lhs(lhs), m_rhs(rhs) { eigen_assert(lhs.cols() == rhs.rows() && "invalid matrix product" && "if you wanted a coeff-wise or a dot product use the respective explicit functions"); @@ -104,6 +230,13 @@ class Product EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const LhsNestedCleaned& lhs() const { return m_lhs; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const RhsNestedCleaned& rhs() const { return m_rhs; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TransposeReturnType transpose() const { + return internal::product_transpose_helper::run_transpose(*this); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE AdjointReturnType adjoint() const { + return internal::product_transpose_helper::run_adjoint(*this); + } + protected: LhsNested m_lhs; RhsNested m_rhs; @@ -112,12 +245,12 @@ class Product namespace internal { template ::ret> -class dense_product_base : public internal::dense_xpr_base >::type {}; +class dense_product_base : public internal::dense_xpr_base>::type {}; /** Conversion to scalar for inner-products */ template class dense_product_base - : public internal::dense_xpr_base >::type { + : public internal::dense_xpr_base>::type { typedef Product ProductXpr; typedef typename internal::dense_xpr_base::type Base; diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/RandomImpl.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/RandomImpl.h new file mode 100644 index 0000000000..e82da96609 --- /dev/null +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/RandomImpl.h @@ -0,0 +1,253 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2024 Charles Schlosser +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_RANDOM_IMPL_H +#define EIGEN_RANDOM_IMPL_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +namespace internal { + +/**************************************************************************** + * Implementation of random * + ****************************************************************************/ + +template +struct random_default_impl {}; + +template +struct random_impl : random_default_impl::IsComplex, NumTraits::IsInteger> {}; + +template +struct random_retval { + typedef Scalar type; +}; + +template +inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random(const Scalar& x, const Scalar& y) { + return EIGEN_MATHFUNC_IMPL(random, Scalar)::run(x, y); +} + +template +inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random() { + return EIGEN_MATHFUNC_IMPL(random, Scalar)::run(); +} + +// TODO: replace or provide alternatives to this, e.g. std::random_device +struct eigen_random_device { + using ReturnType = int; + static constexpr int Entropy = meta_floor_log2<(unsigned int)(RAND_MAX) + 1>::value; + static constexpr ReturnType Highest = RAND_MAX; + static EIGEN_DEVICE_FUNC inline ReturnType run() { return std::rand(); } +}; + +// Fill a built-in unsigned integer with numRandomBits beginning with the least significant bit +template +struct random_bits_impl { + EIGEN_STATIC_ASSERT(std::is_unsigned::value, SCALAR MUST BE A BUILT - IN UNSIGNED INTEGER) + using RandomDevice = eigen_random_device; + using RandomReturnType = typename RandomDevice::ReturnType; + static constexpr int kEntropy = RandomDevice::Entropy; + static constexpr int kTotalBits = sizeof(Scalar) * CHAR_BIT; + // return a Scalar filled with numRandomBits beginning from the least significant bit + static EIGEN_DEVICE_FUNC inline Scalar run(int numRandomBits) { + eigen_assert((numRandomBits >= 0) && (numRandomBits <= kTotalBits)); + const Scalar mask = Scalar(-1) >> ((kTotalBits - numRandomBits) & (kTotalBits - 1)); + Scalar randomBits = 0; + for (int shift = 0; shift < numRandomBits; shift += kEntropy) { + RandomReturnType r = RandomDevice::run(); + randomBits |= static_cast(r) << shift; + } + // clear the excess bits + randomBits &= mask; + return randomBits; + } +}; + +template +EIGEN_DEVICE_FUNC inline BitsType getRandomBits(int numRandomBits) { + return random_bits_impl::run(numRandomBits); +} + +// random implementation for a built-in floating point type +template ::value> +struct random_float_impl { + using BitsType = typename numext::get_integer_by_size::unsigned_type; + static constexpr EIGEN_DEVICE_FUNC inline int mantissaBits() { + const int digits = NumTraits::digits(); + return digits - 1; + } + static EIGEN_DEVICE_FUNC inline Scalar run(int numRandomBits) { + eigen_assert(numRandomBits >= 0 && numRandomBits <= mantissaBits()); + BitsType randomBits = getRandomBits(numRandomBits); + // if fewer than MantissaBits is requested, shift them to the left + randomBits <<= (mantissaBits() - numRandomBits); + // randomBits is in the half-open interval [2,4) + randomBits |= numext::bit_cast(Scalar(2)); + // result is in the half-open interval [-1,1) + Scalar result = numext::bit_cast(randomBits) - Scalar(3); + return result; + } +}; +// random implementation for a custom floating point type +// uses double as the implementation with a mantissa with a size equal to either the target scalar's mantissa or that of +// double, whichever is smaller +template +struct random_float_impl { + static EIGEN_DEVICE_FUNC inline int mantissaBits() { + const int digits = NumTraits::digits(); + constexpr int kDoubleDigits = NumTraits::digits(); + return numext::mini(digits, kDoubleDigits) - 1; + } + static EIGEN_DEVICE_FUNC inline Scalar run(int numRandomBits) { + eigen_assert(numRandomBits >= 0 && numRandomBits <= mantissaBits()); + Scalar result = static_cast(random_float_impl::run(numRandomBits)); + return result; + } +}; + +// random implementation for long double +// this specialization is not compatible with double-double scalars +template ::digits != (2 * std::numeric_limits::digits)))> +struct random_longdouble_impl { + static constexpr int Size = sizeof(long double); + static constexpr EIGEN_DEVICE_FUNC inline int mantissaBits() { return NumTraits::digits() - 1; } + static EIGEN_DEVICE_FUNC inline long double run(int numRandomBits) { + eigen_assert(numRandomBits >= 0 && numRandomBits <= mantissaBits()); + EIGEN_USING_STD(memcpy); + int numLowBits = numext::mini(numRandomBits, 64); + int numHighBits = numext::maxi(numRandomBits - 64, 0); + uint64_t randomBits[2]; + long double result = 2.0L; + memcpy(&randomBits, &result, Size); + randomBits[0] |= getRandomBits(numLowBits); + randomBits[1] |= getRandomBits(numHighBits); + memcpy(&result, &randomBits, Size); + result -= 3.0L; + return result; + } +}; +template <> +struct random_longdouble_impl { + static constexpr EIGEN_DEVICE_FUNC inline int mantissaBits() { return NumTraits::digits() - 1; } + static EIGEN_DEVICE_FUNC inline long double run(int numRandomBits) { + return static_cast(random_float_impl::run(numRandomBits)); + } +}; +template <> +struct random_float_impl : random_longdouble_impl<> {}; + +template +struct random_default_impl { + using Impl = random_float_impl; + static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar& x, const Scalar& y, int numRandomBits) { + Scalar half_x = Scalar(0.5) * x; + Scalar half_y = Scalar(0.5) * y; + Scalar result = (half_x + half_y) + (half_y - half_x) * run(numRandomBits); + // result is in the half-open interval [x, y) -- provided that x < y + return result; + } + static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar& x, const Scalar& y) { + return run(x, y, Impl::mantissaBits()); + } + static EIGEN_DEVICE_FUNC inline Scalar run(int numRandomBits) { return Impl::run(numRandomBits); } + static EIGEN_DEVICE_FUNC inline Scalar run() { return run(Impl::mantissaBits()); } +}; + +template ::IsSigned, bool BuiltIn = std::is_integral::value> +struct random_int_impl; + +// random implementation for a built-in unsigned integer type +template +struct random_int_impl { + static constexpr int kTotalBits = sizeof(Scalar) * CHAR_BIT; + static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar& x, const Scalar& y) { + if (y <= x) return x; + Scalar range = y - x; + // handle edge case where [x,y] spans the entire range of Scalar + if (range == NumTraits::highest()) return run(); + Scalar count = range + 1; + // calculate the number of random bits needed to fill range + int numRandomBits = log2_ceil(count); + Scalar randomBits; + do { + randomBits = getRandomBits(numRandomBits); + // if the random draw is outside [0, range), try again (rejection sampling) + // in the worst-case scenario, the probability of rejection is: 1/2 - 1/2^numRandomBits < 50% + } while (randomBits >= count); + Scalar result = x + randomBits; + return result; + } + static EIGEN_DEVICE_FUNC inline Scalar run() { return getRandomBits(kTotalBits); } +}; + +// random implementation for a built-in signed integer type +template +struct random_int_impl { + static constexpr int kTotalBits = sizeof(Scalar) * CHAR_BIT; + using BitsType = typename make_unsigned::type; + static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar& x, const Scalar& y) { + if (y <= x) return x; + // Avoid overflow by representing `range` as an unsigned type + BitsType range = static_cast(y) - static_cast(x); + BitsType randomBits = random_int_impl::run(0, range); + // Avoid overflow in the case where `x` is negative and there is a large range so + // `randomBits` would also be negative if cast to `Scalar` first. + Scalar result = static_cast(static_cast(x) + randomBits); + return result; + } + static EIGEN_DEVICE_FUNC inline Scalar run() { return static_cast(getRandomBits(kTotalBits)); } +}; + +// todo: custom integers +template +struct random_int_impl { + static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar&, const Scalar&) { return run(); } + static EIGEN_DEVICE_FUNC inline Scalar run() { + eigen_assert(std::false_type::value && "RANDOM FOR CUSTOM INTEGERS NOT YET SUPPORTED"); + return Scalar(0); + } +}; + +template +struct random_default_impl : random_int_impl {}; + +template <> +struct random_impl { + static EIGEN_DEVICE_FUNC inline bool run(const bool& x, const bool& y) { + if (y <= x) return x; + return run(); + } + static EIGEN_DEVICE_FUNC inline bool run() { return getRandomBits(1) ? true : false; } +}; + +template +struct random_default_impl { + typedef typename NumTraits::Real RealScalar; + using Impl = random_impl; + static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar& x, const Scalar& y, int numRandomBits) { + return Scalar(Impl::run(x.real(), y.real(), numRandomBits), Impl::run(x.imag(), y.imag(), numRandomBits)); + } + static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar& x, const Scalar& y) { + return Scalar(Impl::run(x.real(), y.real()), Impl::run(x.imag(), y.imag())); + } + static EIGEN_DEVICE_FUNC inline Scalar run(int numRandomBits) { + return Scalar(Impl::run(numRandomBits), Impl::run(numRandomBits)); + } + static EIGEN_DEVICE_FUNC inline Scalar run() { return Scalar(Impl::run(), Impl::run()); } +}; + +} // namespace internal +} // namespace Eigen + +#endif // EIGEN_RANDOM_IMPL_H diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/TriangularMatrix.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/TriangularMatrix.h index afdb2425ff..2b1683be95 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/TriangularMatrix.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/TriangularMatrix.h @@ -184,7 +184,8 @@ class TriangularView enum { Mode = Mode_, Flags = internal::traits::Flags, - TransposeMode = (Mode & Upper ? Lower : 0) | (Mode & Lower ? Upper : 0) | (Mode & (UnitDiag)) | (Mode & (ZeroDiag)), + TransposeMode = (int(Mode) & int(Upper) ? Lower : 0) | (int(Mode) & int(Lower) ? Upper : 0) | + (int(Mode) & int(UnitDiag)) | (int(Mode) & int(ZeroDiag)), IsVectorAtCompileTime = false }; diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/AVX/Complex.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/AVX/Complex.h index a5e6499c53..bae57146b1 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/AVX/Complex.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/AVX/Complex.h @@ -40,6 +40,8 @@ struct packet_traits > : default_packet_traits { HasDiv = 1, HasNegate = 1, HasSqrt = 1, + HasLog = 1, + HasExp = 1, HasAbs = 0, HasAbs2 = 0, HasMin = 0, @@ -238,6 +240,7 @@ struct packet_traits > : default_packet_traits { HasDiv = 1, HasNegate = 1, HasSqrt = 1, + HasLog = 1, HasAbs = 0, HasAbs2 = 0, HasMin = 0, @@ -431,6 +434,21 @@ EIGEN_STRONG_INLINE Packet4cf psqrt(const Packet4cf& a) { return psqrt_complex(a); } +template <> +EIGEN_STRONG_INLINE Packet2cd plog(const Packet2cd& a) { + return plog_complex(a); +} + +template <> +EIGEN_STRONG_INLINE Packet4cf plog(const Packet4cf& a) { + return plog_complex(a); +} + +template <> +EIGEN_STRONG_INLINE Packet4cf pexp(const Packet4cf& a) { + return pexp_complex(a); +} + } // end namespace internal } // end namespace Eigen diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/AVX/MathFunctions.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/AVX/MathFunctions.h index b125d5974f..321188c4b2 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/AVX/MathFunctions.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/AVX/MathFunctions.h @@ -22,7 +22,15 @@ namespace Eigen { namespace internal { EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_FLOAT(Packet8f) -EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_DOUBLE(Packet4d) + +EIGEN_DOUBLE_PACKET_FUNCTION(atan, Packet4d) +EIGEN_DOUBLE_PACKET_FUNCTION(log, Packet4d) +EIGEN_DOUBLE_PACKET_FUNCTION(log2, Packet4d) +EIGEN_DOUBLE_PACKET_FUNCTION(exp, Packet4d) +#ifdef EIGEN_VECTORIZE_AVX2 +EIGEN_DOUBLE_PACKET_FUNCTION(sin, Packet4d) +EIGEN_DOUBLE_PACKET_FUNCTION(cos, Packet4d) +#endif // Notice that for newer processors, it is counterproductive to use Newton // iteration for square root. In particular, Skylake and Zen2 processors diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/AVX/PacketMath.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/AVX/PacketMath.h index d752f06475..b05429cfe8 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/AVX/PacketMath.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/AVX/PacketMath.h @@ -124,11 +124,7 @@ struct packet_traits : default_packet_traits { HasRsqrt = 1, HasTanh = EIGEN_FAST_MATH, HasErf = EIGEN_FAST_MATH, - HasBlend = 1, - HasRound = 1, - HasFloor = 1, - HasCeil = 1, - HasRint = 1 + HasBlend = 1 }; }; template <> @@ -142,16 +138,16 @@ struct packet_traits : default_packet_traits { HasCmp = 1, HasDiv = 1, +#ifdef EIGEN_VECTORIZE_AVX2 + HasSin = EIGEN_FAST_MATH, + HasCos = EIGEN_FAST_MATH, +#endif HasLog = 1, HasExp = 1, HasSqrt = 1, HasRsqrt = 1, HasATan = 1, - HasBlend = 1, - HasRound = 1, - HasFloor = 1, - HasCeil = 1, - HasRint = 1 + HasBlend = 1 }; }; @@ -188,10 +184,6 @@ struct packet_traits : default_packet_traits { HasTanh = EIGEN_FAST_MATH, HasErf = EIGEN_FAST_MATH, HasBlend = 0, - HasRound = 1, - HasFloor = 1, - HasCeil = 1, - HasRint = 1, HasBessel = 1, HasNdtri = 1 }; @@ -231,10 +223,6 @@ struct packet_traits : default_packet_traits { HasTanh = EIGEN_FAST_MATH, HasErf = EIGEN_FAST_MATH, HasBlend = 0, - HasRound = 1, - HasFloor = 1, - HasCeil = 1, - HasRint = 1, HasBessel = 1, HasNdtri = 1 }; @@ -270,9 +258,7 @@ struct packet_traits : default_packet_traits { template <> struct packet_traits : default_packet_traits { typedef Packet4l type; - // There is no half-size packet for current Packet4l. - // TODO: support as SSE path. - typedef Packet4l half; + typedef Packet2l half; enum { Vectorizable = 1, AlignedOnScalar = 1, HasCmp = 1, size = 4 }; }; template <> @@ -332,6 +318,9 @@ template <> struct unpacket_traits { typedef double type; typedef Packet2d half; +#ifdef EIGEN_VECTORIZE_AVX2 + typedef Packet4l integer_packet; +#endif enum { size = 4, alignment = Aligned32, @@ -368,7 +357,7 @@ struct unpacket_traits { template <> struct unpacket_traits { typedef int64_t type; - typedef Packet4l half; + typedef Packet2l half; enum { size = 4, alignment = Aligned32, @@ -561,7 +550,7 @@ EIGEN_STRONG_INLINE std::enable_if_t<(N >= 32) && (N < 63), Packet4l> parithmeti } template EIGEN_STRONG_INLINE std::enable_if_t<(N == 63), Packet4l> parithmetic_shift_right(Packet4l a) { - return _mm256_shuffle_epi32(_mm256_srai_epi32(a, 31), (shuffle_mask<1, 1, 3, 3>::mask)); + return _mm256_cmpgt_epi64(_mm256_setzero_si256(), a); } template EIGEN_STRONG_INLINE std::enable_if_t<(N < 0) || (N > 63), Packet4l> parithmetic_shift_right(Packet4l a) { @@ -623,22 +612,22 @@ EIGEN_DEVICE_FUNC inline Packet4ul pgather(const uint64_t* template <> EIGEN_DEVICE_FUNC inline void pscatter(int64_t* to, const Packet4l& from, Index stride) { __m128i low = _mm256_extractf128_si256(from, 0); - to[stride * 0] = _mm_extract_epi64(low, 0); - to[stride * 1] = _mm_extract_epi64(low, 1); + to[stride * 0] = _mm_extract_epi64_0(low); + to[stride * 1] = _mm_extract_epi64_1(low); __m128i high = _mm256_extractf128_si256(from, 1); - to[stride * 2] = _mm_extract_epi64(high, 0); - to[stride * 3] = _mm_extract_epi64(high, 1); + to[stride * 2] = _mm_extract_epi64_0(high); + to[stride * 3] = _mm_extract_epi64_1(high); } template <> EIGEN_DEVICE_FUNC inline void pscatter(uint64_t* to, const Packet4ul& from, Index stride) { __m128i low = _mm256_extractf128_si256(from, 0); - to[stride * 0] = _mm_extract_epi64(low, 0); - to[stride * 1] = _mm_extract_epi64(low, 1); + to[stride * 0] = _mm_extract_epi64_0(low); + to[stride * 1] = _mm_extract_epi64_1(low); __m128i high = _mm256_extractf128_si256(from, 1); - to[stride * 2] = _mm_extract_epi64(high, 0); - to[stride * 3] = _mm_extract_epi64(high, 1); + to[stride * 2] = _mm_extract_epi64_0(high); + to[stride * 3] = _mm_extract_epi64_1(high); } template <> EIGEN_STRONG_INLINE void pstore1(int64_t* to, const int64_t& a) { @@ -652,21 +641,21 @@ EIGEN_STRONG_INLINE void pstore1(uint64_t* to, const uint64_t& a) { } template <> EIGEN_STRONG_INLINE int64_t pfirst(const Packet4l& a) { - return _mm_cvtsi128_si64(_mm256_castsi256_si128(a)); + return _mm_extract_epi64_0(_mm256_castsi256_si128(a)); } template <> EIGEN_STRONG_INLINE uint64_t pfirst(const Packet4ul& a) { - return _mm_cvtsi128_si64(_mm256_castsi256_si128(a)); + return _mm_extract_epi64_0(_mm256_castsi256_si128(a)); } template <> EIGEN_STRONG_INLINE int64_t predux(const Packet4l& a) { __m128i r = _mm_add_epi64(_mm256_castsi256_si128(a), _mm256_extractf128_si256(a, 1)); - return _mm_extract_epi64(r, 0) + _mm_extract_epi64(r, 1); + return _mm_extract_epi64_0(r) + _mm_extract_epi64_1(r); } template <> EIGEN_STRONG_INLINE uint64_t predux(const Packet4ul& a) { __m128i r = _mm_add_epi64(_mm256_castsi256_si128(a), _mm256_extractf128_si256(a, 1)); - return numext::bit_cast(_mm_extract_epi64(r, 0) + _mm_extract_epi64(r, 1)); + return numext::bit_cast(_mm_extract_epi64_0(r) + _mm_extract_epi64_1(r)); } #define MM256_SHUFFLE_EPI64(A, B, M) _mm256_shuffle_pd(_mm256_castsi256_pd(A), _mm256_castsi256_pd(B), M) EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock& kernel) { @@ -1252,6 +1241,15 @@ EIGEN_STRONG_INLINE Packet4d pfloor(const Packet4d& a) { return _mm256_floor_pd(a); } +template <> +EIGEN_STRONG_INLINE Packet8f ptrunc(const Packet8f& a) { + return _mm256_round_ps(a, _MM_FROUND_TRUNC); +} +template <> +EIGEN_STRONG_INLINE Packet4d ptrunc(const Packet4d& a) { + return _mm256_round_pd(a, _MM_FROUND_TRUNC); +} + template <> EIGEN_STRONG_INLINE Packet8i ptrue(const Packet8i& a) { #ifdef EIGEN_VECTORIZE_AVX2 @@ -1803,14 +1801,12 @@ EIGEN_STRONG_INLINE Packet4ul preverse(const Packet4ul& a) { // pabs should be ok template <> EIGEN_STRONG_INLINE Packet8f pabs(const Packet8f& a) { - const Packet8f mask = _mm256_castsi256_ps(_mm256_setr_epi32(0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, - 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF)); + const Packet8f mask = _mm256_castsi256_ps(_mm256_set1_epi32(0x7FFFFFFF)); return _mm256_and_ps(a, mask); } template <> EIGEN_STRONG_INLINE Packet4d pabs(const Packet4d& a) { - const Packet4d mask = _mm256_castsi256_pd(_mm256_setr_epi32(0xFFFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF, 0x7FFFFFFF, - 0xFFFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF, 0x7FFFFFFF)); + const Packet4d mask = _mm256_castsi256_pd(_mm256_set1_epi64x(0x7FFFFFFFFFFFFFFF)); return _mm256_and_pd(a, mask); } template <> @@ -1830,28 +1826,32 @@ EIGEN_STRONG_INLINE Packet8ui pabs(const Packet8ui& a) { template <> EIGEN_STRONG_INLINE Packet8h psignbit(const Packet8h& a) { - return _mm_srai_epi16(a, 15); + return _mm_cmpgt_epi16(_mm_setzero_si128(), a); } template <> EIGEN_STRONG_INLINE Packet8bf psignbit(const Packet8bf& a) { - return _mm_srai_epi16(a, 15); + return _mm_cmpgt_epi16(_mm_setzero_si128(), a); } template <> EIGEN_STRONG_INLINE Packet8f psignbit(const Packet8f& a) { - return _mm256_castsi256_ps(parithmetic_shift_right<31>((Packet8i)_mm256_castps_si256(a))); +#ifdef EIGEN_VECTORIZE_AVX2 + return _mm256_castsi256_ps(_mm256_cmpgt_epi32(_mm256_setzero_si256(), _mm256_castps_si256(a))); +#else + return _mm256_castsi256_ps(parithmetic_shift_right<31>(Packet8i(_mm256_castps_si256(a)))); +#endif } template <> -EIGEN_STRONG_INLINE Packet8ui psignbit(const Packet8ui& a) { - return pzero(a); +EIGEN_STRONG_INLINE Packet8ui psignbit(const Packet8ui& /*unused*/) { + return _mm256_setzero_si256(); } #ifdef EIGEN_VECTORIZE_AVX2 template <> EIGEN_STRONG_INLINE Packet4d psignbit(const Packet4d& a) { - return _mm256_castsi256_pd(parithmetic_shift_right<63>((Packet4l)_mm256_castpd_si256(a))); + return _mm256_castsi256_pd(_mm256_cmpgt_epi64(_mm256_setzero_si256(), _mm256_castpd_si256(a))); } template <> -EIGEN_STRONG_INLINE Packet4ul psignbit(const Packet4ul& a) { - return pzero(a); +EIGEN_STRONG_INLINE Packet4ul psignbit(const Packet4ul& /*unused*/) { + return _mm256_setzero_si256(); } #endif @@ -2127,40 +2127,29 @@ EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock& kernel) { kernel.packet[2] = _mm256_permute2f128_pd(T1, T3, 49); } +EIGEN_STRONG_INLINE __m256i avx_blend_mask(const Selector<4>& ifPacket) { + return _mm256_set_epi64x(0 - ifPacket.select[3], 0 - ifPacket.select[2], 0 - ifPacket.select[1], + 0 - ifPacket.select[0]); +} + +EIGEN_STRONG_INLINE __m256i avx_blend_mask(const Selector<8>& ifPacket) { + return _mm256_set_epi32(0 - ifPacket.select[7], 0 - ifPacket.select[6], 0 - ifPacket.select[5], + 0 - ifPacket.select[4], 0 - ifPacket.select[3], 0 - ifPacket.select[2], + 0 - ifPacket.select[1], 0 - ifPacket.select[0]); +} + template <> EIGEN_STRONG_INLINE Packet8f pblend(const Selector<8>& ifPacket, const Packet8f& thenPacket, const Packet8f& elsePacket) { -#ifdef EIGEN_VECTORIZE_AVX2 - const __m256i zero = _mm256_setzero_si256(); - const __m256i select = - _mm256_set_epi32(ifPacket.select[7], ifPacket.select[6], ifPacket.select[5], ifPacket.select[4], - ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]); - __m256i false_mask = _mm256_cmpeq_epi32(zero, select); - return _mm256_blendv_ps(thenPacket, elsePacket, _mm256_castsi256_ps(false_mask)); -#else - const __m256 zero = _mm256_setzero_ps(); - const __m256 select = _mm256_set_ps(ifPacket.select[7], ifPacket.select[6], ifPacket.select[5], ifPacket.select[4], - ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]); - __m256 false_mask = _mm256_cmp_ps(select, zero, _CMP_EQ_UQ); - return _mm256_blendv_ps(thenPacket, elsePacket, false_mask); -#endif + const __m256 true_mask = _mm256_castsi256_ps(avx_blend_mask(ifPacket)); + return pselect(true_mask, thenPacket, elsePacket); } template <> EIGEN_STRONG_INLINE Packet4d pblend(const Selector<4>& ifPacket, const Packet4d& thenPacket, const Packet4d& elsePacket) { -#ifdef EIGEN_VECTORIZE_AVX2 - const __m256i zero = _mm256_setzero_si256(); - const __m256i select = - _mm256_set_epi64x(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]); - __m256i false_mask = _mm256_cmpeq_epi64(select, zero); - return _mm256_blendv_pd(thenPacket, elsePacket, _mm256_castsi256_pd(false_mask)); -#else - const __m256d zero = _mm256_setzero_pd(); - const __m256d select = _mm256_set_pd(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]); - __m256d false_mask = _mm256_cmp_pd(select, zero, _CMP_EQ_UQ); - return _mm256_blendv_pd(thenPacket, elsePacket, false_mask); -#endif + const __m256d true_mask = _mm256_castsi256_pd(avx_blend_mask(ifPacket)); + return pselect(true_mask, thenPacket, elsePacket); } // Packet math for Eigen::half @@ -2315,6 +2304,11 @@ EIGEN_STRONG_INLINE Packet8h pfloor(const Packet8h& a) { return float2half(pfloor(half2float(a))); } +template <> +EIGEN_STRONG_INLINE Packet8h ptrunc(const Packet8h& a) { + return float2half(ptrunc(half2float(a))); +} + template <> EIGEN_STRONG_INLINE Packet8h pcmp_eq(const Packet8h& a, const Packet8h& b) { return Pack16To8(pcmp_eq(half2float(a), half2float(b))); @@ -2690,6 +2684,11 @@ EIGEN_STRONG_INLINE Packet8bf pfloor(const Packet8bf& a) { return F32ToBf16(pfloor(Bf16ToF32(a))); } +template <> +EIGEN_STRONG_INLINE Packet8bf ptrunc(const Packet8bf& a) { + return F32ToBf16(ptrunc(Bf16ToF32(a))); +} + template <> EIGEN_STRONG_INLINE Packet8bf pcmp_eq(const Packet8bf& a, const Packet8bf& b) { return Pack16To8(pcmp_eq(Bf16ToF32(a), Bf16ToF32(b))); diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/AVX/TypeCasting.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/AVX/TypeCasting.h index 3688f8df2d..9dcd6ef844 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/AVX/TypeCasting.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/AVX/TypeCasting.h @@ -47,6 +47,13 @@ template <> struct type_casting_traits : vectorized_type_casting_traits {}; template <> struct type_casting_traits : vectorized_type_casting_traits {}; + +#ifdef EIGEN_VECTORIZE_AVX2 +template <> +struct type_casting_traits : vectorized_type_casting_traits {}; +template <> +struct type_casting_traits : vectorized_type_casting_traits {}; +#endif #endif template <> @@ -188,6 +195,63 @@ EIGEN_STRONG_INLINE Packet4ui preinterpret(const Packet8ui } #ifdef EIGEN_VECTORIZE_AVX2 +template <> +EIGEN_STRONG_INLINE Packet4l pcast(const Packet4d& a) { +#if defined(EIGEN_VECTORIZE_AVX512DQ) && defined(EIGEN_VECTORIZE_AVS512VL) + return _mm256_cvttpd_epi64(a); +#else + + // if 'a' exceeds the numerical limits of int64_t, the behavior is undefined + + // e <= 0 corresponds to |a| < 1, which should result in zero. incidentally, intel intrinsics with shift arguments + // greater than or equal to 64 produce zero. furthermore, negative shifts appear to be interpreted as large positive + // shifts (two's complement), which also result in zero. therefore, e does not need to be clamped to [0, 64) + + constexpr int kTotalBits = sizeof(double) * CHAR_BIT, kMantissaBits = std::numeric_limits::digits - 1, + kExponentBits = kTotalBits - kMantissaBits - 1, kBias = (1 << (kExponentBits - 1)) - 1; + + const __m256i cst_one = _mm256_set1_epi64x(1); + const __m256i cst_total_bits = _mm256_set1_epi64x(kTotalBits); + const __m256i cst_bias = _mm256_set1_epi64x(kBias); + + __m256i a_bits = _mm256_castpd_si256(a); + // shift left by 1 to clear the sign bit, and shift right by kMantissaBits + 1 to recover biased exponent + __m256i biased_e = _mm256_srli_epi64(_mm256_slli_epi64(a_bits, 1), kMantissaBits + 1); + __m256i e = _mm256_sub_epi64(biased_e, cst_bias); + + // shift to the left by kExponentBits + 1 to clear the sign and exponent bits + __m256i shifted_mantissa = _mm256_slli_epi64(a_bits, kExponentBits + 1); + // shift to the right by kTotalBits - e to convert the significand to an integer + __m256i result_significand = _mm256_srlv_epi64(shifted_mantissa, _mm256_sub_epi64(cst_total_bits, e)); + + // add the implied bit + __m256i result_exponent = _mm256_sllv_epi64(cst_one, e); + // e <= 0 is interpreted as a large positive shift (2's complement), which also conveniently results in zero + __m256i result = _mm256_add_epi64(result_significand, result_exponent); + // handle negative arguments + __m256i sign_mask = _mm256_cmpgt_epi64(_mm256_setzero_si256(), a_bits); + result = _mm256_sub_epi64(_mm256_xor_si256(result, sign_mask), sign_mask); + return result; +#endif +} + +template <> +EIGEN_STRONG_INLINE Packet4d pcast(const Packet4l& a) { +#if defined(EIGEN_VECTORIZE_AVX512DQ) && defined(EIGEN_VECTORIZE_AVS512VL) + return _mm256_cvtepi64_pd(a); +#else + EIGEN_ALIGN16 int64_t aux[4]; + pstore(aux, a); + return _mm256_set_pd(static_cast(aux[3]), static_cast(aux[2]), static_cast(aux[1]), + static_cast(aux[0])); +#endif +} + +template <> +EIGEN_STRONG_INLINE Packet4d pcast(const Packet2l& a, const Packet2l& b) { + return _mm256_set_m128d((pcast(b)), (pcast(a))); +} + template <> EIGEN_STRONG_INLINE Packet4ul preinterpret(const Packet4l& a) { return Packet4ul(a); @@ -198,6 +262,21 @@ EIGEN_STRONG_INLINE Packet4l preinterpret(const Packet4ul& return Packet4l(a); } +template <> +EIGEN_STRONG_INLINE Packet4l preinterpret(const Packet4d& a) { + return _mm256_castpd_si256(a); +} + +template <> +EIGEN_STRONG_INLINE Packet4d preinterpret(const Packet4l& a) { + return _mm256_castsi256_pd(a); +} + +// truncation operations +template <> +EIGEN_STRONG_INLINE Packet2l preinterpret(const Packet4l& a) { + return _mm256_castsi256_si128(a); +} #endif template <> diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/Default/BFloat16.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/Default/BFloat16.h index 68b48f9c95..f31c6cee6e 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/Default/BFloat16.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/Default/BFloat16.h @@ -637,6 +637,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 floor(const bfloat16& a) { return EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 ceil(const bfloat16& a) { return bfloat16(::ceilf(float(a))); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 rint(const bfloat16& a) { return bfloat16(::rintf(float(a))); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 round(const bfloat16& a) { return bfloat16(::roundf(float(a))); } +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 trunc(const bfloat16& a) { return bfloat16(::truncf(float(a))); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 fmod(const bfloat16& a, const bfloat16& b) { return bfloat16(::fmodf(float(a), float(b))); } @@ -677,16 +678,22 @@ EIGEN_ALWAYS_INLINE std::ostream& operator<<(std::ostream& os, const bfloat16& v namespace internal { template <> -struct random_default_impl { - static inline bfloat16 run(const bfloat16& x, const bfloat16& y) { - return x + (y - x) * bfloat16(float(std::rand()) / float(RAND_MAX)); - } - static inline bfloat16 run() { return run(bfloat16(-1.f), bfloat16(1.f)); } +struct is_arithmetic { + enum { value = true }; }; template <> -struct is_arithmetic { - enum { value = true }; +struct random_impl { + enum : int { MantissaBits = 7 }; + using Impl = random_impl; + static EIGEN_DEVICE_FUNC inline bfloat16 run(const bfloat16& x, const bfloat16& y) { + float result = Impl::run(x, y, MantissaBits); + return bfloat16(result); + } + static EIGEN_DEVICE_FUNC inline bfloat16 run() { + float result = Impl::run(MantissaBits); + return bfloat16(result); + } }; } // namespace internal diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h index 8fb5b6855c..1c46ba48d7 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h @@ -130,7 +130,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Packet pldexp_generic(const Packet& a, con PacketI b = parithmetic_shift_right<2>(e); // floor(e/4); Packet c = preinterpret(plogical_shift_left(padd(b, bias))); // 2^b Packet out = pmul(pmul(pmul(a, c), c), c); // a * 2^(3b) - b = psub(psub(psub(e, b), b), b); // e - 3b + b = pnmadd(pset1(3), b, e); // e - 3b c = preinterpret(plogical_shift_left(padd(b, bias))); // 2^(e-3*b) out = pmul(out, c); return out; @@ -555,7 +555,7 @@ inline float trig_reduce_huge(float xf, Eigen::numext::int32_t* quadrant) { return float(double(int64_t(p)) * pio2_62); } -template +template EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS #if EIGEN_COMP_GNUC_STRICT __attribute__((optimize("-fno-unsafe-math-optimizations"))) @@ -582,8 +582,8 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS // Subtract y * Pi/2 to reduce x to the interval -Pi/4 <= x <= +Pi/4 // using "Extended precision modular arithmetic" -#if defined(EIGEN_HAS_SINGLE_INSTRUCTION_MADD) - // This version requires true FMA for high accuracy +#if defined(EIGEN_VECTORIZE_FMA) + // This version requires true FMA for high accuracy. // It provides a max error of 1ULP up to (with absolute_error < 5.9605e-08): const float huge_th = ComputeSine ? 117435.992f : 71476.0625f; x = pmadd(y, pset1(-1.57079601287841796875f), x); @@ -669,10 +669,21 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS y2 = pmadd(y2, x, x); // Select the correct result from the two polynomials. - y = ComputeSine ? pselect(poly_mask, y2, y1) : pselect(poly_mask, y1, y2); - + if (ComputeBoth) { + Packet peven = peven_mask(x); + Packet ysin = pselect(poly_mask, y2, y1); + Packet ycos = pselect(poly_mask, y1, y2); + Packet sign_bit_sin = pxor(_x, preinterpret(plogical_shift_left<30>(y_int))); + Packet sign_bit_cos = preinterpret(plogical_shift_left<30>(padd(y_int, csti_1))); + sign_bit_sin = pand(sign_bit_sin, cst_sign_mask); // clear all but left most bit + sign_bit_cos = pand(sign_bit_cos, cst_sign_mask); // clear all but left most bit + y = pselect(peven, pxor(ysin, sign_bit_sin), pxor(ycos, sign_bit_cos)); + } else { + y = ComputeSine ? pselect(poly_mask, y2, y1) : pselect(poly_mask, y1, y2); + y = pxor(y, sign_bit); + } // Update the sign and filter huge inputs - return pxor(y, sign_bit); + return y; } template @@ -685,6 +696,174 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pcos_float(const Pack return psincos_float(x); } +// Trigonometric argument reduction for double for inputs smaller than 15. +// Reduces trigonometric arguments for double inputs where x < 15. Given an argument x and its corresponding quadrant +// count n, the function computes and returns the reduced argument t such that x = n * pi/2 + t. +template +Packet trig_reduce_small_double(const Packet& x, const Packet& q) { + // Pi/2 split into 2 values + const Packet cst_pio2_a = pset1(-1.570796325802803); + const Packet cst_pio2_b = pset1(-9.920935184482005e-10); + + Packet t; + t = pmadd(cst_pio2_a, q, x); + t = pmadd(cst_pio2_b, q, t); + return t; +} + +// Trigonometric argument reduction for double for inputs smaller than 1e14. +// Reduces trigonometric arguments for double inputs where x < 1e14. Given an argument x and its corresponding quadrant +// count n, the function computes and returns the reduced argument t such that x = n * pi/2 + t. +template +Packet trig_reduce_medium_double(const Packet& x, const Packet& q_high, const Packet& q_low) { + // Pi/2 split into 4 values + const Packet cst_pio2_a = pset1(-1.570796325802803); + const Packet cst_pio2_b = pset1(-9.920935184482005e-10); + const Packet cst_pio2_c = pset1(-6.123234014771656e-17); + const Packet cst_pio2_d = pset1(1.903488962019325e-25); + + Packet t; + t = pmadd(cst_pio2_a, q_high, x); + t = pmadd(cst_pio2_a, q_low, t); + t = pmadd(cst_pio2_b, q_high, t); + t = pmadd(cst_pio2_b, q_low, t); + t = pmadd(cst_pio2_c, q_high, t); + t = pmadd(cst_pio2_c, q_low, t); + t = pmadd(cst_pio2_d, padd(q_low, q_high), t); + return t; +} + +template +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +#if EIGEN_COMP_GNUC_STRICT + __attribute__((optimize("-fno-unsafe-math-optimizations"))) +#endif + Packet + psincos_double(const Packet& x) { + typedef typename unpacket_traits::integer_packet PacketI; + typedef typename unpacket_traits::type ScalarI; + + const Packet cst_sign_mask = pset1frombits(static_cast(0x8000000000000000u)); + + // If the argument is smaller than this value, use a simpler argument reduction + const double small_th = 15; + // If the argument is bigger than this value, use the non-vectorized std version + const double huge_th = 1e14; + + const Packet cst_2oPI = pset1(0.63661977236758134307553505349006); // 2/PI + // Integer Packet constants + const PacketI cst_one = pset1(ScalarI(1)); + // Constant for splitting + const Packet cst_split = pset1(1 << 24); + + Packet x_abs = pabs(x); + + // Scale x by 2/Pi + PacketI q_int; + Packet s; + + // TODO Implement huge angle argument reduction + if (EIGEN_PREDICT_FALSE(predux_any(pcmp_le(pset1(small_th), x_abs)))) { + Packet q_high = pmul(pfloor(pmul(x_abs, pdiv(cst_2oPI, cst_split))), cst_split); + Packet q_low_noround = psub(pmul(x_abs, cst_2oPI), q_high); + q_int = pcast(padd(q_low_noround, pset1(0.5))); + Packet q_low = pcast(q_int); + s = trig_reduce_medium_double(x_abs, q_high, q_low); + } else { + Packet qval_noround = pmul(x_abs, cst_2oPI); + q_int = pcast(padd(qval_noround, pset1(0.5))); + Packet q = pcast(q_int); + s = trig_reduce_small_double(x_abs, q); + } + + // All the upcoming approximating polynomials have even exponents + Packet ss = pmul(s, s); + + // Padé approximant of cos(x) + // Assuring < 1 ULP error on the interval [-pi/4, pi/4] + // cos(x) ~= (80737373*x^8 - 13853547000*x^6 + 727718024880*x^4 - 11275015752000*x^2 + 23594700729600)/(147173*x^8 + + // 39328920*x^6 + 5772800880*x^4 + 522334612800*x^2 + 23594700729600) + // MATLAB code to compute those coefficients: + // syms x; + // cosf = @(x) cos(x); + // pade_cosf = pade(cosf(x), x, 0, 'Order', 8) + Packet sc1_num = pmadd(ss, pset1(80737373), pset1(-13853547000)); + Packet sc2_num = pmadd(sc1_num, ss, pset1(727718024880)); + Packet sc3_num = pmadd(sc2_num, ss, pset1(-11275015752000)); + Packet sc4_num = pmadd(sc3_num, ss, pset1(23594700729600)); + Packet sc1_denum = pmadd(ss, pset1(147173), pset1(39328920)); + Packet sc2_denum = pmadd(sc1_denum, ss, pset1(5772800880)); + Packet sc3_denum = pmadd(sc2_denum, ss, pset1(522334612800)); + Packet sc4_denum = pmadd(sc3_denum, ss, pset1(23594700729600)); + Packet scos = pdiv(sc4_num, sc4_denum); + + // Padé approximant of sin(x) + // Assuring < 1 ULP error on the interval [-pi/4, pi/4] + // sin(x) ~= (x*(4585922449*x^8 - 1066023933480*x^6 + 83284044283440*x^4 - 2303682236856000*x^2 + + // 15605159573203200))/(45*(1029037*x^8 + 345207016*x^6 + 61570292784*x^4 + 6603948711360*x^2 + 346781323848960)) + // MATLAB code to compute those coefficients: + // syms x; + // sinf = @(x) sin(x); + // pade_sinf = pade(sinf(x), x, 0, 'Order', 8, 'OrderMode', 'relative') + Packet ss1_num = pmadd(ss, pset1(4585922449), pset1(-1066023933480)); + Packet ss2_num = pmadd(ss1_num, ss, pset1(83284044283440)); + Packet ss3_num = pmadd(ss2_num, ss, pset1(-2303682236856000)); + Packet ss4_num = pmadd(ss3_num, ss, pset1(15605159573203200)); + Packet ss1_denum = pmadd(ss, pset1(1029037), pset1(345207016)); + Packet ss2_denum = pmadd(ss1_denum, ss, pset1(61570292784)); + Packet ss3_denum = pmadd(ss2_denum, ss, pset1(6603948711360)); + Packet ss4_denum = pmadd(ss3_denum, ss, pset1(346781323848960)); + Packet ssin = pdiv(pmul(s, ss4_num), pmul(pset1(45), ss4_denum)); + + Packet poly_mask = preinterpret(pcmp_eq(pand(q_int, cst_one), pzero(q_int))); + + Packet sign_sin = pxor(x, preinterpret(plogical_shift_left<62>(q_int))); + Packet sign_cos = preinterpret(plogical_shift_left<62>(padd(q_int, cst_one))); + Packet sign_bit, sFinalRes; + if (ComputeBoth) { + Packet peven = peven_mask(x); + sign_bit = pselect((s), sign_sin, sign_cos); + sFinalRes = pselect(pxor(peven, poly_mask), ssin, scos); + } else { + sign_bit = ComputeSine ? sign_sin : sign_cos; + sFinalRes = ComputeSine ? pselect(poly_mask, ssin, scos) : pselect(poly_mask, scos, ssin); + } + sign_bit = pand(sign_bit, cst_sign_mask); // clear all but left most bit + sFinalRes = pxor(sFinalRes, sign_bit); + + // If the inputs values are higher than that a value that the argument reduction can currently address, compute them + // using std::sin and std::cos + // TODO Remove it when huge angle argument reduction is implemented + if (EIGEN_PREDICT_FALSE(predux_any(pcmp_le(pset1(huge_th), x_abs)))) { + const int PacketSize = unpacket_traits::size; + EIGEN_ALIGN_TO_BOUNDARY(sizeof(Packet)) double sincos_vals[PacketSize]; + EIGEN_ALIGN_TO_BOUNDARY(sizeof(Packet)) double x_cpy[PacketSize]; + pstoreu(x_cpy, x); + pstoreu(sincos_vals, sFinalRes); + for (int k = 0; k < PacketSize; ++k) { + double val = x_cpy[k]; + if (std::abs(val) > huge_th && (numext::isfinite)(val)) { + if (ComputeBoth) + sincos_vals[k] = k % 2 == 0 ? std::sin(val) : std::cos(val); + else + sincos_vals[k] = ComputeSine ? std::sin(val) : std::cos(val); + } + } + sFinalRes = ploadu(sincos_vals); + } + return sFinalRes; +} + +template +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet psin_double(const Packet& x) { + return psincos_double(x); +} + +template +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pcos_double(const Packet& x) { + return psincos_double(x); +} + // Generic implementation of acos(x). template EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pacos_float(const Packet& x_in) { @@ -917,6 +1096,65 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patan_double(const Pa return pxor(p, x_signmask); } +/** \internal \returns the hyperbolic tan of \a a (coeff-wise) + Doesn't do anything fancy, just a 13/6-degree rational interpolant which + is accurate up to a couple of ulps in the (approximate) range [-8, 8], + outside of which tanh(x) = +/-1 in single precision. The input is clamped + to the range [-c, c]. The value c is chosen as the smallest value where + the approximation evaluates to exactly 1. In the reange [-0.0004, 0.0004] + the approximation tanh(x) ~= x is used for better accuracy as x tends to zero. + + This implementation works on both scalars and packets. +*/ +template +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS T ptanh_float(const T& a_x) { + // Clamp the inputs to the range [-c, c] +#ifdef EIGEN_VECTORIZE_FMA + const T plus_clamp = pset1(7.99881172180175781f); + const T minus_clamp = pset1(-7.99881172180175781f); +#else + const T plus_clamp = pset1(7.90531110763549805f); + const T minus_clamp = pset1(-7.90531110763549805f); +#endif + const T tiny = pset1(0.0004f); + const T x = pmax(pmin(a_x, plus_clamp), minus_clamp); + const T tiny_mask = pcmp_lt(pabs(a_x), tiny); + // The monomial coefficients of the numerator polynomial (odd). + const T alpha_1 = pset1(4.89352455891786e-03f); + const T alpha_3 = pset1(6.37261928875436e-04f); + const T alpha_5 = pset1(1.48572235717979e-05f); + const T alpha_7 = pset1(5.12229709037114e-08f); + const T alpha_9 = pset1(-8.60467152213735e-11f); + const T alpha_11 = pset1(2.00018790482477e-13f); + const T alpha_13 = pset1(-2.76076847742355e-16f); + + // The monomial coefficients of the denominator polynomial (even). + const T beta_0 = pset1(4.89352518554385e-03f); + const T beta_2 = pset1(2.26843463243900e-03f); + const T beta_4 = pset1(1.18534705686654e-04f); + const T beta_6 = pset1(1.19825839466702e-06f); + + // Since the polynomials are odd/even, we need x^2. + const T x2 = pmul(x, x); + + // Evaluate the numerator polynomial p. + T p = pmadd(x2, alpha_13, alpha_11); + p = pmadd(x2, p, alpha_9); + p = pmadd(x2, p, alpha_7); + p = pmadd(x2, p, alpha_5); + p = pmadd(x2, p, alpha_3); + p = pmadd(x2, p, alpha_1); + p = pmul(x, p); + + // Evaluate the denominator polynomial q. + T q = pmadd(x2, beta_6, beta_4); + q = pmadd(x2, q, beta_2); + q = pmadd(x2, q, beta_0); + + // Divide the numerator by the denominator. + return pselect(tiny_mask, x, pdiv(p, q)); +} + template EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patanh_float(const Packet& x) { typedef typename unpacket_traits::type Scalar; @@ -964,6 +1202,77 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pdiv_complex(const Pa return Packet(pdiv(result_scaled.v, y_max)); } +template +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog_complex(const Packet& x) { + typedef typename unpacket_traits::type Scalar; + typedef typename Scalar::value_type RealScalar; + typedef typename unpacket_traits::as_real RealPacket; + + RealPacket real_mask_rp = peven_mask(x.v); + Packet real_mask(real_mask_rp); + + // Real part + RealPacket x_flip = pcplxflip(x).v; // b, a + Packet x_norm = phypot_complex(x); // sqrt(a^2 + b^2), sqrt(a^2 + b^2) + RealPacket xlogr = plog(x_norm.v); // log(sqrt(a^2 + b^2)), log(sqrt(a^2 + b^2)) + + // Imag part + RealPacket ximg = patan2(x.v, x_flip); // atan2(a, b), atan2(b, a) + + const RealPacket cst_pos_inf = pset1(NumTraits::infinity()); + RealPacket x_abs = pabs(x.v); + RealPacket is_x_pos_inf = pcmp_eq(x_abs, cst_pos_inf); + RealPacket is_y_pos_inf = pcplxflip(Packet(is_x_pos_inf)).v; + RealPacket is_any_inf = por(is_x_pos_inf, is_y_pos_inf); + RealPacket xreal = pselect(is_any_inf, cst_pos_inf, xlogr); + + Packet xres = pselect(real_mask, Packet(xreal), Packet(ximg)); // log(sqrt(a^2 + b^2)), atan2(b, a) + return xres; +} + +template +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexp_complex(const Packet& a) { + typedef typename unpacket_traits::as_real RealPacket; + typedef typename unpacket_traits::type Scalar; + typedef typename Scalar::value_type RealScalar; + const RealPacket even_mask = peven_mask(a.v); + const RealPacket odd_mask = pcplxflip(Packet(even_mask)).v; + + // Let a = x + iy. + // exp(a) = exp(x) * cis(y), plus some special edge-case handling. + + // exp(x): + RealPacket x = pand(a.v, even_mask); + x = por(x, pcplxflip(Packet(x)).v); + RealPacket expx = pexp(x); // exp(x); + + // cis(y): + RealPacket y = pand(odd_mask, a.v); + y = por(y, pcplxflip(Packet(y)).v); + RealPacket cisy = psincos_float(y); + cisy = pcplxflip(Packet(cisy)).v; // cos(y) + i * sin(y) + + const RealPacket cst_pos_inf = pset1(NumTraits::infinity()); + const RealPacket cst_neg_inf = pset1(-NumTraits::infinity()); + + // If x is -inf, we know that cossin(y) is bounded, + // so the result is (0, +/-0), where the sign of the imaginary part comes + // from the sign of cossin(y). + RealPacket cisy_sign = por(pandnot(cisy, pabs(cisy)), pset1(RealScalar(1))); + cisy = pselect(pcmp_eq(x, cst_neg_inf), cisy_sign, cisy); + + // If x is inf, and cos(y) has unknown sign (y is inf or NaN), the result + // is (+/-inf, NaN), where the signs are undetermined (take the sign of y). + RealPacket y_sign = por(pandnot(y, pabs(y)), pset1(RealScalar(1))); + cisy = pselect(pand(pcmp_eq(x, cst_pos_inf), pisnan(cisy)), pand(y_sign, even_mask), cisy); + Packet result = Packet(pmul(expx, cisy)); + + // If y is +/- 0, the input is real, so take the real result for consistency. + result = pselect(Packet(pcmp_eq(y, pzero(y))), Packet(por(pand(expx, even_mask), pand(y, odd_mask))), result); + + return result; +} + template EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet psqrt_complex(const Packet& a) { typedef typename unpacket_traits::type Scalar; @@ -1076,6 +1385,41 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet psqrt_complex(const P return pselect(is_imag_inf, imag_inf_result, pselect(is_real_inf, real_inf_result, result)); } +// \internal \returns the norm of a complex number z = x + i*y, defined as sqrt(x^2 + y^2). +// Implemented using the hypot(a,b) algorithm from https://doi.org/10.48550/arXiv.1904.09481 +template +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet phypot_complex(const Packet& a) { + typedef typename unpacket_traits::type Scalar; + typedef typename Scalar::value_type RealScalar; + typedef typename unpacket_traits::as_real RealPacket; + + const RealPacket cst_zero_rp = pset1(static_cast(0.0)); + const RealPacket cst_minus_one_rp = pset1(static_cast(-1.0)); + const RealPacket cst_two_rp = pset1(static_cast(2.0)); + const RealPacket evenmask = peven_mask(a.v); + + RealPacket a_abs = pabs(a.v); + RealPacket a_flip = pcplxflip(Packet(a_abs)).v; // |b|, |a| + RealPacket a_all = pselect(evenmask, a_abs, a_flip); // |a|, |a| + RealPacket b_all = pselect(evenmask, a_flip, a_abs); // |b|, |b| + + RealPacket a2 = pmul(a.v, a.v); // |a^2, b^2| + RealPacket a2_flip = pcplxflip(Packet(a2)).v; // |b^2, a^2| + RealPacket h = psqrt(padd(a2, a2_flip)); // |sqrt(a^2 + b^2), sqrt(a^2 + b^2)| + RealPacket h_sq = pmul(h, h); // |a^2 + b^2, a^2 + b^2| + RealPacket a_sq = pselect(evenmask, a2, a2_flip); // |a^2, a^2| + RealPacket m_h_sq = pmul(h_sq, cst_minus_one_rp); + RealPacket m_a_sq = pmul(a_sq, cst_minus_one_rp); + RealPacket x = psub(psub(pmadd(h, h, m_h_sq), pmadd(b_all, b_all, psub(a_sq, h_sq))), pmadd(a_all, a_all, m_a_sq)); + h = psub(h, pdiv(x, pmul(cst_two_rp, h))); // |h - x/(2*h), h - x/(2*h)| + + // handle zero-case + RealPacket iszero = pcmp_eq(por(a_abs, a_flip), cst_zero_rp); + + h = pandnot(h, iszero); // |sqrt(a^2+b^2), sqrt(a^2+b^2)| + return Packet(h); // |sqrt(a^2+b^2), sqrt(a^2+b^2)| +} + template struct psign_impl::type>::IsComplex && !NumTraits::type>::IsInteger>> { @@ -1181,7 +1525,7 @@ EIGEN_STRONG_INLINE void fast_twosum(const Packet& x, const Packet& y, Packet& s s_lo = psub(y, t); } -#ifdef EIGEN_HAS_SINGLE_INSTRUCTION_MADD +#ifdef EIGEN_VECTORIZE_FMA // This function implements the extended precision product of // a pair of floating point numbers. Given {x, y}, it computes the pair // {p_hi, p_lo} such that x * y = p_hi + p_lo holds exactly and @@ -1227,7 +1571,7 @@ EIGEN_STRONG_INLINE void twoprod(const Packet& x, const Packet& y, Packet& p_hi, p_lo = pmadd(x_lo, y_lo, p_lo); } -#endif // EIGEN_HAS_SINGLE_INSTRUCTION_MADD +#endif // EIGEN_VECTORIZE_FMA // This function implements Dekker's algorithm for the addition // of two double word numbers represented by {x_hi, x_lo} and {y_hi, y_lo}. @@ -2125,6 +2469,95 @@ struct unary_pow_impl { } }; +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet generic_rint(const Packet& a) { + using Scalar = typename unpacket_traits::type; + using IntType = typename numext::get_integer_by_size::signed_type; + // Adds and subtracts signum(a) * 2^kMantissaBits to force rounding. + const IntType kLimit = IntType(1) << (NumTraits::digits() - 1); + const Packet cst_limit = pset1(static_cast(kLimit)); + Packet abs_a = pabs(a); + Packet sign_a = pandnot(a, abs_a); + Packet rint_a = padd(abs_a, cst_limit); + // Don't compile-away addition and subtraction. + EIGEN_OPTIMIZATION_BARRIER(rint_a); + rint_a = psub(rint_a, cst_limit); + rint_a = por(rint_a, sign_a); + // If greater than limit (or NaN), simply return a. + Packet mask = pcmp_lt(abs_a, cst_limit); + Packet result = pselect(mask, rint_a, a); + return result; +} + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet generic_floor(const Packet& a) { + using Scalar = typename unpacket_traits::type; + const Packet cst_1 = pset1(Scalar(1)); + Packet rint_a = generic_rint(a); + // if a < rint(a), then rint(a) == ceil(a) + Packet mask = pcmp_lt(a, rint_a); + Packet offset = pand(cst_1, mask); + Packet result = psub(rint_a, offset); + return result; +} + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet generic_ceil(const Packet& a) { + using Scalar = typename unpacket_traits::type; + const Packet cst_1 = pset1(Scalar(1)); + Packet rint_a = generic_rint(a); + // if rint(a) < a, then rint(a) == floor(a) + Packet mask = pcmp_lt(rint_a, a); + Packet offset = pand(cst_1, mask); + Packet result = padd(rint_a, offset); + return result; +} + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet generic_trunc(const Packet& a) { + Packet abs_a = pabs(a); + Packet sign_a = pandnot(a, abs_a); + Packet floor_abs_a = generic_floor(abs_a); + Packet result = por(floor_abs_a, sign_a); + return result; +} + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet generic_round(const Packet& a) { + using Scalar = typename unpacket_traits::type; + const Packet cst_half = pset1(Scalar(0.5)); + const Packet cst_1 = pset1(Scalar(1)); + Packet abs_a = pabs(a); + Packet sign_a = pandnot(a, abs_a); + Packet floor_abs_a = generic_floor(abs_a); + Packet diff = psub(abs_a, floor_abs_a); + Packet mask = pcmp_le(cst_half, diff); + Packet offset = pand(cst_1, mask); + Packet result = padd(floor_abs_a, offset); + result = por(result, sign_a); + return result; +} + +template +struct nearest_integer_packetop_impl { + using Scalar = typename unpacket_traits::type; + static_assert(packet_traits::HasRound, "Generic nearest integer functions are disabled for this type."); + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_floor(const Packet& x) { return generic_floor(x); } + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_ceil(const Packet& x) { return generic_ceil(x); } + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_rint(const Packet& x) { return generic_rint(x); } + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_round(const Packet& x) { return generic_round(x); } + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_trunc(const Packet& x) { return generic_trunc(x); } +}; + +template +struct nearest_integer_packetop_impl { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_floor(const Packet& x) { return x; } + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_ceil(const Packet& x) { return x; } + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_rint(const Packet& x) { return x; } + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_round(const Packet& x) { return x; } + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_trunc(const Packet& x) { return x; } +}; + } // end namespace internal } // end namespace Eigen diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h index ade9f3f41e..1bf1128163 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h @@ -82,6 +82,14 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet psin_float(const Pack template EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pcos_float(const Packet& x); +/** \internal \returns sin(x) for double precision float */ +template +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet psin_double(const Packet& x); + +/** \internal \returns cos(x) for double precision float */ +template +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pcos_double(const Packet& x); + /** \internal \returns asin(x) for single precision float */ template EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pasin_float(const Packet& x); @@ -98,6 +106,10 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patan_float(const Pac template EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patan_double(const Packet& x); +/** \internal \returns tanh(x) for single precision float */ +template +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet ptanh_float(const Packet& x); + /** \internal \returns atanh(x) for single precision float */ template EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patanh_float(const Packet& x); @@ -113,6 +125,29 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pdiv_complex(const Pa template struct ppolevl; +/** \internal \returns log(x) for complex types */ +template +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog_complex(const Packet& x); + +/** \internal \returns exp(x) for complex types */ +template +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexp_complex(const Packet& x); + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet generic_rint(const Packet& a); + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet generic_floor(const Packet& a); + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet generic_ceil(const Packet& a); + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet generic_trunc(const Packet& a); + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet generic_round(const Packet& a); + // Macros for instantiating these generic functions for different backends. #define EIGEN_PACKET_FUNCTION(METHOD, SCALAR, PACKET) \ template <> \ @@ -129,6 +164,7 @@ struct ppolevl; EIGEN_FLOAT_PACKET_FUNCTION(asin, PACKET) \ EIGEN_FLOAT_PACKET_FUNCTION(acos, PACKET) \ EIGEN_FLOAT_PACKET_FUNCTION(atan, PACKET) \ + EIGEN_FLOAT_PACKET_FUNCTION(tanh, PACKET) \ EIGEN_FLOAT_PACKET_FUNCTION(atanh, PACKET) \ EIGEN_FLOAT_PACKET_FUNCTION(log, PACKET) \ EIGEN_FLOAT_PACKET_FUNCTION(log2, PACKET) \ @@ -140,15 +176,13 @@ struct ppolevl; template <> \ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_UNUSED PACKET plog1p(const PACKET& _x) { \ return internal::generic_plog1p(_x); \ - } \ - template <> \ - EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_UNUSED PACKET ptanh(const PACKET& _x) { \ - return internal::generic_fast_tanh_float(_x); \ } #define EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_DOUBLE(PACKET) \ EIGEN_DOUBLE_PACKET_FUNCTION(atan, PACKET) \ EIGEN_DOUBLE_PACKET_FUNCTION(log, PACKET) \ + EIGEN_DOUBLE_PACKET_FUNCTION(sin, PACKET) \ + EIGEN_DOUBLE_PACKET_FUNCTION(cos, PACKET) \ EIGEN_DOUBLE_PACKET_FUNCTION(log2, PACKET) \ EIGEN_DOUBLE_PACKET_FUNCTION(exp, PACKET) diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/Default/Half.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/Default/Half.h index 92516c79d8..9c195c12a1 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/Default/Half.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/Default/Half.h @@ -722,6 +722,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half ceil(const half& a) { } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half rint(const half& a) { return half(::rintf(float(a))); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half round(const half& a) { return half(::roundf(float(a))); } +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half trunc(const half& a) { return half(::truncf(float(a))); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half fmod(const half& a, const half& b) { return half(::fmodf(float(a), float(b))); } @@ -762,16 +763,22 @@ EIGEN_ALWAYS_INLINE std::ostream& operator<<(std::ostream& os, const half& v) { namespace internal { template <> -struct random_default_impl { - static inline half run(const half& x, const half& y) { - return x + (y - x) * half(float(std::rand()) / float(RAND_MAX)); - } - static inline half run() { return run(half(-1.f), half(1.f)); } +struct is_arithmetic { + enum { value = true }; }; template <> -struct is_arithmetic { - enum { value = true }; +struct random_impl { + enum : int { MantissaBits = 10 }; + using Impl = random_impl; + static EIGEN_DEVICE_FUNC inline half run(const half& x, const half& y) { + float result = Impl::run(x, y, MantissaBits); + return half(result); + } + static EIGEN_DEVICE_FUNC inline half run() { + float result = Impl::run(MantissaBits); + return half(result); + } }; } // end namespace internal diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/NEON/Complex.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/NEON/Complex.h index 82408474d5..5257c03c86 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/NEON/Complex.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/NEON/Complex.h @@ -62,6 +62,8 @@ struct packet_traits > : default_packet_traits { HasDiv = 1, HasNegate = 1, HasSqrt = 1, + HasLog = 1, + HasExp = 1, HasAbs = 0, HasAbs2 = 0, HasMin = 0, @@ -436,6 +438,26 @@ EIGEN_STRONG_INLINE Packet2cf psqrt(const Packet2cf& a) { return psqrt_complex(a); } +template <> +EIGEN_STRONG_INLINE Packet1cf plog(const Packet1cf& a) { + return plog_complex(a); +} + +template <> +EIGEN_STRONG_INLINE Packet2cf plog(const Packet2cf& a) { + return plog_complex(a); +} + +template <> +EIGEN_STRONG_INLINE Packet1cf pexp(const Packet1cf& a) { + return pexp_complex(a); +} + +template <> +EIGEN_STRONG_INLINE Packet2cf pexp(const Packet2cf& a) { + return pexp_complex(a); +} + //---------- double ---------- #if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG @@ -468,6 +490,7 @@ struct packet_traits > : default_packet_traits { HasDiv = 1, HasNegate = 1, HasSqrt = 1, + HasLog = 1, HasAbs = 0, HasAbs2 = 0, HasMin = 0, @@ -657,6 +680,11 @@ EIGEN_STRONG_INLINE Packet1cd psqrt(const Packet1cd& a) { return psqrt_complex(a); } +template <> +EIGEN_STRONG_INLINE Packet1cd plog(const Packet1cd& a) { + return plog_complex(a); +} + #endif // EIGEN_ARCH_ARM64 } // end namespace internal diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/NEON/PacketMath.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/NEON/PacketMath.h index 4e3a14dcb2..50cf56f0e8 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/NEON/PacketMath.h @@ -196,12 +196,7 @@ struct packet_traits : default_packet_traits { HasConj = 1, HasSetLinear = 1, HasBlend = 0, - HasDiv = 1, - HasFloor = 1, - HasCeil = 1, - HasRint = 1, - HasSin = EIGEN_FAST_MATH, HasCos = EIGEN_FAST_MATH, HasACos = 1, @@ -1271,7 +1266,7 @@ EIGEN_STRONG_INLINE Packet2ul pdiv(const Packet2ul& /*a*/, const Pack return pset1(0ULL); } -#ifdef __ARM_FEATURE_FMA +#ifdef EIGEN_VECTORIZE_FMA template <> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vfmaq_f32(c, a, b); @@ -4470,76 +4465,25 @@ EIGEN_STRONG_INLINE Packet4f pceil(const Packet4f& a) { return vrndpq_f32(a); } -#else - template <> -EIGEN_STRONG_INLINE Packet4f print(const Packet4f& a) { - // Adds and subtracts signum(a) * 2^23 to force rounding. - const Packet4f limit = pset1(static_cast(1 << 23)); - const Packet4f abs_a = pabs(a); - Packet4f r = padd(abs_a, limit); - // Don't compile-away addition and subtraction. - EIGEN_OPTIMIZATION_BARRIER(r); - r = psub(r, limit); - // If greater than limit, simply return a. Otherwise, account for sign. - r = pselect(pcmp_lt(abs_a, limit), pselect(pcmp_lt(a, pzero(a)), pnegate(r), r), a); - return r; +EIGEN_STRONG_INLINE Packet2f pround(const Packet2f& a) { + return vrnda_f32(a); } template <> -EIGEN_STRONG_INLINE Packet2f print(const Packet2f& a) { - // Adds and subtracts signum(a) * 2^23 to force rounding. - const Packet2f limit = pset1(static_cast(1 << 23)); - const Packet2f abs_a = pabs(a); - Packet2f r = padd(abs_a, limit); - // Don't compile-away addition and subtraction. - EIGEN_OPTIMIZATION_BARRIER(r); - r = psub(r, limit); - // If greater than limit, simply return a. Otherwise, account for sign. - r = pselect(pcmp_lt(abs_a, limit), pselect(pcmp_lt(a, pzero(a)), pnegate(r), r), a); - return r; +EIGEN_STRONG_INLINE Packet4f pround(const Packet4f& a) { + return vrndaq_f32(a); } template <> -EIGEN_STRONG_INLINE Packet4f pfloor(const Packet4f& a) { - const Packet4f cst_1 = pset1(1.0f); - Packet4f tmp = print(a); - // If greater, subtract one. - Packet4f mask = pcmp_lt(a, tmp); - mask = pand(mask, cst_1); - return psub(tmp, mask); +EIGEN_STRONG_INLINE Packet2f ptrunc(const Packet2f& a) { + return vrnd_f32(a); } template <> -EIGEN_STRONG_INLINE Packet2f pfloor(const Packet2f& a) { - const Packet2f cst_1 = pset1(1.0f); - Packet2f tmp = print(a); - // If greater, subtract one. - Packet2f mask = pcmp_lt(a, tmp); - mask = pand(mask, cst_1); - return psub(tmp, mask); +EIGEN_STRONG_INLINE Packet4f ptrunc(const Packet4f& a) { + return vrndq_f32(a); } - -template <> -EIGEN_STRONG_INLINE Packet4f pceil(const Packet4f& a) { - const Packet4f cst_1 = pset1(1.0f); - Packet4f tmp = print(a); - // If smaller, add one. - Packet4f mask = pcmp_lt(tmp, a); - mask = pand(mask, cst_1); - return padd(tmp, mask); -} - -template <> -EIGEN_STRONG_INLINE Packet2f pceil(const Packet2f& a) { - const Packet2f cst_1 = pset1(1.0); - Packet2f tmp = print(a); - // If smaller, add one. - Packet2f mask = pcmp_lt(tmp, a); - mask = pand(mask, cst_1); - return padd(tmp, mask); -} - #endif /** @@ -4800,10 +4744,6 @@ struct packet_traits : default_packet_traits { HasSetLinear = 1, HasBlend = 0, HasDiv = 1, - HasFloor = 1, - HasCeil = 1, - HasRint = 1, - HasSin = EIGEN_FAST_MATH, HasCos = EIGEN_FAST_MATH, HasLog = 1, @@ -4983,6 +4923,16 @@ EIGEN_STRONG_INLINE Packet4bf pceil(const Packet4bf& a) { return F32ToBf16(pceil(Bf16ToF32(a))); } +template <> +EIGEN_STRONG_INLINE Packet4bf pround(const Packet4bf& a) { + return F32ToBf16(pround(Bf16ToF32(a))); +} + +template <> +EIGEN_STRONG_INLINE Packet4bf ptrunc(const Packet4bf& a) { + return F32ToBf16(ptrunc(Bf16ToF32(a))); +} + template <> EIGEN_STRONG_INLINE Packet4bf pconj(const Packet4bf& a) { return a; @@ -5168,17 +5118,14 @@ struct packet_traits : default_packet_traits { HasBlend = 0, HasDiv = 1, - HasFloor = 1, - HasCeil = 1, - HasRint = 1, #if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG HasExp = 1, HasLog = 1, HasATan = 1, #endif - HasSin = 0, - HasCos = 0, + HasSin = EIGEN_FAST_MATH, + HasCos = EIGEN_FAST_MATH, HasSqrt = 1, HasRsqrt = 1, HasTanh = 0, @@ -5249,7 +5196,7 @@ EIGEN_STRONG_INLINE Packet2d pdiv(const Packet2d& a, const Packet2d& b return vdivq_f64(a, b); } -#ifdef __ARM_FEATURE_FMA +#ifdef EIGEN_VECTORIZE_FMA // See bug 936. See above comment about FMA for float. template <> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { @@ -5460,6 +5407,16 @@ EIGEN_STRONG_INLINE Packet2d pceil(const Packet2d& a) { return vrndpq_f64(a); } +template <> +EIGEN_STRONG_INLINE Packet2d pround(const Packet2d& a) { + return vrndaq_f64(a); +} + +template <> +EIGEN_STRONG_INLINE Packet2d ptrunc(const Packet2d& a) { + return vrndq_f64(a); +} + template <> EIGEN_STRONG_INLINE Packet2d pldexp(const Packet2d& a, const Packet2d& exponent) { return pldexp_generic(a, exponent); @@ -5521,9 +5478,6 @@ struct packet_traits : default_packet_traits { HasInsert = 1, HasReduxp = 1, HasDiv = 1, - HasFloor = 1, - HasCeil = 1, - HasRint = 1, HasSin = 0, HasCos = 0, HasLog = 0, @@ -5791,6 +5745,26 @@ EIGEN_STRONG_INLINE Packet4hf pceil(const Packet4hf& a) { return vrndp_f16(a); } +template <> +EIGEN_STRONG_INLINE Packet8hf pround(const Packet8hf& a) { + return vrndaq_f16(a); +} + +template <> +EIGEN_STRONG_INLINE Packet4hf pround(const Packet4hf& a) { + return vrnda_f16(a); +} + +template <> +EIGEN_STRONG_INLINE Packet8hf ptrunc(const Packet8hf& a) { + return vrndq_f16(a); +} + +template <> +EIGEN_STRONG_INLINE Packet4hf ptrunc(const Packet4hf& a) { + return vrnd_f16(a); +} + template <> EIGEN_STRONG_INLINE Packet8hf psqrt(const Packet8hf& a) { return vsqrtq_f16(a); diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/SSE/Complex.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/SSE/Complex.h index 4c5c499e1c..0e70f03182 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/SSE/Complex.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/SSE/Complex.h @@ -42,6 +42,8 @@ struct packet_traits > : default_packet_traits { HasDiv = 1, HasNegate = 1, HasSqrt = 1, + HasLog = 1, + HasExp = 1, HasAbs = 0, HasAbs2 = 0, HasMin = 0, @@ -232,6 +234,7 @@ struct packet_traits > : default_packet_traits { HasDiv = 1, HasNegate = 1, HasSqrt = 1, + HasLog = 1, HasAbs = 0, HasAbs2 = 0, HasMin = 0, @@ -412,6 +415,21 @@ EIGEN_STRONG_INLINE Packet2cf psqrt(const Packet2cf& a) { return psqrt_complex(a); } +template <> +EIGEN_STRONG_INLINE Packet1cd plog(const Packet1cd& a) { + return plog_complex(a); +} + +template <> +EIGEN_STRONG_INLINE Packet2cf plog(const Packet2cf& a) { + return plog_complex(a); +} + +template <> +EIGEN_STRONG_INLINE Packet2cf pexp(const Packet2cf& a) { + return pexp_complex(a); +} + } // end namespace internal } // end namespace Eigen diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/SSE/PacketMath.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/SSE/PacketMath.h index be8183cd75..e19e9480a2 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/SSE/PacketMath.h @@ -52,6 +52,7 @@ typedef __m128d Packet2d; typedef eigen_packet_wrapper<__m128i, 0> Packet4i; typedef eigen_packet_wrapper<__m128i, 1> Packet16b; typedef eigen_packet_wrapper<__m128i, 4> Packet4ui; +typedef eigen_packet_wrapper<__m128i, 5> Packet2l; template <> struct is_arithmetic<__m128> { @@ -69,6 +70,10 @@ template <> struct is_arithmetic { enum { value = true }; }; +template <> +struct is_arithmetic { + enum { value = true }; +}; // Note that `Packet4ui` uses the underlying type `__m128i`, which is // interpreted as a vector of _signed_ `int32`s, which breaks some arithmetic // operations used in `GenericPacketMath.h`. @@ -140,6 +145,27 @@ EIGEN_STRONG_INLINE Packet2d vec2d_unpackhi(const Packet2d& a, const Packet2d& b #define EIGEN_DECLARE_CONST_Packet4ui(NAME, X) const Packet4ui p4ui_##NAME = pset1(X) +// Work around lack of extract/cvt for epi64 when compiling for 32-bit. +#if EIGEN_ARCH_x86_64 +EIGEN_ALWAYS_INLINE int64_t _mm_extract_epi64_0(const __m128i& a) { return _mm_cvtsi128_si64(a); } +#ifdef EIGEN_VECTORIZE_SSE4_1 +EIGEN_ALWAYS_INLINE int64_t _mm_extract_epi64_1(const __m128i& a) { return _mm_extract_epi64(a, 1); } +#else +EIGEN_ALWAYS_INLINE int64_t _mm_extract_epi64_1(const __m128i& a) { + return _mm_cvtsi128_si64(_mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(a), _mm_castsi128_pd(a), 0x1))); +} +#endif +#else +// epi64 instructions are not available. The following seems to generate the same instructions +// with -O2 in GCC/Clang. +EIGEN_ALWAYS_INLINE int64_t _mm_extract_epi64_0(const __m128i& a) { + return numext::bit_cast(_mm_cvtsd_f64(_mm_castsi128_pd(a))); +} +EIGEN_ALWAYS_INLINE int64_t _mm_extract_epi64_1(const __m128i& a) { + return numext::bit_cast(_mm_cvtsd_f64(_mm_shuffle_pd(_mm_castsi128_pd(a), _mm_castsi128_pd(a), 0x1))); +} +#endif + // Use the packet_traits defined in AVX/PacketMath.h instead if we're going // to leverage AVX instructions. #ifndef EIGEN_VECTORIZE_AVX @@ -172,12 +198,6 @@ struct packet_traits : default_packet_traits { HasTanh = EIGEN_FAST_MATH, HasErf = EIGEN_FAST_MATH, HasBlend = 1, - HasCeil = 1, - HasFloor = 1, -#ifdef EIGEN_VECTORIZE_SSE4_1 - HasRound = 1, -#endif - HasRint = 1, HasSign = 0 // The manually vectorized version is slightly slower for SSE. }; }; @@ -192,18 +212,14 @@ struct packet_traits : default_packet_traits { HasCmp = 1, HasDiv = 1, + HasSin = EIGEN_FAST_MATH, + HasCos = EIGEN_FAST_MATH, HasLog = 1, HasExp = 1, HasSqrt = 1, HasRsqrt = 1, HasATan = 1, - HasBlend = 1, - HasFloor = 1, - HasCeil = 1, -#ifdef EIGEN_VECTORIZE_SSE4_1 - HasRound = 1, -#endif - HasRint = 1 + HasBlend = 1 }; }; template <> @@ -213,10 +229,10 @@ struct packet_traits : default_packet_traits { enum { Vectorizable = 1, AlignedOnScalar = 1, - HasCmp = 1, - HasDiv = 1, size = 4, + HasCmp = 1, + HasDiv = 1, HasShift = 1, HasBlend = 1 }; @@ -232,10 +248,22 @@ struct packet_traits : default_packet_traits { HasDiv = 0, HasNegate = 0, - HasSqrt = 0, HasCmp = 1, - HasMin = 1, - HasMax = 1, + HasShift = 1, + HasBlend = 1 + }; +}; +template <> +struct packet_traits : default_packet_traits { + typedef Packet2l type; + typedef Packet2l half; + enum { + Vectorizable = 1, + AlignedOnScalar = 1, + size = 2, + + HasDiv = 0, + HasCmp = 1, HasShift = 1, HasBlend = 1 }; @@ -250,18 +278,15 @@ struct packet_traits : default_packet_traits { AlignedOnScalar = 1, size = 16, - HasAdd = 1, - HasSub = 1, HasCmp = 1, // note -- only pcmp_eq is defined HasShift = 0, - HasMul = 1, - HasNegate = 1, HasAbs = 0, HasAbs2 = 0, HasMin = 0, HasMax = 0, HasConj = 0, HasSqrt = 1, + HasNegate = 0, HasSign = 0 // Don't try to vectorize psign = identity. }; }; @@ -283,6 +308,19 @@ template <> struct unpacket_traits { typedef double type; typedef Packet2d half; + typedef Packet2l integer_packet; + enum { + size = 2, + alignment = Aligned16, + vectorizable = true, + masked_load_available = false, + masked_store_available = false + }; +}; +template <> +struct unpacket_traits { + typedef int64_t type; + typedef Packet2l half; enum { size = 2, alignment = Aligned16, @@ -348,6 +386,10 @@ EIGEN_STRONG_INLINE Packet2d pset1(const double& from) { return _mm_set1_pd(from); } template <> +EIGEN_STRONG_INLINE Packet2l pset1(const int64_t& from) { + return _mm_set1_epi64x(from); +} +template <> EIGEN_STRONG_INLINE Packet4i pset1(const int& from) { return _mm_set1_epi32(from); } @@ -374,6 +416,10 @@ EIGEN_STRONG_INLINE Packet4f peven_mask(const Packet4f& /*a*/) { return _mm_castsi128_ps(_mm_set_epi32(0, -1, 0, -1)); } template <> +EIGEN_STRONG_INLINE Packet2l peven_mask(const Packet2l& /*a*/) { + return _mm_set_epi32(0, 0, -1, -1); +} +template <> EIGEN_STRONG_INLINE Packet4i peven_mask(const Packet4i& /*a*/) { return _mm_set_epi32(0, -1, 0, -1); } @@ -395,6 +441,10 @@ EIGEN_STRONG_INLINE Packet2d pzero(const Packet2d& /*a*/) { return _mm_setzero_pd(); } template <> +EIGEN_STRONG_INLINE Packet2l pzero(const Packet2l& /*a*/) { + return _mm_setzero_si128(); +} +template <> EIGEN_STRONG_INLINE Packet4i pzero(const Packet4i& /*a*/) { return _mm_setzero_si128(); } @@ -424,6 +474,10 @@ EIGEN_STRONG_INLINE Packet2d plset(const double& a) { return _mm_add_pd(pset1(a), _mm_set_pd(1, 0)); } template <> +EIGEN_STRONG_INLINE Packet2l plset(const int64_t& a) { + return _mm_add_epi32(pset1(a), _mm_set_epi64x(1, 0)); +} +template <> EIGEN_STRONG_INLINE Packet4i plset(const int& a) { return _mm_add_epi32(pset1(a), _mm_set_epi32(3, 2, 1, 0)); } @@ -441,6 +495,10 @@ EIGEN_STRONG_INLINE Packet2d padd(const Packet2d& a, const Packet2d& b return _mm_add_pd(a, b); } template <> +EIGEN_STRONG_INLINE Packet2l padd(const Packet2l& a, const Packet2l& b) { + return _mm_add_epi64(a, b); +} +template <> EIGEN_STRONG_INLINE Packet4i padd(const Packet4i& a, const Packet4i& b) { return _mm_add_epi32(a, b); } @@ -474,6 +532,10 @@ EIGEN_STRONG_INLINE Packet2d psub(const Packet2d& a, const Packet2d& b return _mm_sub_pd(a, b); } template <> +EIGEN_STRONG_INLINE Packet2l psub(const Packet2l& a, const Packet2l& b) { + return _mm_sub_epi64(a, b); +} +template <> EIGEN_STRONG_INLINE Packet4i psub(const Packet4i& a, const Packet4i& b) { return _mm_sub_epi32(a, b); } @@ -521,13 +583,13 @@ EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a) { return _mm_xor_pd(a, mask); } template <> -EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) { - return psub(Packet4i(_mm_setr_epi32(0, 0, 0, 0)), a); +EIGEN_STRONG_INLINE Packet2l pnegate(const Packet2l& a) { + return psub(pzero(a), a); } template <> -EIGEN_STRONG_INLINE Packet16b pnegate(const Packet16b& a) { - return psub(pset1(false), a); +EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) { + return psub(pzero(a), a); } template <> @@ -539,6 +601,10 @@ EIGEN_STRONG_INLINE Packet2d pconj(const Packet2d& a) { return a; } template <> +EIGEN_STRONG_INLINE Packet2l pconj(const Packet2l& a) { + return a; +} +template <> EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) { return a; } @@ -552,6 +618,21 @@ EIGEN_STRONG_INLINE Packet2d pmul(const Packet2d& a, const Packet2d& b return _mm_mul_pd(a, b); } template <> +EIGEN_STRONG_INLINE Packet2l pmul(const Packet2l& a, const Packet2l& b) { + // 64-bit mul requires avx512, so do this with 32-bit multiplication + __m128i upper32_a = _mm_srli_epi64(a, 32); + __m128i upper32_b = _mm_srli_epi64(b, 32); + + // upper * lower + __m128i mul1 = _mm_mul_epu32(upper32_a, b); + __m128i mul2 = _mm_mul_epu32(upper32_b, a); + // Gives us both upper*upper and lower*lower + __m128i mul3 = _mm_mul_epu32(a, b); + + __m128i high = _mm_slli_epi64(_mm_add_epi64(mul1, mul2), 32); + return _mm_add_epi64(high, mul3); +} +template <> EIGEN_STRONG_INLINE Packet4i pmul(const Packet4i& a, const Packet4i& b) { #ifdef EIGEN_VECTORIZE_SSE4_1 return _mm_mullo_epi32(a, b); @@ -602,15 +683,6 @@ EIGEN_STRONG_INLINE Packet4i pdiv(const Packet4i& a, const Packet4i& b #endif } -// for some weird raisons, it has to be overloaded for packet of integers -template <> -EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { - return padd(pmul(a, b), c); -} -template <> -EIGEN_STRONG_INLINE Packet4ui pmadd(const Packet4ui& a, const Packet4ui& b, const Packet4ui& c) { - return padd(pmul(a, b), c); -} #ifdef EIGEN_VECTORIZE_FMA template <> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { @@ -659,45 +731,42 @@ EIGEN_STRONG_INLINE Packet2d pmadds(const Packet2d& a, const Packet2d& #ifdef EIGEN_VECTORIZE_SSE4_1 template <> -EIGEN_DEVICE_FUNC inline Packet4f pselect(const Packet4f& mask, const Packet4f& a, const Packet4f& b) { +EIGEN_STRONG_INLINE Packet4f pselect(const Packet4f& mask, const Packet4f& a, const Packet4f& b) { return _mm_blendv_ps(b, a, mask); } template <> -EIGEN_DEVICE_FUNC inline Packet4i pselect(const Packet4i& mask, const Packet4i& a, const Packet4i& b) { +EIGEN_STRONG_INLINE Packet2l pselect(const Packet2l& mask, const Packet2l& a, const Packet2l& b) { + return _mm_castpd_si128(_mm_blendv_pd(_mm_castsi128_pd(b), _mm_castsi128_pd(a), _mm_castsi128_pd(mask))); +} + +template <> +EIGEN_STRONG_INLINE Packet4i pselect(const Packet4i& mask, const Packet4i& a, const Packet4i& b) { return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(b), _mm_castsi128_ps(a), _mm_castsi128_ps(mask))); } template <> -EIGEN_DEVICE_FUNC inline Packet4ui pselect(const Packet4ui& mask, const Packet4ui& a, const Packet4ui& b) { +EIGEN_STRONG_INLINE Packet4ui pselect(const Packet4ui& mask, const Packet4ui& a, const Packet4ui& b) { return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(b), _mm_castsi128_ps(a), _mm_castsi128_ps(mask))); } template <> -EIGEN_DEVICE_FUNC inline Packet2d pselect(const Packet2d& mask, const Packet2d& a, const Packet2d& b) { +EIGEN_STRONG_INLINE Packet2d pselect(const Packet2d& mask, const Packet2d& a, const Packet2d& b) { return _mm_blendv_pd(b, a, mask); } - -template <> -EIGEN_DEVICE_FUNC inline Packet16b pselect(const Packet16b& mask, const Packet16b& a, const Packet16b& b) { - return _mm_blendv_epi8(b, a, mask); -} -#else -template <> -EIGEN_DEVICE_FUNC inline Packet16b pselect(const Packet16b& mask, const Packet16b& a, const Packet16b& b) { - Packet16b a_part = _mm_and_si128(mask, a); - Packet16b b_part = _mm_andnot_si128(mask, b); - return _mm_or_si128(a_part, b_part); -} #endif +template <> +EIGEN_STRONG_INLINE Packet2l ptrue(const Packet2l& a) { + return _mm_cmpeq_epi32(a, a); +} template <> EIGEN_STRONG_INLINE Packet4i ptrue(const Packet4i& a) { return _mm_cmpeq_epi32(a, a); } template <> -EIGEN_STRONG_INLINE Packet16b ptrue(const Packet16b& a) { - return _mm_cmpeq_epi8(a, a); +EIGEN_STRONG_INLINE Packet16b ptrue(const Packet16b& /*a*/) { + return pset1(true); } template <> EIGEN_STRONG_INLINE Packet4f ptrue(const Packet4f& a) { @@ -719,6 +788,10 @@ EIGEN_STRONG_INLINE Packet2d pand(const Packet2d& a, const Packet2d& b return _mm_and_pd(a, b); } template <> +EIGEN_STRONG_INLINE Packet2l pand(const Packet2l& a, const Packet2l& b) { + return _mm_and_si128(a, b); +} +template <> EIGEN_STRONG_INLINE Packet4i pand(const Packet4i& a, const Packet4i& b) { return _mm_and_si128(a, b); } @@ -740,6 +813,10 @@ EIGEN_STRONG_INLINE Packet2d por(const Packet2d& a, const Packet2d& b) return _mm_or_pd(a, b); } template <> +EIGEN_STRONG_INLINE Packet2l por(const Packet2l& a, const Packet2l& b) { + return _mm_or_si128(a, b); +} +template <> EIGEN_STRONG_INLINE Packet4i por(const Packet4i& a, const Packet4i& b) { return _mm_or_si128(a, b); } @@ -761,6 +838,10 @@ EIGEN_STRONG_INLINE Packet2d pxor(const Packet2d& a, const Packet2d& b return _mm_xor_pd(a, b); } template <> +EIGEN_STRONG_INLINE Packet2l pxor(const Packet2l& a, const Packet2l& b) { + return _mm_xor_si128(a, b); +} +template <> EIGEN_STRONG_INLINE Packet4i pxor(const Packet4i& a, const Packet4i& b) { return _mm_xor_si128(a, b); } @@ -782,6 +863,10 @@ EIGEN_STRONG_INLINE Packet2d pandnot(const Packet2d& a, const Packet2d return _mm_andnot_pd(b, a); } template <> +EIGEN_STRONG_INLINE Packet2l pandnot(const Packet2l& a, const Packet2l& b) { + return _mm_andnot_si128(b, a); +} +template <> EIGEN_STRONG_INLINE Packet4i pandnot(const Packet4i& a, const Packet4i& b) { return _mm_andnot_si128(b, a); } @@ -823,7 +908,6 @@ template <> EIGEN_STRONG_INLINE Packet2d pcmp_eq(const Packet2d& a, const Packet2d& b) { return _mm_cmpeq_pd(a, b); } - template <> EIGEN_STRONG_INLINE Packet4i pcmp_lt(const Packet4i& a, const Packet4i& b) { return _mm_cmplt_epi32(a, b); @@ -833,16 +917,45 @@ EIGEN_STRONG_INLINE Packet4i pcmp_eq(const Packet4i& a, const Packet4i& b) { return _mm_cmpeq_epi32(a, b); } template <> -EIGEN_STRONG_INLINE Packet4ui pcmp_eq(const Packet4ui& a, const Packet4ui& b) { - return _mm_cmpeq_epi32(a, b); +EIGEN_STRONG_INLINE Packet4i pcmp_le(const Packet4i& a, const Packet4i& b) { + return por(pcmp_lt(a, b), pcmp_eq(a, b)); +} +template <> +EIGEN_STRONG_INLINE Packet2l pcmp_lt(const Packet2l& a, const Packet2l& b) { +#ifdef EIGEN_VECTORIZE_SSE4_2 + return _mm_cmpgt_epi64(b, a); +#else + Packet4i eq = pcmp_eq(Packet4i(a), Packet4i(b)); + Packet2l hi_eq = Packet2l(_mm_shuffle_epi32(eq, (shuffle_mask<1, 1, 3, 3>::mask))); + Packet4i lt = pcmp_lt(Packet4i(a), Packet4i(b)); + Packet2l hi_lt = Packet2l(_mm_shuffle_epi32(lt, (shuffle_mask<1, 1, 3, 3>::mask))); + Packet2l lo_lt = Packet2l(_mm_shuffle_epi32(lt, (shuffle_mask<0, 0, 2, 2>::mask))); + // return hi(a) < hi(b) || (hi(a) == hi(b) && lo(a) < lo(b)) + return por(hi_lt, pand(hi_eq, lo_lt)); +#endif +} +template <> +EIGEN_STRONG_INLINE Packet2l pcmp_eq(const Packet2l& a, const Packet2l& b) { +#ifdef EIGEN_VECTORIZE_SSE4_1 + return _mm_cmpeq_epi64(a, b); +#else + Packet4i tmp = pcmp_eq(Packet4i(a), Packet4i(b)); + return Packet2l(pand(tmp, _mm_shuffle_epi32(tmp, (shuffle_mask<1, 0, 3, 2>::mask)))); +#endif +} +template <> +EIGEN_STRONG_INLINE Packet2l pcmp_le(const Packet2l& a, const Packet2l& b) { + return por(pcmp_lt(a, b), pcmp_eq(a, b)); } template <> EIGEN_STRONG_INLINE Packet16b pcmp_eq(const Packet16b& a, const Packet16b& b) { - return _mm_cmpeq_epi8(a, b); + // Mask out invalid bool bits to avoid UB. + const Packet16b kBoolMask = pset1(true); + return _mm_and_si128(_mm_cmpeq_epi8(a, b), kBoolMask); } template <> -EIGEN_STRONG_INLINE Packet4i pcmp_le(const Packet4i& a, const Packet4i& b) { - return por(pcmp_lt(a, b), pcmp_eq(a, b)); +EIGEN_STRONG_INLINE Packet4ui pcmp_eq(const Packet4ui& a, const Packet4ui& b) { + return _mm_cmpeq_epi32(a, b); } template <> @@ -886,6 +999,11 @@ EIGEN_STRONG_INLINE Packet2d pmin(const Packet2d& a, const Packet2d& b #endif } template <> +EIGEN_STRONG_INLINE Packet2l pmin(const Packet2l& a, const Packet2l& b) { + Packet2l a_lt_mask = pcmp_lt(a, b); + return por(pandnot(b, a_lt_mask), pand(a, a_lt_mask)); +} +template <> EIGEN_STRONG_INLINE Packet4i pmin(const Packet4i& a, const Packet4i& b) { #ifdef EIGEN_VECTORIZE_SSE4_1 return _mm_min_epi32(a, b); @@ -947,6 +1065,11 @@ EIGEN_STRONG_INLINE Packet2d pmax(const Packet2d& a, const Packet2d& b #endif } template <> +EIGEN_STRONG_INLINE Packet2l pmax(const Packet2l& a, const Packet2l& b) { + Packet2l a_lt_mask = pcmp_lt(a, b); + return por(pandnot(a, a_lt_mask), pand(b, a_lt_mask)); +} +template <> EIGEN_STRONG_INLINE Packet4i pmax(const Packet4i& a, const Packet4i& b) { #ifdef EIGEN_VECTORIZE_SSE4_1 return _mm_max_epi32(a, b); @@ -1038,6 +1161,46 @@ EIGEN_STRONG_INLINE Packet2d pmax(const Packet2d& a, con return pminmax_propagate_nan(a, b, pmax); } +template <> +EIGEN_STRONG_INLINE Packet4f psignbit(const Packet4f& a) { + return _mm_castsi128_ps(_mm_srai_epi32(_mm_castps_si128(a), 31)); +} +template <> +EIGEN_STRONG_INLINE Packet2d psignbit(const Packet2d& a) { + Packet4f tmp = psignbit(_mm_castpd_ps(a)); +#ifdef EIGEN_VECTORIZE_AVX + return _mm_castps_pd(_mm_permute_ps(tmp, (shuffle_mask<1, 1, 3, 3>::mask))); +#else + return _mm_castps_pd(_mm_shuffle_ps(tmp, tmp, (shuffle_mask<1, 1, 3, 3>::mask))); +#endif // EIGEN_VECTORIZE_AVX +} +template <> +EIGEN_STRONG_INLINE Packet4i psignbit(const Packet4i& a) { + return _mm_srai_epi32(a, 31); +} +template <> +EIGEN_STRONG_INLINE Packet4ui psignbit(const Packet4ui& a) { + return pzero(a); +} +template <> +EIGEN_STRONG_INLINE Packet2l psignbit(const Packet2l& a) { + Packet4i tmp = psignbit(Packet4i(a)); + return Packet2l(_mm_shuffle_epi32(tmp, (shuffle_mask<1, 1, 3, 3>::mask))); +} + +template +EIGEN_STRONG_INLINE Packet2l parithmetic_shift_right(const Packet2l& a) { + Packet2l signbit = psignbit(a); + return por(_mm_slli_epi64(signbit, 64 - N), _mm_srli_epi64(a, N)); +} +template +EIGEN_STRONG_INLINE Packet2l plogical_shift_right(const Packet2l& a) { + return _mm_srli_epi64(a, N); +} +template +EIGEN_STRONG_INLINE Packet2l plogical_shift_left(const Packet2l& a) { + return _mm_slli_epi64(a, N); +} template EIGEN_STRONG_INLINE Packet4i parithmetic_shift_right(const Packet4i& a) { return _mm_srai_epi32(a, N); @@ -1050,7 +1213,6 @@ template EIGEN_STRONG_INLINE Packet4i plogical_shift_left(const Packet4i& a) { return _mm_slli_epi32(a, N); } - template EIGEN_STRONG_INLINE Packet4ui parithmetic_shift_right(const Packet4ui& a) { return _mm_srli_epi32(a, N); @@ -1075,12 +1237,17 @@ EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a) { return _mm_and_pd(a, mask); } template <> +EIGEN_STRONG_INLINE Packet2l pabs(const Packet2l& a) { + Packet2l signbit = psignbit(a); + return _mm_sub_epi64(_mm_xor_si128(a, signbit), signbit); +} +template <> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) { #ifdef EIGEN_VECTORIZE_SSSE3 return _mm_abs_epi32(a); #else - Packet4i aux = _mm_srai_epi32(a, 31); - return _mm_sub_epi32(_mm_xor_si128(a, aux), aux); + Packet4i signbit = psignbit(a); + return _mm_sub_epi32(_mm_xor_si128(a, signbit), signbit); #endif } template <> @@ -1088,24 +1255,6 @@ EIGEN_STRONG_INLINE Packet4ui pabs(const Packet4ui& a) { return a; } -template <> -EIGEN_STRONG_INLINE Packet4f psignbit(const Packet4f& a) { - return _mm_castsi128_ps(_mm_srai_epi32(_mm_castps_si128(a), 31)); -} -template <> -EIGEN_STRONG_INLINE Packet2d psignbit(const Packet2d& a) { - Packet4f tmp = psignbit(_mm_castpd_ps(a)); -#ifdef EIGEN_VECTORIZE_AVX - return _mm_castps_pd(_mm_permute_ps(tmp, (shuffle_mask<1, 1, 3, 3>::mask))); -#else - return _mm_castps_pd(_mm_shuffle_ps(tmp, tmp, (shuffle_mask<1, 1, 3, 3>::mask))); -#endif // EIGEN_VECTORIZE_AVX -} -template <> -EIGEN_STRONG_INLINE Packet4ui psignbit(const Packet4ui& a) { - return pzero(a); -} - #ifdef EIGEN_VECTORIZE_SSE4_1 template <> EIGEN_STRONG_INLINE Packet4f pround(const Packet4f& a) { @@ -1148,73 +1297,14 @@ template <> EIGEN_STRONG_INLINE Packet2d pfloor(const Packet2d& a) { return _mm_floor_pd(a); } -#else -template <> -EIGEN_STRONG_INLINE Packet4f print(const Packet4f& a) { - // Adds and subtracts signum(a) * 2^23 to force rounding. - const Packet4f limit = pset1(static_cast(1 << 23)); - const Packet4f abs_a = pabs(a); - Packet4f r = padd(abs_a, limit); - // Don't compile-away addition and subtraction. - EIGEN_OPTIMIZATION_BARRIER(r); - r = psub(r, limit); - // If greater than limit, simply return a. Otherwise, account for sign. - r = pselect(pcmp_lt(abs_a, limit), pselect(pcmp_lt(a, pzero(a)), pnegate(r), r), a); - return r; -} template <> -EIGEN_STRONG_INLINE Packet2d print(const Packet2d& a) { - // Adds and subtracts signum(a) * 2^52 to force rounding. - const Packet2d limit = pset1(static_cast(1ull << 52)); - const Packet2d abs_a = pabs(a); - Packet2d r = padd(abs_a, limit); - // Don't compile-away addition and subtraction. - EIGEN_OPTIMIZATION_BARRIER(r); - r = psub(r, limit); - // If greater than limit, simply return a. Otherwise, account for sign. - r = pselect(pcmp_lt(abs_a, limit), pselect(pcmp_lt(a, pzero(a)), pnegate(r), r), a); - return r; +EIGEN_STRONG_INLINE Packet4f ptrunc(const Packet4f& a) { + return _mm_round_ps(a, _MM_FROUND_TRUNC); } - template <> -EIGEN_STRONG_INLINE Packet4f pfloor(const Packet4f& a) { - const Packet4f cst_1 = pset1(1.0f); - Packet4f tmp = print(a); - // If greater, subtract one. - Packet4f mask = _mm_cmpgt_ps(tmp, a); - mask = pand(mask, cst_1); - return psub(tmp, mask); -} - -template <> -EIGEN_STRONG_INLINE Packet2d pfloor(const Packet2d& a) { - const Packet2d cst_1 = pset1(1.0); - Packet2d tmp = print(a); - // If greater, subtract one. - Packet2d mask = _mm_cmpgt_pd(tmp, a); - mask = pand(mask, cst_1); - return psub(tmp, mask); -} - -template <> -EIGEN_STRONG_INLINE Packet4f pceil(const Packet4f& a) { - const Packet4f cst_1 = pset1(1.0f); - Packet4f tmp = print(a); - // If smaller, add one. - Packet4f mask = _mm_cmplt_ps(tmp, a); - mask = pand(mask, cst_1); - return padd(tmp, mask); -} - -template <> -EIGEN_STRONG_INLINE Packet2d pceil(const Packet2d& a) { - const Packet2d cst_1 = pset1(1.0); - Packet2d tmp = print(a); - // If smaller, add one. - Packet2d mask = _mm_cmplt_pd(tmp, a); - mask = pand(mask, cst_1); - return padd(tmp, mask); +EIGEN_STRONG_INLINE Packet2d ptrunc(const Packet2d& a) { + return _mm_round_pd(a, _MM_FROUND_TRUNC); } #endif @@ -1227,6 +1317,10 @@ EIGEN_STRONG_INLINE Packet2d pload(const double* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_pd(from); } template <> +EIGEN_STRONG_INLINE Packet2l pload(const int64_t* from) { + EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_si128(reinterpret_cast(from)); +} +template <> EIGEN_STRONG_INLINE Packet4i pload(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_si128(reinterpret_cast(from)); } @@ -1261,6 +1355,11 @@ EIGEN_STRONG_INLINE Packet2d ploadu(const double* from) { return _mm_loadu_pd(from); } template <> +EIGEN_STRONG_INLINE Packet2l ploadu(const int64_t* from) { + EIGEN_DEBUG_UNALIGNED_LOAD + return _mm_loadu_si128(reinterpret_cast(from)); +} +template <> EIGEN_STRONG_INLINE Packet4i ploadu(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_si128(reinterpret_cast(from)); @@ -1309,6 +1408,10 @@ EIGEN_STRONG_INLINE Packet2d ploaddup(const double* from) { return pset1(from[0]); } template <> +EIGEN_STRONG_INLINE Packet2l ploaddup(const int64_t* from) { + return pset1(from[0]); +} +template <> EIGEN_STRONG_INLINE Packet4i ploaddup(const int* from) { Packet4i tmp; tmp = _mm_loadl_epi64(reinterpret_cast(from)); @@ -1347,6 +1450,10 @@ EIGEN_STRONG_INLINE void pstore(double* to, const Packet2d& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_pd(to, from); } template <> +EIGEN_STRONG_INLINE void pstore(int64_t* to, const Packet2l& from) { + EIGEN_DEBUG_ALIGNED_STORE _mm_store_si128(reinterpret_cast<__m128i*>(to), from); +} +template <> EIGEN_STRONG_INLINE void pstore(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_si128(reinterpret_cast<__m128i*>(to), from); } @@ -1368,6 +1475,10 @@ EIGEN_STRONG_INLINE void pstoreu(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_ps(to, from); } template <> +EIGEN_STRONG_INLINE void pstoreu(int64_t* to, const Packet2l& from) { + EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_si128(reinterpret_cast<__m128i*>(to), from); +} +template <> EIGEN_STRONG_INLINE void pstoreu(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_si128(reinterpret_cast<__m128i*>(to), from); } @@ -1377,7 +1488,7 @@ EIGEN_STRONG_INLINE void pstoreu(uint32_t* to, const Packet4ui& from) } template <> EIGEN_STRONG_INLINE void pstoreu(bool* to, const Packet16b& from) { - EIGEN_DEBUG_ALIGNED_STORE _mm_storeu_si128(reinterpret_cast<__m128i*>(to), from); + EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_si128(reinterpret_cast<__m128i*>(to), from); } template @@ -1403,25 +1514,142 @@ EIGEN_STRONG_INLINE void pstores(double* to, const Packet2d& from) { } template <> -EIGEN_DEVICE_FUNC inline Packet4f pgather(const float* from, Index stride) { +EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a) { + return _mm_shuffle_ps(a, a, 0x1B); +} +template <> +EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a) { + return _mm_shuffle_pd(a, a, 0x1); +} +template <> +EIGEN_STRONG_INLINE Packet2l preverse(const Packet2l& a) { + return _mm_castpd_si128(preverse(_mm_castsi128_pd(a))); +} +template <> +EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a) { + return _mm_shuffle_epi32(a, 0x1B); +} +template <> +EIGEN_STRONG_INLINE Packet4ui preverse(const Packet4ui& a) { + return _mm_shuffle_epi32(a, 0x1B); +} +template <> +EIGEN_STRONG_INLINE Packet16b preverse(const Packet16b& a) { +#ifdef EIGEN_VECTORIZE_SSSE3 + __m128i mask = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + return _mm_shuffle_epi8(a, mask); +#else + Packet16b tmp = _mm_shuffle_epi32(a, _MM_SHUFFLE(0, 1, 2, 3)); + tmp = _mm_shufflehi_epi16(_mm_shufflelo_epi16(tmp, _MM_SHUFFLE(2, 3, 0, 1)), _MM_SHUFFLE(2, 3, 0, 1)); + return _mm_or_si128(_mm_slli_epi16(tmp, 8), _mm_srli_epi16(tmp, 8)); +#endif +} + +#if EIGEN_COMP_MSVC_STRICT && EIGEN_OS_WIN64 +// The temporary variable fixes an internal compilation error in vs <= 2008 and a wrong-result bug in vs 2010 +// Direct of the struct members fixed bug #62. +template <> +EIGEN_STRONG_INLINE float pfirst(const Packet4f& a) { + return a.m128_f32[0]; +} +template <> +EIGEN_STRONG_INLINE double pfirst(const Packet2d& a) { + return a.m128d_f64[0]; +} +template <> +EIGEN_STRONG_INLINE int64_t pfirst(const Packet2l& a) { + int64_t x = _mm_extract_epi64_0(a); + return x; +} +template <> +EIGEN_STRONG_INLINE int pfirst(const Packet4i& a) { + int x = _mm_cvtsi128_si32(a); + return x; +} +template <> +EIGEN_STRONG_INLINE uint32_t pfirst(const Packet4ui& a) { + uint32_t x = numext::bit_cast(_mm_cvtsi128_si32(a)); + return x; +} +#elif EIGEN_COMP_MSVC_STRICT +// The temporary variable fixes an internal compilation error in vs <= 2008 and a wrong-result bug in vs 2010 +template <> +EIGEN_STRONG_INLINE float pfirst(const Packet4f& a) { + float x = _mm_cvtss_f32(a); + return x; +} +template <> +EIGEN_STRONG_INLINE double pfirst(const Packet2d& a) { + double x = _mm_cvtsd_f64(a); + return x; +} +template <> +EIGEN_STRONG_INLINE int64_t pfirst(const Packet2l& a) { + int64_t x = _mm_extract_epi64_0(a); + return x; +} +template <> +EIGEN_STRONG_INLINE int pfirst(const Packet4i& a) { + int x = _mm_cvtsi128_si32(a); + return x; +} +template <> +EIGEN_STRONG_INLINE uint32_t pfirst(const Packet4ui& a) { + uint32_t x = numext::bit_cast(_mm_cvtsi128_si32(a)); + return x; +} +#else +template <> +EIGEN_STRONG_INLINE float pfirst(const Packet4f& a) { + return _mm_cvtss_f32(a); +} +template <> +EIGEN_STRONG_INLINE double pfirst(const Packet2d& a) { + return _mm_cvtsd_f64(a); +} +template <> +EIGEN_STRONG_INLINE int64_t pfirst(const Packet2l& a) { + return _mm_extract_epi64_0(a); +} +template <> +EIGEN_STRONG_INLINE int pfirst(const Packet4i& a) { + return _mm_cvtsi128_si32(a); +} +template <> +EIGEN_STRONG_INLINE uint32_t pfirst(const Packet4ui& a) { + return numext::bit_cast(_mm_cvtsi128_si32(a)); +} +#endif +template <> +EIGEN_STRONG_INLINE bool pfirst(const Packet16b& a) { + int x = _mm_cvtsi128_si32(a); + return static_cast(x & 1); +} + +template <> +EIGEN_STRONG_INLINE Packet4f pgather(const float* from, Index stride) { return _mm_set_ps(from[3 * stride], from[2 * stride], from[1 * stride], from[0 * stride]); } template <> -EIGEN_DEVICE_FUNC inline Packet2d pgather(const double* from, Index stride) { +EIGEN_STRONG_INLINE Packet2d pgather(const double* from, Index stride) { return _mm_set_pd(from[1 * stride], from[0 * stride]); } template <> -EIGEN_DEVICE_FUNC inline Packet4i pgather(const int* from, Index stride) { +EIGEN_STRONG_INLINE Packet2l pgather(const int64_t* from, Index stride) { + return _mm_set_epi64x(from[1 * stride], from[0 * stride]); +} +template <> +EIGEN_STRONG_INLINE Packet4i pgather(const int* from, Index stride) { return _mm_set_epi32(from[3 * stride], from[2 * stride], from[1 * stride], from[0 * stride]); } template <> -EIGEN_DEVICE_FUNC inline Packet4ui pgather(const uint32_t* from, Index stride) { +EIGEN_STRONG_INLINE Packet4ui pgather(const uint32_t* from, Index stride) { return _mm_set_epi32(numext::bit_cast(from[3 * stride]), numext::bit_cast(from[2 * stride]), numext::bit_cast(from[1 * stride]), numext::bit_cast(from[0 * stride])); } template <> -EIGEN_DEVICE_FUNC inline Packet16b pgather(const bool* from, Index stride) { +EIGEN_STRONG_INLINE Packet16b pgather(const bool* from, Index stride) { return _mm_set_epi8(from[15 * stride], from[14 * stride], from[13 * stride], from[12 * stride], from[11 * stride], from[10 * stride], from[9 * stride], from[8 * stride], from[7 * stride], from[6 * stride], from[5 * stride], from[4 * stride], from[3 * stride], from[2 * stride], from[1 * stride], @@ -1429,33 +1657,38 @@ EIGEN_DEVICE_FUNC inline Packet16b pgather(const bool* from, In } template <> -EIGEN_DEVICE_FUNC inline void pscatter(float* to, const Packet4f& from, Index stride) { - to[stride * 0] = _mm_cvtss_f32(from); - to[stride * 1] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 1)); - to[stride * 2] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 2)); - to[stride * 3] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 3)); +EIGEN_STRONG_INLINE void pscatter(float* to, const Packet4f& from, Index stride) { + to[stride * 0] = pfirst(from); + to[stride * 1] = pfirst(_mm_shuffle_ps(from, from, 1)); + to[stride * 2] = pfirst(_mm_shuffle_ps(from, from, 2)); + to[stride * 3] = pfirst(_mm_shuffle_ps(from, from, 3)); } template <> -EIGEN_DEVICE_FUNC inline void pscatter(double* to, const Packet2d& from, Index stride) { - to[stride * 0] = _mm_cvtsd_f64(from); - to[stride * 1] = _mm_cvtsd_f64(_mm_shuffle_pd(from, from, 1)); +EIGEN_STRONG_INLINE void pscatter(double* to, const Packet2d& from, Index stride) { + to[stride * 0] = pfirst(from); + to[stride * 1] = pfirst(preverse(from)); } template <> -EIGEN_DEVICE_FUNC inline void pscatter(int* to, const Packet4i& from, Index stride) { +EIGEN_STRONG_INLINE void pscatter(int64_t* to, const Packet2l& from, Index stride) { + to[stride * 0] = pfirst(from); + to[stride * 1] = pfirst(preverse(from)); +} +template <> +EIGEN_STRONG_INLINE void pscatter(int* to, const Packet4i& from, Index stride) { to[stride * 0] = _mm_cvtsi128_si32(from); to[stride * 1] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 1)); to[stride * 2] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 2)); to[stride * 3] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 3)); } template <> -EIGEN_DEVICE_FUNC inline void pscatter(uint32_t* to, const Packet4ui& from, Index stride) { +EIGEN_STRONG_INLINE void pscatter(uint32_t* to, const Packet4ui& from, Index stride) { to[stride * 0] = numext::bit_cast(_mm_cvtsi128_si32(from)); to[stride * 1] = numext::bit_cast(_mm_cvtsi128_si32(_mm_shuffle_epi32(from, 1))); to[stride * 2] = numext::bit_cast(_mm_cvtsi128_si32(_mm_shuffle_epi32(from, 2))); to[stride * 3] = numext::bit_cast(_mm_cvtsi128_si32(_mm_shuffle_epi32(from, 3))); } template <> -EIGEN_DEVICE_FUNC inline void pscatter(bool* to, const Packet16b& from, Index stride) { +EIGEN_STRONG_INLINE void pscatter(bool* to, const Packet16b& from, Index stride) { to[4 * stride * 0] = _mm_cvtsi128_si32(from); to[4 * stride * 1] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 1)); to[4 * stride * 2] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 2)); @@ -1495,106 +1728,15 @@ EIGEN_STRONG_INLINE void prefetch(const int* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); } template <> +EIGEN_STRONG_INLINE void prefetch(const int64_t* addr) { + _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); +} +template <> EIGEN_STRONG_INLINE void prefetch(const uint32_t* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); } #endif -#if EIGEN_COMP_MSVC_STRICT && EIGEN_OS_WIN64 -// The temporary variable fixes an internal compilation error in vs <= 2008 and a wrong-result bug in vs 2010 -// Direct of the struct members fixed bug #62. -template <> -EIGEN_STRONG_INLINE float pfirst(const Packet4f& a) { - return a.m128_f32[0]; -} -template <> -EIGEN_STRONG_INLINE double pfirst(const Packet2d& a) { - return a.m128d_f64[0]; -} -template <> -EIGEN_STRONG_INLINE int pfirst(const Packet4i& a) { - int x = _mm_cvtsi128_si32(a); - return x; -} -template <> -EIGEN_STRONG_INLINE uint32_t pfirst(const Packet4ui& a) { - uint32_t x = numext::bit_cast(_mm_cvtsi128_si32(a)); - return x; -} -#elif EIGEN_COMP_MSVC_STRICT -// The temporary variable fixes an internal compilation error in vs <= 2008 and a wrong-result bug in vs 2010 -template <> -EIGEN_STRONG_INLINE float pfirst(const Packet4f& a) { - float x = _mm_cvtss_f32(a); - return x; -} -template <> -EIGEN_STRONG_INLINE double pfirst(const Packet2d& a) { - double x = _mm_cvtsd_f64(a); - return x; -} -template <> -EIGEN_STRONG_INLINE int pfirst(const Packet4i& a) { - int x = _mm_cvtsi128_si32(a); - return x; -} -template <> -EIGEN_STRONG_INLINE uint32_t pfirst(const Packet4ui& a) { - uint32_t x = numext::bit_cast(_mm_cvtsi128_si32(a)); - return x; -} -#else -template <> -EIGEN_STRONG_INLINE float pfirst(const Packet4f& a) { - return _mm_cvtss_f32(a); -} -template <> -EIGEN_STRONG_INLINE double pfirst(const Packet2d& a) { - return _mm_cvtsd_f64(a); -} -template <> -EIGEN_STRONG_INLINE int pfirst(const Packet4i& a) { - return _mm_cvtsi128_si32(a); -} -template <> -EIGEN_STRONG_INLINE uint32_t pfirst(const Packet4ui& a) { - return numext::bit_cast(_mm_cvtsi128_si32(a)); -} -#endif -template <> -EIGEN_STRONG_INLINE bool pfirst(const Packet16b& a) { - int x = _mm_cvtsi128_si32(a); - return static_cast(x & 1); -} - -template <> -EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a) { - return _mm_shuffle_ps(a, a, 0x1B); -} -template <> -EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a) { - return _mm_shuffle_pd(a, a, 0x1); -} -template <> -EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a) { - return _mm_shuffle_epi32(a, 0x1B); -} -template <> -EIGEN_STRONG_INLINE Packet4ui preverse(const Packet4ui& a) { - return _mm_shuffle_epi32(a, 0x1B); -} -template <> -EIGEN_STRONG_INLINE Packet16b preverse(const Packet16b& a) { -#ifdef EIGEN_VECTORIZE_SSSE3 - __m128i mask = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - return _mm_shuffle_epi8(a, mask); -#else - Packet16b tmp = _mm_shuffle_epi32(a, _MM_SHUFFLE(0, 1, 2, 3)); - tmp = _mm_shufflehi_epi16(_mm_shufflelo_epi16(tmp, _MM_SHUFFLE(2, 3, 0, 1)), _MM_SHUFFLE(2, 3, 0, 1)); - return _mm_or_si128(_mm_slli_epi16(tmp, 8), _mm_srli_epi16(tmp, 8)); -#endif -} - template <> EIGEN_STRONG_INLINE Packet4f pfrexp(const Packet4f& a, Packet4f& exponent) { return pfrexp_generic(a, exponent); @@ -1620,6 +1762,7 @@ EIGEN_STRONG_INLINE Packet4f pldexp(const Packet4f& a, const Packet4f& // We specialize pldexp here, since the generic implementation uses Packet2l, which is not well // supported by SSE, and has more range than is needed for exponents. +// TODO(rmlarsen): Remove this specialization once Packet2l has support or casting. template <> EIGEN_STRONG_INLINE Packet2d pldexp(const Packet2d& a, const Packet2d& exponent) { // Clamp exponent to [-2099, 2099] @@ -1700,6 +1843,11 @@ EIGEN_STRONG_INLINE double predux(const Packet2d& a) { // #endif } +template <> +EIGEN_STRONG_INLINE int64_t predux(const Packet2l& a) { + return pfirst(_mm_add_epi64(a, _mm_unpackhi_epi64(a, a))); +} + #ifdef EIGEN_VECTORIZE_SSSE3 template <> EIGEN_STRONG_INLINE int predux(const Packet4i& a) { @@ -1711,7 +1859,6 @@ EIGEN_STRONG_INLINE uint32_t predux(const Packet4ui& a) { Packet4ui tmp0 = _mm_hadd_epi32(a, a); return pfirst(_mm_hadd_epi32(tmp0, tmp0)); } - #else template <> EIGEN_STRONG_INLINE int predux(const Packet4i& a) { @@ -1744,9 +1891,15 @@ EIGEN_STRONG_INLINE double predux_mul(const Packet2d& a) { return pfirst(_mm_mul_sd(a, _mm_unpackhi_pd(a, a))); } template <> +EIGEN_STRONG_INLINE int64_t predux_mul(const Packet2l& a) { + EIGEN_ALIGN16 int64_t aux[2]; + pstore(aux, a); + return aux[0] * aux[1]; +} +template <> EIGEN_STRONG_INLINE int predux_mul(const Packet4i& a) { // after some experiments, it is seems this is the fastest way to implement it - // for GCC (eg., reusing pmul is very slow !) + // for GCC (e.g., reusing pmul is very slow!) // TODO try to call _mm_mul_epu32 directly EIGEN_ALIGN16 int aux[4]; pstore(aux, a); @@ -1856,11 +2009,21 @@ EIGEN_STRONG_INLINE uint32_t predux_max(const Packet4ui& a) { // return _mm_movemask_ps(x) == 0xF; // } +template <> +EIGEN_STRONG_INLINE bool predux_any(const Packet2d& x) { + return _mm_movemask_pd(x) != 0x0; +} + template <> EIGEN_STRONG_INLINE bool predux_any(const Packet4f& x) { return _mm_movemask_ps(x) != 0x0; } +template <> +EIGEN_STRONG_INLINE bool predux_any(const Packet2l& x) { + return _mm_movemask_pd(_mm_castsi128_pd(x)) != 0x0; +} + template <> EIGEN_STRONG_INLINE bool predux_any(const Packet4i& x) { return _mm_movemask_ps(_mm_castsi128_ps(x)) != 0x0; @@ -1870,17 +2033,23 @@ EIGEN_STRONG_INLINE bool predux_any(const Packet4ui& x) { return _mm_movemask_ps(_mm_castsi128_ps(x)) != 0x0; } -EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock& kernel) { +EIGEN_STRONG_INLINE void ptranspose(PacketBlock& kernel) { _MM_TRANSPOSE4_PS(kernel.packet[0], kernel.packet[1], kernel.packet[2], kernel.packet[3]); } -EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock& kernel) { +EIGEN_STRONG_INLINE void ptranspose(PacketBlock& kernel) { __m128d tmp = _mm_unpackhi_pd(kernel.packet[0], kernel.packet[1]); kernel.packet[0] = _mm_unpacklo_pd(kernel.packet[0], kernel.packet[1]); kernel.packet[1] = tmp; } -EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock& kernel) { +EIGEN_STRONG_INLINE void ptranspose(PacketBlock& kernel) { + __m128i tmp = _mm_unpackhi_epi64(kernel.packet[0], kernel.packet[1]); + kernel.packet[0] = _mm_unpacklo_epi64(kernel.packet[0], kernel.packet[1]); + kernel.packet[1] = tmp; +} + +EIGEN_STRONG_INLINE void ptranspose(PacketBlock& kernel) { __m128i T0 = _mm_unpacklo_epi32(kernel.packet[0], kernel.packet[1]); __m128i T1 = _mm_unpacklo_epi32(kernel.packet[2], kernel.packet[3]); __m128i T2 = _mm_unpackhi_epi32(kernel.packet[0], kernel.packet[1]); @@ -1891,11 +2060,11 @@ EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock& kernel) { kernel.packet[2] = _mm_unpacklo_epi64(T2, T3); kernel.packet[3] = _mm_unpackhi_epi64(T2, T3); } -EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock& kernel) { +EIGEN_STRONG_INLINE void ptranspose(PacketBlock& kernel) { ptranspose((PacketBlock&)kernel); } -EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock& kernel) { +EIGEN_STRONG_INLINE void ptranspose(PacketBlock& kernel) { __m128i T0 = _mm_unpacklo_epi8(kernel.packet[0], kernel.packet[1]); __m128i T1 = _mm_unpackhi_epi8(kernel.packet[0], kernel.packet[1]); __m128i T2 = _mm_unpacklo_epi8(kernel.packet[2], kernel.packet[3]); @@ -1906,7 +2075,7 @@ EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock& kernel) { kernel.packet[3] = _mm_unpackhi_epi16(T1, T3); } -EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock& kernel) { +EIGEN_STRONG_INLINE void ptranspose(PacketBlock& kernel) { // If we number the elements in the input thus: // kernel.packet[ 0] = {00, 01, 02, 03, 04, 05, 06, 07, 08, 09, 0a, 0b, 0c, 0d, 0e, 0f} // kernel.packet[ 1] = {10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 1a, 1b, 1c, 1d, 1e, 1f} @@ -1992,17 +2161,25 @@ EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock& kernel) { kernel.packet[15] = _mm_unpackhi_epi64(u7, uf); } +EIGEN_STRONG_INLINE __m128i sse_blend_mask(const Selector<2>& ifPacket) { + return _mm_set_epi64x(0 - ifPacket.select[1], 0 - ifPacket.select[0]); +} + +EIGEN_STRONG_INLINE __m128i sse_blend_mask(const Selector<4>& ifPacket) { + return _mm_set_epi32(0 - ifPacket.select[3], 0 - ifPacket.select[2], 0 - ifPacket.select[1], 0 - ifPacket.select[0]); +} + +template <> +EIGEN_STRONG_INLINE Packet2l pblend(const Selector<2>& ifPacket, const Packet2l& thenPacket, + const Packet2l& elsePacket) { + const __m128i true_mask = sse_blend_mask(ifPacket); + return pselect(true_mask, thenPacket, elsePacket); +} template <> EIGEN_STRONG_INLINE Packet4i pblend(const Selector<4>& ifPacket, const Packet4i& thenPacket, const Packet4i& elsePacket) { - const __m128i zero = _mm_setzero_si128(); - const __m128i select = _mm_set_epi32(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]); - __m128i false_mask = _mm_cmpeq_epi32(select, zero); -#ifdef EIGEN_VECTORIZE_SSE4_1 - return _mm_blendv_epi8(thenPacket, elsePacket, false_mask); -#else - return _mm_or_si128(_mm_andnot_si128(false_mask, thenPacket), _mm_and_si128(false_mask, elsePacket)); -#endif + const __m128i true_mask = sse_blend_mask(ifPacket); + return pselect(true_mask, thenPacket, elsePacket); } template <> EIGEN_STRONG_INLINE Packet4ui pblend(const Selector<4>& ifPacket, const Packet4ui& thenPacket, @@ -2012,26 +2189,14 @@ EIGEN_STRONG_INLINE Packet4ui pblend(const Selector<4>& ifPacket, const Packet4u template <> EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, const Packet4f& thenPacket, const Packet4f& elsePacket) { - const __m128 zero = _mm_setzero_ps(); - const __m128 select = _mm_set_ps(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]); - __m128 false_mask = _mm_cmpeq_ps(select, zero); -#ifdef EIGEN_VECTORIZE_SSE4_1 - return _mm_blendv_ps(thenPacket, elsePacket, false_mask); -#else - return _mm_or_ps(_mm_andnot_ps(false_mask, thenPacket), _mm_and_ps(false_mask, elsePacket)); -#endif + const __m128i true_mask = sse_blend_mask(ifPacket); + return pselect(_mm_castsi128_ps(true_mask), thenPacket, elsePacket); } template <> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, const Packet2d& thenPacket, const Packet2d& elsePacket) { - const __m128d zero = _mm_setzero_pd(); - const __m128d select = _mm_set_pd(ifPacket.select[1], ifPacket.select[0]); - __m128d false_mask = _mm_cmpeq_pd(select, zero); -#ifdef EIGEN_VECTORIZE_SSE4_1 - return _mm_blendv_pd(thenPacket, elsePacket, false_mask); -#else - return _mm_or_pd(_mm_andnot_pd(false_mask, thenPacket), _mm_and_pd(false_mask, elsePacket)); -#endif + const __m128i true_mask = sse_blend_mask(ifPacket); + return pselect(_mm_castsi128_pd(true_mask), thenPacket, elsePacket); } // Scalar path for pmadd with FMA to ensure consistency with vectorized path. @@ -2199,11 +2364,6 @@ struct packet_traits : default_packet_traits { HasMax = 0, HasConj = 0, HasSetLinear = 0, - HasSqrt = 0, - HasRsqrt = 0, - HasExp = 0, - HasLog = 0, - HasBlend = 0 }; }; diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/SSE/TypeCasting.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/SSE/TypeCasting.h index cbc6d4714e..9a7732a60d 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/SSE/TypeCasting.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/SSE/TypeCasting.h @@ -37,6 +37,13 @@ template <> struct type_casting_traits : vectorized_type_casting_traits {}; template <> struct type_casting_traits : vectorized_type_casting_traits {}; + +#ifndef EIGEN_VECTORIZE_AVX2 +template <> +struct type_casting_traits : vectorized_type_casting_traits {}; +template <> +struct type_casting_traits : vectorized_type_casting_traits {}; +#endif #endif template <> @@ -79,6 +86,22 @@ EIGEN_STRONG_INLINE Packet4i pcast(const Packet2d& a, const (1 << 2) | (1 << 6))); } +template <> +EIGEN_STRONG_INLINE Packet2l pcast(const Packet2d& a) { +#if EIGEN_ARCH_x86_64 + return _mm_set_epi64x(_mm_cvttsd_si64(preverse(a)), _mm_cvttsd_si64(a)); +#else + return _mm_set_epi64x(static_cast(pfirst(preverse(a))), static_cast(pfirst(a))); +#endif +} + +template <> +EIGEN_STRONG_INLINE Packet2d pcast(const Packet2l& a) { + EIGEN_ALIGN16 int64_t aux[2]; + pstore(aux, a); + return _mm_set_pd(static_cast(aux[1]), static_cast(aux[0])); +} + template <> EIGEN_STRONG_INLINE Packet4f pcast(const Packet4i& a) { return _mm_cvtepi32_ps(a); @@ -126,6 +149,15 @@ EIGEN_STRONG_INLINE Packet2d preinterpret(const Packet4i& a) return _mm_castsi128_pd(a); } +template <> +EIGEN_STRONG_INLINE Packet2d preinterpret(const Packet2l& a) { + return _mm_castsi128_pd(a); +} +template <> +EIGEN_STRONG_INLINE Packet2l preinterpret(const Packet2d& a) { + return _mm_castpd_si128(a); +} + template <> EIGEN_STRONG_INLINE Packet4i preinterpret(const Packet2d& a) { return _mm_castpd_si128(a); @@ -140,6 +172,7 @@ template <> EIGEN_STRONG_INLINE Packet4i preinterpret(const Packet4ui& a) { return Packet4i(a); } + // Disable the following code since it's broken on too many platforms / compilers. // #elif defined(EIGEN_VECTORIZE_SSE) && (!EIGEN_ARCH_x86_64) && (!EIGEN_COMP_MSVC) #if 0 diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/functors/UnaryFunctors.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/functors/UnaryFunctors.h index 3c7dfb769c..c1bbc7c28f 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/functors/UnaryFunctors.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/functors/UnaryFunctors.h @@ -24,7 +24,7 @@ namespace internal { */ template struct scalar_opposite_op { - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& a) const { return -a; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& a) const { return numext::negate(a); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const { return internal::pnegate(a); @@ -219,7 +219,9 @@ struct functor_traits> { */ template struct scalar_shift_right_op { - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& a) const { return a >> N; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& a) const { + return numext::arithmetic_shift_right(a); + } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const { return internal::parithmetic_shift_right(a); @@ -237,7 +239,9 @@ struct functor_traits> { */ template struct scalar_shift_left_op { - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& a) const { return a << N; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& a) const { + return numext::logical_shift_left(a); + } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const { return internal::plogical_shift_left(a); @@ -286,9 +290,10 @@ struct functor_traits> { template struct scalar_real_ref_op { typedef typename NumTraits::Real result_type; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type& operator()(const Scalar& a) const { - return numext::real_ref(*const_cast(&a)); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type& operator()(const Scalar& a) const { + return numext::real_ref(a); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type& operator()(Scalar& a) const { return numext::real_ref(a); } }; template struct functor_traits> { @@ -303,8 +308,9 @@ struct functor_traits> { template struct scalar_imag_ref_op { typedef typename NumTraits::Real result_type; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type& operator()(const Scalar& a) const { - return numext::imag_ref(*const_cast(&a)); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type& operator()(Scalar& a) const { return numext::imag_ref(a); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type& operator()(const Scalar& a) const { + return numext::imag_ref(a); } }; template @@ -453,8 +459,9 @@ struct functor_traits> { */ template struct scalar_log2_op { + using RealScalar = typename NumTraits::Real; EIGEN_DEVICE_FUNC inline const Scalar operator()(const Scalar& a) const { - return Scalar(EIGEN_LOG2E) * numext::log(a); + return Scalar(RealScalar(EIGEN_LOG2E)) * numext::log(a); } template EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { @@ -879,7 +886,7 @@ template struct functor_traits> { enum { Cost = NumTraits::MulCost, - PacketAccess = packet_traits::HasFloor || NumTraits::IsInteger + PacketAccess = packet_traits::HasRound || NumTraits::IsInteger }; }; @@ -899,7 +906,7 @@ template struct functor_traits> { enum { Cost = NumTraits::MulCost, - PacketAccess = packet_traits::HasRint || NumTraits::IsInteger + PacketAccess = packet_traits::HasRound || NumTraits::IsInteger }; }; @@ -919,7 +926,27 @@ template struct functor_traits> { enum { Cost = NumTraits::MulCost, - PacketAccess = packet_traits::HasCeil || NumTraits::IsInteger + PacketAccess = packet_traits::HasRound || NumTraits::IsInteger + }; +}; + +/** \internal + * \brief Template functor to compute the truncation of a scalar + * \sa class CwiseUnaryOp, ArrayBase::floor() + */ +template +struct scalar_trunc_op { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& a) const { return numext::trunc(a); } + template + EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { + return internal::ptrunc(a); + } +}; +template +struct functor_traits> { + enum { + Cost = NumTraits::MulCost, + PacketAccess = packet_traits::HasRound || NumTraits::IsInteger }; }; @@ -1091,12 +1118,9 @@ struct functor_traits> { }; }; -/** \internal - * \brief Template functor to compute the logistic function of a scalar - * \sa class CwiseUnaryOp, ArrayBase::logistic() - */ -template -struct scalar_logistic_op { +// Real-valued implementation. +template +struct scalar_logistic_op_impl { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator()(const T& x) const { return packetOp(x); } template @@ -1109,9 +1133,25 @@ struct scalar_logistic_op { } }; +// Complex-valud implementation. +template +struct scalar_logistic_op_impl::IsComplex>> { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator()(const T& x) const { + const T e = numext::exp(x); + return (numext::isinf)(numext::real(e)) ? T(1) : e / (e + T(1)); + } +}; + +/** \internal + * \brief Template functor to compute the logistic function of a scalar + * \sa class CwiseUnaryOp, ArrayBase::logistic() + */ +template +struct scalar_logistic_op : scalar_logistic_op_impl {}; + // TODO(rmlarsen): Enable the following on host when integer_packet is defined // for the relevant packet types. -#ifdef EIGEN_GPU_CC +#ifndef EIGEN_GPUCC /** \internal * \brief Template specialization of the logistic function for float. @@ -1206,7 +1246,7 @@ struct functor_traits> { Cost = scalar_div_cost::HasDiv>::value + (internal::is_same::value ? NumTraits::AddCost * 15 + NumTraits::MulCost * 11 : NumTraits::AddCost * 2 + functor_traits>::Cost), - PacketAccess = packet_traits::HasAdd && packet_traits::HasDiv && + PacketAccess = !NumTraits::IsComplex && packet_traits::HasAdd && packet_traits::HasDiv && (internal::is_same::value ? packet_traits::HasMul && packet_traits::HasMax && packet_traits::HasMin : packet_traits::HasNegate && packet_traits::HasExp) diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/products/GeneralBlockPanelKernel.h index 647a7dde7f..c4fa771e22 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/products/GeneralBlockPanelKernel.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/products/GeneralBlockPanelKernel.h @@ -140,7 +140,7 @@ void evaluateProductBlockingSizesHeuristic(Index& k, Index& m, Index& n, Index n typedef typename Traits::ResScalar ResScalar; enum { kdiv = KcFactor * (Traits::mr * sizeof(LhsScalar) + Traits::nr * sizeof(RhsScalar)), - ksub = Traits::mr * Traits::nr * sizeof(ResScalar), + ksub = Traits::mr * (Traits::nr * sizeof(ResScalar)), kr = 8, mr = Traits::mr, nr = Traits::nr @@ -197,7 +197,7 @@ void evaluateProductBlockingSizesHeuristic(Index& k, Index& m, Index& n, Index n enum { k_peeling = 8, k_div = KcFactor * (Traits::mr * sizeof(LhsScalar) + Traits::nr * sizeof(RhsScalar)), - k_sub = Traits::mr * Traits::nr * sizeof(ResScalar) + k_sub = Traits::mr * (Traits::nr * sizeof(ResScalar)) }; // ---- 1st level of blocking on L1, yields kc ---- @@ -2399,7 +2399,7 @@ EIGEN_DONT_INLINE void gebp_kernel::size < 4) || - (unpacket_traits::size % (unpacket_traits::size / 4)) == 0; + (unpacket_traits::size % ((std::max)(unpacket_traits::size, 4) / 4)) == 0; if (kCanLoadSRhsQuad && (SwappedTraits::LhsProgress % 4) == 0 && (SwappedTraits::LhsProgress <= 16) && (SwappedTraits::LhsProgress != 8 || SResPacketHalfSize == nr) && (SwappedTraits::LhsProgress != 16 || SResPacketQuarterSize == nr)) { diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/products/GeneralMatrixMatrix.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/products/GeneralMatrixMatrix.h index 55fa5ff702..e9d0cae8d4 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/products/GeneralMatrixMatrix.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/products/GeneralMatrixMatrix.h @@ -69,7 +69,7 @@ struct general_matrix_matrix_product pack_rhs; gebp_kernel gebp; -#if defined(EIGEN_HAS_OPENMP) || defined(EIGEN_GEMM_THREADPOOL) +#if !defined(EIGEN_USE_BLAS) && (defined(EIGEN_HAS_OPENMP) || defined(EIGEN_GEMM_THREADPOOL)) if (info) { // this is the parallel version! int tid = info->logical_thread_id; diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/products/TriangularMatrixVector.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/products/TriangularMatrixVector.h index 413f0ee18d..05a5827a96 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/products/TriangularMatrixVector.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/products/TriangularMatrixVector.h @@ -287,21 +287,39 @@ struct trmv_selector { constexpr bool DirectlyUseRhs = ActualRhsTypeCleaned::InnerStrideAtCompileTime == 1; + const RhsScalar* actualRhsPtr = actualRhs.data(); + + // Potentially create a temporary buffer to copy RHS to contiguous memory. gemv_static_vector_if - static_rhs; - - ei_declare_aligned_stack_constructed_variable( - RhsScalar, actualRhsPtr, actualRhs.size(), - DirectlyUseRhs ? const_cast(actualRhs.data()) : static_rhs.data()); - + static_rhs; // Fixed-sized array. + RhsScalar* buffer = nullptr; if (!DirectlyUseRhs) { + // Maybe used fixed-sized buffer, otherwise allocate. + if (static_rhs.data() != nullptr) { + buffer = static_rhs.data(); + } else { + // Allocate either with alloca or malloc. + Eigen::internal::check_size_for_overflow(actualRhs.size()); +#ifdef EIGEN_ALLOCA + buffer = static_cast((sizeof(RhsScalar) * actualRhs.size() <= EIGEN_STACK_ALLOCATION_LIMIT) + ? EIGEN_ALIGNED_ALLOCA(sizeof(RhsScalar) * actualRhs.size()) + : Eigen::internal::aligned_malloc(sizeof(RhsScalar) * actualRhs.size())); +#else + buffer = static_cast(Eigen::internal::aligned_malloc(sizeof(RhsScalar) * actualRhs.size())); +#endif + } #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN Index size = actualRhs.size(); EIGEN_DENSE_STORAGE_CTOR_PLUGIN #endif - Map(actualRhsPtr, actualRhs.size()) = actualRhs; + Map(buffer, actualRhs.size()) = actualRhs; + actualRhsPtr = buffer; } + // Deallocate only if malloced. + Eigen::internal::aligned_stack_memory_handler buffer_stack_memory_destructor( + buffer, actualRhs.size(), + !DirectlyUseRhs && static_rhs.data() == nullptr && actualRhs.size() > EIGEN_STACK_ALLOCATION_LIMIT); internal::triangular_matrix_vector_product::run(actualLhs.rows(), diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/products/TriangularSolverMatrix.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/products/TriangularSolverMatrix.h index f9b2ad0373..2122af956d 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/products/TriangularSolverMatrix.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/products/TriangularSolverMatrix.h @@ -57,7 +57,7 @@ EIGEN_STRONG_INLINE void trsmKernelL // We need to #undef all these ugly tokens defined in // => use __vector instead of vector @@ -365,6 +369,7 @@ extern "C" { #define EIGEN_VECTORIZE #define EIGEN_VECTORIZE_ALTIVEC +#define EIGEN_VECTORIZE_FMA #include // We need to #undef all these ugly tokens defined in // => use __vector instead of vector @@ -431,6 +436,11 @@ extern "C" { #include #endif +// Enable FMA for ARM. +#if defined(__ARM_FEATURE_FMA) +#define EIGEN_VECTORIZE_FMA +#endif + #if defined(__F16C__) && !defined(EIGEN_GPUCC) && (!EIGEN_COMP_CLANG_STRICT || EIGEN_CLANG_STRICT_AT_LEAST(3, 8, 0)) // We can use the optimized fp16 to float and float to fp16 conversion routines #define EIGEN_HAS_FP16_C diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/util/Constants.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/util/Constants.h index 8b06c676b6..9f4a2d8ef0 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/util/Constants.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/util/Constants.h @@ -29,9 +29,9 @@ const int Dynamic = -1; */ const int DynamicIndex = 0xffffff; -/** This value means that the increment to go from one value to another in a sequence is not constant for each step. +/** This value means that the requested value is not defined. */ -const int UndefinedIncr = 0xfffffe; +const int Undefined = 0xfffffe; /** This value means +Infinity; it is currently used only as the p parameter to MatrixBase::lpNorm(). * The value Infinity there means the L-infinity norm. diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/util/EmulateArray.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/util/EmulateArray.h index 2b11552454..f2fd10bb16 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/util/EmulateArray.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/util/EmulateArray.h @@ -27,16 +27,14 @@ class array { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE iterator end() { return values + n; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const_iterator end() const { return values + n; } -#if !defined(EIGEN_GPUCC) typedef std::reverse_iterator reverse_iterator; typedef std::reverse_iterator const_reverse_iterator; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE reverse_iterator rbegin() { return reverse_iterator(end()); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const_reverse_iterator rbegin() const { return const_reverse_iterator(end()); } + EIGEN_STRONG_INLINE reverse_iterator rbegin() { return reverse_iterator(end()); } + EIGEN_STRONG_INLINE const_reverse_iterator rbegin() const { return const_reverse_iterator(end()); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE reverse_iterator rend() { return reverse_iterator(begin()); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const_reverse_iterator rend() const { return const_reverse_iterator(begin()); } -#endif + EIGEN_STRONG_INLINE reverse_iterator rend() { return reverse_iterator(begin()); } + EIGEN_STRONG_INLINE const_reverse_iterator rend() const { return const_reverse_iterator(begin()); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T& operator[](size_t index) { eigen_internal_assert(index < size()); @@ -204,19 +202,19 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T& array_get(const array& a) { template struct array_size > { - enum { value = N }; + static constexpr Index value = N; }; template struct array_size&> { - enum { value = N }; + static constexpr Index value = N; }; template struct array_size > { - enum { value = N }; + static constexpr Index value = N; }; template struct array_size&> { - enum { value = N }; + static constexpr Index value = N; }; } // end namespace internal diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/util/ForwardDeclarations.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/util/ForwardDeclarations.h index c312939ca8..2f2ba9b202 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/util/ForwardDeclarations.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/util/ForwardDeclarations.h @@ -91,6 +91,8 @@ template class IndexedView; template class Reshaped; +template +class ArithmeticSequence; template class VectorBlock; diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/util/IndexedViewHelper.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/util/IndexedViewHelper.h index 3b451084b2..59486ea50a 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/util/IndexedViewHelper.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/util/IndexedViewHelper.h @@ -17,6 +17,9 @@ namespace Eigen { namespace internal { struct symbolic_last_tag {}; + +struct all_t {}; + } // namespace internal namespace placeholders { @@ -42,126 +45,7 @@ typedef symbolic::SymbolExpr last_t; * * \sa end */ -static const last_t last; - -} // namespace placeholders - -namespace internal { - -// Replace symbolic last/end "keywords" by their true runtime value -inline Index eval_expr_given_size(Index x, Index /* size */) { return x; } - -template -FixedInt eval_expr_given_size(FixedInt x, Index /*size*/) { - return x; -} - -template -Index eval_expr_given_size(const symbolic::BaseExpr& x, Index size) { - return x.derived().eval(Eigen::placeholders::last = size - 1); -} - -// Extract increment/step at compile time -template -struct get_compile_time_incr { - enum { value = UndefinedIncr }; -}; - -// Analogue of std::get<0>(x), but tailored for our needs. -template -EIGEN_CONSTEXPR Index first(const T& x) EIGEN_NOEXCEPT { - return x.first(); -} - -// IndexedViewCompatibleType/makeIndexedViewCompatible turn an arbitrary object of type T into something usable by -// MatrixSlice The generic implementation is a no-op -template -struct IndexedViewCompatibleType { - typedef T type; -}; - -template -const T& makeIndexedViewCompatible(const T& x, Index /*size*/, Q) { - return x; -} - -//-------------------------------------------------------------------------------- -// Handling of a single Index -//-------------------------------------------------------------------------------- - -struct SingleRange { - enum { SizeAtCompileTime = 1 }; - SingleRange(Index val) : m_value(val) {} - Index operator[](Index) const { return m_value; } - static EIGEN_CONSTEXPR Index size() EIGEN_NOEXCEPT { return 1; } - Index first() const EIGEN_NOEXCEPT { return m_value; } - Index m_value; -}; - -template <> -struct get_compile_time_incr { - enum { value = 1 }; // 1 or 0 ?? -}; - -// Turn a single index into something that looks like an array (i.e., that exposes a .size(), and operator[](int) -// methods) -template -struct IndexedViewCompatibleType::value>> { - // Here we could simply use Array, but maybe it's less work for the compiler to use - // a simpler wrapper as SingleRange - // typedef Eigen::Array type; - typedef SingleRange type; -}; - -template -struct IndexedViewCompatibleType::value>> { - typedef SingleRange type; -}; - -template -std::enable_if_t::value, SingleRange> makeIndexedViewCompatible(const T& id, Index size, - SpecializedType) { - return eval_expr_given_size(id, size); -} - -//-------------------------------------------------------------------------------- -// Handling of all -//-------------------------------------------------------------------------------- - -struct all_t { - all_t() {} -}; - -// Convert a symbolic 'all' into a usable range type -template -struct AllRange { - enum { SizeAtCompileTime = XprSize }; - AllRange(Index size = XprSize) : m_size(size) {} - EIGEN_CONSTEXPR Index operator[](Index i) const EIGEN_NOEXCEPT { return i; } - EIGEN_CONSTEXPR Index size() const EIGEN_NOEXCEPT { return m_size.value(); } - EIGEN_CONSTEXPR Index first() const EIGEN_NOEXCEPT { return 0; } - variable_if_dynamic m_size; -}; - -template -struct IndexedViewCompatibleType { - typedef AllRange type; -}; - -template -inline AllRange::value> makeIndexedViewCompatible(all_t, XprSizeType size, - SpecializedType) { - return AllRange::value>(size); -} - -template -struct get_compile_time_incr> { - enum { value = 1 }; -}; - -} // end namespace internal - -namespace placeholders { +static constexpr const last_t last; typedef symbolic::AddExpr, symbolic::ValueExpr>> @@ -181,28 +65,423 @@ typedef Eigen::internal::all_t all_t; * \sa last */ #ifdef EIGEN_PARSED_BY_DOXYGEN -static const auto lastp1 = last + fix<1>; +static constexpr auto lastp1 = last + fix<1>; #else // Using a FixedExpr<1> expression is important here to make sure the compiler // can fully optimize the computation starting indices with zero overhead. -static const lastp1_t lastp1(last + fix<1>()); +static constexpr lastp1_t lastp1(last + fix<1>()); #endif /** \var end * \ingroup Core_Module * \sa lastp1 */ -static const lastp1_t end = lastp1; +static constexpr lastp1_t end = lastp1; /** \var all * \ingroup Core_Module * Can be used as a parameter to DenseBase::operator()(const RowIndices&, const ColIndices&) to index all rows or * columns */ -static const Eigen::internal::all_t all; +static constexpr Eigen::internal::all_t all; } // namespace placeholders +namespace internal { + +// Evaluate a symbolic expression or constant given the "size" of an object, allowing +// any symbols like `last` to be evaluated. The default here assumes a dynamic constant. +template +struct SymbolicExpressionEvaluator { + static constexpr Index ValueAtCompileTime = Undefined; + static Index eval(const Expr& expr, Index /*size*/) { return static_cast(expr); } +}; + +// Symbolic expression with size known at compile-time. +template +struct SymbolicExpressionEvaluator::value>> { + static constexpr Index ValueAtCompileTime = + Expr::Derived::eval_at_compile_time(Eigen::placeholders::last = fix); + static Index eval(const Expr& expr, Index /*size*/) { + return expr.eval(Eigen::placeholders::last = fix); + } +}; + +// Symbolic expression with dynamic size. +template +struct SymbolicExpressionEvaluator::value>> { + static constexpr Index ValueAtCompileTime = Undefined; + static Index eval(const Expr& expr, Index size) { return expr.eval(Eigen::placeholders::last = size - 1); } +}; + +// Fixed int. +template +struct SymbolicExpressionEvaluator, SizeAtCompileTime, void> { + static constexpr Index ValueAtCompileTime = static_cast(N); + static Index eval(const FixedInt& /*expr*/, Index /*size*/) { return ValueAtCompileTime; } +}; + +//-------------------------------------------------------------------------------- +// Handling of generic indices (e.g. array) +//-------------------------------------------------------------------------------- + +// Potentially wrap indices in a type that is better-suited for IndexedView evaluation. +template +struct IndexedViewHelperIndicesWrapper { + using type = Indices; + static const type& CreateIndexSequence(const Indices& indices, Index /*nested_size*/) { return indices; } +}; + +// Extract compile-time and runtime first, size, increments. +template +struct IndexedViewHelper { + static constexpr Index FirstAtCompileTime = Undefined; + static constexpr Index SizeAtCompileTime = array_size::value; + static constexpr Index IncrAtCompileTime = Undefined; + + static constexpr Index first(const Indices& indices) { return static_cast(indices[0]); } + static constexpr Index size(const Indices& indices) { return index_list_size(indices); } + static constexpr Index incr(const Indices& /*indices*/) { return Undefined; } +}; + +//-------------------------------------------------------------------------------- +// Handling of ArithmeticSequence +//-------------------------------------------------------------------------------- + +template +class ArithmeticSequenceRange { + public: + static constexpr Index FirstAtCompileTime = FirstAtCompileTime_; + static constexpr Index SizeAtCompileTime = SizeAtCompileTime_; + static constexpr Index IncrAtCompileTime = IncrAtCompileTime_; + + constexpr ArithmeticSequenceRange(Index first, Index size, Index incr) : first_{first}, size_{size}, incr_{incr} {} + constexpr Index operator[](Index i) const { return first() + i * incr(); } + constexpr Index first() const noexcept { return first_.value(); } + constexpr Index size() const noexcept { return size_.value(); } + constexpr Index incr() const noexcept { return incr_.value(); } + + private: + variable_if_dynamicindex first_; + variable_if_dynamic size_; + variable_if_dynamicindex incr_; +}; + +template +struct IndexedViewHelperIndicesWrapper, NestedSizeAtCompileTime, + void> { + static constexpr Index EvalFirstAtCompileTime = + SymbolicExpressionEvaluator::ValueAtCompileTime; + static constexpr Index EvalSizeAtCompileTime = + SymbolicExpressionEvaluator::ValueAtCompileTime; + static constexpr Index EvalIncrAtCompileTime = + SymbolicExpressionEvaluator::ValueAtCompileTime; + + static constexpr Index FirstAtCompileTime = + (int(EvalFirstAtCompileTime) == Undefined) ? Index(DynamicIndex) : EvalFirstAtCompileTime; + static constexpr Index SizeAtCompileTime = + (int(EvalSizeAtCompileTime) == Undefined) ? Index(Dynamic) : EvalSizeAtCompileTime; + static constexpr Index IncrAtCompileTime = + (int(EvalIncrAtCompileTime) == Undefined) ? Index(DynamicIndex) : EvalIncrAtCompileTime; + + using Indices = ArithmeticSequence; + using type = ArithmeticSequenceRange; + + static type CreateIndexSequence(const Indices& indices, Index nested_size) { + Index first = + SymbolicExpressionEvaluator::eval(indices.firstObject(), nested_size); + Index size = + SymbolicExpressionEvaluator::eval(indices.sizeObject(), nested_size); + Index incr = + SymbolicExpressionEvaluator::eval(indices.incrObject(), nested_size); + return type(first, size, incr); + } +}; + +template +struct IndexedViewHelper, void> { + public: + using Indices = ArithmeticSequenceRange; + static constexpr Index FirstAtCompileTime = Indices::FirstAtCompileTime; + static constexpr Index SizeAtCompileTime = Indices::SizeAtCompileTime; + static constexpr Index IncrAtCompileTime = Indices::IncrAtCompileTime; + static Index first(const Indices& indices) { return indices.first(); } + static Index size(const Indices& indices) { return indices.size(); } + static Index incr(const Indices& indices) { return indices.incr(); } +}; + +//-------------------------------------------------------------------------------- +// Handling of a single index. +//-------------------------------------------------------------------------------- + +template +class SingleRange { + public: + static constexpr Index FirstAtCompileTime = ValueAtCompileTime; + static constexpr Index SizeAtCompileTime = Index(1); + static constexpr Index IncrAtCompileTime = Index(1); // Needs to be 1 to be treated as block-like. + + constexpr SingleRange(Index v) noexcept : value_(v) {} + constexpr Index operator[](Index) const noexcept { return first(); } + constexpr Index first() const noexcept { return value_.value(); } + constexpr Index size() const noexcept { return SizeAtCompileTime; } + constexpr Index incr() const noexcept { return IncrAtCompileTime; } + + private: + variable_if_dynamicindex value_; +}; + +template +struct is_single_range : public std::false_type {}; + +template +struct is_single_range> : public std::true_type {}; + +template +struct IndexedViewHelperIndicesWrapper< + SingleIndex, NestedSizeAtCompileTime, + std::enable_if_t::value || symbolic::is_symbolic::value>> { + static constexpr Index EvalValueAtCompileTime = + SymbolicExpressionEvaluator::ValueAtCompileTime; + static constexpr Index ValueAtCompileTime = + (int(EvalValueAtCompileTime) == Undefined) ? Index(DynamicIndex) : EvalValueAtCompileTime; + using type = SingleRange; + static type CreateIndexSequence(const SingleIndex& index, Index nested_size) { + return type(SymbolicExpressionEvaluator::eval(index, nested_size)); + } +}; + +template +struct IndexedViewHelperIndicesWrapper, NestedSizeAtCompileTime, void> { + using type = SingleRange; + static type CreateIndexSequence(const FixedInt& /*index*/) { return type(Index(N)); } +}; + +template +struct IndexedViewHelper, void> { + using Indices = SingleRange; + static constexpr Index FirstAtCompileTime = Indices::FirstAtCompileTime; + static constexpr Index SizeAtCompileTime = Indices::SizeAtCompileTime; + static constexpr Index IncrAtCompileTime = Indices::IncrAtCompileTime; + + static constexpr Index first(const Indices& indices) { return indices.first(); } + static constexpr Index size(const Indices& /*indices*/) { return SizeAtCompileTime; } + static constexpr Index incr(const Indices& /*indices*/) { return IncrAtCompileTime; } +}; + +//-------------------------------------------------------------------------------- +// Handling of all +//-------------------------------------------------------------------------------- + +// Convert a symbolic 'all' into a usable range type +template +class AllRange { + public: + static constexpr Index FirstAtCompileTime = Index(0); + static constexpr Index SizeAtCompileTime = SizeAtCompileTime_; + static constexpr Index IncrAtCompileTime = Index(1); + constexpr AllRange(Index size) : size_(size) {} + constexpr Index operator[](Index i) const noexcept { return i; } + constexpr Index first() const noexcept { return FirstAtCompileTime; } + constexpr Index size() const noexcept { return size_.value(); } + constexpr Index incr() const noexcept { return IncrAtCompileTime; } + + private: + variable_if_dynamic size_; +}; + +template +struct IndexedViewHelperIndicesWrapper { + using type = AllRange; + static type CreateIndexSequence(const all_t& /*indices*/, Index nested_size) { return type(nested_size); } +}; + +template +struct IndexedViewHelper, void> { + using Indices = AllRange; + static constexpr Index FirstAtCompileTime = Indices::FirstAtCompileTime; + static constexpr Index SizeAtCompileTime = Indices::SizeAtCompileTime; + static constexpr Index IncrAtCompileTime = Indices::IncrAtCompileTime; + + static Index first(const Indices& indices) { return indices.first(); } + static Index size(const Indices& indices) { return indices.size(); } + static Index incr(const Indices& indices) { return indices.incr(); } +}; + +// this helper class assumes internal::valid_indexed_view_overload::value == true +template +struct IndexedViewSelector; + +template +using IvcType = typename internal::IndexedViewHelperIndicesWrapper::type; + +template +inline IvcType CreateIndexSequence(size_t size, const Indices& indices) { + return internal::IndexedViewHelperIndicesWrapper::CreateIndexSequence(indices, size); +} + +// Generic +template +struct IndexedViewSelector, + IvcType>>::ReturnAsIndexedView>> { + using ReturnType = IndexedView, + IvcType>; + using ConstReturnType = IndexedView, + IvcType>; + + static inline ReturnType run(Derived& derived, const RowIndices& rowIndices, const ColIndices& colIndices) { + return ReturnType(derived, CreateIndexSequence(derived.rows(), rowIndices), + CreateIndexSequence(derived.cols(), colIndices)); + } + static inline ConstReturnType run(const Derived& derived, const RowIndices& rowIndices, + const ColIndices& colIndices) { + return ConstReturnType(derived, CreateIndexSequence(derived.rows(), rowIndices), + CreateIndexSequence(derived.cols(), colIndices)); + } +}; + +// Block +template +struct IndexedViewSelector< + Derived, RowIndices, ColIndices, + std::enable_if_t, + IvcType>>::ReturnAsBlock>> { + using ActualRowIndices = IvcType; + using ActualColIndices = IvcType; + using IndexedViewType = IndexedView; + using ConstIndexedViewType = IndexedView; + using ReturnType = typename internal::traits::BlockType; + using ConstReturnType = typename internal::traits::BlockType; + using RowHelper = internal::IndexedViewHelper; + using ColHelper = internal::IndexedViewHelper; + + static inline ReturnType run(Derived& derived, const RowIndices& rowIndices, const ColIndices& colIndices) { + auto actualRowIndices = CreateIndexSequence(derived.rows(), rowIndices); + auto actualColIndices = CreateIndexSequence(derived.cols(), colIndices); + return ReturnType(derived, RowHelper::first(actualRowIndices), ColHelper::first(actualColIndices), + RowHelper::size(actualRowIndices), ColHelper::size(actualColIndices)); + } + static inline ConstReturnType run(const Derived& derived, const RowIndices& rowIndices, + const ColIndices& colIndices) { + auto actualRowIndices = CreateIndexSequence(derived.rows(), rowIndices); + auto actualColIndices = CreateIndexSequence(derived.cols(), colIndices); + return ConstReturnType(derived, RowHelper::first(actualRowIndices), ColHelper::first(actualColIndices), + RowHelper::size(actualRowIndices), ColHelper::size(actualColIndices)); + } +}; + +// Scalar +template +struct IndexedViewSelector< + Derived, RowIndices, ColIndices, + std::enable_if_t, + IvcType>>::ReturnAsScalar>> { + using ReturnType = typename DenseBase::Scalar&; + using ConstReturnType = typename DenseBase::CoeffReturnType; + using ActualRowIndices = IvcType; + using ActualColIndices = IvcType; + using RowHelper = internal::IndexedViewHelper; + using ColHelper = internal::IndexedViewHelper; + static inline ReturnType run(Derived& derived, const RowIndices& rowIndices, const ColIndices& colIndices) { + auto actualRowIndices = CreateIndexSequence(derived.rows(), rowIndices); + auto actualColIndices = CreateIndexSequence(derived.cols(), colIndices); + return derived(RowHelper::first(actualRowIndices), ColHelper::first(actualColIndices)); + } + static inline ConstReturnType run(const Derived& derived, const RowIndices& rowIndices, + const ColIndices& colIndices) { + auto actualRowIndices = CreateIndexSequence(derived.rows(), rowIndices); + auto actualColIndices = CreateIndexSequence(derived.cols(), colIndices); + return derived(RowHelper::first(actualRowIndices), ColHelper::first(actualColIndices)); + } +}; + +// this helper class assumes internal::is_valid_index_type::value == false +template +struct VectorIndexedViewSelector; + +// Generic +template +struct VectorIndexedViewSelector< + Derived, Indices, + std::enable_if_t>::value && + internal::IndexedViewHelper>::IncrAtCompileTime != + 1>> { + static constexpr bool IsRowMajor = DenseBase::IsRowMajor; + using ZeroIndex = internal::SingleRange; + using RowMajorReturnType = IndexedView>; + using ConstRowMajorReturnType = IndexedView>; + + using ColMajorReturnType = IndexedView, ZeroIndex>; + using ConstColMajorReturnType = IndexedView, ZeroIndex>; + + using ReturnType = typename internal::conditional::type; + using ConstReturnType = + typename internal::conditional::type; + + template = true> + static inline RowMajorReturnType run(Derived& derived, const Indices& indices) { + return RowMajorReturnType(derived, ZeroIndex(0), + CreateIndexSequence(derived.cols(), indices)); + } + template = true> + static inline ConstRowMajorReturnType run(const Derived& derived, const Indices& indices) { + return ConstRowMajorReturnType(derived, ZeroIndex(0), + CreateIndexSequence(derived.cols(), indices)); + } + template = true> + static inline ColMajorReturnType run(Derived& derived, const Indices& indices) { + return ColMajorReturnType(derived, CreateIndexSequence(derived.rows(), indices), + ZeroIndex(0)); + } + template = true> + static inline ConstColMajorReturnType run(const Derived& derived, const Indices& indices) { + return ConstColMajorReturnType(derived, CreateIndexSequence(derived.rows(), indices), + ZeroIndex(0)); + } +}; + +// Block +template +struct VectorIndexedViewSelector< + Derived, Indices, + std::enable_if_t>::value && + internal::IndexedViewHelper>::IncrAtCompileTime == + 1>> { + using Helper = internal::IndexedViewHelper>; + using ReturnType = VectorBlock; + using ConstReturnType = VectorBlock; + static inline ReturnType run(Derived& derived, const Indices& indices) { + auto actualIndices = CreateIndexSequence(derived.size(), indices); + return ReturnType(derived, Helper::first(actualIndices), Helper::size(actualIndices)); + } + static inline ConstReturnType run(const Derived& derived, const Indices& indices) { + auto actualIndices = CreateIndexSequence(derived.size(), indices); + return ConstReturnType(derived, Helper::first(actualIndices), Helper::size(actualIndices)); + } +}; + +// Symbolic +template +struct VectorIndexedViewSelector< + Derived, Indices, + std::enable_if_t>::value>> { + using ReturnType = typename DenseBase::Scalar&; + using ConstReturnType = typename DenseBase::CoeffReturnType; + using Helper = internal::IndexedViewHelper>; + static inline ReturnType run(Derived& derived, const Indices& indices) { + auto actualIndices = CreateIndexSequence(derived.size(), indices); + return derived(Helper::first(actualIndices)); + } + static inline ConstReturnType run(const Derived& derived, const Indices& indices) { + auto actualIndices = CreateIndexSequence(derived.size(), indices); + return derived(Helper::first(actualIndices)); + } +}; + +} // end namespace internal + } // end namespace Eigen #endif // EIGEN_INDEXED_VIEW_HELPER_H diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/util/IntegralConstant.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/util/IntegralConstant.h index 279d553d93..2eb5fd9d03 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/util/IntegralConstant.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/util/IntegralConstant.h @@ -54,65 +54,60 @@ class VariableAndFixedInt; template class FixedInt { public: - static const int value = N; - EIGEN_CONSTEXPR operator int() const { return value; } + static constexpr int value = N; + constexpr operator int() const { return N; } - EIGEN_CONSTEXPR - FixedInt() = default; + constexpr FixedInt() = default; + constexpr FixedInt(std::integral_constant) {} - EIGEN_CONSTEXPR - FixedInt(std::integral_constant) {} - - EIGEN_CONSTEXPR - FixedInt(VariableAndFixedInt other) { + constexpr FixedInt(VariableAndFixedInt other) { #ifndef EIGEN_INTERNAL_DEBUGGING EIGEN_UNUSED_VARIABLE(other); #endif eigen_internal_assert(int(other) == N); } - EIGEN_CONSTEXPR - FixedInt<-N> operator-() const { return FixedInt<-N>(); } + constexpr FixedInt<-N> operator-() const { return FixedInt<-N>(); } template - EIGEN_CONSTEXPR FixedInt operator+(FixedInt) const { + constexpr FixedInt operator+(FixedInt) const { return FixedInt(); } template - EIGEN_CONSTEXPR FixedInt operator-(FixedInt) const { + constexpr FixedInt operator-(FixedInt) const { return FixedInt(); } template - EIGEN_CONSTEXPR FixedInt operator*(FixedInt) const { + constexpr FixedInt operator*(FixedInt) const { return FixedInt(); } template - EIGEN_CONSTEXPR FixedInt operator/(FixedInt) const { + constexpr FixedInt operator/(FixedInt) const { return FixedInt(); } template - EIGEN_CONSTEXPR FixedInt operator%(FixedInt) const { + constexpr FixedInt operator%(FixedInt) const { return FixedInt(); } template - EIGEN_CONSTEXPR FixedInt operator|(FixedInt) const { + constexpr FixedInt operator|(FixedInt) const { return FixedInt(); } template - EIGEN_CONSTEXPR FixedInt operator&(FixedInt) const { + constexpr FixedInt operator&(FixedInt) const { return FixedInt(); } // Needed in C++14 to allow fix(): - EIGEN_CONSTEXPR FixedInt operator()() const { return *this; } + constexpr FixedInt operator()() const { return *this; } - VariableAndFixedInt operator()(int val) const { return VariableAndFixedInt(val); } + constexpr VariableAndFixedInt operator()(int val) const { return VariableAndFixedInt(val); } }; /** \internal diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/util/Macros.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/util/Macros.h index fda6ad91ee..5c8b3a3b3e 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/util/Macros.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/util/Macros.h @@ -716,7 +716,7 @@ (EIGEN_COMP_ICC && EIGEN_COMP_ICC < 1500) || (EIGEN_COMP_NVCC && EIGEN_COMP_NVCC < 80000) || \ (EIGEN_COMP_CLANG_STRICT && EIGEN_COMP_CLANG < 390) || \ (EIGEN_COMP_CLANGAPPLE && EIGEN_COMP_CLANGAPPLE < 9000000) || (EIGEN_COMP_GNUC_STRICT && EIGEN_COMP_GNUC < 510) -#error This compiler appears to be too old to be supported by Eigen +#error Eigen requires at least c++14 support. #endif // Does the compiler support C99? @@ -974,6 +974,10 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void ignore_unused_variable(cons // added then subtracted, which is otherwise compiled away with -ffast-math. // // See bug 1674 +#if defined(EIGEN_GPU_COMPILE_PHASE) +#define EIGEN_OPTIMIZATION_BARRIER(X) +#endif + #if !defined(EIGEN_OPTIMIZATION_BARRIER) #if EIGEN_COMP_GNUC // According to https://gcc.gnu.org/onlinedocs/gcc/Constraints.html: diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/util/Memory.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/util/Memory.h index 31f1057ee5..62534540c9 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/util/Memory.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/util/Memory.h @@ -156,7 +156,7 @@ EIGEN_DEVICE_FUNC inline void* handmade_aligned_malloc(std::size_t size, /** \internal Frees memory allocated with handmade_aligned_malloc */ EIGEN_DEVICE_FUNC inline void handmade_aligned_free(void* ptr) { - if (ptr) { + if (ptr != nullptr) { uint8_t offset = static_cast(*(static_cast(ptr) - 1)); void* original = static_cast(static_cast(ptr) - offset); @@ -224,9 +224,11 @@ EIGEN_DEVICE_FUNC inline void* aligned_malloc(std::size_t size) { EIGEN_DEVICE_FUNC inline void aligned_free(void* ptr) { #if (EIGEN_DEFAULT_ALIGN_BYTES == 0) || EIGEN_MALLOC_ALREADY_ALIGNED - if (ptr) check_that_malloc_is_allowed(); - EIGEN_USING_STD(free) - free(ptr); + if (ptr != nullptr) { + check_that_malloc_is_allowed(); + EIGEN_USING_STD(free) + free(ptr); + } #else handmade_aligned_free(ptr); @@ -294,9 +296,11 @@ EIGEN_DEVICE_FUNC inline void conditional_aligned_free(void* ptr) { template <> EIGEN_DEVICE_FUNC inline void conditional_aligned_free(void* ptr) { - if (ptr) check_that_malloc_is_allowed(); - EIGEN_USING_STD(free) - free(ptr); + if (ptr != nullptr) { + check_that_malloc_is_allowed(); + EIGEN_USING_STD(free) + free(ptr); + } } template diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/util/Meta.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/util/Meta.h index 99cbf5f626..e08e1a3089 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/util/Meta.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/util/Meta.h @@ -303,30 +303,30 @@ class noncopyable { */ template struct array_size { - enum { value = Dynamic }; + static constexpr Index value = Dynamic; }; template struct array_size> { - enum { value = T::SizeAtCompileTime }; + static constexpr Index value = T::SizeAtCompileTime; }; template struct array_size { - enum { value = N }; + static constexpr Index value = N; }; template struct array_size { - enum { value = N }; + static constexpr Index value = N; }; template struct array_size> { - enum { value = N }; + static constexpr Index value = N; }; template struct array_size> { - enum { value = N }; + static constexpr Index value = N; }; /** \internal diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/util/SymbolicIndex.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/util/SymbolicIndex.h index 136942c357..9668f1e94e 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/util/SymbolicIndex.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/util/SymbolicIndex.h @@ -44,6 +44,8 @@ namespace symbolic { template class Symbol; +template +class SymbolValue; template class NegateExpr; template @@ -52,136 +54,123 @@ template class ProductExpr; template class QuotientExpr; - -// A simple wrapper around an integral value to provide the eval method. -// We could also use a free-function symbolic_eval... template -class ValueExpr { - public: - ValueExpr(IndexType val) : m_value(val) {} - template - IndexType eval_impl(const T&) const { - return m_value; - } - - protected: - IndexType m_value; -}; - -// Specialization for compile-time value, -// It is similar to ValueExpr(N) but this version helps the compiler to generate better code. -template -class ValueExpr > { - public: - ValueExpr() {} - template - EIGEN_CONSTEXPR Index eval_impl(const T&) const { - return N; - } -}; +class ValueExpr; /** \class BaseExpr * \ingroup Core_Module * Common base class of any symbolic expressions */ -template +template class BaseExpr { public: - const Derived& derived() const { return *static_cast(this); } + using Derived = Derived_; + constexpr const Derived& derived() const { return *static_cast(this); } /** Evaluate the expression given the \a values of the symbols. * - * \param values defines the values of the symbols, it can either be a SymbolValue or a std::tuple of SymbolValue - * as constructed by SymbolExpr::operator= operator. + * \param values defines the values of the symbols, as constructed by SymbolExpr::operator= operator. * */ - template - Index eval(const T& values) const { - return derived().eval_impl(values); + template + constexpr Index eval(const SymbolValue&... values) const { + return derived().eval_impl(values...); } - template - Index eval(Types&&... values) const { - return derived().eval_impl(std::make_tuple(values...)); + /** Evaluate the expression at compile time given the \a values of the symbols. + * + * If a value is not known at compile-time, returns Eigen::Undefined. + * + */ + template + static constexpr Index eval_at_compile_time(const SymbolValue&...) { + return Derived::eval_at_compile_time_impl(SymbolValue{}...); } - NegateExpr operator-() const { return NegateExpr(derived()); } + constexpr NegateExpr operator-() const { return NegateExpr(derived()); } - AddExpr > operator+(Index b) const { return AddExpr >(derived(), b); } - AddExpr > operator-(Index a) const { return AddExpr >(derived(), -a); } - ProductExpr > operator*(Index a) const { + constexpr AddExpr> operator+(Index b) const { + return AddExpr>(derived(), b); + } + constexpr AddExpr> operator-(Index a) const { + return AddExpr>(derived(), -a); + } + constexpr ProductExpr> operator*(Index a) const { return ProductExpr >(derived(), a); } - QuotientExpr > operator/(Index a) const { + constexpr QuotientExpr> operator/(Index a) const { return QuotientExpr >(derived(), a); } - friend AddExpr > operator+(Index a, const BaseExpr& b) { + friend constexpr AddExpr> operator+(Index a, const BaseExpr& b) { return AddExpr >(b.derived(), a); } - friend AddExpr, ValueExpr<> > operator-(Index a, const BaseExpr& b) { + friend constexpr AddExpr, ValueExpr<>> operator-(Index a, const BaseExpr& b) { return AddExpr, ValueExpr<> >(-b.derived(), a); } - friend ProductExpr, Derived> operator*(Index a, const BaseExpr& b) { + friend constexpr ProductExpr, Derived> operator*(Index a, const BaseExpr& b) { return ProductExpr, Derived>(a, b.derived()); } - friend QuotientExpr, Derived> operator/(Index a, const BaseExpr& b) { + friend constexpr QuotientExpr, Derived> operator/(Index a, const BaseExpr& b) { return QuotientExpr, Derived>(a, b.derived()); } template - AddExpr > > operator+(internal::FixedInt) const { + constexpr AddExpr>> operator+(internal::FixedInt) const { return AddExpr > >(derived(), ValueExpr >()); } template - AddExpr > > operator-(internal::FixedInt) const { + constexpr AddExpr>> operator-(internal::FixedInt) const { return AddExpr > >(derived(), ValueExpr >()); } template - ProductExpr > > operator*(internal::FixedInt) const { + constexpr ProductExpr>> operator*(internal::FixedInt) const { return ProductExpr > >(derived(), ValueExpr >()); } template - QuotientExpr > > operator/(internal::FixedInt) const { + constexpr QuotientExpr>> operator/(internal::FixedInt) const { return QuotientExpr > >(derived(), ValueExpr >()); } template - friend AddExpr > > operator+(internal::FixedInt, const BaseExpr& b) { + friend constexpr AddExpr>> operator+(internal::FixedInt, + const BaseExpr& b) { return AddExpr > >(b.derived(), ValueExpr >()); } template - friend AddExpr, ValueExpr > > operator-(internal::FixedInt, - const BaseExpr& b) { + friend constexpr AddExpr, ValueExpr>> operator-(internal::FixedInt, + const BaseExpr& b) { return AddExpr, ValueExpr > >(-b.derived(), ValueExpr >()); } template - friend ProductExpr >, Derived> operator*(internal::FixedInt, const BaseExpr& b) { + friend constexpr ProductExpr>, Derived> operator*(internal::FixedInt, + const BaseExpr& b) { return ProductExpr >, Derived>(ValueExpr >(), b.derived()); } template - friend QuotientExpr >, Derived> operator/(internal::FixedInt, const BaseExpr& b) { + friend constexpr QuotientExpr>, Derived> operator/(internal::FixedInt, + const BaseExpr& b) { return QuotientExpr >, Derived>(ValueExpr >(), b.derived()); } template - AddExpr operator+(const BaseExpr& b) const { + constexpr AddExpr operator+(const BaseExpr& b) const { return AddExpr(derived(), b.derived()); } template - AddExpr > operator-(const BaseExpr& b) const { + constexpr AddExpr> operator-(const BaseExpr& b) const { return AddExpr >(derived(), -b.derived()); } template - ProductExpr operator*(const BaseExpr& b) const { + constexpr ProductExpr operator*(const BaseExpr& b) const { return ProductExpr(derived(), b.derived()); } template - QuotientExpr operator/(const BaseExpr& b) const { + constexpr QuotientExpr operator/(const BaseExpr& b) const { return QuotientExpr(derived(), b.derived()); } }; @@ -193,21 +182,137 @@ struct is_symbolic { enum { value = internal::is_convertible >::value }; }; +// A simple wrapper around an integral value to provide the eval method. +// We could also use a free-function symbolic_eval... +template +class ValueExpr : BaseExpr> { + public: + constexpr ValueExpr() = default; + constexpr ValueExpr(IndexType val) : value_(val) {} + template + constexpr IndexType eval_impl(const SymbolValue&...) const { + return value_; + } + template + static constexpr IndexType eval_at_compile_time_impl(const SymbolValue&...) { + return IndexType(Undefined); + } + + protected: + IndexType value_; +}; + +// Specialization for compile-time value, +// It is similar to ValueExpr(N) but this version helps the compiler to generate better code. +template +class ValueExpr> : public BaseExpr>> { + public: + constexpr ValueExpr() = default; + constexpr ValueExpr(internal::FixedInt) {} + template + constexpr Index eval_impl(const SymbolValue&...) const { + return Index(N); + } + template + static constexpr Index eval_at_compile_time_impl(const SymbolValue&...) { + return Index(N); + } +}; + /** Represents the actual value of a symbol identified by its tag * * It is the return type of SymbolValue::operator=, and most of the time this is only way it is used. */ +template +class SymbolValue : public BaseExpr> {}; + template -class SymbolValue { +class SymbolValue : public BaseExpr> { public: + constexpr SymbolValue() = default; + /** Default constructor from the value \a val */ - SymbolValue(Index val) : m_value(val) {} + constexpr SymbolValue(Index val) : value_(val) {} /** \returns the stored value of the symbol */ - Index value() const { return m_value; } + constexpr Index value() const { return value_; } + + /** \returns the stored value of the symbol at compile time, or Undefined if not known. */ + static constexpr Index value_at_compile_time() { return Index(Undefined); } + + template + constexpr Index eval_impl(const SymbolValue&...) const { + return value(); + } + + template + static constexpr Index eval_at_compile_time_impl(const SymbolValue&...) { + return value_at_compile_time(); + } protected: - Index m_value; + Index value_; +}; + +template +class SymbolValue> : public BaseExpr>> { + public: + constexpr SymbolValue() = default; + + /** Default constructor from the value \a val */ + constexpr SymbolValue(internal::FixedInt) {} + + /** \returns the stored value of the symbol */ + constexpr Index value() const { return static_cast(N); } + + /** \returns the stored value of the symbol at compile time, or Undefined if not known. */ + static constexpr Index value_at_compile_time() { return static_cast(N); } + + template + constexpr Index eval_impl(const SymbolValue&...) const { + return value(); + } + + template + static constexpr Index eval_at_compile_time_impl(const SymbolValue&...) { + return value_at_compile_time(); + } +}; + +// Find and return a symbol value based on the tag. +template +struct EvalSymbolValueHelper; + +// Empty base case, symbol not found. +template +struct EvalSymbolValueHelper { + static constexpr Index eval_impl() { + eigen_assert(false && "Symbol not found."); + return Index(Undefined); + } + static constexpr Index eval_at_compile_time_impl() { return Index(Undefined); } +}; + +// We found a symbol value matching the provided Tag! +template +struct EvalSymbolValueHelper, OtherTypes...> { + static constexpr Index eval_impl(const SymbolValue& symbol, const OtherTypes&...) { + return symbol.value(); + } + static constexpr Index eval_at_compile_time_impl(const SymbolValue& symbol, const OtherTypes&...) { + return symbol.value_at_compile_time(); + } +}; + +// No symbol value in first value, recursive search starting with next. +template +struct EvalSymbolValueHelper { + static constexpr Index eval_impl(const T1&, const OtherTypes&... values) { + return EvalSymbolValueHelper::eval_impl(values...); + } + static constexpr Index eval_at_compile_time_impl(const T1&, const OtherTypes&...) { + return EvalSymbolValueHelper::eval_at_compile_time_impl(OtherTypes{}...); + } }; /** Expression of a symbol uniquely identified by the template parameter type \c tag */ @@ -217,32 +322,47 @@ class SymbolExpr : public BaseExpr > { /** Alias to the template parameter \c tag */ typedef tag Tag; - SymbolExpr() {} + constexpr SymbolExpr() = default; /** Associate the value \a val to the given symbol \c *this, uniquely identified by its \c Tag. * * The returned object should be passed to ExprBase::eval() to evaluate a given expression with this specified * runtime-time value. */ - SymbolValue operator=(Index val) const { return SymbolValue(val); } + constexpr SymbolValue operator=(Index val) const { return SymbolValue(val); } - Index eval_impl(const SymbolValue& values) const { return values.value(); } + template + constexpr SymbolValue> operator=(internal::FixedInt) const { + return SymbolValue>{internal::FixedInt{}}; + } - // C++14 versions suitable for multiple symbols - template - Index eval_impl(const std::tuple& values) const { - return std::get >(values).value(); + template + constexpr Index eval_impl(const SymbolValue&... values) const { + return EvalSymbolValueHelper...>::eval_impl(values...); + } + + template + static constexpr Index eval_at_compile_time_impl(const SymbolValue&...) { + return EvalSymbolValueHelper...>::eval_at_compile_time_impl( + SymbolValue{}...); } }; template class NegateExpr : public BaseExpr > { public: - NegateExpr(const Arg0& arg0) : m_arg0(arg0) {} + constexpr NegateExpr() = default; + constexpr NegateExpr(const Arg0& arg0) : m_arg0(arg0) {} - template - Index eval_impl(const T& values) const { - return -m_arg0.eval_impl(values); + template + constexpr Index eval_impl(const SymbolValue&... values) const { + return -m_arg0.eval_impl(values...); + } + + template + static constexpr Index eval_at_compile_time_impl(const SymbolValue&...) { + constexpr Index v = Arg0::eval_at_compile_time_impl(SymbolValue{}...); + return (v == Undefined) ? Undefined : -v; } protected: @@ -252,11 +372,19 @@ class NegateExpr : public BaseExpr > { template class AddExpr : public BaseExpr > { public: - AddExpr(const Arg0& arg0, const Arg1& arg1) : m_arg0(arg0), m_arg1(arg1) {} + constexpr AddExpr() = default; + constexpr AddExpr(const Arg0& arg0, const Arg1& arg1) : m_arg0(arg0), m_arg1(arg1) {} - template - Index eval_impl(const T& values) const { - return m_arg0.eval_impl(values) + m_arg1.eval_impl(values); + template + constexpr Index eval_impl(const SymbolValue&... values) const { + return m_arg0.eval_impl(values...) + m_arg1.eval_impl(values...); + } + + template + static constexpr Index eval_at_compile_time_impl(const SymbolValue&...) { + constexpr Index v0 = Arg0::eval_at_compile_time_impl(SymbolValue{}...); + constexpr Index v1 = Arg1::eval_at_compile_time_impl(SymbolValue{}...); + return (v0 == Undefined || v1 == Undefined) ? Undefined : v0 + v1; } protected: @@ -267,11 +395,19 @@ class AddExpr : public BaseExpr > { template class ProductExpr : public BaseExpr > { public: - ProductExpr(const Arg0& arg0, const Arg1& arg1) : m_arg0(arg0), m_arg1(arg1) {} + constexpr ProductExpr() = default; + constexpr ProductExpr(const Arg0& arg0, const Arg1& arg1) : m_arg0(arg0), m_arg1(arg1) {} - template - Index eval_impl(const T& values) const { - return m_arg0.eval_impl(values) * m_arg1.eval_impl(values); + template + constexpr Index eval_impl(const SymbolValue&... values) const { + return m_arg0.eval_impl(values...) * m_arg1.eval_impl(values...); + } + + template + static constexpr Index eval_at_compile_time_impl(const SymbolValue&...) { + constexpr Index v0 = Arg0::eval_at_compile_time_impl(SymbolValue{}...); + constexpr Index v1 = Arg1::eval_at_compile_time_impl(SymbolValue{}...); + return (v0 == Undefined || v1 == Undefined) ? Undefined : v0 * v1; } protected: @@ -282,11 +418,19 @@ class ProductExpr : public BaseExpr > { template class QuotientExpr : public BaseExpr > { public: - QuotientExpr(const Arg0& arg0, const Arg1& arg1) : m_arg0(arg0), m_arg1(arg1) {} + constexpr QuotientExpr() = default; + constexpr QuotientExpr(const Arg0& arg0, const Arg1& arg1) : m_arg0(arg0), m_arg1(arg1) {} - template - Index eval_impl(const T& values) const { - return m_arg0.eval_impl(values) / m_arg1.eval_impl(values); + template + constexpr Index eval_impl(const SymbolValue&... values) const { + return m_arg0.eval_impl(values...) / m_arg1.eval_impl(values...); + } + + template + static constexpr Index eval_at_compile_time_impl(const SymbolValue&...) { + constexpr Index v0 = Arg0::eval_at_compile_time_impl(SymbolValue{}...); + constexpr Index v1 = Arg1::eval_at_compile_time_impl(SymbolValue{}...); + return (v0 == Undefined || v1 == Undefined) ? Undefined : v0 / v1; } protected: diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/util/XprHelper.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/util/XprHelper.h index 5b7bdc0776..a6a7d3fbb3 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/util/XprHelper.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/util/XprHelper.h @@ -484,7 +484,7 @@ struct nested_eval { // solution could be to count the number of temps? NAsInteger = n == Dynamic ? HugeCost : n, CostEval = (NAsInteger + 1) * ScalarReadCost + CoeffReadCost, - CostNoEval = NAsInteger * CoeffReadCost, + CostNoEval = int(NAsInteger) * int(CoeffReadCost), Evaluate = (int(evaluator::Flags) & EvalBeforeNestingBit) || (int(CostEval) < int(CostNoEval)) }; @@ -928,6 +928,12 @@ template struct block_xpr_helper> : block_xpr_helper> {}; +template +struct is_matrix_base_xpr : std::is_base_of>, remove_all_t> {}; + +template +struct is_permutation_base_xpr : std::is_base_of>, remove_all_t> {}; + } // end namespace internal /** \class ScalarBinaryOpTraits diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Eigenvalues/ComplexEigenSolver.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Eigenvalues/ComplexEigenSolver.h index 6efd3c17d5..60a24a899e 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Eigenvalues/ComplexEigenSolver.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Eigenvalues/ComplexEigenSolver.h @@ -54,7 +54,7 @@ class ComplexEigenSolver { enum { RowsAtCompileTime = MatrixType::RowsAtCompileTime, ColsAtCompileTime = MatrixType::ColsAtCompileTime, - Options = MatrixType::Options, + Options = internal::traits::Options, MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime }; @@ -292,7 +292,7 @@ void ComplexEigenSolver::doComputeEigenvectors(RealScalar matrixnorm m_eivec.noalias() = m_schur.matrixU() * m_matX; // .. and normalize the eigenvectors for (Index k = 0; k < n; k++) { - m_eivec.col(k).normalize(); + m_eivec.col(k).stableNormalize(); } } diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Eigenvalues/ComplexSchur.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Eigenvalues/ComplexSchur.h index 126b442a73..a33e46ee79 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Eigenvalues/ComplexSchur.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Eigenvalues/ComplexSchur.h @@ -59,7 +59,7 @@ class ComplexSchur { enum { RowsAtCompileTime = MatrixType::RowsAtCompileTime, ColsAtCompileTime = MatrixType::ColsAtCompileTime, - Options = MatrixType::Options, + Options = internal::traits::Options, MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime }; @@ -275,7 +275,7 @@ inline bool ComplexSchur::subdiagonalEntryIsNeglegible(Index i) { template typename ComplexSchur::ComplexScalar ComplexSchur::computeShift(Index iu, Index iter) { using std::abs; - if (iter == 10 || iter == 20) { + if ((iter == 10 || iter == 20) && iu > 1) { // exceptional shift, taken from http://www.netlib.org/eispack/comqr.f return abs(numext::real(m_matT.coeff(iu, iu - 1))) + abs(numext::real(m_matT.coeff(iu - 1, iu - 2))); } diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Eigenvalues/EigenSolver.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Eigenvalues/EigenSolver.h index bb6583ad05..40830fbdc5 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Eigenvalues/EigenSolver.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Eigenvalues/EigenSolver.h @@ -73,7 +73,7 @@ class EigenSolver { enum { RowsAtCompileTime = MatrixType::RowsAtCompileTime, ColsAtCompileTime = MatrixType::ColsAtCompileTime, - Options = MatrixType::Options, + Options = internal::traits::Options, MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime }; diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h index 95954e7e9a..08f1e3453e 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h @@ -67,7 +67,7 @@ class GeneralizedEigenSolver { enum { RowsAtCompileTime = MatrixType::RowsAtCompileTime, ColsAtCompileTime = MatrixType::ColsAtCompileTime, - Options = MatrixType::Options, + Options = internal::traits::Options, MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime }; diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Eigenvalues/HessenbergDecomposition.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Eigenvalues/HessenbergDecomposition.h index 8f3c1b3c03..f79ee331ac 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Eigenvalues/HessenbergDecomposition.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Eigenvalues/HessenbergDecomposition.h @@ -66,7 +66,7 @@ class HessenbergDecomposition { enum { Size = MatrixType::RowsAtCompileTime, SizeMinusOne = Size == Dynamic ? Dynamic : Size - 1, - Options = MatrixType::Options, + Options = internal::traits::Options, MaxSize = MatrixType::MaxRowsAtCompileTime, MaxSizeMinusOne = MaxSize == Dynamic ? Dynamic : MaxSize - 1 }; diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Eigenvalues/RealQZ.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Eigenvalues/RealQZ.h index 9fba7ad50b..3466f51c10 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Eigenvalues/RealQZ.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Eigenvalues/RealQZ.h @@ -64,7 +64,7 @@ class RealQZ { enum { RowsAtCompileTime = MatrixType::RowsAtCompileTime, ColsAtCompileTime = MatrixType::ColsAtCompileTime, - Options = MatrixType::Options, + Options = internal::traits::Options, MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime }; diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Eigenvalues/RealSchur.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Eigenvalues/RealSchur.h index 1ac9af850c..5cef6587b3 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Eigenvalues/RealSchur.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Eigenvalues/RealSchur.h @@ -61,7 +61,7 @@ class RealSchur { enum { RowsAtCompileTime = MatrixType::RowsAtCompileTime, ColsAtCompileTime = MatrixType::ColsAtCompileTime, - Options = MatrixType::Options, + Options = internal::traits::Options, MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime }; @@ -408,28 +408,29 @@ inline void RealSchur::computeShift(Index iu, Index iter, Scalar& ex shiftInfo.coeffRef(1) = m_matT.coeff(iu - 1, iu - 1); shiftInfo.coeffRef(2) = m_matT.coeff(iu, iu - 1) * m_matT.coeff(iu - 1, iu); - // Wilkinson's original ad hoc shift - if (iter == 10) { - exshift += shiftInfo.coeff(0); - for (Index i = 0; i <= iu; ++i) m_matT.coeffRef(i, i) -= shiftInfo.coeff(0); - Scalar s = abs(m_matT.coeff(iu, iu - 1)) + abs(m_matT.coeff(iu - 1, iu - 2)); - shiftInfo.coeffRef(0) = Scalar(0.75) * s; - shiftInfo.coeffRef(1) = Scalar(0.75) * s; - shiftInfo.coeffRef(2) = Scalar(-0.4375) * s * s; - } - - // MATLAB's new ad hoc shift - if (iter == 30) { - Scalar s = (shiftInfo.coeff(1) - shiftInfo.coeff(0)) / Scalar(2.0); - s = s * s + shiftInfo.coeff(2); - if (s > Scalar(0)) { - s = sqrt(s); - if (shiftInfo.coeff(1) < shiftInfo.coeff(0)) s = -s; - s = s + (shiftInfo.coeff(1) - shiftInfo.coeff(0)) / Scalar(2.0); - s = shiftInfo.coeff(0) - shiftInfo.coeff(2) / s; - exshift += s; - for (Index i = 0; i <= iu; ++i) m_matT.coeffRef(i, i) -= s; - shiftInfo.setConstant(Scalar(0.964)); + // Alternate exceptional shifting strategy every 16 iterations. + if (iter % 16 == 0) { + // Wilkinson's original ad hoc shift + if (iter % 32 != 0) { + exshift += shiftInfo.coeff(0); + for (Index i = 0; i <= iu; ++i) m_matT.coeffRef(i, i) -= shiftInfo.coeff(0); + Scalar s = abs(m_matT.coeff(iu, iu - 1)) + abs(m_matT.coeff(iu - 1, iu - 2)); + shiftInfo.coeffRef(0) = Scalar(0.75) * s; + shiftInfo.coeffRef(1) = Scalar(0.75) * s; + shiftInfo.coeffRef(2) = Scalar(-0.4375) * s * s; + } else { + // MATLAB's new ad hoc shift + Scalar s = (shiftInfo.coeff(1) - shiftInfo.coeff(0)) / Scalar(2.0); + s = s * s + shiftInfo.coeff(2); + if (s > Scalar(0)) { + s = sqrt(s); + if (shiftInfo.coeff(1) < shiftInfo.coeff(0)) s = -s; + s = s + (shiftInfo.coeff(1) - shiftInfo.coeff(0)) / Scalar(2.0); + s = shiftInfo.coeff(0) - shiftInfo.coeff(2) / s; + exshift += s; + for (Index i = 0; i <= iu; ++i) m_matT.coeffRef(i, i) -= s; + shiftInfo.setConstant(Scalar(0.964)); + } } } } diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h index 9511e68565..f84da91399 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h @@ -85,7 +85,7 @@ class SelfAdjointEigenSolver { enum { Size = MatrixType::RowsAtCompileTime, ColsAtCompileTime = MatrixType::ColsAtCompileTime, - Options = MatrixType::Options, + Options = internal::traits::Options, MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime }; diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Eigenvalues/Tridiagonalization.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Eigenvalues/Tridiagonalization.h index 76158e988e..e49e9db5af 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Eigenvalues/Tridiagonalization.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Eigenvalues/Tridiagonalization.h @@ -75,7 +75,7 @@ class Tridiagonalization { enum { Size = MatrixType::RowsAtCompileTime, SizeMinusOne = Size == Dynamic ? Dynamic : (Size > 1 ? Size - 1 : 1), - Options = MatrixType::Options, + Options = internal::traits::Options, MaxSize = MatrixType::MaxRowsAtCompileTime, MaxSizeMinusOne = MaxSize == Dynamic ? Dynamic : (MaxSize > 1 ? MaxSize - 1 : 1) }; @@ -445,8 +445,8 @@ struct tridiagonalization_inplace_selector { typedef typename MatrixType::RealScalar RealScalar; template - static void run(MatrixType& mat, DiagonalType& diag, SubDiagonalType& subdiag, CoeffVectorType&, WorkSpaceType&, - bool extractQ) { + static EIGEN_DEVICE_FUNC void run(MatrixType& mat, DiagonalType& diag, SubDiagonalType& subdiag, CoeffVectorType&, + WorkSpaceType&, bool extractQ) { using std::sqrt; const RealScalar tol = (std::numeric_limits::min)(); diag[0] = mat(0, 0); diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h index 14ae6ea945..a97b9054ce 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h @@ -32,17 +32,19 @@ namespace Eigen { * * \implsparsesolverconcept * - * It performs the following incomplete factorization: \f$ S P A P' S \approx L L' \f$ - * where L is a lower triangular factor, S is a diagonal scaling matrix, and P is a - * fill-in reducing permutation as computed by the ordering method. + * It performs the following incomplete factorization: \f$ S P A P' S + \sigma I \approx L L' \f$ + * where L is a lower triangular factor, S is a diagonal scaling matrix, P is a + * fill-in reducing permutation as computed by the ordering method, and \f$ \sigma \f$ is a shift + * for ensuring the decomposed matrix is positive definite. * * \b Shifting \b strategy: Let \f$ B = S P A P' S \f$ be the scaled matrix on which the factorization is carried out, * and \f$ \beta \f$ be the minimum value of the diagonal. If \f$ \beta > 0 \f$ then, the factorization is directly - * performed on the matrix B. Otherwise, the factorization is performed on the shifted matrix \f$ B + (\sigma+|\beta| I - * \f$ where \f$ \sigma \f$ is the initial shift value as returned and set by setInitialShift() method. The default - * value is \f$ \sigma = 10^{-3} \f$. If the factorization fails, then the shift in doubled until it succeed or a - * maximum of ten attempts. If it still fails, as returned by the info() method, then you can either increase the - * initial shift, or better use another preconditioning technique. + * performed on the matrix B, and \sigma = 0. Otherwise, the factorization is performed on the shifted matrix \f$ B + + * \sigma I \f$ for a shifting factor \f$ \sigma \f$. We start with \f$ \sigma = \sigma_0 - \beta \f$, where \f$ + * \sigma_0 \f$ is the initial shift value as returned and set by setInitialShift() method. The default value is \f$ + * \sigma_0 = 10^{-3} \f$. If the factorization fails, then the shift in doubled until it succeed or a maximum of ten + * attempts. If it still fails, as returned by the info() method, then you can either increase the initial shift, or + * better use another preconditioning technique. * */ template > @@ -176,6 +178,9 @@ class IncompleteCholesky : public SparseSolverBase colPtr, Ref rowIdx, Ref vals, const Index& col, @@ -214,6 +220,20 @@ void IncompleteCholesky::factorize(const MatrixType m_L.template selfadjointView() = mat.template selfadjointView(); } + // The algorithm will insert increasingly large shifts on the diagonal until + // factorization succeeds. Therefore we have to make sure that there is a + // space in the datastructure to store such values, even if the original + // matrix has a zero on the diagonal. + bool modified = false; + for (Index i = 0; i < mat.cols(); ++i) { + bool inserted = false; + m_L.findOrInsertCoeff(i, i, &inserted); + if (inserted) { + modified = true; + } + } + if (modified) m_L.makeCompressed(); + Index n = m_L.cols(); Index nnz = m_L.nonZeros(); Map vals(m_L.valuePtr(), nnz); // values @@ -257,8 +277,8 @@ void IncompleteCholesky::factorize(const MatrixType FactorType L_save = m_L; - RealScalar shift = 0; - if (mindiag <= RealScalar(0.)) shift = m_initialShift - mindiag; + m_shift = RealScalar(0); + if (mindiag <= RealScalar(0.)) m_shift = m_initialShift - mindiag; m_info = NumericalIssue; @@ -266,7 +286,7 @@ void IncompleteCholesky::factorize(const MatrixType int iter = 0; do { // Apply the shift to the diagonal elements of the matrix - for (Index j = 0; j < n; j++) vals[colPtr[j]] += shift; + for (Index j = 0; j < n; j++) vals[colPtr[j]] += m_shift; // jki version of the Cholesky factorization Index j = 0; @@ -310,7 +330,7 @@ void IncompleteCholesky::factorize(const MatrixType if (++iter >= 10) return; // increase shift - shift = numext::maxi(m_initialShift, RealScalar(2) * shift); + m_shift = numext::maxi(m_initialShift, RealScalar(2) * m_shift); // restore m_L, col_pattern, and listCol vals = Map(L_save.valuePtr(), nnz); rowIdx = Map(L_save.innerIndexPtr(), nnz); diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Jacobi/Jacobi.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Jacobi/Jacobi.h index f53b8ec83d..2686a5237a 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Jacobi/Jacobi.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Jacobi/Jacobi.h @@ -265,7 +265,7 @@ EIGEN_DEVICE_FUNC inline void MatrixBase::applyOnTheLeft(Index p, Index internal::apply_rotation_in_the_plane(x, y, j); } -/** \ingroup Jacobi_Module +/** \jacobi_module * Applies the rotation in the plane \a j to the columns \a p and \a q of \c *this, i.e., it computes B = B * J * with \f$ B = \left ( \begin{array}{cc} \text{*this.col}(p) & \text{*this.col}(q) \end{array} \right ) \f$. * diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/LU/FullPivLU.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/LU/FullPivLU.h index 57d049c7b7..466834ada6 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/LU/FullPivLU.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/LU/FullPivLU.h @@ -622,7 +622,7 @@ struct kernel_retval > // permuting the rows and cols to bring the nonnegligible pivots to the top of // the main diagonal. We need that to be able to apply our triangular solvers. // FIXME when we get triangularView-for-rectangular-matrices, this can be simplified - Matrix::Options, MaxSmallDimAtCompileTime, MatrixType::MaxColsAtCompileTime> m(dec().matrixLU().block(0, 0, rank(), cols)); for (Index i = 0; i < rank(); ++i) { diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/QR/ColPivHouseholderQR.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/QR/ColPivHouseholderQR.h index f1de6fd0bf..092c29d618 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/QR/ColPivHouseholderQR.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/QR/ColPivHouseholderQR.h @@ -238,6 +238,20 @@ class ColPivHouseholderQR : public SolverBase::d eigen_assert(m_qr.rows() == m_qr.cols() && "You can't take the determinant of a non-square matrix!"); Scalar detQ; internal::householder_determinant::IsComplex>::run(m_hCoeffs, detQ); - return m_qr.diagonal().prod() * detQ * Scalar(m_det_p); + return isInjective() ? (detQ * Scalar(m_det_p)) * m_qr.diagonal().prod() : Scalar(0); } template @@ -436,14 +450,23 @@ typename MatrixType::RealScalar ColPivHouseholderQR typename MatrixType::RealScalar ColPivHouseholderQR::logAbsDeterminant() const { eigen_assert(m_isInitialized && "ColPivHouseholderQR is not initialized."); eigen_assert(m_qr.rows() == m_qr.cols() && "You can't take the determinant of a non-square matrix!"); - return m_qr.diagonal().cwiseAbs().array().log().sum(); + return isInjective() ? m_qr.diagonal().cwiseAbs().array().log().sum() : -NumTraits::infinity(); +} + +template +typename MatrixType::Scalar ColPivHouseholderQR::signDeterminant() const { + eigen_assert(m_isInitialized && "ColPivHouseholderQR is not initialized."); + eigen_assert(m_qr.rows() == m_qr.cols() && "You can't take the determinant of a non-square matrix!"); + Scalar detQ; + internal::householder_determinant::IsComplex>::run(m_hCoeffs, detQ); + return isInjective() ? (detQ * Scalar(m_det_p)) * m_qr.diagonal().array().sign().prod() : Scalar(0); } /** Performs the QR factorization of the given matrix \a matrix. The result of diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/QR/CompleteOrthogonalDecomposition.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/QR/CompleteOrthogonalDecomposition.h index 8566e965b7..960ccb1e9f 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/QR/CompleteOrthogonalDecomposition.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/QR/CompleteOrthogonalDecomposition.h @@ -228,6 +228,21 @@ class CompleteOrthogonalDecomposition */ typename MatrixType::RealScalar logAbsDeterminant() const; + /** \returns the sign of the determinant of the + * matrix of which *this is the complete orthogonal decomposition. It has + * only linear complexity (that is, O(n) where n is the dimension of the + * square matrix) as the complete orthogonal decomposition has already been + * computed. + * + * \note This is only for square matrices. + * + * \note This method is useful to work around the risk of overflow/underflow + * that's inherent to determinant computation. + * + * \sa determinant(), absDeterminant(), logAbsDeterminant(), MatrixBase::determinant() + */ + typename MatrixType::Scalar signDeterminant() const; + /** \returns the rank of the matrix of which *this is the complete orthogonal * decomposition. * @@ -424,6 +439,11 @@ typename MatrixType::RealScalar CompleteOrthogonalDecomposition +typename MatrixType::Scalar CompleteOrthogonalDecomposition::signDeterminant() const { + return m_cpqr.signDeterminant(); +} + /** Performs the complete orthogonal decomposition of the given matrix \a * matrix. The result of the factorization is stored into \c *this, and a * reference to \c *this is returned. diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/QR/FullPivHouseholderQR.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/QR/FullPivHouseholderQR.h index d93a5d1749..cae9ae4da0 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/QR/FullPivHouseholderQR.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/QR/FullPivHouseholderQR.h @@ -248,6 +248,20 @@ class FullPivHouseholderQR : public SolverBase:: eigen_assert(m_qr.rows() == m_qr.cols() && "You can't take the determinant of a non-square matrix!"); Scalar detQ; internal::householder_determinant::IsComplex>::run(m_hCoeffs, detQ); - return m_qr.diagonal().prod() * detQ * Scalar(m_det_p); + return isInjective() ? (detQ * Scalar(m_det_p)) * m_qr.diagonal().prod() : Scalar(0); } template @@ -433,14 +447,23 @@ typename MatrixType::RealScalar FullPivHouseholderQR typename MatrixType::RealScalar FullPivHouseholderQR::logAbsDeterminant() const { eigen_assert(m_isInitialized && "FullPivHouseholderQR is not initialized."); eigen_assert(m_qr.rows() == m_qr.cols() && "You can't take the determinant of a non-square matrix!"); - return m_qr.diagonal().cwiseAbs().array().log().sum(); + return isInjective() ? m_qr.diagonal().cwiseAbs().array().log().sum() : -NumTraits::infinity(); +} + +template +typename MatrixType::Scalar FullPivHouseholderQR::signDeterminant() const { + eigen_assert(m_isInitialized && "FullPivHouseholderQR is not initialized."); + eigen_assert(m_qr.rows() == m_qr.cols() && "You can't take the determinant of a non-square matrix!"); + Scalar detQ; + internal::householder_determinant::IsComplex>::run(m_hCoeffs, detQ); + return isInjective() ? (detQ * Scalar(m_det_p)) * m_qr.diagonal().array().sign().prod() : Scalar(0); } /** Performs the QR factorization of the given matrix \a matrix. The result of diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/QR/HouseholderQR.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/QR/HouseholderQR.h index 9e736722a2..e297372589 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/QR/HouseholderQR.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/QR/HouseholderQR.h @@ -187,6 +187,8 @@ class HouseholderQR : public SolverBase> { * \warning a determinant can be very big or small, so for matrices * of large enough dimension, there is a risk of overflow/underflow. * One way to work around that is to use logAbsDeterminant() instead. + * Also, do not rely on the determinant being exactly zero for testing + * singularity or rank-deficiency. * * \sa absDeterminant(), logAbsDeterminant(), MatrixBase::determinant() */ @@ -202,6 +204,8 @@ class HouseholderQR : public SolverBase> { * \warning a determinant can be very big or small, so for matrices * of large enough dimension, there is a risk of overflow/underflow. * One way to work around that is to use logAbsDeterminant() instead. + * Also, do not rely on the determinant being exactly zero for testing + * singularity or rank-deficiency. * * \sa determinant(), logAbsDeterminant(), MatrixBase::determinant() */ @@ -217,10 +221,30 @@ class HouseholderQR : public SolverBase> { * \note This method is useful to work around the risk of overflow/underflow that's inherent * to determinant computation. * + * \warning Do not rely on the determinant being exactly zero for testing + * singularity or rank-deficiency. + * * \sa determinant(), absDeterminant(), MatrixBase::determinant() */ typename MatrixType::RealScalar logAbsDeterminant() const; + /** \returns the sign of the determinant of the matrix of which + * *this is the QR decomposition. It has only linear complexity + * (that is, O(n) where n is the dimension of the square matrix) + * as the QR decomposition has already been computed. + * + * \note This is only for square matrices. + * + * \note This method is useful to work around the risk of overflow/underflow that's inherent + * to determinant computation. + * + * \warning Do not rely on the determinant being exactly zero for testing + * singularity or rank-deficiency. + * + * \sa determinant(), absDeterminant(), MatrixBase::determinant() + */ + typename MatrixType::Scalar signDeterminant() const; + inline Index rows() const { return m_qr.rows(); } inline Index cols() const { return m_qr.cols(); } @@ -306,6 +330,15 @@ typename MatrixType::RealScalar HouseholderQR::logAbsDeterminant() c return m_qr.diagonal().cwiseAbs().array().log().sum(); } +template +typename MatrixType::Scalar HouseholderQR::signDeterminant() const { + eigen_assert(m_isInitialized && "HouseholderQR is not initialized."); + eigen_assert(m_qr.rows() == m_qr.cols() && "You can't take the determinant of a non-square matrix!"); + Scalar detQ; + internal::householder_determinant::IsComplex>::run(m_hCoeffs, detQ); + return detQ * m_qr.diagonal().array().sign().prod(); +} + namespace internal { /** \internal */ diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/SVD/BDCSVD.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/SVD/BDCSVD.h index 7948ca3154..f80ddc0e25 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/SVD/BDCSVD.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/SVD/BDCSVD.h @@ -1126,13 +1126,6 @@ void BDCSVD::perturbCol0(const ArrayRef& col0, const ArrayR << "j=" << j << "\n"; } #endif - // Avoid index out of bounds. - // Will end up setting zhat(k) = 0. - if (i >= k && l == 0) { - m_info = NumericalIssue; - prod = 0; - break; - } Index j = i < k ? i : l > 0 ? perm(l - 1) : i; #ifdef EIGEN_BDCSVD_SANITY_CHECKS if (!(dk != Literal(0) || diag(i) != Literal(0))) { @@ -1205,7 +1198,7 @@ void BDCSVD::computeSingVecs(const ArrayRef& zhat, const Ar // page 12_13 // i >= 1, di almost null and zi non null. -// We use a rotation to zero out zi applied to the left of M +// We use a rotation to zero out zi applied to the left of M, and set di = 0. template void BDCSVD::deflation43(Index firstCol, Index shift, Index i, Index size) { using std::abs; @@ -1231,9 +1224,8 @@ void BDCSVD::deflation43(Index firstCol, Index shift, Index } // end deflation 43 // page 13 -// i,j >= 1, i!=j and |di - dj| < epsilon * norm2(M) -// We apply two rotations to have zj = 0; -// TODO deflation44 is still broken and not properly tested +// i,j >= 1, i > j, and |di - dj| < epsilon * norm2(M) +// We apply two rotations to have zi = 0, and dj = di. template void BDCSVD::deflation44(Index firstColu, Index firstColm, Index firstRowW, Index firstColW, Index i, Index j, Index size) { @@ -1241,9 +1233,10 @@ void BDCSVD::deflation44(Index firstColu, Index firstColm, using std::conj; using std::pow; using std::sqrt; - RealScalar c = m_computed(firstColm + i, firstColm); - RealScalar s = m_computed(firstColm + j, firstColm); - RealScalar r = sqrt(numext::abs2(c) + numext::abs2(s)); + + RealScalar s = m_computed(firstColm + i, firstColm); + RealScalar c = m_computed(firstColm + j, firstColm); + RealScalar r = numext::hypot(c, s); #ifdef EIGEN_BDCSVD_DEBUG_VERBOSE std::cout << "deflation 4.4: " << i << "," << j << " -> " << c << " " << s << " " << r << " ; " << m_computed(firstColm + i - 1, firstColm) << " " << m_computed(firstColm + i, firstColm) << " " @@ -1253,21 +1246,21 @@ void BDCSVD::deflation44(Index firstColu, Index firstColm, << m_computed(firstColm + i + 2, firstColm + i + 2) << "\n"; #endif if (numext::is_exactly_zero(r)) { - m_computed(firstColm + i, firstColm + i) = m_computed(firstColm + j, firstColm + j); + m_computed(firstColm + j, firstColm + j) = m_computed(firstColm + i, firstColm + i); return; } c /= r; s /= r; - m_computed(firstColm + i, firstColm) = r; + m_computed(firstColm + j, firstColm) = r; m_computed(firstColm + j, firstColm + j) = m_computed(firstColm + i, firstColm + i); - m_computed(firstColm + j, firstColm) = Literal(0); + m_computed(firstColm + i, firstColm) = Literal(0); JacobiRotation J(c, -s); if (m_compU) - m_naiveU.middleRows(firstColu, size + 1).applyOnTheRight(firstColu + i, firstColu + j, J); + m_naiveU.middleRows(firstColu, size + 1).applyOnTheRight(firstColu + j, firstColu + i, J); else - m_naiveU.applyOnTheRight(firstColu + i, firstColu + j, J); - if (m_compV) m_naiveV.middleRows(firstRowW, size).applyOnTheRight(firstColW + i, firstColW + j, J); + m_naiveU.applyOnTheRight(firstColu + j, firstColu + i, J); + if (m_compV) m_naiveV.middleRows(firstRowW, size).applyOnTheRight(firstColW + j, firstColW + i, J); } // end deflation 44 // acts on block from (firstCol+shift, firstCol+shift) to (lastCol+shift, lastCol+shift) [inclusive] @@ -1350,7 +1343,7 @@ void BDCSVD::deflation(Index firstCol, Index lastCol, Index // Move deflated diagonal entries at the end. for (Index i = 1; i < length; ++i) - if (abs(diag(i)) < considerZero) permutation[p++] = i; + if (diag(i) < considerZero) permutation[p++] = i; Index i = 1, j = k + 1; for (; p < length; ++p) { @@ -1369,7 +1362,7 @@ void BDCSVD::deflation(Index firstCol, Index lastCol, Index if (total_deflation) { for (Index i = 1; i < length; ++i) { Index pi = permutation[i]; - if (abs(diag(pi)) < considerZero || diag(0) < diag(pi)) + if (diag(pi) < considerZero || diag(0) < diag(pi)) permutation[i - 1] = permutation[i]; else { permutation[i - 1] = 0; @@ -1424,17 +1417,19 @@ void BDCSVD::deflation(Index firstCol, Index lastCol, Index // condition 4.4 { Index i = length - 1; - while (i > 0 && (abs(diag(i)) < considerZero || abs(col0(i)) < considerZero)) --i; + // Find last non-deflated entry. + while (i > 0 && (diag(i) < considerZero || abs(col0(i)) < considerZero)) --i; + for (; i > 1; --i) - if ((diag(i) - diag(i - 1)) < NumTraits::epsilon() * maxDiag) { + if ((diag(i) - diag(i - 1)) < epsilon_strict) { #ifdef EIGEN_BDCSVD_DEBUG_VERBOSE std::cout << "deflation 4.4 with i = " << i << " because " << diag(i) << " - " << diag(i - 1) << " == " << (diag(i) - diag(i - 1)) << " < " - << NumTraits::epsilon() * /*diag(i)*/ maxDiag << "\n"; + << epsilon_strict << "\n"; #endif eigen_internal_assert(abs(diag(i) - diag(i - 1)) < epsilon_coarse && " diagonal entries are not properly sorted"); - deflation44(firstCol, firstCol + shift, firstRowW, firstColW, i - 1, i, length); + deflation44(firstCol, firstCol + shift, firstRowW, firstColW, i, i - 1, length); } } diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/SVD/JacobiSVD.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/SVD/JacobiSVD.h index aec1931d55..086d750a36 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/SVD/JacobiSVD.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/SVD/JacobiSVD.h @@ -52,7 +52,10 @@ template class qr_preconditioner_impl { public: void allocate(const JacobiSVD&) {} - bool run(JacobiSVD&, const MatrixType&) { return false; } + template + bool run(JacobiSVD&, const Xpr&) { + return false; + } }; /*** preconditioner using FullPivHouseholderQR ***/ @@ -75,8 +78,8 @@ class qr_preconditioner_impl + bool run(SVDType& svd, const Xpr& matrix) { if (matrix.rows() > matrix.cols()) { m_qr.compute(matrix); svd.m_workMatrix = m_qr.matrixQR().block(0, 0, matrix.cols(), matrix.cols()).template triangularView(); @@ -105,7 +108,7 @@ class qr_preconditioner_impl::Options }; typedef typename internal::make_proper_matrix_type + bool run(SVDType& svd, const Xpr& matrix) { if (matrix.cols() > matrix.rows()) { - m_adjoint = matrix.adjoint(); - m_qr.compute(m_adjoint); + m_qr.compute(matrix.adjoint()); svd.m_workMatrix = m_qr.matrixQR().block(0, 0, matrix.rows(), matrix.rows()).template triangularView().adjoint(); if (svd.m_computeFullV) m_qr.matrixQ().evalTo(svd.m_matrixV, m_workspace); @@ -137,7 +138,6 @@ class qr_preconditioner_impl QRType; QRType m_qr; - TransposeTypeWithSameStorageOrder m_adjoint; typename plain_row_type::type m_workspace; }; @@ -167,8 +167,8 @@ class qr_preconditioner_impl + bool run(SVDType& svd, const Xpr& matrix) { if (matrix.rows() > matrix.cols()) { m_qr.compute(matrix); svd.m_workMatrix = m_qr.matrixQR().block(0, 0, matrix.cols(), matrix.cols()).template triangularView(); @@ -202,7 +202,7 @@ class qr_preconditioner_impl::Options, WorkspaceSize = internal::traits::MatrixVColsAtCompileTime, MaxWorkspaceSize = internal::traits::MatrixVMaxColsAtCompileTime }; @@ -222,13 +222,11 @@ class qr_preconditioner_impl + bool run(SVDType& svd, const Xpr& matrix) { if (matrix.cols() > matrix.rows()) { - m_adjoint = matrix.adjoint(); - m_qr.compute(m_adjoint); + m_qr.compute(matrix.adjoint()); svd.m_workMatrix = m_qr.matrixQR().block(0, 0, matrix.rows(), matrix.rows()).template triangularView().adjoint(); @@ -247,7 +245,6 @@ class qr_preconditioner_impl QRType; QRType m_qr; - TransposeTypeWithSameStorageOrder m_adjoint; WorkspaceType m_workspace; }; @@ -276,8 +273,8 @@ class qr_preconditioner_impl + bool run(SVDType& svd, const Xpr& matrix) { if (matrix.rows() > matrix.cols()) { m_qr.compute(matrix); svd.m_workMatrix = m_qr.matrixQR().block(0, 0, matrix.cols(), matrix.cols()).template triangularView(); @@ -310,7 +307,7 @@ class qr_preconditioner_impl::Options, WorkspaceSize = internal::traits::MatrixVColsAtCompileTime, MaxWorkspaceSize = internal::traits::MatrixVMaxColsAtCompileTime }; @@ -330,13 +327,12 @@ class qr_preconditioner_impl + bool run(SVDType& svd, const Xpr& matrix) { if (matrix.cols() > matrix.rows()) { - m_adjoint = matrix.adjoint(); - m_qr.compute(m_adjoint); + m_qr.compute(matrix.adjoint()); svd.m_workMatrix = m_qr.matrixQR().block(0, 0, matrix.rows(), matrix.rows()).template triangularView().adjoint(); @@ -355,7 +351,6 @@ class qr_preconditioner_impl QRType; QRType m_qr; - TransposeTypeWithSameStorageOrder m_adjoint; WorkspaceType m_workspace; }; @@ -509,7 +504,6 @@ class JacobiSVD : public SVDBase > { typedef MatrixType_ MatrixType; typedef typename Base::Scalar Scalar; typedef typename Base::RealScalar RealScalar; - typedef typename Base::Index Index; enum : int { Options = Options_, QRPreconditioner = internal::get_qr_preconditioner(Options), @@ -618,7 +612,18 @@ class JacobiSVD : public SVDBase > { using Base::rows; private: - void allocate(Index rows, Index cols, unsigned int computationOptions); + void allocate(Index rows_, Index cols_, unsigned int computationOptions) { + if (Base::allocate(rows_, cols_, computationOptions)) return; + eigen_assert(!(ShouldComputeThinU && int(QRPreconditioner) == int(FullPivHouseholderQRPreconditioner)) && + !(ShouldComputeThinU && int(QRPreconditioner) == int(FullPivHouseholderQRPreconditioner)) && + "JacobiSVD: can't compute thin U or thin V with the FullPivHouseholderQR preconditioner. " + "Use the ColPivHouseholderQR preconditioner instead."); + + m_workMatrix.resize(diagSize(), diagSize()); + if (cols() > rows()) m_qr_precond_morecols.allocate(*this); + if (rows() > cols()) m_qr_precond_morerows.allocate(*this); + } + JacobiSVD& compute_impl(const MatrixType& matrix, unsigned int computationOptions); protected: @@ -654,24 +659,8 @@ class JacobiSVD : public SVDBase > { internal::qr_preconditioner_impl m_qr_precond_morerows; WorkMatrixType m_workMatrix; - MatrixType m_scaledMatrix; }; -template -void JacobiSVD::allocate(Index rows_, Index cols_, unsigned int computationOptions_) { - if (Base::allocate(rows_, cols_, computationOptions_)) return; - - eigen_assert(!(ShouldComputeThinU && int(QRPreconditioner) == int(FullPivHouseholderQRPreconditioner)) && - !(ShouldComputeThinU && int(QRPreconditioner) == int(FullPivHouseholderQRPreconditioner)) && - "JacobiSVD: can't compute thin U or thin V with the FullPivHouseholderQR preconditioner. " - "Use the ColPivHouseholderQR preconditioner instead."); - - m_workMatrix.resize(diagSize(), diagSize()); - if (cols() > rows()) m_qr_precond_morecols.allocate(*this); - if (rows() > cols()) m_qr_precond_morerows.allocate(*this); - if (rows() != cols()) m_scaledMatrix.resize(rows(), cols()); -} - template JacobiSVD& JacobiSVD::compute_impl(const MatrixType& matrix, unsigned int computationOptions) { @@ -699,9 +688,8 @@ JacobiSVD& JacobiSVD::compute_impl(con /*** step 1. The R-SVD step: we use a QR decomposition to reduce to the case of a square matrix */ if (rows() != cols()) { - m_scaledMatrix = matrix / scale; - m_qr_precond_morecols.run(*this, m_scaledMatrix); - m_qr_precond_morerows.run(*this, m_scaledMatrix); + m_qr_precond_morecols.run(*this, matrix / scale); + m_qr_precond_morerows.run(*this, matrix / scale); } else { m_workMatrix = matrix.template topLeftCorner(diagSize(), diagSize()) / scale; diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/SVD/SVDBase.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/SVD/SVDBase.h index ae2843b2d8..d1ad63de26 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/SVD/SVDBase.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/SVD/SVDBase.h @@ -125,7 +125,6 @@ class SVDBase : public SolverBase > { typedef typename MatrixType::Scalar Scalar; typedef typename NumTraits::Real RealScalar; typedef typename Eigen::internal::traits::StorageIndex StorageIndex; - typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3 static constexpr bool ShouldComputeFullU = internal::traits::ShouldComputeFullU; static constexpr bool ShouldComputeThinU = internal::traits::ShouldComputeThinU; @@ -139,7 +138,7 @@ class SVDBase : public SolverBase > { MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime, MaxDiagSizeAtCompileTime = internal::min_size_prefer_fixed(MaxRowsAtCompileTime, MaxColsAtCompileTime), - MatrixOptions = MatrixType::Options, + MatrixOptions = internal::traits::Options, MatrixUColsAtCompileTime = internal::traits::MatrixUColsAtCompileTime, MatrixVColsAtCompileTime = internal::traits::MatrixVColsAtCompileTime, MatrixUMaxColsAtCompileTime = internal::traits::MatrixUMaxColsAtCompileTime, @@ -355,11 +354,11 @@ class SVDBase : public SolverBase > { m_isInitialized(false), m_isAllocated(false), m_usePrescribedThreshold(false), - m_computeFullU(false), - m_computeThinU(false), - m_computeFullV(false), - m_computeThinV(false), - m_computationOptions(0), + m_computeFullU(ShouldComputeFullU), + m_computeThinU(ShouldComputeThinU), + m_computeFullV(ShouldComputeFullV), + m_computeThinV(ShouldComputeThinV), + m_computationOptions(internal::traits::Options), m_nonzeroSingularValues(0), m_rows(RowsAtCompileTime), m_cols(ColsAtCompileTime), diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/SparseCholesky/SimplicialCholesky.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/SparseCholesky/SimplicialCholesky.h index 423287b5b3..f3ce975cfe 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/SparseCholesky/SimplicialCholesky.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/SparseCholesky/SimplicialCholesky.h @@ -58,6 +58,7 @@ class SimplicialCholeskyBase : public SparseSolverBase { enum { UpLo = internal::traits::UpLo }; typedef typename MatrixType::Scalar Scalar; typedef typename MatrixType::RealScalar RealScalar; + typedef typename internal::traits::DiagonalScalar DiagonalScalar; typedef typename MatrixType::StorageIndex StorageIndex; typedef SparseMatrix CholMatrixType; typedef CholMatrixType const* ConstCholMatrixPtr; @@ -114,7 +115,7 @@ class SimplicialCholeskyBase : public SparseSolverBase { * * \returns a reference to \c *this. */ - Derived& setShift(const RealScalar& offset, const RealScalar& scale = 1) { + Derived& setShift(const DiagonalScalar& offset, const DiagonalScalar& scale = 1) { m_shiftOffset = offset; m_shiftScale = scale; return derived(); @@ -178,18 +179,18 @@ class SimplicialCholeskyBase : public SparseSolverBase { protected: /** Computes the sparse Cholesky decomposition of \a matrix */ - template + template void compute(const MatrixType& matrix) { eigen_assert(matrix.rows() == matrix.cols()); Index size = matrix.cols(); CholMatrixType tmp(size, size); ConstCholMatrixPtr pmat; - ordering(matrix, pmat, tmp); + ordering(matrix, pmat, tmp); analyzePattern_preordered(*pmat, DoLDLT); - factorize_preordered(*pmat); + factorize_preordered(*pmat); } - template + template void factorize(const MatrixType& a) { eigen_assert(a.rows() == a.cols()); Index size = a.cols(); @@ -200,28 +201,33 @@ class SimplicialCholeskyBase : public SparseSolverBase { // If there is no ordering, try to directly use the input matrix without any copy internal::simplicial_cholesky_grab_input::run(a, pmat, tmp); } else { - tmp.template selfadjointView() = a.template selfadjointView().twistedBy(m_P); + internal::permute_symm_to_symm(a, tmp, m_P.indices().data()); pmat = &tmp; } - factorize_preordered(*pmat); + factorize_preordered(*pmat); } - template + template void factorize_preordered(const CholMatrixType& a); - void analyzePattern(const MatrixType& a, bool doLDLT) { + template + void analyzePattern(const MatrixType& a) { eigen_assert(a.rows() == a.cols()); Index size = a.cols(); CholMatrixType tmp(size, size); ConstCholMatrixPtr pmat; - ordering(a, pmat, tmp); - analyzePattern_preordered(*pmat, doLDLT); + ordering(a, pmat, tmp); + analyzePattern_preordered(*pmat, DoLDLT); } void analyzePattern_preordered(const CholMatrixType& a, bool doLDLT); + template void ordering(const MatrixType& a, ConstCholMatrixPtr& pmat, CholMatrixType& ap); + inline DiagonalScalar getDiag(Scalar x) { return internal::traits::getDiag(x); } + inline Scalar getSymm(Scalar x) { return internal::traits::getSymm(x); } + /** keeps off-diagonal entries; drops diagonal entries */ struct keep_diag { inline bool operator()(const Index& row, const Index& col, const Scalar&) const { return row != col; } @@ -238,8 +244,8 @@ class SimplicialCholeskyBase : public SparseSolverBase { PermutationMatrix m_P; // the permutation PermutationMatrix m_Pinv; // the inverse permutation - RealScalar m_shiftOffset; - RealScalar m_shiftScale; + DiagonalScalar m_shiftOffset; + DiagonalScalar m_shiftScale; }; template > class SimplicialLDLT; +template > +class SimplicialNonHermitianLLT; +template > +class SimplicialNonHermitianLDLT; template > class SimplicialCholesky; @@ -260,12 +272,15 @@ struct traits > { typedef Ordering_ OrderingType; enum { UpLo = UpLo_ }; typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar DiagonalScalar; typedef typename MatrixType::StorageIndex StorageIndex; typedef SparseMatrix CholMatrixType; typedef TriangularView MatrixL; typedef TriangularView MatrixU; static inline MatrixL getL(const CholMatrixType& m) { return MatrixL(m); } static inline MatrixU getU(const CholMatrixType& m) { return MatrixU(m.adjoint()); } + static inline DiagonalScalar getDiag(Scalar x) { return numext::real(x); } + static inline Scalar getSymm(Scalar x) { return numext::conj(x); } }; template @@ -274,12 +289,49 @@ struct traits > { typedef Ordering_ OrderingType; enum { UpLo = UpLo_ }; typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar DiagonalScalar; typedef typename MatrixType::StorageIndex StorageIndex; typedef SparseMatrix CholMatrixType; typedef TriangularView MatrixL; typedef TriangularView MatrixU; static inline MatrixL getL(const CholMatrixType& m) { return MatrixL(m); } static inline MatrixU getU(const CholMatrixType& m) { return MatrixU(m.adjoint()); } + static inline DiagonalScalar getDiag(Scalar x) { return numext::real(x); } + static inline Scalar getSymm(Scalar x) { return numext::conj(x); } +}; + +template +struct traits > { + typedef MatrixType_ MatrixType; + typedef Ordering_ OrderingType; + enum { UpLo = UpLo_ }; + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::Scalar DiagonalScalar; + typedef typename MatrixType::StorageIndex StorageIndex; + typedef SparseMatrix CholMatrixType; + typedef TriangularView MatrixL; + typedef TriangularView MatrixU; + static inline MatrixL getL(const CholMatrixType& m) { return MatrixL(m); } + static inline MatrixU getU(const CholMatrixType& m) { return MatrixU(m.transpose()); } + static inline DiagonalScalar getDiag(Scalar x) { return x; } + static inline Scalar getSymm(Scalar x) { return x; } +}; + +template +struct traits > { + typedef MatrixType_ MatrixType; + typedef Ordering_ OrderingType; + enum { UpLo = UpLo_ }; + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::Scalar DiagonalScalar; + typedef typename MatrixType::StorageIndex StorageIndex; + typedef SparseMatrix CholMatrixType; + typedef TriangularView MatrixL; + typedef TriangularView MatrixU; + static inline MatrixL getL(const CholMatrixType& m) { return MatrixL(m); } + static inline MatrixU getU(const CholMatrixType& m) { return MatrixU(m.transpose()); } + static inline DiagonalScalar getDiag(Scalar x) { return x; } + static inline Scalar getSymm(Scalar x) { return x; } }; template @@ -287,6 +339,10 @@ struct traits > { typedef MatrixType_ MatrixType; typedef Ordering_ OrderingType; enum { UpLo = UpLo_ }; + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar DiagonalScalar; + static inline DiagonalScalar getDiag(Scalar x) { return numext::real(x); } + static inline Scalar getSymm(Scalar x) { return numext::conj(x); } }; } // namespace internal @@ -346,7 +402,7 @@ class SimplicialLLT : public SimplicialCholeskyBase(matrix); + Base::template compute(matrix); return *this; } @@ -356,7 +412,7 @@ class SimplicialLLT : public SimplicialCholeskyBase(a); } /** Performs a numeric decomposition of \a matrix * @@ -364,7 +420,7 @@ class SimplicialLLT : public SimplicialCholeskyBase(a); } + void factorize(const MatrixType& a) { Base::template factorize(a); } /** \returns the determinant of the underlying matrix from the current factorization */ Scalar determinant() const { @@ -434,7 +490,7 @@ class SimplicialLDLT : public SimplicialCholeskyBase(matrix); + Base::template compute(matrix); return *this; } @@ -444,7 +500,7 @@ class SimplicialLDLT : public SimplicialCholeskyBase(a); } /** Performs a numeric decomposition of \a matrix * @@ -452,7 +508,177 @@ class SimplicialLDLT : public SimplicialCholeskyBase(a); } + void factorize(const MatrixType& a) { Base::template factorize(a); } + + /** \returns the determinant of the underlying matrix from the current factorization */ + Scalar determinant() const { return Base::m_diag.prod(); } +}; + +/** \ingroup SparseCholesky_Module + * \class SimplicialNonHermitianLLT + * \brief A direct sparse LLT Cholesky factorizations, for symmetric non-hermitian matrices. + * + * This class provides a LL^T Cholesky factorizations of sparse matrices that are + * symmetric but not hermitian. For real matrices, this is equivalent to the regular LLT factorization. + * The factorization allows for solving A.X = B where X and B can be either dense or sparse. + * + * In order to reduce the fill-in, a symmetric permutation P is applied prior to the factorization + * such that the factorized matrix is P A P^-1. + * + * \tparam MatrixType_ the type of the sparse matrix A, it must be a SparseMatrix<> + * \tparam UpLo_ the triangular part that will be used for the computations. It can be Lower + * or Upper. Default is Lower. + * \tparam Ordering_ The ordering method to use, either AMDOrdering<> or NaturalOrdering<>. Default is AMDOrdering<> + * + * \implsparsesolverconcept + * + * \sa class SimplicialNonHermitianLDLT, SimplicialLLT, class AMDOrdering, class NaturalOrdering + */ +template +class SimplicialNonHermitianLLT + : public SimplicialCholeskyBase > { + public: + typedef MatrixType_ MatrixType; + enum { UpLo = UpLo_ }; + typedef SimplicialCholeskyBase Base; + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; + typedef typename MatrixType::StorageIndex StorageIndex; + typedef SparseMatrix CholMatrixType; + typedef Matrix VectorType; + typedef internal::traits Traits; + typedef typename Traits::MatrixL MatrixL; + typedef typename Traits::MatrixU MatrixU; + + public: + /** Default constructor */ + SimplicialNonHermitianLLT() : Base() {} + + /** Constructs and performs the LLT factorization of \a matrix */ + explicit SimplicialNonHermitianLLT(const MatrixType& matrix) : Base(matrix) {} + + /** \returns an expression of the factor L */ + inline const MatrixL matrixL() const { + eigen_assert(Base::m_factorizationIsOk && "Simplicial LLT not factorized"); + return Traits::getL(Base::m_matrix); + } + + /** \returns an expression of the factor U (= L^*) */ + inline const MatrixU matrixU() const { + eigen_assert(Base::m_factorizationIsOk && "Simplicial LLT not factorized"); + return Traits::getU(Base::m_matrix); + } + + /** Computes the sparse Cholesky decomposition of \a matrix */ + SimplicialNonHermitianLLT& compute(const MatrixType& matrix) { + Base::template compute(matrix); + return *this; + } + + /** Performs a symbolic decomposition on the sparcity of \a matrix. + * + * This function is particularly useful when solving for several problems having the same structure. + * + * \sa factorize() + */ + void analyzePattern(const MatrixType& a) { Base::template analyzePattern(a); } + + /** Performs a numeric decomposition of \a matrix + * + * The given matrix must has the same sparcity than the matrix on which the symbolic decomposition has been performed. + * + * \sa analyzePattern() + */ + void factorize(const MatrixType& a) { Base::template factorize(a); } + + /** \returns the determinant of the underlying matrix from the current factorization */ + Scalar determinant() const { + Scalar detL = Base::m_matrix.diagonal().prod(); + return detL * detL; + } +}; + +/** \ingroup SparseCholesky_Module + * \class SimplicialNonHermitianLDLT + * \brief A direct sparse LDLT Cholesky factorizations without square root, for symmetric non-hermitian matrices. + * + * This class provides a LDL^T Cholesky factorizations without square root of sparse matrices that are + * symmetric but not hermitian. For real matrices, this is equivalent to the regular LDLT factorization. + * The factorization allows for solving A.X = B where X and B can be either dense or sparse. + * + * In order to reduce the fill-in, a symmetric permutation P is applied prior to the factorization + * such that the factorized matrix is P A P^-1. + * + * \tparam MatrixType_ the type of the sparse matrix A, it must be a SparseMatrix<> + * \tparam UpLo_ the triangular part that will be used for the computations. It can be Lower + * or Upper. Default is Lower. + * \tparam Ordering_ The ordering method to use, either AMDOrdering<> or NaturalOrdering<>. Default is AMDOrdering<> + * + * \implsparsesolverconcept + * + * \sa class SimplicialNonHermitianLLT, SimplicialLDLT, class AMDOrdering, class NaturalOrdering + */ +template +class SimplicialNonHermitianLDLT + : public SimplicialCholeskyBase > { + public: + typedef MatrixType_ MatrixType; + enum { UpLo = UpLo_ }; + typedef SimplicialCholeskyBase Base; + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; + typedef typename MatrixType::StorageIndex StorageIndex; + typedef SparseMatrix CholMatrixType; + typedef Matrix VectorType; + typedef internal::traits Traits; + typedef typename Traits::MatrixL MatrixL; + typedef typename Traits::MatrixU MatrixU; + + public: + /** Default constructor */ + SimplicialNonHermitianLDLT() : Base() {} + + /** Constructs and performs the LLT factorization of \a matrix */ + explicit SimplicialNonHermitianLDLT(const MatrixType& matrix) : Base(matrix) {} + + /** \returns a vector expression of the diagonal D */ + inline const VectorType vectorD() const { + eigen_assert(Base::m_factorizationIsOk && "Simplicial LDLT not factorized"); + return Base::m_diag; + } + /** \returns an expression of the factor L */ + inline const MatrixL matrixL() const { + eigen_assert(Base::m_factorizationIsOk && "Simplicial LDLT not factorized"); + return Traits::getL(Base::m_matrix); + } + + /** \returns an expression of the factor U (= L^*) */ + inline const MatrixU matrixU() const { + eigen_assert(Base::m_factorizationIsOk && "Simplicial LDLT not factorized"); + return Traits::getU(Base::m_matrix); + } + + /** Computes the sparse Cholesky decomposition of \a matrix */ + SimplicialNonHermitianLDLT& compute(const MatrixType& matrix) { + Base::template compute(matrix); + return *this; + } + + /** Performs a symbolic decomposition on the sparcity of \a matrix. + * + * This function is particularly useful when solving for several problems having the same structure. + * + * \sa factorize() + */ + void analyzePattern(const MatrixType& a) { Base::template analyzePattern(a); } + + /** Performs a numeric decomposition of \a matrix + * + * The given matrix must has the same sparcity than the matrix on which the symbolic decomposition has been performed. + * + * \sa analyzePattern() + */ + void factorize(const MatrixType& a) { Base::template factorize(a); } /** \returns the determinant of the underlying matrix from the current factorization */ Scalar determinant() const { return Base::m_diag.prod(); } @@ -475,7 +701,6 @@ class SimplicialCholesky : public SimplicialCholeskyBase CholMatrixType; typedef Matrix VectorType; - typedef internal::traits Traits; typedef internal::traits > LDLTTraits; typedef internal::traits > LLTTraits; @@ -511,9 +736,9 @@ class SimplicialCholesky : public SimplicialCholeskyBase(matrix); + Base::template compute(matrix); else - Base::template compute(matrix); + Base::template compute(matrix); return *this; } @@ -523,7 +748,12 @@ class SimplicialCholesky : public SimplicialCholeskyBase(a); + else + Base::template analyzePattern(a); + } /** Performs a numeric decomposition of \a matrix * @@ -533,9 +763,9 @@ class SimplicialCholesky : public SimplicialCholeskyBase(a); + Base::template factorize(a); else - Base::template factorize(a); + Base::template factorize(a); } /** \internal */ @@ -594,6 +824,7 @@ class SimplicialCholesky : public SimplicialCholeskyBase +template void SimplicialCholeskyBase::ordering(const MatrixType& a, ConstCholMatrixPtr& pmat, CholMatrixType& ap) { eigen_assert(a.rows() == a.cols()); const Index size = a.rows(); @@ -602,7 +833,7 @@ void SimplicialCholeskyBase::ordering(const MatrixType& a, ConstCholMat if (!internal::is_same >::value) { { CholMatrixType C; - C = a.template selfadjointView(); + internal::permute_symm_to_fullsymm(a, C, NULL); OrderingType ordering; ordering(C, m_Pinv); @@ -614,14 +845,14 @@ void SimplicialCholeskyBase::ordering(const MatrixType& a, ConstCholMat m_P.resize(0); ap.resize(size, size); - ap.template selfadjointView() = a.template selfadjointView().twistedBy(m_P); + internal::permute_symm_to_symm(a, ap, m_P.indices().data()); } else { m_Pinv.resize(0); m_P.resize(0); if (int(UpLo) == int(Lower) || MatrixType::IsRowMajor) { // we have to transpose the lower part to to the upper one ap.resize(size, size); - ap.template selfadjointView() = a.template selfadjointView(); + internal::permute_symm_to_symm(a, ap, NULL); } else internal::simplicial_cholesky_grab_input::run(a, pmat, ap); } diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h index abfbbe6bcc..0b13c56b5d 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h @@ -67,7 +67,7 @@ void SimplicialCholeskyBase::analyzePattern_preordered(const CholMatrix } template -template +template void SimplicialCholeskyBase::factorize_preordered(const CholMatrixType& ap) { using std::sqrt; @@ -97,7 +97,7 @@ void SimplicialCholeskyBase::factorize_preordered(const CholMatrixType& for (typename CholMatrixType::InnerIterator it(ap, k); it; ++it) { StorageIndex i = it.index(); if (i <= k) { - y[i] += numext::conj(it.value()); /* scatter A(i,k) into Y (sum duplicates) */ + y[i] += getSymm(it.value()); /* scatter A(i,k) into Y (sum duplicates) */ Index len; for (len = 0; tags[i] != k; i = m_parent[i]) { pattern[len++] = i; /* L(k,i) is nonzero */ @@ -109,8 +109,8 @@ void SimplicialCholeskyBase::factorize_preordered(const CholMatrixType& /* compute numerical values kth row of L (a sparse triangular solve) */ - RealScalar d = - numext::real(y[k]) * m_shiftScale + m_shiftOffset; // get D(k,k), apply the shift function, and clear Y(k) + DiagonalScalar d = + getDiag(y[k]) * m_shiftScale + m_shiftOffset; // get D(k,k), apply the shift function, and clear Y(k) y[k] = Scalar(0); for (; top < size; ++top) { Index i = pattern[top]; /* pattern[top:n-1] is pattern of L(:,k) */ @@ -120,14 +120,14 @@ void SimplicialCholeskyBase::factorize_preordered(const CholMatrixType& /* the nonzero entry L(k,i) */ Scalar l_ki; if (DoLDLT) - l_ki = yi / numext::real(m_diag[i]); + l_ki = yi / getDiag(m_diag[i]); else yi = l_ki = yi / Lx[Lp[i]]; Index p2 = Lp[i] + m_nonZerosPerCol[i]; Index p; - for (p = Lp[i] + (DoLDLT ? 0 : 1); p < p2; ++p) y[Li[p]] -= numext::conj(Lx[p]) * yi; - d -= numext::real(l_ki * numext::conj(yi)); + for (p = Lp[i] + (DoLDLT ? 0 : 1); p < p2; ++p) y[Li[p]] -= getSymm(Lx[p]) * yi; + d -= getDiag(l_ki * getSymm(yi)); Li[p] = k; /* store L(k,i) in column form of L */ Lx[p] = l_ki; ++m_nonZerosPerCol[i]; /* increment count of nonzeros in col i */ @@ -141,7 +141,7 @@ void SimplicialCholeskyBase::factorize_preordered(const CholMatrixType& } else { Index p = Lp[k] + m_nonZerosPerCol[k]++; Li[p] = k; /* store L(k,k) = sqrt (d) in column k */ - if (d <= RealScalar(0)) { + if (NonHermitian ? d == RealScalar(0) : numext::real(d) <= RealScalar(0)) { ok = false; /* failure, matrix is not positive definite */ break; } diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/SparseCore/CompressedStorage.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/SparseCore/CompressedStorage.h index 123c89c0de..8f8a6963a9 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/SparseCore/CompressedStorage.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/SparseCore/CompressedStorage.h @@ -71,8 +71,13 @@ class CompressedStorage { void resize(Index size, double reserveSizeFactor = 0) { if (m_allocatedSize < size) { + // Avoid underflow on the std::min call by choosing the smaller index type. + using SmallerIndexType = + typename std::conditional((std::numeric_limits::max)()) < + static_cast((std::numeric_limits::max)()), + Index, StorageIndex>::type; Index realloc_size = - (std::min)(NumTraits::highest(), size + Index(reserveSizeFactor * double(size))); + (std::min)(NumTraits::highest(), size + Index(reserveSizeFactor * double(size))); if (realloc_size < size) internal::throw_std_bad_alloc(); reallocate(realloc_size); } diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/SparseCore/SparseDot.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/SparseCore/SparseDot.h index aa876ecbca..f040915b27 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/SparseCore/SparseDot.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/SparseCore/SparseDot.h @@ -17,7 +17,8 @@ namespace Eigen { template template -typename internal::traits::Scalar SparseMatrixBase::dot(const MatrixBase& other) const { +inline typename internal::traits::Scalar SparseMatrixBase::dot( + const MatrixBase& other) const { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived) EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Derived, OtherDerived) @@ -30,17 +31,23 @@ typename internal::traits::Scalar SparseMatrixBase::dot(const internal::evaluator thisEval(derived()); typename internal::evaluator::InnerIterator i(thisEval, 0); - Scalar res(0); - while (i) { - res += numext::conj(i.value()) * other.coeff(i.index()); + // Two accumulators, which breaks the dependency chain on the accumulator + // and allows more instruction-level parallelism in the following loop. + Scalar res1(0); + Scalar res2(0); + for (; i; ++i) { + res1 += numext::conj(i.value()) * other.coeff(i.index()); ++i; + if (i) { + res2 += numext::conj(i.value()) * other.coeff(i.index()); + } } - return res; + return res1 + res2; } template template -typename internal::traits::Scalar SparseMatrixBase::dot( +inline typename internal::traits::Scalar SparseMatrixBase::dot( const SparseMatrixBase& other) const { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived) diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/SparseCore/SparseMatrix.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/SparseCore/SparseMatrix.h index 19dd40cdc7..849970a9b7 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/SparseCore/SparseMatrix.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/SparseCore/SparseMatrix.h @@ -58,6 +58,7 @@ struct traits> { ColsAtCompileTime = Dynamic, MaxRowsAtCompileTime = Dynamic, MaxColsAtCompileTime = Dynamic, + Options = Options_, Flags = Options_ | NestByRefBit | LvalueBit | CompressedAccessBit, SupportedAccessPatterns = InnerRandomAccessPattern }; @@ -216,15 +217,18 @@ class SparseMatrix : public SparseCompressedBase= 0 && row < rows() && col >= 0 && col < cols()); const Index outer = IsRowMajor ? row : col; const Index inner = IsRowMajor ? col : row; @@ -239,17 +243,37 @@ class SparseMatrix : public SparseCompressedBaseswap(other); } + + template + inline SparseMatrix(SparseCompressedBase&& other) : SparseMatrix() { *this = other.derived().markAsRValue(); } @@ -833,7 +860,10 @@ class SparseMatrix : public SparseCompressedBaseswap(other); + return *this; + } #ifndef EIGEN_PARSED_BY_DOXYGEN template @@ -848,6 +878,12 @@ class SparseMatrix : public SparseCompressedBase EIGEN_DONT_INLINE SparseMatrix& operator=(const SparseMatrixBase& other); + template + inline SparseMatrix& operator=(SparseCompressedBase&& other) { + *this = other.derived().markAsRValue(); + return *this; + } + #ifndef EIGEN_NO_IO friend std::ostream& operator<<(std::ostream& s, const SparseMatrix& m) { EIGEN_DBG_SPARSE( diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/SparseCore/SparseSelfAdjointView.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/SparseCore/SparseSelfAdjointView.h index 129899c454..3402baeb6b 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/SparseCore/SparseSelfAdjointView.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/SparseCore/SparseSelfAdjointView.h @@ -34,13 +34,13 @@ namespace internal { template struct traits > : traits {}; -template +template void permute_symm_to_symm( const MatrixType& mat, SparseMatrix& _dest, const typename MatrixType::StorageIndex* perm = 0); -template +template void permute_symm_to_fullsymm( const MatrixType& mat, SparseMatrix& _dest, @@ -53,7 +53,7 @@ class SparseSelfAdjointView : public EigenBase::RowsAtCompileTime, ColsAtCompileTime = internal::traits::ColsAtCompileTime }; @@ -234,7 +234,7 @@ struct Assignment { template static void run(SparseMatrix& dst, const SrcXprType& src, const AssignOpType& /*func*/) { - internal::permute_symm_to_fullsymm(src.matrix(), dst); + internal::permute_symm_to_fullsymm(src.matrix(), dst); } // FIXME: the handling of += and -= in sparse matrices should be cleanup so that next two overloads could be reduced @@ -405,7 +405,7 @@ struct product_evaluator, ProductTag, Spar ***************************************************************************/ namespace internal { -template +template void permute_symm_to_fullsymm( const MatrixType& mat, SparseMatrix& _dest, @@ -476,13 +476,13 @@ void permute_symm_to_fullsymm( dest.valuePtr()[k] = it.value(); k = count[ip]++; dest.innerIndexPtr()[k] = jp; - dest.valuePtr()[k] = numext::conj(it.value()); + dest.valuePtr()[k] = (NonHermitian ? it.value() : numext::conj(it.value())); } } } } -template +template void permute_symm_to_symm(const MatrixType& mat, SparseMatrix& _dest, const typename MatrixType::StorageIndex* perm) { @@ -534,7 +534,7 @@ void permute_symm_to_symm(const MatrixType& mat, if (!StorageOrderMatch) std::swap(ip, jp); if (((int(DstMode) == int(Lower) && ip < jp) || (int(DstMode) == int(Upper) && ip > jp))) - dest.valuePtr()[k] = numext::conj(it.value()); + dest.valuePtr()[k] = (NonHermitian ? it.value() : numext::conj(it.value())); else dest.valuePtr()[k] = it.value(); } @@ -595,14 +595,14 @@ struct Assignment&) { // internal::permute_symm_to_fullsymm(m_matrix,_dest,m_perm.indices().data()); SparseMatrix tmp; - internal::permute_symm_to_fullsymm(src.matrix(), tmp, src.perm().indices().data()); + internal::permute_symm_to_fullsymm(src.matrix(), tmp, src.perm().indices().data()); dst = tmp; } template static void run(SparseSelfAdjointView& dst, const SrcXprType& src, const internal::assign_op&) { - internal::permute_symm_to_symm(src.matrix(), dst.matrix(), src.perm().indices().data()); + internal::permute_symm_to_symm(src.matrix(), dst.matrix(), src.perm().indices().data()); } }; diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/SparseCore/SparseVector.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/SparseCore/SparseVector.h index 07337188b0..fac162e93e 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/SparseCore/SparseVector.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/SparseCore/SparseVector.h @@ -304,6 +304,24 @@ class SparseVector : public SparseCompressedBaseswap(other); } + + template + inline SparseVector(SparseCompressedBase&& other) : SparseVector() { + *this = other.derived().markAsRValue(); + } + + inline SparseVector& operator=(SparseVector&& other) { + this->swap(other); + return *this; + } + + template + inline SparseVector& operator=(SparseCompressedBase&& other) { + *this = other.derived().markAsRValue(); + return *this; + } + #ifndef EIGEN_PARSED_BY_DOXYGEN template inline SparseVector& operator=(const SparseSparseProduct& product) { diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/SparseLU/SparseLU.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/SparseLU/SparseLU.h index aee3d947df..29be01a27a 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/SparseLU/SparseLU.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/SparseLU/SparseLU.h @@ -99,13 +99,34 @@ class SparseLUTransposeView : public SparseSolverBase A; - * SparseLU, COLAMDOrdering > solver; - * // fill A and b; - * // Compute the ordering permutation vector from the structural pattern of A + * SparseLU, COLAMDOrdering > solver; + * // Fill A and b. + * // Compute the ordering permutation vector from the structural pattern of A. * solver.analyzePattern(A); - * // Compute the numerical factorization + * // Compute the numerical factorization. * solver.factorize(A); - * //Use the factors to solve the linear system + * // Use the factors to solve the linear system. + * x = solver.solve(b); + * \endcode + * + * We can directly call compute() instead of analyzePattern() and factorize() + * \code + * VectorXd x(n), b(n); + * SparseMatrix A; + * SparseLU, COLAMDOrdering > solver; + * // Fill A and b. + * solver.compute(A); + * // Use the factors to solve the linear system. + * x = solver.solve(b); + * \endcode + * + * Or give the matrix to the constructor SparseLU(const MatrixType& matrix) + * \code + * VectorXd x(n), b(n); + * SparseMatrix A; + * // Fill A and b. + * SparseLU, COLAMDOrdering > solver(A); + * // Use the factors to solve the linear system. * x = solver.solve(b); * \endcode * @@ -150,10 +171,18 @@ class SparseLU : public SparseSolverBase>, enum { ColsAtCompileTime = MatrixType::ColsAtCompileTime, MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime }; public: + /** \brief Basic constructor of the solver. + * + * Construct a SparseLU. As no matrix is given as argument, compute() should be called afterward with a matrix. + */ SparseLU() : m_lastError(""), m_Ustore(0, 0, 0, 0, 0, 0), m_symmetricmode(false), m_diagpivotthresh(1.0), m_detPermR(1) { initperfvalues(); } + /** \brief Constructor of the solver already based on a specific matrix. + * + * Construct a SparseLU. compute() is already called with the given matrix. + */ explicit SparseLU(const MatrixType& matrix) : m_lastError(""), m_Ustore(0, 0, 0, 0, 0, 0), m_symmetricmode(false), m_diagpivotthresh(1.0), m_detPermR(1) { initperfvalues(); @@ -168,9 +197,15 @@ class SparseLU : public SparseSolverBase>, void factorize(const MatrixType& matrix); void simplicialfactorize(const MatrixType& matrix); - /** + /** \brief Analyze and factorize the matrix so the solver is ready to solve. + * * Compute the symbolic and numeric factorization of the input sparse matrix. - * The input matrix should be in column-major storage. + * The input matrix should be in column-major storage, otherwise analyzePattern() + * will do a heavy copy. + * + * Call analyzePattern() followed by factorize() + * + * \sa analyzePattern(), factorize() */ void compute(const MatrixType& matrix) { // Analyze @@ -179,7 +214,9 @@ class SparseLU : public SparseSolverBase>, factorize(matrix); } - /** \returns an expression of the transposed of the factored matrix. + /** \brief Return a solver for the transposed matrix. + * + * \returns an expression of the transposed of the factored matrix. * * A typical usage is to solve for the transposed problem A^T x = b: * \code @@ -196,7 +233,9 @@ class SparseLU : public SparseSolverBase>, return transposeView; } - /** \returns an expression of the adjoint of the factored matrix + /** \brief Return a solver for the adjointed matrix. + * + * \returns an expression of the adjoint of the factored matrix * * A typical usage is to solve for the adjoint problem A' x = b: * \code @@ -215,19 +254,28 @@ class SparseLU : public SparseSolverBase>, return adjointView; } + /** \brief Give the number of rows. + */ inline Index rows() const { return m_mat.rows(); } + /** \brief Give the numver of columns. + */ inline Index cols() const { return m_mat.cols(); } - /** Indicate that the pattern of the input matrix is symmetric */ + /** \brief Let you set that the pattern of the input matrix is symmetric + */ void isSymmetric(bool sym) { m_symmetricmode = sym; } - /** \returns an expression of the matrix L, internally stored as supernodes + /** \brief Give the matrixL + * + * \returns an expression of the matrix L, internally stored as supernodes * The only operation available with this expression is the triangular solve * \code * y = b; matrixL().solveInPlace(y); * \endcode */ SparseLUMatrixLReturnType matrixL() const { return SparseLUMatrixLReturnType(m_Lstore); } - /** \returns an expression of the matrix U, + /** \brief Give the MatrixU + * + * \returns an expression of the matrix U, * The only operation available with this expression is the triangular solve * \code * y = b; matrixU().solveInPlace(y); @@ -237,12 +285,14 @@ class SparseLU : public SparseSolverBase>, return SparseLUMatrixUReturnType>>(m_Lstore, m_Ustore); } - /** + /** \brief Give the row matrix permutation. + * * \returns a reference to the row matrix permutation \f$ P_r \f$ such that \f$P_r A P_c^T = L U\f$ * \sa colsPermutation() */ inline const PermutationType& rowsPermutation() const { return m_perm_r; } - /** + /** \brief Give the column matrix permutation. + * * \returns a reference to the column matrix permutation\f$ P_c^T \f$ such that \f$P_r A P_c^T = L U\f$ * \sa rowsPermutation() */ @@ -251,7 +301,9 @@ class SparseLU : public SparseSolverBase>, void setPivotThreshold(const RealScalar& thresh) { m_diagpivotthresh = thresh; } #ifdef EIGEN_PARSED_BY_DOXYGEN - /** \returns the solution X of \f$ A X = B \f$ using the current decomposition of A. + /** \brief Solve a system \f$ A X = B \f$ + * + * \returns the solution X of \f$ A X = B \f$ using the current decomposition of A. * * \warning the destination matrix X in X = this->solve(B) must be colmun-major. * @@ -267,14 +319,17 @@ class SparseLU : public SparseSolverBase>, * \c NumericalIssue if the LU factorization reports a problem, zero diagonal for instance * \c InvalidInput if the input matrix is invalid * - * \sa iparm() + * You can get a readable error message with lastErrorMessage(). + * + * \sa lastErrorMessage() */ ComputationInfo info() const { eigen_assert(m_isInitialized && "Decomposition is not initialized."); return m_info; } - /** + /** \brief Give a human readable error + * * \returns A string describing the type of error */ std::string lastErrorMessage() const { return m_lastError; } @@ -302,7 +357,8 @@ class SparseLU : public SparseSolverBase>, return true; } - /** + /** \brief Give the absolute value of the determinant. + * * \returns the absolute value of the determinant of the matrix of which * *this is the QR decomposition. * @@ -330,7 +386,9 @@ class SparseLU : public SparseSolverBase>, return det; } - /** \returns the natural log of the absolute value of the determinant of the matrix + /** \brief Give the natural log of the absolute determinant. + * + * \returns the natural log of the absolute value of the determinant of the matrix * of which **this is the QR decomposition * * \note This method is useful to work around the risk of overflow/underflow that's @@ -356,7 +414,9 @@ class SparseLU : public SparseSolverBase>, return det; } - /** \returns A number representing the sign of the determinant + /** \brief Give the sign of the determinant. + * + * \returns A number representing the sign of the determinant * * \sa absDeterminant(), logAbsDeterminant() */ @@ -380,7 +440,9 @@ class SparseLU : public SparseSolverBase>, return det * m_detPermR * m_detPermC; } - /** \returns The determinant of the matrix. + /** \brief Give the determinant. + * + * \returns The determinant of the matrix. * * \sa absDeterminant(), logAbsDeterminant() */ @@ -401,7 +463,11 @@ class SparseLU : public SparseSolverBase>, return (m_detPermR * m_detPermC) > 0 ? det : -det; } + /** \brief Give the number of non zero in matrix L. + */ Index nnzL() const { return m_nnzL; } + /** \brief Give the number of non zero in matrix U. + */ Index nnzU() const { return m_nnzU; } protected: @@ -442,7 +508,8 @@ class SparseLU : public SparseSolverBase>, }; // End class SparseLU // Functions needed by the anaysis phase -/** +/** \brief Compute the column permutation. + * * Compute the column permutation to minimize the fill-in * * - Apply this permutation to the input matrix - @@ -451,6 +518,11 @@ class SparseLU : public SparseSolverBase>, * * - Postorder the elimination tree and the column permutation * + * It is possible to call compute() instead of analyzePattern() + factorize(). + * + * If the matrix is row-major this function will do an heavy copy. + * + * \sa factorize(), compute() */ template void SparseLU::analyzePattern(const MatrixType& mat) { @@ -516,23 +588,24 @@ void SparseLU::analyzePattern(const MatrixType& mat) { // Functions needed by the numerical factorization phase -/** +/** \brief Factorize the matrix to get the solver ready. + * * - Numerical factorization * - Interleaved with the symbolic factorization - * On exit, info is * - * = 0: successful factorization + * To get error of this function you should check info(), you can get more info of + * errors with lastErrorMessage(). * - * > 0: if info = i, and i is + * In the past (before 2012 (git history is not older)), this function was returning an integer. + * This exit was 0 if successful factorization. + * > 0 if info = i, and i is been completed, but the factor U is exactly singular, + * and division by zero will occur if it is used to solve a system of equation. + * > A->ncol: number of bytes allocated when memory allocation failure occured, plus A->ncol. + * If lwork = -1, it is the estimated amount of space needed, plus A->ncol. * - * <= A->ncol: U(i,i) is exactly zero. The factorization has - * been completed, but the factor U is exactly singular, - * and division by zero will occur if it is used to solve a - * system of equations. + * It seems that A was the name of the matrix in the past. * - * > A->ncol: number of bytes allocated when memory allocation - * failure occurred, plus A->ncol. If lwork = -1, it is - * the estimated amount of space needed, plus A->ncol. + * \sa analyzePattern(), compute(), SparseLU(), info(), lastErrorMessage() */ template void SparseLU::factorize(const MatrixType& matrix) { @@ -572,6 +645,8 @@ void SparseLU::factorize(const MatrixType& matrix) { Index maxpanel = m_perfv.panel_size * m; // Allocate working storage common to the factor routines Index lwork = 0; + // Return the size of actually allocated memory when allocation failed, + // and 0 on success. Index info = Base::memInit(m, n, nnz, lwork, m_perfv.fillfactor, m_perfv.panel_size, m_glu); if (info) { m_lastError = "UNABLE TO ALLOCATE WORKING MEMORY\n\n"; @@ -656,6 +731,7 @@ void SparseLU::factorize(const MatrixType& matrix) { // Depth-first-search for the current column VectorBlock panel_lsubk(panel_lsub, k, m); VectorBlock repfnz_k(repfnz, k, m); + // Return 0 on success and > 0 number of bytes allocated when run out of space. info = Base::column_dfs(m, jj, m_perm_r.indices(), m_perfv.maxsuper, nseg, panel_lsubk, segrep, repfnz_k, xprune, marker, parent, xplore, m_glu); if (info) { @@ -667,6 +743,7 @@ void SparseLU::factorize(const MatrixType& matrix) { // Numeric updates to this column VectorBlock dense_k(dense, k, m); VectorBlock segrep_k(segrep, nseg1, m - nseg1); + // Return 0 on success and > 0 number of bytes allocated when run out of space. info = Base::column_bmod(jj, (nseg - nseg1), dense_k, tempv, segrep_k, repfnz_k, jcol, m_glu); if (info) { m_lastError = "UNABLE TO EXPAND MEMORY IN COLUMN_BMOD() "; @@ -676,6 +753,7 @@ void SparseLU::factorize(const MatrixType& matrix) { } // Copy the U-segments to ucol(*) + // Return 0 on success and > 0 number of bytes allocated when run out of space. info = Base::copy_to_ucol(jj, nseg, segrep, repfnz_k, m_perm_r.indices(), dense_k, m_glu); if (info) { m_lastError = "UNABLE TO EXPAND MEMORY IN COPY_TO_UCOL() "; @@ -685,6 +763,7 @@ void SparseLU::factorize(const MatrixType& matrix) { } // Form the L-segment + // Return O if success, i > 0 if U(i, i) is exactly zero. info = Base::pivotL(jj, m_diagpivotthresh, m_perm_r.indices(), iperm_c.indices(), pivrow, m_glu); if (info) { m_lastError = "THE MATRIX IS STRUCTURALLY SINGULAR"; diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/SparseQR/SparseQR.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/SparseQR/SparseQR.h index 3e3352fb08..acb0c5ffca 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/SparseQR/SparseQR.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/SparseQR/SparseQR.h @@ -481,9 +481,7 @@ void SparseQR::factorize(const MatrixType& mat) { tdot *= m_hcoeffs(curIdx); // Then update tval = tval - q * tau - // FIXME: tval -= tdot * m_Q.col(curIdx) should amount to the same (need to check/add support for efficient "dense - // ?= sparse") - for (typename QRMatrixType::InnerIterator itq(m_Q, curIdx); itq; ++itq) tval(itq.row()) -= itq.value() * tdot; + tval -= tdot * m_Q.col(curIdx); // Detect fill-in for the current column of Q if (m_etree(Ridx(i)) == nonzeroCol) { diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/misc/Image.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/misc/Image.h index fd1ac99265..38d516e28c 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/misc/Image.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/misc/Image.h @@ -27,7 +27,7 @@ struct traits > { MatrixType::RowsAtCompileTime, // the image is a subspace of the destination space, whose // dimension is the number of rows of the original matrix Dynamic, // we don't know at compile time the dimension of the image (the rank) - MatrixType::Options, + traits::Options, MatrixType::MaxRowsAtCompileTime, // the image matrix will consist of columns from the original // matrix, MatrixType::MaxColsAtCompileTime // so it has the same number of rows and at most as many columns. diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/misc/Kernel.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/misc/Kernel.h index 55c3efe372..3ed458bc5f 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/misc/Kernel.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/misc/Kernel.h @@ -28,7 +28,7 @@ struct traits > { // is the number of cols of the original matrix // so that the product "matrix * kernel = zero" makes sense Dynamic, // we don't know at compile-time the dimension of the kernel - MatrixType::Options, + traits::Options, MatrixType::MaxColsAtCompileTime, // see explanation for 2nd template parameter MatrixType::MaxColsAtCompileTime // the kernel is a subspace of the domain space, // whose dimension is the number of columns of the original matrix diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/plugins/ArrayCwiseUnaryOps.inc b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/plugins/ArrayCwiseUnaryOps.inc index d03edc249c..5e5d45b052 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/plugins/ArrayCwiseUnaryOps.inc +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/plugins/ArrayCwiseUnaryOps.inc @@ -37,6 +37,7 @@ typedef CwiseUnaryOp, const Derived> RoundRetu typedef CwiseUnaryOp, const Derived> RintReturnType; typedef CwiseUnaryOp, const Derived> FloorReturnType; typedef CwiseUnaryOp, const Derived> CeilReturnType; +typedef CwiseUnaryOp, const Derived> TruncReturnType; typedef CwiseUnaryOp, const Derived> IsNaNReturnType; typedef CwiseUnaryOp, const Derived> IsInfReturnType; typedef CwiseUnaryOp, const Derived> IsFiniteReturnType; @@ -347,6 +348,15 @@ EIGEN_DEVICE_FUNC inline const FloorReturnType floor() const { return FloorRetur */ EIGEN_DEVICE_FUNC inline const CeilReturnType ceil() const { return CeilReturnType(derived()); } +/** \returns an expression of the coefficient-wise truncation of *this. + * + * Example: \include Cwise_trunc.cpp + * Output: \verbinclude Cwise_trunc.out + * + * \sa Math functions, floor(), round() + */ +EIGEN_DEVICE_FUNC inline const TruncReturnType trunc() const { return TruncReturnType(derived()); } + template struct ShiftRightXpr { typedef CwiseUnaryOp, const Derived> Type; diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/plugins/CommonCwiseUnaryOps.inc b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/plugins/CommonCwiseUnaryOps.inc index f20f2f8177..64f3648840 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/plugins/CommonCwiseUnaryOps.inc +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/plugins/CommonCwiseUnaryOps.inc @@ -118,7 +118,7 @@ EIGEN_DEVICE_FUNC inline const CwiseUnaryOp unaryE return CwiseUnaryOp(derived(), func); } -/// \returns an expression of a custom coefficient-wise unary operator \a func of *this +/// \returns a const expression of a custom coefficient-wise unary operator \a func of *this /// /// The template parameter \a CustomUnaryOp is the type of the functor /// of the custom unary operator. @@ -137,6 +137,21 @@ EIGEN_DEVICE_FUNC inline const CwiseUnaryView unary return CwiseUnaryView(derived(), func); } +/// \returns a non-const expression of a custom coefficient-wise unary view \a func of *this +/// +/// The template parameter \a CustomUnaryOp is the type of the functor +/// of the custom unary operator. +/// +EIGEN_DOC_UNARY_ADDONS(unaryViewExpr, unary function) +/// +/// \sa unaryExpr, binaryExpr class CwiseUnaryOp +/// +template +EIGEN_DEVICE_FUNC inline CwiseUnaryView unaryViewExpr( + const CustomViewOp& func = CustomViewOp()) { + return CwiseUnaryView(derived(), func); +} + /// \returns a non const expression of the real part of \c *this. /// EIGEN_DOC_UNARY_ADDONS(real, real part function) diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/plugins/IndexedViewMethods.inc b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/plugins/IndexedViewMethods.inc index 26e7b5fc19..a51e3492f5 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/plugins/IndexedViewMethods.inc +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/plugins/IndexedViewMethods.inc @@ -9,172 +9,7 @@ #if !defined(EIGEN_PARSED_BY_DOXYGEN) -protected: -// define some aliases to ease readability - -template -using IvcRowType = typename internal::IndexedViewCompatibleType::type; - -template -using IvcColType = typename internal::IndexedViewCompatibleType::type; - -template -using IvcType = typename internal::IndexedViewCompatibleType::type; - -typedef typename internal::IndexedViewCompatibleType::type IvcIndex; - -template -inline IvcRowType ivcRow(const Indices& indices) const { - return internal::makeIndexedViewCompatible( - indices, internal::variable_if_dynamic(derived().rows()), Specialized); -} - -template -inline IvcColType ivcCol(const Indices& indices) const { - return internal::makeIndexedViewCompatible( - indices, internal::variable_if_dynamic(derived().cols()), Specialized); -} - -template -inline IvcType ivcSize(const Indices& indices) const { - return internal::makeIndexedViewCompatible( - indices, internal::variable_if_dynamic(derived().size()), Specialized); -} - -// this helper class assumes internal::valid_indexed_view_overload::value == true -template , IvcColType>>::ReturnAsScalar, - bool UseBlock = - internal::traits, IvcColType>>::ReturnAsBlock, - bool UseGeneric = internal::traits< - IndexedView, IvcColType>>::ReturnAsIndexedView> -struct IndexedViewSelector; - -// Generic -template -struct IndexedViewSelector { - using ReturnType = IndexedView, IvcColType>; - using ConstReturnType = IndexedView, IvcColType>; - - static inline ReturnType run(Derived& derived, const RowIndices& rowIndices, const ColIndices& colIndices) { - return ReturnType(derived, derived.ivcRow(rowIndices), derived.ivcCol(colIndices)); - } - static inline ConstReturnType run(const Derived& derived, const RowIndices& rowIndices, - const ColIndices& colIndices) { - return ConstReturnType(derived, derived.ivcRow(rowIndices), derived.ivcCol(colIndices)); - } -}; - -// Block -template -struct IndexedViewSelector { - using IndexedViewType = IndexedView, IvcColType>; - using ConstIndexedViewType = IndexedView, IvcColType>; - using ReturnType = typename internal::traits::BlockType; - using ConstReturnType = typename internal::traits::BlockType; - - static inline ReturnType run(Derived& derived, const RowIndices& rowIndices, const ColIndices& colIndices) { - IvcRowType actualRowIndices = derived.ivcRow(rowIndices); - IvcColType actualColIndices = derived.ivcCol(colIndices); - return ReturnType(derived, internal::first(actualRowIndices), internal::first(actualColIndices), - internal::index_list_size(actualRowIndices), internal::index_list_size(actualColIndices)); - } - static inline ConstReturnType run(const Derived& derived, const RowIndices& rowIndices, - const ColIndices& colIndices) { - IvcRowType actualRowIndices = derived.ivcRow(rowIndices); - IvcColType actualColIndices = derived.ivcCol(colIndices); - return ConstReturnType(derived, internal::first(actualRowIndices), internal::first(actualColIndices), - internal::index_list_size(actualRowIndices), internal::index_list_size(actualColIndices)); - } -}; - -// Symbolic -template -struct IndexedViewSelector { - using ReturnType = typename DenseBase::Scalar&; - using ConstReturnType = typename DenseBase::CoeffReturnType; - - static inline ReturnType run(Derived& derived, const RowIndices& rowIndices, const ColIndices& colIndices) { - return derived(internal::eval_expr_given_size(rowIndices, derived.rows()), - internal::eval_expr_given_size(colIndices, derived.cols())); - } - static inline ConstReturnType run(const Derived& derived, const RowIndices& rowIndices, - const ColIndices& colIndices) { - return derived(internal::eval_expr_given_size(rowIndices, derived.rows()), - internal::eval_expr_given_size(colIndices, derived.cols())); - } -}; - -// this helper class assumes internal::is_valid_index_type::value == false -template ::value, - bool UseBlock = !UseSymbolic && internal::get_compile_time_incr>::value == 1, - bool UseGeneric = !UseSymbolic && !UseBlock> -struct VectorIndexedViewSelector; - -// Generic -template -struct VectorIndexedViewSelector { - static constexpr bool IsRowMajor = DenseBase::IsRowMajor; - - using RowMajorReturnType = IndexedView>; - using ConstRowMajorReturnType = IndexedView>; - - using ColMajorReturnType = IndexedView, IvcIndex>; - using ConstColMajorReturnType = IndexedView, IvcIndex>; - - using ReturnType = typename internal::conditional::type; - using ConstReturnType = - typename internal::conditional::type; - - template = true> - static inline RowMajorReturnType run(Derived& derived, const Indices& indices) { - return RowMajorReturnType(derived, IvcIndex(0), derived.ivcCol(indices)); - } - template = true> - static inline ConstRowMajorReturnType run(const Derived& derived, const Indices& indices) { - return ConstRowMajorReturnType(derived, IvcIndex(0), derived.ivcCol(indices)); - } - template = true> - static inline ColMajorReturnType run(Derived& derived, const Indices& indices) { - return ColMajorReturnType(derived, derived.ivcRow(indices), IvcIndex(0)); - } - template = true> - static inline ConstColMajorReturnType run(const Derived& derived, const Indices& indices) { - return ConstColMajorReturnType(derived, derived.ivcRow(indices), IvcIndex(0)); - } -}; - -// Block -template -struct VectorIndexedViewSelector { - using ReturnType = VectorBlock::value>; - using ConstReturnType = VectorBlock::value>; - - static inline ReturnType run(Derived& derived, const Indices& indices) { - IvcType actualIndices = derived.ivcSize(indices); - return ReturnType(derived, internal::first(actualIndices), internal::index_list_size(actualIndices)); - } - static inline ConstReturnType run(const Derived& derived, const Indices& indices) { - IvcType actualIndices = derived.ivcSize(indices); - return ConstReturnType(derived, internal::first(actualIndices), internal::index_list_size(actualIndices)); - } -}; - -// Symbolic -template -struct VectorIndexedViewSelector { - using ReturnType = typename DenseBase::Scalar&; - using ConstReturnType = typename DenseBase::CoeffReturnType; - - static inline ReturnType run(Derived& derived, const Indices& id) { - return derived(internal::eval_expr_given_size(id, derived.size())); - } - static inline ConstReturnType run(const Derived& derived, const Indices& id) { - return derived(internal::eval_expr_given_size(id, derived.size())); - } -}; - +public: // SFINAE dummy types template @@ -197,24 +32,26 @@ public: // non-const versions -template -using IndexedViewType = typename IndexedViewSelector::ReturnType; + template + using IndexedViewType = typename internal::IndexedViewSelector::ReturnType; -template = true> -IndexedViewType operator()(const RowIndices& rowIndices, const ColIndices& colIndices) { - return IndexedViewSelector::run(derived(), rowIndices, colIndices); -} + template = true> + IndexedViewType operator()(const RowIndices& rowIndices, const ColIndices& colIndices) { + return internal::IndexedViewSelector::run(derived(), rowIndices, colIndices); + } template , EnableOverload = true> IndexedViewType operator()(const RowType (&rowIndices)[RowSize], const ColIndices& colIndices) { - return IndexedViewSelector::run(derived(), RowIndices{rowIndices}, colIndices); + return internal::IndexedViewSelector::run(derived(), RowIndices{rowIndices}, + colIndices); } template , EnableOverload = true> IndexedViewType operator()(const RowIndices& rowIndices, const ColType (&colIndices)[ColSize]) { - return IndexedViewSelector::run(derived(), rowIndices, ColIndices{colIndices}); + return internal::IndexedViewSelector::run(derived(), rowIndices, + ColIndices{colIndices}); } template = true> IndexedViewType operator()(const RowType (&rowIndices)[RowSize], const ColType (&colIndices)[ColSize]) { - return IndexedViewSelector::run(derived(), RowIndices{rowIndices}, ColIndices{colIndices}); + return internal::IndexedViewSelector::run(derived(), RowIndices{rowIndices}, + ColIndices{colIndices}); } // const versions template -using ConstIndexedViewType = typename IndexedViewSelector::ConstReturnType; +using ConstIndexedViewType = typename internal::IndexedViewSelector::ConstReturnType; template = true> ConstIndexedViewType operator()(const RowIndices& rowIndices, const ColIndices& colIndices) const { - return IndexedViewSelector::run(derived(), rowIndices, colIndices); + return internal::IndexedViewSelector::run(derived(), rowIndices, colIndices); } template , EnableConstOverload = true> ConstIndexedViewType operator()(const RowType (&rowIndices)[RowSize], const ColIndices& colIndices) const { - return IndexedViewSelector::run(derived(), RowIndices{rowIndices}, colIndices); + return internal::IndexedViewSelector::run(derived(), RowIndices{rowIndices}, + colIndices); } template , EnableConstOverload = true> ConstIndexedViewType operator()(const RowIndices& rowIndices, const ColType (&colIndices)[ColSize]) const { - return IndexedViewSelector::run(derived(), rowIndices, ColIndices{colIndices}); + return internal::IndexedViewSelector::run(derived(), rowIndices, + ColIndices{colIndices}); } template = true> ConstIndexedViewType operator()(const RowType (&rowIndices)[RowSize], const ColType (&colIndices)[ColSize]) const { - return IndexedViewSelector::run(derived(), RowIndices{rowIndices}, ColIndices{colIndices}); + return internal::IndexedViewSelector::run(derived(), RowIndices{rowIndices}, + ColIndices{colIndices}); } // Public API for 1D vectors/arrays @@ -263,37 +104,37 @@ ConstIndexedViewType operator()(const RowType (&rowIndic // non-const versions template -using VectorIndexedViewType = typename VectorIndexedViewSelector::ReturnType; +using VectorIndexedViewType = typename internal::VectorIndexedViewSelector::ReturnType; template = true> VectorIndexedViewType operator()(const Indices& indices) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - return VectorIndexedViewSelector::run(derived(), indices); + return internal::VectorIndexedViewSelector::run(derived(), indices); } template , EnableVectorOverload = true> VectorIndexedViewType operator()(const IndexType (&indices)[Size]) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - return VectorIndexedViewSelector::run(derived(), Indices{indices}); + return internal::VectorIndexedViewSelector::run(derived(), Indices{indices}); } // const versions template -using ConstVectorIndexedViewType = typename VectorIndexedViewSelector::ConstReturnType; +using ConstVectorIndexedViewType = typename internal::VectorIndexedViewSelector::ConstReturnType; template = true> ConstVectorIndexedViewType operator()(const Indices& indices) const { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - return VectorIndexedViewSelector::run(derived(), indices); + return internal::VectorIndexedViewSelector::run(derived(), indices); } template , EnableConstVectorOverload = true> ConstVectorIndexedViewType operator()(const IndexType (&indices)[Size]) const { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - return VectorIndexedViewSelector::run(derived(), Indices{indices}); + return internal::VectorIndexedViewSelector::run(derived(), Indices{indices}); } #else // EIGEN_PARSED_BY_DOXYGEN diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/plugins/MatrixCwiseUnaryOps.inc b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/plugins/MatrixCwiseUnaryOps.inc index b23f4a5aee..325b0fbe0d 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/plugins/MatrixCwiseUnaryOps.inc +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/plugins/MatrixCwiseUnaryOps.inc @@ -17,6 +17,7 @@ typedef CwiseUnaryOp, const Derived> CwiseArgRet typedef CwiseUnaryOp, const Derived> CwiseCArgReturnType; typedef CwiseUnaryOp, const Derived> CwiseSqrtReturnType; typedef CwiseUnaryOp, const Derived> CwiseCbrtReturnType; +typedef CwiseUnaryOp, const Derived> CwiseSquareReturnType; typedef CwiseUnaryOp, const Derived> CwiseSignReturnType; typedef CwiseUnaryOp, const Derived> CwiseInverseReturnType; @@ -66,7 +67,15 @@ EIGEN_DOC_UNARY_ADDONS(cwiseCbrt, cube - root) /// /// \sa cwiseSqrt(), cwiseSquare(), cwisePow() /// -EIGEN_DEVICE_FUNC inline const CwiseCbrtReturnType cwiseCbrt() const { return CwiseSCbrtReturnType(derived()); } +EIGEN_DEVICE_FUNC inline const CwiseCbrtReturnType cwiseCbrt() const { return CwiseCbrtReturnType(derived()); } + +/// \returns an expression of the coefficient-wise square of *this. +/// +EIGEN_DOC_UNARY_ADDONS(cwiseSquare, square) +/// +/// \sa cwisePow(), cwiseSqrt(), cwiseCbrt() +/// +EIGEN_DEVICE_FUNC inline const CwiseSquareReturnType cwiseSquare() const { return CwiseSquareReturnType(derived()); } /// \returns an expression of the coefficient-wise signum of *this. ///