mirror of
https://github.com/wpilibsuite/allwpilib
synced 2026-06-28 02:11:43 +00:00
[upstream_utils] Upgrade Eigen to include GCC 14 patches (#6646)
This commit is contained in:
@@ -2,6 +2,8 @@
|
||||
Language: Cpp
|
||||
BasedOnStyle: Google
|
||||
ColumnLimit: 120
|
||||
StatementMacros:
|
||||
- EIGEN_STATIC_ASSERT
|
||||
SortIncludes: false
|
||||
AttributeMacros:
|
||||
- EIGEN_STRONG_INLINE
|
||||
|
||||
@@ -404,6 +404,7 @@ using std::ptrdiff_t;
|
||||
#endif
|
||||
|
||||
#include "src/Core/GlobalFunctions.h"
|
||||
#include "src/Core/DeviceWrapper.h"
|
||||
// IWYU pragma: end_exports
|
||||
|
||||
#include "src/Core/util/ReenableStupidWarnings.h"
|
||||
|
||||
@@ -167,7 +167,7 @@ class plainobjectbase_evaluator_data<Scalar, Dynamic> {
|
||||
};
|
||||
|
||||
template <typename Derived>
|
||||
struct evaluator<PlainObjectBase<Derived> > : evaluator_base<Derived> {
|
||||
struct evaluator<PlainObjectBase<Derived>> : evaluator_base<Derived> {
|
||||
typedef PlainObjectBase<Derived> PlainObjectType;
|
||||
typedef typename PlainObjectType::Scalar Scalar;
|
||||
typedef typename PlainObjectType::CoeffReturnType CoeffReturnType;
|
||||
@@ -247,31 +247,29 @@ struct evaluator<PlainObjectBase<Derived> > : evaluator_base<Derived> {
|
||||
};
|
||||
|
||||
template <typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>
|
||||
struct evaluator<Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >
|
||||
: evaluator<PlainObjectBase<Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols> > > {
|
||||
struct evaluator<Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols>>
|
||||
: evaluator<PlainObjectBase<Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols>>> {
|
||||
typedef Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols> XprType;
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE evaluator() {}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& m)
|
||||
: evaluator<PlainObjectBase<XprType> >(m) {}
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& m) : evaluator<PlainObjectBase<XprType>>(m) {}
|
||||
};
|
||||
|
||||
template <typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>
|
||||
struct evaluator<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >
|
||||
: evaluator<PlainObjectBase<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> > > {
|
||||
struct evaluator<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols>>
|
||||
: evaluator<PlainObjectBase<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols>>> {
|
||||
typedef Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> XprType;
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE evaluator() {}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& m)
|
||||
: evaluator<PlainObjectBase<XprType> >(m) {}
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& m) : evaluator<PlainObjectBase<XprType>>(m) {}
|
||||
};
|
||||
|
||||
// -------------------- Transpose --------------------
|
||||
|
||||
template <typename ArgType>
|
||||
struct unary_evaluator<Transpose<ArgType>, IndexBased> : evaluator_base<Transpose<ArgType> > {
|
||||
struct unary_evaluator<Transpose<ArgType>, IndexBased> : evaluator_base<Transpose<ArgType>> {
|
||||
typedef Transpose<ArgType> XprType;
|
||||
|
||||
enum {
|
||||
@@ -460,8 +458,8 @@ struct nullary_wrapper<Scalar,NullaryOp,true,true,true>
|
||||
#endif // MSVC workaround
|
||||
|
||||
template <typename NullaryOp, typename PlainObjectType>
|
||||
struct evaluator<CwiseNullaryOp<NullaryOp, PlainObjectType> >
|
||||
: evaluator_base<CwiseNullaryOp<NullaryOp, PlainObjectType> > {
|
||||
struct evaluator<CwiseNullaryOp<NullaryOp, PlainObjectType>>
|
||||
: evaluator_base<CwiseNullaryOp<NullaryOp, PlainObjectType>> {
|
||||
typedef CwiseNullaryOp<NullaryOp, PlainObjectType> XprType;
|
||||
typedef internal::remove_all_t<PlainObjectType> PlainObjectTypeCleaned;
|
||||
|
||||
@@ -509,7 +507,7 @@ struct evaluator<CwiseNullaryOp<NullaryOp, PlainObjectType> >
|
||||
// -------------------- CwiseUnaryOp --------------------
|
||||
|
||||
template <typename UnaryOp, typename ArgType>
|
||||
struct unary_evaluator<CwiseUnaryOp<UnaryOp, ArgType>, IndexBased> : evaluator_base<CwiseUnaryOp<UnaryOp, ArgType> > {
|
||||
struct unary_evaluator<CwiseUnaryOp<UnaryOp, ArgType>, IndexBased> : evaluator_base<CwiseUnaryOp<UnaryOp, ArgType>> {
|
||||
typedef CwiseUnaryOp<UnaryOp, ArgType> XprType;
|
||||
|
||||
enum {
|
||||
@@ -762,17 +760,17 @@ struct unary_evaluator<CwiseUnaryOp<core_cast_op<SrcType, DstType>, ArgType>, In
|
||||
|
||||
// this is a ternary expression
|
||||
template <typename TernaryOp, typename Arg1, typename Arg2, typename Arg3>
|
||||
struct evaluator<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> >
|
||||
: public ternary_evaluator<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> > {
|
||||
struct evaluator<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3>>
|
||||
: public ternary_evaluator<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3>> {
|
||||
typedef CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> XprType;
|
||||
typedef ternary_evaluator<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> > Base;
|
||||
typedef ternary_evaluator<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3>> Base;
|
||||
|
||||
EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {}
|
||||
};
|
||||
|
||||
template <typename TernaryOp, typename Arg1, typename Arg2, typename Arg3>
|
||||
struct ternary_evaluator<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3>, IndexBased, IndexBased>
|
||||
: evaluator_base<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> > {
|
||||
: evaluator_base<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3>> {
|
||||
typedef CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> XprType;
|
||||
|
||||
enum {
|
||||
@@ -865,16 +863,16 @@ struct evaluator<CwiseTernaryOp<scalar_boolean_select_op<Scalar, Scalar, bool>,
|
||||
|
||||
// this is a binary expression
|
||||
template <typename BinaryOp, typename Lhs, typename Rhs>
|
||||
struct evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs> > : public binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs> > {
|
||||
struct evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>> : public binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>> {
|
||||
typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> XprType;
|
||||
typedef binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs> > Base;
|
||||
typedef binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>> Base;
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr) : Base(xpr) {}
|
||||
};
|
||||
|
||||
template <typename BinaryOp, typename Lhs, typename Rhs>
|
||||
struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IndexBased, IndexBased>
|
||||
: evaluator_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> > {
|
||||
: evaluator_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs>> {
|
||||
typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> XprType;
|
||||
|
||||
enum {
|
||||
@@ -939,7 +937,7 @@ struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IndexBased, IndexBase
|
||||
|
||||
template <typename UnaryOp, typename ArgType, typename StrideType>
|
||||
struct unary_evaluator<CwiseUnaryView<UnaryOp, ArgType, StrideType>, IndexBased>
|
||||
: evaluator_base<CwiseUnaryView<UnaryOp, ArgType, StrideType> > {
|
||||
: evaluator_base<CwiseUnaryView<UnaryOp, ArgType, StrideType>> {
|
||||
typedef CwiseUnaryView<UnaryOp, ArgType, StrideType> XprType;
|
||||
|
||||
enum {
|
||||
@@ -1067,7 +1065,7 @@ struct mapbase_evaluator : evaluator_base<Derived> {
|
||||
};
|
||||
|
||||
template <typename PlainObjectType, int MapOptions, typename StrideType>
|
||||
struct evaluator<Map<PlainObjectType, MapOptions, StrideType> >
|
||||
struct evaluator<Map<PlainObjectType, MapOptions, StrideType>>
|
||||
: public mapbase_evaluator<Map<PlainObjectType, MapOptions, StrideType>, PlainObjectType> {
|
||||
typedef Map<PlainObjectType, MapOptions, StrideType> XprType;
|
||||
typedef typename XprType::Scalar Scalar;
|
||||
@@ -1100,13 +1098,13 @@ struct evaluator<Map<PlainObjectType, MapOptions, StrideType> >
|
||||
// -------------------- Ref --------------------
|
||||
|
||||
template <typename PlainObjectType, int RefOptions, typename StrideType>
|
||||
struct evaluator<Ref<PlainObjectType, RefOptions, StrideType> >
|
||||
struct evaluator<Ref<PlainObjectType, RefOptions, StrideType>>
|
||||
: public mapbase_evaluator<Ref<PlainObjectType, RefOptions, StrideType>, PlainObjectType> {
|
||||
typedef Ref<PlainObjectType, RefOptions, StrideType> XprType;
|
||||
|
||||
enum {
|
||||
Flags = evaluator<Map<PlainObjectType, RefOptions, StrideType> >::Flags,
|
||||
Alignment = evaluator<Map<PlainObjectType, RefOptions, StrideType> >::Alignment
|
||||
Flags = evaluator<Map<PlainObjectType, RefOptions, StrideType>>::Flags,
|
||||
Alignment = evaluator<Map<PlainObjectType, RefOptions, StrideType>>::Alignment
|
||||
};
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& ref)
|
||||
@@ -1120,7 +1118,7 @@ template <typename ArgType, int BlockRows, int BlockCols, bool InnerPanel,
|
||||
struct block_evaluator;
|
||||
|
||||
template <typename ArgType, int BlockRows, int BlockCols, bool InnerPanel>
|
||||
struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
|
||||
struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>>
|
||||
: block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel> {
|
||||
typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType;
|
||||
typedef typename XprType::Scalar Scalar;
|
||||
@@ -1171,7 +1169,7 @@ struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
|
||||
// no direct-access => dispatch to a unary evaluator
|
||||
template <typename ArgType, int BlockRows, int BlockCols, bool InnerPanel>
|
||||
struct block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel, /*HasDirectAccess*/ false>
|
||||
: unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> > {
|
||||
: unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>> {
|
||||
typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType;
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit block_evaluator(const XprType& block)
|
||||
@@ -1180,7 +1178,7 @@ struct block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel, /*HasDirectAcc
|
||||
|
||||
template <typename ArgType, int BlockRows, int BlockCols, bool InnerPanel>
|
||||
struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBased>
|
||||
: evaluator_base<Block<ArgType, BlockRows, BlockCols, InnerPanel> > {
|
||||
: evaluator_base<Block<ArgType, BlockRows, BlockCols, InnerPanel>> {
|
||||
typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType;
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit unary_evaluator(const XprType& block)
|
||||
@@ -1293,8 +1291,8 @@ struct block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel, /* HasDirectAc
|
||||
|
||||
// TODO enable vectorization for Select
|
||||
template <typename ConditionMatrixType, typename ThenMatrixType, typename ElseMatrixType>
|
||||
struct evaluator<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >
|
||||
: evaluator_base<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> > {
|
||||
struct evaluator<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType>>
|
||||
: evaluator_base<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType>> {
|
||||
typedef Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> XprType;
|
||||
enum {
|
||||
CoeffReadCost = evaluator<ConditionMatrixType>::CoeffReadCost +
|
||||
@@ -1335,8 +1333,8 @@ struct evaluator<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >
|
||||
// -------------------- Replicate --------------------
|
||||
|
||||
template <typename ArgType, int RowFactor, int ColFactor>
|
||||
struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor> >
|
||||
: evaluator_base<Replicate<ArgType, RowFactor, ColFactor> > {
|
||||
struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor>>
|
||||
: evaluator_base<Replicate<ArgType, RowFactor, ColFactor>> {
|
||||
typedef Replicate<ArgType, RowFactor, ColFactor> XprType;
|
||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||
enum { Factor = (RowFactor == Dynamic || ColFactor == Dynamic) ? Dynamic : RowFactor * ColFactor };
|
||||
@@ -1461,19 +1459,19 @@ struct evaluator_wrapper_base : evaluator_base<XprType> {
|
||||
};
|
||||
|
||||
template <typename TArgType>
|
||||
struct unary_evaluator<MatrixWrapper<TArgType> > : evaluator_wrapper_base<MatrixWrapper<TArgType> > {
|
||||
struct unary_evaluator<MatrixWrapper<TArgType>> : evaluator_wrapper_base<MatrixWrapper<TArgType>> {
|
||||
typedef MatrixWrapper<TArgType> XprType;
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit unary_evaluator(const XprType& wrapper)
|
||||
: evaluator_wrapper_base<MatrixWrapper<TArgType> >(wrapper.nestedExpression()) {}
|
||||
: evaluator_wrapper_base<MatrixWrapper<TArgType>>(wrapper.nestedExpression()) {}
|
||||
};
|
||||
|
||||
template <typename TArgType>
|
||||
struct unary_evaluator<ArrayWrapper<TArgType> > : evaluator_wrapper_base<ArrayWrapper<TArgType> > {
|
||||
struct unary_evaluator<ArrayWrapper<TArgType>> : evaluator_wrapper_base<ArrayWrapper<TArgType>> {
|
||||
typedef ArrayWrapper<TArgType> XprType;
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit unary_evaluator(const XprType& wrapper)
|
||||
: evaluator_wrapper_base<ArrayWrapper<TArgType> >(wrapper.nestedExpression()) {}
|
||||
: evaluator_wrapper_base<ArrayWrapper<TArgType>>(wrapper.nestedExpression()) {}
|
||||
};
|
||||
|
||||
// -------------------- Reverse --------------------
|
||||
@@ -1483,7 +1481,7 @@ template <typename PacketType, bool ReversePacket>
|
||||
struct reverse_packet_cond;
|
||||
|
||||
template <typename ArgType, int Direction>
|
||||
struct unary_evaluator<Reverse<ArgType, Direction> > : evaluator_base<Reverse<ArgType, Direction> > {
|
||||
struct unary_evaluator<Reverse<ArgType, Direction>> : evaluator_base<Reverse<ArgType, Direction>> {
|
||||
typedef Reverse<ArgType, Direction> XprType;
|
||||
typedef typename XprType::Scalar Scalar;
|
||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||
@@ -1584,7 +1582,7 @@ struct unary_evaluator<Reverse<ArgType, Direction> > : evaluator_base<Reverse<Ar
|
||||
// -------------------- Diagonal --------------------
|
||||
|
||||
template <typename ArgType, int DiagIndex>
|
||||
struct evaluator<Diagonal<ArgType, DiagIndex> > : evaluator_base<Diagonal<ArgType, DiagIndex> > {
|
||||
struct evaluator<Diagonal<ArgType, DiagIndex>> : evaluator_base<Diagonal<ArgType, DiagIndex>> {
|
||||
typedef Diagonal<ArgType, DiagIndex> XprType;
|
||||
|
||||
enum {
|
||||
@@ -1643,10 +1641,10 @@ template <typename ArgType>
|
||||
class EvalToTemp;
|
||||
|
||||
template <typename ArgType>
|
||||
struct traits<EvalToTemp<ArgType> > : public traits<ArgType> {};
|
||||
struct traits<EvalToTemp<ArgType>> : public traits<ArgType> {};
|
||||
|
||||
template <typename ArgType>
|
||||
class EvalToTemp : public dense_xpr_base<EvalToTemp<ArgType> >::type {
|
||||
class EvalToTemp : public dense_xpr_base<EvalToTemp<ArgType>>::type {
|
||||
public:
|
||||
typedef typename dense_xpr_base<EvalToTemp>::type Base;
|
||||
EIGEN_GENERIC_PUBLIC_INTERFACE(EvalToTemp)
|
||||
@@ -1664,7 +1662,7 @@ class EvalToTemp : public dense_xpr_base<EvalToTemp<ArgType> >::type {
|
||||
};
|
||||
|
||||
template <typename ArgType>
|
||||
struct evaluator<EvalToTemp<ArgType> > : public evaluator<typename ArgType::PlainObject> {
|
||||
struct evaluator<EvalToTemp<ArgType>> : public evaluator<typename ArgType::PlainObject> {
|
||||
typedef EvalToTemp<ArgType> XprType;
|
||||
typedef typename ArgType::PlainObject PlainObject;
|
||||
typedef evaluator<PlainObject> Base;
|
||||
|
||||
@@ -622,19 +622,21 @@ class DenseBase
|
||||
EIGEN_DEFAULT_COPY_CONSTRUCTOR(DenseBase)
|
||||
/** Default constructor. Do nothing. */
|
||||
#ifdef EIGEN_INTERNAL_DEBUGGING
|
||||
EIGEN_DEVICE_FUNC constexpr DenseBase(){
|
||||
/* Just checks for self-consistency of the flags.
|
||||
* Only do it when debugging Eigen, as this borders on paranoia and could slow compilation down
|
||||
*/
|
||||
EIGEN_STATIC_ASSERT(
|
||||
(internal::check_implication(MaxRowsAtCompileTime == 1 && MaxColsAtCompileTime != 1, int(IsRowMajor)) &&
|
||||
internal::check_implication(MaxColsAtCompileTime == 1 && MaxRowsAtCompileTime != 1, int(!IsRowMajor))),
|
||||
INVALID_STORAGE_ORDER_FOR_THIS_VECTOR_EXPRESSION)}
|
||||
EIGEN_DEVICE_FUNC constexpr DenseBase() {
|
||||
/* Just checks for self-consistency of the flags.
|
||||
* Only do it when debugging Eigen, as this borders on paranoia and could slow compilation down
|
||||
*/
|
||||
EIGEN_STATIC_ASSERT(
|
||||
(internal::check_implication(MaxRowsAtCompileTime == 1 && MaxColsAtCompileTime != 1, int(IsRowMajor)) &&
|
||||
internal::check_implication(MaxColsAtCompileTime == 1 && MaxRowsAtCompileTime != 1, int(!IsRowMajor))),
|
||||
INVALID_STORAGE_ORDER_FOR_THIS_VECTOR_EXPRESSION)
|
||||
}
|
||||
#else
|
||||
EIGEN_DEVICE_FUNC constexpr DenseBase() = default;
|
||||
#endif
|
||||
|
||||
private : EIGEN_DEVICE_FUNC explicit DenseBase(int);
|
||||
private:
|
||||
EIGEN_DEVICE_FUNC explicit DenseBase(int);
|
||||
EIGEN_DEVICE_FUNC DenseBase(int, int);
|
||||
template <typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC explicit DenseBase(const DenseBase<OtherDerived>&);
|
||||
|
||||
155
wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/DeviceWrapper.h
vendored
Normal file
155
wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/DeviceWrapper.h
vendored
Normal file
@@ -0,0 +1,155 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2023 Charlie Schlosser <cs.schlosser@gmail.com>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_DEVICEWRAPPER_H
|
||||
#define EIGEN_DEVICEWRAPPER_H
|
||||
|
||||
namespace Eigen {
|
||||
template <typename Derived, typename Device>
|
||||
struct DeviceWrapper {
|
||||
using Base = EigenBase<internal::remove_all_t<Derived>>;
|
||||
using Scalar = typename Derived::Scalar;
|
||||
|
||||
EIGEN_DEVICE_FUNC DeviceWrapper(Base& xpr, Device& device) : m_xpr(xpr.derived()), m_device(device) {}
|
||||
EIGEN_DEVICE_FUNC DeviceWrapper(const Base& xpr, Device& device) : m_xpr(xpr.derived()), m_device(device) {}
|
||||
|
||||
template <typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const EigenBase<OtherDerived>& other) {
|
||||
using AssignOp = internal::assign_op<Scalar, typename OtherDerived::Scalar>;
|
||||
internal::call_assignment(*this, other.derived(), AssignOp());
|
||||
return m_xpr;
|
||||
}
|
||||
template <typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator+=(const EigenBase<OtherDerived>& other) {
|
||||
using AddAssignOp = internal::add_assign_op<Scalar, typename OtherDerived::Scalar>;
|
||||
internal::call_assignment(*this, other.derived(), AddAssignOp());
|
||||
return m_xpr;
|
||||
}
|
||||
template <typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator-=(const EigenBase<OtherDerived>& other) {
|
||||
using SubAssignOp = internal::sub_assign_op<Scalar, typename OtherDerived::Scalar>;
|
||||
internal::call_assignment(*this, other.derived(), SubAssignOp());
|
||||
return m_xpr;
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& derived() { return m_xpr; }
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Device& device() { return m_device; }
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE NoAlias<DeviceWrapper, EigenBase> noalias() {
|
||||
return NoAlias<DeviceWrapper, EigenBase>(*this);
|
||||
}
|
||||
|
||||
Derived& m_xpr;
|
||||
Device& m_device;
|
||||
};
|
||||
|
||||
namespace internal {
|
||||
|
||||
// this is where we differentiate between lazy assignment and specialized kernels (e.g. matrix products)
|
||||
// Primary template, declared here without a definition.
// Kind defaults to AssignmentKind<Shape of DstXprType, Shape of SrcXprType>::Kind and EnableIf defaults to void.
template <typename DstXprType, typename SrcXprType, typename Functor, typename Device,
|
||||
typename Kind = typename AssignmentKind<typename evaluator_traits<DstXprType>::Shape,
|
||||
typename evaluator_traits<SrcXprType>::Shape>::Kind,
|
||||
typename EnableIf = void>
|
||||
struct AssignmentWithDevice;
|
||||
|
||||
// unless otherwise specified, use the default product implementation
|
||||
template <typename DstXprType, typename Lhs, typename Rhs, int Options, typename Functor, typename Device,
|
||||
typename Weak>
|
||||
struct AssignmentWithDevice<DstXprType, Product<Lhs, Rhs, Options>, Functor, Device, Dense2Dense, Weak> {
|
||||
using SrcXprType = Product<Lhs, Rhs, Options>;
|
||||
using Base = Assignment<DstXprType, SrcXprType, Functor>;
|
||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(DstXprType& dst, const SrcXprType& src, const Functor& func,
|
||||
Device&) {
|
||||
Base::run(dst, src, func);
|
||||
};
|
||||
};
|
||||
|
||||
// specialization for coefficient-wise assignment
|
||||
template <typename DstXprType, typename SrcXprType, typename Functor, typename Device, typename Weak>
|
||||
struct AssignmentWithDevice<DstXprType, SrcXprType, Functor, Device, Dense2Dense, Weak> {
|
||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(DstXprType& dst, const SrcXprType& src, const Functor& func,
|
||||
Device& device) {
|
||||
#ifndef EIGEN_NO_DEBUG
|
||||
internal::check_for_aliasing(dst, src);
|
||||
#endif
|
||||
|
||||
call_dense_assignment_loop(dst, src, func, device);
|
||||
}
|
||||
};
|
||||
|
||||
// this allows us to use the default evaluation scheme if it is not specialized for the device
|
||||
template <typename Kernel, typename Device, int Traversal = Kernel::AssignmentTraits::Traversal,
|
||||
int Unrolling = Kernel::AssignmentTraits::Unrolling>
|
||||
struct dense_assignment_loop_with_device {
|
||||
using Base = dense_assignment_loop<Kernel, Traversal, Unrolling>;
|
||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel& kernel, Device&) { Base::run(kernel); }
|
||||
};
|
||||
|
||||
// entry point for a generic expression with device
|
||||
template <typename Dst, typename Src, typename Func, typename Device>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void call_assignment_no_alias(DeviceWrapper<Dst, Device> dst,
|
||||
const Src& src, const Func& func) {
|
||||
enum {
|
||||
NeedToTranspose = ((int(Dst::RowsAtCompileTime) == 1 && int(Src::ColsAtCompileTime) == 1) ||
|
||||
(int(Dst::ColsAtCompileTime) == 1 && int(Src::RowsAtCompileTime) == 1)) &&
|
||||
int(Dst::SizeAtCompileTime) != 1
|
||||
};
|
||||
|
||||
using ActualDstTypeCleaned = std::conditional_t<NeedToTranspose, Transpose<Dst>, Dst>;
|
||||
using ActualDstType = std::conditional_t<NeedToTranspose, Transpose<Dst>, Dst&>;
|
||||
ActualDstType actualDst(dst.derived());
|
||||
|
||||
// TODO check whether this is the right place to perform these checks:
|
||||
EIGEN_STATIC_ASSERT_LVALUE(Dst)
|
||||
EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(ActualDstTypeCleaned, Src)
|
||||
EIGEN_CHECK_BINARY_COMPATIBILIY(Func, typename ActualDstTypeCleaned::Scalar, typename Src::Scalar);
|
||||
|
||||
// this provides a mechanism for specializing simple assignments, matrix products, etc
|
||||
AssignmentWithDevice<ActualDstTypeCleaned, Src, Func, Device>::run(actualDst, src, func, dst.device());
|
||||
}
|
||||
|
||||
// copy and pasted from AssignEvaluator except forward device to kernel
|
||||
template <typename DstXprType, typename SrcXprType, typename Functor, typename Device>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void call_dense_assignment_loop(DstXprType& dst,
|
||||
const SrcXprType& src,
|
||||
const Functor& func,
|
||||
Device& device) {
|
||||
using DstEvaluatorType = evaluator<DstXprType>;
|
||||
using SrcEvaluatorType = evaluator<SrcXprType>;
|
||||
|
||||
SrcEvaluatorType srcEvaluator(src);
|
||||
|
||||
// NOTE To properly handle A = (A*A.transpose())/s with A rectangular,
|
||||
// we need to resize the destination after the source evaluator has been created.
|
||||
resize_if_allowed(dst, src, func);
|
||||
|
||||
DstEvaluatorType dstEvaluator(dst);
|
||||
|
||||
using Kernel = generic_dense_assignment_kernel<DstEvaluatorType, SrcEvaluatorType, Functor>;
|
||||
|
||||
Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived());
|
||||
|
||||
dense_assignment_loop_with_device<Kernel, Device>::run(kernel, device);
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
|
||||
template <typename Derived>
|
||||
template <typename Device>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DeviceWrapper<Derived, Device> EigenBase<Derived>::device(Device& device) {
|
||||
return DeviceWrapper<Derived, Device>(derived(), device);
|
||||
}
|
||||
|
||||
template <typename Derived>
|
||||
template <typename Device>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DeviceWrapper<const Derived, Device> EigenBase<Derived>::device(
|
||||
Device& device) const {
|
||||
return DeviceWrapper<const Derived, Device>(derived(), device);
|
||||
}
|
||||
} // namespace Eigen
|
||||
#endif
|
||||
@@ -104,6 +104,11 @@ struct EigenBase {
|
||||
// derived class can reimplement it in a more optimized way.
|
||||
dst = this->derived() * dst;
|
||||
}
|
||||
|
||||
template <typename Device>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DeviceWrapper<Derived, Device> device(Device& device);
|
||||
template <typename Device>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DeviceWrapper<const Derived, Device> device(Device& device) const;
|
||||
};
|
||||
|
||||
/***************************************************************************
|
||||
|
||||
@@ -243,17 +243,15 @@ struct gemv_static_vector_if<Scalar, Size, Dynamic, true> {
|
||||
template <typename Scalar, int Size, int MaxSize>
|
||||
struct gemv_static_vector_if<Scalar, Size, MaxSize, true> {
|
||||
#if EIGEN_MAX_STATIC_ALIGN_BYTES != 0
|
||||
internal::plain_array<Scalar, internal::min_size_prefer_fixed(Size, MaxSize), 0, AlignedMax>
|
||||
m_data;
|
||||
internal::plain_array<Scalar, internal::min_size_prefer_fixed(Size, MaxSize), 0, AlignedMax> m_data;
|
||||
EIGEN_STRONG_INLINE Scalar* data() { return m_data.array; }
|
||||
#else
|
||||
// Some architectures cannot align on the stack,
|
||||
// => let's manually enforce alignment by allocating more data and return the address of the first aligned element.
|
||||
internal::plain_array<
|
||||
Scalar, internal::min_size_prefer_fixed(Size, MaxSize) + EIGEN_MAX_ALIGN_BYTES, 0>
|
||||
m_data;
|
||||
internal::plain_array<Scalar, internal::min_size_prefer_fixed(Size, MaxSize) + EIGEN_MAX_ALIGN_BYTES, 0> m_data;
|
||||
EIGEN_STRONG_INLINE Scalar* data() {
|
||||
return reinterpret_cast<Scalar*>((std::uintptr_t(m_data.array) & ~(std::size_t(EIGEN_MAX_ALIGN_BYTES - 1))) + EIGEN_MAX_ALIGN_BYTES);
|
||||
return reinterpret_cast<Scalar*>((std::uintptr_t(m_data.array) & ~(std::size_t(EIGEN_MAX_ALIGN_BYTES - 1))) +
|
||||
EIGEN_MAX_ALIGN_BYTES);
|
||||
}
|
||||
#endif
|
||||
};
|
||||
|
||||
@@ -1249,20 +1249,40 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double trunc(const double& x) {
|
||||
// T is assumed to be an integer type with a>=0, and b>0
|
||||
template <typename T>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE EIGEN_CONSTEXPR T div_ceil(T a, T b) {
|
||||
using UnsignedT = typename internal::make_unsigned<T>::type;
|
||||
EIGEN_STATIC_ASSERT((NumTraits<T>::IsInteger), THIS FUNCTION IS FOR INTEGER TYPES)
|
||||
eigen_assert(a >= 0);
|
||||
eigen_assert(b > 0);
|
||||
// Note: explicitly declaring a and b as non-negative values allows the compiler to use better optimizations
|
||||
const UnsignedT ua = UnsignedT(a);
|
||||
const UnsignedT ub = UnsignedT(b);
|
||||
// Note: This form is used because it cannot overflow.
|
||||
return a == 0 ? 0 : (a - 1) / b + 1;
|
||||
return ua == 0 ? 0 : (ua - 1) / ub + 1;
|
||||
}
|
||||
|
||||
// Integer round down to nearest multiple of b
|
||||
// T is assumed to be an integer type with a>=0, and b>0
|
||||
template <typename T, typename U>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE EIGEN_CONSTEXPR T round_down(T a, U b) {
|
||||
using UnsignedT = typename internal::make_unsigned<T>::type;
|
||||
using UnsignedU = typename internal::make_unsigned<U>::type;
|
||||
EIGEN_STATIC_ASSERT((NumTraits<T>::IsInteger), THIS FUNCTION IS FOR INTEGER TYPES)
|
||||
EIGEN_STATIC_ASSERT((NumTraits<U>::IsInteger), THIS FUNCTION IS FOR INTEGER TYPES)
|
||||
eigen_assert(a >= 0);
|
||||
eigen_assert(b > 0);
|
||||
// Note: explicitly declaring a and b as non-negative values allows the compiler to use better optimizations
|
||||
const UnsignedT ua = UnsignedT(a);
|
||||
const UnsignedU ub = UnsignedU(b);
|
||||
return ub * (ua / ub);
|
||||
}
|
||||
|
||||
/** Log base 2 for 32-bit positive integers.
|
||||
* Conveniently returns 0 for x==0. */
|
||||
inline int log2(int x) {
|
||||
EIGEN_CONSTEXPR inline int log2(int x) {
|
||||
eigen_assert(x >= 0);
|
||||
unsigned int v(x);
|
||||
static const int table[32] = {0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30,
|
||||
8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31};
|
||||
constexpr int table[32] = {0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30,
|
||||
8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31};
|
||||
v |= v >> 1;
|
||||
v |= v >> 2;
|
||||
v |= v >> 4;
|
||||
|
||||
@@ -125,7 +125,7 @@ struct traits<Matrix<Scalar_, Rows_, Cols_, Options_, MaxRows_, MaxCols_>> {
|
||||
* coefficients.</dd>
|
||||
*
|
||||
* <dt><b>\anchor fixedsize Fixed-size versus dynamic-size:</b></dt>
|
||||
* <dd>Fixed-size means that the numbers of rows and columns are known are compile-time. In this case, Eigen allocates
|
||||
* <dd>Fixed-size means that the numbers of rows and columns are known at compile-time. In this case, Eigen allocates
|
||||
* the array of coefficients as a fixed-size array, as a class member. This makes sense for very small matrices,
|
||||
* typically up to 4x4, sometimes up to 16x16. Larger matrices should be declared as dynamic-size even if one happens to
|
||||
* know their size at compile-time.
|
||||
@@ -139,7 +139,7 @@ struct traits<Matrix<Scalar_, Rows_, Cols_, Options_, MaxRows_, MaxCols_>> {
|
||||
* <dt><b>\anchor maxrows MaxRows_ and MaxCols_:</b></dt>
|
||||
* <dd>In most cases, one just leaves these parameters to the default values.
|
||||
* These parameters mean the maximum size of rows and columns that the matrix may have. They are useful in cases
|
||||
* when the exact numbers of rows and columns are not known are compile-time, but it is known at compile-time that they
|
||||
* when the exact numbers of rows and columns are not known at compile-time, but it is known at compile-time that they
|
||||
* cannot exceed a certain value. This happens when taking dynamic-size blocks inside fixed-size matrices: in this case
|
||||
* MaxRows_ and MaxCols_ are the dimensions of the original matrix, while Rows_ and Cols_ are Dynamic.</dd>
|
||||
* </dl>
|
||||
|
||||
@@ -80,15 +80,12 @@ class Replicate : public internal::dense_xpr_base<Replicate<MatrixType, RowFacto
|
||||
|
||||
template <typename OriginalMatrixType>
|
||||
EIGEN_DEVICE_FUNC inline Replicate(const OriginalMatrixType& matrix, Index rowFactor, Index colFactor)
|
||||
: m_matrix(matrix),
|
||||
m_rowFactor(rowFactor),
|
||||
m_colFactor(colFactor){
|
||||
EIGEN_STATIC_ASSERT((internal::is_same<std::remove_const_t<MatrixType>, OriginalMatrixType>::value),
|
||||
THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE)}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index rows() const {
|
||||
return m_matrix.rows() * m_rowFactor.value();
|
||||
: m_matrix(matrix), m_rowFactor(rowFactor), m_colFactor(colFactor) {
|
||||
EIGEN_STATIC_ASSERT((internal::is_same<std::remove_const_t<MatrixType>, OriginalMatrixType>::value),
|
||||
THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE)
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index rows() const { return m_matrix.rows() * m_rowFactor.value(); }
|
||||
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index cols() const { return m_matrix.cols() * m_colFactor.value(); }
|
||||
|
||||
EIGEN_DEVICE_FUNC const MatrixTypeNested_& nestedExpression() const { return m_matrix; }
|
||||
|
||||
@@ -57,7 +57,7 @@ EIGEN_STRONG_INLINE void trsmKernelL<Scalar, Index, Mode, Conjugate, TriStorageO
|
||||
Index rs = size - k - 1; // remaining size
|
||||
Index s = TriStorageOrder == RowMajor ? (IsLower ? 0 : i + 1) : IsLower ? i + 1 : i - rs;
|
||||
|
||||
Scalar a = (Mode & UnitDiag) ? Scalar(1) : Scalar(Scalar(1)/conj(tri(i,i)));
|
||||
Scalar a = (Mode & UnitDiag) ? Scalar(1) : Scalar(Scalar(1) / conj(tri(i, i)));
|
||||
for (Index j = 0; j < otherSize; ++j) {
|
||||
if (TriStorageOrder == RowMajor) {
|
||||
Scalar b(0);
|
||||
|
||||
@@ -81,10 +81,7 @@
|
||||
// See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89325
|
||||
#pragma GCC diagnostic ignored "-Wattributes"
|
||||
#endif
|
||||
#if __GNUC__>=8
|
||||
#pragma GCC diagnostic ignored "-Wclass-memaccess"
|
||||
#endif
|
||||
#if __GNUC__>=11
|
||||
#if __GNUC__>=11 && __GNUC__<=13
|
||||
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
|
||||
#endif
|
||||
#if __GNUC__>=12
|
||||
|
||||
@@ -502,6 +502,9 @@ struct stem_function {
|
||||
};
|
||||
} // namespace internal
|
||||
|
||||
template <typename XprType, typename Device>
|
||||
struct DeviceWrapper;
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_FORWARDDECLARATIONS_H
|
||||
|
||||
@@ -762,12 +762,22 @@ void swap(scoped_array<T>& a, scoped_array<T>& b) {
|
||||
#ifdef EIGEN_ALLOCA
|
||||
|
||||
#if EIGEN_DEFAULT_ALIGN_BYTES > 0
|
||||
// We always manually re-align the result of EIGEN_ALLOCA.
|
||||
// We always manually re-align the result of EIGEN_ALLOCA.
|
||||
// If alloca is already aligned, the compiler should be smart enough to optimize away the re-alignment.
|
||||
#define EIGEN_ALIGNED_ALLOCA(SIZE) \
|
||||
reinterpret_cast<void*>( \
|
||||
(std::uintptr_t(EIGEN_ALLOCA(SIZE + EIGEN_DEFAULT_ALIGN_BYTES - 1)) + EIGEN_DEFAULT_ALIGN_BYTES - 1) & \
|
||||
~(std::size_t(EIGEN_DEFAULT_ALIGN_BYTES - 1)))
|
||||
|
||||
#if (EIGEN_COMP_GNUC || EIGEN_COMP_CLANG)
|
||||
#define EIGEN_ALIGNED_ALLOCA(SIZE) __builtin_alloca_with_align(SIZE, CHAR_BIT* EIGEN_DEFAULT_ALIGN_BYTES)
|
||||
#else
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void* eigen_aligned_alloca_helper(void* ptr) {
|
||||
constexpr std::uintptr_t mask = EIGEN_DEFAULT_ALIGN_BYTES - 1;
|
||||
std::uintptr_t ptr_int = std::uintptr_t(ptr);
|
||||
std::uintptr_t aligned_ptr_int = (ptr_int + mask) & ~mask;
|
||||
std::uintptr_t offset = aligned_ptr_int - ptr_int;
|
||||
return static_cast<void*>(static_cast<uint8_t*>(ptr) + offset);
|
||||
}
|
||||
#define EIGEN_ALIGNED_ALLOCA(SIZE) eigen_aligned_alloca_helper(EIGEN_ALLOCA(SIZE + EIGEN_DEFAULT_ALIGN_BYTES - 1))
|
||||
#endif
|
||||
|
||||
#else
|
||||
#define EIGEN_ALIGNED_ALLOCA(SIZE) EIGEN_ALLOCA(SIZE)
|
||||
#endif
|
||||
|
||||
@@ -96,17 +96,17 @@ class BaseExpr {
|
||||
return AddExpr<Derived, ValueExpr<>>(derived(), -a);
|
||||
}
|
||||
constexpr ProductExpr<Derived, ValueExpr<>> operator*(Index a) const {
|
||||
return ProductExpr<Derived, ValueExpr<> >(derived(), a);
|
||||
return ProductExpr<Derived, ValueExpr<>>(derived(), a);
|
||||
}
|
||||
constexpr QuotientExpr<Derived, ValueExpr<>> operator/(Index a) const {
|
||||
return QuotientExpr<Derived, ValueExpr<> >(derived(), a);
|
||||
return QuotientExpr<Derived, ValueExpr<>>(derived(), a);
|
||||
}
|
||||
|
||||
friend constexpr AddExpr<Derived, ValueExpr<>> operator+(Index a, const BaseExpr& b) {
|
||||
return AddExpr<Derived, ValueExpr<> >(b.derived(), a);
|
||||
return AddExpr<Derived, ValueExpr<>>(b.derived(), a);
|
||||
}
|
||||
friend constexpr AddExpr<NegateExpr<Derived>, ValueExpr<>> operator-(Index a, const BaseExpr& b) {
|
||||
return AddExpr<NegateExpr<Derived>, ValueExpr<> >(-b.derived(), a);
|
||||
return AddExpr<NegateExpr<Derived>, ValueExpr<>>(-b.derived(), a);
|
||||
}
|
||||
friend constexpr ProductExpr<ValueExpr<>, Derived> operator*(Index a, const BaseExpr& b) {
|
||||
return ProductExpr<ValueExpr<>, Derived>(a, b.derived());
|
||||
@@ -117,41 +117,41 @@ class BaseExpr {
|
||||
|
||||
template <int N>
|
||||
constexpr AddExpr<Derived, ValueExpr<internal::FixedInt<N>>> operator+(internal::FixedInt<N>) const {
|
||||
return AddExpr<Derived, ValueExpr<internal::FixedInt<N> > >(derived(), ValueExpr<internal::FixedInt<N> >());
|
||||
return AddExpr<Derived, ValueExpr<internal::FixedInt<N>>>(derived(), ValueExpr<internal::FixedInt<N>>());
|
||||
}
|
||||
template <int N>
|
||||
constexpr AddExpr<Derived, ValueExpr<internal::FixedInt<-N>>> operator-(internal::FixedInt<N>) const {
|
||||
return AddExpr<Derived, ValueExpr<internal::FixedInt<-N> > >(derived(), ValueExpr<internal::FixedInt<-N> >());
|
||||
return AddExpr<Derived, ValueExpr<internal::FixedInt<-N>>>(derived(), ValueExpr<internal::FixedInt<-N>>());
|
||||
}
|
||||
template <int N>
|
||||
constexpr ProductExpr<Derived, ValueExpr<internal::FixedInt<N>>> operator*(internal::FixedInt<N>) const {
|
||||
return ProductExpr<Derived, ValueExpr<internal::FixedInt<N> > >(derived(), ValueExpr<internal::FixedInt<N> >());
|
||||
return ProductExpr<Derived, ValueExpr<internal::FixedInt<N>>>(derived(), ValueExpr<internal::FixedInt<N>>());
|
||||
}
|
||||
template <int N>
|
||||
constexpr QuotientExpr<Derived, ValueExpr<internal::FixedInt<N>>> operator/(internal::FixedInt<N>) const {
|
||||
return QuotientExpr<Derived, ValueExpr<internal::FixedInt<N> > >(derived(), ValueExpr<internal::FixedInt<N> >());
|
||||
return QuotientExpr<Derived, ValueExpr<internal::FixedInt<N>>>(derived(), ValueExpr<internal::FixedInt<N>>());
|
||||
}
|
||||
|
||||
template <int N>
|
||||
friend constexpr AddExpr<Derived, ValueExpr<internal::FixedInt<N>>> operator+(internal::FixedInt<N>,
|
||||
const BaseExpr& b) {
|
||||
return AddExpr<Derived, ValueExpr<internal::FixedInt<N> > >(b.derived(), ValueExpr<internal::FixedInt<N> >());
|
||||
return AddExpr<Derived, ValueExpr<internal::FixedInt<N>>>(b.derived(), ValueExpr<internal::FixedInt<N>>());
|
||||
}
|
||||
template <int N>
|
||||
friend constexpr AddExpr<NegateExpr<Derived>, ValueExpr<internal::FixedInt<N>>> operator-(internal::FixedInt<N>,
|
||||
const BaseExpr& b) {
|
||||
return AddExpr<NegateExpr<Derived>, ValueExpr<internal::FixedInt<N> > >(-b.derived(),
|
||||
ValueExpr<internal::FixedInt<N> >());
|
||||
return AddExpr<NegateExpr<Derived>, ValueExpr<internal::FixedInt<N>>>(-b.derived(),
|
||||
ValueExpr<internal::FixedInt<N>>());
|
||||
}
|
||||
template <int N>
|
||||
friend constexpr ProductExpr<ValueExpr<internal::FixedInt<N>>, Derived> operator*(internal::FixedInt<N>,
|
||||
const BaseExpr& b) {
|
||||
return ProductExpr<ValueExpr<internal::FixedInt<N> >, Derived>(ValueExpr<internal::FixedInt<N> >(), b.derived());
|
||||
return ProductExpr<ValueExpr<internal::FixedInt<N>>, Derived>(ValueExpr<internal::FixedInt<N>>(), b.derived());
|
||||
}
|
||||
template <int N>
|
||||
friend constexpr QuotientExpr<ValueExpr<internal::FixedInt<N>>, Derived> operator/(internal::FixedInt<N>,
|
||||
const BaseExpr& b) {
|
||||
return QuotientExpr<ValueExpr<internal::FixedInt<N> >, Derived>(ValueExpr<internal::FixedInt<N> >(), b.derived());
|
||||
return QuotientExpr<ValueExpr<internal::FixedInt<N>>, Derived>(ValueExpr<internal::FixedInt<N>>(), b.derived());
|
||||
}
|
||||
|
||||
template <typename OtherDerived>
|
||||
@@ -161,7 +161,7 @@ class BaseExpr {
|
||||
|
||||
template <typename OtherDerived>
|
||||
constexpr AddExpr<Derived, NegateExpr<OtherDerived>> operator-(const BaseExpr<OtherDerived>& b) const {
|
||||
return AddExpr<Derived, NegateExpr<OtherDerived> >(derived(), -b.derived());
|
||||
return AddExpr<Derived, NegateExpr<OtherDerived>>(derived(), -b.derived());
|
||||
}
|
||||
|
||||
template <typename OtherDerived>
|
||||
@@ -179,7 +179,7 @@ template <typename T>
|
||||
struct is_symbolic {
|
||||
// BaseExpr has no conversion ctor, so we only have to check whether T can be statically cast to its base class
|
||||
// BaseExpr<T>.
|
||||
enum { value = internal::is_convertible<T, BaseExpr<T> >::value };
|
||||
enum { value = internal::is_convertible<T, BaseExpr<T>>::value };
|
||||
};
|
||||
|
||||
// A simple wrapper around an integral value to provide the eval method.
|
||||
@@ -317,7 +317,7 @@ struct EvalSymbolValueHelper<Tag, T1, OtherTypes...> {
|
||||
|
||||
/** Expression of a symbol uniquely identified by the template parameter type \c tag */
|
||||
template <typename tag>
|
||||
class SymbolExpr : public BaseExpr<SymbolExpr<tag> > {
|
||||
class SymbolExpr : public BaseExpr<SymbolExpr<tag>> {
|
||||
public:
|
||||
/** Alias to the template parameter \c tag */
|
||||
typedef tag Tag;
|
||||
@@ -349,7 +349,7 @@ class SymbolExpr : public BaseExpr<SymbolExpr<tag> > {
|
||||
};
|
||||
|
||||
template <typename Arg0>
|
||||
class NegateExpr : public BaseExpr<NegateExpr<Arg0> > {
|
||||
class NegateExpr : public BaseExpr<NegateExpr<Arg0>> {
|
||||
public:
|
||||
constexpr NegateExpr() = default;
|
||||
constexpr NegateExpr(const Arg0& arg0) : m_arg0(arg0) {}
|
||||
@@ -370,7 +370,7 @@ class NegateExpr : public BaseExpr<NegateExpr<Arg0> > {
|
||||
};
|
||||
|
||||
template <typename Arg0, typename Arg1>
|
||||
class AddExpr : public BaseExpr<AddExpr<Arg0, Arg1> > {
|
||||
class AddExpr : public BaseExpr<AddExpr<Arg0, Arg1>> {
|
||||
public:
|
||||
constexpr AddExpr() = default;
|
||||
constexpr AddExpr(const Arg0& arg0, const Arg1& arg1) : m_arg0(arg0), m_arg1(arg1) {}
|
||||
@@ -393,7 +393,7 @@ class AddExpr : public BaseExpr<AddExpr<Arg0, Arg1> > {
|
||||
};
|
||||
|
||||
template <typename Arg0, typename Arg1>
|
||||
class ProductExpr : public BaseExpr<ProductExpr<Arg0, Arg1> > {
|
||||
class ProductExpr : public BaseExpr<ProductExpr<Arg0, Arg1>> {
|
||||
public:
|
||||
constexpr ProductExpr() = default;
|
||||
constexpr ProductExpr(const Arg0& arg0, const Arg1& arg1) : m_arg0(arg0), m_arg1(arg1) {}
|
||||
@@ -416,7 +416,7 @@ class ProductExpr : public BaseExpr<ProductExpr<Arg0, Arg1> > {
|
||||
};
|
||||
|
||||
template <typename Arg0, typename Arg1>
|
||||
class QuotientExpr : public BaseExpr<QuotientExpr<Arg0, Arg1> > {
|
||||
class QuotientExpr : public BaseExpr<QuotientExpr<Arg0, Arg1>> {
|
||||
public:
|
||||
constexpr QuotientExpr() = default;
|
||||
constexpr QuotientExpr(const Arg0& arg0, const Arg1& arg1) : m_arg0(arg0), m_arg1(arg1) {}
|
||||
|
||||
@@ -206,6 +206,64 @@ struct functor_traits {
|
||||
enum { Cost = 10, PacketAccess = false, IsRepeatable = false };
|
||||
};
|
||||
|
||||
// estimates the cost of lazily evaluating a generic functor by unwinding the expression
|
||||
template <typename Xpr>
|
||||
struct nested_functor_cost {
|
||||
static constexpr Index Cost = static_cast<Index>(functor_traits<Xpr>::Cost);
|
||||
};
|
||||
|
||||
template <typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>
|
||||
struct nested_functor_cost<Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols>> {
|
||||
static constexpr Index Cost = 1;
|
||||
};
|
||||
|
||||
template <typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>
|
||||
struct nested_functor_cost<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols>> {
|
||||
static constexpr Index Cost = 1;
|
||||
};
|
||||
|
||||
// TODO: assign a cost to the stride type?
|
||||
template <typename PlainObjectType, int MapOptions, typename StrideType>
|
||||
struct nested_functor_cost<Map<PlainObjectType, MapOptions, StrideType>> : nested_functor_cost<PlainObjectType> {};
|
||||
|
||||
template <typename Func, typename Xpr>
|
||||
struct nested_functor_cost<CwiseUnaryOp<Func, Xpr>> {
|
||||
using XprCleaned = remove_all_t<Xpr>;
|
||||
using FuncCleaned = remove_all_t<Func>;
|
||||
static constexpr Index Cost = nested_functor_cost<FuncCleaned>::Cost + nested_functor_cost<XprCleaned>::Cost;
|
||||
};
|
||||
|
||||
template <typename Func, typename Xpr>
|
||||
struct nested_functor_cost<CwiseNullaryOp<Func, Xpr>> {
|
||||
using XprCleaned = remove_all_t<Xpr>;
|
||||
using FuncCleaned = remove_all_t<Func>;
|
||||
static constexpr Index Cost = nested_functor_cost<FuncCleaned>::Cost + nested_functor_cost<XprCleaned>::Cost;
|
||||
};
|
||||
|
||||
template <typename Func, typename LhsXpr, typename RhsXpr>
|
||||
struct nested_functor_cost<CwiseBinaryOp<Func, LhsXpr, RhsXpr>> {
|
||||
using LhsXprCleaned = remove_all_t<LhsXpr>;
|
||||
using RhsXprCleaned = remove_all_t<RhsXpr>;
|
||||
using FuncCleaned = remove_all_t<Func>;
|
||||
static constexpr Index Cost = nested_functor_cost<FuncCleaned>::Cost + nested_functor_cost<LhsXprCleaned>::Cost +
|
||||
nested_functor_cost<RhsXprCleaned>::Cost;
|
||||
};
|
||||
|
||||
template <typename Func, typename LhsXpr, typename MidXpr, typename RhsXpr>
|
||||
struct nested_functor_cost<CwiseTernaryOp<Func, LhsXpr, MidXpr, RhsXpr>> {
|
||||
using LhsXprCleaned = remove_all_t<LhsXpr>;
|
||||
using MidXprCleaned = remove_all_t<MidXpr>;
|
||||
using RhsXprCleaned = remove_all_t<RhsXpr>;
|
||||
using FuncCleaned = remove_all_t<Func>;
|
||||
static constexpr Index Cost = nested_functor_cost<FuncCleaned>::Cost + nested_functor_cost<LhsXprCleaned>::Cost +
|
||||
nested_functor_cost<MidXprCleaned>::Cost + nested_functor_cost<RhsXprCleaned>::Cost;
|
||||
};
|
||||
|
||||
template <typename Xpr>
|
||||
struct functor_cost {
|
||||
static constexpr Index Cost = plain_enum_max(nested_functor_cost<Xpr>::Cost, 1);
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct packet_traits;
|
||||
|
||||
|
||||
@@ -1233,7 +1233,7 @@ void BDCSVD<MatrixType, Options>::deflation44(Index firstColu, Index firstColm,
|
||||
using std::conj;
|
||||
using std::pow;
|
||||
using std::sqrt;
|
||||
|
||||
|
||||
RealScalar s = m_computed(firstColm + i, firstColm);
|
||||
RealScalar c = m_computed(firstColm + j, firstColm);
|
||||
RealScalar r = numext::hypot(c, s);
|
||||
@@ -1424,8 +1424,7 @@ void BDCSVD<MatrixType, Options>::deflation(Index firstCol, Index lastCol, Index
|
||||
if ((diag(i) - diag(i - 1)) < epsilon_strict) {
|
||||
#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
|
||||
std::cout << "deflation 4.4 with i = " << i << " because " << diag(i) << " - " << diag(i - 1)
|
||||
<< " == " << (diag(i) - diag(i - 1)) << " < "
|
||||
<< epsilon_strict << "\n";
|
||||
<< " == " << (diag(i) - diag(i - 1)) << " < " << epsilon_strict << "\n";
|
||||
#endif
|
||||
eigen_internal_assert(abs(diag(i) - diag(i - 1)) < epsilon_coarse &&
|
||||
" diagonal entries are not properly sorted");
|
||||
|
||||
@@ -45,7 +45,6 @@ struct sparse_time_dense_product_impl<SparseLhsType, DenseRhsType, DenseResType,
|
||||
|
||||
Index n = lhs.outerSize();
|
||||
#ifdef EIGEN_HAS_OPENMP
|
||||
Eigen::initParallel();
|
||||
Index threads = Eigen::nbThreads();
|
||||
#endif
|
||||
|
||||
@@ -125,7 +124,6 @@ struct sparse_time_dense_product_impl<SparseLhsType, DenseRhsType, DenseResType,
|
||||
LhsEval lhsEval(lhs);
|
||||
|
||||
#ifdef EIGEN_HAS_OPENMP
|
||||
Eigen::initParallel();
|
||||
Index threads = Eigen::nbThreads();
|
||||
// This 20000 threshold has been found experimentally on 2D and 3D Poisson problems.
|
||||
// It basically represents the minimal amount of work to be done to be worth it.
|
||||
|
||||
@@ -563,6 +563,8 @@ class SparseMatrix : public SparseCompressedBase<SparseMatrix<Scalar_, Options_,
|
||||
/** \internal
|
||||
* same as insert(Index,Index) except that the indices are given relative to the storage order */
|
||||
Scalar& insertByOuterInner(Index j, Index i) {
|
||||
eigen_assert(j >= 0 && j < m_outerSize && "invalid outer index");
|
||||
eigen_assert(i >= 0 && i < m_innerSize && "invalid inner index");
|
||||
Index start = m_outerIndex[j];
|
||||
Index end = isCompressed() ? m_outerIndex[j + 1] : start + m_innerNonZeros[j];
|
||||
Index dst = start == end ? end : m_data.searchLowerIndex(start, end, i);
|
||||
|
||||
327
wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/ThreadPool/CoreThreadPoolDevice.h
vendored
Normal file
327
wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/ThreadPool/CoreThreadPoolDevice.h
vendored
Normal file
@@ -0,0 +1,327 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2023 Charlie Schlosser <cs.schlosser@gmail.com>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_CORE_THREAD_POOL_DEVICE_H
|
||||
#define EIGEN_CORE_THREAD_POOL_DEVICE_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
// CoreThreadPoolDevice provides an easy-to-understand Device for parallelizing Eigen Core expressions with
|
||||
// Threadpool. Expressions are recursively split evenly until the evaluation cost is less than the threshold for
|
||||
// delegating the task to a thread.
|
||||
|
||||
// a
|
||||
// / \
|
||||
// / \
|
||||
// / \
|
||||
// / \
|
||||
// / \
|
||||
// / \
|
||||
// / \
|
||||
// a e
|
||||
// / \ / \
|
||||
// / \ / \
|
||||
// / \ / \
|
||||
// a c e g
|
||||
// / \ / \ / \ / \
|
||||
// / \ / \ / \ / \
|
||||
// a b c d e f g h
|
||||
|
||||
// Each task descends the binary tree to the left, delegates the right task to a new thread, and continues to the
|
||||
// left. This ensures that work is evenly distributed to the thread pool as quickly as possible and minimizes the number
|
||||
// of tasks created during the evaluation. Consider an expression that is divided into 8 chunks. The
|
||||
// primary task 'a' creates tasks 'e' 'c' and 'b', and executes its portion of the expression at the bottom of the
|
||||
// tree. Likewise, task 'e' creates tasks 'g' and 'f', and executes its portion of the expression.
|
||||
|
||||
struct CoreThreadPoolDevice {
|
||||
using Task = std::function<void()>;
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoreThreadPoolDevice(ThreadPool& pool, float threadCostThreshold = 3e-5f)
|
||||
: m_pool(pool) {
|
||||
eigen_assert(threadCostThreshold >= 0.0f && "threadCostThreshold must be non-negative");
|
||||
m_costFactor = threadCostThreshold;
|
||||
}
|
||||
|
||||
template <int PacketSize>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int calculateLevels(Index size, float cost) const {
|
||||
eigen_assert(cost >= 0.0f && "cost must be non-negative");
|
||||
Index numOps = size / PacketSize;
|
||||
int actualThreads = numOps < m_pool.NumThreads() ? static_cast<int>(numOps) : m_pool.NumThreads();
|
||||
float totalCost = static_cast<float>(numOps) * cost;
|
||||
float idealThreads = totalCost * m_costFactor;
|
||||
if (idealThreads < static_cast<float>(actualThreads)) {
|
||||
idealThreads = numext::maxi(idealThreads, 1.0f);
|
||||
actualThreads = numext::mini(actualThreads, static_cast<int>(idealThreads));
|
||||
}
|
||||
int maxLevel = internal::log2_ceil(actualThreads);
|
||||
return maxLevel;
|
||||
}
|
||||
|
||||
// MSVC does not like inlining parallelForImpl
|
||||
#if EIGEN_COMP_MSVC && !EIGEN_COMP_CLANG
|
||||
#define EIGEN_PARALLEL_FOR_INLINE
|
||||
#else
|
||||
#define EIGEN_PARALLEL_FOR_INLINE EIGEN_STRONG_INLINE
|
||||
#endif
|
||||
|
||||
template <typename UnaryFunctor, int PacketSize>
|
||||
EIGEN_DEVICE_FUNC EIGEN_PARALLEL_FOR_INLINE void parallelForImpl(Index begin, Index end, UnaryFunctor& f,
|
||||
Barrier& barrier, int level) {
|
||||
while (level > 0) {
|
||||
level--;
|
||||
Index size = end - begin;
|
||||
eigen_assert(size % PacketSize == 0 && "this function assumes size is a multiple of PacketSize");
|
||||
Index mid = begin + numext::round_down(size >> 1, PacketSize);
|
||||
Task right = [=, this, &f, &barrier]() {
|
||||
parallelForImpl<UnaryFunctor, PacketSize>(mid, end, f, barrier, level);
|
||||
};
|
||||
m_pool.Schedule(std::move(right));
|
||||
end = mid;
|
||||
}
|
||||
for (Index i = begin; i < end; i += PacketSize) f(i);
|
||||
barrier.Notify();
|
||||
}
|
||||
|
||||
template <typename BinaryFunctor, int PacketSize>
|
||||
EIGEN_DEVICE_FUNC EIGEN_PARALLEL_FOR_INLINE void parallelForImpl(Index outerBegin, Index outerEnd, Index innerBegin,
|
||||
Index innerEnd, BinaryFunctor& f, Barrier& barrier,
|
||||
int level) {
|
||||
while (level > 0) {
|
||||
level--;
|
||||
Index outerSize = outerEnd - outerBegin;
|
||||
if (outerSize > 1) {
|
||||
Index outerMid = outerBegin + (outerSize >> 1);
|
||||
Task right = [=, this, &f, &barrier]() {
|
||||
parallelForImpl<BinaryFunctor, PacketSize>(outerMid, outerEnd, innerBegin, innerEnd, f, barrier, level);
|
||||
};
|
||||
m_pool.Schedule(std::move(right));
|
||||
outerEnd = outerMid;
|
||||
} else {
|
||||
Index innerSize = innerEnd - innerBegin;
|
||||
eigen_assert(innerSize % PacketSize == 0 && "this function assumes innerSize is a multiple of PacketSize");
|
||||
Index innerMid = innerBegin + numext::round_down(innerSize >> 1, PacketSize);
|
||||
Task right = [=, this, &f, &barrier]() {
|
||||
parallelForImpl<BinaryFunctor, PacketSize>(outerBegin, outerEnd, innerMid, innerEnd, f, barrier, level);
|
||||
};
|
||||
m_pool.Schedule(std::move(right));
|
||||
innerEnd = innerMid;
|
||||
}
|
||||
}
|
||||
for (Index outer = outerBegin; outer < outerEnd; outer++)
|
||||
for (Index inner = innerBegin; inner < innerEnd; inner += PacketSize) f(outer, inner);
|
||||
barrier.Notify();
|
||||
}
|
||||
|
||||
#undef EIGEN_PARALLEL_FOR_INLINE
|
||||
|
||||
template <typename UnaryFunctor, int PacketSize>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void parallelFor(Index begin, Index end, UnaryFunctor& f, float cost) {
|
||||
Index size = end - begin;
|
||||
int maxLevel = calculateLevels<PacketSize>(size, cost);
|
||||
Barrier barrier(1 << maxLevel);
|
||||
parallelForImpl<UnaryFunctor, PacketSize>(begin, end, f, barrier, maxLevel);
|
||||
barrier.Wait();
|
||||
}
|
||||
|
||||
template <typename BinaryFunctor, int PacketSize>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void parallelFor(Index outerBegin, Index outerEnd, Index innerBegin,
|
||||
Index innerEnd, BinaryFunctor& f, float cost) {
|
||||
Index outerSize = outerEnd - outerBegin;
|
||||
Index innerSize = innerEnd - innerBegin;
|
||||
Index size = outerSize * innerSize;
|
||||
int maxLevel = calculateLevels<PacketSize>(size, cost);
|
||||
Barrier barrier(1 << maxLevel);
|
||||
parallelForImpl<BinaryFunctor, PacketSize>(outerBegin, outerEnd, innerBegin, innerEnd, f, barrier, maxLevel);
|
||||
barrier.Wait();
|
||||
}
|
||||
|
||||
ThreadPool& m_pool;
|
||||
// costFactor is the cost of delegating a task to a thread
|
||||
// the inverse is used to avoid a floating point division
|
||||
float m_costFactor;
|
||||
};
|
||||
|
||||
// specialization of coefficient-wise assignment loops for CoreThreadPoolDevice
|
||||
|
||||
namespace internal {
|
||||
|
||||
template <typename Kernel>
|
||||
struct cost_helper {
|
||||
using SrcEvaluatorType = typename Kernel::SrcEvaluatorType;
|
||||
using DstEvaluatorType = typename Kernel::DstEvaluatorType;
|
||||
using SrcXprType = typename SrcEvaluatorType::XprType;
|
||||
using DstXprType = typename DstEvaluatorType::XprType;
|
||||
static constexpr Index Cost = functor_cost<SrcXprType>::Cost + functor_cost<DstXprType>::Cost;
|
||||
};
|
||||
|
||||
template <typename Kernel>
|
||||
struct dense_assignment_loop_with_device<Kernel, CoreThreadPoolDevice, DefaultTraversal, NoUnrolling> {
|
||||
static constexpr Index XprEvaluationCost = cost_helper<Kernel>::Cost;
|
||||
struct AssignmentFunctor : public Kernel {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE AssignmentFunctor(Kernel& kernel) : Kernel(kernel) {}
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(Index outer, Index inner) {
|
||||
this->assignCoeffByOuterInner(outer, inner);
|
||||
}
|
||||
};
|
||||
|
||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Kernel& kernel, CoreThreadPoolDevice& device) {
|
||||
const Index innerSize = kernel.innerSize();
|
||||
const Index outerSize = kernel.outerSize();
|
||||
constexpr float cost = static_cast<float>(XprEvaluationCost);
|
||||
AssignmentFunctor functor(kernel);
|
||||
device.template parallelFor<AssignmentFunctor, 1>(0, outerSize, 0, innerSize, functor, cost);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Kernel>
|
||||
struct dense_assignment_loop_with_device<Kernel, CoreThreadPoolDevice, DefaultTraversal, InnerUnrolling> {
|
||||
using DstXprType = typename Kernel::DstEvaluatorType::XprType;
|
||||
static constexpr Index XprEvaluationCost = cost_helper<Kernel>::Cost, InnerSize = DstXprType::InnerSizeAtCompileTime;
|
||||
struct AssignmentFunctor : public Kernel {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE AssignmentFunctor(Kernel& kernel) : Kernel(kernel) {}
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(Index outer) {
|
||||
copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, 0, InnerSize>::run(*this, outer);
|
||||
}
|
||||
};
|
||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Kernel& kernel, CoreThreadPoolDevice& device) {
|
||||
const Index outerSize = kernel.outerSize();
|
||||
AssignmentFunctor functor(kernel);
|
||||
constexpr float cost = static_cast<float>(XprEvaluationCost) * static_cast<float>(InnerSize);
|
||||
device.template parallelFor<AssignmentFunctor, 1>(0, outerSize, functor, cost);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Kernel>
|
||||
struct dense_assignment_loop_with_device<Kernel, CoreThreadPoolDevice, InnerVectorizedTraversal, NoUnrolling> {
|
||||
using PacketType = typename Kernel::PacketType;
|
||||
static constexpr Index XprEvaluationCost = cost_helper<Kernel>::Cost, PacketSize = unpacket_traits<PacketType>::size,
|
||||
SrcAlignment = Kernel::AssignmentTraits::SrcAlignment,
|
||||
DstAlignment = Kernel::AssignmentTraits::DstAlignment;
|
||||
struct AssignmentFunctor : public Kernel {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE AssignmentFunctor(Kernel& kernel) : Kernel(kernel) {}
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(Index outer, Index inner) {
|
||||
this->template assignPacketByOuterInner<Unaligned, Unaligned, PacketType>(outer, inner);
|
||||
}
|
||||
};
|
||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Kernel& kernel, CoreThreadPoolDevice& device) {
|
||||
const Index innerSize = kernel.innerSize();
|
||||
const Index outerSize = kernel.outerSize();
|
||||
const float cost = static_cast<float>(XprEvaluationCost) * static_cast<float>(innerSize);
|
||||
AssignmentFunctor functor(kernel);
|
||||
device.template parallelFor<AssignmentFunctor, PacketSize>(0, outerSize, 0, innerSize, functor, cost);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Kernel>
|
||||
struct dense_assignment_loop_with_device<Kernel, CoreThreadPoolDevice, InnerVectorizedTraversal, InnerUnrolling> {
|
||||
using PacketType = typename Kernel::PacketType;
|
||||
using DstXprType = typename Kernel::DstEvaluatorType::XprType;
|
||||
static constexpr Index XprEvaluationCost = cost_helper<Kernel>::Cost, PacketSize = unpacket_traits<PacketType>::size,
|
||||
SrcAlignment = Kernel::AssignmentTraits::SrcAlignment,
|
||||
DstAlignment = Kernel::AssignmentTraits::DstAlignment,
|
||||
InnerSize = DstXprType::InnerSizeAtCompileTime;
|
||||
struct AssignmentFunctor : public Kernel {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE AssignmentFunctor(Kernel& kernel) : Kernel(kernel) {}
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(Index outer) {
|
||||
copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, InnerSize, SrcAlignment, DstAlignment>::run(*this, outer);
|
||||
}
|
||||
};
|
||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Kernel& kernel, CoreThreadPoolDevice& device) {
|
||||
const Index outerSize = kernel.outerSize();
|
||||
constexpr float cost = static_cast<float>(XprEvaluationCost) * static_cast<float>(InnerSize);
|
||||
AssignmentFunctor functor(kernel);
|
||||
device.template parallelFor<AssignmentFunctor, PacketSize>(0, outerSize, functor, cost);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Kernel>
|
||||
struct dense_assignment_loop_with_device<Kernel, CoreThreadPoolDevice, SliceVectorizedTraversal, NoUnrolling> {
|
||||
using Scalar = typename Kernel::Scalar;
|
||||
using PacketType = typename Kernel::PacketType;
|
||||
static constexpr Index XprEvaluationCost = cost_helper<Kernel>::Cost, PacketSize = unpacket_traits<PacketType>::size;
|
||||
struct PacketAssignmentFunctor : public Kernel {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketAssignmentFunctor(Kernel& kernel) : Kernel(kernel) {}
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(Index outer, Index inner) {
|
||||
this->template assignPacketByOuterInner<Unaligned, Unaligned, PacketType>(outer, inner);
|
||||
}
|
||||
};
|
||||
struct ScalarAssignmentFunctor : public Kernel {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ScalarAssignmentFunctor(Kernel& kernel) : Kernel(kernel) {}
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(Index outer) {
|
||||
const Index innerSize = this->innerSize();
|
||||
const Index packetAccessSize = numext::round_down(innerSize, PacketSize);
|
||||
for (Index inner = packetAccessSize; inner < innerSize; inner++) this->assignCoeffByOuterInner(outer, inner);
|
||||
}
|
||||
};
|
||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Kernel& kernel, CoreThreadPoolDevice& device) {
|
||||
const Index outerSize = kernel.outerSize();
|
||||
const Index innerSize = kernel.innerSize();
|
||||
const Index packetAccessSize = numext::round_down(innerSize, PacketSize);
|
||||
constexpr float packetCost = static_cast<float>(XprEvaluationCost);
|
||||
const float scalarCost = static_cast<float>(XprEvaluationCost) * static_cast<float>(innerSize - packetAccessSize);
|
||||
PacketAssignmentFunctor packetFunctor(kernel);
|
||||
ScalarAssignmentFunctor scalarFunctor(kernel);
|
||||
device.template parallelFor<PacketAssignmentFunctor, PacketSize>(0, outerSize, 0, packetAccessSize, packetFunctor,
|
||||
packetCost);
|
||||
device.template parallelFor<ScalarAssignmentFunctor, 1>(0, outerSize, scalarFunctor, scalarCost);
|
||||
};
|
||||
};
|
||||
|
||||
template <typename Kernel>
|
||||
struct dense_assignment_loop_with_device<Kernel, CoreThreadPoolDevice, LinearTraversal, NoUnrolling> {
|
||||
static constexpr Index XprEvaluationCost = cost_helper<Kernel>::Cost;
|
||||
struct AssignmentFunctor : public Kernel {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE AssignmentFunctor(Kernel& kernel) : Kernel(kernel) {}
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(Index index) { this->assignCoeff(index); }
|
||||
};
|
||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Kernel& kernel, CoreThreadPoolDevice& device) {
|
||||
const Index size = kernel.size();
|
||||
constexpr float cost = static_cast<float>(XprEvaluationCost);
|
||||
AssignmentFunctor functor(kernel);
|
||||
device.template parallelFor<AssignmentFunctor, 1>(0, size, functor, cost);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Kernel>
|
||||
struct dense_assignment_loop_with_device<Kernel, CoreThreadPoolDevice, LinearVectorizedTraversal, NoUnrolling> {
|
||||
using Scalar = typename Kernel::Scalar;
|
||||
using PacketType = typename Kernel::PacketType;
|
||||
static constexpr Index XprEvaluationCost = cost_helper<Kernel>::Cost,
|
||||
RequestedAlignment = Kernel::AssignmentTraits::LinearRequiredAlignment,
|
||||
PacketSize = unpacket_traits<PacketType>::size,
|
||||
DstIsAligned = Kernel::AssignmentTraits::DstAlignment >= RequestedAlignment,
|
||||
DstAlignment = packet_traits<Scalar>::AlignedOnScalar ? RequestedAlignment
|
||||
: Kernel::AssignmentTraits::DstAlignment,
|
||||
SrcAlignment = Kernel::AssignmentTraits::JointAlignment;
|
||||
struct AssignmentFunctor : public Kernel {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE AssignmentFunctor(Kernel& kernel) : Kernel(kernel) {}
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(Index index) {
|
||||
this->template assignPacket<DstAlignment, SrcAlignment, PacketType>(index);
|
||||
}
|
||||
};
|
||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Kernel& kernel, CoreThreadPoolDevice& device) {
|
||||
const Index size = kernel.size();
|
||||
const Index alignedStart =
|
||||
DstIsAligned ? 0 : internal::first_aligned<RequestedAlignment>(kernel.dstDataPtr(), size);
|
||||
const Index alignedEnd = alignedStart + numext::round_down(size - alignedStart, PacketSize);
|
||||
|
||||
unaligned_dense_assignment_loop<DstIsAligned != 0>::run(kernel, 0, alignedStart);
|
||||
|
||||
constexpr float cost = static_cast<float>(XprEvaluationCost);
|
||||
AssignmentFunctor functor(kernel);
|
||||
device.template parallelFor<AssignmentFunctor, PacketSize>(alignedStart, alignedEnd, functor, cost);
|
||||
|
||||
unaligned_dense_assignment_loop<>::run(kernel, alignedEnd, size);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
|
||||
} // namespace Eigen
|
||||
|
||||
#endif // EIGEN_CORE_THREAD_POOL_DEVICE_H
|
||||
Reference in New Issue
Block a user