mirror of
https://github.com/wpilibsuite/allwpilib
synced 2026-06-30 02:31:44 +00:00
[wpimath] Replace constexpr coeff() and coeffRef() with operator() (#7391)
This commit is contained in:
@@ -97,6 +97,11 @@
|
||||
// for std::is_nothrow_move_assignable
|
||||
#include <type_traits>
|
||||
|
||||
// for std::this_thread::yield().
|
||||
#if !defined(EIGEN_USE_BLAS) && (defined(EIGEN_HAS_OPENMP) || defined(EIGEN_GEMM_THREADPOOL))
|
||||
#include <thread>
|
||||
#endif
|
||||
|
||||
// for outputting debug info
|
||||
#ifdef EIGEN_DEBUG_ASSIGN
|
||||
#include <iostream>
|
||||
@@ -117,8 +122,8 @@
|
||||
#include <CL/sycl.hpp>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
#include <thread>
|
||||
#include <utility>
|
||||
#ifndef EIGEN_SYCL_LOCAL_THREAD_DIM0
|
||||
#define EIGEN_SYCL_LOCAL_THREAD_DIM0 16
|
||||
#endif
|
||||
@@ -319,6 +324,7 @@ using std::ptrdiff_t;
|
||||
#include "src/Core/CwiseNullaryOp.h"
|
||||
#include "src/Core/CwiseUnaryView.h"
|
||||
#include "src/Core/SelfCwiseBinaryOp.h"
|
||||
#include "src/Core/InnerProduct.h"
|
||||
#include "src/Core/Dot.h"
|
||||
#include "src/Core/StableNorm.h"
|
||||
#include "src/Core/Stride.h"
|
||||
|
||||
@@ -117,18 +117,12 @@ class Array : public PlainObjectBase<Array<Scalar_, Rows_, Cols_, Options_, MaxR
|
||||
*
|
||||
* \sa resize(Index,Index)
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Array() : Base() { EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED }
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
// FIXME is it still needed ??
|
||||
/** \internal */
|
||||
EIGEN_DEVICE_FUNC Array(internal::constructor_without_unaligned_array_assert)
|
||||
: Base(internal::constructor_without_unaligned_array_assert()){EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED}
|
||||
#ifdef EIGEN_INITIALIZE_COEFFS
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Array() : Base() { EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED }
|
||||
#else
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Array() = default;
|
||||
#endif
|
||||
|
||||
EIGEN_DEVICE_FUNC Array(Array && other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_constructible<Scalar>::value)
|
||||
: Base(std::move(other)) {
|
||||
}
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Array(Array&&) = default;
|
||||
EIGEN_DEVICE_FUNC Array& operator=(Array&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_assignable<Scalar>::value) {
|
||||
Base::operator=(std::move(other));
|
||||
return *this;
|
||||
@@ -232,7 +226,7 @@ class Array : public PlainObjectBase<Array<Scalar_, Rows_, Cols_, Options_, MaxR
|
||||
}
|
||||
|
||||
/** Copy constructor */
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Array(const Array& other) : Base(other) {}
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Array(const Array&) = default;
|
||||
|
||||
private:
|
||||
struct PrivateType {};
|
||||
|
||||
@@ -65,8 +65,8 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> > {
|
||||
return m_expression.innerStride();
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); }
|
||||
EIGEN_DEVICE_FUNC inline const Scalar* data() const { return m_expression.data(); }
|
||||
EIGEN_DEVICE_FUNC constexpr ScalarWithConstIfNotLvalue* data() { return m_expression.data(); }
|
||||
EIGEN_DEVICE_FUNC constexpr const Scalar* data() const { return m_expression.data(); }
|
||||
|
||||
EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index rowId, Index colId) const {
|
||||
return m_expression.coeffRef(rowId, colId);
|
||||
@@ -144,8 +144,8 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> > {
|
||||
return m_expression.innerStride();
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); }
|
||||
EIGEN_DEVICE_FUNC inline const Scalar* data() const { return m_expression.data(); }
|
||||
EIGEN_DEVICE_FUNC constexpr ScalarWithConstIfNotLvalue* data() { return m_expression.data(); }
|
||||
EIGEN_DEVICE_FUNC constexpr const Scalar* data() const { return m_expression.data(); }
|
||||
|
||||
EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index rowId, Index colId) const {
|
||||
return m_expression.derived().coeffRef(rowId, colId);
|
||||
|
||||
@@ -278,7 +278,7 @@ class BlockImpl_dense : public internal::dense_xpr_base<Block<XprType, BlockRows
|
||||
|
||||
#ifdef EIGEN_PARSED_BY_DOXYGEN
|
||||
/** \sa MapBase::data() */
|
||||
EIGEN_DEVICE_FUNC inline const Scalar* data() const;
|
||||
EIGEN_DEVICE_FUNC constexpr const Scalar* data() const;
|
||||
EIGEN_DEVICE_FUNC inline Index innerStride() const;
|
||||
EIGEN_DEVICE_FUNC inline Index outerStride() const;
|
||||
#endif
|
||||
|
||||
@@ -124,8 +124,7 @@ struct evaluator_base {
|
||||
// noncopyable:
|
||||
// Don't make this class inherit noncopyable as this kills EBO (Empty Base Optimization)
|
||||
// and make complex evaluator much larger than then should do.
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE evaluator_base() {}
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ~evaluator_base() {}
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr evaluator_base() = default;
|
||||
|
||||
private:
|
||||
EIGEN_DEVICE_FUNC evaluator_base(const evaluator_base&);
|
||||
@@ -143,7 +142,7 @@ struct evaluator_base {
|
||||
template <typename Scalar, int OuterStride>
|
||||
class plainobjectbase_evaluator_data {
|
||||
public:
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE plainobjectbase_evaluator_data(const Scalar* ptr, Index outerStride)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr plainobjectbase_evaluator_data(const Scalar* ptr, Index outerStride)
|
||||
: data(ptr) {
|
||||
#ifndef EIGEN_INTERNAL_DEBUGGING
|
||||
EIGEN_UNUSED_VARIABLE(outerStride);
|
||||
@@ -157,9 +156,9 @@ class plainobjectbase_evaluator_data {
|
||||
template <typename Scalar>
|
||||
class plainobjectbase_evaluator_data<Scalar, Dynamic> {
|
||||
public:
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE plainobjectbase_evaluator_data(const Scalar* ptr, Index outerStride)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr plainobjectbase_evaluator_data(const Scalar* ptr, Index outerStride)
|
||||
: data(ptr), m_outerStride(outerStride) {}
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index outerStride() const { return m_outerStride; }
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index outerStride() const { return m_outerStride; }
|
||||
const Scalar* data;
|
||||
|
||||
protected:
|
||||
@@ -189,35 +188,37 @@ struct evaluator<PlainObjectBase<Derived>> : evaluator_base<Derived> {
|
||||
: RowsAtCompileTime
|
||||
};
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE evaluator() : m_d(0, OuterStrideAtCompileTime) {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr evaluator() : m_d(0, OuterStrideAtCompileTime) {
|
||||
EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const PlainObjectType& m)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr explicit evaluator(const PlainObjectType& m)
|
||||
: m_d(m.data(), IsVectorAtCompileTime ? 0 : m.outerStride()) {
|
||||
EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr CoeffReturnType coeff(Index row, Index col) const {
|
||||
if (IsRowMajor)
|
||||
return m_d.data[row * m_d.outerStride() + col];
|
||||
else
|
||||
return m_d.data[row + col * m_d.outerStride()];
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { return m_d.data[index]; }
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr CoeffReturnType coeff(Index index) const { return m_d.data[index]; }
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col) {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Scalar& coeffRef(Index row, Index col) {
|
||||
if (IsRowMajor)
|
||||
return const_cast<Scalar*>(m_d.data)[row * m_d.outerStride() + col];
|
||||
else
|
||||
return const_cast<Scalar*>(m_d.data)[row + col * m_d.outerStride()];
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { return const_cast<Scalar*>(m_d.data)[index]; }
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Scalar& coeffRef(Index index) {
|
||||
return const_cast<Scalar*>(m_d.data)[index];
|
||||
}
|
||||
|
||||
template <int LoadMode, typename PacketType>
|
||||
EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const {
|
||||
if (IsRowMajor)
|
||||
return ploadt<PacketType, LoadMode>(m_d.data + row * m_d.outerStride() + col);
|
||||
else
|
||||
@@ -225,12 +226,12 @@ struct evaluator<PlainObjectBase<Derived>> : evaluator_base<Derived> {
|
||||
}
|
||||
|
||||
template <int LoadMode, typename PacketType>
|
||||
EIGEN_STRONG_INLINE PacketType packet(Index index) const {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index index) const {
|
||||
return ploadt<PacketType, LoadMode>(m_d.data + index);
|
||||
}
|
||||
|
||||
template <int StoreMode, typename PacketType>
|
||||
EIGEN_STRONG_INLINE void writePacket(Index row, Index col, const PacketType& x) {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index row, Index col, const PacketType& x) {
|
||||
if (IsRowMajor)
|
||||
return pstoret<Scalar, PacketType, StoreMode>(const_cast<Scalar*>(m_d.data) + row * m_d.outerStride() + col, x);
|
||||
else
|
||||
@@ -238,7 +239,7 @@ struct evaluator<PlainObjectBase<Derived>> : evaluator_base<Derived> {
|
||||
}
|
||||
|
||||
template <int StoreMode, typename PacketType>
|
||||
EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) {
|
||||
return pstoret<Scalar, PacketType, StoreMode>(const_cast<Scalar*>(m_d.data) + index, x);
|
||||
}
|
||||
|
||||
@@ -251,9 +252,10 @@ struct evaluator<Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols>>
|
||||
: evaluator<PlainObjectBase<Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols>>> {
|
||||
typedef Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols> XprType;
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE evaluator() {}
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr evaluator() = default;
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& m) : evaluator<PlainObjectBase<XprType>>(m) {}
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr explicit evaluator(const XprType& m)
|
||||
: evaluator<PlainObjectBase<XprType>>(m) {}
|
||||
};
|
||||
|
||||
template <typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>
|
||||
@@ -261,9 +263,10 @@ struct evaluator<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols>>
|
||||
: evaluator<PlainObjectBase<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols>>> {
|
||||
typedef Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> XprType;
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE evaluator() {}
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr evaluator() = default;
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& m) : evaluator<PlainObjectBase<XprType>>(m) {}
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr explicit evaluator(const XprType& m)
|
||||
: evaluator<PlainObjectBase<XprType>>(m) {}
|
||||
};
|
||||
|
||||
// -------------------- Transpose --------------------
|
||||
@@ -296,22 +299,22 @@ struct unary_evaluator<Transpose<ArgType>, IndexBased> : evaluator_base<Transpos
|
||||
}
|
||||
|
||||
template <int LoadMode, typename PacketType>
|
||||
EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const {
|
||||
return m_argImpl.template packet<LoadMode, PacketType>(col, row);
|
||||
}
|
||||
|
||||
template <int LoadMode, typename PacketType>
|
||||
EIGEN_STRONG_INLINE PacketType packet(Index index) const {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index index) const {
|
||||
return m_argImpl.template packet<LoadMode, PacketType>(index);
|
||||
}
|
||||
|
||||
template <int StoreMode, typename PacketType>
|
||||
EIGEN_STRONG_INLINE void writePacket(Index row, Index col, const PacketType& x) {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index row, Index col, const PacketType& x) {
|
||||
m_argImpl.template writePacket<StoreMode, PacketType>(col, row, x);
|
||||
}
|
||||
|
||||
template <int StoreMode, typename PacketType>
|
||||
EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) {
|
||||
m_argImpl.template writePacket<StoreMode, PacketType>(index, x);
|
||||
}
|
||||
|
||||
@@ -490,12 +493,12 @@ struct evaluator<CwiseNullaryOp<NullaryOp, PlainObjectType>>
|
||||
}
|
||||
|
||||
template <int LoadMode, typename PacketType, typename IndexType>
|
||||
EIGEN_STRONG_INLINE PacketType packet(IndexType row, IndexType col) const {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(IndexType row, IndexType col) const {
|
||||
return m_wrapper.template packetOp<PacketType>(m_functor, row, col);
|
||||
}
|
||||
|
||||
template <int LoadMode, typename PacketType, typename IndexType>
|
||||
EIGEN_STRONG_INLINE PacketType packet(IndexType index) const {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(IndexType index) const {
|
||||
return m_wrapper.template packetOp<PacketType>(m_functor, index);
|
||||
}
|
||||
|
||||
@@ -534,12 +537,12 @@ struct unary_evaluator<CwiseUnaryOp<UnaryOp, ArgType>, IndexBased> : evaluator_b
|
||||
}
|
||||
|
||||
template <int LoadMode, typename PacketType>
|
||||
EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const {
|
||||
return m_d.func().packetOp(m_d.argImpl.template packet<LoadMode, PacketType>(row, col));
|
||||
}
|
||||
|
||||
template <int LoadMode, typename PacketType>
|
||||
EIGEN_STRONG_INLINE PacketType packet(Index index) const {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index index) const {
|
||||
return m_d.func().packetOp(m_d.argImpl.template packet<LoadMode, PacketType>(index));
|
||||
}
|
||||
|
||||
@@ -627,7 +630,7 @@ struct unary_evaluator<CwiseUnaryOp<core_cast_op<SrcType, DstType>, ArgType>, In
|
||||
}
|
||||
|
||||
template <int LoadMode, typename PacketType = SrcPacketType>
|
||||
EIGEN_STRONG_INLINE PacketType srcPacket(Index row, Index col, Index offset) const {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType srcPacket(Index row, Index col, Index offset) const {
|
||||
constexpr int PacketSize = unpacket_traits<PacketType>::size;
|
||||
Index actualRow = IsRowMajor ? row : row + (offset * PacketSize);
|
||||
Index actualCol = IsRowMajor ? col + (offset * PacketSize) : col;
|
||||
@@ -635,7 +638,7 @@ struct unary_evaluator<CwiseUnaryOp<core_cast_op<SrcType, DstType>, ArgType>, In
|
||||
return m_argImpl.template packet<LoadMode, PacketType>(actualRow, actualCol);
|
||||
}
|
||||
template <int LoadMode, typename PacketType = SrcPacketType>
|
||||
EIGEN_STRONG_INLINE PacketType srcPacket(Index index, Index offset) const {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType srcPacket(Index index, Index offset) const {
|
||||
constexpr int PacketSize = unpacket_traits<PacketType>::size;
|
||||
Index actualIndex = index + (offset * PacketSize);
|
||||
eigen_assert(check_array_bounds(actualIndex, PacketSize) && "Array index out of bounds");
|
||||
@@ -652,7 +655,7 @@ struct unary_evaluator<CwiseUnaryOp<core_cast_op<SrcType, DstType>, ArgType>, In
|
||||
// If not, perform full load. Otherwise, revert to a scalar loop to perform a partial load.
|
||||
// In either case, perform a vectorized cast of the source packet.
|
||||
template <int LoadMode, typename DstPacketType, AltSrcScalarOp<DstPacketType> = true>
|
||||
EIGEN_STRONG_INLINE DstPacketType packet(Index row, Index col) const {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packet(Index row, Index col) const {
|
||||
constexpr int DstPacketSize = unpacket_traits<DstPacketType>::size;
|
||||
constexpr int SrcBytesIncrement = DstPacketSize * sizeof(SrcType);
|
||||
constexpr int SrcLoadMode = plain_enum_min(SrcBytesIncrement, LoadMode);
|
||||
@@ -669,7 +672,7 @@ struct unary_evaluator<CwiseUnaryOp<core_cast_op<SrcType, DstType>, ArgType>, In
|
||||
}
|
||||
// Use the source packet type with the same size as DstPacketType, if it exists
|
||||
template <int LoadMode, typename DstPacketType, SrcPacketArgs1<DstPacketType> = true>
|
||||
EIGEN_STRONG_INLINE DstPacketType packet(Index row, Index col) const {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packet(Index row, Index col) const {
|
||||
constexpr int DstPacketSize = unpacket_traits<DstPacketType>::size;
|
||||
using SizedSrcPacketType = typename find_packet_by_size<SrcType, DstPacketSize>::type;
|
||||
constexpr int SrcBytesIncrement = DstPacketSize * sizeof(SrcType);
|
||||
@@ -678,14 +681,14 @@ struct unary_evaluator<CwiseUnaryOp<core_cast_op<SrcType, DstType>, ArgType>, In
|
||||
}
|
||||
// unpacket_traits<DstPacketType>::size == 2 * SrcPacketSize
|
||||
template <int LoadMode, typename DstPacketType, SrcPacketArgs2<DstPacketType> = true>
|
||||
EIGEN_STRONG_INLINE DstPacketType packet(Index row, Index col) const {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packet(Index row, Index col) const {
|
||||
constexpr int SrcLoadMode = plain_enum_min(SrcPacketBytes, LoadMode);
|
||||
return pcast<SrcPacketType, DstPacketType>(srcPacket<SrcLoadMode>(row, col, 0),
|
||||
srcPacket<SrcLoadMode>(row, col, 1));
|
||||
}
|
||||
// unpacket_traits<DstPacketType>::size == 4 * SrcPacketSize
|
||||
template <int LoadMode, typename DstPacketType, SrcPacketArgs4<DstPacketType> = true>
|
||||
EIGEN_STRONG_INLINE DstPacketType packet(Index row, Index col) const {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packet(Index row, Index col) const {
|
||||
constexpr int SrcLoadMode = plain_enum_min(SrcPacketBytes, LoadMode);
|
||||
return pcast<SrcPacketType, DstPacketType>(srcPacket<SrcLoadMode>(row, col, 0), srcPacket<SrcLoadMode>(row, col, 1),
|
||||
srcPacket<SrcLoadMode>(row, col, 2),
|
||||
@@ -693,7 +696,7 @@ struct unary_evaluator<CwiseUnaryOp<core_cast_op<SrcType, DstType>, ArgType>, In
|
||||
}
|
||||
// unpacket_traits<DstPacketType>::size == 8 * SrcPacketSize
|
||||
template <int LoadMode, typename DstPacketType, SrcPacketArgs8<DstPacketType> = true>
|
||||
EIGEN_STRONG_INLINE DstPacketType packet(Index row, Index col) const {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packet(Index row, Index col) const {
|
||||
constexpr int SrcLoadMode = plain_enum_min(SrcPacketBytes, LoadMode);
|
||||
return pcast<SrcPacketType, DstPacketType>(
|
||||
srcPacket<SrcLoadMode>(row, col, 0), srcPacket<SrcLoadMode>(row, col, 1), srcPacket<SrcLoadMode>(row, col, 2),
|
||||
@@ -703,7 +706,7 @@ struct unary_evaluator<CwiseUnaryOp<core_cast_op<SrcType, DstType>, ArgType>, In
|
||||
|
||||
// Analogous routines for linear access.
|
||||
template <int LoadMode, typename DstPacketType, AltSrcScalarOp<DstPacketType> = true>
|
||||
EIGEN_STRONG_INLINE DstPacketType packet(Index index) const {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packet(Index index) const {
|
||||
constexpr int DstPacketSize = unpacket_traits<DstPacketType>::size;
|
||||
constexpr int SrcBytesIncrement = DstPacketSize * sizeof(SrcType);
|
||||
constexpr int SrcLoadMode = plain_enum_min(SrcBytesIncrement, LoadMode);
|
||||
@@ -719,7 +722,7 @@ struct unary_evaluator<CwiseUnaryOp<core_cast_op<SrcType, DstType>, ArgType>, In
|
||||
return pcast<SrcPacketType, DstPacketType>(src);
|
||||
}
|
||||
template <int LoadMode, typename DstPacketType, SrcPacketArgs1<DstPacketType> = true>
|
||||
EIGEN_STRONG_INLINE DstPacketType packet(Index index) const {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packet(Index index) const {
|
||||
constexpr int DstPacketSize = unpacket_traits<DstPacketType>::size;
|
||||
using SizedSrcPacketType = typename find_packet_by_size<SrcType, DstPacketSize>::type;
|
||||
constexpr int SrcBytesIncrement = DstPacketSize * sizeof(SrcType);
|
||||
@@ -727,18 +730,18 @@ struct unary_evaluator<CwiseUnaryOp<core_cast_op<SrcType, DstType>, ArgType>, In
|
||||
return pcast<SizedSrcPacketType, DstPacketType>(srcPacket<SrcLoadMode, SizedSrcPacketType>(index, 0));
|
||||
}
|
||||
template <int LoadMode, typename DstPacketType, SrcPacketArgs2<DstPacketType> = true>
|
||||
EIGEN_STRONG_INLINE DstPacketType packet(Index index) const {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packet(Index index) const {
|
||||
constexpr int SrcLoadMode = plain_enum_min(SrcPacketBytes, LoadMode);
|
||||
return pcast<SrcPacketType, DstPacketType>(srcPacket<SrcLoadMode>(index, 0), srcPacket<SrcLoadMode>(index, 1));
|
||||
}
|
||||
template <int LoadMode, typename DstPacketType, SrcPacketArgs4<DstPacketType> = true>
|
||||
EIGEN_STRONG_INLINE DstPacketType packet(Index index) const {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packet(Index index) const {
|
||||
constexpr int SrcLoadMode = plain_enum_min(SrcPacketBytes, LoadMode);
|
||||
return pcast<SrcPacketType, DstPacketType>(srcPacket<SrcLoadMode>(index, 0), srcPacket<SrcLoadMode>(index, 1),
|
||||
srcPacket<SrcLoadMode>(index, 2), srcPacket<SrcLoadMode>(index, 3));
|
||||
}
|
||||
template <int LoadMode, typename DstPacketType, SrcPacketArgs8<DstPacketType> = true>
|
||||
EIGEN_STRONG_INLINE DstPacketType packet(Index index) const {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packet(Index index) const {
|
||||
constexpr int SrcLoadMode = plain_enum_min(SrcPacketBytes, LoadMode);
|
||||
return pcast<SrcPacketType, DstPacketType>(srcPacket<SrcLoadMode>(index, 0), srcPacket<SrcLoadMode>(index, 1),
|
||||
srcPacket<SrcLoadMode>(index, 2), srcPacket<SrcLoadMode>(index, 3),
|
||||
@@ -810,14 +813,14 @@ struct ternary_evaluator<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3>, IndexBased
|
||||
}
|
||||
|
||||
template <int LoadMode, typename PacketType>
|
||||
EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const {
|
||||
return m_d.func().packetOp(m_d.arg1Impl.template packet<LoadMode, PacketType>(row, col),
|
||||
m_d.arg2Impl.template packet<LoadMode, PacketType>(row, col),
|
||||
m_d.arg3Impl.template packet<LoadMode, PacketType>(row, col));
|
||||
}
|
||||
|
||||
template <int LoadMode, typename PacketType>
|
||||
EIGEN_STRONG_INLINE PacketType packet(Index index) const {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index index) const {
|
||||
return m_d.func().packetOp(m_d.arg1Impl.template packet<LoadMode, PacketType>(index),
|
||||
m_d.arg2Impl.template packet<LoadMode, PacketType>(index),
|
||||
m_d.arg3Impl.template packet<LoadMode, PacketType>(index));
|
||||
@@ -908,13 +911,13 @@ struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IndexBased, IndexBase
|
||||
}
|
||||
|
||||
template <int LoadMode, typename PacketType>
|
||||
EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const {
|
||||
return m_d.func().packetOp(m_d.lhsImpl.template packet<LoadMode, PacketType>(row, col),
|
||||
m_d.rhsImpl.template packet<LoadMode, PacketType>(row, col));
|
||||
}
|
||||
|
||||
template <int LoadMode, typename PacketType>
|
||||
EIGEN_STRONG_INLINE PacketType packet(Index index) const {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index index) const {
|
||||
return m_d.func().packetOp(m_d.lhsImpl.template packet<LoadMode, PacketType>(index),
|
||||
m_d.rhsImpl.template packet<LoadMode, PacketType>(index));
|
||||
}
|
||||
@@ -1030,24 +1033,24 @@ struct mapbase_evaluator : evaluator_base<Derived> {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { return m_data[index * m_innerStride.value()]; }
|
||||
|
||||
template <int LoadMode, typename PacketType>
|
||||
EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const {
|
||||
PointerType ptr = m_data + row * rowStride() + col * colStride();
|
||||
return internal::ploadt<PacketType, LoadMode>(ptr);
|
||||
}
|
||||
|
||||
template <int LoadMode, typename PacketType>
|
||||
EIGEN_STRONG_INLINE PacketType packet(Index index) const {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index index) const {
|
||||
return internal::ploadt<PacketType, LoadMode>(m_data + index * m_innerStride.value());
|
||||
}
|
||||
|
||||
template <int StoreMode, typename PacketType>
|
||||
EIGEN_STRONG_INLINE void writePacket(Index row, Index col, const PacketType& x) {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index row, Index col, const PacketType& x) {
|
||||
PointerType ptr = m_data + row * rowStride() + col * colStride();
|
||||
return internal::pstoret<Scalar, PacketType, StoreMode>(ptr, x);
|
||||
}
|
||||
|
||||
template <int StoreMode, typename PacketType>
|
||||
EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) {
|
||||
internal::pstoret<Scalar, PacketType, StoreMode>(m_data + index * m_innerStride.value(), x);
|
||||
}
|
||||
|
||||
@@ -1217,12 +1220,12 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa
|
||||
}
|
||||
|
||||
template <int LoadMode, typename PacketType>
|
||||
EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const {
|
||||
return m_argImpl.template packet<LoadMode, PacketType>(m_startRow.value() + row, m_startCol.value() + col);
|
||||
}
|
||||
|
||||
template <int LoadMode, typename PacketType>
|
||||
EIGEN_STRONG_INLINE PacketType packet(Index index) const {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index index) const {
|
||||
if (ForwardLinearAccess)
|
||||
return m_argImpl.template packet<LoadMode, PacketType>(m_linear_offset.value() + index);
|
||||
else
|
||||
@@ -1230,12 +1233,12 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa
|
||||
}
|
||||
|
||||
template <int StoreMode, typename PacketType>
|
||||
EIGEN_STRONG_INLINE void writePacket(Index row, Index col, const PacketType& x) {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index row, Index col, const PacketType& x) {
|
||||
return m_argImpl.template writePacket<StoreMode, PacketType>(m_startRow.value() + row, m_startCol.value() + col, x);
|
||||
}
|
||||
|
||||
template <int StoreMode, typename PacketType>
|
||||
EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) {
|
||||
if (ForwardLinearAccess)
|
||||
return m_argImpl.template writePacket<StoreMode, PacketType>(m_linear_offset.value() + index, x);
|
||||
else
|
||||
@@ -1378,7 +1381,7 @@ struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor>>
|
||||
}
|
||||
|
||||
template <int LoadMode, typename PacketType>
|
||||
EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const {
|
||||
const Index actual_row = internal::traits<XprType>::RowsAtCompileTime == 1 ? 0
|
||||
: RowFactor == 1 ? row
|
||||
: row % m_rows.value();
|
||||
@@ -1390,7 +1393,7 @@ struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor>>
|
||||
}
|
||||
|
||||
template <int LoadMode, typename PacketType>
|
||||
EIGEN_STRONG_INLINE PacketType packet(Index index) const {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index index) const {
|
||||
const Index actual_index = internal::traits<XprType>::RowsAtCompileTime == 1
|
||||
? (ColFactor == 1 ? index : index % m_cols.value())
|
||||
: (RowFactor == 1 ? index : index % m_rows.value());
|
||||
@@ -1435,22 +1438,22 @@ struct evaluator_wrapper_base : evaluator_base<XprType> {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { return m_argImpl.coeffRef(index); }
|
||||
|
||||
template <int LoadMode, typename PacketType>
|
||||
EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const {
|
||||
return m_argImpl.template packet<LoadMode, PacketType>(row, col);
|
||||
}
|
||||
|
||||
template <int LoadMode, typename PacketType>
|
||||
EIGEN_STRONG_INLINE PacketType packet(Index index) const {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index index) const {
|
||||
return m_argImpl.template packet<LoadMode, PacketType>(index);
|
||||
}
|
||||
|
||||
template <int StoreMode, typename PacketType>
|
||||
EIGEN_STRONG_INLINE void writePacket(Index row, Index col, const PacketType& x) {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index row, Index col, const PacketType& x) {
|
||||
m_argImpl.template writePacket<StoreMode>(row, col, x);
|
||||
}
|
||||
|
||||
template <int StoreMode, typename PacketType>
|
||||
EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) {
|
||||
m_argImpl.template writePacket<StoreMode>(index, x);
|
||||
}
|
||||
|
||||
@@ -1532,7 +1535,7 @@ struct unary_evaluator<Reverse<ArgType, Direction>> : evaluator_base<Reverse<Arg
|
||||
}
|
||||
|
||||
template <int LoadMode, typename PacketType>
|
||||
EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const {
|
||||
enum {
|
||||
PacketSize = unpacket_traits<PacketType>::size,
|
||||
OffsetRow = ReverseRow && IsColMajor ? PacketSize : 1,
|
||||
@@ -1544,14 +1547,14 @@ struct unary_evaluator<Reverse<ArgType, Direction>> : evaluator_base<Reverse<Arg
|
||||
}
|
||||
|
||||
template <int LoadMode, typename PacketType>
|
||||
EIGEN_STRONG_INLINE PacketType packet(Index index) const {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index index) const {
|
||||
enum { PacketSize = unpacket_traits<PacketType>::size };
|
||||
return preverse(
|
||||
m_argImpl.template packet<LoadMode, PacketType>(m_rows.value() * m_cols.value() - index - PacketSize));
|
||||
}
|
||||
|
||||
template <int LoadMode, typename PacketType>
|
||||
EIGEN_STRONG_INLINE void writePacket(Index row, Index col, const PacketType& x) {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index row, Index col, const PacketType& x) {
|
||||
// FIXME we could factorize some code with packet(i,j)
|
||||
enum {
|
||||
PacketSize = unpacket_traits<PacketType>::size,
|
||||
@@ -1565,7 +1568,7 @@ struct unary_evaluator<Reverse<ArgType, Direction>> : evaluator_base<Reverse<Arg
|
||||
}
|
||||
|
||||
template <int LoadMode, typename PacketType>
|
||||
EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) {
|
||||
enum { PacketSize = unpacket_traits<PacketType>::size };
|
||||
m_argImpl.template writePacket<LoadMode>(m_rows.value() * m_cols.value() - index - PacketSize, preverse(x));
|
||||
}
|
||||
|
||||
@@ -642,6 +642,18 @@ class DenseBase
|
||||
EIGEN_DEVICE_FUNC explicit DenseBase(const DenseBase<OtherDerived>&);
|
||||
};
|
||||
|
||||
/** Free-function swap.
|
||||
*/
|
||||
template <typename DerivedA, typename DerivedB>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
// Use forwarding references to capture all combinations of cv-qualified l+r-value cases.
|
||||
std::enable_if_t<std::is_base_of<DenseBase<std::decay_t<DerivedA>>, std::decay_t<DerivedA>>::value &&
|
||||
std::is_base_of<DenseBase<std::decay_t<DerivedB>>, std::decay_t<DerivedB>>::value,
|
||||
void>
|
||||
swap(DerivedA&& a, DerivedB&& b) {
|
||||
a.swap(b);
|
||||
}
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_DENSEBASE_H
|
||||
|
||||
@@ -298,7 +298,7 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
|
||||
*
|
||||
* \sa operator()(Index,Index), coeff(Index, Index) const, coeffRef(Index)
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col) {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Scalar& coeffRef(Index row, Index col) {
|
||||
eigen_internal_assert(row >= 0 && row < rows() && col >= 0 && col < cols());
|
||||
return internal::evaluator<Derived>(derived()).coeffRef(row, col);
|
||||
}
|
||||
@@ -312,7 +312,7 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
|
||||
* \sa operator[](Index)
|
||||
*/
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator()(Index row, Index col) {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Scalar& operator()(Index row, Index col) {
|
||||
eigen_assert(row >= 0 && row < rows() && col >= 0 && col < cols());
|
||||
return coeffRef(row, col);
|
||||
}
|
||||
@@ -332,7 +332,7 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
|
||||
* \sa operator[](Index), coeff(Index) const, coeffRef(Index,Index)
|
||||
*/
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Scalar& coeffRef(Index index) {
|
||||
EIGEN_STATIC_ASSERT(internal::evaluator<Derived>::Flags & LinearAccessBit,
|
||||
THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS)
|
||||
eigen_internal_assert(index >= 0 && index < size());
|
||||
@@ -346,7 +346,7 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
|
||||
* \sa operator[](Index) const, operator()(Index,Index), x(), y(), z(), w()
|
||||
*/
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator[](Index index) {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Scalar& operator[](Index index) {
|
||||
EIGEN_STATIC_ASSERT(Derived::IsVectorAtCompileTime,
|
||||
THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD)
|
||||
eigen_assert(index >= 0 && index < size());
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -17,28 +17,18 @@ namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
// helper function for dot(). The problem is that if we put that in the body of dot(), then upon calling dot
|
||||
// with mismatched types, the compiler emits errors about failing to instantiate cwiseProduct BEFORE
|
||||
// looking at the static assertions. Thus this is a trick to get better compile errors.
|
||||
template <typename T, typename U,
|
||||
bool NeedToTranspose = T::IsVectorAtCompileTime && U::IsVectorAtCompileTime &&
|
||||
((int(T::RowsAtCompileTime) == 1 && int(U::ColsAtCompileTime) == 1) ||
|
||||
(int(T::ColsAtCompileTime) == 1 && int(U::RowsAtCompileTime) == 1))>
|
||||
struct dot_nocheck {
|
||||
typedef scalar_conj_product_op<typename traits<T>::Scalar, typename traits<U>::Scalar> conj_prod;
|
||||
typedef typename conj_prod::result_type ResScalar;
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b) {
|
||||
return a.template binaryExpr<conj_prod>(b).sum();
|
||||
template <typename Derived, typename Scalar = typename traits<Derived>::Scalar>
|
||||
struct squared_norm_impl {
|
||||
using Real = typename NumTraits<Scalar>::Real;
|
||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Real run(const Derived& a) {
|
||||
Scalar result = a.unaryExpr(squared_norm_functor<Scalar>()).sum();
|
||||
return numext::real(result) + numext::imag(result);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T, typename U>
|
||||
struct dot_nocheck<T, U, true> {
|
||||
typedef scalar_conj_product_op<typename traits<T>::Scalar, typename traits<U>::Scalar> conj_prod;
|
||||
typedef typename conj_prod::result_type ResScalar;
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b) {
|
||||
return a.transpose().template binaryExpr<conj_prod>(b).sum();
|
||||
}
|
||||
template <typename Derived>
|
||||
struct squared_norm_impl<Derived, bool> {
|
||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool run(const Derived& a) { return a.any(); }
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
@@ -60,18 +50,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
typename ScalarBinaryOpTraits<typename internal::traits<Derived>::Scalar,
|
||||
typename internal::traits<OtherDerived>::Scalar>::ReturnType
|
||||
MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const {
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
|
||||
EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Derived, OtherDerived)
|
||||
#if !(defined(EIGEN_NO_STATIC_ASSERT) && defined(EIGEN_NO_DEBUG))
|
||||
EIGEN_CHECK_BINARY_COMPATIBILIY(
|
||||
Eigen::internal::scalar_conj_product_op<Scalar EIGEN_COMMA typename OtherDerived::Scalar>, Scalar,
|
||||
typename OtherDerived::Scalar);
|
||||
#endif
|
||||
|
||||
eigen_assert(size() == other.size());
|
||||
|
||||
return internal::dot_nocheck<Derived, OtherDerived>::run(*this, other);
|
||||
return internal::dot_impl<Derived, OtherDerived>::run(derived(), other.derived());
|
||||
}
|
||||
|
||||
//---------- implementation of L2 norm and related functions ----------
|
||||
@@ -85,7 +64,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
template <typename Derived>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scalar>::Real
|
||||
MatrixBase<Derived>::squaredNorm() const {
|
||||
return numext::real((*this).cwiseAbs2().sum());
|
||||
return internal::squared_norm_impl<Derived>::run(derived());
|
||||
}
|
||||
|
||||
/** \returns, for vectors, the \em l2 norm of \c *this, and for matrices the Frobenius norm.
|
||||
|
||||
@@ -46,9 +46,9 @@ struct EigenBase {
|
||||
typedef typename internal::traits<Derived>::StorageKind StorageKind;
|
||||
|
||||
/** \returns a reference to the derived object */
|
||||
EIGEN_DEVICE_FUNC Derived& derived() { return *static_cast<Derived*>(this); }
|
||||
EIGEN_DEVICE_FUNC constexpr Derived& derived() { return *static_cast<Derived*>(this); }
|
||||
/** \returns a const reference to the derived object */
|
||||
EIGEN_DEVICE_FUNC const Derived& derived() const { return *static_cast<const Derived*>(this); }
|
||||
EIGEN_DEVICE_FUNC constexpr const Derived& derived() const { return *static_cast<const Derived*>(this); }
|
||||
|
||||
EIGEN_DEVICE_FUNC inline Derived& const_cast_derived() const {
|
||||
return *static_cast<Derived*>(const_cast<EigenBase*>(this));
|
||||
|
||||
@@ -229,7 +229,7 @@ struct gemv_static_vector_if;
|
||||
|
||||
template <typename Scalar, int Size, int MaxSize>
|
||||
struct gemv_static_vector_if<Scalar, Size, MaxSize, false> {
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Scalar* data() {
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC constexpr Scalar* data() {
|
||||
eigen_internal_assert(false && "should never be called");
|
||||
return 0;
|
||||
}
|
||||
@@ -237,19 +237,19 @@ struct gemv_static_vector_if<Scalar, Size, MaxSize, false> {
|
||||
|
||||
template <typename Scalar, int Size>
|
||||
struct gemv_static_vector_if<Scalar, Size, Dynamic, true> {
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Scalar* data() { return 0; }
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC constexpr Scalar* data() { return 0; }
|
||||
};
|
||||
|
||||
template <typename Scalar, int Size, int MaxSize>
|
||||
struct gemv_static_vector_if<Scalar, Size, MaxSize, true> {
|
||||
#if EIGEN_MAX_STATIC_ALIGN_BYTES != 0
|
||||
internal::plain_array<Scalar, internal::min_size_prefer_fixed(Size, MaxSize), 0, AlignedMax> m_data;
|
||||
EIGEN_STRONG_INLINE Scalar* data() { return m_data.array; }
|
||||
EIGEN_STRONG_INLINE constexpr Scalar* data() { return m_data.array; }
|
||||
#else
|
||||
// Some architectures cannot align on the stack,
|
||||
// => let's manually enforce alignment by allocating more data and return the address of the first aligned element.
|
||||
internal::plain_array<Scalar, internal::min_size_prefer_fixed(Size, MaxSize) + EIGEN_MAX_ALIGN_BYTES, 0> m_data;
|
||||
EIGEN_STRONG_INLINE Scalar* data() {
|
||||
EIGEN_STRONG_INLINE constexpr Scalar* data() {
|
||||
return reinterpret_cast<Scalar*>((std::uintptr_t(m_data.array) & ~(std::size_t(EIGEN_MAX_ALIGN_BYTES - 1))) +
|
||||
EIGEN_MAX_ALIGN_BYTES);
|
||||
}
|
||||
@@ -327,6 +327,7 @@ struct gemv_dense_selector<OnTheRight, ColMajor, true> {
|
||||
|
||||
if (!evalToDest) {
|
||||
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
|
||||
constexpr int Size = Dest::SizeAtCompileTime;
|
||||
Index size = dest.size();
|
||||
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
|
||||
#endif
|
||||
@@ -391,6 +392,7 @@ struct gemv_dense_selector<OnTheRight, RowMajor, true> {
|
||||
|
||||
if (!DirectlyUseRhs) {
|
||||
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
|
||||
constexpr int Size = ActualRhsTypeCleaned::SizeAtCompileTime;
|
||||
Index size = actualRhs.size();
|
||||
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
|
||||
#endif
|
||||
|
||||
@@ -1083,8 +1083,13 @@ EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patanh(const Packet&
|
||||
/** \internal \returns the exp of \a a (coeff-wise) */
|
||||
template <typename Packet>
|
||||
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexp(const Packet& a) {
|
||||
EIGEN_USING_STD(exp);
|
||||
return exp(a);
|
||||
return numext::exp(a);
|
||||
}
|
||||
|
||||
/** \internal \returns the exp2 of \a a (coeff-wise) */
|
||||
template <typename Packet>
|
||||
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexp2(const Packet& a) {
|
||||
return numext::exp2(a);
|
||||
}
|
||||
|
||||
/** \internal \returns the expm1 of \a a (coeff-wise) */
|
||||
@@ -1113,7 +1118,7 @@ EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog10(const Packet&
|
||||
return log10(a);
|
||||
}
|
||||
|
||||
/** \internal \returns the log10 of \a a (coeff-wise) */
|
||||
/** \internal \returns the log2 of \a a (coeff-wise) */
|
||||
template <typename Packet>
|
||||
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog2(const Packet& a) {
|
||||
using Scalar = typename internal::unpacket_traits<Packet>::type;
|
||||
|
||||
@@ -76,6 +76,7 @@ EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erf, scalar_erf_op, error function,\sa ArrayBas
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erfc, scalar_erfc_op, complement error function,\sa ArrayBase::erfc)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(ndtri, scalar_ndtri_op, inverse normal distribution function,\sa ArrayBase::ndtri)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(exp, scalar_exp_op, exponential,\sa ArrayBase::exp)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(exp2, scalar_exp2_op, exponential,\sa ArrayBase::exp2)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(expm1, scalar_expm1_op, exponential of a value minus 1,\sa ArrayBase::expm1)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log, scalar_log_op, natural logarithm,\sa Eigen::log10 DOXCOMMA ArrayBase::log)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log1p, scalar_log1p_op, natural logarithm of 1 plus the value,\sa ArrayBase::log1p)
|
||||
|
||||
250
wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/InnerProduct.h
vendored
Normal file
250
wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/InnerProduct.h
vendored
Normal file
@@ -0,0 +1,250 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2024 Charlie Schlosser <cs.schlosser@gmail.com>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_INNER_PRODUCT_EVAL_H
|
||||
#define EIGEN_INNER_PRODUCT_EVAL_H
|
||||
|
||||
// IWYU pragma: private
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
// recursively searches for the largest simd type that does not exceed Size, or the smallest if no such type exists
|
||||
template <typename Scalar, int Size, typename Packet = typename packet_traits<Scalar>::type,
|
||||
bool Stop =
|
||||
(unpacket_traits<Packet>::size <= Size) || is_same<Packet, typename unpacket_traits<Packet>::half>::value>
|
||||
struct find_inner_product_packet_helper;
|
||||
|
||||
template <typename Scalar, int Size, typename Packet>
|
||||
struct find_inner_product_packet_helper<Scalar, Size, Packet, false> {
|
||||
using type = typename find_inner_product_packet_helper<Scalar, Size, typename unpacket_traits<Packet>::half>::type;
|
||||
};
|
||||
|
||||
template <typename Scalar, int Size, typename Packet>
|
||||
struct find_inner_product_packet_helper<Scalar, Size, Packet, true> {
|
||||
using type = Packet;
|
||||
};
|
||||
|
||||
template <typename Scalar, int Size>
|
||||
struct find_inner_product_packet : find_inner_product_packet_helper<Scalar, Size> {};
|
||||
|
||||
template <typename Scalar>
|
||||
struct find_inner_product_packet<Scalar, Dynamic> {
|
||||
using type = typename packet_traits<Scalar>::type;
|
||||
};
|
||||
|
||||
template <typename Lhs, typename Rhs>
|
||||
struct inner_product_assert {
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Lhs)
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Rhs)
|
||||
EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Lhs, Rhs)
|
||||
#ifndef EIGEN_NO_DEBUG
|
||||
static EIGEN_DEVICE_FUNC void run(const Lhs& lhs, const Rhs& rhs) {
|
||||
eigen_assert((lhs.size() == rhs.size()) && "Inner product: lhs and rhs vectors must have same size");
|
||||
}
|
||||
#else
|
||||
static EIGEN_DEVICE_FUNC void run(const Lhs&, const Rhs&) {}
|
||||
#endif
|
||||
};
|
||||
|
||||
template <typename Func, typename Lhs, typename Rhs>
|
||||
struct inner_product_evaluator {
|
||||
static constexpr int LhsFlags = evaluator<Lhs>::Flags, RhsFlags = evaluator<Rhs>::Flags,
|
||||
SizeAtCompileTime = min_size_prefer_fixed(Lhs::SizeAtCompileTime, Rhs::SizeAtCompileTime),
|
||||
LhsAlignment = evaluator<Lhs>::Alignment, RhsAlignment = evaluator<Rhs>::Alignment;
|
||||
|
||||
using Scalar = typename Func::result_type;
|
||||
using Packet = typename find_inner_product_packet<Scalar, SizeAtCompileTime>::type;
|
||||
|
||||
static constexpr bool Vectorize =
|
||||
bool(LhsFlags & RhsFlags & PacketAccessBit) && Func::PacketAccess &&
|
||||
((SizeAtCompileTime == Dynamic) || (unpacket_traits<Packet>::size <= SizeAtCompileTime));
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit inner_product_evaluator(const Lhs& lhs, const Rhs& rhs,
|
||||
Func func = Func())
|
||||
: m_func(func), m_lhs(lhs), m_rhs(rhs), m_size(lhs.size()) {
|
||||
inner_product_assert<Lhs, Rhs>::run(lhs, rhs);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index size() const { return m_size.value(); }
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(Index index) const {
|
||||
return m_func.coeff(m_lhs.coeff(index), m_rhs.coeff(index));
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(const Scalar& value, Index index) const {
|
||||
return m_func.coeff(value, m_lhs.coeff(index), m_rhs.coeff(index));
|
||||
}
|
||||
|
||||
template <typename PacketType, int LhsMode = LhsAlignment, int RhsMode = RhsAlignment>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index index) const {
|
||||
return m_func.packet(m_lhs.template packet<LhsMode, PacketType>(index),
|
||||
m_rhs.template packet<RhsMode, PacketType>(index));
|
||||
}
|
||||
|
||||
template <typename PacketType, int LhsMode = LhsAlignment, int RhsMode = RhsAlignment>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(const PacketType& value, Index index) const {
|
||||
return m_func.packet(value, m_lhs.template packet<LhsMode, PacketType>(index),
|
||||
m_rhs.template packet<RhsMode, PacketType>(index));
|
||||
}
|
||||
|
||||
const Func m_func;
|
||||
const evaluator<Lhs> m_lhs;
|
||||
const evaluator<Rhs> m_rhs;
|
||||
const variable_if_dynamic<Index, SizeAtCompileTime> m_size;
|
||||
};
|
||||
|
||||
template <typename Evaluator, bool Vectorize = Evaluator::Vectorize>
|
||||
struct inner_product_impl;
|
||||
|
||||
// scalar loop
|
||||
template <typename Evaluator>
|
||||
struct inner_product_impl<Evaluator, false> {
|
||||
using Scalar = typename Evaluator::Scalar;
|
||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run(const Evaluator& eval) {
|
||||
const Index size = eval.size();
|
||||
if (size == 0) return Scalar(0);
|
||||
|
||||
Scalar result = eval.coeff(0);
|
||||
for (Index k = 1; k < size; k++) {
|
||||
result = eval.coeff(result, k);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
// vector loop
|
||||
template <typename Evaluator>
|
||||
struct inner_product_impl<Evaluator, true> {
|
||||
using UnsignedIndex = std::make_unsigned_t<Index>;
|
||||
using Scalar = typename Evaluator::Scalar;
|
||||
using Packet = typename Evaluator::Packet;
|
||||
static constexpr int PacketSize = unpacket_traits<Packet>::size;
|
||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run(const Evaluator& eval) {
|
||||
const UnsignedIndex size = static_cast<UnsignedIndex>(eval.size());
|
||||
if (size < PacketSize) return inner_product_impl<Evaluator, false>::run(eval);
|
||||
|
||||
const UnsignedIndex packetEnd = numext::round_down(size, PacketSize);
|
||||
const UnsignedIndex quadEnd = numext::round_down(size, 4 * PacketSize);
|
||||
const UnsignedIndex numPackets = size / PacketSize;
|
||||
const UnsignedIndex numRemPackets = (packetEnd - quadEnd) / PacketSize;
|
||||
|
||||
Packet presult0, presult1, presult2, presult3;
|
||||
|
||||
presult0 = eval.template packet<Packet>(0 * PacketSize);
|
||||
if (numPackets >= 2) presult1 = eval.template packet<Packet>(1 * PacketSize);
|
||||
if (numPackets >= 3) presult2 = eval.template packet<Packet>(2 * PacketSize);
|
||||
if (numPackets >= 4) {
|
||||
presult3 = eval.template packet<Packet>(3 * PacketSize);
|
||||
|
||||
for (UnsignedIndex k = 4 * PacketSize; k < quadEnd; k += 4 * PacketSize) {
|
||||
presult0 = eval.packet(presult0, k + 0 * PacketSize);
|
||||
presult1 = eval.packet(presult1, k + 1 * PacketSize);
|
||||
presult2 = eval.packet(presult2, k + 2 * PacketSize);
|
||||
presult3 = eval.packet(presult3, k + 3 * PacketSize);
|
||||
}
|
||||
|
||||
if (numRemPackets >= 1) presult0 = eval.packet(presult0, quadEnd + 0 * PacketSize);
|
||||
if (numRemPackets >= 2) presult1 = eval.packet(presult1, quadEnd + 1 * PacketSize);
|
||||
if (numRemPackets == 3) presult2 = eval.packet(presult2, quadEnd + 2 * PacketSize);
|
||||
|
||||
presult2 = padd(presult2, presult3);
|
||||
}
|
||||
|
||||
if (numPackets >= 3) presult1 = padd(presult1, presult2);
|
||||
if (numPackets >= 2) presult0 = padd(presult0, presult1);
|
||||
|
||||
Scalar result = predux(presult0);
|
||||
for (UnsignedIndex k = packetEnd; k < size; k++) {
|
||||
result = eval.coeff(result, k);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Scalar, bool Conj>
|
||||
struct conditional_conj;
|
||||
|
||||
template <typename Scalar>
|
||||
struct conditional_conj<Scalar, true> {
|
||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(const Scalar& a) { return numext::conj(a); }
|
||||
template <typename Packet>
|
||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packet(const Packet& a) {
|
||||
return pconj(a);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Scalar>
|
||||
struct conditional_conj<Scalar, false> {
|
||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(const Scalar& a) { return a; }
|
||||
template <typename Packet>
|
||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packet(const Packet& a) {
|
||||
return a;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename LhsScalar, typename RhsScalar, bool Conj>
|
||||
struct scalar_inner_product_op {
|
||||
using result_type = typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType;
|
||||
using conj_helper = conditional_conj<LhsScalar, Conj>;
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type coeff(const LhsScalar& a, const RhsScalar& b) const {
|
||||
return (conj_helper::coeff(a) * b);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type coeff(const result_type& accum, const LhsScalar& a,
|
||||
const RhsScalar& b) const {
|
||||
return (conj_helper::coeff(a) * b) + accum;
|
||||
}
|
||||
static constexpr bool PacketAccess = false;
|
||||
};
|
||||
|
||||
template <typename Scalar, bool Conj>
|
||||
struct scalar_inner_product_op<Scalar, Scalar, Conj> {
|
||||
using result_type = Scalar;
|
||||
using conj_helper = conditional_conj<Scalar, Conj>;
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(const Scalar& a, const Scalar& b) const {
|
||||
return pmul(conj_helper::coeff(a), b);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(const Scalar& accum, const Scalar& a, const Scalar& b) const {
|
||||
return pmadd(conj_helper::coeff(a), b, accum);
|
||||
}
|
||||
template <typename Packet>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packet(const Packet& a, const Packet& b) const {
|
||||
return pmul(conj_helper::packet(a), b);
|
||||
}
|
||||
template <typename Packet>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packet(const Packet& accum, const Packet& a, const Packet& b) const {
|
||||
return pmadd(conj_helper::packet(a), b, accum);
|
||||
}
|
||||
static constexpr bool PacketAccess = packet_traits<Scalar>::HasMul && packet_traits<Scalar>::HasAdd;
|
||||
};
|
||||
|
||||
template <typename Lhs, typename Rhs, bool Conj>
|
||||
struct default_inner_product_impl {
|
||||
using LhsScalar = typename traits<Lhs>::Scalar;
|
||||
using RhsScalar = typename traits<Rhs>::Scalar;
|
||||
using Op = scalar_inner_product_op<LhsScalar, RhsScalar, Conj>;
|
||||
using Evaluator = inner_product_evaluator<Op, Lhs, Rhs>;
|
||||
using result_type = typename Evaluator::Scalar;
|
||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type run(const MatrixBase<Lhs>& a, const MatrixBase<Rhs>& b) {
|
||||
Evaluator eval(a.derived(), b.derived(), Op());
|
||||
return inner_product_impl<Evaluator>::run(eval);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Lhs, typename Rhs>
|
||||
struct dot_impl : default_inner_product_impl<Lhs, Rhs, true> {};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace Eigen
|
||||
|
||||
#endif // EIGEN_INNER_PRODUCT_EVAL_H
|
||||
@@ -92,7 +92,7 @@ struct unary_evaluator<Inverse<ArgType> > : public evaluator<typename Inverse<Ar
|
||||
|
||||
enum { Flags = Base::Flags | EvalBeforeNestingBit };
|
||||
|
||||
unary_evaluator(const InverseType& inv_xpr) : m_result(inv_xpr.rows(), inv_xpr.cols()) {
|
||||
EIGEN_DEVICE_FUNC unary_evaluator(const InverseType& inv_xpr) : m_result(inv_xpr.rows(), inv_xpr.cols()) {
|
||||
internal::construct_at<Base>(this, m_result);
|
||||
internal::call_assignment_no_alias(m_result, inv_xpr);
|
||||
}
|
||||
|
||||
@@ -94,7 +94,7 @@ class MapBase<Derived, ReadOnlyAccessors> : public internal::dense_xpr_base<Deri
|
||||
*
|
||||
* \sa innerStride(), outerStride()
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC inline const Scalar* data() const { return m_data; }
|
||||
EIGEN_DEVICE_FUNC constexpr const Scalar* data() const { return m_data; }
|
||||
|
||||
/** \copydoc PlainObjectBase::coeff(Index,Index) const */
|
||||
EIGEN_DEVICE_FUNC inline const Scalar& coeff(Index rowId, Index colId) const {
|
||||
@@ -233,8 +233,8 @@ class MapBase<Derived, WriteAccessors> : public MapBase<Derived, ReadOnlyAccesso
|
||||
|
||||
typedef std::conditional_t<internal::is_lvalue<Derived>::value, Scalar, const Scalar> ScalarWithConstIfNotLvalue;
|
||||
|
||||
EIGEN_DEVICE_FUNC inline const Scalar* data() const { return this->m_data; }
|
||||
EIGEN_DEVICE_FUNC inline ScalarWithConstIfNotLvalue* data() {
|
||||
EIGEN_DEVICE_FUNC constexpr const Scalar* data() const { return this->m_data; }
|
||||
EIGEN_DEVICE_FUNC constexpr ScalarWithConstIfNotLvalue* data() {
|
||||
return this->m_data;
|
||||
} // no const-cast here so non-const-correct code will give a compile error
|
||||
|
||||
|
||||
@@ -1477,6 +1477,63 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE std::complex<double> exp(const std::comple
|
||||
}
|
||||
#endif
|
||||
|
||||
template <typename T>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T exp2(const T& x) {
|
||||
EIGEN_USING_STD(exp2);
|
||||
return exp2(x);
|
||||
}
|
||||
|
||||
// MSVC screws up some edge-cases for std::exp2(complex).
|
||||
#ifdef EIGEN_COMP_MSVC
|
||||
template <typename RealScalar>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE std::complex<RealScalar> exp2(const std::complex<RealScalar>& x) {
|
||||
EIGEN_USING_STD(exp);
|
||||
// If z is (x,±∞) (for any finite x), the result is (NaN,NaN) and FE_INVALID is raised.
|
||||
// If z is (x,NaN) (for any finite x), the result is (NaN,NaN) and FE_INVALID may be raised.
|
||||
if ((isfinite)(real_ref(x)) && !(isfinite)(imag_ref(x))) {
|
||||
return std::complex<RealScalar>(NumTraits<RealScalar>::quiet_NaN(), NumTraits<RealScalar>::quiet_NaN());
|
||||
}
|
||||
// If z is (+∞,±∞), the result is (±∞,NaN) and FE_INVALID is raised (the sign of the real part is unspecified)
|
||||
// If z is (+∞,NaN), the result is (±∞,NaN) (the sign of the real part is unspecified)
|
||||
if ((real_ref(x) == NumTraits<RealScalar>::infinity() && !(isfinite)(imag_ref(x)))) {
|
||||
return std::complex<RealScalar>(NumTraits<RealScalar>::infinity(), NumTraits<RealScalar>::quiet_NaN());
|
||||
}
|
||||
return exp2(x);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(SYCL_DEVICE_ONLY)
|
||||
SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(exp2, exp2)
|
||||
#endif
|
||||
|
||||
#if defined(EIGEN_GPUCC)
|
||||
template <>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float exp2(const float& x) {
|
||||
return ::exp2f(x);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double exp2(const double& x) {
|
||||
return ::exp2(x);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE std::complex<float> exp2(const std::complex<float>& x) {
|
||||
float com = ::exp2f(x.real());
|
||||
float res_real = com * ::cosf(static_cast<float>(EIGEN_LN2) * x.imag());
|
||||
float res_imag = com * ::sinf(static_cast<float>(EIGEN_LN2) * x.imag());
|
||||
return std::complex<float>(res_real, res_imag);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE std::complex<double> exp2(const std::complex<double>& x) {
|
||||
double com = ::exp2(x.real());
|
||||
double res_real = com * ::cos(static_cast<double>(EIGEN_LN2) * x.imag());
|
||||
double res_imag = com * ::sin(static_cast<double>(EIGEN_LN2) * x.imag());
|
||||
return std::complex<double>(res_real, res_imag);
|
||||
}
|
||||
#endif
|
||||
|
||||
template <typename Scalar>
|
||||
EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(expm1, Scalar) expm1(const Scalar& x) {
|
||||
return EIGEN_MATHFUNC_IMPL(expm1, Scalar)::run(x);
|
||||
|
||||
@@ -250,17 +250,12 @@ class Matrix : public PlainObjectBase<Matrix<Scalar_, Rows_, Cols_, Options_, Ma
|
||||
*
|
||||
* \sa resize(Index,Index)
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Matrix()
|
||||
: Base(){EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED}
|
||||
|
||||
// FIXME is it still needed
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr explicit Matrix(
|
||||
internal::constructor_without_unaligned_array_assert)
|
||||
: Base(internal::constructor_without_unaligned_array_assert()){EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Matrix(Matrix && other)
|
||||
EIGEN_NOEXCEPT_IF(std::is_nothrow_move_constructible<Scalar>::value)
|
||||
: Base(std::move(other)) {}
|
||||
#if defined(EIGEN_INITIALIZE_COEFFS)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Matrix() { EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED }
|
||||
#else
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Matrix() = default;
|
||||
#endif
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Matrix(Matrix&&) = default;
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Matrix& operator=(Matrix&& other)
|
||||
EIGEN_NOEXCEPT_IF(std::is_nothrow_move_assignable<Scalar>::value) {
|
||||
Base::operator=(std::move(other));
|
||||
@@ -379,7 +374,7 @@ class Matrix : public PlainObjectBase<Matrix<Scalar_, Rows_, Cols_, Options_, Ma
|
||||
}
|
||||
|
||||
/** \brief Copy constructor */
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Matrix(const Matrix& other) : Base(other) {}
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Matrix(const Matrix&) = default;
|
||||
|
||||
/** \brief Copy constructor for generic expressions.
|
||||
* \sa MatrixBase::operator=(const EigenBase<OtherDerived>&)
|
||||
|
||||
@@ -250,6 +250,7 @@ struct NumTraits<std::complex<Real_> > : GenericNumTraits<std::complex<Real_> >
|
||||
typedef typename NumTraits<Real_>::Literal Literal;
|
||||
enum {
|
||||
IsComplex = 1,
|
||||
IsSigned = NumTraits<Real_>::IsSigned,
|
||||
RequireInitialization = NumTraits<Real_>::RequireInitialization,
|
||||
ReadCost = 2 * NumTraits<Real_>::ReadCost,
|
||||
AddCost = 2 * NumTraits<Real>::AddCost,
|
||||
|
||||
@@ -270,10 +270,10 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
}
|
||||
|
||||
/** \returns a const pointer to the data array of this matrix */
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar* data() const { return m_storage.data(); }
|
||||
EIGEN_DEVICE_FUNC constexpr const Scalar* data() const { return m_storage.data(); }
|
||||
|
||||
/** \returns a pointer to the data array of this matrix */
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar* data() { return m_storage.data(); }
|
||||
EIGEN_DEVICE_FUNC constexpr Scalar* data() { return m_storage.data(); }
|
||||
|
||||
/** Resizes \c *this to a \a rows x \a cols matrix.
|
||||
*
|
||||
@@ -472,34 +472,17 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
// Prevent user from trying to instantiate PlainObjectBase objects
|
||||
// by making all its constructor protected. See bug 1074.
|
||||
protected:
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr PlainObjectBase() : m_storage() {
|
||||
// EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
|
||||
}
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
// FIXME is it still needed ?
|
||||
/** \internal */
|
||||
EIGEN_DEVICE_FUNC constexpr explicit PlainObjectBase(internal::constructor_without_unaligned_array_assert)
|
||||
: m_storage(internal::constructor_without_unaligned_array_assert()) {
|
||||
// EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
|
||||
}
|
||||
#endif
|
||||
|
||||
EIGEN_DEVICE_FUNC constexpr PlainObjectBase(PlainObjectBase&& other) EIGEN_NOEXCEPT
|
||||
: m_storage(std::move(other.m_storage)) {}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr PlainObjectBase() = default;
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr PlainObjectBase(PlainObjectBase&&) = default;
|
||||
EIGEN_DEVICE_FUNC constexpr PlainObjectBase& operator=(PlainObjectBase&& other) EIGEN_NOEXCEPT {
|
||||
m_storage = std::move(other.m_storage);
|
||||
return *this;
|
||||
}
|
||||
|
||||
/** Copy constructor */
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr PlainObjectBase(const PlainObjectBase& other)
|
||||
: Base(), m_storage(other.m_storage) {}
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr PlainObjectBase(const PlainObjectBase&) = default;
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PlainObjectBase(Index size, Index rows, Index cols)
|
||||
: m_storage(size, rows, cols) {
|
||||
// EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
|
||||
}
|
||||
: m_storage(size, rows, cols) {}
|
||||
|
||||
/** \brief Construct a row of column vector with fixed size from an arbitrary number of coefficients.
|
||||
*
|
||||
|
||||
@@ -235,19 +235,20 @@ EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(sub_assign_op, scalar_difference_op, add_assig
|
||||
|
||||
template <typename Lhs, typename Rhs>
|
||||
struct generic_product_impl<Lhs, Rhs, DenseShape, DenseShape, InnerProduct> {
|
||||
using impl = default_inner_product_impl<Lhs, Rhs, false>;
|
||||
template <typename Dst>
|
||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) {
|
||||
dst.coeffRef(0, 0) = (lhs.transpose().cwiseProduct(rhs)).sum();
|
||||
dst.coeffRef(0, 0) = impl::run(lhs, rhs);
|
||||
}
|
||||
|
||||
template <typename Dst>
|
||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) {
|
||||
dst.coeffRef(0, 0) += (lhs.transpose().cwiseProduct(rhs)).sum();
|
||||
dst.coeffRef(0, 0) += impl::run(lhs, rhs);
|
||||
}
|
||||
|
||||
template <typename Dst>
|
||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) {
|
||||
dst.coeffRef(0, 0) -= (lhs.transpose().cwiseProduct(rhs)).sum();
|
||||
dst.coeffRef(0, 0) -= impl::run(lhs, rhs);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -115,6 +115,7 @@ struct random_float_impl<Scalar, false> {
|
||||
}
|
||||
};
|
||||
|
||||
#if !EIGEN_COMP_NVCC
|
||||
// random implementation for long double
|
||||
// this specialization is not compatible with double-double scalars
|
||||
template <bool Specialize = (sizeof(long double) == 2 * sizeof(uint64_t)) &&
|
||||
@@ -146,6 +147,7 @@ struct random_longdouble_impl<false> {
|
||||
};
|
||||
template <>
|
||||
struct random_float_impl<long double> : random_longdouble_impl<> {};
|
||||
#endif
|
||||
|
||||
template <typename Scalar>
|
||||
struct random_default_impl<Scalar, false, false> {
|
||||
|
||||
@@ -173,7 +173,7 @@ class ReshapedImpl_dense<XprType, Rows, Cols, Order, false>
|
||||
|
||||
#ifdef EIGEN_PARSED_BY_DOXYGEN
|
||||
/** \sa MapBase::data() */
|
||||
EIGEN_DEVICE_FUNC inline const Scalar* data() const;
|
||||
EIGEN_DEVICE_FUNC constexpr const Scalar* data() const;
|
||||
EIGEN_DEVICE_FUNC inline Index innerStride() const;
|
||||
EIGEN_DEVICE_FUNC inline Index outerStride() const;
|
||||
#endif
|
||||
|
||||
@@ -127,19 +127,25 @@ EIGEN_DEVICE_FUNC inline typename DenseBase<Derived>::ReverseReturnType DenseBas
|
||||
* \sa VectorwiseOp::reverseInPlace(), reverse() */
|
||||
template <typename Derived>
|
||||
EIGEN_DEVICE_FUNC inline void DenseBase<Derived>::reverseInPlace() {
|
||||
constexpr int HalfRowsAtCompileTime = RowsAtCompileTime == Dynamic ? Dynamic : RowsAtCompileTime / 2;
|
||||
constexpr int HalfColsAtCompileTime = ColsAtCompileTime == Dynamic ? Dynamic : ColsAtCompileTime / 2;
|
||||
if (cols() > rows()) {
|
||||
Index half = cols() / 2;
|
||||
leftCols(half).swap(rightCols(half).reverse());
|
||||
this->template leftCols<HalfColsAtCompileTime>(half).swap(
|
||||
this->template rightCols<HalfColsAtCompileTime>(half).reverse());
|
||||
if ((cols() % 2) == 1) {
|
||||
Index half2 = rows() / 2;
|
||||
col(half).head(half2).swap(col(half).tail(half2).reverse());
|
||||
col(half).template head<HalfRowsAtCompileTime>(half2).swap(
|
||||
col(half).template tail<HalfRowsAtCompileTime>(half2).reverse());
|
||||
}
|
||||
} else {
|
||||
Index half = rows() / 2;
|
||||
topRows(half).swap(bottomRows(half).reverse());
|
||||
this->template topRows<HalfRowsAtCompileTime>(half).swap(
|
||||
this->template bottomRows<HalfRowsAtCompileTime>(half).reverse());
|
||||
if ((rows() % 2) == 1) {
|
||||
Index half2 = cols() / 2;
|
||||
row(half).head(half2).swap(row(half).tail(half2).reverse());
|
||||
row(half).template head<HalfColsAtCompileTime>(half2).swap(
|
||||
row(half).template tail<HalfColsAtCompileTime>(half2).reverse());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -48,34 +48,16 @@ inline void stable_norm_kernel(const ExpressionType& bl, Scalar& ssq, Scalar& sc
|
||||
|
||||
template <typename VectorType, typename RealScalar>
|
||||
void stable_norm_impl_inner_step(const VectorType& vec, RealScalar& ssq, RealScalar& scale, RealScalar& invScale) {
|
||||
typedef typename VectorType::Scalar Scalar;
|
||||
const Index blockSize = 4096;
|
||||
|
||||
typedef typename internal::nested_eval<VectorType, 2>::type VectorTypeCopy;
|
||||
typedef internal::remove_all_t<VectorTypeCopy> VectorTypeCopyClean;
|
||||
const VectorTypeCopy copy(vec);
|
||||
|
||||
enum {
|
||||
CanAlign =
|
||||
((int(VectorTypeCopyClean::Flags) & DirectAccessBit) ||
|
||||
(int(internal::evaluator<VectorTypeCopyClean>::Alignment) > 0) // FIXME Alignment)>0 might not be enough
|
||||
) &&
|
||||
(blockSize * sizeof(Scalar) * 2 < EIGEN_STACK_ALLOCATION_LIMIT) &&
|
||||
(EIGEN_MAX_STATIC_ALIGN_BYTES >
|
||||
0) // if we cannot allocate on the stack, then let's not bother about this optimization
|
||||
};
|
||||
typedef std::conditional_t<
|
||||
CanAlign,
|
||||
Ref<const Matrix<Scalar, Dynamic, 1, 0, blockSize, 1>, internal::evaluator<VectorTypeCopyClean>::Alignment>,
|
||||
typename VectorTypeCopyClean::ConstSegmentReturnType>
|
||||
SegmentWrapper;
|
||||
Index n = vec.size();
|
||||
|
||||
Index bi = internal::first_default_aligned(copy);
|
||||
if (bi > 0) internal::stable_norm_kernel(copy.head(bi), ssq, scale, invScale);
|
||||
for (; bi < n; bi += blockSize)
|
||||
internal::stable_norm_kernel(SegmentWrapper(copy.segment(bi, numext::mini(blockSize, n - bi))), ssq, scale,
|
||||
invScale);
|
||||
Index blockEnd = numext::round_down(n, blockSize);
|
||||
for (Index i = 0; i < blockEnd; i += blockSize) {
|
||||
internal::stable_norm_kernel(vec.template segment<blockSize>(i), ssq, scale, invScale);
|
||||
}
|
||||
if (n > blockEnd) {
|
||||
internal::stable_norm_kernel(vec.tail(n - blockEnd), ssq, scale, invScale);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename VectorType>
|
||||
@@ -85,8 +67,7 @@ typename VectorType::RealScalar stable_norm_impl(const VectorType& vec,
|
||||
using std::sqrt;
|
||||
|
||||
Index n = vec.size();
|
||||
|
||||
if (n == 1) return abs(vec.coeff(0));
|
||||
if (EIGEN_PREDICT_FALSE(n == 1)) return abs(vec.coeff(0));
|
||||
|
||||
typedef typename VectorType::RealScalar RealScalar;
|
||||
RealScalar scale(0);
|
||||
|
||||
@@ -325,7 +325,7 @@ class pointer_based_stl_iterator {
|
||||
public:
|
||||
typedef Index difference_type;
|
||||
typedef typename XprType::Scalar value_type;
|
||||
#if __cplusplus >= 202002L
|
||||
#if EIGEN_CPLUSPLUS >= 202002L
|
||||
typedef std::conditional_t<XprType::InnerStrideAtCompileTime == 1, std::contiguous_iterator_tag,
|
||||
std::random_access_iterator_tag>
|
||||
iterator_category;
|
||||
|
||||
@@ -70,6 +70,13 @@ class Stride {
|
||||
/** Copy constructor */
|
||||
EIGEN_DEVICE_FUNC Stride(const Stride& other) : m_outer(other.outer()), m_inner(other.inner()) {}
|
||||
|
||||
/** Copy assignment operator */
|
||||
EIGEN_DEVICE_FUNC Stride& operator=(const Stride& other) {
|
||||
m_outer.setValue(other.outer());
|
||||
m_inner.setValue(other.inner());
|
||||
return *this;
|
||||
}
|
||||
|
||||
/** \returns the outer stride */
|
||||
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index outer() const { return m_outer.value(); }
|
||||
/** \returns the inner stride */
|
||||
|
||||
@@ -119,10 +119,12 @@ class TransposeImpl<MatrixType, Dense> : public internal::TransposeImpl_base<Mat
|
||||
|
||||
typedef std::conditional_t<internal::is_lvalue<MatrixType>::value, Scalar, const Scalar> ScalarWithConstIfNotLvalue;
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ScalarWithConstIfNotLvalue* data() {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr ScalarWithConstIfNotLvalue* data() {
|
||||
return derived().nestedExpression().data();
|
||||
}
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const Scalar* data() const {
|
||||
return derived().nestedExpression().data();
|
||||
}
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar* data() const { return derived().nestedExpression().data(); }
|
||||
|
||||
// FIXME: shall we keep the const version of coeffRef?
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeffRef(Index rowId, Index colId) const {
|
||||
|
||||
@@ -85,10 +85,14 @@ EIGEN_STRONG_INLINE Packet4cf pconj(const Packet4cf& a) {
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4cf pmul<Packet4cf>(const Packet4cf& a, const Packet4cf& b) {
|
||||
__m256 tmp1 = _mm256_mul_ps(_mm256_moveldup_ps(a.v), b.v);
|
||||
__m256 tmp2 = _mm256_mul_ps(_mm256_movehdup_ps(a.v), _mm256_permute_ps(b.v, _MM_SHUFFLE(2, 3, 0, 1)));
|
||||
__m256 result = _mm256_addsub_ps(tmp1, tmp2);
|
||||
EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf& a, const Packet4cf& b) {
|
||||
__m256 tmp1 = _mm256_mul_ps(_mm256_movehdup_ps(a.v), _mm256_permute_ps(b.v, _MM_SHUFFLE(2, 3, 0, 1)));
|
||||
__m256 tmp2 = _mm256_moveldup_ps(a.v);
|
||||
#ifdef EIGEN_VECTORIZE_FMA
|
||||
__m256 result = _mm256_fmaddsub_ps(tmp2, b.v, tmp1);
|
||||
#else
|
||||
__m256 result = _mm256_addsub_ps(_mm256_mul_ps(tmp2, b.v), tmp1);
|
||||
#endif
|
||||
return Packet4cf(result);
|
||||
}
|
||||
|
||||
@@ -121,11 +125,11 @@ EIGEN_STRONG_INLINE Packet4cf pandnot<Packet4cf>(const Packet4cf& a, const Packe
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4cf pload<Packet4cf>(const std::complex<float>* from) {
|
||||
EIGEN_DEBUG_ALIGNED_LOAD return Packet4cf(pload<Packet8f>(&numext::real_ref(*from)));
|
||||
EIGEN_DEBUG_ALIGNED_LOAD return Packet4cf(_mm256_load_ps(&numext::real_ref(*from)));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4cf ploadu<Packet4cf>(const std::complex<float>* from) {
|
||||
EIGEN_DEBUG_UNALIGNED_LOAD return Packet4cf(ploadu<Packet8f>(&numext::real_ref(*from)));
|
||||
EIGEN_DEBUG_UNALIGNED_LOAD return Packet4cf(_mm256_loadu_ps(&numext::real_ref(*from)));
|
||||
}
|
||||
|
||||
template <>
|
||||
@@ -145,11 +149,11 @@ EIGEN_STRONG_INLINE Packet4cf ploaddup<Packet4cf>(const std::complex<float>* fro
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE void pstore<std::complex<float> >(std::complex<float>* to, const Packet4cf& from) {
|
||||
EIGEN_DEBUG_ALIGNED_STORE pstore(&numext::real_ref(*to), from.v);
|
||||
EIGEN_DEBUG_ALIGNED_STORE _mm256_store_ps(&numext::real_ref(*to), from.v);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float>* to, const Packet4cf& from) {
|
||||
EIGEN_DEBUG_UNALIGNED_STORE pstoreu(&numext::real_ref(*to), from.v);
|
||||
EIGEN_DEBUG_UNALIGNED_STORE _mm256_storeu_ps(&numext::real_ref(*to), from.v);
|
||||
}
|
||||
|
||||
template <>
|
||||
@@ -283,13 +287,15 @@ EIGEN_STRONG_INLINE Packet2cd pconj(const Packet2cd& a) {
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cd pmul<Packet2cd>(const Packet2cd& a, const Packet2cd& b) {
|
||||
__m256d tmp1 = _mm256_shuffle_pd(a.v, a.v, 0x0);
|
||||
__m256d even = _mm256_mul_pd(tmp1, b.v);
|
||||
__m256d tmp2 = _mm256_shuffle_pd(a.v, a.v, 0xF);
|
||||
__m256d tmp3 = _mm256_shuffle_pd(b.v, b.v, 0x5);
|
||||
__m256d odd = _mm256_mul_pd(tmp2, tmp3);
|
||||
return Packet2cd(_mm256_addsub_pd(even, odd));
|
||||
EIGEN_STRONG_INLINE Packet2cd pmul(const Packet2cd& a, const Packet2cd& b) {
|
||||
__m256d tmp1 = _mm256_mul_pd(_mm256_permute_pd(a.v, 0xF), _mm256_permute_pd(b.v, 0x5));
|
||||
__m256d tmp2 = _mm256_movedup_pd(a.v);
|
||||
#ifdef EIGEN_VECTORIZE_FMA
|
||||
__m256d result = _mm256_fmaddsub_pd(tmp2, b.v, tmp1);
|
||||
#else
|
||||
__m256d result = _mm256_addsub_pd(_mm256_mul_pd(tmp2, b.v), tmp1);
|
||||
#endif
|
||||
return Packet2cd(result);
|
||||
}
|
||||
|
||||
template <>
|
||||
@@ -321,11 +327,11 @@ EIGEN_STRONG_INLINE Packet2cd pandnot<Packet2cd>(const Packet2cd& a, const Packe
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cd pload<Packet2cd>(const std::complex<double>* from) {
|
||||
EIGEN_DEBUG_ALIGNED_LOAD return Packet2cd(pload<Packet4d>((const double*)from));
|
||||
EIGEN_DEBUG_ALIGNED_LOAD return Packet2cd(_mm256_load_pd((const double*)from));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cd ploadu<Packet2cd>(const std::complex<double>* from) {
|
||||
EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cd(ploadu<Packet4d>((const double*)from));
|
||||
EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cd(_mm256_loadu_pd((const double*)from));
|
||||
}
|
||||
|
||||
template <>
|
||||
@@ -342,11 +348,11 @@ EIGEN_STRONG_INLINE Packet2cd ploaddup<Packet2cd>(const std::complex<double>* fr
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE void pstore<std::complex<double> >(std::complex<double>* to, const Packet2cd& from) {
|
||||
EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v);
|
||||
EIGEN_DEBUG_ALIGNED_STORE _mm256_store_pd((double*)to, from.v);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double>* to, const Packet2cd& from) {
|
||||
EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v);
|
||||
EIGEN_DEBUG_UNALIGNED_STORE _mm256_storeu_pd((double*)to, from.v);
|
||||
}
|
||||
|
||||
template <>
|
||||
@@ -449,6 +455,74 @@ EIGEN_STRONG_INLINE Packet4cf pexp<Packet4cf>(const Packet4cf& a) {
|
||||
return pexp_complex<Packet4cf>(a);
|
||||
}
|
||||
|
||||
#ifdef EIGEN_VECTORIZE_FMA
|
||||
// std::complex<float>
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& a, const Packet4cf& b, const Packet4cf& c) {
|
||||
__m256 a_odd = _mm256_movehdup_ps(a.v);
|
||||
__m256 a_even = _mm256_moveldup_ps(a.v);
|
||||
__m256 b_swap = _mm256_permute_ps(b.v, _MM_SHUFFLE(2, 3, 0, 1));
|
||||
__m256 result = _mm256_fmaddsub_ps(a_even, b.v, _mm256_fmaddsub_ps(a_odd, b_swap, c.v));
|
||||
return Packet4cf(result);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4cf pmsub(const Packet4cf& a, const Packet4cf& b, const Packet4cf& c) {
|
||||
__m256 a_odd = _mm256_movehdup_ps(a.v);
|
||||
__m256 a_even = _mm256_moveldup_ps(a.v);
|
||||
__m256 b_swap = _mm256_permute_ps(b.v, _MM_SHUFFLE(2, 3, 0, 1));
|
||||
__m256 result = _mm256_fmaddsub_ps(a_even, b.v, _mm256_fmsubadd_ps(a_odd, b_swap, c.v));
|
||||
return Packet4cf(result);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4cf pnmadd(const Packet4cf& a, const Packet4cf& b, const Packet4cf& c) {
|
||||
__m256 a_odd = _mm256_movehdup_ps(a.v);
|
||||
__m256 a_even = _mm256_moveldup_ps(a.v);
|
||||
__m256 b_swap = _mm256_permute_ps(b.v, _MM_SHUFFLE(2, 3, 0, 1));
|
||||
__m256 result = _mm256_fmaddsub_ps(a_odd, b_swap, _mm256_fmaddsub_ps(a_even, b.v, c.v));
|
||||
return Packet4cf(result);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4cf pnmsub(const Packet4cf& a, const Packet4cf& b, const Packet4cf& c) {
|
||||
__m256 a_odd = _mm256_movehdup_ps(a.v);
|
||||
__m256 a_even = _mm256_moveldup_ps(a.v);
|
||||
__m256 b_swap = _mm256_permute_ps(b.v, _MM_SHUFFLE(2, 3, 0, 1));
|
||||
__m256 result = _mm256_fmaddsub_ps(a_odd, b_swap, _mm256_fmsubadd_ps(a_even, b.v, c.v));
|
||||
return Packet4cf(result);
|
||||
}
|
||||
// std::complex<double>
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& a, const Packet2cd& b, const Packet2cd& c) {
|
||||
__m256d a_odd = _mm256_permute_pd(a.v, 0xF);
|
||||
__m256d a_even = _mm256_movedup_pd(a.v);
|
||||
__m256d b_swap = _mm256_permute_pd(b.v, 0x5);
|
||||
__m256d result = _mm256_fmaddsub_pd(a_even, b.v, _mm256_fmaddsub_pd(a_odd, b_swap, c.v));
|
||||
return Packet2cd(result);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cd pmsub(const Packet2cd& a, const Packet2cd& b, const Packet2cd& c) {
|
||||
__m256d a_odd = _mm256_permute_pd(a.v, 0xF);
|
||||
__m256d a_even = _mm256_movedup_pd(a.v);
|
||||
__m256d b_swap = _mm256_permute_pd(b.v, 0x5);
|
||||
__m256d result = _mm256_fmaddsub_pd(a_even, b.v, _mm256_fmsubadd_pd(a_odd, b_swap, c.v));
|
||||
return Packet2cd(result);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cd pnmadd(const Packet2cd& a, const Packet2cd& b, const Packet2cd& c) {
|
||||
__m256d a_odd = _mm256_permute_pd(a.v, 0xF);
|
||||
__m256d a_even = _mm256_movedup_pd(a.v);
|
||||
__m256d b_swap = _mm256_permute_pd(b.v, 0x5);
|
||||
__m256d result = _mm256_fmaddsub_pd(a_odd, b_swap, _mm256_fmaddsub_pd(a_even, b.v, c.v));
|
||||
return Packet2cd(result);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cd pnmsub(const Packet2cd& a, const Packet2cd& b, const Packet2cd& c) {
|
||||
__m256d a_odd = _mm256_permute_pd(a.v, 0xF);
|
||||
__m256d a_even = _mm256_movedup_pd(a.v);
|
||||
__m256d b_swap = _mm256_permute_pd(b.v, 0x5);
|
||||
__m256d result = _mm256_fmaddsub_pd(a_odd, b_swap, _mm256_fmsubadd_pd(a_even, b.v, c.v));
|
||||
return Packet2cd(result);
|
||||
}
|
||||
#endif
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
@@ -23,14 +23,17 @@ namespace internal {
|
||||
|
||||
EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_FLOAT(Packet8f)
|
||||
|
||||
EIGEN_DOUBLE_PACKET_FUNCTION(atan, Packet4d)
|
||||
EIGEN_DOUBLE_PACKET_FUNCTION(atanh, Packet4d)
|
||||
EIGEN_DOUBLE_PACKET_FUNCTION(log, Packet4d)
|
||||
EIGEN_DOUBLE_PACKET_FUNCTION(log2, Packet4d)
|
||||
EIGEN_DOUBLE_PACKET_FUNCTION(exp, Packet4d)
|
||||
EIGEN_DOUBLE_PACKET_FUNCTION(tanh, Packet4d)
|
||||
#ifdef EIGEN_VECTORIZE_AVX2
|
||||
EIGEN_DOUBLE_PACKET_FUNCTION(sin, Packet4d)
|
||||
EIGEN_DOUBLE_PACKET_FUNCTION(cos, Packet4d)
|
||||
#endif
|
||||
EIGEN_GENERIC_PACKET_FUNCTION(atan, Packet4d)
|
||||
EIGEN_GENERIC_PACKET_FUNCTION(exp2, Packet4d)
|
||||
|
||||
// Notice that for newer processors, it is counterproductive to use Newton
|
||||
// iteration for square root. In particular, Skylake and Zen2 processors
|
||||
@@ -93,6 +96,7 @@ EIGEN_STRONG_INLINE Packet8bf pldexp(const Packet8bf& a, const Packet8bf& expone
|
||||
|
||||
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, pcos)
|
||||
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, pexp)
|
||||
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, pexp2)
|
||||
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, pexpm1)
|
||||
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, plog)
|
||||
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, plog1p)
|
||||
@@ -104,6 +108,7 @@ BF16_PACKET_FUNCTION(Packet8f, Packet8bf, psqrt)
|
||||
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, ptanh)
|
||||
F16_PACKET_FUNCTION(Packet8f, Packet8h, pcos)
|
||||
F16_PACKET_FUNCTION(Packet8f, Packet8h, pexp)
|
||||
F16_PACKET_FUNCTION(Packet8f, Packet8h, pexp2)
|
||||
F16_PACKET_FUNCTION(Packet8f, Packet8h, pexpm1)
|
||||
F16_PACKET_FUNCTION(Packet8f, Packet8h, plog)
|
||||
F16_PACKET_FUNCTION(Packet8f, Packet8h, plog1p)
|
||||
|
||||
@@ -124,6 +124,7 @@ struct packet_traits<float> : default_packet_traits {
|
||||
HasRsqrt = 1,
|
||||
HasTanh = EIGEN_FAST_MATH,
|
||||
HasErf = EIGEN_FAST_MATH,
|
||||
HasErfc = EIGEN_FAST_MATH,
|
||||
HasBlend = 1
|
||||
};
|
||||
};
|
||||
@@ -142,11 +143,14 @@ struct packet_traits<double> : default_packet_traits {
|
||||
HasSin = EIGEN_FAST_MATH,
|
||||
HasCos = EIGEN_FAST_MATH,
|
||||
#endif
|
||||
HasTanh = EIGEN_FAST_MATH,
|
||||
HasLog = 1,
|
||||
HasErfc = 1,
|
||||
HasExp = 1,
|
||||
HasSqrt = 1,
|
||||
HasRsqrt = 1,
|
||||
HasATan = 1,
|
||||
HasATanh = 1,
|
||||
HasBlend = 1
|
||||
};
|
||||
};
|
||||
@@ -657,6 +661,16 @@ EIGEN_STRONG_INLINE uint64_t predux<Packet4ul>(const Packet4ul& a) {
|
||||
__m128i r = _mm_add_epi64(_mm256_castsi256_si128(a), _mm256_extractf128_si256(a, 1));
|
||||
return numext::bit_cast<uint64_t>(_mm_extract_epi64_0(r) + _mm_extract_epi64_1(r));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE bool predux_any(const Packet4l& a) {
|
||||
return _mm256_movemask_pd(_mm256_castsi256_pd(a)) != 0;
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE bool predux_any(const Packet4ul& a) {
|
||||
return _mm256_movemask_pd(_mm256_castsi256_pd(a)) != 0;
|
||||
}
|
||||
|
||||
#define MM256_SHUFFLE_EPI64(A, B, M) _mm256_shuffle_pd(_mm256_castsi256_pd(A), _mm256_castsi256_pd(B), M)
|
||||
EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet4l, 4>& kernel) {
|
||||
__m256d T0 = MM256_SHUFFLE_EPI64(kernel.packet[0], kernel.packet[1], 15);
|
||||
@@ -1999,6 +2013,11 @@ EIGEN_STRONG_INLINE bool predux_any(const Packet8f& x) {
|
||||
return _mm256_movemask_ps(x) != 0;
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE bool predux_any(const Packet4d& x) {
|
||||
return _mm256_movemask_pd(x) != 0;
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE bool predux_any(const Packet8i& x) {
|
||||
return _mm256_movemask_ps(_mm256_castsi256_ps(x)) != 0;
|
||||
@@ -2008,6 +2027,15 @@ EIGEN_STRONG_INLINE bool predux_any(const Packet8ui& x) {
|
||||
return _mm256_movemask_ps(_mm256_castsi256_ps(x)) != 0;
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE bool predux_any(const Packet8h& x) {
|
||||
return _mm_movemask_epi8(x) != 0;
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE bool predux_any(const Packet8bf& x) {
|
||||
return _mm_movemask_epi8(x) != 0;
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet8f, 8>& kernel) {
|
||||
__m256 T0 = _mm256_unpacklo_ps(kernel.packet[0], kernel.packet[1]);
|
||||
__m256 T1 = _mm256_unpackhi_ps(kernel.packet[0], kernel.packet[1]);
|
||||
|
||||
@@ -613,6 +613,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 abs(const bfloat16& a) {
|
||||
return numext::bit_cast<bfloat16>(x);
|
||||
}
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 exp(const bfloat16& a) { return bfloat16(::expf(float(a))); }
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 exp2(const bfloat16& a) { return bfloat16(::exp2f(float(a))); }
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 expm1(const bfloat16& a) { return bfloat16(numext::expm1(float(a))); }
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 log(const bfloat16& a) { return bfloat16(::logf(float(a))); }
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 log1p(const bfloat16& a) { return bfloat16(numext::log1p(float(a))); }
|
||||
@@ -762,6 +763,31 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC uint16_t bit_cast<uint16_t, Eigen::bfloat1
|
||||
return Eigen::bfloat16_impl::raw_bfloat16_as_uint16(src);
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 nextafter(const bfloat16& from, const bfloat16& to) {
|
||||
if (numext::isnan EIGEN_NOT_A_MACRO(from)) {
|
||||
return from;
|
||||
}
|
||||
if (numext::isnan EIGEN_NOT_A_MACRO(to)) {
|
||||
return to;
|
||||
}
|
||||
if (from == to) {
|
||||
return to;
|
||||
}
|
||||
uint16_t from_bits = numext::bit_cast<uint16_t>(from);
|
||||
bool from_sign = from_bits >> 15;
|
||||
// Whether we are adjusting toward the infinity with the same sign as from.
|
||||
bool toward_inf = (to > from) == !from_sign;
|
||||
if (toward_inf) {
|
||||
++from_bits;
|
||||
} else if ((from_bits & 0x7fff) == 0) {
|
||||
// Adjusting away from inf, but from is zero, so just toggle the sign.
|
||||
from_bits ^= 0x8000;
|
||||
} else {
|
||||
--from_bits;
|
||||
}
|
||||
return numext::bit_cast<bfloat16>(from_bits);
|
||||
}
|
||||
|
||||
} // namespace numext
|
||||
} // namespace Eigen
|
||||
|
||||
|
||||
@@ -42,6 +42,134 @@ struct make_integer<bfloat16> {
|
||||
typedef numext::int16_t type;
|
||||
};
|
||||
|
||||
/* polevl (modified for Eigen)
|
||||
*
|
||||
* Evaluate polynomial
|
||||
*
|
||||
*
|
||||
*
|
||||
* SYNOPSIS:
|
||||
*
|
||||
* int N;
|
||||
* Scalar x, y, coef[N+1];
|
||||
*
|
||||
* y = polevl<decltype(x), N>( x, coef);
|
||||
*
|
||||
*
|
||||
*
|
||||
* DESCRIPTION:
|
||||
*
|
||||
* Evaluates polynomial of degree N:
|
||||
*
|
||||
* 2 N
|
||||
* y = C + C x + C x +...+ C x
|
||||
* 0 1 2 N
|
||||
*
|
||||
* Coefficients are stored in reverse order:
|
||||
*
|
||||
* coef[0] = C , ..., coef[N] = C .
|
||||
* N 0
|
||||
*
|
||||
* The function p1evl() assumes that coef[N] = 1.0 and is
|
||||
* omitted from the array. Its calling arguments are
|
||||
* otherwise the same as polevl().
|
||||
*
|
||||
*
|
||||
* The Eigen implementation is templatized. For best speed, store
|
||||
* coef as a const array (constexpr), e.g.
|
||||
*
|
||||
* const double coef[] = {1.0, 2.0, 3.0, ...};
|
||||
*
|
||||
*/
|
||||
template <typename Packet, int N>
|
||||
struct ppolevl {
|
||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(const Packet& x,
|
||||
const typename unpacket_traits<Packet>::type coeff[]) {
|
||||
EIGEN_STATIC_ASSERT((N > 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
|
||||
return pmadd(ppolevl<Packet, N - 1>::run(x, coeff), x, pset1<Packet>(coeff[N]));
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Packet>
|
||||
struct ppolevl<Packet, 0> {
|
||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(const Packet& x,
|
||||
const typename unpacket_traits<Packet>::type coeff[]) {
|
||||
EIGEN_UNUSED_VARIABLE(x);
|
||||
return pset1<Packet>(coeff[0]);
|
||||
}
|
||||
};
|
||||
|
||||
/* chbevl (modified for Eigen)
|
||||
*
|
||||
* Evaluate Chebyshev series
|
||||
*
|
||||
*
|
||||
*
|
||||
* SYNOPSIS:
|
||||
*
|
||||
* int N;
|
||||
* Scalar x, y, coef[N], chebevl();
|
||||
*
|
||||
* y = chbevl( x, coef, N );
|
||||
*
|
||||
*
|
||||
*
|
||||
* DESCRIPTION:
|
||||
*
|
||||
* Evaluates the series
|
||||
*
|
||||
* N-1
|
||||
* - '
|
||||
* y = > coef[i] T (x/2)
|
||||
* - i
|
||||
* i=0
|
||||
*
|
||||
* of Chebyshev polynomials Ti at argument x/2.
|
||||
*
|
||||
* Coefficients are stored in reverse order, i.e. the zero
|
||||
* order term is last in the array. Note N is the number of
|
||||
* coefficients, not the order.
|
||||
*
|
||||
* If coefficients are for the interval a to b, x must
|
||||
* have been transformed to x -> 2(2x - b - a)/(b-a) before
|
||||
* entering the routine. This maps x from (a, b) to (-1, 1),
|
||||
* over which the Chebyshev polynomials are defined.
|
||||
*
|
||||
* If the coefficients are for the inverted interval, in
|
||||
* which (a, b) is mapped to (1/b, 1/a), the transformation
|
||||
* required is x -> 2(2ab/x - b - a)/(b-a). If b is infinity,
|
||||
* this becomes x -> 4a/x - 1.
|
||||
*
|
||||
*
|
||||
*
|
||||
* SPEED:
|
||||
*
|
||||
* Taking advantage of the recurrence properties of the
|
||||
* Chebyshev polynomials, the routine requires one more
|
||||
* addition per loop than evaluating a nested polynomial of
|
||||
* the same degree.
|
||||
*
|
||||
*/
|
||||
|
||||
template <typename Packet, int N>
|
||||
struct pchebevl {
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Packet run(Packet x,
|
||||
const typename unpacket_traits<Packet>::type coef[]) {
|
||||
typedef typename unpacket_traits<Packet>::type Scalar;
|
||||
Packet b0 = pset1<Packet>(coef[0]);
|
||||
Packet b1 = pset1<Packet>(static_cast<Scalar>(0.f));
|
||||
Packet b2;
|
||||
|
||||
for (int i = 1; i < N; i++) {
|
||||
b2 = b1;
|
||||
b1 = b0;
|
||||
b0 = psub(pmadd(x, b1, pset1<Packet>(coef[i])), b2);
|
||||
}
|
||||
|
||||
return pmul(pset1<Packet>(static_cast<Scalar>(0.5f)), psub(b0, b2));
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Packet>
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Packet pfrexp_generic_get_biased_exponent(const Packet& a) {
|
||||
typedef typename unpacket_traits<Packet>::type Scalar;
|
||||
@@ -193,21 +321,14 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog_impl_float(const
|
||||
e = psub(e, pand(cst_1, mask));
|
||||
x = padd(x, tmp);
|
||||
|
||||
// Polynomial coefficients for rational (3,3) r(x) = p(x)/q(x)
|
||||
// Polynomial coefficients for rational r(x) = p(x)/q(x)
|
||||
// approximating log(1+x) on [sqrt(0.5)-1;sqrt(2)-1].
|
||||
const Packet cst_p1 = pset1<Packet>(1.0000000190281136f);
|
||||
const Packet cst_p2 = pset1<Packet>(1.0000000190281063f);
|
||||
const Packet cst_p3 = pset1<Packet>(0.18256296349849254f);
|
||||
const Packet cst_q1 = pset1<Packet>(1.4999999999999927f);
|
||||
const Packet cst_q2 = pset1<Packet>(0.59923249590823520f);
|
||||
const Packet cst_q3 = pset1<Packet>(0.049616247954120038f);
|
||||
constexpr float alpha[] = {0.18256296349849254f, 1.0000000190281063f, 1.0000000190281136f};
|
||||
constexpr float beta[] = {0.049616247954120038f, 0.59923249590823520f, 1.4999999999999927f, 1.0f};
|
||||
|
||||
Packet p = pmadd(x, cst_p3, cst_p2);
|
||||
p = pmadd(x, p, cst_p1);
|
||||
Packet p = ppolevl<Packet, 2>::run(x, alpha);
|
||||
p = pmul(x, p);
|
||||
Packet q = pmadd(x, cst_q3, cst_q2);
|
||||
q = pmadd(x, q, cst_q1);
|
||||
q = pmadd(x, q, cst_1);
|
||||
Packet q = ppolevl<Packet, 3>::run(x, beta);
|
||||
x = pdiv(p, q);
|
||||
|
||||
// Add the logarithm of the exponent back to the result of the interpolation.
|
||||
@@ -348,7 +469,7 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog2_double(const Pa
|
||||
See: http://www.plunk.org/~hatch/rightway.php
|
||||
*/
|
||||
template <typename Packet>
|
||||
Packet generic_plog1p(const Packet& x) {
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet generic_log1p(const Packet& x) {
|
||||
typedef typename unpacket_traits<Packet>::type ScalarType;
|
||||
const Packet one = pset1<Packet>(ScalarType(1));
|
||||
Packet xp1 = padd(x, one);
|
||||
@@ -363,7 +484,7 @@ Packet generic_plog1p(const Packet& x) {
|
||||
See: http://www.plunk.org/~hatch/rightway.php
|
||||
*/
|
||||
template <typename Packet>
|
||||
Packet generic_expm1(const Packet& x) {
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet generic_expm1(const Packet& x) {
|
||||
typedef typename unpacket_traits<Packet>::type ScalarType;
|
||||
const Packet one = pset1<Packet>(ScalarType(1));
|
||||
const Packet neg_one = pset1<Packet>(ScalarType(-1));
|
||||
@@ -919,13 +1040,6 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pasin_float(const Pac
|
||||
const Packet cst_one = pset1<Packet>(1.0f);
|
||||
const Packet cst_two = pset1<Packet>(2.0f);
|
||||
const Packet cst_pi_over_two = pset1<Packet>(kPiOverTwo);
|
||||
// For |x| < 0.5 approximate asin(x)/x by an 8th order polynomial with
|
||||
// even terms only.
|
||||
const Packet p9 = pset1<Packet>(5.08838854730129241943359375e-2f);
|
||||
const Packet p7 = pset1<Packet>(3.95139865577220916748046875e-2f);
|
||||
const Packet p5 = pset1<Packet>(7.550220191478729248046875e-2f);
|
||||
const Packet p3 = pset1<Packet>(0.16664917767047882080078125f);
|
||||
const Packet p1 = pset1<Packet>(1.00000011920928955078125f);
|
||||
|
||||
const Packet abs_x = pabs(x_in);
|
||||
const Packet sign_mask = pandnot(x_in, abs_x);
|
||||
@@ -941,13 +1055,11 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pasin_float(const Pac
|
||||
const Packet x = pselect(large_mask, x_large, abs_x);
|
||||
const Packet x2 = pmul(x, x);
|
||||
|
||||
// Compute polynomial.
|
||||
// x * (p1 + x^2*(p3 + x^2*(p5 + x^2*(p7 + x^2*p9))))
|
||||
|
||||
Packet p = pmadd(p9, x2, p7);
|
||||
p = pmadd(p, x2, p5);
|
||||
p = pmadd(p, x2, p3);
|
||||
p = pmadd(p, x2, p1);
|
||||
// For |x| < 0.5 approximate asin(x)/x by an 8th order polynomial with
|
||||
// even terms only.
|
||||
constexpr float alpha[] = {5.08838854730129241943359375e-2f, 3.95139865577220916748046875e-2f,
|
||||
7.550220191478729248046875e-2f, 0.16664917767047882080078125f, 1.00000011920928955078125f};
|
||||
Packet p = ppolevl<Packet, 4>::run(x2, alpha);
|
||||
p = pmul(p, x);
|
||||
|
||||
const Packet p_large = pnmadd(cst_two, p, cst_pi_over_two);
|
||||
@@ -958,228 +1070,239 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pasin_float(const Pac
|
||||
return por(invalid_mask, p);
|
||||
}
|
||||
|
||||
// Computes elementwise atan(x) for x in [-1:1] with 2 ulp accuracy.
|
||||
template <typename Scalar>
|
||||
struct patan_reduced {
|
||||
template <typename Packet>
|
||||
static EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet run(const Packet& x);
|
||||
};
|
||||
|
||||
template <>
|
||||
template <typename Packet>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patan_reduced_float(const Packet& x) {
|
||||
const Packet q0 = pset1<Packet>(-0.3333314359188079833984375f);
|
||||
const Packet q2 = pset1<Packet>(0.19993579387664794921875f);
|
||||
const Packet q4 = pset1<Packet>(-0.14209578931331634521484375f);
|
||||
const Packet q6 = pset1<Packet>(0.1066047251224517822265625f);
|
||||
const Packet q8 = pset1<Packet>(-7.5408883392810821533203125e-2f);
|
||||
const Packet q10 = pset1<Packet>(4.3082617223262786865234375e-2f);
|
||||
const Packet q12 = pset1<Packet>(-1.62907354533672332763671875e-2f);
|
||||
const Packet q14 = pset1<Packet>(2.90188402868807315826416015625e-3f);
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patan_reduced<double>::run(const Packet& x) {
|
||||
constexpr double alpha[] = {2.6667153866462208e-05, 3.0917513112462781e-03, 5.2574296781008604e-02,
|
||||
3.0409318473444424e-01, 7.5365702534987022e-01, 8.2704055405494614e-01,
|
||||
3.3004361289279920e-01};
|
||||
|
||||
// Approximate atan(x) by a polynomial of the form
|
||||
// P(x) = x + x^3 * Q(x^2),
|
||||
// where Q(x^2) is a 7th order polynomial in x^2.
|
||||
// We evaluate even and odd terms in x^2 in parallel
|
||||
// to take advantage of instruction level parallelism
|
||||
// and hardware with multiple FMA units.
|
||||
constexpr double beta[] = {
|
||||
2.7311202462436667e-04, 1.0899150928962708e-02, 1.1548932646420353e-01, 4.9716458728465573e-01, 1.0,
|
||||
9.3705509168587852e-01, 3.3004361289279920e-01};
|
||||
|
||||
// note: if x == -0, this returns +0
|
||||
const Packet x2 = pmul(x, x);
|
||||
const Packet x4 = pmul(x2, x2);
|
||||
Packet q_odd = pmadd(q14, x4, q10);
|
||||
Packet q_even = pmadd(q12, x4, q8);
|
||||
q_odd = pmadd(q_odd, x4, q6);
|
||||
q_even = pmadd(q_even, x4, q4);
|
||||
q_odd = pmadd(q_odd, x4, q2);
|
||||
q_even = pmadd(q_even, x4, q0);
|
||||
const Packet q = pmadd(q_odd, x2, q_even);
|
||||
return pmadd(q, pmul(x, x2), x);
|
||||
Packet x2 = pmul(x, x);
|
||||
Packet p = ppolevl<Packet, 6>::run(x2, alpha);
|
||||
Packet q = ppolevl<Packet, 6>::run(x2, beta);
|
||||
return pmul(x, pdiv(p, q));
|
||||
}
|
||||
|
||||
// Computes elementwise atan(x) for x in [-1:1] with 2 ulp accuracy.
|
||||
template <>
|
||||
template <typename Packet>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patan_reduced<float>::run(const Packet& x) {
|
||||
constexpr float alpha[] = {1.12026982009410858154296875e-01f, 7.296695709228515625e-01f, 8.109951019287109375e-01f};
|
||||
|
||||
constexpr float beta[] = {1.00917108356952667236328125e-02f, 2.8318560123443603515625e-01f, 1.0f,
|
||||
8.109951019287109375e-01f};
|
||||
|
||||
Packet x2 = pmul(x, x);
|
||||
Packet p = ppolevl<Packet, 2>::run(x2, alpha);
|
||||
Packet q = ppolevl<Packet, 3>::run(x2, beta);
|
||||
return pmul(x, pdiv(p, q));
|
||||
}
|
||||
|
||||
template <typename Packet>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patan_float(const Packet& x_in) {
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet generic_atan(const Packet& x_in) {
|
||||
typedef typename unpacket_traits<Packet>::type Scalar;
|
||||
static_assert(std::is_same<Scalar, float>::value, "Scalar type must be float");
|
||||
|
||||
constexpr float kPiOverTwo = static_cast<float>(EIGEN_PI / 2);
|
||||
constexpr Scalar kPiOverTwo = static_cast<Scalar>(EIGEN_PI / 2);
|
||||
|
||||
const Packet cst_signmask = pset1<Packet>(-0.0f);
|
||||
const Packet cst_one = pset1<Packet>(1.0f);
|
||||
const Packet cst_signmask = pset1<Packet>(-Scalar(0));
|
||||
const Packet cst_one = pset1<Packet>(Scalar(1));
|
||||
const Packet cst_pi_over_two = pset1<Packet>(kPiOverTwo);
|
||||
|
||||
// "Large": For |x| > 1, use atan(1/x) = sign(x)*pi/2 - atan(x).
|
||||
// "Small": For |x| <= 1, approximate atan(x) directly by a polynomial
|
||||
// calculated using Sollya.
|
||||
// calculated using Rminimax.
|
||||
|
||||
const Packet abs_x = pabs(x_in);
|
||||
const Packet x_signmask = pand(x_in, cst_signmask);
|
||||
const Packet large_mask = pcmp_lt(cst_one, abs_x);
|
||||
const Packet x = pselect(large_mask, preciprocal(abs_x), abs_x);
|
||||
const Packet p = patan_reduced_float(x);
|
||||
const Packet p = patan_reduced<Scalar>::run(x);
|
||||
// Apply transformations according to the range reduction masks.
|
||||
Packet result = pselect(large_mask, psub(cst_pi_over_two, p), p);
|
||||
// Return correct sign
|
||||
return pxor(result, x_signmask);
|
||||
}
|
||||
|
||||
// Computes elementwise atan(x) for x in [-tan(pi/8):tan(pi/8)]
|
||||
// with 2 ulp accuracy.
|
||||
template <typename Packet>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patan_reduced_double(const Packet& x) {
|
||||
const Packet q0 = pset1<Packet>(-0.33333333333330028569463365784031338989734649658203);
|
||||
const Packet q2 = pset1<Packet>(0.199999999990664090177006073645316064357757568359375);
|
||||
const Packet q4 = pset1<Packet>(-0.142857141937123677255527809393242932856082916259766);
|
||||
const Packet q6 = pset1<Packet>(0.111111065991039953404495577160560060292482376098633);
|
||||
const Packet q8 = pset1<Packet>(-9.0907812986129224452902519715280504897236824035645e-2);
|
||||
const Packet q10 = pset1<Packet>(7.6900542950704739442180368769186316058039665222168e-2);
|
||||
const Packet q12 = pset1<Packet>(-6.6410112986494976294871150912513257935643196105957e-2);
|
||||
const Packet q14 = pset1<Packet>(5.6920144995467943094258345126945641823112964630127e-2);
|
||||
const Packet q16 = pset1<Packet>(-4.3577020814990513608577771265117917209863662719727e-2);
|
||||
const Packet q18 = pset1<Packet>(2.1244050233624342527427586446719942614436149597168e-2);
|
||||
|
||||
// Approximate atan(x) on [0:tan(pi/8)] by a polynomial of the form
|
||||
// P(x) = x + x^3 * Q(x^2),
|
||||
// where Q(x^2) is a 9th order polynomial in x^2.
|
||||
// We evaluate even and odd terms in x^2 in parallel
|
||||
// to take advantage of instruction level parallelism
|
||||
// and hardware with multiple FMA units.
|
||||
const Packet x2 = pmul(x, x);
|
||||
const Packet x4 = pmul(x2, x2);
|
||||
Packet q_odd = pmadd(q18, x4, q14);
|
||||
Packet q_even = pmadd(q16, x4, q12);
|
||||
q_odd = pmadd(q_odd, x4, q10);
|
||||
q_even = pmadd(q_even, x4, q8);
|
||||
q_odd = pmadd(q_odd, x4, q6);
|
||||
q_even = pmadd(q_even, x4, q4);
|
||||
q_odd = pmadd(q_odd, x4, q2);
|
||||
q_even = pmadd(q_even, x4, q0);
|
||||
const Packet p = pmadd(q_odd, x2, q_even);
|
||||
return pmadd(p, pmul(x, x2), x);
|
||||
}
|
||||
|
||||
template <typename Packet>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patan_double(const Packet& x_in) {
|
||||
typedef typename unpacket_traits<Packet>::type Scalar;
|
||||
static_assert(std::is_same<Scalar, double>::value, "Scalar type must be double");
|
||||
|
||||
constexpr double kPiOverTwo = static_cast<double>(EIGEN_PI / 2);
|
||||
constexpr double kPiOverFour = static_cast<double>(EIGEN_PI / 4);
|
||||
constexpr double kTanPiOverEight = 0.4142135623730950488016887;
|
||||
constexpr double kTan3PiOverEight = 2.4142135623730950488016887;
|
||||
|
||||
const Packet cst_signmask = pset1<Packet>(-0.0);
|
||||
const Packet cst_one = pset1<Packet>(1.0);
|
||||
const Packet cst_pi_over_two = pset1<Packet>(kPiOverTwo);
|
||||
const Packet cst_pi_over_four = pset1<Packet>(kPiOverFour);
|
||||
const Packet cst_large = pset1<Packet>(kTan3PiOverEight);
|
||||
const Packet cst_medium = pset1<Packet>(kTanPiOverEight);
|
||||
|
||||
// Use the same range reduction strategy (to [0:tan(pi/8)]) as the
|
||||
// Cephes library:
|
||||
// "Large": For x >= tan(3*pi/8), use atan(1/x) = pi/2 - atan(x).
|
||||
// "Medium": For x in [tan(pi/8) : tan(3*pi/8)),
|
||||
// use atan(x) = pi/4 + atan((x-1)/(x+1)).
|
||||
// "Small": For x < tan(pi/8), approximate atan(x) directly by a polynomial
|
||||
// calculated using Sollya.
|
||||
|
||||
const Packet abs_x = pabs(x_in);
|
||||
const Packet x_signmask = pand(x_in, cst_signmask);
|
||||
const Packet large_mask = pcmp_lt(cst_large, abs_x);
|
||||
const Packet medium_mask = pandnot(pcmp_lt(cst_medium, abs_x), large_mask);
|
||||
|
||||
Packet x = abs_x;
|
||||
x = pselect(large_mask, preciprocal(abs_x), x);
|
||||
x = pselect(medium_mask, pdiv(psub(abs_x, cst_one), padd(abs_x, cst_one)), x);
|
||||
|
||||
// Compute approximation of p ~= atan(x') where x' is the argument reduced to
|
||||
// [0:tan(pi/8)].
|
||||
Packet p = patan_reduced_double(x);
|
||||
|
||||
// Apply transformations according to the range reduction masks.
|
||||
p = pselect(large_mask, psub(cst_pi_over_two, p), p);
|
||||
p = pselect(medium_mask, padd(cst_pi_over_four, p), p);
|
||||
// Return the correct sign
|
||||
return pxor(p, x_signmask);
|
||||
}
|
||||
|
||||
/** \internal \returns the hyperbolic tan of \a a (coeff-wise)
|
||||
Doesn't do anything fancy, just a 13/6-degree rational interpolant which
|
||||
Doesn't do anything fancy, just a 9/8-degree rational interpolant which
|
||||
is accurate up to a couple of ulps in the (approximate) range [-8, 8],
|
||||
outside of which tanh(x) = +/-1 in single precision. The input is clamped
|
||||
to the range [-c, c]. The value c is chosen as the smallest value where
|
||||
the approximation evaluates to exactly 1. In the reange [-0.0004, 0.0004]
|
||||
the approximation tanh(x) ~= x is used for better accuracy as x tends to zero.
|
||||
the approximation evaluates to exactly 1.
|
||||
|
||||
This implementation works on both scalars and packets.
|
||||
*/
|
||||
template <typename T>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS T ptanh_float(const T& a_x) {
|
||||
// Clamp the inputs to the range [-c, c]
|
||||
// Clamp the inputs to the range [-c, c] and set everything
|
||||
// outside that range to 1.0. The value c is chosen as the smallest
|
||||
// floating point argument such that the approximation is exactly 1.
|
||||
// This saves clamping the value at the end.
|
||||
#ifdef EIGEN_VECTORIZE_FMA
|
||||
const T plus_clamp = pset1<T>(7.99881172180175781f);
|
||||
const T minus_clamp = pset1<T>(-7.99881172180175781f);
|
||||
const T plus_clamp = pset1<T>(8.01773357391357422f);
|
||||
const T minus_clamp = pset1<T>(-8.01773357391357422f);
|
||||
#else
|
||||
const T plus_clamp = pset1<T>(7.90531110763549805f);
|
||||
const T minus_clamp = pset1<T>(-7.90531110763549805f);
|
||||
const T plus_clamp = pset1<T>(7.90738964080810547f);
|
||||
const T minus_clamp = pset1<T>(-7.90738964080810547f);
|
||||
#endif
|
||||
const T tiny = pset1<T>(0.0004f);
|
||||
const T x = pmax(pmin(a_x, plus_clamp), minus_clamp);
|
||||
const T tiny_mask = pcmp_lt(pabs(a_x), tiny);
|
||||
|
||||
// The following rational approximation was generated by rminimax
|
||||
// (https://gitlab.inria.fr/sfilip/rminimax) using the following
|
||||
// command:
|
||||
// $ ratapprox --function="tanh(x)" --dom='[-8.67,8.67]' --num="odd"
|
||||
// --den="even" --type="[9,8]" --numF="[SG]" --denF="[SG]" --log
|
||||
// --output=tanhf.sollya --dispCoeff="dec"
|
||||
|
||||
// The monomial coefficients of the numerator polynomial (odd).
|
||||
const T alpha_1 = pset1<T>(4.89352455891786e-03f);
|
||||
const T alpha_3 = pset1<T>(6.37261928875436e-04f);
|
||||
const T alpha_5 = pset1<T>(1.48572235717979e-05f);
|
||||
const T alpha_7 = pset1<T>(5.12229709037114e-08f);
|
||||
const T alpha_9 = pset1<T>(-8.60467152213735e-11f);
|
||||
const T alpha_11 = pset1<T>(2.00018790482477e-13f);
|
||||
const T alpha_13 = pset1<T>(-2.76076847742355e-16f);
|
||||
constexpr float alpha[] = {1.394553628e-8f, 2.102733560e-5f, 3.520756727e-3f, 1.340216100e-1f};
|
||||
|
||||
// The monomial coefficients of the denominator polynomial (even).
|
||||
const T beta_0 = pset1<T>(4.89352518554385e-03f);
|
||||
const T beta_2 = pset1<T>(2.26843463243900e-03f);
|
||||
const T beta_4 = pset1<T>(1.18534705686654e-04f);
|
||||
const T beta_6 = pset1<T>(1.19825839466702e-06f);
|
||||
constexpr float beta[] = {8.015776984e-7f, 3.326951409e-4f, 2.597254514e-2f, 4.673548340e-1f, 1.0f};
|
||||
|
||||
// Since the polynomials are odd/even, we need x^2.
|
||||
const T x2 = pmul(x, x);
|
||||
const T x3 = pmul(x2, x);
|
||||
|
||||
// Evaluate the numerator polynomial p.
|
||||
T p = pmadd(x2, alpha_13, alpha_11);
|
||||
p = pmadd(x2, p, alpha_9);
|
||||
p = pmadd(x2, p, alpha_7);
|
||||
p = pmadd(x2, p, alpha_5);
|
||||
p = pmadd(x2, p, alpha_3);
|
||||
p = pmadd(x2, p, alpha_1);
|
||||
p = pmul(x, p);
|
||||
|
||||
// Evaluate the denominator polynomial q.
|
||||
T q = pmadd(x2, beta_6, beta_4);
|
||||
q = pmadd(x2, q, beta_2);
|
||||
q = pmadd(x2, q, beta_0);
|
||||
T p = ppolevl<T, 3>::run(x2, alpha);
|
||||
T q = ppolevl<T, 4>::run(x2, beta);
|
||||
// Take advantage of the fact that the constant term in p is 1 to compute
|
||||
// x*(x^2*p + 1) = x^3 * p + x.
|
||||
p = pmadd(x3, p, x);
|
||||
|
||||
// Divide the numerator by the denominator.
|
||||
return pselect(tiny_mask, x, pdiv(p, q));
|
||||
return pdiv(p, q);
|
||||
}
|
||||
|
||||
/** \internal \returns the hyperbolic tan of \a a (coeff-wise)
|
||||
This uses a 19/18-degree rational interpolant which
|
||||
is accurate up to a couple of ulps in the (approximate) range [-18.7, 18.7],
|
||||
outside of which tanh(x) = +/-1 in single precision. The input is clamped
|
||||
to the range [-c, c]. The value c is chosen as the smallest value where
|
||||
the approximation evaluates to exactly 1.
|
||||
|
||||
This implementation works on both scalars and packets.
|
||||
*/
|
||||
template <typename T>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS T ptanh_double(const T& a_x) {
|
||||
// Clamp the inputs to the range [-c, c] and set everything
|
||||
// outside that range to 1.0. The value c is chosen as the smallest
|
||||
// floating point argument such that the approximation is exactly 1.
|
||||
// This saves clamping the value at the end.
|
||||
#ifdef EIGEN_VECTORIZE_FMA
|
||||
const T plus_clamp = pset1<T>(17.6610191624600077);
|
||||
const T minus_clamp = pset1<T>(-17.6610191624600077);
|
||||
#else
|
||||
const T plus_clamp = pset1<T>(17.714196154005176);
|
||||
const T minus_clamp = pset1<T>(-17.714196154005176);
|
||||
#endif
|
||||
const T x = pmax(pmin(a_x, plus_clamp), minus_clamp);
|
||||
|
||||
// The following rational approximation was generated by rminimax
|
||||
// (https://gitlab.inria.fr/sfilip/rminimax) using the following
|
||||
// command:
|
||||
// $ ./ratapprox --function="tanh(x)" --dom='[-18.72,18.72]'
|
||||
// --num="odd" --den="even" --type="[19,18]" --numF="[D]"
|
||||
// --denF="[D]" --log --output=tanh.sollya --dispCoeff="dec"
|
||||
|
||||
// The monomial coefficients of the numerator polynomial (odd).
|
||||
constexpr double alpha[] = {2.6158007860482230e-23, 7.6534862268749319e-19, 3.1309488231386680e-15,
|
||||
4.2303918148209176e-12, 2.4618379131293676e-09, 6.8644367682497074e-07,
|
||||
9.3839087674268880e-05, 5.9809711724441161e-03, 1.5184719640284322e-01};
|
||||
|
||||
// The monomial coefficients of the denominator polynomial (even).
|
||||
constexpr double beta[] = {6.463747022670968018e-21, 5.782506856739003571e-17,
|
||||
1.293019623712687916e-13, 1.123643448069621992e-10,
|
||||
4.492975677839633985e-08, 8.785185266237658698e-06,
|
||||
8.295161192716231542e-04, 3.437448108450402717e-02,
|
||||
4.851805297361760360e-01, 1.0};
|
||||
|
||||
// Since the polynomials are odd/even, we need x^2.
|
||||
const T x2 = pmul(x, x);
|
||||
const T x3 = pmul(x2, x);
|
||||
|
||||
// Interleave the evaluation of the numerator polynomial p and
|
||||
// denominator polynomial q.
|
||||
T p = ppolevl<T, 8>::run(x2, alpha);
|
||||
T q = ppolevl<T, 9>::run(x2, beta);
|
||||
// Take advantage of the fact that the constant term in p is 1 to compute
|
||||
// x*(x^2*p + 1) = x^3 * p + x.
|
||||
p = pmadd(x3, p, x);
|
||||
|
||||
// Divide the numerator by the denominator.
|
||||
return pdiv(p, q);
|
||||
}
|
||||
|
||||
template <typename Packet>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patanh_float(const Packet& x) {
|
||||
typedef typename unpacket_traits<Packet>::type Scalar;
|
||||
static_assert(std::is_same<Scalar, float>::value, "Scalar type must be float");
|
||||
const Packet half = pset1<Packet>(0.5f);
|
||||
const Packet x_gt_half = pcmp_le(half, pabs(x));
|
||||
|
||||
// For |x| in [0:0.5] we use a polynomial approximation of the form
|
||||
// P(x) = x + x^3*(c3 + x^2 * (c5 + x^2 * (... x^2 * c11) ... )).
|
||||
const Packet C3 = pset1<Packet>(0.3333373963832855224609375f);
|
||||
const Packet C5 = pset1<Packet>(0.1997792422771453857421875f);
|
||||
const Packet C7 = pset1<Packet>(0.14672131836414337158203125f);
|
||||
const Packet C9 = pset1<Packet>(8.2311116158962249755859375e-2f);
|
||||
const Packet C11 = pset1<Packet>(0.1819281280040740966796875f);
|
||||
// P(x) = x + x^3*(alpha[4] + x^2 * (alpha[3] + x^2 * (... x^2 * alpha[0]) ... )).
|
||||
constexpr float alpha[] = {0.1819281280040740966796875f, 8.2311116158962249755859375e-2f,
|
||||
0.14672131836414337158203125f, 0.1997792422771453857421875f, 0.3333373963832855224609375f};
|
||||
const Packet x2 = pmul(x, x);
|
||||
Packet p = pmadd(C11, x2, C9);
|
||||
p = pmadd(x2, p, C7);
|
||||
p = pmadd(x2, p, C5);
|
||||
p = pmadd(x2, p, C3);
|
||||
p = pmadd(pmul(x, x2), p, x);
|
||||
const Packet x3 = pmul(x, x2);
|
||||
Packet p = ppolevl<Packet, 4>::run(x2, alpha);
|
||||
p = pmadd(x3, p, x);
|
||||
|
||||
// For |x| in ]0.5:1.0] we use atanh = 0.5*ln((1+x)/(1-x));
|
||||
const Packet half = pset1<Packet>(0.5f);
|
||||
const Packet one = pset1<Packet>(1.0f);
|
||||
Packet r = pdiv(padd(one, x), psub(one, x));
|
||||
r = pmul(half, plog(r));
|
||||
return pselect(x_gt_half, r, p);
|
||||
|
||||
const Packet x_gt_half = pcmp_le(half, pabs(x));
|
||||
const Packet x_eq_one = pcmp_eq(one, pabs(x));
|
||||
const Packet x_gt_one = pcmp_lt(one, pabs(x));
|
||||
const Packet sign_mask = pset1<Packet>(-0.0f);
|
||||
const Packet x_sign = pand(sign_mask, x);
|
||||
const Packet inf = pset1<Packet>(std::numeric_limits<float>::infinity());
|
||||
return por(x_gt_one, pselect(x_eq_one, por(x_sign, inf), pselect(x_gt_half, r, p)));
|
||||
}
|
||||
|
||||
template <typename Packet>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patanh_double(const Packet& x) {
|
||||
typedef typename unpacket_traits<Packet>::type Scalar;
|
||||
static_assert(std::is_same<Scalar, double>::value, "Scalar type must be double");
|
||||
// For x in [-0.5:0.5] we use a rational approximation of the form
|
||||
// R(x) = x + x^3*P(x^2)/Q(x^2), where P is or order 4 and Q is of order 5.
|
||||
constexpr double alpha[] = {3.3071338469301391e-03, -4.7129526768798737e-02, 1.8185306179826699e-01,
|
||||
-2.5949536095445679e-01, 1.2306328729812676e-01};
|
||||
|
||||
constexpr double beta[] = {-3.8679974580640881e-03, 7.6391885763341910e-02, -4.2828141436397615e-01,
|
||||
9.8733495886883648e-01, -1.0000000000000000e+00, 3.6918986189438030e-01};
|
||||
|
||||
const Packet x2 = pmul(x, x);
|
||||
const Packet x3 = pmul(x, x2);
|
||||
Packet p = ppolevl<Packet, 4>::run(x2, alpha);
|
||||
Packet q = ppolevl<Packet, 5>::run(x2, beta);
|
||||
Packet y_small = pmadd(x3, pdiv(p, q), x);
|
||||
|
||||
// For |x| in ]0.5:1.0] we use atanh = 0.5*ln((1+x)/(1-x));
|
||||
const Packet half = pset1<Packet>(0.5);
|
||||
const Packet one = pset1<Packet>(1.0);
|
||||
Packet y_large = pdiv(padd(one, x), psub(one, x));
|
||||
y_large = pmul(half, plog(y_large));
|
||||
|
||||
const Packet x_gt_half = pcmp_le(half, pabs(x));
|
||||
const Packet x_eq_one = pcmp_eq(one, pabs(x));
|
||||
const Packet x_gt_one = pcmp_lt(one, pabs(x));
|
||||
const Packet sign_mask = pset1<Packet>(-0.0);
|
||||
const Packet x_sign = pand(sign_mask, x);
|
||||
const Packet inf = pset1<Packet>(std::numeric_limits<double>::infinity());
|
||||
return por(x_gt_one, pselect(x_eq_one, por(x_sign, inf), pselect(x_gt_half, y_large, y_small)));
|
||||
}
|
||||
|
||||
template <typename Packet>
|
||||
@@ -1511,7 +1634,7 @@ struct psign_impl<Packet, std::enable_if_t<NumTraits<typename unpacket_traits<Pa
|
||||
// This function splits x into the nearest integer n and fractional part r,
|
||||
// such that x = n + r holds exactly.
|
||||
template <typename Packet>
|
||||
EIGEN_STRONG_INLINE void absolute_split(const Packet& x, Packet& n, Packet& r) {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void absolute_split(const Packet& x, Packet& n, Packet& r) {
|
||||
n = pround(x);
|
||||
r = psub(x, n);
|
||||
}
|
||||
@@ -1519,7 +1642,7 @@ EIGEN_STRONG_INLINE void absolute_split(const Packet& x, Packet& n, Packet& r) {
|
||||
// This function computes the sum {s, r}, such that x + y = s_hi + s_lo
|
||||
// holds exactly, and s_hi = fl(x+y), if |x| >= |y|.
|
||||
template <typename Packet>
|
||||
EIGEN_STRONG_INLINE void fast_twosum(const Packet& x, const Packet& y, Packet& s_hi, Packet& s_lo) {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void fast_twosum(const Packet& x, const Packet& y, Packet& s_hi, Packet& s_lo) {
|
||||
s_hi = padd(x, y);
|
||||
const Packet t = psub(s_hi, x);
|
||||
s_lo = psub(y, t);
|
||||
@@ -1531,11 +1654,18 @@ EIGEN_STRONG_INLINE void fast_twosum(const Packet& x, const Packet& y, Packet& s
|
||||
// {p_hi, p_lo} such that x * y = p_hi + p_lo holds exactly and
|
||||
// p_hi = fl(x * y).
|
||||
template <typename Packet>
|
||||
EIGEN_STRONG_INLINE void twoprod(const Packet& x, const Packet& y, Packet& p_hi, Packet& p_lo) {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void twoprod(const Packet& x, const Packet& y, Packet& p_hi, Packet& p_lo) {
|
||||
p_hi = pmul(x, y);
|
||||
p_lo = pmsub(x, y, p_hi);
|
||||
}
|
||||
|
||||
// A version of twoprod that takes x, y, and fl(x*y) as input and returns the p_lo such that
|
||||
// x * y = xy + p_lo holds exactly.
|
||||
template <typename Packet>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet twoprod_low(const Packet& x, const Packet& y, const Packet& xy) {
|
||||
return pmsub(x, y, xy);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
// This function implements the Veltkamp splitting. Given a floating point
|
||||
@@ -1544,7 +1674,7 @@ EIGEN_STRONG_INLINE void twoprod(const Packet& x, const Packet& y, Packet& p_hi,
|
||||
// This is Algorithm 3 from Jean-Michel Muller, "Elementary Functions",
|
||||
// 3rd edition, Birkh\"auser, 2016.
|
||||
template <typename Packet>
|
||||
EIGEN_STRONG_INLINE void veltkamp_splitting(const Packet& x, Packet& x_hi, Packet& x_lo) {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void veltkamp_splitting(const Packet& x, Packet& x_hi, Packet& x_lo) {
|
||||
typedef typename unpacket_traits<Packet>::type Scalar;
|
||||
EIGEN_CONSTEXPR int shift = (NumTraits<Scalar>::digits() + 1) / 2;
|
||||
const Scalar shift_scale = Scalar(uint64_t(1) << shift); // Scalar constructor not necessarily constexpr.
|
||||
@@ -1559,7 +1689,7 @@ EIGEN_STRONG_INLINE void veltkamp_splitting(const Packet& x, Packet& x_hi, Packe
|
||||
// {p_hi, p_lo} such that x * y = p_hi + p_lo holds exactly and
|
||||
// p_hi = fl(x * y).
|
||||
template <typename Packet>
|
||||
EIGEN_STRONG_INLINE void twoprod(const Packet& x, const Packet& y, Packet& p_hi, Packet& p_lo) {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void twoprod(const Packet& x, const Packet& y, Packet& p_hi, Packet& p_lo) {
|
||||
Packet x_hi, x_lo, y_hi, y_lo;
|
||||
veltkamp_splitting(x, x_hi, x_lo);
|
||||
veltkamp_splitting(y, y_hi, y_lo);
|
||||
@@ -1571,6 +1701,21 @@ EIGEN_STRONG_INLINE void twoprod(const Packet& x, const Packet& y, Packet& p_hi,
|
||||
p_lo = pmadd(x_lo, y_lo, p_lo);
|
||||
}
|
||||
|
||||
// A version of twoprod that takes x, y, and fl(x*y) as input and returns the p_lo such that
|
||||
// x * y = xy + p_lo holds exactly.
|
||||
template <typename Packet>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet twoprod_low(const Packet& x, const Packet& y, const Packet& xy) {
|
||||
Packet x_hi, x_lo, y_hi, y_lo;
|
||||
veltkamp_splitting(x, x_hi, x_lo);
|
||||
veltkamp_splitting(y, y_hi, y_lo);
|
||||
|
||||
Packet p_lo = pmadd(x_hi, y_hi, pnegate(xy));
|
||||
p_lo = pmadd(x_hi, y_lo, p_lo);
|
||||
p_lo = pmadd(x_lo, y_hi, p_lo);
|
||||
p_lo = pmadd(x_lo, y_lo, p_lo);
|
||||
return p_lo;
|
||||
}
|
||||
|
||||
#endif // EIGEN_VECTORIZE_FMA
|
||||
|
||||
// This function implements Dekker's algorithm for the addition
|
||||
@@ -1580,8 +1725,8 @@ EIGEN_STRONG_INLINE void twoprod(const Packet& x, const Packet& y, Packet& p_hi,
|
||||
// This is Algorithm 5 from Jean-Michel Muller, "Elementary Functions",
|
||||
// 3rd edition, Birkh\"auser, 2016.
|
||||
template <typename Packet>
|
||||
EIGEN_STRONG_INLINE void twosum(const Packet& x_hi, const Packet& x_lo, const Packet& y_hi, const Packet& y_lo,
|
||||
Packet& s_hi, Packet& s_lo) {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void twosum(const Packet& x_hi, const Packet& x_lo, const Packet& y_hi,
|
||||
const Packet& y_lo, Packet& s_hi, Packet& s_lo) {
|
||||
const Packet x_greater_mask = pcmp_lt(pabs(y_hi), pabs(x_hi));
|
||||
Packet r_hi_1, r_lo_1;
|
||||
fast_twosum(x_hi, y_hi, r_hi_1, r_lo_1);
|
||||
@@ -1599,8 +1744,8 @@ EIGEN_STRONG_INLINE void twosum(const Packet& x_hi, const Packet& x_lo, const Pa
|
||||
// This is a version of twosum for double word numbers,
|
||||
// which assumes that |x_hi| >= |y_hi|.
|
||||
template <typename Packet>
|
||||
EIGEN_STRONG_INLINE void fast_twosum(const Packet& x_hi, const Packet& x_lo, const Packet& y_hi, const Packet& y_lo,
|
||||
Packet& s_hi, Packet& s_lo) {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void fast_twosum(const Packet& x_hi, const Packet& x_lo, const Packet& y_hi,
|
||||
const Packet& y_lo, Packet& s_hi, Packet& s_lo) {
|
||||
Packet r_hi, r_lo;
|
||||
fast_twosum(x_hi, y_hi, r_hi, r_lo);
|
||||
const Packet s = padd(padd(y_lo, r_lo), x_lo);
|
||||
@@ -1611,8 +1756,8 @@ EIGEN_STRONG_INLINE void fast_twosum(const Packet& x_hi, const Packet& x_lo, con
|
||||
// double word number {y_hi, y_lo} number, with the assumption
|
||||
// that |x| >= |y_hi|.
|
||||
template <typename Packet>
|
||||
EIGEN_STRONG_INLINE void fast_twosum(const Packet& x, const Packet& y_hi, const Packet& y_lo, Packet& s_hi,
|
||||
Packet& s_lo) {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void fast_twosum(const Packet& x, const Packet& y_hi, const Packet& y_lo,
|
||||
Packet& s_hi, Packet& s_lo) {
|
||||
Packet r_hi, r_lo;
|
||||
fast_twosum(x, y_hi, r_hi, r_lo);
|
||||
const Packet s = padd(y_lo, r_lo);
|
||||
@@ -1628,7 +1773,8 @@ EIGEN_STRONG_INLINE void fast_twosum(const Packet& x, const Packet& y_hi, const
|
||||
// This is Algorithm 7 from Jean-Michel Muller, "Elementary Functions",
|
||||
// 3rd edition, Birkh\"auser, 2016.
|
||||
template <typename Packet>
|
||||
EIGEN_STRONG_INLINE void twoprod(const Packet& x_hi, const Packet& x_lo, const Packet& y, Packet& p_hi, Packet& p_lo) {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void twoprod(const Packet& x_hi, const Packet& x_lo, const Packet& y,
|
||||
Packet& p_hi, Packet& p_lo) {
|
||||
Packet c_hi, c_lo1;
|
||||
twoprod(x_hi, y, c_hi, c_lo1);
|
||||
const Packet c_lo2 = pmul(x_lo, y);
|
||||
@@ -1645,8 +1791,8 @@ EIGEN_STRONG_INLINE void twoprod(const Packet& x_hi, const Packet& x_lo, const P
|
||||
// of less than 2*2^{-2p}, where p is the number of significand bit
|
||||
// in the floating point type.
|
||||
template <typename Packet>
|
||||
EIGEN_STRONG_INLINE void twoprod(const Packet& x_hi, const Packet& x_lo, const Packet& y_hi, const Packet& y_lo,
|
||||
Packet& p_hi, Packet& p_lo) {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void twoprod(const Packet& x_hi, const Packet& x_lo, const Packet& y_hi,
|
||||
const Packet& y_lo, Packet& p_hi, Packet& p_lo) {
|
||||
Packet p_hi_hi, p_hi_lo;
|
||||
twoprod(x_hi, x_lo, y_hi, p_hi_hi, p_hi_lo);
|
||||
Packet p_lo_hi, p_lo_lo;
|
||||
@@ -1659,7 +1805,8 @@ EIGEN_STRONG_INLINE void twoprod(const Packet& x_hi, const Packet& x_lo, const P
|
||||
// for basic building blocks of double-word arithmetic", Joldes, Muller, & Popescu,
|
||||
// 2017. https://hal.archives-ouvertes.fr/hal-01351529
|
||||
template <typename Packet>
|
||||
void doubleword_div_fp(const Packet& x_hi, const Packet& x_lo, const Packet& y, Packet& z_hi, Packet& z_lo) {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void doubleword_div_fp(const Packet& x_hi, const Packet& x_lo, const Packet& y,
|
||||
Packet& z_hi, Packet& z_lo) {
|
||||
const Packet t_hi = pdiv(x_hi, y);
|
||||
Packet pi_hi, pi_lo;
|
||||
twoprod(t_hi, y, pi_hi, pi_lo);
|
||||
@@ -1674,7 +1821,7 @@ void doubleword_div_fp(const Packet& x_hi, const Packet& x_lo, const Packet& y,
|
||||
template <typename Scalar>
|
||||
struct accurate_log2 {
|
||||
template <typename Packet>
|
||||
EIGEN_STRONG_INLINE void operator()(const Packet& x, Packet& log2_x_hi, Packet& log2_x_lo) {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(const Packet& x, Packet& log2_x_hi, Packet& log2_x_lo) {
|
||||
log2_x_hi = plog2(x);
|
||||
log2_x_lo = pzero(x);
|
||||
}
|
||||
@@ -1689,7 +1836,7 @@ struct accurate_log2 {
|
||||
template <>
|
||||
struct accurate_log2<float> {
|
||||
template <typename Packet>
|
||||
EIGEN_STRONG_INLINE void operator()(const Packet& z, Packet& log2_x_hi, Packet& log2_x_lo) {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(const Packet& z, Packet& log2_x_hi, Packet& log2_x_lo) {
|
||||
// The function log(1+x)/x is approximated in the interval
|
||||
// [1/sqrt(2)-1;sqrt(2)-1] by a degree 10 polynomial of the form
|
||||
// Q(x) = (C0 + x * (C1 + x * (C2 + x * (C3 + x * P(x))))),
|
||||
@@ -1769,7 +1916,7 @@ struct accurate_log2<float> {
|
||||
template <>
|
||||
struct accurate_log2<double> {
|
||||
template <typename Packet>
|
||||
EIGEN_STRONG_INLINE void operator()(const Packet& x, Packet& log2_x_hi, Packet& log2_x_lo) {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(const Packet& x, Packet& log2_x_hi, Packet& log2_x_lo) {
|
||||
// We use a transformation of variables:
|
||||
// r = c * (x-1) / (x+1),
|
||||
// such that
|
||||
@@ -1850,13 +1997,13 @@ struct accurate_log2<double> {
|
||||
}
|
||||
};
|
||||
|
||||
// This function computes exp2(x) (i.e. 2**x).
|
||||
// This function accurately computes exp2(x) for x in [-0.5:0.5], which is
|
||||
// needed in pow(x,y).
|
||||
template <typename Scalar>
|
||||
struct fast_accurate_exp2 {
|
||||
template <typename Packet>
|
||||
EIGEN_STRONG_INLINE Packet operator()(const Packet& x) {
|
||||
// TODO(rmlarsen): Add a pexp2 packetop.
|
||||
return pexp(pmul(pset1<Packet>(Scalar(EIGEN_LN2)), x));
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet operator()(const Packet& x) {
|
||||
return generic_exp2(x);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -1867,7 +2014,7 @@ struct fast_accurate_exp2 {
|
||||
template <>
|
||||
struct fast_accurate_exp2<float> {
|
||||
template <typename Packet>
|
||||
EIGEN_STRONG_INLINE Packet operator()(const Packet& x) {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet operator()(const Packet& x) {
|
||||
// This function approximates exp2(x) by a degree 6 polynomial of the form
|
||||
// Q(x) = 1 + x * (C + x * P(x)), where the degree 4 polynomial P(x) is evaluated in
|
||||
// single precision, and the remaining steps are evaluated with extra precision using
|
||||
@@ -1924,7 +2071,7 @@ struct fast_accurate_exp2<float> {
|
||||
template <>
|
||||
struct fast_accurate_exp2<double> {
|
||||
template <typename Packet>
|
||||
EIGEN_STRONG_INLINE Packet operator()(const Packet& x) {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet operator()(const Packet& x) {
|
||||
// This function approximates exp2(x) by a degree 10 polynomial of the form
|
||||
// Q(x) = 1 + x * (C + x * P(x)), where the degree 8 polynomial P(x) is evaluated in
|
||||
// single precision, and the remaining steps are evaluated with extra precision using
|
||||
@@ -1990,7 +2137,7 @@ struct fast_accurate_exp2<double> {
|
||||
// TODO(rmlarsen): We should probably add this as a packet up 'ppow', to make it
|
||||
// easier to specialize or turn off for specific types and/or backends.x
|
||||
template <typename Packet>
|
||||
EIGEN_STRONG_INLINE Packet generic_pow_impl(const Packet& x, const Packet& y) {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet generic_pow_impl(const Packet& x, const Packet& y) {
|
||||
typedef typename unpacket_traits<Packet>::type Scalar;
|
||||
// Split x into exponent e_x and mantissa m_x.
|
||||
Packet e_x;
|
||||
@@ -2107,134 +2254,6 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet generic_pow(const Pac
|
||||
pselect(negate_pow_abs, pnegate(pow_abs), pow_abs)))))));
|
||||
}
|
||||
|
||||
/* polevl (modified for Eigen)
|
||||
*
|
||||
* Evaluate polynomial
|
||||
*
|
||||
*
|
||||
*
|
||||
* SYNOPSIS:
|
||||
*
|
||||
* int N;
|
||||
* Scalar x, y, coef[N+1];
|
||||
*
|
||||
* y = polevl<decltype(x), N>( x, coef);
|
||||
*
|
||||
*
|
||||
*
|
||||
* DESCRIPTION:
|
||||
*
|
||||
* Evaluates polynomial of degree N:
|
||||
*
|
||||
* 2 N
|
||||
* y = C + C x + C x +...+ C x
|
||||
* 0 1 2 N
|
||||
*
|
||||
* Coefficients are stored in reverse order:
|
||||
*
|
||||
* coef[0] = C , ..., coef[N] = C .
|
||||
* N 0
|
||||
*
|
||||
* The function p1evl() assumes that coef[N] = 1.0 and is
|
||||
* omitted from the array. Its calling arguments are
|
||||
* otherwise the same as polevl().
|
||||
*
|
||||
*
|
||||
* The Eigen implementation is templatized. For best speed, store
|
||||
* coef as a const array (constexpr), e.g.
|
||||
*
|
||||
* const double coef[] = {1.0, 2.0, 3.0, ...};
|
||||
*
|
||||
*/
|
||||
template <typename Packet, int N>
|
||||
struct ppolevl {
|
||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(const Packet& x,
|
||||
const typename unpacket_traits<Packet>::type coeff[]) {
|
||||
EIGEN_STATIC_ASSERT((N > 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
|
||||
return pmadd(ppolevl<Packet, N - 1>::run(x, coeff), x, pset1<Packet>(coeff[N]));
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Packet>
|
||||
struct ppolevl<Packet, 0> {
|
||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(const Packet& x,
|
||||
const typename unpacket_traits<Packet>::type coeff[]) {
|
||||
EIGEN_UNUSED_VARIABLE(x);
|
||||
return pset1<Packet>(coeff[0]);
|
||||
}
|
||||
};
|
||||
|
||||
/* chbevl (modified for Eigen)
|
||||
*
|
||||
* Evaluate Chebyshev series
|
||||
*
|
||||
*
|
||||
*
|
||||
* SYNOPSIS:
|
||||
*
|
||||
* int N;
|
||||
* Scalar x, y, coef[N], chebevl();
|
||||
*
|
||||
* y = chbevl( x, coef, N );
|
||||
*
|
||||
*
|
||||
*
|
||||
* DESCRIPTION:
|
||||
*
|
||||
* Evaluates the series
|
||||
*
|
||||
* N-1
|
||||
* - '
|
||||
* y = > coef[i] T (x/2)
|
||||
* - i
|
||||
* i=0
|
||||
*
|
||||
* of Chebyshev polynomials Ti at argument x/2.
|
||||
*
|
||||
* Coefficients are stored in reverse order, i.e. the zero
|
||||
* order term is last in the array. Note N is the number of
|
||||
* coefficients, not the order.
|
||||
*
|
||||
* If coefficients are for the interval a to b, x must
|
||||
* have been transformed to x -> 2(2x - b - a)/(b-a) before
|
||||
* entering the routine. This maps x from (a, b) to (-1, 1),
|
||||
* over which the Chebyshev polynomials are defined.
|
||||
*
|
||||
* If the coefficients are for the inverted interval, in
|
||||
* which (a, b) is mapped to (1/b, 1/a), the transformation
|
||||
* required is x -> 2(2ab/x - b - a)/(b-a). If b is infinity,
|
||||
* this becomes x -> 4a/x - 1.
|
||||
*
|
||||
*
|
||||
*
|
||||
* SPEED:
|
||||
*
|
||||
* Taking advantage of the recurrence properties of the
|
||||
* Chebyshev polynomials, the routine requires one more
|
||||
* addition per loop than evaluating a nested polynomial of
|
||||
* the same degree.
|
||||
*
|
||||
*/
|
||||
|
||||
template <typename Packet, int N>
|
||||
struct pchebevl {
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Packet run(Packet x,
|
||||
const typename unpacket_traits<Packet>::type coef[]) {
|
||||
typedef typename unpacket_traits<Packet>::type Scalar;
|
||||
Packet b0 = pset1<Packet>(coef[0]);
|
||||
Packet b1 = pset1<Packet>(static_cast<Scalar>(0.f));
|
||||
Packet b2;
|
||||
|
||||
for (int i = 1; i < N; i++) {
|
||||
b2 = b1;
|
||||
b1 = b0;
|
||||
b0 = psub(pmadd(x, b1, pset1<Packet>(coef[i])), b2);
|
||||
}
|
||||
|
||||
return pmul(pset1<Packet>(static_cast<Scalar>(0.5f)), psub(b0, b2));
|
||||
}
|
||||
};
|
||||
|
||||
namespace unary_pow {
|
||||
|
||||
template <typename ScalarExponent, bool IsInteger = NumTraits<ScalarExponent>::IsInteger>
|
||||
@@ -2469,6 +2488,32 @@ struct unary_pow_impl<Packet, ScalarExponent, true, true, false> {
|
||||
}
|
||||
};
|
||||
|
||||
// This function computes exp2(x) = exp(ln(2) * x).
|
||||
// To improve accuracy, the product ln(2)*x is computed using the twoprod
|
||||
// algorithm, such that ln(2) * x = p_hi + p_lo holds exactly. Then exp2(x) is
|
||||
// computed as exp2(x) = exp(p_hi) * exp(p_lo) ~= exp(p_hi) * (1 + p_lo). This
|
||||
// correction step this reduces the maximum absolute error as follows:
|
||||
//
|
||||
// type | max error (simple product) | max error (twoprod) |
|
||||
// -----------------------------------------------------------
|
||||
// float | 35 ulps | 4 ulps |
|
||||
// double | 363 ulps | 110 ulps |
|
||||
//
|
||||
template <typename Packet>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet generic_exp2(const Packet& _x) {
|
||||
typedef typename unpacket_traits<Packet>::type Scalar;
|
||||
constexpr int max_exponent = std::numeric_limits<Scalar>::max_exponent;
|
||||
constexpr int digits = std::numeric_limits<Scalar>::digits;
|
||||
constexpr Scalar max_cap = Scalar(max_exponent + 1);
|
||||
constexpr Scalar min_cap = -Scalar(max_exponent + digits - 1);
|
||||
Packet x = pmax(pmin(_x, pset1<Packet>(max_cap)), pset1<Packet>(min_cap));
|
||||
Packet p_hi, p_lo;
|
||||
twoprod(pset1<Packet>(Scalar(EIGEN_LN2)), x, p_hi, p_lo);
|
||||
Packet exp2_hi = pexp(p_hi);
|
||||
Packet exp2_lo = padd(pset1<Packet>(Scalar(1)), p_lo);
|
||||
return pmul(exp2_hi, exp2_lo);
|
||||
}
|
||||
|
||||
template <typename Packet>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet generic_rint(const Packet& a) {
|
||||
using Scalar = typename unpacket_traits<Packet>::type;
|
||||
@@ -2505,11 +2550,14 @@ template <typename Packet>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet generic_ceil(const Packet& a) {
|
||||
using Scalar = typename unpacket_traits<Packet>::type;
|
||||
const Packet cst_1 = pset1<Packet>(Scalar(1));
|
||||
const Packet sign_mask = pset1<Packet>(static_cast<Scalar>(-0.0));
|
||||
Packet rint_a = generic_rint(a);
|
||||
// if rint(a) < a, then rint(a) == floor(a)
|
||||
Packet mask = pcmp_lt(rint_a, a);
|
||||
Packet offset = pand(cst_1, mask);
|
||||
Packet result = padd(rint_a, offset);
|
||||
// Signed zero must remain signed (e.g. ceil(-0.02) == -0).
|
||||
result = por(result, pand(sign_mask, a));
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
@@ -60,11 +60,19 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog2_double(const Pa
|
||||
|
||||
/** \internal \returns log(1 + x) */
|
||||
template <typename Packet>
|
||||
Packet generic_plog1p(const Packet& x);
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet generic_log1p(const Packet& x);
|
||||
|
||||
/** \internal \returns exp(x)-1 */
|
||||
template <typename Packet>
|
||||
Packet generic_expm1(const Packet& x);
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet generic_expm1(const Packet& x);
|
||||
|
||||
/** \internal \returns atan(x) */
|
||||
template <typename Packet>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet generic_atan(const Packet& x);
|
||||
|
||||
/** \internal \returns exp2(x) */
|
||||
template <typename Packet>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet generic_exp2(const Packet& x);
|
||||
|
||||
/** \internal \returns exp(x) for single precision float */
|
||||
template <typename Packet>
|
||||
@@ -98,22 +106,22 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pasin_float(const Pac
|
||||
template <typename Packet>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pacos_float(const Packet& x);
|
||||
|
||||
/** \internal \returns atan(x) for single precision float */
|
||||
template <typename Packet>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patan_float(const Packet& x);
|
||||
|
||||
/** \internal \returns atan(x) for double precision float */
|
||||
template <typename Packet>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patan_double(const Packet& x);
|
||||
|
||||
/** \internal \returns tanh(x) for single precision float */
|
||||
template <typename Packet>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet ptanh_float(const Packet& x);
|
||||
|
||||
/** \internal \returns tanh(x) for double precision float */
|
||||
template <typename Packet>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet ptanh_double(const Packet& x);
|
||||
|
||||
/** \internal \returns atanh(x) for single precision float */
|
||||
template <typename Packet>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patanh_float(const Packet& x);
|
||||
|
||||
/** \internal \returns atanh(x) for double precision float */
|
||||
template <typename Packet>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patanh_double(const Packet& x);
|
||||
|
||||
/** \internal \returns sqrt(x) for complex types */
|
||||
template <typename Packet>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet psqrt_complex(const Packet& a);
|
||||
@@ -155,36 +163,41 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet generic_round(const Packet& a);
|
||||
return p##METHOD##_##SCALAR(_x); \
|
||||
}
|
||||
|
||||
// Macros for instantiating these generic functions for different backends.
|
||||
#define EIGEN_GENERIC_PACKET_FUNCTION(METHOD, PACKET) \
|
||||
template <> \
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_UNUSED PACKET p##METHOD<PACKET>(const PACKET& _x) { \
|
||||
return generic_##METHOD(_x); \
|
||||
}
|
||||
|
||||
#define EIGEN_FLOAT_PACKET_FUNCTION(METHOD, PACKET) EIGEN_PACKET_FUNCTION(METHOD, float, PACKET)
|
||||
#define EIGEN_DOUBLE_PACKET_FUNCTION(METHOD, PACKET) EIGEN_PACKET_FUNCTION(METHOD, double, PACKET)
|
||||
|
||||
#define EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_FLOAT(PACKET) \
|
||||
EIGEN_FLOAT_PACKET_FUNCTION(sin, PACKET) \
|
||||
EIGEN_FLOAT_PACKET_FUNCTION(cos, PACKET) \
|
||||
EIGEN_FLOAT_PACKET_FUNCTION(asin, PACKET) \
|
||||
EIGEN_FLOAT_PACKET_FUNCTION(acos, PACKET) \
|
||||
EIGEN_FLOAT_PACKET_FUNCTION(atan, PACKET) \
|
||||
EIGEN_FLOAT_PACKET_FUNCTION(tanh, PACKET) \
|
||||
EIGEN_FLOAT_PACKET_FUNCTION(atanh, PACKET) \
|
||||
EIGEN_FLOAT_PACKET_FUNCTION(log, PACKET) \
|
||||
EIGEN_FLOAT_PACKET_FUNCTION(log2, PACKET) \
|
||||
EIGEN_FLOAT_PACKET_FUNCTION(exp, PACKET) \
|
||||
template <> \
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_UNUSED PACKET pexpm1<PACKET>(const PACKET& _x) { \
|
||||
return internal::generic_expm1(_x); \
|
||||
} \
|
||||
template <> \
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_UNUSED PACKET plog1p<PACKET>(const PACKET& _x) { \
|
||||
return internal::generic_plog1p(_x); \
|
||||
}
|
||||
#define EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_FLOAT(PACKET) \
|
||||
EIGEN_FLOAT_PACKET_FUNCTION(sin, PACKET) \
|
||||
EIGEN_FLOAT_PACKET_FUNCTION(cos, PACKET) \
|
||||
EIGEN_FLOAT_PACKET_FUNCTION(asin, PACKET) \
|
||||
EIGEN_FLOAT_PACKET_FUNCTION(acos, PACKET) \
|
||||
EIGEN_FLOAT_PACKET_FUNCTION(tanh, PACKET) \
|
||||
EIGEN_FLOAT_PACKET_FUNCTION(atanh, PACKET) \
|
||||
EIGEN_FLOAT_PACKET_FUNCTION(log, PACKET) \
|
||||
EIGEN_FLOAT_PACKET_FUNCTION(log2, PACKET) \
|
||||
EIGEN_FLOAT_PACKET_FUNCTION(exp, PACKET) \
|
||||
EIGEN_GENERIC_PACKET_FUNCTION(expm1, PACKET) \
|
||||
EIGEN_GENERIC_PACKET_FUNCTION(exp2, PACKET) \
|
||||
EIGEN_GENERIC_PACKET_FUNCTION(log1p, PACKET) \
|
||||
EIGEN_GENERIC_PACKET_FUNCTION(atan, PACKET)
|
||||
|
||||
#define EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_DOUBLE(PACKET) \
|
||||
EIGEN_DOUBLE_PACKET_FUNCTION(atan, PACKET) \
|
||||
EIGEN_DOUBLE_PACKET_FUNCTION(atanh, PACKET) \
|
||||
EIGEN_DOUBLE_PACKET_FUNCTION(log, PACKET) \
|
||||
EIGEN_DOUBLE_PACKET_FUNCTION(sin, PACKET) \
|
||||
EIGEN_DOUBLE_PACKET_FUNCTION(cos, PACKET) \
|
||||
EIGEN_DOUBLE_PACKET_FUNCTION(log2, PACKET) \
|
||||
EIGEN_DOUBLE_PACKET_FUNCTION(exp, PACKET)
|
||||
EIGEN_DOUBLE_PACKET_FUNCTION(exp, PACKET) \
|
||||
EIGEN_DOUBLE_PACKET_FUNCTION(tanh, PACKET) \
|
||||
EIGEN_GENERIC_PACKET_FUNCTION(atan, PACKET) \
|
||||
EIGEN_GENERIC_PACKET_FUNCTION(exp2, PACKET)
|
||||
|
||||
} // end namespace internal
|
||||
} // end namespace Eigen
|
||||
|
||||
@@ -672,6 +672,14 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half exp(const half& a) {
|
||||
return half(::expf(float(a)));
|
||||
#endif
|
||||
}
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half exp2(const half& a) {
|
||||
#if (EIGEN_CUDA_SDK_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530) || \
|
||||
defined(EIGEN_HIP_DEVICE_COMPILE)
|
||||
return half(hexp2(a));
|
||||
#else
|
||||
return half(::exp2f(float(a)));
|
||||
#endif
|
||||
}
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half expm1(const half& a) { return half(numext::expm1(float(a))); }
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log(const half& a) {
|
||||
#if (defined(EIGEN_HAS_CUDA_FP16) && EIGEN_CUDA_SDK_VER >= 80000 && defined(EIGEN_CUDA_ARCH) && \
|
||||
|
||||
@@ -108,6 +108,16 @@ EIGEN_STRONG_INLINE Packet2cf pcast<Packet2f, Packet2cf>(const Packet2f& a) {
|
||||
return Packet2cf(vreinterpretq_f32_u64(vmovl_u32(vreinterpret_u32_f32(a))));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1cf pzero(const Packet1cf& /*a*/) {
|
||||
return Packet1cf(vdup_n_f32(0.0f));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cf pzero(const Packet2cf& /*a*/) {
|
||||
return Packet2cf(vdupq_n_f32(0.0f));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1cf pset1<Packet1cf>(const std::complex<float>& from) {
|
||||
return Packet1cf(vld1_f32(reinterpret_cast<const float*>(&from)));
|
||||
@@ -156,6 +166,20 @@ EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a) {
|
||||
return Packet2cf(vreinterpretq_f32_u32(veorq_u32(b, p4ui_CONJ_XOR())));
|
||||
}
|
||||
|
||||
#ifdef __ARM_FEATURE_COMPLEX
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1cf pmadd<Packet1cf>(const Packet1cf& a, const Packet1cf& b, const Packet1cf& c) {
|
||||
Packet1cf result;
|
||||
result.v = vcmla_f32(c.v, a.v, b.v);
|
||||
result.v = vcmla_rot90_f32(result.v, a.v, b.v);
|
||||
return result;
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1cf pmul<Packet1cf>(const Packet1cf& a, const Packet1cf& b) {
|
||||
return pmadd(a, b, pzero(a));
|
||||
}
|
||||
#else
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1cf pmul<Packet1cf>(const Packet1cf& a, const Packet1cf& b) {
|
||||
Packet2f v1, v2;
|
||||
@@ -175,6 +199,22 @@ EIGEN_STRONG_INLINE Packet1cf pmul<Packet1cf>(const Packet1cf& a, const Packet1c
|
||||
// Add and return the result
|
||||
return Packet1cf(vadd_f32(v1, v2));
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef __ARM_FEATURE_COMPLEX
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cf pmadd<Packet2cf>(const Packet2cf& a, const Packet2cf& b, const Packet2cf& c) {
|
||||
Packet2cf result;
|
||||
result.v = vcmlaq_f32(c.v, a.v, b.v);
|
||||
result.v = vcmlaq_rot90_f32(result.v, a.v, b.v);
|
||||
return result;
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b) {
|
||||
return pmadd(a, b, pzero(a));
|
||||
}
|
||||
#else
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b) {
|
||||
Packet4f v1, v2;
|
||||
@@ -194,6 +234,7 @@ EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2c
|
||||
// Add and return the result
|
||||
return Packet2cf(vaddq_f32(v1, v2));
|
||||
}
|
||||
#endif
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1cf pcmp_eq(const Packet1cf& a, const Packet1cf& b) {
|
||||
@@ -461,13 +502,10 @@ EIGEN_STRONG_INLINE Packet2cf pexp<Packet2cf>(const Packet2cf& a) {
|
||||
//---------- double ----------
|
||||
#if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG
|
||||
|
||||
// See bug 1325, clang fails to call vld1q_u64.
|
||||
#if EIGEN_COMP_CLANG || EIGEN_COMP_CASTXML || EIGEN_COMP_CPE
|
||||
static uint64x2_t p2ul_CONJ_XOR = {0x0, 0x8000000000000000};
|
||||
#else
|
||||
const uint64_t p2ul_conj_XOR_DATA[] = {0x0, 0x8000000000000000};
|
||||
static uint64x2_t p2ul_CONJ_XOR = vld1q_u64(p2ul_conj_XOR_DATA);
|
||||
#endif
|
||||
inline uint64x2_t p2ul_CONJ_XOR() {
|
||||
static const uint64_t p2ul_conj_XOR_DATA[] = {0x0, 0x8000000000000000};
|
||||
return vld1q_u64(p2ul_conj_XOR_DATA);
|
||||
}
|
||||
|
||||
struct Packet1cd {
|
||||
EIGEN_STRONG_INLINE Packet1cd() {}
|
||||
@@ -523,6 +561,11 @@ EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from
|
||||
EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>(reinterpret_cast<const double*>(from)));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1cd pzero<Packet1cd>(const Packet1cd& /*a*/) {
|
||||
return Packet1cd(vdupq_n_f64(0.0));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from) {
|
||||
/* here we really have to use unaligned loads :( */
|
||||
@@ -546,9 +589,23 @@ EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) {
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) {
|
||||
return Packet1cd(vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(a.v), p2ul_CONJ_XOR)));
|
||||
return Packet1cd(vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(a.v), p2ul_CONJ_XOR())));
|
||||
}
|
||||
|
||||
#ifdef __ARM_FEATURE_COMPLEX
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1cd pmadd<Packet1cd>(const Packet1cd& a, const Packet1cd& b, const Packet1cd& c) {
|
||||
Packet1cd result;
|
||||
result.v = vcmlaq_f64(c.v, a.v, b.v);
|
||||
result.v = vcmlaq_rot90_f64(result.v, a.v, b.v);
|
||||
return result;
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b) {
|
||||
return pmadd(a, b, pzero(a));
|
||||
}
|
||||
#else
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b) {
|
||||
Packet2d v1, v2;
|
||||
@@ -562,12 +619,13 @@ EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1c
|
||||
// Multiply the imag a with b
|
||||
v2 = vmulq_f64(v2, b.v);
|
||||
// Conjugate v2
|
||||
v2 = vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(v2), p2ul_CONJ_XOR));
|
||||
v2 = vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(v2), p2ul_CONJ_XOR()));
|
||||
// Swap real/imag elements in v2.
|
||||
v2 = preverse<Packet2d>(v2);
|
||||
// Add and return the result
|
||||
return Packet1cd(vaddq_f64(v1, v2));
|
||||
}
|
||||
#endif
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1cd pcmp_eq(const Packet1cd& a, const Packet1cd& b) {
|
||||
|
||||
@@ -37,6 +37,7 @@ BF16_PACKET_FUNCTION(Packet4f, Packet4bf, psin)
|
||||
BF16_PACKET_FUNCTION(Packet4f, Packet4bf, pcos)
|
||||
BF16_PACKET_FUNCTION(Packet4f, Packet4bf, plog)
|
||||
BF16_PACKET_FUNCTION(Packet4f, Packet4bf, pexp)
|
||||
BF16_PACKET_FUNCTION(Packet4f, Packet4bf, pexp2)
|
||||
BF16_PACKET_FUNCTION(Packet4f, Packet4bf, ptanh)
|
||||
|
||||
template <>
|
||||
|
||||
@@ -209,6 +209,7 @@ struct packet_traits<float> : default_packet_traits {
|
||||
HasRsqrt = 1,
|
||||
HasTanh = EIGEN_FAST_MATH,
|
||||
HasErf = EIGEN_FAST_MATH,
|
||||
HasErfc = EIGEN_FAST_MATH,
|
||||
HasBessel = 0, // Issues with accuracy.
|
||||
HasNdtri = 0
|
||||
};
|
||||
@@ -654,6 +655,16 @@ struct unpacket_traits<Packet2ul> {
|
||||
};
|
||||
};
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2f pzero(const Packet2f& /*a*/) {
|
||||
return vdup_n_f32(0.0f);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4f pzero(const Packet4f& /*a*/) {
|
||||
return vdupq_n_f32(0.0f);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2f pset1<Packet2f>(const float& from) {
|
||||
return vdup_n_f32(from);
|
||||
@@ -5123,13 +5134,15 @@ struct packet_traits<double> : default_packet_traits {
|
||||
HasExp = 1,
|
||||
HasLog = 1,
|
||||
HasATan = 1,
|
||||
HasATanh = 1,
|
||||
#endif
|
||||
HasSin = EIGEN_FAST_MATH,
|
||||
HasCos = EIGEN_FAST_MATH,
|
||||
HasSqrt = 1,
|
||||
HasRsqrt = 1,
|
||||
HasTanh = 0,
|
||||
HasErf = 0
|
||||
HasTanh = EIGEN_FAST_MATH,
|
||||
HasErf = 0,
|
||||
HasErfc = EIGEN_FAST_MATH
|
||||
};
|
||||
};
|
||||
|
||||
@@ -5147,6 +5160,11 @@ struct unpacket_traits<Packet2d> {
|
||||
};
|
||||
};
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2d pzero<Packet2d>(const Packet2d& /*a*/) {
|
||||
return vdupq_n_f64(0.0);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) {
|
||||
return vdupq_n_f64(from);
|
||||
|
||||
@@ -89,19 +89,25 @@ EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a) {
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b) {
|
||||
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) {
|
||||
#ifdef EIGEN_VECTORIZE_SSE3
|
||||
return Packet2cf(_mm_addsub_ps(_mm_mul_ps(_mm_moveldup_ps(a.v), b.v),
|
||||
_mm_mul_ps(_mm_movehdup_ps(a.v), vec4f_swizzle1(b.v, 1, 0, 3, 2))));
|
||||
// return Packet2cf(_mm_addsub_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v),
|
||||
// _mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
|
||||
// vec4f_swizzle1(b.v, 1, 0, 3, 2))));
|
||||
__m128 tmp1 = _mm_mul_ps(_mm_movehdup_ps(a.v), vec4f_swizzle1(b.v, 1, 0, 3, 2));
|
||||
__m128 tmp2 = _mm_moveldup_ps(a.v);
|
||||
#else
|
||||
const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000, 0x00000000, 0x80000000, 0x00000000));
|
||||
return Packet2cf(
|
||||
_mm_add_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v),
|
||||
_mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3), vec4f_swizzle1(b.v, 1, 0, 3, 2)), mask)));
|
||||
__m128 tmp1 = _mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3), vec4f_swizzle1(b.v, 1, 0, 3, 2));
|
||||
__m128 tmp2 = vec4f_swizzle1(a.v, 0, 0, 2, 2);
|
||||
#endif
|
||||
#ifdef EIGEN_VECTORIZE_FMA
|
||||
__m128 result = _mm_fmaddsub_ps(tmp2, b.v, tmp1);
|
||||
#else
|
||||
#ifdef EIGEN_VECTORIZE_SSE3
|
||||
__m128 result = _mm_addsub_ps(_mm_mul_ps(tmp2, b.v), tmp1);
|
||||
#else
|
||||
const __m128 mask = _mm_setr_ps(-0.0f, 0.0f, -0.0f, 0.0f);
|
||||
__m128 result = _mm_add_ps(_mm_mul_ps(tmp2, b.v), _mm_xor_ps(tmp1, mask));
|
||||
#endif
|
||||
#endif
|
||||
return Packet2cf(result);
|
||||
}
|
||||
|
||||
template <>
|
||||
@@ -127,11 +133,11 @@ EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packe
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cf pload<Packet2cf>(const std::complex<float>* from) {
|
||||
EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload<Packet4f>(&numext::real_ref(*from)));
|
||||
EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(_mm_load_ps(&numext::real_ref(*from)));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from) {
|
||||
EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu<Packet4f>(&numext::real_ref(*from)));
|
||||
EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(_mm_loadu_ps(&numext::real_ref(*from)));
|
||||
}
|
||||
|
||||
template <>
|
||||
@@ -148,11 +154,11 @@ EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* fro
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE void pstore<std::complex<float> >(std::complex<float>* to, const Packet2cf& from) {
|
||||
EIGEN_DEBUG_ALIGNED_STORE pstore(&numext::real_ref(*to), Packet4f(from.v));
|
||||
EIGEN_DEBUG_ALIGNED_STORE _mm_store_ps(&numext::real_ref(*to), from.v);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float>* to, const Packet2cf& from) {
|
||||
EIGEN_DEBUG_UNALIGNED_STORE pstoreu(&numext::real_ref(*to), Packet4f(from.v));
|
||||
EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_ps(&numext::real_ref(*to), from.v);
|
||||
}
|
||||
|
||||
template <>
|
||||
@@ -277,15 +283,24 @@ EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) {
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b) {
|
||||
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) {
|
||||
__m128d tmp1 = _mm_mul_pd(_mm_unpackhi_pd(a.v, a.v), vec2d_swizzle1(b.v, 1, 0));
|
||||
#ifdef EIGEN_VECTORIZE_SSE3
|
||||
return Packet1cd(_mm_addsub_pd(_mm_mul_pd(_mm_movedup_pd(a.v), b.v),
|
||||
_mm_mul_pd(vec2d_swizzle1(a.v, 1, 1), vec2d_swizzle1(b.v, 1, 0))));
|
||||
__m128d tmp2 = _mm_movedup_pd(a.v);
|
||||
#else
|
||||
const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x0, 0x0, 0x80000000, 0x0));
|
||||
return Packet1cd(_mm_add_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v),
|
||||
_mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 1, 1), vec2d_swizzle1(b.v, 1, 0)), mask)));
|
||||
__m128d tmp2 = _mm_unpacklo_pd(a.v, a.v);
|
||||
#endif
|
||||
#ifdef EIGEN_VECTORIZE_FMA
|
||||
__m128d result = _mm_fmaddsub_pd(tmp2, b.v, tmp1);
|
||||
#else
|
||||
#ifdef EIGEN_VECTORIZE_SSE3
|
||||
__m128d result = _mm_addsub_pd(_mm_mul_pd(tmp2, b.v), tmp1);
|
||||
#else
|
||||
const __m128d mask = _mm_setr_pd(-0.0, 0.0);
|
||||
__m128d result = _mm_add_pd(_mm_mul_pd(tmp2, b.v), _mm_xor_pd(tmp1, mask));
|
||||
#endif
|
||||
#endif
|
||||
return Packet1cd(result);
|
||||
}
|
||||
|
||||
template <>
|
||||
@@ -312,11 +327,11 @@ EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packe
|
||||
// FIXME force unaligned load, this is a temporary fix
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1cd pload<Packet1cd>(const std::complex<double>* from) {
|
||||
EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>((const double*)from));
|
||||
EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(_mm_load_pd((const double*)from));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) {
|
||||
EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>((const double*)from));
|
||||
EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(_mm_loadu_pd((const double*)from));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1cd
|
||||
@@ -332,11 +347,11 @@ EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* fr
|
||||
// FIXME force unaligned store, this is a temporary fix
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE void pstore<std::complex<double> >(std::complex<double>* to, const Packet1cd& from) {
|
||||
EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, Packet2d(from.v));
|
||||
EIGEN_DEBUG_ALIGNED_STORE _mm_store_pd((double*)to, from.v);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double>* to, const Packet1cd& from) {
|
||||
EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, Packet2d(from.v));
|
||||
EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_pd((double*)to, from.v);
|
||||
}
|
||||
|
||||
template <>
|
||||
@@ -430,6 +445,74 @@ EIGEN_STRONG_INLINE Packet2cf pexp<Packet2cf>(const Packet2cf& a) {
|
||||
return pexp_complex<Packet2cf>(a);
|
||||
}
|
||||
|
||||
#ifdef EIGEN_VECTORIZE_FMA
|
||||
// std::complex<float>
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& a, const Packet2cf& b, const Packet2cf& c) {
|
||||
__m128 a_odd = _mm_movehdup_ps(a.v);
|
||||
__m128 a_even = _mm_moveldup_ps(a.v);
|
||||
__m128 b_swap = _mm_permute_ps(b.v, _MM_SHUFFLE(2, 3, 0, 1));
|
||||
__m128 result = _mm_fmaddsub_ps(a_even, b.v, _mm_fmaddsub_ps(a_odd, b_swap, c.v));
|
||||
return Packet2cf(result);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cf pmsub(const Packet2cf& a, const Packet2cf& b, const Packet2cf& c) {
|
||||
__m128 a_odd = _mm_movehdup_ps(a.v);
|
||||
__m128 a_even = _mm_moveldup_ps(a.v);
|
||||
__m128 b_swap = _mm_permute_ps(b.v, _MM_SHUFFLE(2, 3, 0, 1));
|
||||
__m128 result = _mm_fmaddsub_ps(a_even, b.v, _mm_fmsubadd_ps(a_odd, b_swap, c.v));
|
||||
return Packet2cf(result);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cf pnmadd(const Packet2cf& a, const Packet2cf& b, const Packet2cf& c) {
|
||||
__m128 a_odd = _mm_movehdup_ps(a.v);
|
||||
__m128 a_even = _mm_moveldup_ps(a.v);
|
||||
__m128 b_swap = _mm_permute_ps(b.v, _MM_SHUFFLE(2, 3, 0, 1));
|
||||
__m128 result = _mm_fmaddsub_ps(a_odd, b_swap, _mm_fmaddsub_ps(a_even, b.v, c.v));
|
||||
return Packet2cf(result);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cf pnmsub(const Packet2cf& a, const Packet2cf& b, const Packet2cf& c) {
|
||||
__m128 a_odd = _mm_movehdup_ps(a.v);
|
||||
__m128 a_even = _mm_moveldup_ps(a.v);
|
||||
__m128 b_swap = _mm_permute_ps(b.v, _MM_SHUFFLE(2, 3, 0, 1));
|
||||
__m128 result = _mm_fmaddsub_ps(a_odd, b_swap, _mm_fmsubadd_ps(a_even, b.v, c.v));
|
||||
return Packet2cf(result);
|
||||
}
|
||||
// std::complex<double>
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& a, const Packet1cd& b, const Packet1cd& c) {
|
||||
__m128d a_odd = _mm_permute_pd(a.v, 0x3);
|
||||
__m128d a_even = _mm_movedup_pd(a.v);
|
||||
__m128d b_swap = _mm_permute_pd(b.v, 0x1);
|
||||
__m128d result = _mm_fmaddsub_pd(a_even, b.v, _mm_fmaddsub_pd(a_odd, b_swap, c.v));
|
||||
return Packet1cd(result);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1cd pmsub(const Packet1cd& a, const Packet1cd& b, const Packet1cd& c) {
|
||||
__m128d a_odd = _mm_permute_pd(a.v, 0x3);
|
||||
__m128d a_even = _mm_movedup_pd(a.v);
|
||||
__m128d b_swap = _mm_permute_pd(b.v, 0x1);
|
||||
__m128d result = _mm_fmaddsub_pd(a_even, b.v, _mm_fmsubadd_pd(a_odd, b_swap, c.v));
|
||||
return Packet1cd(result);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1cd pnmadd(const Packet1cd& a, const Packet1cd& b, const Packet1cd& c) {
|
||||
__m128d a_odd = _mm_permute_pd(a.v, 0x3);
|
||||
__m128d a_even = _mm_movedup_pd(a.v);
|
||||
__m128d b_swap = _mm_permute_pd(b.v, 0x1);
|
||||
__m128d result = _mm_fmaddsub_pd(a_odd, b_swap, _mm_fmaddsub_pd(a_even, b.v, c.v));
|
||||
return Packet1cd(result);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1cd pnmsub(const Packet1cd& a, const Packet1cd& b, const Packet1cd& c) {
|
||||
__m128d a_odd = _mm_permute_pd(a.v, 0x3);
|
||||
__m128d a_even = _mm_movedup_pd(a.v);
|
||||
__m128d b_swap = _mm_permute_pd(b.v, 0x1);
|
||||
__m128d result = _mm_fmaddsub_pd(a_odd, b_swap, _mm_fmsubadd_pd(a_even, b.v, c.v));
|
||||
return Packet1cd(result);
|
||||
}
|
||||
#endif
|
||||
} // end namespace internal
|
||||
} // end namespace Eigen
|
||||
|
||||
|
||||
@@ -197,6 +197,7 @@ struct packet_traits<float> : default_packet_traits {
|
||||
HasRsqrt = 1,
|
||||
HasTanh = EIGEN_FAST_MATH,
|
||||
HasErf = EIGEN_FAST_MATH,
|
||||
HasErfc = EIGEN_FAST_MATH,
|
||||
HasBlend = 1,
|
||||
HasSign = 0 // The manually vectorized version is slightly slower for SSE.
|
||||
};
|
||||
@@ -214,11 +215,14 @@ struct packet_traits<double> : default_packet_traits {
|
||||
HasDiv = 1,
|
||||
HasSin = EIGEN_FAST_MATH,
|
||||
HasCos = EIGEN_FAST_MATH,
|
||||
HasTanh = EIGEN_FAST_MATH,
|
||||
HasLog = 1,
|
||||
HasErfc = EIGEN_FAST_MATH,
|
||||
HasExp = 1,
|
||||
HasSqrt = 1,
|
||||
HasRsqrt = 1,
|
||||
HasATan = 1,
|
||||
HasATanh = 1,
|
||||
HasBlend = 1
|
||||
};
|
||||
};
|
||||
@@ -1228,13 +1232,13 @@ EIGEN_STRONG_INLINE Packet4ui plogical_shift_left(const Packet4ui& a) {
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a) {
|
||||
const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF));
|
||||
return _mm_and_ps(a, mask);
|
||||
const __m128i mask = _mm_setr_epi32(0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF);
|
||||
return _mm_castsi128_ps(_mm_and_si128(mask, _mm_castps_si128(a)));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a) {
|
||||
const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0xFFFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF, 0x7FFFFFFF));
|
||||
return _mm_and_pd(a, mask);
|
||||
const __m128i mask = _mm_setr_epi32(0xFFFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF, 0x7FFFFFFF);
|
||||
return _mm_castsi128_pd(_mm_and_si128(mask, _mm_castpd_si128(a)));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2l pabs(const Packet2l& a) {
|
||||
@@ -2277,8 +2281,6 @@ EIGEN_STRONG_INLINE __m128i half2floatsse(__m128i h) {
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE __m128i float2half(__m128 f) {
|
||||
__m128i o = _mm_setzero_si128();
|
||||
|
||||
// unsigned int sign_mask = 0x80000000u;
|
||||
__m128i sign = _mm_set1_epi32(0x80000000u);
|
||||
// unsigned int sign = f.u & sign_mask;
|
||||
@@ -2305,7 +2307,7 @@ EIGEN_STRONG_INLINE __m128i float2half(__m128 f) {
|
||||
// f.f += denorm_magic.f;
|
||||
f = _mm_add_ps(f, _mm_castsi128_ps(denorm_magic));
|
||||
// f.u - denorm_magic.u
|
||||
o = _mm_sub_epi32(_mm_castps_si128(f), denorm_magic);
|
||||
__m128i o = _mm_sub_epi32(_mm_castps_si128(f), denorm_magic);
|
||||
o = _mm_and_si128(o, subnorm_mask);
|
||||
// Correct result for inf/nan/zero/subnormal, 0 otherwise
|
||||
o = _mm_or_si128(o, naninf_value);
|
||||
|
||||
@@ -103,6 +103,26 @@ struct functor_traits<scalar_abs2_op<Scalar>> {
|
||||
enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasAbs2 };
|
||||
};
|
||||
|
||||
template <typename Scalar, bool IsComplex = NumTraits<Scalar>::IsComplex>
|
||||
struct squared_norm_functor {
|
||||
typedef Scalar result_type;
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& a) const {
|
||||
return Scalar(numext::real(a) * numext::real(a), numext::imag(a) * numext::imag(a));
|
||||
}
|
||||
template <typename Packet>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const {
|
||||
return Packet(pmul(a.v, a.v));
|
||||
}
|
||||
};
|
||||
template <typename Scalar>
|
||||
struct squared_norm_functor<Scalar, false> : scalar_abs2_op<Scalar> {};
|
||||
|
||||
template <typename Scalar>
|
||||
struct functor_traits<squared_norm_functor<Scalar>> {
|
||||
using Real = typename NumTraits<Scalar>::Real;
|
||||
enum { Cost = NumTraits<Real>::MulCost, PacketAccess = packet_traits<Real>::HasMul };
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the conjugate of a complex value
|
||||
*
|
||||
@@ -356,6 +376,22 @@ struct functor_traits<scalar_exp_op<Scalar>> {
|
||||
};
|
||||
};
|
||||
|
||||
template <typename Scalar>
|
||||
struct scalar_exp2_op {
|
||||
EIGEN_DEVICE_FUNC inline const Scalar operator()(const Scalar& a) const { return internal::pexp2(a); }
|
||||
template <typename Packet>
|
||||
EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const {
|
||||
return internal::pexp2(a);
|
||||
}
|
||||
};
|
||||
template <typename Scalar>
|
||||
struct functor_traits<scalar_exp2_op<Scalar>> {
|
||||
enum {
|
||||
PacketAccess = packet_traits<Scalar>::HasExp,
|
||||
Cost = functor_traits<scalar_exp_op<Scalar>>::Cost // TODO measure cost of exp2
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
*
|
||||
* \brief Template functor to compute the exponential of a scalar - 1.
|
||||
|
||||
@@ -97,6 +97,7 @@ struct general_matrix_matrix_product<Index, LhsScalar, LhsStorageOrder, Conjugat
|
||||
// Then, we set info->task_info[tid].users to the number of threads to mark that all other threads are going to
|
||||
// use it.
|
||||
while (info->task_info[tid].users != 0) {
|
||||
std::this_thread::yield();
|
||||
}
|
||||
info->task_info[tid].users = threads;
|
||||
|
||||
@@ -115,6 +116,7 @@ struct general_matrix_matrix_product<Index, LhsScalar, LhsStorageOrder, Conjugat
|
||||
// However, no need to wait for the B' part which has been updated by the current thread!
|
||||
if (shift > 0) {
|
||||
while (info->task_info[i].sync != k) {
|
||||
std::this_thread::yield();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -68,6 +68,10 @@ struct general_matrix_matrix_triangular_product<Index, LhsScalar, LhsStorageOrde
|
||||
const RhsScalar* rhs_, Index rhsStride, ResScalar* res_, Index resIncr,
|
||||
Index resStride, const ResScalar& alpha,
|
||||
level3_blocking<LhsScalar, RhsScalar>& blocking) {
|
||||
if (size == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
typedef gebp_traits<LhsScalar, RhsScalar> Traits;
|
||||
|
||||
typedef const_blas_data_mapper<LhsScalar, Index, LhsStorageOrder> LhsMapper;
|
||||
@@ -157,7 +161,7 @@ struct tribb_kernel {
|
||||
gebp_kernel<LhsScalar, RhsScalar, Index, ResMapper, mr, nr, ConjLhs, ConjRhs> gebp_kernel1;
|
||||
gebp_kernel<LhsScalar, RhsScalar, Index, BufferMapper, mr, nr, ConjLhs, ConjRhs> gebp_kernel2;
|
||||
|
||||
Matrix<ResScalar, BlockSize, BlockSize, ColMajor> buffer((internal::constructor_without_unaligned_array_assert()));
|
||||
Matrix<ResScalar, BlockSize, BlockSize, ColMajor> buffer;
|
||||
|
||||
// let's process the block per panel of actual_mc x BlockSize,
|
||||
// again, each is split into three parts, etc.
|
||||
|
||||
@@ -71,7 +71,7 @@ inline void setNbThreads(int v) { internal::manage_multi_threading(SetAction, &v
|
||||
// TODO(rmlarsen): Make the device API available instead of
|
||||
// storing a local static pointer variable to avoid this issue.
|
||||
inline ThreadPool* setGemmThreadPool(ThreadPool* new_pool) {
|
||||
static ThreadPool* pool;
|
||||
static ThreadPool* pool = nullptr;
|
||||
if (new_pool != nullptr) {
|
||||
// This will wait for work in all threads in *pool to finish,
|
||||
// then destroy the old ThreadPool, and then replace it with new_pool.
|
||||
@@ -232,7 +232,6 @@ EIGEN_STRONG_INLINE void parallelize_gemm(const Functor& func, Index rows, Index
|
||||
}
|
||||
|
||||
#elif defined(EIGEN_GEMM_THREADPOOL)
|
||||
ei_declare_aligned_stack_constructed_variable(GemmParallelTaskInfo<Index>, meta_info, threads, 0);
|
||||
Barrier barrier(threads);
|
||||
auto task = [=, &func, &barrier, &task_info](int i) {
|
||||
Index actual_threads = threads;
|
||||
|
||||
@@ -197,6 +197,7 @@ struct selfadjoint_product_impl<Lhs, LhsMode, false, Rhs, 0, true> {
|
||||
|
||||
if (!EvalToDest) {
|
||||
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
|
||||
constexpr int Size = Dest::SizeAtCompileTime;
|
||||
Index size = dest.size();
|
||||
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
|
||||
#endif
|
||||
@@ -205,6 +206,7 @@ struct selfadjoint_product_impl<Lhs, LhsMode, false, Rhs, 0, true> {
|
||||
|
||||
if (!UseRhs) {
|
||||
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
|
||||
constexpr int Size = ActualRhsTypeCleaned::SizeAtCompileTime;
|
||||
Index size = rhs.size();
|
||||
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
|
||||
#endif
|
||||
|
||||
@@ -113,13 +113,7 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<
|
||||
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
|
||||
ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
|
||||
|
||||
// To work around an "error: member reference base type 'Matrix<...>
|
||||
// (Eigen::internal::constructor_without_unaligned_array_assert (*)())' is
|
||||
// not a structure or union" compilation error in nvcc (tested V8.0.61),
|
||||
// create a dummy internal::constructor_without_unaligned_array_assert
|
||||
// object to pass to the Matrix constructor.
|
||||
internal::constructor_without_unaligned_array_assert a;
|
||||
Matrix<Scalar, SmallPanelWidth, SmallPanelWidth, LhsStorageOrder> triangularBuffer(a);
|
||||
Matrix<Scalar, SmallPanelWidth, SmallPanelWidth, LhsStorageOrder> triangularBuffer;
|
||||
triangularBuffer.setZero();
|
||||
if ((Mode & ZeroDiag) == ZeroDiag)
|
||||
triangularBuffer.diagonal().setZero();
|
||||
@@ -245,8 +239,7 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<
|
||||
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
|
||||
ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
|
||||
|
||||
internal::constructor_without_unaligned_array_assert a;
|
||||
Matrix<Scalar, SmallPanelWidth, SmallPanelWidth, RhsStorageOrder> triangularBuffer(a);
|
||||
Matrix<Scalar, SmallPanelWidth, SmallPanelWidth, RhsStorageOrder> triangularBuffer;
|
||||
triangularBuffer.setZero();
|
||||
if ((Mode & ZeroDiag) == ZeroDiag)
|
||||
triangularBuffer.diagonal().setZero();
|
||||
|
||||
@@ -230,6 +230,7 @@ struct trmv_selector<Mode, ColMajor> {
|
||||
|
||||
if (!evalToDest) {
|
||||
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
|
||||
constexpr int Size = Dest::SizeAtCompileTime;
|
||||
Index size = dest.size();
|
||||
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
|
||||
#endif
|
||||
@@ -310,6 +311,7 @@ struct trmv_selector<Mode, RowMajor> {
|
||||
#endif
|
||||
}
|
||||
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
|
||||
constexpr int Size = ActualRhsTypeCleaned::SizeAtCompileTime;
|
||||
Index size = actualRhs.size();
|
||||
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
|
||||
#endif
|
||||
|
||||
@@ -241,7 +241,7 @@ class blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType, 1> {
|
||||
|
||||
EIGEN_DEVICE_FUNC const Index stride() const { return m_stride; }
|
||||
EIGEN_DEVICE_FUNC const Index incr() const { return 1; }
|
||||
EIGEN_DEVICE_FUNC const Scalar* data() const { return m_data; }
|
||||
EIGEN_DEVICE_FUNC constexpr const Scalar* data() const { return m_data; }
|
||||
|
||||
EIGEN_DEVICE_FUNC Index firstAligned(Index size) const {
|
||||
if (std::uintptr_t(m_data) % sizeof(Scalar)) {
|
||||
@@ -430,7 +430,7 @@ class blas_data_mapper {
|
||||
|
||||
EIGEN_DEVICE_FUNC const Index stride() const { return m_stride; }
|
||||
EIGEN_DEVICE_FUNC const Index incr() const { return m_incr.value(); }
|
||||
EIGEN_DEVICE_FUNC Scalar* data() const { return m_data; }
|
||||
EIGEN_DEVICE_FUNC constexpr Scalar* data() const { return m_data; }
|
||||
|
||||
protected:
|
||||
Scalar* EIGEN_RESTRICT m_data;
|
||||
|
||||
@@ -89,7 +89,7 @@
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined __NVCC__
|
||||
#if defined __NVCC__ && defined __CUDACC__
|
||||
// MSVC 14.16 (required by CUDA 9.*) does not support the _Pragma keyword, so
|
||||
// we instead use Microsoft's __pragma extension.
|
||||
#if defined _MSC_VER
|
||||
|
||||
@@ -69,7 +69,7 @@ static constexpr auto lastp1 = last + fix<1>;
|
||||
#else
|
||||
// Using a FixedExpr<1> expression is important here to make sure the compiler
|
||||
// can fully optimize the computation starting indices with zero overhead.
|
||||
static constexpr lastp1_t lastp1(last + fix<1>());
|
||||
static constexpr lastp1_t lastp1 = lastp1_t{};
|
||||
#endif
|
||||
|
||||
/** \var end
|
||||
|
||||
@@ -196,6 +196,13 @@
|
||||
#define EIGEN_COMP_PGI 0
|
||||
#endif
|
||||
|
||||
/// \internal EIGEN_COMP_NVHPC set to NVHPC version if the compiler is nvc++
|
||||
#if defined(__NVCOMPILER)
|
||||
#define EIGEN_COMP_NVHPC (__NVCOMPILER_MAJOR__ * 100 + __NVCOMPILER_MINOR__)
|
||||
#else
|
||||
#define EIGEN_COMP_NVHPC 0
|
||||
#endif
|
||||
|
||||
/// \internal EIGEN_COMP_ARM set to 1 if the compiler is ARM Compiler
|
||||
#if defined(__CC_ARM) || defined(__ARMCC_VERSION)
|
||||
#define EIGEN_COMP_ARM 1
|
||||
@@ -1076,14 +1083,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void ignore_unused_variable(cons
|
||||
#define EIGEN_USING_STD(FUNC) using std::FUNC;
|
||||
#endif
|
||||
|
||||
#if EIGEN_COMP_MSVC_STRICT && EIGEN_COMP_NVCC
|
||||
// Wwhen compiling with NVCC, using the base operator is necessary,
|
||||
// otherwise we get duplicate definition errors
|
||||
// For later MSVC versions, we require explicit operator= definition, otherwise we get
|
||||
// use of implicitly deleted operator errors.
|
||||
// (cf Bugs 920, 1000, 1324, 2291)
|
||||
#define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) using Base::operator=;
|
||||
#elif EIGEN_COMP_CLANG // workaround clang bug (see http://forum.kde.org/viewtopic.php?f=74&t=102653)
|
||||
#if EIGEN_COMP_CLANG // workaround clang bug (see http://forum.kde.org/viewtopic.php?f=74&t=102653)
|
||||
#define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \
|
||||
using Base::operator=; \
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const Derived& other) { \
|
||||
|
||||
@@ -116,17 +116,17 @@ class MaxSizeVector {
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool empty() const { return size_ == 0; }
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T* data() { return data_; }
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr T* data() { return data_; }
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T* data() const { return data_; }
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const T* data() const { return data_; }
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T* begin() { return data_; }
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr T* begin() { return data_; }
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T* end() { return data_ + size_; }
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr T* end() { return data_ + size_; }
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T* begin() const { return data_; }
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const T* begin() const { return data_; }
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T* end() const { return data_ + size_; }
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const T* end() const { return data_ + size_; }
|
||||
|
||||
private:
|
||||
size_t reserve_;
|
||||
|
||||
@@ -147,7 +147,7 @@ EIGEN_DEVICE_FUNC inline void* handmade_aligned_malloc(std::size_t size,
|
||||
check_that_malloc_is_allowed();
|
||||
EIGEN_USING_STD(malloc)
|
||||
void* original = malloc(size + alignment);
|
||||
if (original == 0) return 0;
|
||||
if (original == nullptr) return nullptr;
|
||||
uint8_t offset = static_cast<uint8_t>(alignment - (reinterpret_cast<std::size_t>(original) & (alignment - 1)));
|
||||
void* aligned = static_cast<void*>(static_cast<uint8_t*>(original) + offset);
|
||||
*(static_cast<uint8_t*>(aligned) - 1) = offset;
|
||||
@@ -391,7 +391,8 @@ EIGEN_DEVICE_FUNC inline T* move_construct_elements_of_array(T* ptr, T* src, std
|
||||
|
||||
template <typename T>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void check_size_for_overflow(std::size_t size) {
|
||||
if (size > std::size_t(-1) / sizeof(T)) throw_std_bad_alloc();
|
||||
constexpr std::size_t max_elements = (std::numeric_limits<std::ptrdiff_t>::max)() / sizeof(T);
|
||||
if (size > max_elements) throw_std_bad_alloc();
|
||||
}
|
||||
|
||||
/** \internal Allocates \a size objects of type T. The returned pointer is guaranteed to have 16 bytes alignment.
|
||||
@@ -473,7 +474,7 @@ EIGEN_DEVICE_FUNC inline T* conditional_aligned_realloc_new(T* pts, std::size_t
|
||||
|
||||
template <typename T, bool Align>
|
||||
EIGEN_DEVICE_FUNC inline T* conditional_aligned_new_auto(std::size_t size) {
|
||||
if (size == 0) return 0; // short-cut. Also fixes Bug 884
|
||||
if (size == 0) return nullptr; // short-cut. Also fixes Bug 884
|
||||
check_size_for_overflow<T>(size);
|
||||
T* result = static_cast<T*>(conditional_aligned_malloc<Align>(sizeof(T) * size));
|
||||
if (NumTraits<T>::RequireInitialization) {
|
||||
@@ -765,7 +766,7 @@ void swap(scoped_array<T>& a, scoped_array<T>& b) {
|
||||
// We always manually re-align the result of EIGEN_ALLOCA.
|
||||
// If alloca is already aligned, the compiler should be smart enough to optimize away the re-alignment.
|
||||
|
||||
#if (EIGEN_COMP_GNUC || EIGEN_COMP_CLANG)
|
||||
#if ((EIGEN_COMP_GNUC || EIGEN_COMP_CLANG) && !EIGEN_COMP_NVHPC)
|
||||
#define EIGEN_ALIGNED_ALLOCA(SIZE) __builtin_alloca_with_align(SIZE, CHAR_BIT* EIGEN_DEFAULT_ALIGN_BYTES)
|
||||
#else
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void* eigen_aligned_alloca_helper(void* ptr) {
|
||||
|
||||
@@ -319,17 +319,24 @@ template <typename MatrixType>
|
||||
MatrixType EigenSolver<MatrixType>::pseudoEigenvalueMatrix() const {
|
||||
eigen_assert(m_isInitialized && "EigenSolver is not initialized.");
|
||||
const RealScalar precision = RealScalar(2) * NumTraits<RealScalar>::epsilon();
|
||||
Index n = m_eivalues.rows();
|
||||
const Index n = m_eivalues.rows();
|
||||
MatrixType matD = MatrixType::Zero(n, n);
|
||||
for (Index i = 0; i < n; ++i) {
|
||||
if (internal::isMuchSmallerThan(numext::imag(m_eivalues.coeff(i)), numext::real(m_eivalues.coeff(i)), precision))
|
||||
matD.coeffRef(i, i) = numext::real(m_eivalues.coeff(i));
|
||||
else {
|
||||
matD.template block<2, 2>(i, i) << numext::real(m_eivalues.coeff(i)), numext::imag(m_eivalues.coeff(i)),
|
||||
-numext::imag(m_eivalues.coeff(i)), numext::real(m_eivalues.coeff(i));
|
||||
Index i = 0;
|
||||
for (; i < n - 1; ++i) {
|
||||
RealScalar real = numext::real(m_eivalues.coeff(i));
|
||||
RealScalar imag = numext::imag(m_eivalues.coeff(i));
|
||||
matD.coeffRef(i, i) = real;
|
||||
if (!internal::isMuchSmallerThan(imag, real, precision)) {
|
||||
matD.coeffRef(i, i + 1) = imag;
|
||||
matD.coeffRef(i + 1, i) = -imag;
|
||||
matD.coeffRef(i + 1, i + 1) = real;
|
||||
++i;
|
||||
}
|
||||
}
|
||||
if (i == n - 1) {
|
||||
matD.coeffRef(i, i) = numext::real(m_eivalues.coeff(i));
|
||||
}
|
||||
|
||||
return matD;
|
||||
}
|
||||
|
||||
|
||||
@@ -161,7 +161,7 @@ class GeneralizedEigenSolver {
|
||||
compute(A, B, computeEigenvectors);
|
||||
}
|
||||
|
||||
/* \brief Returns the computed generalized eigenvectors.
|
||||
/** \brief Returns the computed generalized eigenvectors.
|
||||
*
|
||||
* \returns %Matrix whose columns are the (possibly complex) right eigenvectors.
|
||||
* i.e. the eigenvectors that solve (A - l*B)x = 0. The ordering matches the eigenvalues.
|
||||
|
||||
@@ -588,9 +588,9 @@ class Transform {
|
||||
EIGEN_DEVICE_FUNC inline Transform inverse(TransformTraits traits = (TransformTraits)Mode) const;
|
||||
|
||||
/** \returns a const pointer to the column major internal matrix */
|
||||
EIGEN_DEVICE_FUNC const Scalar* data() const { return m_matrix.data(); }
|
||||
EIGEN_DEVICE_FUNC constexpr const Scalar* data() const { return m_matrix.data(); }
|
||||
/** \returns a non-const pointer to the column major internal matrix */
|
||||
EIGEN_DEVICE_FUNC Scalar* data() { return m_matrix.data(); }
|
||||
EIGEN_DEVICE_FUNC constexpr Scalar* data() { return m_matrix.data(); }
|
||||
|
||||
/** \returns \c *this with scalar type casted to \a NewScalarType
|
||||
*
|
||||
|
||||
@@ -74,7 +74,9 @@ struct cross3_impl<Architecture::Target, VectorLhs, VectorRhs, float, true> {
|
||||
Packet4f mul1 = pmul(vec4f_swizzle1(a, 1, 2, 0, 3), vec4f_swizzle1(b, 2, 0, 1, 3));
|
||||
Packet4f mul2 = pmul(vec4f_swizzle1(a, 2, 0, 1, 3), vec4f_swizzle1(b, 1, 2, 0, 3));
|
||||
DstPlainType res;
|
||||
pstoret<float, Packet4f, DstAlignment>(&res.x(), psub(mul1, mul2));
|
||||
pstoret<float, Packet4f, DstAlignment>(res.data(), psub(mul1, mul2));
|
||||
// Ensure last component is 0 in case original a or b contain inf/nan.
|
||||
res[3] = 0.0f;
|
||||
return res;
|
||||
}
|
||||
};
|
||||
|
||||
@@ -50,25 +50,6 @@ struct traits<BDCSVD<MatrixType_, Options> > : svd_traits<MatrixType_, Options>
|
||||
typedef MatrixType_ MatrixType;
|
||||
};
|
||||
|
||||
template <typename MatrixType, int Options>
|
||||
struct allocate_small_svd {
|
||||
static void run(JacobiSVD<MatrixType, Options>& smallSvd, Index rows, Index cols, unsigned int) {
|
||||
internal::construct_at(&smallSvd, rows, cols);
|
||||
}
|
||||
};
|
||||
|
||||
EIGEN_DIAGNOSTICS(push)
|
||||
EIGEN_DISABLE_DEPRECATED_WARNING
|
||||
|
||||
template <typename MatrixType>
|
||||
struct allocate_small_svd<MatrixType, 0> {
|
||||
static void run(JacobiSVD<MatrixType>& smallSvd, Index rows, Index cols, unsigned int computationOptions) {
|
||||
internal::construct_at(&smallSvd, rows, cols, computationOptions);
|
||||
}
|
||||
};
|
||||
|
||||
EIGEN_DIAGNOSTICS(pop)
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
/** \ingroup SVD_Module
|
||||
@@ -288,7 +269,7 @@ void BDCSVD<MatrixType, Options>::allocate(Index rows, Index cols, unsigned int
|
||||
if (Base::allocate(rows, cols, computationOptions)) return;
|
||||
|
||||
if (cols < m_algoswap)
|
||||
internal::allocate_small_svd<MatrixType, ComputationOptions>::run(smallSvd, rows, cols, computationOptions);
|
||||
smallSvd.allocate(rows, cols, Options == 0 ? computationOptions : internal::get_computation_options(Options));
|
||||
|
||||
m_computed = MatrixXr::Zero(diagSize() + 1, diagSize());
|
||||
m_compU = computeV();
|
||||
|
||||
@@ -553,8 +553,7 @@ class JacobiSVD : public SVDBase<JacobiSVD<MatrixType_, Options_> > {
|
||||
* \deprecated Will be removed in the next major Eigen version. Options should
|
||||
* be specified in the \a Options template parameter.
|
||||
*/
|
||||
// EIGEN_DEPRECATED // this constructor is used to allocate memory in BDCSVD
|
||||
JacobiSVD(Index rows, Index cols, unsigned int computationOptions) {
|
||||
EIGEN_DEPRECATED JacobiSVD(Index rows, Index cols, unsigned int computationOptions) {
|
||||
internal::check_svd_options_assertions<MatrixType, Options>(computationOptions, rows, cols);
|
||||
allocate(rows, cols, computationOptions);
|
||||
}
|
||||
@@ -612,7 +611,6 @@ class JacobiSVD : public SVDBase<JacobiSVD<MatrixType_, Options_> > {
|
||||
using Base::rank;
|
||||
using Base::rows;
|
||||
|
||||
private:
|
||||
void allocate(Index rows_, Index cols_, unsigned int computationOptions) {
|
||||
if (Base::allocate(rows_, cols_, computationOptions)) return;
|
||||
eigen_assert(!(ShouldComputeThinU && int(QRPreconditioner) == int(FullPivHouseholderQRPreconditioner)) &&
|
||||
@@ -625,6 +623,7 @@ class JacobiSVD : public SVDBase<JacobiSVD<MatrixType_, Options_> > {
|
||||
if (rows() > cols()) m_qr_precond_morerows.allocate(*this);
|
||||
}
|
||||
|
||||
private:
|
||||
JacobiSVD& compute_impl(const MatrixType& matrix, unsigned int computationOptions);
|
||||
|
||||
protected:
|
||||
|
||||
@@ -202,9 +202,9 @@ class SparseMatrix : public SparseCompressedBase<SparseMatrix<Scalar_, Options_,
|
||||
inline StorageIndex* innerNonZeroPtr() { return m_innerNonZeros; }
|
||||
|
||||
/** \internal */
|
||||
inline Storage& data() { return m_data; }
|
||||
constexpr Storage& data() { return m_data; }
|
||||
/** \internal */
|
||||
inline const Storage& data() const { return m_data; }
|
||||
constexpr const Storage& data() const { return m_data; }
|
||||
|
||||
/** \returns the value of the matrix at position \a i, \a j
|
||||
* This function returns Scalar(0) if the element is an explicit \em zero */
|
||||
@@ -829,6 +829,8 @@ class SparseMatrix : public SparseCompressedBase<SparseMatrix<Scalar_, Options_,
|
||||
std::swap(m_innerNonZeros, other.m_innerNonZeros);
|
||||
m_data.swap(other.m_data);
|
||||
}
|
||||
/** Free-function swap. */
|
||||
friend EIGEN_DEVICE_FUNC void swap(SparseMatrix& a, SparseMatrix& b) { a.swap(b); }
|
||||
|
||||
/** Sets *this to the identity matrix.
|
||||
* This function also turns the matrix into compressed mode, and drop any reserved memory. */
|
||||
|
||||
@@ -90,9 +90,9 @@ class SparseVector : public SparseCompressedBase<SparseVector<Scalar_, Options_,
|
||||
inline StorageIndex* innerNonZeroPtr() { return 0; }
|
||||
|
||||
/** \internal */
|
||||
inline Storage& data() { return m_data; }
|
||||
constexpr Storage& data() { return m_data; }
|
||||
/** \internal */
|
||||
inline const Storage& data() const { return m_data; }
|
||||
constexpr const Storage& data() const { return m_data; }
|
||||
|
||||
inline Scalar coeff(Index row, Index col) const {
|
||||
eigen_assert(IsColVector ? (col == 0 && row >= 0 && row < m_size) : (row == 0 && col >= 0 && col < m_size));
|
||||
@@ -278,6 +278,7 @@ class SparseVector : public SparseCompressedBase<SparseVector<Scalar_, Options_,
|
||||
std::swap(m_size, other.m_size);
|
||||
m_data.swap(other.m_data);
|
||||
}
|
||||
friend EIGEN_DEVICE_FUNC void swap(SparseVector& a, SparseVector& b) { a.swap(b); }
|
||||
|
||||
template <int OtherOptions>
|
||||
inline void swap(SparseMatrix<Scalar, OtherOptions, StorageIndex>& other) {
|
||||
@@ -285,6 +286,14 @@ class SparseVector : public SparseCompressedBase<SparseVector<Scalar_, Options_,
|
||||
std::swap(m_size, other.m_innerSize);
|
||||
m_data.swap(other.m_data);
|
||||
}
|
||||
template <int OtherOptions>
|
||||
friend EIGEN_DEVICE_FUNC void swap(SparseVector& a, SparseMatrix<Scalar, OtherOptions, StorageIndex>& b) {
|
||||
a.swap(b);
|
||||
}
|
||||
template <int OtherOptions>
|
||||
friend EIGEN_DEVICE_FUNC void swap(SparseMatrix<Scalar, OtherOptions, StorageIndex>& a, SparseVector& b) {
|
||||
b.swap(a);
|
||||
}
|
||||
|
||||
inline SparseVector& operator=(const SparseVector& other) {
|
||||
if (other.isRValue()) {
|
||||
|
||||
@@ -11,6 +11,7 @@ typedef CwiseUnaryOp<internal::scalar_boolean_not_op<Scalar>, const Derived> Boo
|
||||
typedef CwiseUnaryOp<internal::scalar_bitwise_not_op<Scalar>, const Derived> BitwiseNotReturnType;
|
||||
|
||||
typedef CwiseUnaryOp<internal::scalar_exp_op<Scalar>, const Derived> ExpReturnType;
|
||||
typedef CwiseUnaryOp<internal::scalar_exp2_op<Scalar>, const Derived> Exp2ReturnType;
|
||||
typedef CwiseUnaryOp<internal::scalar_expm1_op<Scalar>, const Derived> Expm1ReturnType;
|
||||
typedef CwiseUnaryOp<internal::scalar_log_op<Scalar>, const Derived> LogReturnType;
|
||||
typedef CwiseUnaryOp<internal::scalar_log1p_op<Scalar>, const Derived> Log1pReturnType;
|
||||
@@ -78,10 +79,20 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Abs2ReturnType abs2() const { return
|
||||
* Example: \include Cwise_exp.cpp
|
||||
* Output: \verbinclude Cwise_exp.out
|
||||
*
|
||||
* \sa <a href="group__CoeffwiseMathFunctions.html#cwisetable_exp">Math functions</a>, pow(), log(), sin(), cos()
|
||||
* \sa <a href="group__CoeffwiseMathFunctions.html#cwisetable_exp">Math functions</a>, exp2(), pow(), log(), sin(),
|
||||
* cos()
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC inline const ExpReturnType exp() const { return ExpReturnType(derived()); }
|
||||
|
||||
/** \returns an expression of the coefficient-wise exponential of *this.
|
||||
*
|
||||
* This function computes the coefficient-wise base2 exponential, i.e. 2^x.
|
||||
*
|
||||
* \sa <a href="group__CoeffwiseMathFunctions.html#cwisetable_exp">Math functions</a>, exp(), pow(), log(), sin(),
|
||||
* cos()
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC inline const Exp2ReturnType exp2() const { return Exp2ReturnType(derived()); }
|
||||
|
||||
/** \returns an expression of the coefficient-wise exponential of *this minus 1.
|
||||
*
|
||||
* In exact arithmetic, \c x.expm1() is equivalent to \c x.exp() - 1,
|
||||
|
||||
@@ -1304,14 +1304,14 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename ConstFixedSegmentReturnType<N>::T
|
||||
template <int N>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename FixedSegmentReturnType<N>::Type tail(Index n = N) {
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
return typename FixedSegmentReturnType<N>::Type(derived(), size() - n);
|
||||
return typename FixedSegmentReturnType<N>::Type(derived(), size() - n, n);
|
||||
}
|
||||
|
||||
/// This is the const version of tail<int>.
|
||||
template <int N>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename ConstFixedSegmentReturnType<N>::Type tail(Index n = N) const {
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
return typename ConstFixedSegmentReturnType<N>::Type(derived(), size() - n);
|
||||
return typename ConstFixedSegmentReturnType<N>::Type(derived(), size() - n, n);
|
||||
}
|
||||
|
||||
/// \returns the \a outer -th column (resp. row) of the matrix \c *this if \c *this
|
||||
|
||||
Reference in New Issue
Block a user