From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Tyler Veness Date: Sun, 12 Jan 2025 21:04:07 -0800 Subject: [PATCH 3/3] Make assignment constexpr --- Eigen/src/Core/AssignEvaluator.h | 165 +++++++++++-------- Eigen/src/Core/EigenBase.h | 2 +- Eigen/src/Core/functors/AssignmentFunctors.h | 2 +- 3 files changed, 102 insertions(+), 67 deletions(-) diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h index f7f0b238b8ca70bbc9100262479cc1dbebab9979..9c2436afa7fe98692a036e6ef255ed104a5bf388 100644 --- a/Eigen/src/Core/AssignEvaluator.h +++ b/Eigen/src/Core/AssignEvaluator.h @@ -263,7 +263,7 @@ struct copy_using_evaluator_innervec_CompleteUnrolling { DstAlignment = Kernel::AssignmentTraits::DstAlignment }; - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) { kernel.template assignPacketByOuterInner(outer, inner); enum { NextIndex = Index + unpacket_traits::size }; copy_using_evaluator_innervec_CompleteUnrolling::run(kernel); @@ -431,17 +431,25 @@ struct dense_assignment_loop { template struct dense_assignment_loop { EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel& kernel) { - typedef typename Kernel::DstEvaluatorType::XprType DstXprType; - typedef typename Kernel::PacketType PacketType; - - enum { - size = DstXprType::SizeAtCompileTime, - packetSize = unpacket_traits::size, - alignedSize = (int(size) / packetSize) * packetSize - }; - - copy_using_evaluator_linearvec_CompleteUnrolling::run(kernel); - copy_using_evaluator_LinearTraversal_CompleteUnrolling::run(kernel); + if (internal::is_constant_evaluated()) { + for (Index outer = 0; outer < kernel.outerSize(); ++outer) { + for (Index inner = 0; inner < kernel.innerSize(); ++inner) { + kernel.assignCoeffByOuterInner(outer, inner); + } + } + } else { + typedef typename Kernel::DstEvaluatorType::XprType DstXprType; + typedef typename Kernel::PacketType PacketType; + + enum { + size = DstXprType::SizeAtCompileTime, + packetSize = unpacket_traits::size, + alignedSize = (int(size) / packetSize) * packetSize + }; + + copy_using_evaluator_linearvec_CompleteUnrolling::run(kernel); + copy_using_evaluator_LinearTraversal_CompleteUnrolling::run(kernel); + } } }; @@ -465,9 +473,17 @@ struct dense_assignment_loop { template struct dense_assignment_loop { - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) { - typedef typename Kernel::DstEvaluatorType::XprType DstXprType; - copy_using_evaluator_innervec_CompleteUnrolling::run(kernel); + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) { + if (internal::is_constant_evaluated()) { + for (Index outer = 0; outer < kernel.outerSize(); ++outer) { + for (Index inner = 0; inner < kernel.innerSize(); ++inner) { + kernel.assignCoeffByOuterInner(outer, inner); + } + } + } else { + typedef typename Kernel::DstEvaluatorType::XprType DstXprType; + copy_using_evaluator_innervec_CompleteUnrolling::run(kernel); + } } }; @@ -498,8 +514,16 @@ struct dense_assignment_loop { template struct dense_assignment_loop { EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel& kernel) { - typedef typename Kernel::DstEvaluatorType::XprType DstXprType; - copy_using_evaluator_LinearTraversal_CompleteUnrolling::run(kernel); + if (internal::is_constant_evaluated()) { + for (Index outer = 0; outer < kernel.outerSize(); ++outer) { + for (Index inner = 0; inner < kernel.innerSize(); ++inner) { + kernel.assignCoeffByOuterInner(outer, inner); + } + } + } else { + typedef typename Kernel::DstEvaluatorType::XprType DstXprType; + copy_using_evaluator_LinearTraversal_CompleteUnrolling::run(kernel); + } } }; @@ -510,41 +534,49 @@ struct dense_assignment_loop { template struct dense_assignment_loop { EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel& kernel) { - typedef typename Kernel::Scalar Scalar; - typedef typename Kernel::PacketType PacketType; - enum { - packetSize = unpacket_traits::size, - requestedAlignment = int(Kernel::AssignmentTraits::InnerRequiredAlignment), - alignable = - packet_traits::AlignedOnScalar || int(Kernel::AssignmentTraits::DstAlignment) >= sizeof(Scalar), - dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment) >= int(requestedAlignment), - dstAlignment = alignable ? int(requestedAlignment) : int(Kernel::AssignmentTraits::DstAlignment) - }; - const Scalar* dst_ptr = kernel.dstDataPtr(); - if ((!bool(dstIsAligned)) && (std::uintptr_t(dst_ptr) % sizeof(Scalar)) > 0) { - // the pointer is not aligned-on scalar, so alignment is not possible - return dense_assignment_loop::run(kernel); - } - const Index packetAlignedMask = packetSize - 1; - const Index innerSize = kernel.innerSize(); - const Index outerSize = kernel.outerSize(); - const Index alignedStep = alignable ? (packetSize - kernel.outerStride() % packetSize) & packetAlignedMask : 0; - Index alignedStart = - ((!alignable) || bool(dstIsAligned)) ? 0 : internal::first_aligned(dst_ptr, innerSize); - - for (Index outer = 0; outer < outerSize; ++outer) { - const Index alignedEnd = alignedStart + ((innerSize - alignedStart) & ~packetAlignedMask); - // do the non-vectorizable part of the assignment - for (Index inner = 0; inner < alignedStart; ++inner) kernel.assignCoeffByOuterInner(outer, inner); - - // do the vectorizable part of the assignment - for (Index inner = alignedStart; inner < alignedEnd; inner += packetSize) - kernel.template assignPacketByOuterInner(outer, inner); - - // do the non-vectorizable part of the assignment - for (Index inner = alignedEnd; inner < innerSize; ++inner) kernel.assignCoeffByOuterInner(outer, inner); - - alignedStart = numext::mini((alignedStart + alignedStep) % packetSize, innerSize); + if (internal::is_constant_evaluated()) { + for (Index outer = 0; outer < kernel.outerSize(); ++outer) { + for (Index inner = 0; inner < kernel.innerSize(); ++inner) { + kernel.assignCoeffByOuterInner(outer, inner); + } + } + } else { + typedef typename Kernel::Scalar Scalar; + typedef typename Kernel::PacketType PacketType; + enum { + packetSize = unpacket_traits::size, + requestedAlignment = int(Kernel::AssignmentTraits::InnerRequiredAlignment), + alignable = + packet_traits::AlignedOnScalar || int(Kernel::AssignmentTraits::DstAlignment) >= sizeof(Scalar), + dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment) >= int(requestedAlignment), + dstAlignment = alignable ? int(requestedAlignment) : int(Kernel::AssignmentTraits::DstAlignment) + }; + const Scalar* dst_ptr = kernel.dstDataPtr(); + if ((!bool(dstIsAligned)) && (std::uintptr_t(dst_ptr) % sizeof(Scalar)) > 0) { + // the pointer is not aligned-on scalar, so alignment is not possible + return dense_assignment_loop::run(kernel); + } + const Index packetAlignedMask = packetSize - 1; + const Index innerSize = kernel.innerSize(); + const Index outerSize = kernel.outerSize(); + const Index alignedStep = alignable ? (packetSize - kernel.outerStride() % packetSize) & packetAlignedMask : 0; + Index alignedStart = + ((!alignable) || bool(dstIsAligned)) ? 0 : internal::first_aligned(dst_ptr, innerSize); + + for (Index outer = 0; outer < outerSize; ++outer) { + const Index alignedEnd = alignedStart + ((innerSize - alignedStart) & ~packetAlignedMask); + // do the non-vectorizable part of the assignment + for (Index inner = 0; inner < alignedStart; ++inner) kernel.assignCoeffByOuterInner(outer, inner); + + // do the vectorizable part of the assignment + for (Index inner = alignedStart; inner < alignedEnd; inner += packetSize) + kernel.template assignPacketByOuterInner(outer, inner); + + // do the non-vectorizable part of the assignment + for (Index inner = alignedEnd; inner < innerSize; ++inner) kernel.assignCoeffByOuterInner(outer, inner); + + alignedStart = numext::mini((alignedStart + alignedStep) % packetSize, innerSize); + } } } }; @@ -594,9 +626,9 @@ class generic_dense_assignment_kernel { typedef copy_using_evaluator_traits AssignmentTraits; typedef typename AssignmentTraits::PacketType PacketType; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE generic_dense_assignment_kernel(DstEvaluatorType& dst, - const SrcEvaluatorType& src, - const Functor& func, DstXprType& dstExpr) + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE constexpr generic_dense_assignment_kernel(DstEvaluatorType& dst, const SrcEvaluatorType& src, + const Functor& func, DstXprType& dstExpr) : m_dst(dst), m_src(src), m_functor(func), m_dstExpr(dstExpr) { #ifdef EIGEN_DEBUG_ASSIGN AssignmentTraits::debug(); @@ -614,7 +646,7 @@ class generic_dense_assignment_kernel { EIGEN_DEVICE_FUNC const SrcEvaluatorType& srcEvaluator() const EIGEN_NOEXCEPT { return m_src; } /// Assign src(row,col) to dst(row,col) through the assignment functor. - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index row, Index col) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void assignCoeff(Index row, Index col) { m_functor.assignCoeff(m_dst.coeffRef(row, col), m_src.coeff(row, col)); } @@ -624,7 +656,7 @@ class generic_dense_assignment_kernel { } /// \sa assignCoeff(Index,Index) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeffByOuterInner(Index outer, Index inner) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void assignCoeffByOuterInner(Index outer, Index inner) { Index row = rowIndexByOuterInner(outer, inner); Index col = colIndexByOuterInner(outer, inner); assignCoeff(row, col); @@ -648,7 +680,7 @@ class generic_dense_assignment_kernel { assignPacket(row, col); } - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner) { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr Index rowIndexByOuterInner(Index outer, Index inner) { typedef typename DstEvaluatorType::ExpressionTraits Traits; return int(Traits::RowsAtCompileTime) == 1 ? 0 : int(Traits::ColsAtCompileTime) == 1 ? inner @@ -656,7 +688,7 @@ class generic_dense_assignment_kernel { : inner; } - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index colIndexByOuterInner(Index outer, Index inner) { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr Index colIndexByOuterInner(Index outer, Index inner) { typedef typename DstEvaluatorType::ExpressionTraits Traits; return int(Traits::ColsAtCompileTime) == 1 ? 0 : int(Traits::RowsAtCompileTime) == 1 ? inner @@ -708,8 +740,8 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resize_if_allowed(DstXprType& dst, co } template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resize_if_allowed(DstXprType& dst, const SrcXprType& src, - const internal::assign_op& /*func*/) { +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize_if_allowed(DstXprType& dst, const SrcXprType& src, + const internal::assign_op& /*func*/) { Index dstRows = src.rows(); Index dstCols = src.cols(); if (((dst.rows() != dstRows) || (dst.cols() != dstCols))) dst.resize(dstRows, dstCols); @@ -790,7 +822,7 @@ struct Assignment; // not has to bother about these annoying details. template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_assignment(Dst& dst, const Src& src) { +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment(Dst& dst, const Src& src) { call_assignment(dst, src, internal::assign_op()); } template @@ -807,7 +839,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void call_assignment( } template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_assignment( +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment( Dst& dst, const Src& src, const Func& func, std::enable_if_t::value, void*> = 0) { call_assignment_no_alias(dst, src, func); } @@ -891,9 +923,12 @@ EIGEN_DEVICE_FUNC void check_for_aliasing(const Dst& dst, const Src& src); // both partial specialization+SFINAE without ambiguous specialization template struct Assignment { - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(DstXprType& dst, const SrcXprType& src, const Functor& func) { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(DstXprType& dst, const SrcXprType& src, + const Functor& func) { #ifndef EIGEN_NO_DEBUG - internal::check_for_aliasing(dst, src); + if (!internal::is_constant_evaluated()) { + internal::check_for_aliasing(dst, src); + } #endif call_dense_assignment_loop(dst, src, func); diff --git a/Eigen/src/Core/EigenBase.h b/Eigen/src/Core/EigenBase.h index 6d167006a094181fa3693b19f6b9daeb6f2afb79..894bfc13b15eb994abd90f100da15de5bd8b22b7 100644 --- a/Eigen/src/Core/EigenBase.h +++ b/Eigen/src/Core/EigenBase.h @@ -50,7 +50,7 @@ struct EigenBase { /** \returns a const reference to the derived object */ EIGEN_DEVICE_FUNC constexpr const Derived& derived() const { return *static_cast(this); } - EIGEN_DEVICE_FUNC inline Derived& const_cast_derived() const { + EIGEN_DEVICE_FUNC inline constexpr Derived& const_cast_derived() const { return *static_cast(const_cast(this)); } EIGEN_DEVICE_FUNC inline const Derived& const_derived() const { return *static_cast(this); } diff --git a/Eigen/src/Core/functors/AssignmentFunctors.h b/Eigen/src/Core/functors/AssignmentFunctors.h index 09d1da8ca2bcb41384520f46e2b793ba8b28a798..3687bb20db4dfe1a2f6cf1342b4fcbd8f91f1f68 100644 --- a/Eigen/src/Core/functors/AssignmentFunctors.h +++ b/Eigen/src/Core/functors/AssignmentFunctors.h @@ -23,7 +23,7 @@ namespace internal { */ template struct assign_op { - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(DstScalar& a, const SrcScalar& b) const { a = b; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void assignCoeff(DstScalar& a, const SrcScalar& b) const { a = b; } template EIGEN_STRONG_INLINE void assignPacket(DstScalar* a, const Packet& b) const {