// This file is part of Eigen, a lightweight C++ template library // for linear algebra. // // Copyright (C) 2006-2008 Benoit Jacob // Copyright (C) 2008-2010 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. #ifndef EIGEN_COEFFBASED_PRODUCT_H #define EIGEN_COEFFBASED_PRODUCT_H namespace Eigen { namespace internal { /********************************************************************************* * Coefficient based product implementation. * It is designed for the following use cases: * - small fixed sizes * - lazy products *********************************************************************************/ /* Since the all the dimensions of the product are small, here we can rely * on the generic Assign mechanism to evaluate the product per coeff (or packet). * * Note that here the inner-loops should always be unrolled. */ template struct product_coeff_impl; template struct product_packet_impl; template struct traits > { typedef MatrixXpr XprKind; typedef typename remove_all::type _LhsNested; typedef typename remove_all::type _RhsNested; typedef typename scalar_product_traits::ReturnType Scalar; typedef typename promote_storage_type::StorageKind, typename traits<_RhsNested>::StorageKind>::ret StorageKind; typedef typename promote_index_type::Index, typename traits<_RhsNested>::Index>::type Index; enum { LhsCoeffReadCost = _LhsNested::CoeffReadCost, RhsCoeffReadCost = _RhsNested::CoeffReadCost, LhsFlags = _LhsNested::Flags, RhsFlags = _RhsNested::Flags, RowsAtCompileTime = _LhsNested::RowsAtCompileTime, ColsAtCompileTime = _RhsNested::ColsAtCompileTime, InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(_LhsNested::ColsAtCompileTime, _RhsNested::RowsAtCompileTime), MaxRowsAtCompileTime = _LhsNested::MaxRowsAtCompileTime, MaxColsAtCompileTime = _RhsNested::MaxColsAtCompileTime, LhsRowMajor = LhsFlags & RowMajorBit, RhsRowMajor = RhsFlags & RowMajorBit, SameType = is_same::value, CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit) && (ColsAtCompileTime == Dynamic || ( (ColsAtCompileTime % packet_traits::size) == 0 && (RhsFlags&AlignedBit) ) ), CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit) && (RowsAtCompileTime == Dynamic || ( (RowsAtCompileTime % packet_traits::size) == 0 && (LhsFlags&AlignedBit) ) ), EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1 : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0 : (RhsRowMajor && !CanVectorizeLhs), Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit) | (EvalToRowMajor ? RowMajorBit : 0) | NestingFlags | (LhsFlags & RhsFlags & AlignedBit) // TODO enable vectorization for mixed types | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0), CoeffReadCost = InnerSize == Dynamic ? Dynamic : InnerSize * (NumTraits::MulCost + LhsCoeffReadCost + RhsCoeffReadCost) + (InnerSize - 1) * NumTraits::AddCost, /* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside * of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect * the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI. */ CanVectorizeInner = SameType && LhsRowMajor && (!RhsRowMajor) && (LhsFlags & RhsFlags & ActualPacketAccessBit) && (LhsFlags & RhsFlags & AlignedBit) && (InnerSize % packet_traits::size == 0) }; }; } // end namespace internal template class CoeffBasedProduct : internal::no_assignment_operator, public MatrixBase > { public: typedef MatrixBase Base; EIGEN_DENSE_PUBLIC_INTERFACE(CoeffBasedProduct) typedef typename Base::PlainObject PlainObject; private: typedef typename internal::traits::_LhsNested _LhsNested; typedef typename internal::traits::_RhsNested _RhsNested; enum { PacketSize = internal::packet_traits::size, InnerSize = internal::traits::InnerSize, Unroll = CoeffReadCost != Dynamic && CoeffReadCost <= EIGEN_UNROLLING_LIMIT, CanVectorizeInner = internal::traits::CanVectorizeInner }; typedef internal::product_coeff_impl ScalarCoeffImpl; typedef CoeffBasedProduct LazyCoeffBasedProductType; public: inline CoeffBasedProduct(const CoeffBasedProduct& other) : Base(), m_lhs(other.m_lhs), m_rhs(other.m_rhs) {} template inline CoeffBasedProduct(const Lhs& lhs, const Rhs& rhs) : m_lhs(lhs), m_rhs(rhs) { // we don't allow taking products of matrices of different real types, as that wouldn't be vectorizable. // We still allow to mix T and complex. EIGEN_STATIC_ASSERT((internal::is_same::value), YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) eigen_assert(lhs.cols() == rhs.rows() && "invalid matrix product" && "if you wanted a coeff-wise or a dot product use the respective explicit functions"); } EIGEN_STRONG_INLINE Index rows() const { return m_lhs.rows(); } EIGEN_STRONG_INLINE Index cols() const { return m_rhs.cols(); } EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const { Scalar res; ScalarCoeffImpl::run(row, col, m_lhs, m_rhs, res); return res; } /* Allow index-based non-packet access. It is impossible though to allow index-based packed access, * which is why we don't set the LinearAccessBit. */ EIGEN_STRONG_INLINE const Scalar coeff(Index index) const { Scalar res; const Index row = RowsAtCompileTime == 1 ? 0 : index; const Index col = RowsAtCompileTime == 1 ? index : 0; ScalarCoeffImpl::run(row, col, m_lhs, m_rhs, res); return res; } template EIGEN_STRONG_INLINE const PacketScalar packet(Index row, Index col) const { PacketScalar res; internal::product_packet_impl ::run(row, col, m_lhs, m_rhs, res); return res; } // Implicit conversion to the nested type (trigger the evaluation of the product) EIGEN_STRONG_INLINE operator const PlainObject& () const { m_result.lazyAssign(*this); return m_result; } const _LhsNested& lhs() const { return m_lhs; } const _RhsNested& rhs() const { return m_rhs; } const Diagonal diagonal() const { return reinterpret_cast(*this); } template const Diagonal diagonal() const { return reinterpret_cast(*this); } const Diagonal diagonal(Index index) const { return reinterpret_cast(*this).diagonal(index); } protected: typename internal::add_const_on_value_type::type m_lhs; typename internal::add_const_on_value_type::type m_rhs; mutable PlainObject m_result; }; namespace internal { // here we need to overload the nested rule for products // such that the nested type is a const reference to a plain matrix template struct nested, N, PlainObject> { typedef PlainObject const& type; }; /*************************************************************************** * Normal product .coeff() implementation (with meta-unrolling) ***************************************************************************/ /************************************** *** Scalar path - no vectorization *** **************************************/ template struct product_coeff_impl { typedef typename Lhs::Index Index; static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res) { product_coeff_impl::run(row, col, lhs, rhs, res); res += lhs.coeff(row, UnrollingIndex) * rhs.coeff(UnrollingIndex, col); } }; template struct product_coeff_impl { typedef typename Lhs::Index Index; static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res) { res = lhs.coeff(row, 0) * rhs.coeff(0, col); } }; template struct product_coeff_impl { typedef typename Lhs::Index Index; static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar& res) { eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix"); res = lhs.coeff(row, 0) * rhs.coeff(0, col); for(Index i = 1; i < lhs.cols(); ++i) res += lhs.coeff(row, i) * rhs.coeff(i, col); } }; /******************************************* *** Scalar path with inner vectorization *** *******************************************/ template struct product_coeff_vectorized_unroller { typedef typename Lhs::Index Index; enum { PacketSize = packet_traits::size }; static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres) { product_coeff_vectorized_unroller::run(row, col, lhs, rhs, pres); pres = padd(pres, pmul( lhs.template packet(row, UnrollingIndex) , rhs.template packet(UnrollingIndex, col) )); } }; template struct product_coeff_vectorized_unroller<0, Lhs, Rhs, Packet> { typedef typename Lhs::Index Index; static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres) { pres = pmul(lhs.template packet(row, 0) , rhs.template packet(0, col)); } }; template struct product_coeff_impl { typedef typename Lhs::PacketScalar Packet; typedef typename Lhs::Index Index; enum { PacketSize = packet_traits::size }; static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res) { Packet pres; product_coeff_vectorized_unroller::run(row, col, lhs, rhs, pres); product_coeff_impl::run(row, col, lhs, rhs, res); res = predux(pres); } }; template struct product_coeff_vectorized_dyn_selector { typedef typename Lhs::Index Index; static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) { res = lhs.row(row).transpose().cwiseProduct(rhs.col(col)).sum(); } }; // NOTE the 3 following specializations are because taking .col(0) on a vector is a bit slower // NOTE maybe they are now useless since we have a specialization for Block template struct product_coeff_vectorized_dyn_selector { typedef typename Lhs::Index Index; static EIGEN_STRONG_INLINE void run(Index /*row*/, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) { res = lhs.transpose().cwiseProduct(rhs.col(col)).sum(); } }; template struct product_coeff_vectorized_dyn_selector { typedef typename Lhs::Index Index; static EIGEN_STRONG_INLINE void run(Index row, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) { res = lhs.row(row).transpose().cwiseProduct(rhs).sum(); } }; template struct product_coeff_vectorized_dyn_selector { typedef typename Lhs::Index Index; static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) { res = lhs.transpose().cwiseProduct(rhs).sum(); } }; template struct product_coeff_impl { typedef typename Lhs::Index Index; static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) { product_coeff_vectorized_dyn_selector::run(row, col, lhs, rhs, res); } }; /******************* *** Packet path *** *******************/ template struct product_packet_impl { typedef typename Lhs::Index Index; static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res) { product_packet_impl::run(row, col, lhs, rhs, res); res = pmadd(pset1(lhs.coeff(row, UnrollingIndex)), rhs.template packet(UnrollingIndex, col), res); } }; template struct product_packet_impl { typedef typename Lhs::Index Index; static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res) { product_packet_impl::run(row, col, lhs, rhs, res); res = pmadd(lhs.template packet(row, UnrollingIndex), pset1(rhs.coeff(UnrollingIndex, col)), res); } }; template struct product_packet_impl { typedef typename Lhs::Index Index; static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res) { res = pmul(pset1(lhs.coeff(row, 0)),rhs.template packet(0, col)); } }; template struct product_packet_impl { typedef typename Lhs::Index Index; static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res) { res = pmul(lhs.template packet(row, 0), pset1(rhs.coeff(0, col))); } }; template struct product_packet_impl { typedef typename Lhs::Index Index; static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res) { eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix"); res = pmul(pset1(lhs.coeff(row, 0)),rhs.template packet(0, col)); for(Index i = 1; i < lhs.cols(); ++i) res = pmadd(pset1(lhs.coeff(row, i)), rhs.template packet(i, col), res); } }; template struct product_packet_impl { typedef typename Lhs::Index Index; static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res) { eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix"); res = pmul(lhs.template packet(row, 0), pset1(rhs.coeff(0, col))); for(Index i = 1; i < lhs.cols(); ++i) res = pmadd(lhs.template packet(row, i), pset1(rhs.coeff(i, col)), res); } }; } // end namespace internal } // end namespace Eigen #endif // EIGEN_COEFFBASED_PRODUCT_H