1c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// This file is part of Eigen, a lightweight C++ template library 2c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// for linear algebra. 3c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// 4c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com> 5c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr> 6c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// 7c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// This Source Code Form is subject to the terms of the Mozilla 8c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// Public License v. 2.0. If a copy of the MPL was not distributed 9c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 10c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 11c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#ifndef EIGEN_COEFFBASED_PRODUCT_H 12c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#define EIGEN_COEFFBASED_PRODUCT_H 13c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 14c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathnamespace Eigen { 15c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 16c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathnamespace internal { 17c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 18c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/********************************************************************************* 19c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath* Coefficient based product implementation. 20c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath* It is designed for the following use cases: 21c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath* - small fixed sizes 22c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath* - lazy products 23c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath*********************************************************************************/ 24c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 25c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/* Since the all the dimensions of the product are small, here we can rely 26c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath * on the generic Assign mechanism to evaluate the product per coeff (or packet). 27c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath * 28c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath * Note that here the inner-loops should always be unrolled. 29c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath */ 30c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 31c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<int Traversal, int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar> 32c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct product_coeff_impl; 33c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 34c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<int StorageOrder, int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode> 35c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct product_packet_impl; 36c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 37c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename LhsNested, typename RhsNested, int NestingFlags> 38c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct traits<CoeffBasedProduct<LhsNested,RhsNested,NestingFlags> > 39c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 40c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typedef MatrixXpr XprKind; 41c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typedef typename remove_all<LhsNested>::type _LhsNested; 42c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typedef typename remove_all<RhsNested>::type _RhsNested; 43c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typedef typename scalar_product_traits<typename _LhsNested::Scalar, typename _RhsNested::Scalar>::ReturnType Scalar; 44c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typedef typename promote_storage_type<typename traits<_LhsNested>::StorageKind, 45c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typename traits<_RhsNested>::StorageKind>::ret StorageKind; 46c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typedef typename promote_index_type<typename traits<_LhsNested>::Index, 47c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typename traits<_RhsNested>::Index>::type Index; 48c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 49c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath enum { 50c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath LhsCoeffReadCost = _LhsNested::CoeffReadCost, 51c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath RhsCoeffReadCost = _RhsNested::CoeffReadCost, 52c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath LhsFlags = _LhsNested::Flags, 53c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath RhsFlags = _RhsNested::Flags, 54c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 55c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath RowsAtCompileTime = _LhsNested::RowsAtCompileTime, 56c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath ColsAtCompileTime = _RhsNested::ColsAtCompileTime, 57c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(_LhsNested::ColsAtCompileTime, _RhsNested::RowsAtCompileTime), 58c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 59c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath MaxRowsAtCompileTime = _LhsNested::MaxRowsAtCompileTime, 60c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath MaxColsAtCompileTime = _RhsNested::MaxColsAtCompileTime, 61c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 62c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath LhsRowMajor = LhsFlags & RowMajorBit, 63c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath RhsRowMajor = RhsFlags & RowMajorBit, 64c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 65c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath SameType = is_same<typename _LhsNested::Scalar,typename _RhsNested::Scalar>::value, 66c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 67c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit) 68c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath && (ColsAtCompileTime == Dynamic 69c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath || ( (ColsAtCompileTime % packet_traits<Scalar>::size) == 0 70c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath && (RhsFlags&AlignedBit) 71c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath ) 72c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath ), 73c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 74c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit) 75c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath && (RowsAtCompileTime == Dynamic 76c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath || ( (RowsAtCompileTime % packet_traits<Scalar>::size) == 0 77c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath && (LhsFlags&AlignedBit) 78c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath ) 79c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath ), 80c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 81c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1 82c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0 83c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath : (RhsRowMajor && !CanVectorizeLhs), 84c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 85c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit) 86c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath | (EvalToRowMajor ? RowMajorBit : 0) 87c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath | NestingFlags 88c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath | (LhsFlags & RhsFlags & AlignedBit) 89c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // TODO enable vectorization for mixed types 90c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0), 91c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 92c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath CoeffReadCost = InnerSize == Dynamic ? Dynamic 93c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath : InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost) 94c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath + (InnerSize - 1) * NumTraits<Scalar>::AddCost, 95c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 96c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath /* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside 97c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath * of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner 98c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect 99c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath * the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI. 100c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath */ 101c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath CanVectorizeInner = SameType 102c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath && LhsRowMajor 103c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath && (!RhsRowMajor) 104c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath && (LhsFlags & RhsFlags & ActualPacketAccessBit) 105c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath && (LhsFlags & RhsFlags & AlignedBit) 106c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath && (InnerSize % packet_traits<Scalar>::size == 0) 107c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath }; 108c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 109c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 110c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} // end namespace internal 111c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 112c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename LhsNested, typename RhsNested, int NestingFlags> 113c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathclass CoeffBasedProduct 114c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath : internal::no_assignment_operator, 115c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath public MatrixBase<CoeffBasedProduct<LhsNested, RhsNested, NestingFlags> > 116c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 117c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath public: 118c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 119c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typedef MatrixBase<CoeffBasedProduct> Base; 120c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_DENSE_PUBLIC_INTERFACE(CoeffBasedProduct) 121c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typedef typename Base::PlainObject PlainObject; 122c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 123c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath private: 124c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 125c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typedef typename internal::traits<CoeffBasedProduct>::_LhsNested _LhsNested; 126c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typedef typename internal::traits<CoeffBasedProduct>::_RhsNested _RhsNested; 127c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 128c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath enum { 129c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath PacketSize = internal::packet_traits<Scalar>::size, 130c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath InnerSize = internal::traits<CoeffBasedProduct>::InnerSize, 131c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Unroll = CoeffReadCost != Dynamic && CoeffReadCost <= EIGEN_UNROLLING_LIMIT, 132c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath CanVectorizeInner = internal::traits<CoeffBasedProduct>::CanVectorizeInner 133c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath }; 134c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 135c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typedef internal::product_coeff_impl<CanVectorizeInner ? InnerVectorizedTraversal : DefaultTraversal, 136c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Unroll ? InnerSize-1 : Dynamic, 137c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath _LhsNested, _RhsNested, Scalar> ScalarCoeffImpl; 138c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 139c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typedef CoeffBasedProduct<LhsNested,RhsNested,NestByRefBit> LazyCoeffBasedProductType; 140c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 141c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath public: 142c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 143c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath inline CoeffBasedProduct(const CoeffBasedProduct& other) 144c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath : Base(), m_lhs(other.m_lhs), m_rhs(other.m_rhs) 145c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath {} 146c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 147c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath template<typename Lhs, typename Rhs> 148c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath inline CoeffBasedProduct(const Lhs& lhs, const Rhs& rhs) 149c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath : m_lhs(lhs), m_rhs(rhs) 150c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 151c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // we don't allow taking products of matrices of different real types, as that wouldn't be vectorizable. 152c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // We still allow to mix T and complex<T>. 1537faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez EIGEN_STATIC_ASSERT((internal::scalar_product_traits<typename Lhs::RealScalar, typename Rhs::RealScalar>::Defined), 154c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) 155c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath eigen_assert(lhs.cols() == rhs.rows() 156c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath && "invalid matrix product" 157c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath && "if you wanted a coeff-wise or a dot product use the respective explicit functions"); 158c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 159c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 160c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_STRONG_INLINE Index rows() const { return m_lhs.rows(); } 161c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_STRONG_INLINE Index cols() const { return m_rhs.cols(); } 162c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 163c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const 164c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 165c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Scalar res; 166c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath ScalarCoeffImpl::run(row, col, m_lhs, m_rhs, res); 167c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return res; 168c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 169c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 170c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath /* Allow index-based non-packet access. It is impossible though to allow index-based packed access, 171c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath * which is why we don't set the LinearAccessBit. 172c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath */ 173c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_STRONG_INLINE const Scalar coeff(Index index) const 174c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 175c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Scalar res; 176c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath const Index row = RowsAtCompileTime == 1 ? 0 : index; 177c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath const Index col = RowsAtCompileTime == 1 ? index : 0; 178c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath ScalarCoeffImpl::run(row, col, m_lhs, m_rhs, res); 179c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return res; 180c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 181c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 182c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath template<int LoadMode> 183c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_STRONG_INLINE const PacketScalar packet(Index row, Index col) const 184c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 185c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath PacketScalar res; 186c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath internal::product_packet_impl<Flags&RowMajorBit ? RowMajor : ColMajor, 187c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Unroll ? InnerSize-1 : Dynamic, 188c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath _LhsNested, _RhsNested, PacketScalar, LoadMode> 189c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath ::run(row, col, m_lhs, m_rhs, res); 190c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return res; 191c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 192c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 193c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // Implicit conversion to the nested type (trigger the evaluation of the product) 194c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_STRONG_INLINE operator const PlainObject& () const 195c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 196c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath m_result.lazyAssign(*this); 197c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return m_result; 198c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 199c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 200c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath const _LhsNested& lhs() const { return m_lhs; } 201c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath const _RhsNested& rhs() const { return m_rhs; } 202c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 203c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath const Diagonal<const LazyCoeffBasedProductType,0> diagonal() const 204c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this); } 205c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 206c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath template<int DiagonalIndex> 207c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath const Diagonal<const LazyCoeffBasedProductType,DiagonalIndex> diagonal() const 208c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this); } 209c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 210c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath const Diagonal<const LazyCoeffBasedProductType,Dynamic> diagonal(Index index) const 211c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this).diagonal(index); } 212c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 213c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath protected: 214c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typename internal::add_const_on_value_type<LhsNested>::type m_lhs; 215c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typename internal::add_const_on_value_type<RhsNested>::type m_rhs; 216c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 217c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath mutable PlainObject m_result; 218c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 219c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 220c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathnamespace internal { 221c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 222c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// here we need to overload the nested rule for products 223c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// such that the nested type is a const reference to a plain matrix 224c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Lhs, typename Rhs, int N, typename PlainObject> 225c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct nested<CoeffBasedProduct<Lhs,Rhs,EvalBeforeNestingBit|EvalBeforeAssigningBit>, N, PlainObject> 226c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 227c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typedef PlainObject const& type; 228c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 229c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 230c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/*************************************************************************** 231c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath* Normal product .coeff() implementation (with meta-unrolling) 232c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath***************************************************************************/ 233c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 234c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/************************************** 235c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath*** Scalar path - no vectorization *** 236c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath**************************************/ 237c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 238c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar> 239c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct product_coeff_impl<DefaultTraversal, UnrollingIndex, Lhs, Rhs, RetScalar> 240c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 241c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typedef typename Lhs::Index Index; 242c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res) 243c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 244c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath product_coeff_impl<DefaultTraversal, UnrollingIndex-1, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, res); 245c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath res += lhs.coeff(row, UnrollingIndex) * rhs.coeff(UnrollingIndex, col); 246c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 247c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 248c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 249c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Lhs, typename Rhs, typename RetScalar> 250c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct product_coeff_impl<DefaultTraversal, 0, Lhs, Rhs, RetScalar> 251c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 252c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typedef typename Lhs::Index Index; 253c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res) 254c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 255c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath res = lhs.coeff(row, 0) * rhs.coeff(0, col); 256c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 257c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 258c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 259c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Lhs, typename Rhs, typename RetScalar> 260c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct product_coeff_impl<DefaultTraversal, Dynamic, Lhs, Rhs, RetScalar> 261c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 262c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typedef typename Lhs::Index Index; 263c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar& res) 264c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 265c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix"); 266c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath res = lhs.coeff(row, 0) * rhs.coeff(0, col); 267c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath for(Index i = 1; i < lhs.cols(); ++i) 268c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath res += lhs.coeff(row, i) * rhs.coeff(i, col); 269c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 270c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 271c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 272c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/******************************************* 273c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath*** Scalar path with inner vectorization *** 274c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath*******************************************/ 275c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 276c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet> 277c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct product_coeff_vectorized_unroller 278c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 279c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typedef typename Lhs::Index Index; 280c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath enum { PacketSize = packet_traits<typename Lhs::Scalar>::size }; 281c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres) 282c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 283c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath product_coeff_vectorized_unroller<UnrollingIndex-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres); 284c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath pres = padd(pres, pmul( lhs.template packet<Aligned>(row, UnrollingIndex) , rhs.template packet<Aligned>(UnrollingIndex, col) )); 285c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 286c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 287c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 288c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Lhs, typename Rhs, typename Packet> 289c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct product_coeff_vectorized_unroller<0, Lhs, Rhs, Packet> 290c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 291c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typedef typename Lhs::Index Index; 292c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres) 293c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 294c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath pres = pmul(lhs.template packet<Aligned>(row, 0) , rhs.template packet<Aligned>(0, col)); 295c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 296c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 297c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 298c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar> 299c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct product_coeff_impl<InnerVectorizedTraversal, UnrollingIndex, Lhs, Rhs, RetScalar> 300c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 301c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typedef typename Lhs::PacketScalar Packet; 302c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typedef typename Lhs::Index Index; 303c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath enum { PacketSize = packet_traits<typename Lhs::Scalar>::size }; 304c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res) 305c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 306c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Packet pres; 307c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath product_coeff_vectorized_unroller<UnrollingIndex+1-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres); 308c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath product_coeff_impl<DefaultTraversal,UnrollingIndex,Lhs,Rhs,RetScalar>::run(row, col, lhs, rhs, res); 309c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath res = predux(pres); 310c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 311c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 312c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 313c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Lhs, typename Rhs, int LhsRows = Lhs::RowsAtCompileTime, int RhsCols = Rhs::ColsAtCompileTime> 314c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct product_coeff_vectorized_dyn_selector 315c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 316c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typedef typename Lhs::Index Index; 317c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) 318c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 319c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath res = lhs.row(row).transpose().cwiseProduct(rhs.col(col)).sum(); 320c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 321c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 322c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 323c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// NOTE the 3 following specializations are because taking .col(0) on a vector is a bit slower 324c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// NOTE maybe they are now useless since we have a specialization for Block<Matrix> 325c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Lhs, typename Rhs, int RhsCols> 326c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols> 327c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 328c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typedef typename Lhs::Index Index; 329c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath static EIGEN_STRONG_INLINE void run(Index /*row*/, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) 330c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 331c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath res = lhs.transpose().cwiseProduct(rhs.col(col)).sum(); 332c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 333c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 334c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 335c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Lhs, typename Rhs, int LhsRows> 336c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1> 337c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 338c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typedef typename Lhs::Index Index; 339c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath static EIGEN_STRONG_INLINE void run(Index row, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) 340c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 341c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath res = lhs.row(row).transpose().cwiseProduct(rhs).sum(); 342c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 343c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 344c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 345c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Lhs, typename Rhs> 346c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1> 347c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 348c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typedef typename Lhs::Index Index; 349c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) 350c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 351c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath res = lhs.transpose().cwiseProduct(rhs).sum(); 352c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 353c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 354c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 355c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Lhs, typename Rhs, typename RetScalar> 356c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct product_coeff_impl<InnerVectorizedTraversal, Dynamic, Lhs, Rhs, RetScalar> 357c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 358c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typedef typename Lhs::Index Index; 359c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) 360c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 361c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath product_coeff_vectorized_dyn_selector<Lhs,Rhs>::run(row, col, lhs, rhs, res); 362c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 363c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 364c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 365c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/******************* 366c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath*** Packet path *** 367c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath*******************/ 368c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 369c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode> 370c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode> 371c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 372c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typedef typename Lhs::Index Index; 373c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res) 374c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 375c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res); 376c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath res = pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex)), rhs.template packet<LoadMode>(UnrollingIndex, col), res); 377c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 378c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 379c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 380c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode> 381c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode> 382c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 383c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typedef typename Lhs::Index Index; 384c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res) 385c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 386c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res); 387c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath res = pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex), pset1<Packet>(rhs.coeff(UnrollingIndex, col)), res); 388c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 389c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 390c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 391c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Lhs, typename Rhs, typename Packet, int LoadMode> 392c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode> 393c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 394c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typedef typename Lhs::Index Index; 395c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res) 396c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 397c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col)); 398c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 399c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 400c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 401c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Lhs, typename Rhs, typename Packet, int LoadMode> 402c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode> 403c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 404c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typedef typename Lhs::Index Index; 405c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res) 406c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 407c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col))); 408c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 409c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 410c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 411c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Lhs, typename Rhs, typename Packet, int LoadMode> 412c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode> 413c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 414c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typedef typename Lhs::Index Index; 415c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res) 416c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 417c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix"); 418c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col)); 419c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath for(Index i = 1; i < lhs.cols(); ++i) 420c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode>(i, col), res); 421c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 422c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 423c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 424c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Lhs, typename Rhs, typename Packet, int LoadMode> 425c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode> 426c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 427c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typedef typename Lhs::Index Index; 428c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res) 429c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 430c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix"); 431c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col))); 432c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath for(Index i = 1; i < lhs.cols(); ++i) 433c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath res = pmadd(lhs.template packet<LoadMode>(row, i), pset1<Packet>(rhs.coeff(i, col)), res); 434c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 435c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 436c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 437c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} // end namespace internal 438c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 439c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} // end namespace Eigen 440c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 441c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#endif // EIGEN_COEFFBASED_PRODUCT_H 442