// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_COEFFBASED_PRODUCT_H
#define EIGEN_COEFFBASED_PRODUCT_H

namespace Eigen {

namespace internal {

/*********************************************************************************
*  Coefficient based product implementation.
*  It is designed for the following use cases:
*  - small fixed sizes
*  - lazy products
*********************************************************************************/

/* Since all the dimensions of the product are small, here we can rely
 * on the generic Assign mechanism to evaluate the product per coeff (or packet).
 *
 * Note that here the inner-loops should always be unrolled.
29 */ 30 31template<int Traversal, int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar> 32struct product_coeff_impl; 33 34template<int StorageOrder, int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode> 35struct product_packet_impl; 36 37template<typename LhsNested, typename RhsNested, int NestingFlags> 38struct traits<CoeffBasedProduct<LhsNested,RhsNested,NestingFlags> > 39{ 40 typedef MatrixXpr XprKind; 41 typedef typename remove_all<LhsNested>::type _LhsNested; 42 typedef typename remove_all<RhsNested>::type _RhsNested; 43 typedef typename scalar_product_traits<typename _LhsNested::Scalar, typename _RhsNested::Scalar>::ReturnType Scalar; 44 typedef typename promote_storage_type<typename traits<_LhsNested>::StorageKind, 45 typename traits<_RhsNested>::StorageKind>::ret StorageKind; 46 typedef typename promote_index_type<typename traits<_LhsNested>::Index, 47 typename traits<_RhsNested>::Index>::type Index; 48 49 enum { 50 LhsCoeffReadCost = _LhsNested::CoeffReadCost, 51 RhsCoeffReadCost = _RhsNested::CoeffReadCost, 52 LhsFlags = _LhsNested::Flags, 53 RhsFlags = _RhsNested::Flags, 54 55 RowsAtCompileTime = _LhsNested::RowsAtCompileTime, 56 ColsAtCompileTime = _RhsNested::ColsAtCompileTime, 57 InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(_LhsNested::ColsAtCompileTime, _RhsNested::RowsAtCompileTime), 58 59 MaxRowsAtCompileTime = _LhsNested::MaxRowsAtCompileTime, 60 MaxColsAtCompileTime = _RhsNested::MaxColsAtCompileTime, 61 62 LhsRowMajor = LhsFlags & RowMajorBit, 63 RhsRowMajor = RhsFlags & RowMajorBit, 64 65 SameType = is_same<typename _LhsNested::Scalar,typename _RhsNested::Scalar>::value, 66 67 CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit) 68 && (ColsAtCompileTime == Dynamic 69 || ( (ColsAtCompileTime % packet_traits<Scalar>::size) == 0 70 && (RhsFlags&AlignedBit) 71 ) 72 ), 73 74 CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit) 75 && (RowsAtCompileTime == Dynamic 76 || ( (RowsAtCompileTime % 
packet_traits<Scalar>::size) == 0 77 && (LhsFlags&AlignedBit) 78 ) 79 ), 80 81 EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1 82 : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0 83 : (RhsRowMajor && !CanVectorizeLhs), 84 85 Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit) 86 | (EvalToRowMajor ? RowMajorBit : 0) 87 | NestingFlags 88 | (LhsFlags & RhsFlags & AlignedBit) 89 // TODO enable vectorization for mixed types 90 | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0), 91 92 CoeffReadCost = InnerSize == Dynamic ? Dynamic 93 : InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost) 94 + (InnerSize - 1) * NumTraits<Scalar>::AddCost, 95 96 /* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside 97 * of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner 98 * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect 99 * the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI. 
100 */ 101 CanVectorizeInner = SameType 102 && LhsRowMajor 103 && (!RhsRowMajor) 104 && (LhsFlags & RhsFlags & ActualPacketAccessBit) 105 && (LhsFlags & RhsFlags & AlignedBit) 106 && (InnerSize % packet_traits<Scalar>::size == 0) 107 }; 108}; 109 110} // end namespace internal 111 112template<typename LhsNested, typename RhsNested, int NestingFlags> 113class CoeffBasedProduct 114 : internal::no_assignment_operator, 115 public MatrixBase<CoeffBasedProduct<LhsNested, RhsNested, NestingFlags> > 116{ 117 public: 118 119 typedef MatrixBase<CoeffBasedProduct> Base; 120 EIGEN_DENSE_PUBLIC_INTERFACE(CoeffBasedProduct) 121 typedef typename Base::PlainObject PlainObject; 122 123 private: 124 125 typedef typename internal::traits<CoeffBasedProduct>::_LhsNested _LhsNested; 126 typedef typename internal::traits<CoeffBasedProduct>::_RhsNested _RhsNested; 127 128 enum { 129 PacketSize = internal::packet_traits<Scalar>::size, 130 InnerSize = internal::traits<CoeffBasedProduct>::InnerSize, 131 Unroll = CoeffReadCost != Dynamic && CoeffReadCost <= EIGEN_UNROLLING_LIMIT, 132 CanVectorizeInner = internal::traits<CoeffBasedProduct>::CanVectorizeInner 133 }; 134 135 typedef internal::product_coeff_impl<CanVectorizeInner ? InnerVectorizedTraversal : DefaultTraversal, 136 Unroll ? InnerSize-1 : Dynamic, 137 _LhsNested, _RhsNested, Scalar> ScalarCoeffImpl; 138 139 typedef CoeffBasedProduct<LhsNested,RhsNested,NestByRefBit> LazyCoeffBasedProductType; 140 141 public: 142 143 inline CoeffBasedProduct(const CoeffBasedProduct& other) 144 : Base(), m_lhs(other.m_lhs), m_rhs(other.m_rhs) 145 {} 146 147 template<typename Lhs, typename Rhs> 148 inline CoeffBasedProduct(const Lhs& lhs, const Rhs& rhs) 149 : m_lhs(lhs), m_rhs(rhs) 150 { 151 // we don't allow taking products of matrices of different real types, as that wouldn't be vectorizable. 152 // We still allow to mix T and complex<T>. 
153 EIGEN_STATIC_ASSERT((internal::scalar_product_traits<typename Lhs::RealScalar, typename Rhs::RealScalar>::Defined), 154 YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) 155 eigen_assert(lhs.cols() == rhs.rows() 156 && "invalid matrix product" 157 && "if you wanted a coeff-wise or a dot product use the respective explicit functions"); 158 } 159 160 EIGEN_STRONG_INLINE Index rows() const { return m_lhs.rows(); } 161 EIGEN_STRONG_INLINE Index cols() const { return m_rhs.cols(); } 162 163 EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const 164 { 165 Scalar res; 166 ScalarCoeffImpl::run(row, col, m_lhs, m_rhs, res); 167 return res; 168 } 169 170 /* Allow index-based non-packet access. It is impossible though to allow index-based packed access, 171 * which is why we don't set the LinearAccessBit. 172 */ 173 EIGEN_STRONG_INLINE const Scalar coeff(Index index) const 174 { 175 Scalar res; 176 const Index row = RowsAtCompileTime == 1 ? 0 : index; 177 const Index col = RowsAtCompileTime == 1 ? index : 0; 178 ScalarCoeffImpl::run(row, col, m_lhs, m_rhs, res); 179 return res; 180 } 181 182 template<int LoadMode> 183 EIGEN_STRONG_INLINE const PacketScalar packet(Index row, Index col) const 184 { 185 PacketScalar res; 186 internal::product_packet_impl<Flags&RowMajorBit ? RowMajor : ColMajor, 187 Unroll ? 
InnerSize-1 : Dynamic, 188 _LhsNested, _RhsNested, PacketScalar, LoadMode> 189 ::run(row, col, m_lhs, m_rhs, res); 190 return res; 191 } 192 193 // Implicit conversion to the nested type (trigger the evaluation of the product) 194 EIGEN_STRONG_INLINE operator const PlainObject& () const 195 { 196 m_result.lazyAssign(*this); 197 return m_result; 198 } 199 200 const _LhsNested& lhs() const { return m_lhs; } 201 const _RhsNested& rhs() const { return m_rhs; } 202 203 const Diagonal<const LazyCoeffBasedProductType,0> diagonal() const 204 { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this); } 205 206 template<int DiagonalIndex> 207 const Diagonal<const LazyCoeffBasedProductType,DiagonalIndex> diagonal() const 208 { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this); } 209 210 const Diagonal<const LazyCoeffBasedProductType,Dynamic> diagonal(Index index) const 211 { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this).diagonal(index); } 212 213 protected: 214 typename internal::add_const_on_value_type<LhsNested>::type m_lhs; 215 typename internal::add_const_on_value_type<RhsNested>::type m_rhs; 216 217 mutable PlainObject m_result; 218}; 219 220namespace internal { 221 222// here we need to overload the nested rule for products 223// such that the nested type is a const reference to a plain matrix 224template<typename Lhs, typename Rhs, int N, typename PlainObject> 225struct nested<CoeffBasedProduct<Lhs,Rhs,EvalBeforeNestingBit|EvalBeforeAssigningBit>, N, PlainObject> 226{ 227 typedef PlainObject const& type; 228}; 229 230/*************************************************************************** 231* Normal product .coeff() implementation (with meta-unrolling) 232***************************************************************************/ 233 234/************************************** 235*** Scalar path - no vectorization *** 236**************************************/ 237 238template<int UnrollingIndex, typename Lhs, typename 
Rhs, typename RetScalar> 239struct product_coeff_impl<DefaultTraversal, UnrollingIndex, Lhs, Rhs, RetScalar> 240{ 241 typedef typename Lhs::Index Index; 242 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res) 243 { 244 product_coeff_impl<DefaultTraversal, UnrollingIndex-1, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, res); 245 res += lhs.coeff(row, UnrollingIndex) * rhs.coeff(UnrollingIndex, col); 246 } 247}; 248 249template<typename Lhs, typename Rhs, typename RetScalar> 250struct product_coeff_impl<DefaultTraversal, 0, Lhs, Rhs, RetScalar> 251{ 252 typedef typename Lhs::Index Index; 253 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res) 254 { 255 res = lhs.coeff(row, 0) * rhs.coeff(0, col); 256 } 257}; 258 259template<typename Lhs, typename Rhs, typename RetScalar> 260struct product_coeff_impl<DefaultTraversal, Dynamic, Lhs, Rhs, RetScalar> 261{ 262 typedef typename Lhs::Index Index; 263 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar& res) 264 { 265 eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix"); 266 res = lhs.coeff(row, 0) * rhs.coeff(0, col); 267 for(Index i = 1; i < lhs.cols(); ++i) 268 res += lhs.coeff(row, i) * rhs.coeff(i, col); 269 } 270}; 271 272/******************************************* 273*** Scalar path with inner vectorization *** 274*******************************************/ 275 276template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet> 277struct product_coeff_vectorized_unroller 278{ 279 typedef typename Lhs::Index Index; 280 enum { PacketSize = packet_traits<typename Lhs::Scalar>::size }; 281 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres) 282 { 283 product_coeff_vectorized_unroller<UnrollingIndex-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres); 284 pres = 
padd(pres, pmul( lhs.template packet<Aligned>(row, UnrollingIndex) , rhs.template packet<Aligned>(UnrollingIndex, col) )); 285 } 286}; 287 288template<typename Lhs, typename Rhs, typename Packet> 289struct product_coeff_vectorized_unroller<0, Lhs, Rhs, Packet> 290{ 291 typedef typename Lhs::Index Index; 292 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres) 293 { 294 pres = pmul(lhs.template packet<Aligned>(row, 0) , rhs.template packet<Aligned>(0, col)); 295 } 296}; 297 298template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar> 299struct product_coeff_impl<InnerVectorizedTraversal, UnrollingIndex, Lhs, Rhs, RetScalar> 300{ 301 typedef typename Lhs::PacketScalar Packet; 302 typedef typename Lhs::Index Index; 303 enum { PacketSize = packet_traits<typename Lhs::Scalar>::size }; 304 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res) 305 { 306 Packet pres; 307 product_coeff_vectorized_unroller<UnrollingIndex+1-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres); 308 product_coeff_impl<DefaultTraversal,UnrollingIndex,Lhs,Rhs,RetScalar>::run(row, col, lhs, rhs, res); 309 res = predux(pres); 310 } 311}; 312 313template<typename Lhs, typename Rhs, int LhsRows = Lhs::RowsAtCompileTime, int RhsCols = Rhs::ColsAtCompileTime> 314struct product_coeff_vectorized_dyn_selector 315{ 316 typedef typename Lhs::Index Index; 317 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) 318 { 319 res = lhs.row(row).transpose().cwiseProduct(rhs.col(col)).sum(); 320 } 321}; 322 323// NOTE the 3 following specializations are because taking .col(0) on a vector is a bit slower 324// NOTE maybe they are now useless since we have a specialization for Block<Matrix> 325template<typename Lhs, typename Rhs, int RhsCols> 326struct 
product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols> 327{ 328 typedef typename Lhs::Index Index; 329 static EIGEN_STRONG_INLINE void run(Index /*row*/, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) 330 { 331 res = lhs.transpose().cwiseProduct(rhs.col(col)).sum(); 332 } 333}; 334 335template<typename Lhs, typename Rhs, int LhsRows> 336struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1> 337{ 338 typedef typename Lhs::Index Index; 339 static EIGEN_STRONG_INLINE void run(Index row, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) 340 { 341 res = lhs.row(row).transpose().cwiseProduct(rhs).sum(); 342 } 343}; 344 345template<typename Lhs, typename Rhs> 346struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1> 347{ 348 typedef typename Lhs::Index Index; 349 static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) 350 { 351 res = lhs.transpose().cwiseProduct(rhs).sum(); 352 } 353}; 354 355template<typename Lhs, typename Rhs, typename RetScalar> 356struct product_coeff_impl<InnerVectorizedTraversal, Dynamic, Lhs, Rhs, RetScalar> 357{ 358 typedef typename Lhs::Index Index; 359 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) 360 { 361 product_coeff_vectorized_dyn_selector<Lhs,Rhs>::run(row, col, lhs, rhs, res); 362 } 363}; 364 365/******************* 366*** Packet path *** 367*******************/ 368 369template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode> 370struct product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode> 371{ 372 typedef typename Lhs::Index Index; 373 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res) 374 { 375 product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res); 376 res = 
pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex)), rhs.template packet<LoadMode>(UnrollingIndex, col), res); 377 } 378}; 379 380template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode> 381struct product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode> 382{ 383 typedef typename Lhs::Index Index; 384 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res) 385 { 386 product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res); 387 res = pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex), pset1<Packet>(rhs.coeff(UnrollingIndex, col)), res); 388 } 389}; 390 391template<typename Lhs, typename Rhs, typename Packet, int LoadMode> 392struct product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode> 393{ 394 typedef typename Lhs::Index Index; 395 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res) 396 { 397 res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col)); 398 } 399}; 400 401template<typename Lhs, typename Rhs, typename Packet, int LoadMode> 402struct product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode> 403{ 404 typedef typename Lhs::Index Index; 405 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res) 406 { 407 res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col))); 408 } 409}; 410 411template<typename Lhs, typename Rhs, typename Packet, int LoadMode> 412struct product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode> 413{ 414 typedef typename Lhs::Index Index; 415 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res) 416 { 417 eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix"); 418 res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col)); 419 for(Index i 
= 1; i < lhs.cols(); ++i) 420 res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode>(i, col), res); 421 } 422}; 423 424template<typename Lhs, typename Rhs, typename Packet, int LoadMode> 425struct product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode> 426{ 427 typedef typename Lhs::Index Index; 428 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res) 429 { 430 eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix"); 431 res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col))); 432 for(Index i = 1; i < lhs.cols(); ++i) 433 res = pmadd(lhs.template packet<LoadMode>(row, i), pset1<Packet>(rhs.coeff(i, col)), res); 434 } 435}; 436 437} // end namespace internal 438 439} // end namespace Eigen 440 441#endif // EIGEN_COEFFBASED_PRODUCT_H 442