1// This file is part of Eigen, a lightweight C++ template library
2// for linear algebra.
3//
4// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
5// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
6//
7// This Source Code Form is subject to the terms of the Mozilla
8// Public License v. 2.0. If a copy of the MPL was not distributed
9// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
10
11#ifndef EIGEN_COEFFBASED_PRODUCT_H
12#define EIGEN_COEFFBASED_PRODUCT_H
13
14namespace Eigen {
15
16namespace internal {
17
18/*********************************************************************************
19*  Coefficient based product implementation.
20*  It is designed for the following use cases:
21*  - small fixed sizes
22*  - lazy products
23*********************************************************************************/
24
25/* Since the all the dimensions of the product are small, here we can rely
26 * on the generic Assign mechanism to evaluate the product per coeff (or packet).
27 *
28 * Note that here the inner-loops should always be unrolled.
29 */
30
31template<int Traversal, int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
32struct product_coeff_impl;
33
34template<int StorageOrder, int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
35struct product_packet_impl;
36
37template<typename LhsNested, typename RhsNested, int NestingFlags>
38struct traits<CoeffBasedProduct<LhsNested,RhsNested,NestingFlags> >
39{
40  typedef MatrixXpr XprKind;
41  typedef typename remove_all<LhsNested>::type _LhsNested;
42  typedef typename remove_all<RhsNested>::type _RhsNested;
43  typedef typename scalar_product_traits<typename _LhsNested::Scalar, typename _RhsNested::Scalar>::ReturnType Scalar;
44  typedef typename promote_storage_type<typename traits<_LhsNested>::StorageKind,
45                                           typename traits<_RhsNested>::StorageKind>::ret StorageKind;
46  typedef typename promote_index_type<typename traits<_LhsNested>::Index,
47                                         typename traits<_RhsNested>::Index>::type Index;
48
49  enum {
50      LhsCoeffReadCost = _LhsNested::CoeffReadCost,
51      RhsCoeffReadCost = _RhsNested::CoeffReadCost,
52      LhsFlags = _LhsNested::Flags,
53      RhsFlags = _RhsNested::Flags,
54
55      RowsAtCompileTime = _LhsNested::RowsAtCompileTime,
56      ColsAtCompileTime = _RhsNested::ColsAtCompileTime,
57      InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(_LhsNested::ColsAtCompileTime, _RhsNested::RowsAtCompileTime),
58
59      MaxRowsAtCompileTime = _LhsNested::MaxRowsAtCompileTime,
60      MaxColsAtCompileTime = _RhsNested::MaxColsAtCompileTime,
61
62      LhsRowMajor = LhsFlags & RowMajorBit,
63      RhsRowMajor = RhsFlags & RowMajorBit,
64
65      SameType = is_same<typename _LhsNested::Scalar,typename _RhsNested::Scalar>::value,
66
67      CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit)
68                      && (ColsAtCompileTime == Dynamic
69                          || ( (ColsAtCompileTime % packet_traits<Scalar>::size) == 0
70                              && (RhsFlags&AlignedBit)
71                             )
72                         ),
73
74      CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit)
75                      && (RowsAtCompileTime == Dynamic
76                          || ( (RowsAtCompileTime % packet_traits<Scalar>::size) == 0
77                              && (LhsFlags&AlignedBit)
78                             )
79                         ),
80
81      EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1
82                     : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
83                     : (RhsRowMajor && !CanVectorizeLhs),
84
85      Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit)
86            | (EvalToRowMajor ? RowMajorBit : 0)
87            | NestingFlags
88            | (LhsFlags & RhsFlags & AlignedBit)
89            // TODO enable vectorization for mixed types
90            | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0),
91
92      CoeffReadCost = InnerSize == Dynamic ? Dynamic
93                    : InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost)
94                      + (InnerSize - 1) * NumTraits<Scalar>::AddCost,
95
96      /* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside
97      * of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner
98      * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect
99      * the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI.
100      */
101      CanVectorizeInner =    SameType
102                          && LhsRowMajor
103                          && (!RhsRowMajor)
104                          && (LhsFlags & RhsFlags & ActualPacketAccessBit)
105                          && (LhsFlags & RhsFlags & AlignedBit)
106                          && (InnerSize % packet_traits<Scalar>::size == 0)
107    };
108};
109
110} // end namespace internal
111
112template<typename LhsNested, typename RhsNested, int NestingFlags>
113class CoeffBasedProduct
114  : internal::no_assignment_operator,
115    public MatrixBase<CoeffBasedProduct<LhsNested, RhsNested, NestingFlags> >
116{
117  public:
118
119    typedef MatrixBase<CoeffBasedProduct> Base;
120    EIGEN_DENSE_PUBLIC_INTERFACE(CoeffBasedProduct)
121    typedef typename Base::PlainObject PlainObject;
122
123  private:
124
125    typedef typename internal::traits<CoeffBasedProduct>::_LhsNested _LhsNested;
126    typedef typename internal::traits<CoeffBasedProduct>::_RhsNested _RhsNested;
127
128    enum {
129      PacketSize = internal::packet_traits<Scalar>::size,
130      InnerSize  = internal::traits<CoeffBasedProduct>::InnerSize,
131      Unroll = CoeffReadCost != Dynamic && CoeffReadCost <= EIGEN_UNROLLING_LIMIT,
132      CanVectorizeInner = internal::traits<CoeffBasedProduct>::CanVectorizeInner
133    };
134
135    typedef internal::product_coeff_impl<CanVectorizeInner ? InnerVectorizedTraversal : DefaultTraversal,
136                                   Unroll ? InnerSize-1 : Dynamic,
137                                   _LhsNested, _RhsNested, Scalar> ScalarCoeffImpl;
138
139    typedef CoeffBasedProduct<LhsNested,RhsNested,NestByRefBit> LazyCoeffBasedProductType;
140
141  public:
142
143    inline CoeffBasedProduct(const CoeffBasedProduct& other)
144      : Base(), m_lhs(other.m_lhs), m_rhs(other.m_rhs)
145    {}
146
147    template<typename Lhs, typename Rhs>
148    inline CoeffBasedProduct(const Lhs& lhs, const Rhs& rhs)
149      : m_lhs(lhs), m_rhs(rhs)
150    {
151      // we don't allow taking products of matrices of different real types, as that wouldn't be vectorizable.
152      // We still allow to mix T and complex<T>.
153      EIGEN_STATIC_ASSERT((internal::scalar_product_traits<typename Lhs::RealScalar, typename Rhs::RealScalar>::Defined),
154        YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
155      eigen_assert(lhs.cols() == rhs.rows()
156        && "invalid matrix product"
157        && "if you wanted a coeff-wise or a dot product use the respective explicit functions");
158    }
159
160    EIGEN_STRONG_INLINE Index rows() const { return m_lhs.rows(); }
161    EIGEN_STRONG_INLINE Index cols() const { return m_rhs.cols(); }
162
163    EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
164    {
165      Scalar res;
166      ScalarCoeffImpl::run(row, col, m_lhs, m_rhs, res);
167      return res;
168    }
169
170    /* Allow index-based non-packet access. It is impossible though to allow index-based packed access,
171     * which is why we don't set the LinearAccessBit.
172     */
173    EIGEN_STRONG_INLINE const Scalar coeff(Index index) const
174    {
175      Scalar res;
176      const Index row = RowsAtCompileTime == 1 ? 0 : index;
177      const Index col = RowsAtCompileTime == 1 ? index : 0;
178      ScalarCoeffImpl::run(row, col, m_lhs, m_rhs, res);
179      return res;
180    }
181
182    template<int LoadMode>
183    EIGEN_STRONG_INLINE const PacketScalar packet(Index row, Index col) const
184    {
185      PacketScalar res;
186      internal::product_packet_impl<Flags&RowMajorBit ? RowMajor : ColMajor,
187                              Unroll ? InnerSize-1 : Dynamic,
188                              _LhsNested, _RhsNested, PacketScalar, LoadMode>
189        ::run(row, col, m_lhs, m_rhs, res);
190      return res;
191    }
192
193    // Implicit conversion to the nested type (trigger the evaluation of the product)
194    EIGEN_STRONG_INLINE operator const PlainObject& () const
195    {
196      m_result.lazyAssign(*this);
197      return m_result;
198    }
199
200    const _LhsNested& lhs() const { return m_lhs; }
201    const _RhsNested& rhs() const { return m_rhs; }
202
203    const Diagonal<const LazyCoeffBasedProductType,0> diagonal() const
204    { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this); }
205
206    template<int DiagonalIndex>
207    const Diagonal<const LazyCoeffBasedProductType,DiagonalIndex> diagonal() const
208    { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this); }
209
210    const Diagonal<const LazyCoeffBasedProductType,Dynamic> diagonal(Index index) const
211    { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this).diagonal(index); }
212
213  protected:
214    typename internal::add_const_on_value_type<LhsNested>::type m_lhs;
215    typename internal::add_const_on_value_type<RhsNested>::type m_rhs;
216
217    mutable PlainObject m_result;
218};
219
220namespace internal {
221
222// here we need to overload the nested rule for products
223// such that the nested type is a const reference to a plain matrix
224template<typename Lhs, typename Rhs, int N, typename PlainObject>
225struct nested<CoeffBasedProduct<Lhs,Rhs,EvalBeforeNestingBit|EvalBeforeAssigningBit>, N, PlainObject>
226{
227  typedef PlainObject const& type;
228};
229
230/***************************************************************************
231* Normal product .coeff() implementation (with meta-unrolling)
232***************************************************************************/
233
234/**************************************
235*** Scalar path  - no vectorization ***
236**************************************/
237
238template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
239struct product_coeff_impl<DefaultTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
240{
241  typedef typename Lhs::Index Index;
242  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
243  {
244    product_coeff_impl<DefaultTraversal, UnrollingIndex-1, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, res);
245    res += lhs.coeff(row, UnrollingIndex) * rhs.coeff(UnrollingIndex, col);
246  }
247};
248
249template<typename Lhs, typename Rhs, typename RetScalar>
250struct product_coeff_impl<DefaultTraversal, 0, Lhs, Rhs, RetScalar>
251{
252  typedef typename Lhs::Index Index;
253  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
254  {
255    res = lhs.coeff(row, 0) * rhs.coeff(0, col);
256  }
257};
258
259template<typename Lhs, typename Rhs, typename RetScalar>
260struct product_coeff_impl<DefaultTraversal, Dynamic, Lhs, Rhs, RetScalar>
261{
262  typedef typename Lhs::Index Index;
263  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar& res)
264  {
265    eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
266    res = lhs.coeff(row, 0) * rhs.coeff(0, col);
267      for(Index i = 1; i < lhs.cols(); ++i)
268        res += lhs.coeff(row, i) * rhs.coeff(i, col);
269  }
270};
271
272/*******************************************
273*** Scalar path with inner vectorization ***
274*******************************************/
275
276template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet>
277struct product_coeff_vectorized_unroller
278{
279  typedef typename Lhs::Index Index;
280  enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
281  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
282  {
283    product_coeff_vectorized_unroller<UnrollingIndex-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
284    pres = padd(pres, pmul( lhs.template packet<Aligned>(row, UnrollingIndex) , rhs.template packet<Aligned>(UnrollingIndex, col) ));
285  }
286};
287
288template<typename Lhs, typename Rhs, typename Packet>
289struct product_coeff_vectorized_unroller<0, Lhs, Rhs, Packet>
290{
291  typedef typename Lhs::Index Index;
292  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
293  {
294    pres = pmul(lhs.template packet<Aligned>(row, 0) , rhs.template packet<Aligned>(0, col));
295  }
296};
297
298template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
299struct product_coeff_impl<InnerVectorizedTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
300{
301  typedef typename Lhs::PacketScalar Packet;
302  typedef typename Lhs::Index Index;
303  enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
304  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
305  {
306    Packet pres;
307    product_coeff_vectorized_unroller<UnrollingIndex+1-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
308    product_coeff_impl<DefaultTraversal,UnrollingIndex,Lhs,Rhs,RetScalar>::run(row, col, lhs, rhs, res);
309    res = predux(pres);
310  }
311};
312
313template<typename Lhs, typename Rhs, int LhsRows = Lhs::RowsAtCompileTime, int RhsCols = Rhs::ColsAtCompileTime>
314struct product_coeff_vectorized_dyn_selector
315{
316  typedef typename Lhs::Index Index;
317  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
318  {
319    res = lhs.row(row).transpose().cwiseProduct(rhs.col(col)).sum();
320  }
321};
322
323// NOTE the 3 following specializations are because taking .col(0) on a vector is a bit slower
324// NOTE maybe they are now useless since we have a specialization for Block<Matrix>
325template<typename Lhs, typename Rhs, int RhsCols>
326struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols>
327{
328  typedef typename Lhs::Index Index;
329  static EIGEN_STRONG_INLINE void run(Index /*row*/, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
330  {
331    res = lhs.transpose().cwiseProduct(rhs.col(col)).sum();
332  }
333};
334
335template<typename Lhs, typename Rhs, int LhsRows>
336struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1>
337{
338  typedef typename Lhs::Index Index;
339  static EIGEN_STRONG_INLINE void run(Index row, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
340  {
341    res = lhs.row(row).transpose().cwiseProduct(rhs).sum();
342  }
343};
344
345template<typename Lhs, typename Rhs>
346struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1>
347{
348  typedef typename Lhs::Index Index;
349  static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
350  {
351    res = lhs.transpose().cwiseProduct(rhs).sum();
352  }
353};
354
355template<typename Lhs, typename Rhs, typename RetScalar>
356struct product_coeff_impl<InnerVectorizedTraversal, Dynamic, Lhs, Rhs, RetScalar>
357{
358  typedef typename Lhs::Index Index;
359  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
360  {
361    product_coeff_vectorized_dyn_selector<Lhs,Rhs>::run(row, col, lhs, rhs, res);
362  }
363};
364
365/*******************
366*** Packet path  ***
367*******************/
368
369template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
370struct product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
371{
372  typedef typename Lhs::Index Index;
373  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
374  {
375    product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
376    res =  pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex)), rhs.template packet<LoadMode>(UnrollingIndex, col), res);
377  }
378};
379
380template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
381struct product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
382{
383  typedef typename Lhs::Index Index;
384  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
385  {
386    product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
387    res =  pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex), pset1<Packet>(rhs.coeff(UnrollingIndex, col)), res);
388  }
389};
390
391template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
392struct product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
393{
394  typedef typename Lhs::Index Index;
395  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
396  {
397    res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
398  }
399};
400
401template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
402struct product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
403{
404  typedef typename Lhs::Index Index;
405  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
406  {
407    res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
408  }
409};
410
411template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
412struct product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
413{
414  typedef typename Lhs::Index Index;
415  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
416  {
417    eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
418    res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
419      for(Index i = 1; i < lhs.cols(); ++i)
420        res =  pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode>(i, col), res);
421  }
422};
423
424template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
425struct product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
426{
427  typedef typename Lhs::Index Index;
428  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
429  {
430    eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
431    res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
432      for(Index i = 1; i < lhs.cols(); ++i)
433        res =  pmadd(lhs.template packet<LoadMode>(row, i), pset1<Packet>(rhs.coeff(i, col)), res);
434  }
435};
436
437} // end namespace internal
438
439} // end namespace Eigen
440
441#endif // EIGEN_COEFFBASED_PRODUCT_H
442