1// This file is part of Eigen, a lightweight C++ template library
2// for linear algebra.
3//
4// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
5// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
6//
7// This Source Code Form is subject to the terms of the Mozilla
8// Public License v. 2.0. If a copy of the MPL was not distributed
9// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
10
11#ifndef EIGEN_REDUX_H
12#define EIGEN_REDUX_H
13
14namespace Eigen {
15
16namespace internal {
17
18// TODO
19//  * implement other kind of vectorization
20//  * factorize code
21
22/***************************************************************************
23* Part 1 : the logic deciding a strategy for vectorization and unrolling
24***************************************************************************/
25
26template<typename Func, typename Derived>
27struct redux_traits
28{
29public:
30  enum {
31    PacketSize = packet_traits<typename Derived::Scalar>::size,
32    InnerMaxSize = int(Derived::IsRowMajor)
33                 ? Derived::MaxColsAtCompileTime
34                 : Derived::MaxRowsAtCompileTime
35  };
36
37  enum {
38    MightVectorize = (int(Derived::Flags)&ActualPacketAccessBit)
39                  && (functor_traits<Func>::PacketAccess),
40    MayLinearVectorize = MightVectorize && (int(Derived::Flags)&LinearAccessBit),
41    MaySliceVectorize  = MightVectorize && int(InnerMaxSize)>=3*PacketSize
42  };
43
44public:
45  enum {
46    Traversal = int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
47              : int(MaySliceVectorize)  ? int(SliceVectorizedTraversal)
48                                        : int(DefaultTraversal)
49  };
50
51public:
52  enum {
53    Cost = (  Derived::SizeAtCompileTime == Dynamic
54           || Derived::CoeffReadCost == Dynamic
55           || (Derived::SizeAtCompileTime!=1 && functor_traits<Func>::Cost == Dynamic)
56           ) ? Dynamic
57           : Derived::SizeAtCompileTime * Derived::CoeffReadCost
58               + (Derived::SizeAtCompileTime-1) * functor_traits<Func>::Cost,
59    UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize))
60  };
61
62public:
63  enum {
64    Unrolling = Cost != Dynamic && Cost <= UnrollingLimit
65              ? CompleteUnrolling
66              : NoUnrolling
67  };
68};
69
70/***************************************************************************
71* Part 2 : unrollers
72***************************************************************************/
73
74/*** no vectorization ***/
75
76template<typename Func, typename Derived, int Start, int Length>
77struct redux_novec_unroller
78{
79  enum {
80    HalfLength = Length/2
81  };
82
83  typedef typename Derived::Scalar Scalar;
84
85  static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
86  {
87    return func(redux_novec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
88                redux_novec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func));
89  }
90};
91
92template<typename Func, typename Derived, int Start>
93struct redux_novec_unroller<Func, Derived, Start, 1>
94{
95  enum {
96    outer = Start / Derived::InnerSizeAtCompileTime,
97    inner = Start % Derived::InnerSizeAtCompileTime
98  };
99
100  typedef typename Derived::Scalar Scalar;
101
102  static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func&)
103  {
104    return mat.coeffByOuterInner(outer, inner);
105  }
106};
107
108// This is actually dead code and will never be called. It is required
109// to prevent false warnings regarding failed inlining though
110// for 0 length run() will never be called at all.
111template<typename Func, typename Derived, int Start>
112struct redux_novec_unroller<Func, Derived, Start, 0>
113{
114  typedef typename Derived::Scalar Scalar;
115  static EIGEN_STRONG_INLINE Scalar run(const Derived&, const Func&) { return Scalar(); }
116};
117
118/*** vectorization ***/
119
120template<typename Func, typename Derived, int Start, int Length>
121struct redux_vec_unroller
122{
123  enum {
124    PacketSize = packet_traits<typename Derived::Scalar>::size,
125    HalfLength = Length/2
126  };
127
128  typedef typename Derived::Scalar Scalar;
129  typedef typename packet_traits<Scalar>::type PacketScalar;
130
131  static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func& func)
132  {
133    return func.packetOp(
134            redux_vec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
135            redux_vec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func) );
136  }
137};
138
139template<typename Func, typename Derived, int Start>
140struct redux_vec_unroller<Func, Derived, Start, 1>
141{
142  enum {
143    index = Start * packet_traits<typename Derived::Scalar>::size,
144    outer = index / int(Derived::InnerSizeAtCompileTime),
145    inner = index % int(Derived::InnerSizeAtCompileTime),
146    alignment = (Derived::Flags & AlignedBit) ? Aligned : Unaligned
147  };
148
149  typedef typename Derived::Scalar Scalar;
150  typedef typename packet_traits<Scalar>::type PacketScalar;
151
152  static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func&)
153  {
154    return mat.template packetByOuterInner<alignment>(outer, inner);
155  }
156};
157
158/***************************************************************************
159* Part 3 : implementation of all cases
160***************************************************************************/
161
162template<typename Func, typename Derived,
163         int Traversal = redux_traits<Func, Derived>::Traversal,
164         int Unrolling = redux_traits<Func, Derived>::Unrolling
165>
166struct redux_impl;
167
168template<typename Func, typename Derived>
169struct redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>
170{
171  typedef typename Derived::Scalar Scalar;
172  typedef typename Derived::Index Index;
173  static EIGEN_STRONG_INLINE Scalar run(const Derived& mat, const Func& func)
174  {
175    eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
176    Scalar res;
177    res = mat.coeffByOuterInner(0, 0);
178    for(Index i = 1; i < mat.innerSize(); ++i)
179      res = func(res, mat.coeffByOuterInner(0, i));
180    for(Index i = 1; i < mat.outerSize(); ++i)
181      for(Index j = 0; j < mat.innerSize(); ++j)
182        res = func(res, mat.coeffByOuterInner(i, j));
183    return res;
184  }
185};
186
187template<typename Func, typename Derived>
188struct redux_impl<Func,Derived, DefaultTraversal, CompleteUnrolling>
189  : public redux_novec_unroller<Func,Derived, 0, Derived::SizeAtCompileTime>
190{};
191
192template<typename Func, typename Derived>
193struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
194{
195  typedef typename Derived::Scalar Scalar;
196  typedef typename packet_traits<Scalar>::type PacketScalar;
197  typedef typename Derived::Index Index;
198
199  static Scalar run(const Derived& mat, const Func& func)
200  {
201    const Index size = mat.size();
202    eigen_assert(size && "you are using an empty matrix");
203    const Index packetSize = packet_traits<Scalar>::size;
204    const Index alignedStart = internal::first_aligned(mat);
205    enum {
206      alignment = bool(Derived::Flags & DirectAccessBit) || bool(Derived::Flags & AlignedBit)
207                ? Aligned : Unaligned
208    };
209    const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize);
210    const Index alignedSize = ((size-alignedStart)/(packetSize))*(packetSize);
211    const Index alignedEnd2 = alignedStart + alignedSize2;
212    const Index alignedEnd  = alignedStart + alignedSize;
213    Scalar res;
214    if(alignedSize)
215    {
216      PacketScalar packet_res0 = mat.template packet<alignment>(alignedStart);
217      if(alignedSize>packetSize) // we have at least two packets to partly unroll the loop
218      {
219        PacketScalar packet_res1 = mat.template packet<alignment>(alignedStart+packetSize);
220        for(Index index = alignedStart + 2*packetSize; index < alignedEnd2; index += 2*packetSize)
221        {
222          packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment>(index));
223          packet_res1 = func.packetOp(packet_res1, mat.template packet<alignment>(index+packetSize));
224        }
225
226        packet_res0 = func.packetOp(packet_res0,packet_res1);
227        if(alignedEnd>alignedEnd2)
228          packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment>(alignedEnd2));
229      }
230      res = func.predux(packet_res0);
231
232      for(Index index = 0; index < alignedStart; ++index)
233        res = func(res,mat.coeff(index));
234
235      for(Index index = alignedEnd; index < size; ++index)
236        res = func(res,mat.coeff(index));
237    }
238    else // too small to vectorize anything.
239         // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
240    {
241      res = mat.coeff(0);
242      for(Index index = 1; index < size; ++index)
243        res = func(res,mat.coeff(index));
244    }
245
246    return res;
247  }
248};
249
250template<typename Func, typename Derived>
251struct redux_impl<Func, Derived, SliceVectorizedTraversal, NoUnrolling>
252{
253  typedef typename Derived::Scalar Scalar;
254  typedef typename packet_traits<Scalar>::type PacketScalar;
255  typedef typename Derived::Index Index;
256
257  static Scalar run(const Derived& mat, const Func& func)
258  {
259    eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
260    const Index innerSize = mat.innerSize();
261    const Index outerSize = mat.outerSize();
262    enum {
263      packetSize = packet_traits<Scalar>::size
264    };
265    const Index packetedInnerSize = ((innerSize)/packetSize)*packetSize;
266    Scalar res;
267    if(packetedInnerSize)
268    {
269      PacketScalar packet_res = mat.template packet<Unaligned>(0,0);
270      for(Index j=0; j<outerSize; ++j)
271        for(Index i=(j==0?packetSize:0); i<packetedInnerSize; i+=Index(packetSize))
272          packet_res = func.packetOp(packet_res, mat.template packetByOuterInner<Unaligned>(j,i));
273
274      res = func.predux(packet_res);
275      for(Index j=0; j<outerSize; ++j)
276        for(Index i=packetedInnerSize; i<innerSize; ++i)
277          res = func(res, mat.coeffByOuterInner(j,i));
278    }
279    else // too small to vectorize anything.
280         // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
281    {
282      res = redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>::run(mat, func);
283    }
284
285    return res;
286  }
287};
288
289template<typename Func, typename Derived>
290struct redux_impl<Func, Derived, LinearVectorizedTraversal, CompleteUnrolling>
291{
292  typedef typename Derived::Scalar Scalar;
293  typedef typename packet_traits<Scalar>::type PacketScalar;
294  enum {
295    PacketSize = packet_traits<Scalar>::size,
296    Size = Derived::SizeAtCompileTime,
297    VectorizedSize = (Size / PacketSize) * PacketSize
298  };
299  static EIGEN_STRONG_INLINE Scalar run(const Derived& mat, const Func& func)
300  {
301    eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
302    Scalar res = func.predux(redux_vec_unroller<Func, Derived, 0, Size / PacketSize>::run(mat,func));
303    if (VectorizedSize != Size)
304      res = func(res,redux_novec_unroller<Func, Derived, VectorizedSize, Size-VectorizedSize>::run(mat,func));
305    return res;
306  }
307};
308
309} // end namespace internal
310
311/***************************************************************************
312* Part 4 : public API
313***************************************************************************/
314
315
316/** \returns the result of a full redux operation on the whole matrix or vector using \a func
317  *
318  * The template parameter \a BinaryOp is the type of the functor \a func which must be
319  * an associative operator. Both current STL and TR1 functor styles are handled.
320  *
321  * \sa DenseBase::sum(), DenseBase::minCoeff(), DenseBase::maxCoeff(), MatrixBase::colwise(), MatrixBase::rowwise()
322  */
323template<typename Derived>
324template<typename Func>
325EIGEN_STRONG_INLINE typename internal::result_of<Func(typename internal::traits<Derived>::Scalar)>::type
326DenseBase<Derived>::redux(const Func& func) const
327{
328  typedef typename internal::remove_all<typename Derived::Nested>::type ThisNested;
329  return internal::redux_impl<Func, ThisNested>
330            ::run(derived(), func);
331}
332
333/** \returns the minimum of all coefficients of \c *this.
334  * \warning the result is undefined if \c *this contains NaN.
335  */
336template<typename Derived>
337EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
338DenseBase<Derived>::minCoeff() const
339{
340  return this->redux(Eigen::internal::scalar_min_op<Scalar>());
341}
342
343/** \returns the maximum of all coefficients of \c *this.
344  * \warning the result is undefined if \c *this contains NaN.
345  */
346template<typename Derived>
347EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
348DenseBase<Derived>::maxCoeff() const
349{
350  return this->redux(Eigen::internal::scalar_max_op<Scalar>());
351}
352
353/** \returns the sum of all coefficients of *this
354  *
355  * \sa trace(), prod(), mean()
356  */
357template<typename Derived>
358EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
359DenseBase<Derived>::sum() const
360{
361  if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
362    return Scalar(0);
363  return this->redux(Eigen::internal::scalar_sum_op<Scalar>());
364}
365
366/** \returns the mean of all coefficients of *this
367*
368* \sa trace(), prod(), sum()
369*/
370template<typename Derived>
371EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
372DenseBase<Derived>::mean() const
373{
374  return Scalar(this->redux(Eigen::internal::scalar_sum_op<Scalar>())) / Scalar(this->size());
375}
376
377/** \returns the product of all coefficients of *this
378  *
379  * Example: \include MatrixBase_prod.cpp
380  * Output: \verbinclude MatrixBase_prod.out
381  *
382  * \sa sum(), mean(), trace()
383  */
384template<typename Derived>
385EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
386DenseBase<Derived>::prod() const
387{
388  if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
389    return Scalar(1);
390  return this->redux(Eigen::internal::scalar_product_op<Scalar>());
391}
392
393/** \returns the trace of \c *this, i.e. the sum of the coefficients on the main diagonal.
394  *
395  * \c *this can be any matrix, not necessarily square.
396  *
397  * \sa diagonal(), sum()
398  */
399template<typename Derived>
400EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
401MatrixBase<Derived>::trace() const
402{
403  return derived().diagonal().sum();
404}
405
406} // end namespace Eigen
407
408#endif // EIGEN_REDUX_H
409