// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_REDUX_H
#define EIGEN_REDUX_H

namespace Eigen {

namespace internal {

// TODO
// * implement other kind of vectorization
// * factorize code

/***************************************************************************
* Part 1 : the logic deciding a strategy for vectorization and unrolling
***************************************************************************/

// Compile-time strategy selection for a full reduction: picks the packet
// type, the traversal kind (linear-vectorized / slice-vectorized / default
// scalar), and whether the reduction loop should be completely unrolled.
template<typename Func, typename Derived>
struct redux_traits
{
public:
    typedef typename find_best_packet<typename Derived::Scalar,Derived::SizeAtCompileTime>::type PacketType;
  enum {
    PacketSize = unpacket_traits<PacketType>::size,
    // Size of the inner (contiguous) dimension: columns for row-major,
    // rows for column-major.
    InnerMaxSize = int(Derived::IsRowMajor)
                 ? Derived::MaxColsAtCompileTime
                 : Derived::MaxRowsAtCompileTime
  };

  enum {
    // Vectorization requires both packet access on the expression and a
    // functor providing packetOp/predux.
    MightVectorize = (int(Derived::Flags)&ActualPacketAccessBit)
                  && (functor_traits<Func>::PacketAccess),
    // Linear (1D-indexed) vectorization needs linear coefficient access.
    MayLinearVectorize = bool(MightVectorize) && (int(Derived::Flags)&LinearAccessBit),
    // Otherwise, vectorize per inner slice, but only if each slice is long
    // enough (>= 3 packets) for the packet loop to pay off.
    MaySliceVectorize  = bool(MightVectorize) && int(InnerMaxSize)>=3*PacketSize
  };

public:
  enum {
    Traversal = int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
              : int(MaySliceVectorize)  ? int(SliceVectorizedTraversal)
                                        : int(DefaultTraversal)
  };

public:
  enum {
    // Estimated total cost of the reduction; dynamic sizes get HugeCost so
    // they are never unrolled.
    Cost = Derived::SizeAtCompileTime == Dynamic ? HugeCost
         : Derived::SizeAtCompileTime * Derived::CoeffReadCost + (Derived::SizeAtCompileTime-1) * functor_traits<Func>::Cost,
    // Vectorized traversals process PacketSize coeffs per step, so they get
    // a proportionally larger unrolling budget.
    UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize))
  };

public:
  enum {
    Unrolling = Cost <= UnrollingLimit ? CompleteUnrolling : NoUnrolling
  };

#ifdef EIGEN_DEBUG_ASSIGN
  // Dumps the decisions above to stderr (debug builds only).
  static void debug()
  {
    std::cerr << "Xpr: " << typeid(typename Derived::XprType).name() << std::endl;
    std::cerr.setf(std::ios::hex, std::ios::basefield);
    EIGEN_DEBUG_VAR(Derived::Flags)
    std::cerr.unsetf(std::ios::hex);
    EIGEN_DEBUG_VAR(InnerMaxSize)
    EIGEN_DEBUG_VAR(PacketSize)
    EIGEN_DEBUG_VAR(MightVectorize)
    EIGEN_DEBUG_VAR(MayLinearVectorize)
    EIGEN_DEBUG_VAR(MaySliceVectorize)
    EIGEN_DEBUG_VAR(Traversal)
    EIGEN_DEBUG_VAR(UnrollingLimit)
    EIGEN_DEBUG_VAR(Unrolling)
    std::cerr << std::endl;
  }
#endif
};

/***************************************************************************
* Part 2 : unrollers
***************************************************************************/

/*** no vectorization ***/

// Fully-unrolled scalar reduction over the compile-time index range
// [Start, Start+Length). Recurses by splitting the range in two halves,
// which builds a balanced reduction tree rather than a linear chain.
template<typename Func, typename Derived, int Start, int Length>
struct redux_novec_unroller
{
  enum {
    HalfLength = Length/2
  };

  typedef typename Derived::Scalar Scalar;

  EIGEN_DEVICE_FUNC
  static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
  {
    return func(redux_novec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
                redux_novec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func));
  }
};

// Base case: a single coefficient. The linear index Start is converted to
// (outer, inner) coordinates at compile time.
template<typename Func, typename Derived, int Start>
struct redux_novec_unroller<Func, Derived, Start, 1>
{
  enum {
    outer = Start / Derived::InnerSizeAtCompileTime,
    inner = Start % Derived::InnerSizeAtCompileTime
  };

  typedef typename Derived::Scalar Scalar;

  EIGEN_DEVICE_FUNC
  static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func&)
  {
    return mat.coeffByOuterInner(outer, inner);
  }
};

// This is actually dead code and will never be called. It is required
// to prevent false warnings regarding failed inlining though
// for 0 length run() will never be called at all.
template<typename Func, typename Derived, int Start>
struct redux_novec_unroller<Func, Derived, Start, 0>
{
  typedef typename Derived::Scalar Scalar;
  EIGEN_DEVICE_FUNC
  static EIGEN_STRONG_INLINE Scalar run(const Derived&, const Func&) { return Scalar(); }
};

/*** vectorization ***/

// Fully-unrolled packet reduction over [Start, Start+Length) *packets*
// (indices are in units of PacketSize coefficients). Same balanced
// divide-and-conquer scheme as redux_novec_unroller, combining with
// the functor's packetOp.
template<typename Func, typename Derived, int Start, int Length>
struct redux_vec_unroller
{
  enum {
    PacketSize = redux_traits<Func, Derived>::PacketSize,
    HalfLength = Length/2
  };

  typedef typename Derived::Scalar Scalar;
  typedef typename redux_traits<Func, Derived>::PacketType PacketScalar;

  static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func& func)
  {
    return func.packetOp(
            redux_vec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
            redux_vec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func) );
  }
};

// Base case: load one packet. The packet index is first scaled to a
// coefficient index, then split into (outer, inner) coordinates.
template<typename Func, typename Derived, int Start>
struct redux_vec_unroller<Func, Derived, Start, 1>
{
  enum {
    index = Start * redux_traits<Func, Derived>::PacketSize,
    outer = index / int(Derived::InnerSizeAtCompileTime),
    inner = index % int(Derived::InnerSizeAtCompileTime),
    alignment = Derived::Alignment
  };

  typedef typename Derived::Scalar Scalar;
  typedef typename redux_traits<Func, Derived>::PacketType PacketScalar;

  static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func&)
  {
    return mat.template packetByOuterInner<alignment,PacketScalar>(outer, inner);
  }
};

/***************************************************************************
* Part 3 : implementation of all cases
***************************************************************************/

// Dispatcher: one partial specialization per (Traversal, Unrolling)
// combination chosen by redux_traits.
template<typename Func, typename Derived,
         int Traversal = redux_traits<Func, Derived>::Traversal,
         int Unrolling = redux_traits<Func, Derived>::Unrolling
>
struct redux_impl;

// Plain scalar loop, no unrolling: reduce the first inner slice, then
// the remaining outer slices.
template<typename Func, typename Derived>
struct redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>
{
  typedef typename Derived::Scalar Scalar;
  EIGEN_DEVICE_FUNC
  static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
  {
    eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
    Scalar res;
    // Seed with the first coefficient; the loops below start at 1 / 0
    // accordingly so no coefficient is visited twice.
    res = mat.coeffByOuterInner(0, 0);
    for(Index i = 1; i < mat.innerSize(); ++i)
      res = func(res, mat.coeffByOuterInner(0, i));
    for(Index i = 1; i < mat.outerSize(); ++i)
      for(Index j = 0; j < mat.innerSize(); ++j)
        res = func(res, mat.coeffByOuterInner(i, j));
    return res;
  }
};

// Scalar traversal, fully unrolled at compile time.
template<typename Func, typename Derived>
struct redux_impl<Func,Derived, DefaultTraversal, CompleteUnrolling>
  : public redux_novec_unroller<Func,Derived, 0, Derived::SizeAtCompileTime>
{};

// Linear vectorized traversal, runtime loop: process the bulk with packets
// (two independent accumulators to expose instruction-level parallelism),
// then reduce the packet accumulator and fold in the unaligned head/tail
// coefficients with scalar ops.
template<typename Func, typename Derived>
struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
{
  typedef typename Derived::Scalar Scalar;
  typedef typename redux_traits<Func, Derived>::PacketType PacketScalar;

  static Scalar run(const Derived &mat, const Func& func)
  {
    const Index size = mat.size();

    const Index packetSize = redux_traits<Func, Derived>::PacketSize;
    const int packetAlignment = unpacket_traits<PacketScalar>::alignment;
    enum {
      // Aligned loads are possible if we have direct access and scalars can
      // be aligned; otherwise fall back to the expression's static alignment.
      alignment0 = (bool(Derived::Flags & DirectAccessBit) && bool(packet_traits<Scalar>::AlignedOnScalar)) ? int(packetAlignment) : int(Unaligned),
      alignment = EIGEN_PLAIN_ENUM_MAX(alignment0, Derived::Alignment)
    };
    const Index alignedStart = internal::first_default_aligned(mat.nestedExpression());
    // alignedSize2: portion divisible by two packets (the partly-unrolled
    // loop); alignedSize: portion divisible by one packet.
    const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize);
    const Index alignedSize = ((size-alignedStart)/(packetSize))*(packetSize);
    const Index alignedEnd2 = alignedStart + alignedSize2;
    const Index alignedEnd  = alignedStart + alignedSize;
    Scalar res;
    if(alignedSize)
    {
      PacketScalar packet_res0 = mat.template packet<alignment,PacketScalar>(alignedStart);
      if(alignedSize>packetSize) // we have at least two packets to partly unroll the loop
      {
        PacketScalar packet_res1 = mat.template packet<alignment,PacketScalar>(alignedStart+packetSize);
        for(Index index = alignedStart + 2*packetSize; index < alignedEnd2; index += 2*packetSize)
        {
          packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment,PacketScalar>(index));
          packet_res1 = func.packetOp(packet_res1, mat.template packet<alignment,PacketScalar>(index+packetSize));
        }

        packet_res0 = func.packetOp(packet_res0,packet_res1);
        // One leftover full packet between the 2-packet loop and alignedEnd.
        if(alignedEnd>alignedEnd2)
          packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment,PacketScalar>(alignedEnd2));
      }
      res = func.predux(packet_res0);

      // Scalar head: coefficients before the first aligned address.
      for(Index index = 0; index < alignedStart; ++index)
        res = func(res,mat.coeff(index));

      // Scalar tail: coefficients that do not fill a whole packet.
      for(Index index = alignedEnd; index < size; ++index)
        res = func(res,mat.coeff(index));
    }
    else // too small to vectorize anything.
         // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
    {
      res = mat.coeff(0);
      for(Index index = 1; index < size; ++index)
        res = func(res,mat.coeff(index));
    }

    return res;
  }
};

// NOTE: for SliceVectorizedTraversal we simply bypass unrolling
// Vectorize each inner slice independently with unaligned packet loads,
// then mop up each slice's scalar remainder.
template<typename Func, typename Derived, int Unrolling>
struct redux_impl<Func, Derived, SliceVectorizedTraversal, Unrolling>
{
  typedef typename Derived::Scalar Scalar;
  typedef typename redux_traits<Func, Derived>::PacketType PacketType;

  EIGEN_DEVICE_FUNC static Scalar run(const Derived &mat, const Func& func)
  {
    eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
    const Index innerSize = mat.innerSize();
    const Index outerSize = mat.outerSize();
    enum {
      packetSize = redux_traits<Func, Derived>::PacketSize
    };
    // Largest multiple of packetSize that fits in one inner slice.
    const Index packetedInnerSize = ((innerSize)/packetSize)*packetSize;
    Scalar res;
    if(packetedInnerSize)
    {
      // Seed with the first packet; the j==0 iteration then starts at
      // i=packetSize so that packet is not combined twice.
      PacketType packet_res = mat.template packet<Unaligned,PacketType>(0,0);
      for(Index j=0; j<outerSize; ++j)
        for(Index i=(j==0?packetSize:0); i<packetedInnerSize; i+=Index(packetSize))
          packet_res = func.packetOp(packet_res, mat.template packetByOuterInner<Unaligned,PacketType>(j,i));

      res = func.predux(packet_res);
      // Scalar remainder of every slice.
      for(Index j=0; j<outerSize; ++j)
        for(Index i=packetedInnerSize; i<innerSize; ++i)
          res = func(res, mat.coeffByOuterInner(j,i));
    }
    else // too small to vectorize anything.
         // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
    {
      res = redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>::run(mat, func);
    }

    return res;
  }
};

// Linear vectorized traversal, fully unrolled: unroll the packet part,
// reduce it, then fold in the (also unrolled) scalar remainder.
template<typename Func, typename Derived>
struct redux_impl<Func, Derived, LinearVectorizedTraversal, CompleteUnrolling>
{
  typedef typename Derived::Scalar Scalar;

  typedef typename redux_traits<Func, Derived>::PacketType PacketScalar;
  enum {
    PacketSize = redux_traits<Func, Derived>::PacketSize,
    Size = Derived::SizeAtCompileTime,
    VectorizedSize = (Size / PacketSize) * PacketSize
  };
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
  {
    eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
    if (VectorizedSize > 0) {
      Scalar res = func.predux(redux_vec_unroller<Func, Derived, 0, Size / PacketSize>::run(mat,func));
      if (VectorizedSize != Size)
        res = func(res,redux_novec_unroller<Func, Derived, VectorizedSize, Size-VectorizedSize>::run(mat,func));
      return res;
    }
    else {
      // Expression is smaller than one packet: pure scalar unrolling.
      return redux_novec_unroller<Func, Derived, 0, Size>::run(mat,func);
    }
  }
};

// evaluator adaptor
// Wraps an expression's evaluator and exposes the coefficient/packet access
// plus the compile-time constants the redux_impl machinery above needs,
// including (outer, inner)-indexed accessors that abstract storage order.
template<typename _XprType>
class redux_evaluator
{
public:
  typedef _XprType XprType;
  EIGEN_DEVICE_FUNC explicit redux_evaluator(const XprType &xpr) : m_evaluator(xpr), m_xpr(xpr) {}

  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename XprType::PacketScalar PacketScalar;
  typedef typename XprType::PacketReturnType PacketReturnType;

  enum {
    MaxRowsAtCompileTime = XprType::MaxRowsAtCompileTime,
    MaxColsAtCompileTime = XprType::MaxColsAtCompileTime,
    // TODO we should not remove DirectAccessBit and rather find an elegant way to query the alignment offset at runtime from the evaluator
    Flags = evaluator<XprType>::Flags & ~DirectAccessBit,
    IsRowMajor = XprType::IsRowMajor,
    SizeAtCompileTime = XprType::SizeAtCompileTime,
    InnerSizeAtCompileTime = XprType::InnerSizeAtCompileTime,
    CoeffReadCost = evaluator<XprType>::CoeffReadCost,
    Alignment = evaluator<XprType>::Alignment
  };

  EIGEN_DEVICE_FUNC Index rows() const { return m_xpr.rows(); }
  EIGEN_DEVICE_FUNC Index cols() const { return m_xpr.cols(); }
  EIGEN_DEVICE_FUNC Index size() const { return m_xpr.size(); }
  EIGEN_DEVICE_FUNC Index innerSize() const { return m_xpr.innerSize(); }
  EIGEN_DEVICE_FUNC Index outerSize() const { return m_xpr.outerSize(); }

  EIGEN_DEVICE_FUNC
  CoeffReturnType coeff(Index row, Index col) const
  { return m_evaluator.coeff(row, col); }

  EIGEN_DEVICE_FUNC
  CoeffReturnType coeff(Index index) const
  { return m_evaluator.coeff(index); }

  template<int LoadMode, typename PacketType>
  PacketType packet(Index row, Index col) const
  { return m_evaluator.template packet<LoadMode,PacketType>(row, col); }

  template<int LoadMode, typename PacketType>
  PacketType packet(Index index) const
  { return m_evaluator.template packet<LoadMode,PacketType>(index); }

  // (outer, inner) accessors: map storage-order-agnostic coordinates to
  // (row, col) so the reduction loops need not care about IsRowMajor.
  EIGEN_DEVICE_FUNC
  CoeffReturnType coeffByOuterInner(Index outer, Index inner) const
  { return m_evaluator.coeff(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }

  template<int LoadMode, typename PacketType>
  PacketType packetByOuterInner(Index outer, Index inner) const
  { return m_evaluator.template packet<LoadMode,PacketType>(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }

  const XprType & nestedExpression() const { return m_xpr; }

protected:
  internal::evaluator<XprType> m_evaluator;
  const XprType &m_xpr;
};

} // end namespace internal

/***************************************************************************
* Part 4 : public API
***************************************************************************/


/** \returns the result of a full redux operation on the whole matrix or vector using \a func
  *
  * The template parameter \a BinaryOp is the type of the functor \a func which must be
  * an associative operator. Both current C++98 and C++11 functor styles are handled.
  *
  * \sa DenseBase::sum(), DenseBase::minCoeff(), DenseBase::maxCoeff(), MatrixBase::colwise(), MatrixBase::rowwise()
  */
template<typename Derived>
template<typename Func>
typename internal::traits<Derived>::Scalar
DenseBase<Derived>::redux(const Func& func) const
{
  eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix");

  typedef typename internal::redux_evaluator<Derived> ThisEvaluator;
  ThisEvaluator thisEval(derived());

  // Dispatch to the redux_impl specialization selected by redux_traits.
  return internal::redux_impl<Func, ThisEvaluator>::run(thisEval, func);
}

/** \returns the minimum of all coefficients of \c *this.
  * \warning the result is undefined if \c *this contains NaN.
  */
template<typename Derived>
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::minCoeff() const
{
  return derived().redux(Eigen::internal::scalar_min_op<Scalar,Scalar>());
}

/** \returns the maximum of all coefficients of \c *this.
  * \warning the result is undefined if \c *this contains NaN.
  */
template<typename Derived>
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::maxCoeff() const
{
  return derived().redux(Eigen::internal::scalar_max_op<Scalar,Scalar>());
}

/** \returns the sum of all coefficients of \c *this
  *
  * If \c *this is empty, then the value 0 is returned.
  *
  * \sa trace(), prod(), mean()
  */
template<typename Derived>
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::sum() const
{
  // Empty case is handled here because redux() asserts on empty matrices.
  if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
    return Scalar(0);
  return derived().redux(Eigen::internal::scalar_sum_op<Scalar,Scalar>());
}

/** \returns the mean of all coefficients of *this
*
* \sa trace(), prod(), sum()
*/
template<typename Derived>
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::mean() const
{
#ifdef __INTEL_COMPILER
  // Silence ICC warning 2259 (non-pointer conversion may lose bits) for the
  // Scalar division below.
  #pragma warning push
  #pragma warning ( disable : 2259 )
#endif
  return Scalar(derived().redux(Eigen::internal::scalar_sum_op<Scalar,Scalar>())) / Scalar(this->size());
#ifdef __INTEL_COMPILER
  #pragma warning pop
#endif
}

/** \returns the product of all coefficients of *this
  *
  * Example: \include MatrixBase_prod.cpp
  * Output: \verbinclude MatrixBase_prod.out
  *
  * \sa sum(), mean(), trace()
  */
template<typename Derived>
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::prod() const
{
  // Empty product is 1 (multiplicative identity); also avoids the redux()
  // empty-matrix assertion.
  if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
    return Scalar(1);
  return derived().redux(Eigen::internal::scalar_product_op<Scalar>());
}

/** \returns the trace of \c *this, i.e. the sum of the coefficients on the main diagonal.
  *
  * \c *this can be any matrix, not necessarily square.
  *
  * \sa diagonal(), sum()
  */
template<typename Derived>
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
MatrixBase<Derived>::trace() const
{
  return derived().diagonal().sum();
}

} // end namespace Eigen

#endif // EIGEN_REDUX_H