1c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// This file is part of Eigen, a lightweight C++ template library 2c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// for linear algebra. 3c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// 4c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr> 5c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com> 6c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// 7c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// This Source Code Form is subject to the terms of the Mozilla 8c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// Public License v. 2.0. If a copy of the MPL was not distributed 9c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 10c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 11c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#ifndef EIGEN_REDUX_H 12c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#define EIGEN_REDUX_H 13c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 14c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathnamespace Eigen { 15c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 16c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathnamespace internal { 17c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 18c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// TODO 19c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// * implement other kind of vectorization 20c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// * factorize code 21c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 22c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/*************************************************************************** 23c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath* Part 1 : the logic deciding a strategy for vectorization and unrolling 24c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath***************************************************************************/ 25c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 26c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Func, typename Derived> 27c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct redux_traits 28c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 29c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathpublic: 302b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang typedef typename find_best_packet<typename Derived::Scalar,Derived::SizeAtCompileTime>::type PacketType; 31c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath enum { 322b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang PacketSize = unpacket_traits<PacketType>::size, 33c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath InnerMaxSize = int(Derived::IsRowMajor) 34c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath ? Derived::MaxColsAtCompileTime 35c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath : Derived::MaxRowsAtCompileTime 36c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath }; 37c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 38c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath enum { 39c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath MightVectorize = (int(Derived::Flags)&ActualPacketAccessBit) 40c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath && (functor_traits<Func>::PacketAccess), 412b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang MayLinearVectorize = bool(MightVectorize) && (int(Derived::Flags)&LinearAccessBit), 422b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang MaySliceVectorize = bool(MightVectorize) && int(InnerMaxSize)>=3*PacketSize 43c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath }; 44c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 45c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathpublic: 46c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath enum { 47c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Traversal = int(MayLinearVectorize) ? int(LinearVectorizedTraversal) 48c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath : int(MaySliceVectorize) ? int(SliceVectorizedTraversal) 49c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath : int(DefaultTraversal) 50c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath }; 51c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 52c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathpublic: 53c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath enum { 542b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang Cost = Derived::SizeAtCompileTime == Dynamic ? HugeCost 552b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang : Derived::SizeAtCompileTime * Derived::CoeffReadCost + (Derived::SizeAtCompileTime-1) * functor_traits<Func>::Cost, 56c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize)) 57c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath }; 58c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 59c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathpublic: 60c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath enum { 612b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang Unrolling = Cost <= UnrollingLimit ? CompleteUnrolling : NoUnrolling 62c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath }; 632b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 642b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#ifdef EIGEN_DEBUG_ASSIGN 652b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang static void debug() 662b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang { 672b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang std::cerr << "Xpr: " << typeid(typename Derived::XprType).name() << std::endl; 682b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang std::cerr.setf(std::ios::hex, std::ios::basefield); 692b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang EIGEN_DEBUG_VAR(Derived::Flags) 702b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang std::cerr.unsetf(std::ios::hex); 712b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang EIGEN_DEBUG_VAR(InnerMaxSize) 722b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang EIGEN_DEBUG_VAR(PacketSize) 732b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang EIGEN_DEBUG_VAR(MightVectorize) 742b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang EIGEN_DEBUG_VAR(MayLinearVectorize) 752b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang EIGEN_DEBUG_VAR(MaySliceVectorize) 762b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang EIGEN_DEBUG_VAR(Traversal) 772b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang EIGEN_DEBUG_VAR(UnrollingLimit) 782b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang EIGEN_DEBUG_VAR(Unrolling) 792b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang std::cerr << std::endl; 802b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 812b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#endif 82c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 83c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 84c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/*************************************************************************** 85c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath* Part 2 : unrollers 86c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath***************************************************************************/ 87c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 88c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/*** no vectorization ***/ 89c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 90c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Func, typename Derived, int Start, int Length> 91c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct redux_novec_unroller 92c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 93c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath enum { 94c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HalfLength = Length/2 95c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath }; 96c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 97c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typedef typename Derived::Scalar Scalar; 98c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 992b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang EIGEN_DEVICE_FUNC 100c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func) 101c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 102c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return func(redux_novec_unroller<Func, Derived, Start, HalfLength>::run(mat,func), 103c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath redux_novec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func)); 104c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 105c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 106c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 107c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Func, typename Derived, int Start> 108c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct redux_novec_unroller<Func, Derived, Start, 1> 109c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 110c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath enum { 111c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath outer = Start / Derived::InnerSizeAtCompileTime, 112c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath inner = Start % Derived::InnerSizeAtCompileTime 113c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath }; 114c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 115c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typedef typename Derived::Scalar Scalar; 116c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 1172b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang EIGEN_DEVICE_FUNC 118c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func&) 119c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 120c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return mat.coeffByOuterInner(outer, inner); 121c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 122c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 123c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 124c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// This is actually dead code and will never be called. It is required 125c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// to prevent false warnings regarding failed inlining though 126c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// for 0 length run() will never be called at all. 127c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Func, typename Derived, int Start> 128c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct redux_novec_unroller<Func, Derived, Start, 0> 129c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 130c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typedef typename Derived::Scalar Scalar; 1312b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang EIGEN_DEVICE_FUNC 132c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath static EIGEN_STRONG_INLINE Scalar run(const Derived&, const Func&) { return Scalar(); } 133c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 134c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 135c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/*** vectorization ***/ 136c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 137c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Func, typename Derived, int Start, int Length> 138c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct redux_vec_unroller 139c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 140c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath enum { 1412b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang PacketSize = redux_traits<Func, Derived>::PacketSize, 142c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HalfLength = Length/2 143c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath }; 144c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 145c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typedef typename Derived::Scalar Scalar; 1462b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang typedef typename redux_traits<Func, Derived>::PacketType PacketScalar; 147c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 148c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func& func) 149c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 150c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return func.packetOp( 151c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath redux_vec_unroller<Func, Derived, Start, HalfLength>::run(mat,func), 152c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath redux_vec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func) ); 153c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 154c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 155c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 156c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Func, typename Derived, int Start> 157c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct redux_vec_unroller<Func, Derived, Start, 1> 158c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 159c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath enum { 1602b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang index = Start * redux_traits<Func, Derived>::PacketSize, 161c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath outer = index / int(Derived::InnerSizeAtCompileTime), 162c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath inner = index % int(Derived::InnerSizeAtCompileTime), 1632b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang alignment = Derived::Alignment 164c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath }; 165c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 166c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typedef typename Derived::Scalar Scalar; 1672b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang typedef typename redux_traits<Func, Derived>::PacketType PacketScalar; 168c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 169c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func&) 170c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 1712b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return mat.template packetByOuterInner<alignment,PacketScalar>(outer, inner); 172c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 173c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 174c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 175c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/*************************************************************************** 176c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath* Part 3 : implementation of all cases 177c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath***************************************************************************/ 178c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 179c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Func, typename Derived, 180c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath int Traversal = redux_traits<Func, Derived>::Traversal, 181c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath int Unrolling = redux_traits<Func, Derived>::Unrolling 182c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath> 183c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct redux_impl; 184c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 185c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Func, typename Derived> 186c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct redux_impl<Func, Derived, DefaultTraversal, NoUnrolling> 187c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 188c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typedef typename Derived::Scalar Scalar; 1892b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang EIGEN_DEVICE_FUNC 1902b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func) 191c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 192c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix"); 193c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Scalar res; 194c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath res = mat.coeffByOuterInner(0, 0); 195c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath for(Index i = 1; i < mat.innerSize(); ++i) 196c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath res = func(res, mat.coeffByOuterInner(0, i)); 197c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath for(Index i = 1; i < mat.outerSize(); ++i) 198c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath for(Index j = 0; j < mat.innerSize(); ++j) 199c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath res = func(res, mat.coeffByOuterInner(i, j)); 200c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return res; 201c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 202c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 203c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 204c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Func, typename Derived> 205c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct redux_impl<Func,Derived, DefaultTraversal, CompleteUnrolling> 206c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath : public redux_novec_unroller<Func,Derived, 0, Derived::SizeAtCompileTime> 207c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{}; 208c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 209c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Func, typename Derived> 210c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling> 211c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 212c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typedef typename Derived::Scalar Scalar; 2132b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang typedef typename redux_traits<Func, Derived>::PacketType PacketScalar; 214c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 2152b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang static Scalar run(const Derived &mat, const Func& func) 216c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 217c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath const Index size = mat.size(); 2182b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 2192b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang const Index packetSize = redux_traits<Func, Derived>::PacketSize; 2202b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang const int packetAlignment = unpacket_traits<PacketScalar>::alignment; 221c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath enum { 2222b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang alignment0 = (bool(Derived::Flags & DirectAccessBit) && bool(packet_traits<Scalar>::AlignedOnScalar)) ? int(packetAlignment) : int(Unaligned), 2232b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang alignment = EIGEN_PLAIN_ENUM_MAX(alignment0, Derived::Alignment) 224c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath }; 2252b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang const Index alignedStart = internal::first_default_aligned(mat.nestedExpression()); 226c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize); 227c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath const Index alignedSize = ((size-alignedStart)/(packetSize))*(packetSize); 228c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath const Index alignedEnd2 = alignedStart + alignedSize2; 229c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath const Index alignedEnd = alignedStart + alignedSize; 230c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Scalar res; 231c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath if(alignedSize) 232c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 2332b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang PacketScalar packet_res0 = mat.template packet<alignment,PacketScalar>(alignedStart); 234c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath if(alignedSize>packetSize) // we have at least two packets to partly unroll the loop 235c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 2362b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang PacketScalar packet_res1 = mat.template packet<alignment,PacketScalar>(alignedStart+packetSize); 237c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath for(Index index = alignedStart + 2*packetSize; index < alignedEnd2; index += 2*packetSize) 238c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 2392b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment,PacketScalar>(index)); 2402b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang packet_res1 = func.packetOp(packet_res1, mat.template packet<alignment,PacketScalar>(index+packetSize)); 241c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 242c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 243c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath packet_res0 = func.packetOp(packet_res0,packet_res1); 244c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath if(alignedEnd>alignedEnd2) 2452b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment,PacketScalar>(alignedEnd2)); 246c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 247c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath res = func.predux(packet_res0); 248c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 249c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath for(Index index = 0; index < alignedStart; ++index) 250c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath res = func(res,mat.coeff(index)); 251c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 252c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath for(Index index = alignedEnd; index < size; ++index) 253c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath res = func(res,mat.coeff(index)); 254c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 255c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath else // too small to vectorize anything. 256c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize. 257c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 258c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath res = mat.coeff(0); 259c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath for(Index index = 1; index < size; ++index) 260c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath res = func(res,mat.coeff(index)); 261c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 262c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 263c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return res; 264c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 265c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 266c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 2672b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang// NOTE: for SliceVectorizedTraversal we simply bypass unrolling 2682b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<typename Func, typename Derived, int Unrolling> 2692b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangstruct redux_impl<Func, Derived, SliceVectorizedTraversal, Unrolling> 270c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 271c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typedef typename Derived::Scalar Scalar; 2722b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang typedef typename redux_traits<Func, Derived>::PacketType PacketType; 273c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 2742b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang EIGEN_DEVICE_FUNC static Scalar run(const Derived &mat, const Func& func) 275c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 276c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix"); 277c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath const Index innerSize = mat.innerSize(); 278c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath const Index outerSize = mat.outerSize(); 279c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath enum { 2802b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang packetSize = redux_traits<Func, Derived>::PacketSize 281c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath }; 282c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath const Index packetedInnerSize = ((innerSize)/packetSize)*packetSize; 283c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Scalar res; 284c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath if(packetedInnerSize) 285c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 2862b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang PacketType packet_res = mat.template packet<Unaligned,PacketType>(0,0); 287c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath for(Index j=0; j<outerSize; ++j) 288c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath for(Index i=(j==0?packetSize:0); i<packetedInnerSize; i+=Index(packetSize)) 2892b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang packet_res = func.packetOp(packet_res, mat.template packetByOuterInner<Unaligned,PacketType>(j,i)); 290c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 291c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath res = func.predux(packet_res); 292c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath for(Index j=0; j<outerSize; ++j) 293c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath for(Index i=packetedInnerSize; i<innerSize; ++i) 294c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath res = func(res, mat.coeffByOuterInner(j,i)); 295c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 296c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath else // too small to vectorize anything. 297c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize. 298c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 299c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath res = redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>::run(mat, func); 300c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 301c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 302c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return res; 303c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 304c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 305c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 306c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Func, typename Derived> 307c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct redux_impl<Func, Derived, LinearVectorizedTraversal, CompleteUnrolling> 308c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 309c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typedef typename Derived::Scalar Scalar; 3102b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 3112b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang typedef typename redux_traits<Func, Derived>::PacketType PacketScalar; 312c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath enum { 3132b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang PacketSize = redux_traits<Func, Derived>::PacketSize, 314c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Size = Derived::SizeAtCompileTime, 315c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath VectorizedSize = (Size / PacketSize) * PacketSize 316c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath }; 3172b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func) 318c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 319c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix"); 3202b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang if (VectorizedSize > 0) { 3212b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang Scalar res = func.predux(redux_vec_unroller<Func, Derived, 0, Size / PacketSize>::run(mat,func)); 3222b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang if (VectorizedSize != Size) 3232b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang res = func(res,redux_novec_unroller<Func, Derived, VectorizedSize, Size-VectorizedSize>::run(mat,func)); 3242b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return res; 3252b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 3262b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang else { 3272b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return redux_novec_unroller<Func, Derived, 0, Size>::run(mat,func); 3282b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 329c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 330c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 331c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 3322b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang// evaluator adaptor 3332b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<typename _XprType> 3342b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangclass redux_evaluator 3352b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ 3362b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangpublic: 3372b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang typedef _XprType XprType; 3382b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang EIGEN_DEVICE_FUNC explicit redux_evaluator(const XprType &xpr) : m_evaluator(xpr), m_xpr(xpr) {} 3392b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 3402b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang typedef typename XprType::Scalar Scalar; 3412b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang typedef typename XprType::CoeffReturnType CoeffReturnType; 3422b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang typedef typename XprType::PacketScalar PacketScalar; 3432b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang typedef typename XprType::PacketReturnType PacketReturnType; 3442b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 3452b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang enum { 3462b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang MaxRowsAtCompileTime = XprType::MaxRowsAtCompileTime, 3472b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang MaxColsAtCompileTime = XprType::MaxColsAtCompileTime, 3482b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang // TODO we should not remove DirectAccessBit and rather find an elegant way to query the alignment offset at runtime from the evaluator 3492b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang Flags = evaluator<XprType>::Flags & ~DirectAccessBit, 3502b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang IsRowMajor = XprType::IsRowMajor, 3512b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang SizeAtCompileTime = XprType::SizeAtCompileTime, 3522b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang InnerSizeAtCompileTime = XprType::InnerSizeAtCompileTime, 3532b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang CoeffReadCost = evaluator<XprType>::CoeffReadCost, 3542b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang Alignment = evaluator<XprType>::Alignment 3552b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang }; 3562b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 3572b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang EIGEN_DEVICE_FUNC Index rows() const { return m_xpr.rows(); } 3582b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang EIGEN_DEVICE_FUNC Index cols() const { return m_xpr.cols(); } 3592b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang EIGEN_DEVICE_FUNC Index size() const { return m_xpr.size(); } 3602b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang EIGEN_DEVICE_FUNC Index innerSize() const { return m_xpr.innerSize(); } 3612b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang EIGEN_DEVICE_FUNC Index outerSize() const { return m_xpr.outerSize(); } 3622b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 3632b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang EIGEN_DEVICE_FUNC 3642b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang CoeffReturnType coeff(Index row, Index col) const 3652b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang { return m_evaluator.coeff(row, col); } 3662b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 3672b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang EIGEN_DEVICE_FUNC 3682b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang CoeffReturnType coeff(Index index) const 3692b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang { return m_evaluator.coeff(index); } 3702b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 3712b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang template<int LoadMode, typename PacketType> 3722b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang PacketType packet(Index row, Index col) const 3732b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang { return m_evaluator.template packet<LoadMode,PacketType>(row, col); } 3742b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 3752b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang template<int LoadMode, typename PacketType> 3762b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang PacketType packet(Index index) const 3772b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang { return m_evaluator.template packet<LoadMode,PacketType>(index); } 3782b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 3792b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang EIGEN_DEVICE_FUNC 3802b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang CoeffReturnType coeffByOuterInner(Index outer, Index inner) const 3812b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang { return m_evaluator.coeff(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); } 3822b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 3832b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang template<int LoadMode, typename PacketType> 3842b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang PacketType packetByOuterInner(Index outer, Index inner) const 3852b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang { return m_evaluator.template packet<LoadMode,PacketType>(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); } 3862b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 3872b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang const XprType & nestedExpression() const { return m_xpr; } 3882b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 3892b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangprotected: 3902b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang internal::evaluator<XprType> m_evaluator; 3912b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang const XprType &m_xpr; 3922b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang}; 3932b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 394c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} // end namespace internal 395c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 396c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/*************************************************************************** 397c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath* Part 4 : public API 398c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath***************************************************************************/ 399c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 400c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 401c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/** \returns the result of a full redux operation on the whole matrix or vector using \a func 402c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath * 403c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath * The template parameter \a BinaryOp is the type of the functor \a func which must be 4042b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang * an associative operator. Both current C++98 and C++11 functor styles are handled. 405c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath * 406c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath * \sa DenseBase::sum(), DenseBase::minCoeff(), DenseBase::maxCoeff(), MatrixBase::colwise(), MatrixBase::rowwise() 407c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath */ 408c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Derived> 409c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Func> 4102b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtypename internal::traits<Derived>::Scalar 411c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathDenseBase<Derived>::redux(const Func& func) const 412c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 4132b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix"); 4142b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 4152b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang typedef typename internal::redux_evaluator<Derived> ThisEvaluator; 4162b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang ThisEvaluator thisEval(derived()); 4172b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 4182b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return internal::redux_impl<Func, ThisEvaluator>::run(thisEval, func); 419c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 420c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 4217faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez/** \returns the minimum of all coefficients of \c *this. 4227faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez * \warning the result is undefined if \c *this contains NaN. 423c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath */ 424c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Derived> 425c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar 426c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathDenseBase<Derived>::minCoeff() const 427c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 4282b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return derived().redux(Eigen::internal::scalar_min_op<Scalar,Scalar>()); 429c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 430c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 4317faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez/** \returns the maximum of all coefficients of \c *this. 4327faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez * \warning the result is undefined if \c *this contains NaN. 433c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath */ 434c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Derived> 435c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar 436c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathDenseBase<Derived>::maxCoeff() const 437c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 4382b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return derived().redux(Eigen::internal::scalar_max_op<Scalar,Scalar>()); 439c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 440c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 4412b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang/** \returns the sum of all coefficients of \c *this 4422b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang * 4432b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang * If \c *this is empty, then the value 0 is returned. 444c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath * 445c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath * \sa trace(), prod(), mean() 446c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath */ 447c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Derived> 448c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar 449c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathDenseBase<Derived>::sum() const 450c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 451c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0)) 452c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return Scalar(0); 4532b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return derived().redux(Eigen::internal::scalar_sum_op<Scalar,Scalar>()); 454c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 455c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 456c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/** \returns the mean of all coefficients of *this 457c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath* 458c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath* \sa trace(), prod(), sum() 459c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath*/ 460c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Derived> 461c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar 462c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathDenseBase<Derived>::mean() const 463c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 4642b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#ifdef __INTEL_COMPILER 4652b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang #pragma warning push 4662b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang #pragma warning ( disable : 2259 ) 4672b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#endif 4682b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return Scalar(derived().redux(Eigen::internal::scalar_sum_op<Scalar,Scalar>())) / Scalar(this->size()); 4692b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#ifdef __INTEL_COMPILER 4702b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang #pragma warning pop 4712b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#endif 472c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 473c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 474c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/** \returns the product of all coefficients of *this 475c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath * 476c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath * Example: \include MatrixBase_prod.cpp 477c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath * Output: \verbinclude MatrixBase_prod.out 478c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath * 479c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath * \sa sum(), mean(), trace() 480c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath */ 481c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Derived> 482c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar 483c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathDenseBase<Derived>::prod() const 484c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 485c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0)) 486c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return Scalar(1); 4872b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return derived().redux(Eigen::internal::scalar_product_op<Scalar>()); 488c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 489c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 490c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/** \returns the trace of \c *this, i.e. the sum of the coefficients on the main diagonal. 491c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath * 492c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath * \c *this can be any matrix, not necessarily square. 493c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath * 494c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath * \sa diagonal(), sum() 495c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath */ 496c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Derived> 497c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar 498c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathMatrixBase<Derived>::trace() const 499c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 500c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return derived().diagonal().sum(); 501c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 502c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 503c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} // end namespace Eigen 504c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 505c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#endif // EIGEN_REDUX_H 506