1c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// This file is part of Eigen, a lightweight C++ template library 2c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// for linear algebra. 3c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// 4c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr> 5c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// 6c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// This Source Code Form is subject to the terms of the Mozilla 7c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// Public License v. 2.0. If a copy of the MPL was not distributed 8c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 9c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 10c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#ifndef EIGEN_SELFADJOINT_MATRIX_VECTOR_H 11c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#define EIGEN_SELFADJOINT_MATRIX_VECTOR_H 12c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 13c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathnamespace Eigen { 14c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 15c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathnamespace internal { 16c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 17c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/* Optimized selfadjoint matrix * vector product: 18c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath * This algorithm processes 2 columns at onces that allows to both reduce 19c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath * the number of load/stores of the result by a factor 2 and to reduce 20c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath * the instruction dependency. 21c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath */ 22c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 23c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs, int Version=Specialized> 24c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct selfadjoint_matrix_vector_product; 25c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 26c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs, int Version> 27c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct selfadjoint_matrix_vector_product 28c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 29c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 30c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstatic EIGEN_DONT_INLINE void run( 31c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Index size, 32c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath const Scalar* lhs, Index lhsStride, 33c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath const Scalar* _rhs, Index rhsIncr, 34c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Scalar* res, 35c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Scalar alpha) 36c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 37c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typedef typename packet_traits<Scalar>::type Packet; 38c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typedef typename NumTraits<Scalar>::Real RealScalar; 39c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath const Index PacketSize = sizeof(Packet)/sizeof(Scalar); 40c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 41c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath enum { 42c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath IsRowMajor = StorageOrder==RowMajor ? 1 : 0, 43c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath IsLower = UpLo == Lower ? 1 : 0, 44c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath FirstTriangular = IsRowMajor == IsLower 45c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath }; 46c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 47c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath conj_helper<Scalar,Scalar,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, IsRowMajor), ConjugateRhs> cj0; 48c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath conj_helper<Scalar,Scalar,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, !IsRowMajor), ConjugateRhs> cj1; 49c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath conj_helper<Scalar,Scalar,NumTraits<Scalar>::IsComplex, ConjugateRhs> cjd; 50c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 51c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, IsRowMajor), ConjugateRhs> pcj0; 52c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, !IsRowMajor), ConjugateRhs> pcj1; 53c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 54c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Scalar cjAlpha = ConjugateRhs ? conj(alpha) : alpha; 55c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 56c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // FIXME this copy is now handled outside product_selfadjoint_vector, so it could probably be removed. 57c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // if the rhs is not sequentially stored in memory we copy it to a temporary buffer, 58c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // this is because we need to extract packets 59c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath ei_declare_aligned_stack_constructed_variable(Scalar,rhs,size,rhsIncr==1 ? const_cast<Scalar*>(_rhs) : 0); 60c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath if (rhsIncr!=1) 61c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 62c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath const Scalar* it = _rhs; 63c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath for (Index i=0; i<size; ++i, it+=rhsIncr) 64c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath rhs[i] = *it; 65c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 66c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 67c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Index bound = (std::max)(Index(0),size-8) & 0xfffffffe; 68c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath if (FirstTriangular) 69c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath bound = size - bound; 70c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 71c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath for (Index j=FirstTriangular ? bound : 0; 72c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath j<(FirstTriangular ? size : bound);j+=2) 73c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 74c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath register const Scalar* EIGEN_RESTRICT A0 = lhs + j*lhsStride; 75c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath register const Scalar* EIGEN_RESTRICT A1 = lhs + (j+1)*lhsStride; 76c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 77c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Scalar t0 = cjAlpha * rhs[j]; 78c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Packet ptmp0 = pset1<Packet>(t0); 79c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Scalar t1 = cjAlpha * rhs[j+1]; 80c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Packet ptmp1 = pset1<Packet>(t1); 81c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 82c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Scalar t2(0); 83c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Packet ptmp2 = pset1<Packet>(t2); 84c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Scalar t3(0); 85c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Packet ptmp3 = pset1<Packet>(t3); 86c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 87c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath size_t starti = FirstTriangular ? 0 : j+2; 88c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath size_t endi = FirstTriangular ? j : size; 89c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath size_t alignedStart = (starti) + internal::first_aligned(&res[starti], endi-starti); 90c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath size_t alignedEnd = alignedStart + ((endi-alignedStart)/(PacketSize))*(PacketSize); 91c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 92c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // TODO make sure this product is a real * complex and that the rhs is properly conjugated if needed 93c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath res[j] += cjd.pmul(internal::real(A0[j]), t0); 94c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath res[j+1] += cjd.pmul(internal::real(A1[j+1]), t1); 95c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath if(FirstTriangular) 96c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 97c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath res[j] += cj0.pmul(A1[j], t1); 98c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath t3 += cj1.pmul(A1[j], rhs[j]); 99c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 100c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath else 101c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 102c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath res[j+1] += cj0.pmul(A0[j+1],t0); 103c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath t2 += cj1.pmul(A0[j+1], rhs[j+1]); 104c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 105c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 106c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath for (size_t i=starti; i<alignedStart; ++i) 107c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 108c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath res[i] += t0 * A0[i] + t1 * A1[i]; 109c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath t2 += conj(A0[i]) * rhs[i]; 110c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath t3 += conj(A1[i]) * rhs[i]; 111c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 112c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // Yes this an optimization for gcc 4.3 and 4.4 (=> huge speed up) 113c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // gcc 4.2 does this optimization automatically. 114c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath const Scalar* EIGEN_RESTRICT a0It = A0 + alignedStart; 115c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath const Scalar* EIGEN_RESTRICT a1It = A1 + alignedStart; 116c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath const Scalar* EIGEN_RESTRICT rhsIt = rhs + alignedStart; 117c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Scalar* EIGEN_RESTRICT resIt = res + alignedStart; 118c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath for (size_t i=alignedStart; i<alignedEnd; i+=PacketSize) 119c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 120c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Packet A0i = ploadu<Packet>(a0It); a0It += PacketSize; 121c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Packet A1i = ploadu<Packet>(a1It); a1It += PacketSize; 122c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Packet Bi = ploadu<Packet>(rhsIt); rhsIt += PacketSize; // FIXME should be aligned in most cases 123c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Packet Xi = pload <Packet>(resIt); 124c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 125c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Xi = pcj0.pmadd(A0i,ptmp0, pcj0.pmadd(A1i,ptmp1,Xi)); 126c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath ptmp2 = pcj1.pmadd(A0i, Bi, ptmp2); 127c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath ptmp3 = pcj1.pmadd(A1i, Bi, ptmp3); 128c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath pstore(resIt,Xi); resIt += PacketSize; 129c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 130c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath for (size_t i=alignedEnd; i<endi; i++) 131c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 132c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath res[i] += cj0.pmul(A0[i], t0) + cj0.pmul(A1[i],t1); 133c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath t2 += cj1.pmul(A0[i], rhs[i]); 134c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath t3 += cj1.pmul(A1[i], rhs[i]); 135c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 136c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 137c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath res[j] += alpha * (t2 + predux(ptmp2)); 138c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath res[j+1] += alpha * (t3 + predux(ptmp3)); 139c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 140c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath for (Index j=FirstTriangular ? 0 : bound;j<(FirstTriangular ? bound : size);j++) 141c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 142c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath register const Scalar* EIGEN_RESTRICT A0 = lhs + j*lhsStride; 143c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 144c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Scalar t1 = cjAlpha * rhs[j]; 145c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Scalar t2(0); 146c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // TODO make sure this product is a real * complex and that the rhs is properly conjugated if needed 147c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath res[j] += cjd.pmul(internal::real(A0[j]), t1); 148c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath for (Index i=FirstTriangular ? 0 : j+1; i<(FirstTriangular ? j : size); i++) 149c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 150c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath res[i] += cj0.pmul(A0[i], t1); 151c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath t2 += cj1.pmul(A0[i], rhs[i]); 152c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 153c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath res[j] += alpha * t2; 154c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 155c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 156c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 157c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 158c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} // end namespace internal 159c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 160c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/*************************************************************************** 161c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath* Wrapper to product_selfadjoint_vector 162c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath***************************************************************************/ 163c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 164c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathnamespace internal { 165c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Lhs, int LhsMode, typename Rhs> 166c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct traits<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true> > 167c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath : traits<ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>, Lhs, Rhs> > 168c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{}; 169c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 170c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 171c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Lhs, int LhsMode, typename Rhs> 172c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true> 173c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath : public ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>, Lhs, Rhs > 174c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 175c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_PRODUCT_PUBLIC_INTERFACE(SelfadjointProductMatrix) 176c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 177c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath enum { 178c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath LhsUpLo = LhsMode&(Upper|Lower) 179c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath }; 180c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 181c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {} 182c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 183c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath template<typename Dest> void scaleAndAddTo(Dest& dest, Scalar alpha) const 184c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 185c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typedef typename Dest::Scalar ResScalar; 186c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typedef typename Base::RhsScalar RhsScalar; 187c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest; 188c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 189c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath eigen_assert(dest.rows()==m_lhs.rows() && dest.cols()==m_rhs.cols()); 190c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 191c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs); 192c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs); 193c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 194c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs) 195c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath * RhsBlasTraits::extractScalarFactor(m_rhs); 196c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 197c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath enum { 198c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EvalToDest = (Dest::InnerStrideAtCompileTime==1), 199c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath UseRhs = (_ActualRhsType::InnerStrideAtCompileTime==1) 200c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath }; 201c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 202c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath internal::gemv_static_vector_if<ResScalar,Dest::SizeAtCompileTime,Dest::MaxSizeAtCompileTime,!EvalToDest> static_dest; 203c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath internal::gemv_static_vector_if<RhsScalar,_ActualRhsType::SizeAtCompileTime,_ActualRhsType::MaxSizeAtCompileTime,!UseRhs> static_rhs; 204c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 205c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(), 206c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EvalToDest ? dest.data() : static_dest.data()); 207c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 208c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhsPtr,rhs.size(), 209c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath UseRhs ? const_cast<RhsScalar*>(rhs.data()) : static_rhs.data()); 210c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 211c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath if(!EvalToDest) 212c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 213c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN 214c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath int size = dest.size(); 215c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_DENSE_STORAGE_CTOR_PLUGIN 216c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath #endif 217c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath MappedDest(actualDestPtr, dest.size()) = dest; 218c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 219c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 220c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath if(!UseRhs) 221c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 222c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN 223c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath int size = rhs.size(); 224c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_DENSE_STORAGE_CTOR_PLUGIN 225c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath #endif 226c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Map<typename _ActualRhsType::PlainObject>(actualRhsPtr, rhs.size()) = rhs; 227c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 228c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 229c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 230c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath internal::selfadjoint_matrix_vector_product<Scalar, Index, (internal::traits<_ActualLhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, int(LhsUpLo), bool(LhsBlasTraits::NeedToConjugate), bool(RhsBlasTraits::NeedToConjugate)>::run 231c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath ( 232c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath lhs.rows(), // size 233c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath &lhs.coeffRef(0,0), lhs.outerStride(), // lhs info 234c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath actualRhsPtr, 1, // rhs info 235c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath actualDestPtr, // result info 236c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath actualAlpha // scale factor 237c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath ); 238c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 239c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath if(!EvalToDest) 240c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath dest = MappedDest(actualDestPtr, dest.size()); 241c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 242c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 243c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 244c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathnamespace internal { 245c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Lhs, typename Rhs, int RhsMode> 246c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct traits<SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false> > 247c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath : traits<ProductBase<SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false>, Lhs, Rhs> > 248c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{}; 249c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 250c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 251c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Lhs, typename Rhs, int RhsMode> 252c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false> 253c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath : public ProductBase<SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false>, Lhs, Rhs > 254c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 255c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_PRODUCT_PUBLIC_INTERFACE(SelfadjointProductMatrix) 256c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 257c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath enum { 258c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath RhsUpLo = RhsMode&(Upper|Lower) 259c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath }; 260c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 261c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {} 262c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 263c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath template<typename Dest> void scaleAndAddTo(Dest& dest, Scalar alpha) const 264c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 265c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // let's simply transpose the product 266c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Transpose<Dest> destT(dest); 267c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath SelfadjointProductMatrix<Transpose<const Rhs>, int(RhsUpLo)==Upper ? Lower : Upper, false, 268c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Transpose<const Lhs>, 0, true>(m_rhs.transpose(), m_lhs.transpose()).scaleAndAddTo(destT, alpha); 269c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 270c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 271c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 272c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} // end namespace Eigen 273c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 274c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#endif // EIGEN_SELFADJOINT_MATRIX_VECTOR_H 275