SelfadjointMatrixVector.h revision c981c48f5bc9aefeffc0bcb0cc3934c2fae179dd
1c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch// This file is part of Eigen, a lightweight C++ template library 2c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch// for linear algebra. 3c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch// 4c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr> 55c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu// 65c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu// This Source Code Form is subject to the terms of the Mozilla 75c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu// Public License v. 2.0. If a copy of the MPL was not distributed 85c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 9c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch 105c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu#ifndef EIGEN_SELFADJOINT_MATRIX_VECTOR_H 11c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch#define EIGEN_SELFADJOINT_MATRIX_VECTOR_H 125c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu 135c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liunamespace Eigen { 145c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu 155c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liunamespace internal { 165c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu 175c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu/* Optimized selfadjoint matrix * vector product: 185c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu * This algorithm processes 2 columns at onces that allows to both reduce 195c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu * the number of load/stores of the result by a factor 2 and to reduce 20c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch * the instruction dependency. 21c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch */ 225c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu 235c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liutemplate<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs, int Version=Specialized> 245c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liustruct selfadjoint_matrix_vector_product; 255c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu 26c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdochtemplate<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs, int Version> 27c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdochstruct selfadjoint_matrix_vector_product 28c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch 29c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch{ 30c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdochstatic EIGEN_DONT_INLINE void run( 31c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch Index size, 325c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu const Scalar* lhs, Index lhsStride, 335c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu const Scalar* _rhs, Index rhsIncr, 34c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch Scalar* res, 35c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch Scalar alpha) 365c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu{ 37c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch typedef typename packet_traits<Scalar>::type Packet; 385c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu typedef typename NumTraits<Scalar>::Real RealScalar; 39c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch const Index PacketSize = sizeof(Packet)/sizeof(Scalar); 405c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu 41c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch enum { 42c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch IsRowMajor = StorageOrder==RowMajor ? 1 : 0, 435c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu IsLower = UpLo == Lower ? 1 : 0, 445c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu FirstTriangular = IsRowMajor == IsLower 455c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu }; 465c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu 47c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch conj_helper<Scalar,Scalar,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, IsRowMajor), ConjugateRhs> cj0; 48c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch conj_helper<Scalar,Scalar,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, !IsRowMajor), ConjugateRhs> cj1; 49c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch conj_helper<Scalar,Scalar,NumTraits<Scalar>::IsComplex, ConjugateRhs> cjd; 50c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch 51c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, IsRowMajor), ConjugateRhs> pcj0; 52c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, !IsRowMajor), ConjugateRhs> pcj1; 53c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch 54c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch Scalar cjAlpha = ConjugateRhs ? conj(alpha) : alpha; 55c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch 56c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch // FIXME this copy is now handled outside product_selfadjoint_vector, so it could probably be removed. 57c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch // if the rhs is not sequentially stored in memory we copy it to a temporary buffer, 58c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch // this is because we need to extract packets 59c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch ei_declare_aligned_stack_constructed_variable(Scalar,rhs,size,rhsIncr==1 ? const_cast<Scalar*>(_rhs) : 0); 60c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch if (rhsIncr!=1) 61c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch { 625c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu const Scalar* it = _rhs; 635c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu for (Index i=0; i<size; ++i, it+=rhsIncr) 645c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu rhs[i] = *it; 655c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu } 665c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu 675c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu Index bound = (std::max)(Index(0),size-8) & 0xfffffffe; 685c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu if (FirstTriangular) 695c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu bound = size - bound; 705c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu 715c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu for (Index j=FirstTriangular ? bound : 0; 725c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu j<(FirstTriangular ? size : bound);j+=2) 735c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu { 745c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu register const Scalar* EIGEN_RESTRICT A0 = lhs + j*lhsStride; 755c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu register const Scalar* EIGEN_RESTRICT A1 = lhs + (j+1)*lhsStride; 765c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu 775c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu Scalar t0 = cjAlpha * rhs[j]; 785c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu Packet ptmp0 = pset1<Packet>(t0); 795c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu Scalar t1 = cjAlpha * rhs[j+1]; 805c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu Packet ptmp1 = pset1<Packet>(t1); 815c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu 825c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu Scalar t2(0); 835c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu Packet ptmp2 = pset1<Packet>(t2); 845c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu Scalar t3(0); 855c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu Packet ptmp3 = pset1<Packet>(t3); 865c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu 875c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu size_t starti = FirstTriangular ? 0 : j+2; 885c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu size_t endi = FirstTriangular ? j : size; 895c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu size_t alignedStart = (starti) + internal::first_aligned(&res[starti], endi-starti); 905c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu size_t alignedEnd = alignedStart + ((endi-alignedStart)/(PacketSize))*(PacketSize); 915c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu 925c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu // TODO make sure this product is a real * complex and that the rhs is properly conjugated if needed 935c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu res[j] += cjd.pmul(internal::real(A0[j]), t0); 945c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu res[j+1] += cjd.pmul(internal::real(A1[j+1]), t1); 955c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu if(FirstTriangular) 965c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu { 975c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu res[j] += cj0.pmul(A1[j], t1); 985c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu t3 += cj1.pmul(A1[j], rhs[j]); 995c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu } 1005c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu else 1015c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu { 1025c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu res[j+1] += cj0.pmul(A0[j+1],t0); 1035c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu t2 += cj1.pmul(A0[j+1], rhs[j+1]); 1045c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu } 1055c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu 1065c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu for (size_t i=starti; i<alignedStart; ++i) 1075c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu { 1085c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu res[i] += t0 * A0[i] + t1 * A1[i]; 1095c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu t2 += conj(A0[i]) * rhs[i]; 1105c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu t3 += conj(A1[i]) * rhs[i]; 1115c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu } 1125c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu // Yes this an optimization for gcc 4.3 and 4.4 (=> huge speed up) 1135c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu // gcc 4.2 does this optimization automatically. 1145c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu const Scalar* EIGEN_RESTRICT a0It = A0 + alignedStart; 1155c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu const Scalar* EIGEN_RESTRICT a1It = A1 + alignedStart; 1165c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu const Scalar* EIGEN_RESTRICT rhsIt = rhs + alignedStart; 1175c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu Scalar* EIGEN_RESTRICT resIt = res + alignedStart; 1185c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu for (size_t i=alignedStart; i<alignedEnd; i+=PacketSize) 1195c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu { 1205c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu Packet A0i = ploadu<Packet>(a0It); a0It += PacketSize; 1215c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu Packet A1i = ploadu<Packet>(a1It); a1It += PacketSize; 1225c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu Packet Bi = ploadu<Packet>(rhsIt); rhsIt += PacketSize; // FIXME should be aligned in most cases 1235c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu Packet Xi = pload <Packet>(resIt); 1245c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu 125c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch Xi = pcj0.pmadd(A0i,ptmp0, pcj0.pmadd(A1i,ptmp1,Xi)); 126c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch ptmp2 = pcj1.pmadd(A0i, Bi, ptmp2); 127c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch ptmp3 = pcj1.pmadd(A1i, Bi, ptmp3); 128c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch pstore(resIt,Xi); resIt += PacketSize; 129c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch } 1305c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu for (size_t i=alignedEnd; i<endi; i++) 131c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch { 132c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch res[i] += cj0.pmul(A0[i], t0) + cj0.pmul(A1[i],t1); 133c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch t2 += cj1.pmul(A0[i], rhs[i]); 134c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch t3 += cj1.pmul(A1[i], rhs[i]); 135c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch } 136c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch 137c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch res[j] += alpha * (t2 + predux(ptmp2)); 1385c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu res[j+1] += alpha * (t3 + predux(ptmp3)); 139c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch } 140c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch for (Index j=FirstTriangular ? 0 : bound;j<(FirstTriangular ? bound : size);j++) 141c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch { 142c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch register const Scalar* EIGEN_RESTRICT A0 = lhs + j*lhsStride; 143c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch 144c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch Scalar t1 = cjAlpha * rhs[j]; 145c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch Scalar t2(0); 1465c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu // TODO make sure this product is a real * complex and that the rhs is properly conjugated if needed 147c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch res[j] += cjd.pmul(internal::real(A0[j]), t1); 148c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch for (Index i=FirstTriangular ? 0 : j+1; i<(FirstTriangular ? j : size); i++) 149c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch { 1505c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu res[i] += cj0.pmul(A0[i], t1); 151c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch t2 += cj1.pmul(A0[i], rhs[i]); 152c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch } 153c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch res[j] += alpha * t2; 154c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch } 1555c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu} 156c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch}; 157c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch 158c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch} // end namespace internal 159c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch 160c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch/*************************************************************************** 1615c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu* Wrapper to product_selfadjoint_vector 162c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch***************************************************************************/ 163c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch 164c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdochnamespace internal { 165c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdochtemplate<typename Lhs, int LhsMode, typename Rhs> 166c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdochstruct traits<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true> > 167c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch : traits<ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>, Lhs, Rhs> > 168c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch{}; 169c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch} 1705c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu 171c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdochtemplate<typename Lhs, int LhsMode, typename Rhs> 172c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdochstruct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true> 173c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch : public ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>, Lhs, Rhs > 174c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch{ 1755c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu EIGEN_PRODUCT_PUBLIC_INTERFACE(SelfadjointProductMatrix) 1765c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu 177c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch enum { 178c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch LhsUpLo = LhsMode&(Upper|Lower) 179c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch }; 180c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch 181c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {} 182c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch 183c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch template<typename Dest> void scaleAndAddTo(Dest& dest, Scalar alpha) const 184c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch { 185c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch typedef typename Dest::Scalar ResScalar; 186c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch typedef typename Base::RhsScalar RhsScalar; 187c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest; 188c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch 189c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch eigen_assert(dest.rows()==m_lhs.rows() && dest.cols()==m_rhs.cols()); 190c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch 191c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs); 192c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs); 193c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch 194c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs) 195c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch * RhsBlasTraits::extractScalarFactor(m_rhs); 196c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch 197c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch enum { 198c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch EvalToDest = (Dest::InnerStrideAtCompileTime==1), 199c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch UseRhs = (_ActualRhsType::InnerStrideAtCompileTime==1) 200c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch }; 201c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch 202c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch internal::gemv_static_vector_if<ResScalar,Dest::SizeAtCompileTime,Dest::MaxSizeAtCompileTime,!EvalToDest> static_dest; 203c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch internal::gemv_static_vector_if<RhsScalar,_ActualRhsType::SizeAtCompileTime,_ActualRhsType::MaxSizeAtCompileTime,!UseRhs> static_rhs; 204c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch 205c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(), 206c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch EvalToDest ? dest.data() : static_dest.data()); 207c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch 208c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhsPtr,rhs.size(), 209c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch UseRhs ? const_cast<RhsScalar*>(rhs.data()) : static_rhs.data()); 210c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch 211c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch if(!EvalToDest) 2125c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu { 2135c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN 2145c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu int size = dest.size(); 2155c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu EIGEN_DENSE_STORAGE_CTOR_PLUGIN 2165c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu #endif 2175c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu MappedDest(actualDestPtr, dest.size()) = dest; 2185c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu } 2195c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu 220c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch if(!UseRhs) 221c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch { 222c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN 223c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch int size = rhs.size(); 224c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch EIGEN_DENSE_STORAGE_CTOR_PLUGIN 225c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch #endif 226c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch Map<typename _ActualRhsType::PlainObject>(actualRhsPtr, rhs.size()) = rhs; 227c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch } 228c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch 229c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch 230c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch internal::selfadjoint_matrix_vector_product<Scalar, Index, (internal::traits<_ActualLhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, int(LhsUpLo), bool(LhsBlasTraits::NeedToConjugate), bool(RhsBlasTraits::NeedToConjugate)>::run 231c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch ( 232c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch lhs.rows(), // size 233c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch &lhs.coeffRef(0,0), lhs.outerStride(), // lhs info 234c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch actualRhsPtr, 1, // rhs info 235c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch actualDestPtr, // result info 236c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch actualAlpha // scale factor 237c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch ); 238c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch 239c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch if(!EvalToDest) 240c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch dest = MappedDest(actualDestPtr, dest.size()); 241c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch } 242c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch}; 243c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch 244c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdochnamespace internal { 245c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdochtemplate<typename Lhs, typename Rhs, int RhsMode> 246c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdochstruct traits<SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false> > 247c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch : traits<ProductBase<SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false>, Lhs, Rhs> > 248c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch{}; 249c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch} 250c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch 251c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdochtemplate<typename Lhs, typename Rhs, int RhsMode> 252c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdochstruct SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false> 253c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch : public ProductBase<SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false>, Lhs, Rhs > 254c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch{ 255c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch EIGEN_PRODUCT_PUBLIC_INTERFACE(SelfadjointProductMatrix) 256c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch 257c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch enum { 258c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch RhsUpLo = RhsMode&(Upper|Lower) 259c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch }; 260c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch 261c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {} 262c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch 263c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch template<typename Dest> void scaleAndAddTo(Dest& dest, Scalar alpha) const 264c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch { 265c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch // let's simply transpose the product 266c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch Transpose<Dest> destT(dest); 267c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch SelfadjointProductMatrix<Transpose<const Rhs>, int(RhsUpLo)==Upper ? Lower : Upper, false, 268c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch Transpose<const Lhs>, 0, true>(m_rhs.transpose(), m_lhs.transpose()).scaleAndAddTo(destT, alpha); 269c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch } 270c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch}; 271c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch 272c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch} // end namespace Eigen 273c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch 274c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch#endif // EIGEN_SELFADJOINT_MATRIX_VECTOR_H 275c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch