1c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// This file is part of Eigen, a lightweight C++ template library
2c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// for linear algebra.
3c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath//
4c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
5c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath//
6c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// This Source Code Form is subject to the terms of the Mozilla
7c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// Public License v. 2.0. If a copy of the MPL was not distributed
8c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
10c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#ifndef EIGEN_SELFADJOINT_MATRIX_VECTOR_H
11c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#define EIGEN_SELFADJOINT_MATRIX_VECTOR_H
12c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
13c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathnamespace Eigen {
14c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
15c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathnamespace internal {
16c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
17c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/* Optimized selfadjoint matrix * vector product:
18c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath * This algorithm processes 2 columns at onces that allows to both reduce
19c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath * the number of load/stores of the result by a factor 2 and to reduce
20c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath * the instruction dependency.
21c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath */
22c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
23c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs, int Version=Specialized>
24c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct selfadjoint_matrix_vector_product;
25c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
26c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs, int Version>
27c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct selfadjoint_matrix_vector_product
28c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
29c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{
30c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstatic EIGEN_DONT_INLINE void run(
31c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  Index size,
32c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  const Scalar*  lhs, Index lhsStride,
33c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  const Scalar* _rhs, Index rhsIncr,
34c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  Scalar* res,
35c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  Scalar alpha)
36c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{
37c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  typedef typename packet_traits<Scalar>::type Packet;
38c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  typedef typename NumTraits<Scalar>::Real RealScalar;
39c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  const Index PacketSize = sizeof(Packet)/sizeof(Scalar);
40c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
41c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  enum {
42c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    IsRowMajor = StorageOrder==RowMajor ? 1 : 0,
43c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    IsLower = UpLo == Lower ? 1 : 0,
44c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    FirstTriangular = IsRowMajor == IsLower
45c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  };
46c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
47c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  conj_helper<Scalar,Scalar,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs,  IsRowMajor), ConjugateRhs> cj0;
48c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  conj_helper<Scalar,Scalar,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, !IsRowMajor), ConjugateRhs> cj1;
49c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  conj_helper<Scalar,Scalar,NumTraits<Scalar>::IsComplex, ConjugateRhs> cjd;
50c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
51c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs,  IsRowMajor), ConjugateRhs> pcj0;
52c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, !IsRowMajor), ConjugateRhs> pcj1;
53c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
54c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  Scalar cjAlpha = ConjugateRhs ? conj(alpha) : alpha;
55c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
56c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  // FIXME this copy is now handled outside product_selfadjoint_vector, so it could probably be removed.
57c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  // if the rhs is not sequentially stored in memory we copy it to a temporary buffer,
58c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  // this is because we need to extract packets
59c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  ei_declare_aligned_stack_constructed_variable(Scalar,rhs,size,rhsIncr==1 ? const_cast<Scalar*>(_rhs) : 0);
60c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  if (rhsIncr!=1)
61c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  {
62c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    const Scalar* it = _rhs;
63c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    for (Index i=0; i<size; ++i, it+=rhsIncr)
64c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      rhs[i] = *it;
65c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  }
66c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
67c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  Index bound = (std::max)(Index(0),size-8) & 0xfffffffe;
68c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  if (FirstTriangular)
69c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    bound = size - bound;
70c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
71c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  for (Index j=FirstTriangular ? bound : 0;
72c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath       j<(FirstTriangular ? size : bound);j+=2)
73c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  {
74c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    register const Scalar* EIGEN_RESTRICT A0 = lhs + j*lhsStride;
75c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    register const Scalar* EIGEN_RESTRICT A1 = lhs + (j+1)*lhsStride;
76c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
77c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    Scalar t0 = cjAlpha * rhs[j];
78c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    Packet ptmp0 = pset1<Packet>(t0);
79c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    Scalar t1 = cjAlpha * rhs[j+1];
80c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    Packet ptmp1 = pset1<Packet>(t1);
81c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
82c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    Scalar t2(0);
83c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    Packet ptmp2 = pset1<Packet>(t2);
84c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    Scalar t3(0);
85c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    Packet ptmp3 = pset1<Packet>(t3);
86c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
87c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    size_t starti = FirstTriangular ? 0 : j+2;
88c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    size_t endi   = FirstTriangular ? j : size;
89c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    size_t alignedStart = (starti) + internal::first_aligned(&res[starti], endi-starti);
90c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    size_t alignedEnd = alignedStart + ((endi-alignedStart)/(PacketSize))*(PacketSize);
91c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
92c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    // TODO make sure this product is a real * complex and that the rhs is properly conjugated if needed
93c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    res[j]   += cjd.pmul(internal::real(A0[j]), t0);
94c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    res[j+1] += cjd.pmul(internal::real(A1[j+1]), t1);
95c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    if(FirstTriangular)
96c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    {
97c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      res[j]   += cj0.pmul(A1[j],   t1);
98c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      t3       += cj1.pmul(A1[j],   rhs[j]);
99c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    }
100c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    else
101c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    {
102c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      res[j+1] += cj0.pmul(A0[j+1],t0);
103c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      t2 += cj1.pmul(A0[j+1], rhs[j+1]);
104c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    }
105c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
106c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    for (size_t i=starti; i<alignedStart; ++i)
107c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    {
108c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      res[i] += t0 * A0[i] + t1 * A1[i];
109c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      t2 += conj(A0[i]) * rhs[i];
110c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      t3 += conj(A1[i]) * rhs[i];
111c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    }
112c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    // Yes this an optimization for gcc 4.3 and 4.4 (=> huge speed up)
113c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    // gcc 4.2 does this optimization automatically.
114c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    const Scalar* EIGEN_RESTRICT a0It  = A0  + alignedStart;
115c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    const Scalar* EIGEN_RESTRICT a1It  = A1  + alignedStart;
116c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    const Scalar* EIGEN_RESTRICT rhsIt = rhs + alignedStart;
117c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath          Scalar* EIGEN_RESTRICT resIt = res + alignedStart;
118c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    for (size_t i=alignedStart; i<alignedEnd; i+=PacketSize)
119c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    {
120c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      Packet A0i = ploadu<Packet>(a0It);  a0It  += PacketSize;
121c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      Packet A1i = ploadu<Packet>(a1It);  a1It  += PacketSize;
122c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      Packet Bi  = ploadu<Packet>(rhsIt); rhsIt += PacketSize; // FIXME should be aligned in most cases
123c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      Packet Xi  = pload <Packet>(resIt);
124c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
125c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      Xi    = pcj0.pmadd(A0i,ptmp0, pcj0.pmadd(A1i,ptmp1,Xi));
126c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      ptmp2 = pcj1.pmadd(A0i,  Bi, ptmp2);
127c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      ptmp3 = pcj1.pmadd(A1i,  Bi, ptmp3);
128c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      pstore(resIt,Xi); resIt += PacketSize;
129c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    }
130c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    for (size_t i=alignedEnd; i<endi; i++)
131c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    {
132c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      res[i] += cj0.pmul(A0[i], t0) + cj0.pmul(A1[i],t1);
133c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      t2 += cj1.pmul(A0[i], rhs[i]);
134c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      t3 += cj1.pmul(A1[i], rhs[i]);
135c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    }
136c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
137c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    res[j]   += alpha * (t2 + predux(ptmp2));
138c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    res[j+1] += alpha * (t3 + predux(ptmp3));
139c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  }
140c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  for (Index j=FirstTriangular ? 0 : bound;j<(FirstTriangular ? bound : size);j++)
141c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  {
142c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    register const Scalar* EIGEN_RESTRICT A0 = lhs + j*lhsStride;
143c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
144c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    Scalar t1 = cjAlpha * rhs[j];
145c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    Scalar t2(0);
146c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    // TODO make sure this product is a real * complex and that the rhs is properly conjugated if needed
147c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    res[j] += cjd.pmul(internal::real(A0[j]), t1);
148c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    for (Index i=FirstTriangular ? 0 : j+1; i<(FirstTriangular ? j : size); i++)
149c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    {
150c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      res[i] += cj0.pmul(A0[i], t1);
151c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      t2 += cj1.pmul(A0[i], rhs[i]);
152c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    }
153c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    res[j] += alpha * t2;
154c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  }
155c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}
156c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath};
157c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
158c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} // end namespace internal
159c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
160c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/***************************************************************************
161c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath* Wrapper to product_selfadjoint_vector
162c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath***************************************************************************/
163c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
164c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathnamespace internal {
165c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Lhs, int LhsMode, typename Rhs>
166c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct traits<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true> >
167c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  : traits<ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>, Lhs, Rhs> >
168c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{};
169c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}
170c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
171c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Lhs, int LhsMode, typename Rhs>
172c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>
173c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  : public ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>, Lhs, Rhs >
174c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{
175c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  EIGEN_PRODUCT_PUBLIC_INTERFACE(SelfadjointProductMatrix)
176c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
177c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  enum {
178c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    LhsUpLo = LhsMode&(Upper|Lower)
179c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  };
180c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
181c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
182c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
183c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  template<typename Dest> void scaleAndAddTo(Dest& dest, Scalar alpha) const
184c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  {
185c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    typedef typename Dest::Scalar ResScalar;
186c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    typedef typename Base::RhsScalar RhsScalar;
187c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest;
188c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
189c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    eigen_assert(dest.rows()==m_lhs.rows() && dest.cols()==m_rhs.cols());
190c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
191c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs);
192c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs);
193c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
194c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
195c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath                               * RhsBlasTraits::extractScalarFactor(m_rhs);
196c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
197c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    enum {
198c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      EvalToDest = (Dest::InnerStrideAtCompileTime==1),
199c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      UseRhs = (_ActualRhsType::InnerStrideAtCompileTime==1)
200c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    };
201c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
202c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    internal::gemv_static_vector_if<ResScalar,Dest::SizeAtCompileTime,Dest::MaxSizeAtCompileTime,!EvalToDest> static_dest;
203c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    internal::gemv_static_vector_if<RhsScalar,_ActualRhsType::SizeAtCompileTime,_ActualRhsType::MaxSizeAtCompileTime,!UseRhs> static_rhs;
204c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
205c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(),
206c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath                                                  EvalToDest ? dest.data() : static_dest.data());
207c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
208c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhsPtr,rhs.size(),
209c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath        UseRhs ? const_cast<RhsScalar*>(rhs.data()) : static_rhs.data());
210c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
211c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    if(!EvalToDest)
212c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    {
213c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
214c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      int size = dest.size();
215c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      EIGEN_DENSE_STORAGE_CTOR_PLUGIN
216c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      #endif
217c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      MappedDest(actualDestPtr, dest.size()) = dest;
218c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    }
219c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
220c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    if(!UseRhs)
221c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    {
222c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
223c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      int size = rhs.size();
224c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      EIGEN_DENSE_STORAGE_CTOR_PLUGIN
225c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      #endif
226c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      Map<typename _ActualRhsType::PlainObject>(actualRhsPtr, rhs.size()) = rhs;
227c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    }
228c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
229c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
230c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    internal::selfadjoint_matrix_vector_product<Scalar, Index, (internal::traits<_ActualLhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, int(LhsUpLo), bool(LhsBlasTraits::NeedToConjugate), bool(RhsBlasTraits::NeedToConjugate)>::run
231c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      (
232c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath        lhs.rows(),                             // size
233c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath        &lhs.coeffRef(0,0),  lhs.outerStride(), // lhs info
234c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath        actualRhsPtr, 1,                        // rhs info
235c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath        actualDestPtr,                          // result info
236c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath        actualAlpha                             // scale factor
237c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      );
238c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
239c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    if(!EvalToDest)
240c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      dest = MappedDest(actualDestPtr, dest.size());
241c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  }
242c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath};
243c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
244c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathnamespace internal {
245c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Lhs, typename Rhs, int RhsMode>
246c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct traits<SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false> >
247c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  : traits<ProductBase<SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false>, Lhs, Rhs> >
248c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{};
249c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}
250c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
251c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Lhs, typename Rhs, int RhsMode>
252c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false>
253c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  : public ProductBase<SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false>, Lhs, Rhs >
254c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{
255c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  EIGEN_PRODUCT_PUBLIC_INTERFACE(SelfadjointProductMatrix)
256c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
257c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  enum {
258c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    RhsUpLo = RhsMode&(Upper|Lower)
259c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  };
260c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
261c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
262c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
263c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  template<typename Dest> void scaleAndAddTo(Dest& dest, Scalar alpha) const
264c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  {
265c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    // let's simply transpose the product
266c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    Transpose<Dest> destT(dest);
267c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    SelfadjointProductMatrix<Transpose<const Rhs>, int(RhsUpLo)==Upper ? Lower : Upper, false,
268c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath                             Transpose<const Lhs>, 0, true>(m_rhs.transpose(), m_lhs.transpose()).scaleAndAddTo(destT, alpha);
269c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  }
270c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath};
271c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
272c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} // end namespace Eigen
273c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
274c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#endif // EIGEN_SELFADJOINT_MATRIX_VECTOR_H
275