1c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/*
2c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Copyright (c) 2011, Intel Corporation. All rights reserved.
3c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
4c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Redistribution and use in source and binary forms, with or without modification,
5c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath are permitted provided that the following conditions are met:
6c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
7c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath * Redistributions of source code must retain the above copyright notice, this
8c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   list of conditions and the following disclaimer.
9c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath * Redistributions in binary form must reproduce the above copyright notice,
10c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   this list of conditions and the following disclaimer in the documentation
11c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   and/or other materials provided with the distribution.
12c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath * Neither the name of Intel Corporation nor the names of its contributors may
13c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   be used to endorse or promote products derived from this software without
14c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   specific prior written permission.
15c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
16c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
23c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
27c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath ********************************************************************************
28c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath *   Content : Eigen bindings to Intel(R) MKL
29c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath *   Triangular matrix * matrix product functionality based on ?TRMM.
30c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath ********************************************************************************
31c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath*/
32c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
33c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#ifndef EIGEN_TRIANGULAR_MATRIX_MATRIX_MKL_H
34c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#define EIGEN_TRIANGULAR_MATRIX_MATRIX_MKL_H
35c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
36c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathnamespace Eigen {
37c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
38c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathnamespace internal {
39c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
40c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
41c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate <typename Scalar, typename Index,
42c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath          int Mode, bool LhsIsTriangular,
43c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath          int LhsStorageOrder, bool ConjugateLhs,
44c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath          int RhsStorageOrder, bool ConjugateRhs,
45c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath          int ResStorageOrder>
46c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct product_triangular_matrix_matrix_trmm :
47c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath       product_triangular_matrix_matrix<Scalar,Index,Mode,
48c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath          LhsIsTriangular,LhsStorageOrder,ConjugateLhs,
49c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath          RhsStorageOrder, ConjugateRhs, ResStorageOrder, BuiltIn> {};
50c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
51c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
52c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// try to go to BLAS specialization
53c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#define EIGEN_MKL_TRMM_SPECIALIZE(Scalar, LhsIsTriangular) \
54c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate <typename Index, int Mode, \
55c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath          int LhsStorageOrder, bool ConjugateLhs, \
56c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath          int RhsStorageOrder, bool ConjugateRhs> \
57c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct product_triangular_matrix_matrix<Scalar,Index, Mode, LhsIsTriangular, \
58c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath           LhsStorageOrder,ConjugateLhs, RhsStorageOrder,ConjugateRhs,ColMajor,Specialized> { \
59c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  static inline void run(Index _rows, Index _cols, Index _depth, const Scalar* _lhs, Index lhsStride,\
607faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez    const Scalar* _rhs, Index rhsStride, Scalar* res, Index resStride, Scalar alpha, level3_blocking<Scalar,Scalar>& blocking) { \
61c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      product_triangular_matrix_matrix_trmm<Scalar,Index,Mode, \
62c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath        LhsIsTriangular,LhsStorageOrder,ConjugateLhs, \
63c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath        RhsStorageOrder, ConjugateRhs, ColMajor>::run( \
647faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez        _rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha, blocking); \
65c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  } \
66c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath};
67c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
68c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_TRMM_SPECIALIZE(double, true)
69c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_TRMM_SPECIALIZE(double, false)
70c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_TRMM_SPECIALIZE(dcomplex, true)
71c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_TRMM_SPECIALIZE(dcomplex, false)
72c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_TRMM_SPECIALIZE(float, true)
73c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_TRMM_SPECIALIZE(float, false)
74c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_TRMM_SPECIALIZE(scomplex, true)
75c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_TRMM_SPECIALIZE(scomplex, false)
76c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
77c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// implements col-major += alpha * op(triangular) * op(general)
78c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#define EIGEN_MKL_TRMM_L(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
79c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate <typename Index, int Mode, \
80c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath          int LhsStorageOrder, bool ConjugateLhs, \
81c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath          int RhsStorageOrder, bool ConjugateRhs> \
82c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \
83c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath         LhsStorageOrder,ConjugateLhs,RhsStorageOrder,ConjugateRhs,ColMajor> \
84c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ \
85c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  enum { \
86c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    IsLower = (Mode&Lower) == Lower, \
87c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \
88c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    IsUnitDiag  = (Mode&UnitDiag) ? 1 : 0, \
89c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    IsZeroDiag  = (Mode&ZeroDiag) ? 1 : 0, \
90c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    LowUp = IsLower ? Lower : Upper, \
91c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    conjA = ((LhsStorageOrder==ColMajor) && ConjugateLhs) ? 1 : 0 \
92c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  }; \
93c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath\
947faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez  static void run( \
95c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    Index _rows, Index _cols, Index _depth, \
96c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    const EIGTYPE* _lhs, Index lhsStride, \
97c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    const EIGTYPE* _rhs, Index rhsStride, \
98c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    EIGTYPE* res,        Index resStride, \
997faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez    EIGTYPE alpha, level3_blocking<EIGTYPE,EIGTYPE>& blocking) \
100c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  { \
101c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   Index diagSize  = (std::min)(_rows,_depth); \
102c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   Index rows      = IsLower ? _rows : diagSize; \
103c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   Index depth     = IsLower ? diagSize : _depth; \
104c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   Index cols      = _cols; \
105c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath\
106c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   typedef Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder> MatrixLhs; \
107c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   typedef Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> MatrixRhs; \
108c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath\
109c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/* Non-square case - doesn't fit to MKL ?TRMM. Fall to default triangular product or call MKL ?GEMM*/ \
110c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   if (rows != depth) { \
111c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath\
112c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath     int nthr = mkl_domain_get_max_threads(MKL_BLAS); \
113c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath\
114c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath     if (((nthr==1) && (((std::max)(rows,depth)-diagSize)/(double)diagSize < 0.5))) { \
115c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath     /* Most likely no benefit to call TRMM or GEMM from MKL*/ \
116c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath       product_triangular_matrix_matrix<EIGTYPE,Index,Mode,true, \
117c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath       LhsStorageOrder,ConjugateLhs, RhsStorageOrder, ConjugateRhs, ColMajor, BuiltIn>::run( \
1187faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez           _rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha, blocking); \
119c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath     /*std::cout << "TRMM_L: A is not square! Go to Eigen TRMM implementation!\n";*/ \
120c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath     } else { \
121c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath     /* Make sense to call GEMM */ \
122c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath       Map<const MatrixLhs, 0, OuterStride<> > lhsMap(_lhs,rows,depth,OuterStride<>(lhsStride)); \
123c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath       MatrixLhs aa_tmp=lhsMap.template triangularView<Mode>(); \
124c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath       MKL_INT aStride = aa_tmp.outerStride(); \
1257faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez       gemm_blocking_space<ColMajor,EIGTYPE,EIGTYPE,Dynamic,Dynamic,Dynamic> gemm_blocking(_rows,_cols,_depth); \
126c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath       general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor>::run( \
1277faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez       rows, cols, depth, aa_tmp.data(), aStride, _rhs, rhsStride, res, resStride, alpha, gemm_blocking, 0); \
128c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath\
129c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath     /*std::cout << "TRMM_L: A is not square! Go to MKL GEMM implementation! " << nthr<<" \n";*/ \
130c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath     } \
131c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath     return; \
132c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   } \
133c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   char side = 'L', transa, uplo, diag = 'N'; \
134c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   EIGTYPE *b; \
135c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   const EIGTYPE *a; \
136c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   MKL_INT m, n, lda, ldb; \
137c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   MKLTYPE alpha_; \
138c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath\
139c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/* Set alpha_*/ \
140c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \
141c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath\
142c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/* Set m, n */ \
143c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   m = (MKL_INT)diagSize; \
144c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   n = (MKL_INT)cols; \
145c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath\
146c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/* Set trans */ \
147c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   transa = (LhsStorageOrder==RowMajor) ? ((ConjugateLhs) ? 'C' : 'T') : 'N'; \
148c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath\
149c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/* Set b, ldb */ \
150c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   Map<const MatrixRhs, 0, OuterStride<> > rhs(_rhs,depth,cols,OuterStride<>(rhsStride)); \
151c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   MatrixX##EIGPREFIX b_tmp; \
152c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath\
153c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   if (ConjugateRhs) b_tmp = rhs.conjugate(); else b_tmp = rhs; \
154c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   b = b_tmp.data(); \
155c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   ldb = b_tmp.outerStride(); \
156c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath\
157c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/* Set uplo */ \
158c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   uplo = IsLower ? 'L' : 'U'; \
159c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   if (LhsStorageOrder==RowMajor) uplo = (uplo == 'L') ? 'U' : 'L'; \
160c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/* Set a, lda */ \
161c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   Map<const MatrixLhs, 0, OuterStride<> > lhs(_lhs,rows,depth,OuterStride<>(lhsStride)); \
162c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   MatrixLhs a_tmp; \
163c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath\
164c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   if ((conjA!=0) || (SetDiag==0)) { \
165c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath     if (conjA) a_tmp = lhs.conjugate(); else a_tmp = lhs; \
166c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath     if (IsZeroDiag) \
167c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath       a_tmp.diagonal().setZero(); \
168c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath     else if (IsUnitDiag) \
169c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath       a_tmp.diagonal().setOnes();\
170c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath     a = a_tmp.data(); \
171c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath     lda = a_tmp.outerStride(); \
172c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   } else { \
173c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath     a = _lhs; \
174c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath     lda = lhsStride; \
175c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   } \
176c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   /*std::cout << "TRMM_L: A is square! Go to MKL TRMM implementation! \n";*/ \
177c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/* call ?trmm*/ \
178c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   MKLPREFIX##trmm(&side, &uplo, &transa, &diag, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (MKLTYPE*)b, &ldb); \
179c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath\
180c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/* Add op(a_triangular)*b into res*/ \
181c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   Map<MatrixX##EIGPREFIX, 0, OuterStride<> > res_tmp(res,rows,cols,OuterStride<>(resStride)); \
182c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   res_tmp=res_tmp+b_tmp; \
183c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  } \
184c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath};
185c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
186c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_TRMM_L(double, double, d, d)
187c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_TRMM_L(dcomplex, MKL_Complex16, cd, z)
188c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_TRMM_L(float, float, f, s)
189c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_TRMM_L(scomplex, MKL_Complex8, cf, c)
190c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
191c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// implements col-major += alpha * op(general) * op(triangular)
192c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#define EIGEN_MKL_TRMM_R(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
193c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate <typename Index, int Mode, \
194c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath          int LhsStorageOrder, bool ConjugateLhs, \
195c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath          int RhsStorageOrder, bool ConjugateRhs> \
196c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \
197c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath         LhsStorageOrder,ConjugateLhs,RhsStorageOrder,ConjugateRhs,ColMajor> \
198c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ \
199c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  enum { \
200c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    IsLower = (Mode&Lower) == Lower, \
201c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \
202c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    IsUnitDiag  = (Mode&UnitDiag) ? 1 : 0, \
203c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    IsZeroDiag  = (Mode&ZeroDiag) ? 1 : 0, \
204c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    LowUp = IsLower ? Lower : Upper, \
205c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    conjA = ((RhsStorageOrder==ColMajor) && ConjugateRhs) ? 1 : 0 \
206c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  }; \
207c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath\
2087faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez  static void run( \
209c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    Index _rows, Index _cols, Index _depth, \
210c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    const EIGTYPE* _lhs, Index lhsStride, \
211c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    const EIGTYPE* _rhs, Index rhsStride, \
212c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    EIGTYPE* res,        Index resStride, \
2137faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez    EIGTYPE alpha, level3_blocking<EIGTYPE,EIGTYPE>& blocking) \
214c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  { \
215c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   Index diagSize  = (std::min)(_cols,_depth); \
216c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   Index rows      = _rows; \
217c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   Index depth     = IsLower ? _depth : diagSize; \
218c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   Index cols      = IsLower ? diagSize : _cols; \
219c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath\
220c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   typedef Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder> MatrixLhs; \
221c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   typedef Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> MatrixRhs; \
222c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath\
223c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/* Non-square case - doesn't fit to MKL ?TRMM. Fall to default triangular product or call MKL ?GEMM*/ \
224c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   if (cols != depth) { \
225c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath\
226c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath     int nthr = mkl_domain_get_max_threads(MKL_BLAS); \
227c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath\
228c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath     if ((nthr==1) && (((std::max)(cols,depth)-diagSize)/(double)diagSize < 0.5)) { \
229c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath     /* Most likely no benefit to call TRMM or GEMM from MKL*/ \
230c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath       product_triangular_matrix_matrix<EIGTYPE,Index,Mode,false, \
231c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath       LhsStorageOrder,ConjugateLhs, RhsStorageOrder, ConjugateRhs, ColMajor, BuiltIn>::run( \
2327faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez           _rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha, blocking); \
233c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath       /*std::cout << "TRMM_R: A is not square! Go to Eigen TRMM implementation!\n";*/ \
234c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath     } else { \
235c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath     /* Make sense to call GEMM */ \
236c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath       Map<const MatrixRhs, 0, OuterStride<> > rhsMap(_rhs,depth,cols, OuterStride<>(rhsStride)); \
237c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath       MatrixRhs aa_tmp=rhsMap.template triangularView<Mode>(); \
238c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath       MKL_INT aStride = aa_tmp.outerStride(); \
2397faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez       gemm_blocking_space<ColMajor,EIGTYPE,EIGTYPE,Dynamic,Dynamic,Dynamic> gemm_blocking(_rows,_cols,_depth); \
240c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath       general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor>::run( \
2417faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez       rows, cols, depth, _lhs, lhsStride, aa_tmp.data(), aStride, res, resStride, alpha, gemm_blocking, 0); \
242c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath\
243c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath     /*std::cout << "TRMM_R: A is not square! Go to MKL GEMM implementation! " << nthr<<" \n";*/ \
244c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath     } \
245c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath     return; \
246c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   } \
247c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   char side = 'R', transa, uplo, diag = 'N'; \
248c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   EIGTYPE *b; \
249c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   const EIGTYPE *a; \
250c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   MKL_INT m, n, lda, ldb; \
251c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   MKLTYPE alpha_; \
252c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath\
253c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/* Set alpha_*/ \
254c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \
255c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath\
256c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/* Set m, n */ \
257c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   m = (MKL_INT)rows; \
258c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   n = (MKL_INT)diagSize; \
259c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath\
260c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/* Set trans */ \
261c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   transa = (RhsStorageOrder==RowMajor) ? ((ConjugateRhs) ? 'C' : 'T') : 'N'; \
262c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath\
263c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/* Set b, ldb */ \
264c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   Map<const MatrixLhs, 0, OuterStride<> > lhs(_lhs,rows,depth,OuterStride<>(lhsStride)); \
265c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   MatrixX##EIGPREFIX b_tmp; \
266c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath\
267c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   if (ConjugateLhs) b_tmp = lhs.conjugate(); else b_tmp = lhs; \
268c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   b = b_tmp.data(); \
269c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   ldb = b_tmp.outerStride(); \
270c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath\
271c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/* Set uplo */ \
272c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   uplo = IsLower ? 'L' : 'U'; \
273c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   if (RhsStorageOrder==RowMajor) uplo = (uplo == 'L') ? 'U' : 'L'; \
274c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/* Set a, lda */ \
275c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   Map<const MatrixRhs, 0, OuterStride<> > rhs(_rhs,depth,cols, OuterStride<>(rhsStride)); \
276c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   MatrixRhs a_tmp; \
277c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath\
278c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   if ((conjA!=0) || (SetDiag==0)) { \
279c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath     if (conjA) a_tmp = rhs.conjugate(); else a_tmp = rhs; \
280c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath     if (IsZeroDiag) \
281c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath       a_tmp.diagonal().setZero(); \
282c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath     else if (IsUnitDiag) \
283c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath       a_tmp.diagonal().setOnes();\
284c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath     a = a_tmp.data(); \
285c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath     lda = a_tmp.outerStride(); \
286c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   } else { \
287c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath     a = _rhs; \
288c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath     lda = rhsStride; \
289c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   } \
290c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   /*std::cout << "TRMM_R: A is square! Go to MKL TRMM implementation! \n";*/ \
291c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/* call ?trmm*/ \
292c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   MKLPREFIX##trmm(&side, &uplo, &transa, &diag, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (MKLTYPE*)b, &ldb); \
293c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath\
294c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/* Add op(a_triangular)*b into res*/ \
295c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   Map<MatrixX##EIGPREFIX, 0, OuterStride<> > res_tmp(res,rows,cols,OuterStride<>(resStride)); \
296c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   res_tmp=res_tmp+b_tmp; \
297c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  } \
298c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath};
299c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
300c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_TRMM_R(double, double, d, d)
301c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_TRMM_R(dcomplex, MKL_Complex16, cd, z)
302c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_TRMM_R(float, float, f, s)
303c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_TRMM_R(scomplex, MKL_Complex8, cf, c)
304c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
305c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} // end namespace internal
306c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
307c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} // end namespace Eigen
308c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
309c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#endif // EIGEN_TRIANGULAR_MATRIX_MATRIX_MKL_H
310