1c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/* 2c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Copyright (c) 2011, Intel Corporation. All rights reserved. 3c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 4c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Redistribution and use in source and binary forms, with or without modification, 5c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath are permitted provided that the following conditions are met: 6c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 7c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath * Redistributions of source code must retain the above copyright notice, this 8c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath list of conditions and the following disclaimer. 9c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath * Redistributions in binary form must reproduce the above copyright notice, 10c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath this list of conditions and the following disclaimer in the documentation 11c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath and/or other materials provided with the distribution. 12c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath * Neither the name of Intel Corporation nor the names of its contributors may 13c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath be used to endorse or promote products derived from this software without 14c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath specific prior written permission. 15c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 16c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 17c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 18c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 20c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 21c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 22c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 23c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 25c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 27c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath ******************************************************************************** 28c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath * Content : Eigen bindings to Intel(R) MKL 29c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath * Triangular matrix * matrix product functionality based on ?TRMM. 30c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath ******************************************************************************** 31c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath*/ 32c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 33c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#ifndef EIGEN_TRIANGULAR_MATRIX_MATRIX_MKL_H 34c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#define EIGEN_TRIANGULAR_MATRIX_MATRIX_MKL_H 35c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 36c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathnamespace Eigen { 37c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 38c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathnamespace internal { 39c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 40c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 41c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate <typename Scalar, typename Index, 42c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath int Mode, bool LhsIsTriangular, 43c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath int LhsStorageOrder, bool ConjugateLhs, 44c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath int RhsStorageOrder, bool ConjugateRhs, 45c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath int ResStorageOrder> 46c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct product_triangular_matrix_matrix_trmm : 47c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath product_triangular_matrix_matrix<Scalar,Index,Mode, 48c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath LhsIsTriangular,LhsStorageOrder,ConjugateLhs, 49c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath RhsStorageOrder, ConjugateRhs, ResStorageOrder, BuiltIn> {}; 50c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 51c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 52c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// try to go to BLAS specialization 53c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#define EIGEN_MKL_TRMM_SPECIALIZE(Scalar, LhsIsTriangular) \ 54c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate <typename Index, int Mode, \ 55c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath int LhsStorageOrder, bool ConjugateLhs, \ 56c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath int RhsStorageOrder, bool ConjugateRhs> \ 57c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct product_triangular_matrix_matrix<Scalar,Index, Mode, LhsIsTriangular, \ 58c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath LhsStorageOrder,ConjugateLhs, RhsStorageOrder,ConjugateRhs,ColMajor,Specialized> { \ 59c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath static inline void run(Index _rows, Index _cols, Index _depth, const Scalar* _lhs, Index lhsStride,\ 607faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez const Scalar* _rhs, Index rhsStride, Scalar* res, Index resStride, Scalar alpha, level3_blocking<Scalar,Scalar>& blocking) { \ 61c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath product_triangular_matrix_matrix_trmm<Scalar,Index,Mode, \ 62c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath LhsIsTriangular,LhsStorageOrder,ConjugateLhs, \ 63c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath RhsStorageOrder, ConjugateRhs, ColMajor>::run( \ 647faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez _rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha, blocking); \ 65c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } \ 66c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 67c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 68c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_TRMM_SPECIALIZE(double, true) 69c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_TRMM_SPECIALIZE(double, false) 70c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_TRMM_SPECIALIZE(dcomplex, true) 71c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_TRMM_SPECIALIZE(dcomplex, false) 72c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_TRMM_SPECIALIZE(float, true) 73c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_TRMM_SPECIALIZE(float, false) 74c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_TRMM_SPECIALIZE(scomplex, true) 75c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_TRMM_SPECIALIZE(scomplex, false) 76c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 77c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// implements col-major += alpha * op(triangular) * op(general) 78c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#define EIGEN_MKL_TRMM_L(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \ 79c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate <typename Index, int Mode, \ 80c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath int LhsStorageOrder, bool ConjugateLhs, \ 81c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath int RhsStorageOrder, bool ConjugateRhs> \ 82c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \ 83c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath LhsStorageOrder,ConjugateLhs,RhsStorageOrder,ConjugateRhs,ColMajor> \ 84c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ \ 85c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath enum { \ 86c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath IsLower = (Mode&Lower) == Lower, \ 87c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \ 88c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \ 89c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \ 90c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath LowUp = IsLower ? Lower : Upper, \ 91c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath conjA = ((LhsStorageOrder==ColMajor) && ConjugateLhs) ? 1 : 0 \ 92c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath }; \ 93c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath\ 947faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez static void run( \ 95c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Index _rows, Index _cols, Index _depth, \ 96c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath const EIGTYPE* _lhs, Index lhsStride, \ 97c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath const EIGTYPE* _rhs, Index rhsStride, \ 98c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGTYPE* res, Index resStride, \ 997faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez EIGTYPE alpha, level3_blocking<EIGTYPE,EIGTYPE>& blocking) \ 100c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { \ 101c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Index diagSize = (std::min)(_rows,_depth); \ 102c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Index rows = IsLower ? _rows : diagSize; \ 103c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Index depth = IsLower ? diagSize : _depth; \ 104c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Index cols = _cols; \ 105c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath\ 106c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typedef Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder> MatrixLhs; \ 107c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typedef Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> MatrixRhs; \ 108c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath\ 109c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/* Non-square case - doesn't fit to MKL ?TRMM. Fall to default triangular product or call MKL ?GEMM*/ \ 110c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath if (rows != depth) { \ 111c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath\ 112c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath int nthr = mkl_domain_get_max_threads(MKL_BLAS); \ 113c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath\ 114c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath if (((nthr==1) && (((std::max)(rows,depth)-diagSize)/(double)diagSize < 0.5))) { \ 115c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath /* Most likely no benefit to call TRMM or GEMM from MKL*/ \ 116c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath product_triangular_matrix_matrix<EIGTYPE,Index,Mode,true, \ 117c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath LhsStorageOrder,ConjugateLhs, RhsStorageOrder, ConjugateRhs, ColMajor, BuiltIn>::run( \ 1187faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez _rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha, blocking); \ 119c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath /*std::cout << "TRMM_L: A is not square! Go to Eigen TRMM implementation!\n";*/ \ 120c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } else { \ 121c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath /* Make sense to call GEMM */ \ 122c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Map<const MatrixLhs, 0, OuterStride<> > lhsMap(_lhs,rows,depth,OuterStride<>(lhsStride)); \ 123c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath MatrixLhs aa_tmp=lhsMap.template triangularView<Mode>(); \ 124c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath MKL_INT aStride = aa_tmp.outerStride(); \ 1257faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez gemm_blocking_space<ColMajor,EIGTYPE,EIGTYPE,Dynamic,Dynamic,Dynamic> gemm_blocking(_rows,_cols,_depth); \ 126c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor>::run( \ 1277faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez rows, cols, depth, aa_tmp.data(), aStride, _rhs, rhsStride, res, resStride, alpha, gemm_blocking, 0); \ 128c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath\ 129c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath /*std::cout << "TRMM_L: A is not square! Go to MKL GEMM implementation! " << nthr<<" \n";*/ \ 130c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } \ 131c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return; \ 132c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } \ 133c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath char side = 'L', transa, uplo, diag = 'N'; \ 134c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGTYPE *b; \ 135c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath const EIGTYPE *a; \ 136c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath MKL_INT m, n, lda, ldb; \ 137c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath MKLTYPE alpha_; \ 138c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath\ 139c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/* Set alpha_*/ \ 140c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \ 141c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath\ 142c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/* Set m, n */ \ 143c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath m = (MKL_INT)diagSize; \ 144c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath n = (MKL_INT)cols; \ 145c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath\ 146c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/* Set trans */ \ 147c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath transa = (LhsStorageOrder==RowMajor) ? ((ConjugateLhs) ? 'C' : 'T') : 'N'; \ 148c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath\ 149c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/* Set b, ldb */ \ 150c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Map<const MatrixRhs, 0, OuterStride<> > rhs(_rhs,depth,cols,OuterStride<>(rhsStride)); \ 151c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath MatrixX##EIGPREFIX b_tmp; \ 152c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath\ 153c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath if (ConjugateRhs) b_tmp = rhs.conjugate(); else b_tmp = rhs; \ 154c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath b = b_tmp.data(); \ 155c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath ldb = b_tmp.outerStride(); \ 156c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath\ 157c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/* Set uplo */ \ 158c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath uplo = IsLower ? 'L' : 'U'; \ 159c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath if (LhsStorageOrder==RowMajor) uplo = (uplo == 'L') ? 'U' : 'L'; \ 160c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/* Set a, lda */ \ 161c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Map<const MatrixLhs, 0, OuterStride<> > lhs(_lhs,rows,depth,OuterStride<>(lhsStride)); \ 162c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath MatrixLhs a_tmp; \ 163c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath\ 164c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath if ((conjA!=0) || (SetDiag==0)) { \ 165c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath if (conjA) a_tmp = lhs.conjugate(); else a_tmp = lhs; \ 166c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath if (IsZeroDiag) \ 167c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath a_tmp.diagonal().setZero(); \ 168c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath else if (IsUnitDiag) \ 169c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath a_tmp.diagonal().setOnes();\ 170c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath a = a_tmp.data(); \ 171c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath lda = a_tmp.outerStride(); \ 172c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } else { \ 173c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath a = _lhs; \ 174c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath lda = lhsStride; \ 175c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } \ 176c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath /*std::cout << "TRMM_L: A is square! Go to MKL TRMM implementation! \n";*/ \ 177c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/* call ?trmm*/ \ 178c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath MKLPREFIX##trmm(&side, &uplo, &transa, &diag, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (MKLTYPE*)b, &ldb); \ 179c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath\ 180c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/* Add op(a_triangular)*b into res*/ \ 181c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Map<MatrixX##EIGPREFIX, 0, OuterStride<> > res_tmp(res,rows,cols,OuterStride<>(resStride)); \ 182c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath res_tmp=res_tmp+b_tmp; \ 183c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } \ 184c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 185c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 186c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_TRMM_L(double, double, d, d) 187c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_TRMM_L(dcomplex, MKL_Complex16, cd, z) 188c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_TRMM_L(float, float, f, s) 189c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_TRMM_L(scomplex, MKL_Complex8, cf, c) 190c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 191c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// implements col-major += alpha * op(general) * op(triangular) 192c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#define EIGEN_MKL_TRMM_R(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \ 193c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate <typename Index, int Mode, \ 194c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath int LhsStorageOrder, bool ConjugateLhs, \ 195c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath int RhsStorageOrder, bool ConjugateRhs> \ 196c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \ 197c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath LhsStorageOrder,ConjugateLhs,RhsStorageOrder,ConjugateRhs,ColMajor> \ 198c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ \ 199c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath enum { \ 200c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath IsLower = (Mode&Lower) == Lower, \ 201c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \ 202c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \ 203c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \ 204c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath LowUp = IsLower ? Lower : Upper, \ 205c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath conjA = ((RhsStorageOrder==ColMajor) && ConjugateRhs) ? 1 : 0 \ 206c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath }; \ 207c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath\ 2087faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez static void run( \ 209c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Index _rows, Index _cols, Index _depth, \ 210c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath const EIGTYPE* _lhs, Index lhsStride, \ 211c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath const EIGTYPE* _rhs, Index rhsStride, \ 212c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGTYPE* res, Index resStride, \ 2137faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez EIGTYPE alpha, level3_blocking<EIGTYPE,EIGTYPE>& blocking) \ 214c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { \ 215c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Index diagSize = (std::min)(_cols,_depth); \ 216c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Index rows = _rows; \ 217c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Index depth = IsLower ? _depth : diagSize; \ 218c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Index cols = IsLower ? diagSize : _cols; \ 219c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath\ 220c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typedef Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder> MatrixLhs; \ 221c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typedef Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> MatrixRhs; \ 222c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath\ 223c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/* Non-square case - doesn't fit to MKL ?TRMM. Fall to default triangular product or call MKL ?GEMM*/ \ 224c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath if (cols != depth) { \ 225c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath\ 226c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath int nthr = mkl_domain_get_max_threads(MKL_BLAS); \ 227c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath\ 228c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath if ((nthr==1) && (((std::max)(cols,depth)-diagSize)/(double)diagSize < 0.5)) { \ 229c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath /* Most likely no benefit to call TRMM or GEMM from MKL*/ \ 230c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath product_triangular_matrix_matrix<EIGTYPE,Index,Mode,false, \ 231c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath LhsStorageOrder,ConjugateLhs, RhsStorageOrder, ConjugateRhs, ColMajor, BuiltIn>::run( \ 2327faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez _rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha, blocking); \ 233c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath /*std::cout << "TRMM_R: A is not square! Go to Eigen TRMM implementation!\n";*/ \ 234c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } else { \ 235c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath /* Make sense to call GEMM */ \ 236c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Map<const MatrixRhs, 0, OuterStride<> > rhsMap(_rhs,depth,cols, OuterStride<>(rhsStride)); \ 237c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath MatrixRhs aa_tmp=rhsMap.template triangularView<Mode>(); \ 238c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath MKL_INT aStride = aa_tmp.outerStride(); \ 2397faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez gemm_blocking_space<ColMajor,EIGTYPE,EIGTYPE,Dynamic,Dynamic,Dynamic> gemm_blocking(_rows,_cols,_depth); \ 240c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor>::run( \ 2417faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez rows, cols, depth, _lhs, lhsStride, aa_tmp.data(), aStride, res, resStride, alpha, gemm_blocking, 0); \ 242c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath\ 243c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath /*std::cout << "TRMM_R: A is not square! Go to MKL GEMM implementation! " << nthr<<" \n";*/ \ 244c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } \ 245c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return; \ 246c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } \ 247c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath char side = 'R', transa, uplo, diag = 'N'; \ 248c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGTYPE *b; \ 249c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath const EIGTYPE *a; \ 250c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath MKL_INT m, n, lda, ldb; \ 251c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath MKLTYPE alpha_; \ 252c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath\ 253c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/* Set alpha_*/ \ 254c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \ 255c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath\ 256c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/* Set m, n */ \ 257c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath m = (MKL_INT)rows; \ 258c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath n = (MKL_INT)diagSize; \ 259c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath\ 260c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/* Set trans */ \ 261c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath transa = (RhsStorageOrder==RowMajor) ? ((ConjugateRhs) ? 'C' : 'T') : 'N'; \ 262c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath\ 263c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/* Set b, ldb */ \ 264c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Map<const MatrixLhs, 0, OuterStride<> > lhs(_lhs,rows,depth,OuterStride<>(lhsStride)); \ 265c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath MatrixX##EIGPREFIX b_tmp; \ 266c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath\ 267c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath if (ConjugateLhs) b_tmp = lhs.conjugate(); else b_tmp = lhs; \ 268c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath b = b_tmp.data(); \ 269c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath ldb = b_tmp.outerStride(); \ 270c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath\ 271c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/* Set uplo */ \ 272c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath uplo = IsLower ? 'L' : 'U'; \ 273c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath if (RhsStorageOrder==RowMajor) uplo = (uplo == 'L') ? 'U' : 'L'; \ 274c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/* Set a, lda */ \ 275c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Map<const MatrixRhs, 0, OuterStride<> > rhs(_rhs,depth,cols, OuterStride<>(rhsStride)); \ 276c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath MatrixRhs a_tmp; \ 277c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath\ 278c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath if ((conjA!=0) || (SetDiag==0)) { \ 279c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath if (conjA) a_tmp = rhs.conjugate(); else a_tmp = rhs; \ 280c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath if (IsZeroDiag) \ 281c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath a_tmp.diagonal().setZero(); \ 282c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath else if (IsUnitDiag) \ 283c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath a_tmp.diagonal().setOnes();\ 284c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath a = a_tmp.data(); \ 285c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath lda = a_tmp.outerStride(); \ 286c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } else { \ 287c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath a = _rhs; \ 288c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath lda = rhsStride; \ 289c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } \ 290c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath /*std::cout << "TRMM_R: A is square! Go to MKL TRMM implementation! \n";*/ \ 291c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/* call ?trmm*/ \ 292c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath MKLPREFIX##trmm(&side, &uplo, &transa, &diag, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (MKLTYPE*)b, &ldb); \ 293c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath\ 294c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/* Add op(a_triangular)*b into res*/ \ 295c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Map<MatrixX##EIGPREFIX, 0, OuterStride<> > res_tmp(res,rows,cols,OuterStride<>(resStride)); \ 296c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath res_tmp=res_tmp+b_tmp; \ 297c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } \ 298c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 299c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 300c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_TRMM_R(double, double, d, d) 301c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_TRMM_R(dcomplex, MKL_Complex16, cd, z) 302c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_TRMM_R(float, float, f, s) 303c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_TRMM_R(scomplex, MKL_Complex8, cf, c) 304c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 305c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} // end namespace internal 306c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 307c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} // end namespace Eigen 308c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 309c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#endif // EIGEN_TRIANGULAR_MATRIX_MATRIX_MKL_H 310