1c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// This file is part of Eigen, a lightweight C++ template library 2c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// for linear algebra. 3c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// 4c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr> 5c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// 6c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// This Source Code Form is subject to the terms of the Mozilla 7c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// Public License v. 2.0. If a copy of the MPL was not distributed 8c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 9c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 10c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#ifndef EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_H 11c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#define EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_H 12c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 13c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathnamespace Eigen { 14c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 157faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandeztemplate<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjLhs, bool ConjRhs> 167faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandezstruct selfadjoint_rank1_update; 177faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez 18c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathnamespace internal { 19c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 20c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/********************************************************************** 21c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath* This file implements a general A * B product while 22c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath* evaluating only one triangular part of the product. 232b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang* This is a more general version of self adjoint product (C += A A^T) 24c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath* as the level 3 SYRK Blas routine. 25c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath**********************************************************************/ 26c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 27c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// forward declarations (defined at the end of this file) 28c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename LhsScalar, typename RhsScalar, typename Index, int mr, int nr, bool ConjLhs, bool ConjRhs, int UpLo> 29c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct tribb_kernel; 30c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 31c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/* Optimized matrix-matrix product evaluating only one triangular half */ 32c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate <typename Index, 33c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs, 34c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, 35c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath int ResStorageOrder, int UpLo, int Version = Specialized> 36c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct general_matrix_matrix_triangular_product; 37c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 38c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// as usual if the result is row major => we transpose the product 39c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate <typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs, 40c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, int UpLo, int Version> 41c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,RowMajor,UpLo,Version> 42c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 432b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar; 44c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* lhs, Index lhsStride, 452b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang const RhsScalar* rhs, Index rhsStride, ResScalar* res, Index resStride, 462b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang const ResScalar& alpha, level3_blocking<RhsScalar,LhsScalar>& blocking) 47c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 48c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath general_matrix_matrix_triangular_product<Index, 49c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath RhsScalar, RhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateRhs, 50c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath LhsScalar, LhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateLhs, 51c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath ColMajor, UpLo==Lower?Upper:Lower> 522b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang ::run(size,depth,rhs,rhsStride,lhs,lhsStride,res,resStride,alpha,blocking); 53c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 54c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 55c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 56c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate <typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs, 57c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, int UpLo, int Version> 58c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor,UpLo,Version> 59c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 602b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar; 61c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* _lhs, Index lhsStride, 622b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang const RhsScalar* _rhs, Index rhsStride, ResScalar* _res, Index resStride, 632b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang const ResScalar& alpha, level3_blocking<LhsScalar,RhsScalar>& blocking) 64c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 65c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typedef gebp_traits<LhsScalar,RhsScalar> Traits; 66c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 672b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang typedef const_blas_data_mapper<LhsScalar, Index, LhsStorageOrder> LhsMapper; 682b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang typedef const_blas_data_mapper<RhsScalar, Index, RhsStorageOrder> RhsMapper; 692b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor> ResMapper; 702b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang LhsMapper lhs(_lhs,lhsStride); 712b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang RhsMapper rhs(_rhs,rhsStride); 722b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang ResMapper res(_res, resStride); 732b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 742b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang Index kc = blocking.kc(); 752b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang Index mc = (std::min)(size,blocking.mc()); 762b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 77c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // !!! mc must be a multiple of nr: 78c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath if(mc > Traits::nr) 79c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath mc = (mc/Traits::nr)*Traits::nr; 80c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 812b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang std::size_t sizeA = kc*mc; 822b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang std::size_t sizeB = kc*size; 832b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 842b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, sizeA, blocking.blockA()); 852b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, sizeB, blocking.blockB()); 862b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 872b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang gemm_pack_lhs<LhsScalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs; 882b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang gemm_pack_rhs<RhsScalar, Index, RhsMapper, Traits::nr, RhsStorageOrder> pack_rhs; 892b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang gebp_kernel<LhsScalar, RhsScalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp; 90c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath tribb_kernel<LhsScalar, RhsScalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs, UpLo> sybb; 91c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 92c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath for(Index k2=0; k2<depth; k2+=kc) 93c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 94c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath const Index actual_kc = (std::min)(k2+kc,depth)-k2; 95c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 96c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // note that the actual rhs is the transpose/adjoint of mat 972b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang pack_rhs(blockB, rhs.getSubMapper(k2,0), actual_kc, size); 98c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 99c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath for(Index i2=0; i2<size; i2+=mc) 100c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 101c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath const Index actual_mc = (std::min)(i2+mc,size)-i2; 102c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 1032b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang pack_lhs(blockA, lhs.getSubMapper(i2, k2), actual_kc, actual_mc); 104c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 105c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // the selected actual_mc * size panel of res is split into three different part: 106c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // 1 - before the diagonal => processed with gebp or skipped 107c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // 2 - the actual_mc x actual_mc symmetric block => processed with a special kernel 108c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // 3 - after the diagonal => processed with gebp or skipped 109c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath if (UpLo==Lower) 1102b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang gebp(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, 1112b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang (std::min)(size,i2), alpha, -1, -1, 0, 0); 1122b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 113c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 1142b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang sybb(_res+resStride*i2 + i2, resStride, blockA, blockB + actual_kc*i2, actual_mc, actual_kc, alpha); 115c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 116c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath if (UpLo==Upper) 117c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 118c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Index j2 = i2+actual_mc; 1192b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang gebp(res.getSubMapper(i2, j2), blockA, blockB+actual_kc*j2, actual_mc, 1202b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang actual_kc, (std::max)(Index(0), size-j2), alpha, -1, -1, 0, 0); 121c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 122c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 123c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 124c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 125c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 126c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 127c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// Optimized packed Block * packed Block product kernel evaluating only one given triangular part 128c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// This kernel is built on top of the gebp kernel: 129c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// - the current destination block is processed per panel of actual_mc x BlockSize 130c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// where BlockSize is set to the minimal value allowing gebp to be as fast as possible 131c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// - then, as usual, each panel is split into three parts along the diagonal, 132c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// the sub blocks above and below the diagonal are processed as usual, 133c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// while the triangular block overlapping the diagonal is evaluated into a 134c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// small temporary buffer which is then accumulated into the result using a 135c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// triangular traversal. 136c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename LhsScalar, typename RhsScalar, typename Index, int mr, int nr, bool ConjLhs, bool ConjRhs, int UpLo> 137c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct tribb_kernel 138c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 139c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typedef gebp_traits<LhsScalar,RhsScalar,ConjLhs,ConjRhs> Traits; 140c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typedef typename Traits::ResScalar ResScalar; 1412b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 142c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath enum { 1432b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang BlockSize = meta_least_common_multiple<EIGEN_PLAIN_ENUM_MAX(mr,nr),EIGEN_PLAIN_ENUM_MIN(mr,nr)>::ret 144c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath }; 1452b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang void operator()(ResScalar* _res, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index size, Index depth, const ResScalar& alpha) 146c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 1472b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang typedef blas_data_mapper<ResScalar, Index, ColMajor> ResMapper; 1482b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang ResMapper res(_res, resStride); 1492b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang gebp_kernel<LhsScalar, RhsScalar, Index, ResMapper, mr, nr, ConjLhs, ConjRhs> gebp_kernel; 1502b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 1512b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang Matrix<ResScalar,BlockSize,BlockSize,ColMajor> buffer((internal::constructor_without_unaligned_array_assert())); 152c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 153c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // let's process the block per panel of actual_mc x BlockSize, 154c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // again, each is split into three parts, etc. 155c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath for (Index j=0; j<size; j+=BlockSize) 156c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 157c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Index actualBlockSize = std::min<Index>(BlockSize,size - j); 158c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath const RhsScalar* actual_b = blockB+j*depth; 159c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 160c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath if(UpLo==Upper) 1612b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang gebp_kernel(res.getSubMapper(0, j), blockA, actual_b, j, depth, actualBlockSize, alpha, 1622b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang -1, -1, 0, 0); 163c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 164c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // selfadjoint micro block 165c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 166c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Index i = j; 167c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath buffer.setZero(); 168c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // 1 - apply the kernel on the temporary buffer 1692b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang gebp_kernel(ResMapper(buffer.data(), BlockSize), blockA+depth*i, actual_b, actualBlockSize, depth, actualBlockSize, alpha, 1702b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang -1, -1, 0, 0); 171c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // 2 - triangular accumulation 172c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath for(Index j1=0; j1<actualBlockSize; ++j1) 173c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 1742b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang ResScalar* r = &res(i, j + j1); 175c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath for(Index i1=UpLo==Lower ? j1 : 0; 176c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath UpLo==Lower ? i1<actualBlockSize : i1<=j1; ++i1) 177c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath r[i1] += buffer(i1,j1); 178c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 179c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 180c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 181c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath if(UpLo==Lower) 182c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 183c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Index i = j+actualBlockSize; 1842b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang gebp_kernel(res.getSubMapper(i, j), blockA+depth*i, actual_b, size-i, 1852b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang depth, actualBlockSize, alpha, -1, -1, 0, 0); 186c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 187c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 188c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 189c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 190c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 191c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} // end namespace internal 192c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 193c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// high level API 194c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 1957faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandeztemplate<typename MatrixType, typename ProductType, int UpLo, bool IsOuterProduct> 1967faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandezstruct general_product_to_triangular_selector; 1977faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez 1987faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez 1997faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandeztemplate<typename MatrixType, typename ProductType, int UpLo> 2007faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandezstruct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,true> 2017faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez{ 2022b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang static void run(MatrixType& mat, const ProductType& prod, const typename MatrixType::Scalar& alpha, bool beta) 2037faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez { 2047faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez typedef typename MatrixType::Scalar Scalar; 2057faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez 2067faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez typedef typename internal::remove_all<typename ProductType::LhsNested>::type Lhs; 2077faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez typedef internal::blas_traits<Lhs> LhsBlasTraits; 2087faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhs; 2097faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez typedef typename internal::remove_all<ActualLhs>::type _ActualLhs; 2107faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez typename internal::add_const_on_value_type<ActualLhs>::type actualLhs = LhsBlasTraits::extract(prod.lhs()); 2117faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez 2127faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez typedef typename internal::remove_all<typename ProductType::RhsNested>::type Rhs; 2137faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez typedef internal::blas_traits<Rhs> RhsBlasTraits; 2147faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhs; 2157faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez typedef typename internal::remove_all<ActualRhs>::type _ActualRhs; 2167faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez typename internal::add_const_on_value_type<ActualRhs>::type actualRhs = RhsBlasTraits::extract(prod.rhs()); 2177faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez 2187faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) * RhsBlasTraits::extractScalarFactor(prod.rhs().derived()); 2197faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez 2202b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang if(!beta) 2212b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang mat.template triangularView<UpLo>().setZero(); 2222b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 2237faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez enum { 2247faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez StorageOrder = (internal::traits<MatrixType>::Flags&RowMajorBit) ? RowMajor : ColMajor, 2257faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez UseLhsDirectly = _ActualLhs::InnerStrideAtCompileTime==1, 2267faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez UseRhsDirectly = _ActualRhs::InnerStrideAtCompileTime==1 2277faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez }; 2287faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez 2297faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez internal::gemv_static_vector_if<Scalar,Lhs::SizeAtCompileTime,Lhs::MaxSizeAtCompileTime,!UseLhsDirectly> static_lhs; 2307faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez ei_declare_aligned_stack_constructed_variable(Scalar, actualLhsPtr, actualLhs.size(), 2317faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez (UseLhsDirectly ? const_cast<Scalar*>(actualLhs.data()) : static_lhs.data())); 2327faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez if(!UseLhsDirectly) Map<typename _ActualLhs::PlainObject>(actualLhsPtr, actualLhs.size()) = actualLhs; 2337faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez 2347faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez internal::gemv_static_vector_if<Scalar,Rhs::SizeAtCompileTime,Rhs::MaxSizeAtCompileTime,!UseRhsDirectly> static_rhs; 2357faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez ei_declare_aligned_stack_constructed_variable(Scalar, actualRhsPtr, actualRhs.size(), 2367faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez (UseRhsDirectly ? const_cast<Scalar*>(actualRhs.data()) : static_rhs.data())); 2377faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez if(!UseRhsDirectly) Map<typename _ActualRhs::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs; 2387faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez 2397faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez 2407faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez selfadjoint_rank1_update<Scalar,Index,StorageOrder,UpLo, 2417faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez LhsBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex, 2427faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez RhsBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex> 2437faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez ::run(actualLhs.size(), mat.data(), mat.outerStride(), actualLhsPtr, actualRhsPtr, actualAlpha); 2447faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez } 2457faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez}; 2467faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez 2477faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandeztemplate<typename MatrixType, typename ProductType, int UpLo> 2487faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandezstruct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,false> 2497faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez{ 2502b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang static void run(MatrixType& mat, const ProductType& prod, const typename MatrixType::Scalar& alpha, bool beta) 2517faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez { 2527faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez typedef typename internal::remove_all<typename ProductType::LhsNested>::type Lhs; 2537faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez typedef internal::blas_traits<Lhs> LhsBlasTraits; 2547faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhs; 2557faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez typedef typename internal::remove_all<ActualLhs>::type _ActualLhs; 2567faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez typename internal::add_const_on_value_type<ActualLhs>::type actualLhs = LhsBlasTraits::extract(prod.lhs()); 2577faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez 2587faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez typedef typename internal::remove_all<typename ProductType::RhsNested>::type Rhs; 2597faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez typedef internal::blas_traits<Rhs> RhsBlasTraits; 2607faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhs; 2617faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez typedef typename internal::remove_all<ActualRhs>::type _ActualRhs; 2627faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez typename internal::add_const_on_value_type<ActualRhs>::type actualRhs = RhsBlasTraits::extract(prod.rhs()); 2637faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez 2647faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez typename ProductType::Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) * RhsBlasTraits::extractScalarFactor(prod.rhs().derived()); 2657faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez 2662b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang if(!beta) 2672b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang mat.template triangularView<UpLo>().setZero(); 2682b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 2692b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang enum { 2702b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang IsRowMajor = (internal::traits<MatrixType>::Flags&RowMajorBit) ? 1 : 0, 2712b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang LhsIsRowMajor = _ActualLhs::Flags&RowMajorBit ? 1 : 0, 2722b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang RhsIsRowMajor = _ActualRhs::Flags&RowMajorBit ? 1 : 0 2732b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang }; 2742b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 2752b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang Index size = mat.cols(); 2762b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang Index depth = actualLhs.cols(); 2772b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 2782b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang typedef internal::gemm_blocking_space<IsRowMajor ? RowMajor : ColMajor,typename Lhs::Scalar,typename Rhs::Scalar, 2792b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang MatrixType::MaxColsAtCompileTime, MatrixType::MaxColsAtCompileTime, _ActualRhs::MaxColsAtCompileTime> BlockingType; 2802b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 2812b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang BlockingType blocking(size, size, depth, 1, false); 2822b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 2837faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez internal::general_matrix_matrix_triangular_product<Index, 2842b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang typename Lhs::Scalar, LhsIsRowMajor ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate, 2852b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang typename Rhs::Scalar, RhsIsRowMajor ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate, 2862b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang IsRowMajor ? RowMajor : ColMajor, UpLo> 2872b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang ::run(size, depth, 2887faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez &actualLhs.coeffRef(0,0), actualLhs.outerStride(), &actualRhs.coeffRef(0,0), actualRhs.outerStride(), 2892b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang mat.data(), mat.outerStride(), actualAlpha, blocking); 2907faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez } 2917faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez}; 2927faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez 293c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename MatrixType, unsigned int UpLo> 2942b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<typename ProductType> 2952b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao WangTriangularView<MatrixType,UpLo>& TriangularViewImpl<MatrixType,UpLo,Dense>::_assignProduct(const ProductType& prod, const Scalar& alpha, bool beta) 296c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 2972b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang eigen_assert(derived().nestedExpression().rows() == prod.rows() && derived().cols() == prod.cols()); 2982b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 2992b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang general_product_to_triangular_selector<MatrixType, ProductType, UpLo, internal::traits<ProductType>::InnerSize==1>::run(derived().nestedExpression().const_cast_derived(), prod, alpha, beta); 300c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 3012b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return derived(); 302c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 303c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 304c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} // end namespace Eigen 305c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 306c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#endif // EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_H 307