TriangularMatrixMatrix.h revision c981c48f5bc9aefeffc0bcb0cc3934c2fae179dd
15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// This file is part of Eigen, a lightweight C++ template library 25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// for linear algebra. 35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr> 5bb1529ce867d8845a77ec7cdf3e3003ef1771a40Ben Murdoch// 6bb1529ce867d8845a77ec7cdf3e3003ef1771a40Ben Murdoch// This Source Code Form is subject to the terms of the Mozilla 75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Public License v. 2.0. If a copy of the MPL was not distributed 87d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 9ca12bfac764ba476d6cd062bf1dde12cc64c3f40Ben Murdoch 105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifndef EIGEN_TRIANGULAR_MATRIX_MATRIX_H 115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define EIGEN_TRIANGULAR_MATRIX_MATRIX_H 125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace Eigen { 145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace internal { 165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// template<typename Scalar, int mr, int StorageOrder, bool Conjugate, int Mode> 185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// struct gemm_pack_lhs_triangular 195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// { 205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Matrix<Scalar,mr,mr, 215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// void operator()(Scalar* blockA, const EIGEN_RESTRICT Scalar* _lhs, int lhsStride, int depth, int rows) 225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// { 235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj; 245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// const_blas_data_mapper<Scalar, StorageOrder> lhs(_lhs,lhsStride); 255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// int count = 0; 265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// const int peeled_mc = (rows/mr)*mr; 275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// for(int i=0; i<peeled_mc; i+=mr) 285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// { 295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// for(int k=0; k<depth; k++) 305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// for(int w=0; w<mr; w++) 315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// blockA[count++] = cj(lhs(i+w, k)); 325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// } 335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// for(int i=peeled_mc; i<rows; i++) 345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// { 355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// for(int k=0; k<depth; k++) 365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// blockA[count++] = cj(lhs(i, k)); 375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// } 385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// } 395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// }; 405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/* Optimized triangular matrix * matrix (_TRMM++) product built on top of 425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * the general matrix matrix product. 435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)template <typename Scalar, typename Index, 455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int Mode, bool LhsIsTriangular, 465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int LhsStorageOrder, bool ConjugateLhs, 475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int RhsStorageOrder, bool ConjugateRhs, 485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int ResStorageOrder, int Version = Specialized> 495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)struct product_triangular_matrix_matrix; 505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)template <typename Scalar, typename Index, 525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int Mode, bool LhsIsTriangular, 535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int LhsStorageOrder, bool ConjugateLhs, 545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int RhsStorageOrder, bool ConjugateRhs, int Version> 555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)struct product_triangular_matrix_matrix<Scalar,Index,Mode,LhsIsTriangular, 565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) LhsStorageOrder,ConjugateLhs, 575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) RhsStorageOrder,ConjugateRhs,RowMajor,Version> 585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static EIGEN_STRONG_INLINE void run( 605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Index rows, Index cols, Index depth, 615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const Scalar* lhs, Index lhsStride, 625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const Scalar* rhs, Index rhsStride, 635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Scalar* res, Index resStride, 645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Scalar alpha, level3_blocking<Scalar,Scalar>& blocking) 655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) { 665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) product_triangular_matrix_matrix<Scalar, Index, 675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (Mode&(UnitDiag|ZeroDiag)) | ((Mode&Upper) ? Lower : Upper), 685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (!LhsIsTriangular), 695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) RhsStorageOrder==RowMajor ? ColMajor : RowMajor, 705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ConjugateRhs, 715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) LhsStorageOrder==RowMajor ? ColMajor : RowMajor, 725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ConjugateLhs, 735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ColMajor> 745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ::run(cols, rows, depth, rhs, rhsStride, lhs, lhsStride, res, resStride, alpha, blocking); 75bb1529ce867d8845a77ec7cdf3e3003ef1771a40Ben Murdoch } 76}; 77 78// implements col-major += alpha * op(triangular) * op(general) 79template <typename Scalar, typename Index, int Mode, 80 int LhsStorageOrder, bool ConjugateLhs, 81 int RhsStorageOrder, bool ConjugateRhs, int Version> 82struct product_triangular_matrix_matrix<Scalar,Index,Mode,true, 83 LhsStorageOrder,ConjugateLhs, 84 RhsStorageOrder,ConjugateRhs,ColMajor,Version> 85{ 86 87 typedef gebp_traits<Scalar,Scalar> Traits; 88 enum { 89 SmallPanelWidth = 2 * EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr), 90 IsLower = (Mode&Lower) == Lower, 91 SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1 92 }; 93 94 static EIGEN_DONT_INLINE void run( 95 Index _rows, Index _cols, Index _depth, 96 const Scalar* _lhs, Index lhsStride, 97 const Scalar* _rhs, Index rhsStride, 98 Scalar* res, Index resStride, 99 Scalar alpha, level3_blocking<Scalar,Scalar>& blocking) 100 { 101 // strip zeros 102 Index diagSize = (std::min)(_rows,_depth); 103 Index rows = IsLower ? _rows : diagSize; 104 Index depth = IsLower ? diagSize : _depth; 105 Index cols = _cols; 106 107 const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride); 108 const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride); 109 110 Index kc = blocking.kc(); // cache block size along the K direction 111 Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction 112 113 std::size_t sizeA = kc*mc; 114 std::size_t sizeB = kc*cols; 115 std::size_t sizeW = kc*Traits::WorkSpaceFactor; 116 117 ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA()); 118 ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB()); 119 ei_declare_aligned_stack_constructed_variable(Scalar, blockW, sizeW, blocking.blockW()); 120 121 Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,LhsStorageOrder> triangularBuffer; 122 triangularBuffer.setZero(); 123 if((Mode&ZeroDiag)==ZeroDiag) 124 triangularBuffer.diagonal().setZero(); 125 else 126 triangularBuffer.diagonal().setOnes(); 127 128 gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel; 129 gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs; 130 gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs; 131 132 for(Index k2=IsLower ? depth : 0; 133 IsLower ? k2>0 : k2<depth; 134 IsLower ? k2-=kc : k2+=kc) 135 { 136 Index actual_kc = (std::min)(IsLower ? k2 : depth-k2, kc); 137 Index actual_k2 = IsLower ? k2-actual_kc : k2; 138 139 // align blocks with the end of the triangular part for trapezoidal lhs 140 if((!IsLower)&&(k2<rows)&&(k2+actual_kc>rows)) 141 { 142 actual_kc = rows-k2; 143 k2 = k2+actual_kc-kc; 144 } 145 146 pack_rhs(blockB, &rhs(actual_k2,0), rhsStride, actual_kc, cols); 147 148 // the selected lhs's panel has to be split in three different parts: 149 // 1 - the part which is zero => skip it 150 // 2 - the diagonal block => special kernel 151 // 3 - the dense panel below (lower case) or above (upper case) the diagonal block => GEPP 152 153 // the block diagonal, if any: 154 if(IsLower || actual_k2<rows) 155 { 156 // for each small vertical panels of lhs 157 for (Index k1=0; k1<actual_kc; k1+=SmallPanelWidth) 158 { 159 Index actualPanelWidth = std::min<Index>(actual_kc-k1, SmallPanelWidth); 160 Index lengthTarget = IsLower ? actual_kc-k1-actualPanelWidth : k1; 161 Index startBlock = actual_k2+k1; 162 Index blockBOffset = k1; 163 164 // => GEBP with the micro triangular block 165 // The trick is to pack this micro block while filling the opposite triangular part with zeros. 166 // To this end we do an extra triangular copy to a small temporary buffer 167 for (Index k=0;k<actualPanelWidth;++k) 168 { 169 if (SetDiag) 170 triangularBuffer.coeffRef(k,k) = lhs(startBlock+k,startBlock+k); 171 for (Index i=IsLower ? k+1 : 0; IsLower ? i<actualPanelWidth : i<k; ++i) 172 triangularBuffer.coeffRef(i,k) = lhs(startBlock+i,startBlock+k); 173 } 174 pack_lhs(blockA, triangularBuffer.data(), triangularBuffer.outerStride(), actualPanelWidth, actualPanelWidth); 175 176 gebp_kernel(res+startBlock, resStride, blockA, blockB, actualPanelWidth, actualPanelWidth, cols, alpha, 177 actualPanelWidth, actual_kc, 0, blockBOffset, blockW); 178 179 // GEBP with remaining micro panel 180 if (lengthTarget>0) 181 { 182 Index startTarget = IsLower ? actual_k2+k1+actualPanelWidth : actual_k2; 183 184 pack_lhs(blockA, &lhs(startTarget,startBlock), lhsStride, actualPanelWidth, lengthTarget); 185 186 gebp_kernel(res+startTarget, resStride, blockA, blockB, lengthTarget, actualPanelWidth, cols, alpha, 187 actualPanelWidth, actual_kc, 0, blockBOffset, blockW); 188 } 189 } 190 } 191 // the part below (lower case) or above (upper case) the diagonal => GEPP 192 { 193 Index start = IsLower ? k2 : 0; 194 Index end = IsLower ? rows : (std::min)(actual_k2,rows); 195 for(Index i2=start; i2<end; i2+=mc) 196 { 197 const Index actual_mc = (std::min)(i2+mc,end)-i2; 198 gemm_pack_lhs<Scalar, Index, Traits::mr,Traits::LhsProgress, LhsStorageOrder,false>() 199 (blockA, &lhs(i2, actual_k2), lhsStride, actual_kc, actual_mc); 200 201 gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha, -1, -1, 0, 0, blockW); 202 } 203 } 204 } 205 } 206}; 207 208// implements col-major += alpha * op(general) * op(triangular) 209template <typename Scalar, typename Index, int Mode, 210 int LhsStorageOrder, bool ConjugateLhs, 211 int RhsStorageOrder, bool ConjugateRhs, int Version> 212struct product_triangular_matrix_matrix<Scalar,Index,Mode,false, 213 LhsStorageOrder,ConjugateLhs, 214 RhsStorageOrder,ConjugateRhs,ColMajor,Version> 215{ 216 typedef gebp_traits<Scalar,Scalar> Traits; 217 enum { 218 SmallPanelWidth = EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr), 219 IsLower = (Mode&Lower) == Lower, 220 SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1 221 }; 222 223 static EIGEN_DONT_INLINE void run( 224 Index _rows, Index _cols, Index _depth, 225 const Scalar* _lhs, Index lhsStride, 226 const Scalar* _rhs, Index rhsStride, 227 Scalar* res, Index resStride, 228 Scalar alpha, level3_blocking<Scalar,Scalar>& blocking) 229 { 230 // strip zeros 231 Index diagSize = (std::min)(_cols,_depth); 232 Index rows = _rows; 233 Index depth = IsLower ? _depth : diagSize; 234 Index cols = IsLower ? diagSize : _cols; 235 236 const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride); 237 const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride); 238 239 Index kc = blocking.kc(); // cache block size along the K direction 240 Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction 241 242 std::size_t sizeA = kc*mc; 243 std::size_t sizeB = kc*cols; 244 std::size_t sizeW = kc*Traits::WorkSpaceFactor; 245 246 ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA()); 247 ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB()); 248 ei_declare_aligned_stack_constructed_variable(Scalar, blockW, sizeW, blocking.blockW()); 249 250 Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,RhsStorageOrder> triangularBuffer; 251 triangularBuffer.setZero(); 252 if((Mode&ZeroDiag)==ZeroDiag) 253 triangularBuffer.diagonal().setZero(); 254 else 255 triangularBuffer.diagonal().setOnes(); 256 257 gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel; 258 gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs; 259 gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs; 260 gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder,false,true> pack_rhs_panel; 261 262 for(Index k2=IsLower ? 0 : depth; 263 IsLower ? k2<depth : k2>0; 264 IsLower ? k2+=kc : k2-=kc) 265 { 266 Index actual_kc = (std::min)(IsLower ? depth-k2 : k2, kc); 267 Index actual_k2 = IsLower ? k2 : k2-actual_kc; 268 269 // align blocks with the end of the triangular part for trapezoidal rhs 270 if(IsLower && (k2<cols) && (actual_k2+actual_kc>cols)) 271 { 272 actual_kc = cols-k2; 273 k2 = actual_k2 + actual_kc - kc; 274 } 275 276 // remaining size 277 Index rs = IsLower ? (std::min)(cols,actual_k2) : cols - k2; 278 // size of the triangular part 279 Index ts = (IsLower && actual_k2>=cols) ? 0 : actual_kc; 280 281 Scalar* geb = blockB+ts*ts; 282 283 pack_rhs(geb, &rhs(actual_k2,IsLower ? 0 : k2), rhsStride, actual_kc, rs); 284 285 // pack the triangular part of the rhs padding the unrolled blocks with zeros 286 if(ts>0) 287 { 288 for (Index j2=0; j2<actual_kc; j2+=SmallPanelWidth) 289 { 290 Index actualPanelWidth = std::min<Index>(actual_kc-j2, SmallPanelWidth); 291 Index actual_j2 = actual_k2 + j2; 292 Index panelOffset = IsLower ? j2+actualPanelWidth : 0; 293 Index panelLength = IsLower ? actual_kc-j2-actualPanelWidth : j2; 294 // general part 295 pack_rhs_panel(blockB+j2*actual_kc, 296 &rhs(actual_k2+panelOffset, actual_j2), rhsStride, 297 panelLength, actualPanelWidth, 298 actual_kc, panelOffset); 299 300 // append the triangular part via a temporary buffer 301 for (Index j=0;j<actualPanelWidth;++j) 302 { 303 if (SetDiag) 304 triangularBuffer.coeffRef(j,j) = rhs(actual_j2+j,actual_j2+j); 305 for (Index k=IsLower ? j+1 : 0; IsLower ? k<actualPanelWidth : k<j; ++k) 306 triangularBuffer.coeffRef(k,j) = rhs(actual_j2+k,actual_j2+j); 307 } 308 309 pack_rhs_panel(blockB+j2*actual_kc, 310 triangularBuffer.data(), triangularBuffer.outerStride(), 311 actualPanelWidth, actualPanelWidth, 312 actual_kc, j2); 313 } 314 } 315 316 for (Index i2=0; i2<rows; i2+=mc) 317 { 318 const Index actual_mc = (std::min)(mc,rows-i2); 319 pack_lhs(blockA, &lhs(i2, actual_k2), lhsStride, actual_kc, actual_mc); 320 321 // triangular kernel 322 if(ts>0) 323 { 324 for (Index j2=0; j2<actual_kc; j2+=SmallPanelWidth) 325 { 326 Index actualPanelWidth = std::min<Index>(actual_kc-j2, SmallPanelWidth); 327 Index panelLength = IsLower ? actual_kc-j2 : j2+actualPanelWidth; 328 Index blockOffset = IsLower ? j2 : 0; 329 330 gebp_kernel(res+i2+(actual_k2+j2)*resStride, resStride, 331 blockA, blockB+j2*actual_kc, 332 actual_mc, panelLength, actualPanelWidth, 333 alpha, 334 actual_kc, actual_kc, // strides 335 blockOffset, blockOffset,// offsets 336 blockW); // workspace 337 } 338 } 339 gebp_kernel(res+i2+(IsLower ? 0 : k2)*resStride, resStride, 340 blockA, geb, actual_mc, actual_kc, rs, 341 alpha, 342 -1, -1, 0, 0, blockW); 343 } 344 } 345 } 346}; 347 348/*************************************************************************** 349* Wrapper to product_triangular_matrix_matrix 350***************************************************************************/ 351 352template<int Mode, bool LhsIsTriangular, typename Lhs, typename Rhs> 353struct traits<TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false> > 354 : traits<ProductBase<TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false>, Lhs, Rhs> > 355{}; 356 357} // end namespace internal 358 359template<int Mode, bool LhsIsTriangular, typename Lhs, typename Rhs> 360struct TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false> 361 : public ProductBase<TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false>, Lhs, Rhs > 362{ 363 EIGEN_PRODUCT_PUBLIC_INTERFACE(TriangularProduct) 364 365 TriangularProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {} 366 367 template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const 368 { 369 typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs); 370 typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs); 371 372 Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs) 373 * RhsBlasTraits::extractScalarFactor(m_rhs); 374 375 typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,Scalar,Scalar, 376 Lhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime, Lhs::MaxColsAtCompileTime,4> BlockingType; 377 378 enum { IsLower = (Mode&Lower) == Lower }; 379 Index stripedRows = ((!LhsIsTriangular) || (IsLower)) ? lhs.rows() : (std::min)(lhs.rows(),lhs.cols()); 380 Index stripedCols = ((LhsIsTriangular) || (!IsLower)) ? rhs.cols() : (std::min)(rhs.cols(),rhs.rows()); 381 Index stripedDepth = LhsIsTriangular ? ((!IsLower) ? lhs.cols() : (std::min)(lhs.cols(),lhs.rows())) 382 : ((IsLower) ? rhs.rows() : (std::min)(rhs.rows(),rhs.cols())); 383 384 BlockingType blocking(stripedRows, stripedCols, stripedDepth); 385 386 internal::product_triangular_matrix_matrix<Scalar, Index, 387 Mode, LhsIsTriangular, 388 (internal::traits<_ActualLhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate, 389 (internal::traits<_ActualRhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate, 390 (internal::traits<Dest >::Flags&RowMajorBit) ? RowMajor : ColMajor> 391 ::run( 392 stripedRows, stripedCols, stripedDepth, // sizes 393 &lhs.coeffRef(0,0), lhs.outerStride(), // lhs info 394 &rhs.coeffRef(0,0), rhs.outerStride(), // rhs info 395 &dst.coeffRef(0,0), dst.outerStride(), // result info 396 actualAlpha, blocking 397 ); 398 } 399}; 400 401} // end namespace Eigen 402 403#endif // EIGEN_TRIANGULAR_MATRIX_MATRIX_H 404