1c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 2c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#define BLAS_FUNC(NAME) CAT(CAT(SCALAR_PREFIX,NAME),_) 3c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 4c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> class blas_interface<SCALAR> : public c_interface_base<SCALAR> 5c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 6c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 7c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathpublic : 8c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 9c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath static SCALAR fone; 10c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath static SCALAR fzero; 11c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 12c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath static inline std::string name() 13c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 14c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return MAKE_STRING(CBLASNAME); 15c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 16c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 17c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath static inline void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){ 18c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath BLAS_FUNC(gemv)(¬rans,&N,&N,&fone,A,&N,B,&intone,&fzero,X,&intone); 19c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 20c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 21c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath static inline void symv(gene_matrix & A, gene_vector & B, gene_vector & X, int N){ 22c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath BLAS_FUNC(symv)(&lower, &N,&fone,A,&N,B,&intone,&fzero,X,&intone); 23c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 24c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 25c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath static inline void syr2(gene_matrix & A, gene_vector & B, gene_vector & X, int N){ 26c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath BLAS_FUNC(syr2)(&lower,&N,&fone,B,&intone,X,&intone,A,&N); 27c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 28c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 29c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath static inline void ger(gene_matrix & A, gene_vector & X, gene_vector & Y, int N){ 30c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath BLAS_FUNC(ger)(&N,&N,&fone,X,&intone,Y,&intone,A,&N); 31c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 32c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 33c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath static inline void rot(gene_vector & A, gene_vector & B, SCALAR c, SCALAR s, int N){ 34c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath BLAS_FUNC(rot)(&N,A,&intone,B,&intone,&c,&s); 35c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 36c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 37c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){ 38c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath BLAS_FUNC(gemv)(&trans,&N,&N,&fone,A,&N,B,&intone,&fzero,X,&intone); 39c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 40c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 41c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath static inline void matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N){ 42c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath BLAS_FUNC(gemm)(¬rans,¬rans,&N,&N,&N,&fone,A,&N,B,&N,&fzero,X,&N); 43c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 44c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 45c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath static inline void transposed_matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N){ 46c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath BLAS_FUNC(gemm)(¬rans,¬rans,&N,&N,&N,&fone,A,&N,B,&N,&fzero,X,&N); 47c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 48c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 49c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// static inline void ata_product(gene_matrix & A, gene_matrix & X, int N){ 50c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// ssyrk_(&lower,&trans,&N,&N,&fone,A,&N,&fzero,X,&N); 51c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// } 52c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 53c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath static inline void aat_product(gene_matrix & A, gene_matrix & X, int N){ 54c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath BLAS_FUNC(syrk)(&lower,¬rans,&N,&N,&fone,A,&N,&fzero,X,&N); 55c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 56c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 57c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath static inline void axpy(SCALAR coef, const gene_vector & X, gene_vector & Y, int N){ 58c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath BLAS_FUNC(axpy)(&N,&coef,X,&intone,Y,&intone); 59c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 60c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 61c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath static inline void axpby(SCALAR a, const gene_vector & X, SCALAR b, gene_vector & Y, int N){ 62c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath BLAS_FUNC(scal)(&N,&b,Y,&intone); 63c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath BLAS_FUNC(axpy)(&N,&a,X,&intone,Y,&intone); 64c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 65c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 66c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath static inline void cholesky(const gene_matrix & X, gene_matrix & C, int N){ 67c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath int N2 = N*N; 68c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath BLAS_FUNC(copy)(&N2, X, &intone, C, &intone); 69c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath char uplo = 'L'; 70c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath int info = 0; 71c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath BLAS_FUNC(potrf)(&uplo, &N, C, &N, &info); 72c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath if(info!=0) std::cerr << "potrf_ error " << info << "\n"; 73c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 74c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 75c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath static inline void partial_lu_decomp(const gene_matrix & X, gene_matrix & C, int N){ 76c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath int N2 = N*N; 77c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath BLAS_FUNC(copy)(&N2, X, &intone, C, &intone); 78c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath char uplo = 'L'; 79c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath int info = 0; 80c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath int * ipiv = (int*)alloca(sizeof(int)*N); 81c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath BLAS_FUNC(getrf)(&N, &N, C, &N, ipiv, &info); 82c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath if(info!=0) std::cerr << "getrf_ error " << info << "\n"; 83c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 84c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 85c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath static inline void trisolve_lower(const gene_matrix & L, const gene_vector& B, gene_vector & X, int N){ 86c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath BLAS_FUNC(copy)(&N, B, &intone, X, &intone); 87c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath BLAS_FUNC(trsv)(&lower, ¬rans, &nonunit, &N, L, &N, X, &intone); 88c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 89c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 90c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath static inline void trisolve_lower_matrix(const gene_matrix & L, const gene_matrix& B, gene_matrix & X, int N){ 91c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath BLAS_FUNC(copy)(&N, B, &intone, X, &intone); 92c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath BLAS_FUNC(trsm)(&right, &lower, ¬rans, &nonunit, &N, &N, &fone, L, &N, X, &N); 93c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 94c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 95c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath static inline void trmm(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N){ 96c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath BLAS_FUNC(trmm)(&left, &lower, ¬rans,&nonunit, &N,&N,&fone,A,&N,B,&N); 97c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 98c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 99c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath #ifdef HAS_LAPACK 100c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 101c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath static inline void lu_decomp(const gene_matrix & X, gene_matrix & C, int N){ 102c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath int N2 = N*N; 103c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath BLAS_FUNC(copy)(&N2, X, &intone, C, &intone); 104c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath char uplo = 'L'; 105c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath int info = 0; 106c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath int * ipiv = (int*)alloca(sizeof(int)*N); 107c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath int * jpiv = (int*)alloca(sizeof(int)*N); 108c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath BLAS_FUNC(getc2)(&N, C, &N, ipiv, jpiv, &info); 109c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 110c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 111c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 112c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 113c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath static inline void hessenberg(const gene_matrix & X, gene_matrix & C, int N){ 114c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 115c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath int N2 = N*N; 116c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath int inc = 1; 117c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath BLAS_FUNC(copy)(&N2, X, &inc, C, &inc); 118c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 119c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath int info = 0; 120c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath int ilo = 1; 121c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath int ihi = N; 122c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath int bsize = 64; 123c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath int worksize = N*bsize; 124c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath SCALAR* d = new SCALAR[N+worksize]; 125c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath BLAS_FUNC(gehrd)(&N, &ilo, &ihi, C, &N, d, d+N, &worksize, &info); 126c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath delete[] d; 127c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 128c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 129c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath static inline void tridiagonalization(const gene_matrix & X, gene_matrix & C, int N){ 130c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 131c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath int N2 = N*N; 132c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath int inc = 1; 133c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath BLAS_FUNC(copy)(&N2, X, &inc, C, &inc); 134c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 135c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath char uplo = 'U'; 136c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath int info = 0; 137c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath int ilo = 1; 138c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath int ihi = N; 139c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath int bsize = 64; 140c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath int worksize = N*bsize; 141c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath SCALAR* d = new SCALAR[3*N+worksize]; 142c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath BLAS_FUNC(sytrd)(&uplo, &N, C, &N, d, d+N, d+2*N, d+3*N, &worksize, &info); 143c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath delete[] d; 144c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 145c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 146c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath #endif // HAS_LAPACK 147c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 148c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 149c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 150c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathSCALAR blas_interface<SCALAR>::fone = SCALAR(1); 151c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathSCALAR blas_interface<SCALAR>::fzero = SCALAR(0); 152