1c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
2c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#define BLAS_FUNC(NAME) CAT(CAT(SCALAR_PREFIX,NAME),_)
3c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
4c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> class blas_interface<SCALAR> : public c_interface_base<SCALAR>
5c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{
6c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
7c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathpublic :
8c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
9c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  static SCALAR fone;
10c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  static SCALAR fzero;
11c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
12c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  static inline std::string name()
13c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  {
14c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    return MAKE_STRING(CBLASNAME);
15c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  }
16c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
17c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  static inline void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
18c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    BLAS_FUNC(gemv)(&notrans,&N,&N,&fone,A,&N,B,&intone,&fzero,X,&intone);
19c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  }
20c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
21c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  static inline void symv(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
22c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    BLAS_FUNC(symv)(&lower, &N,&fone,A,&N,B,&intone,&fzero,X,&intone);
23c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  }
24c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
25c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  static inline void syr2(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
26c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    BLAS_FUNC(syr2)(&lower,&N,&fone,B,&intone,X,&intone,A,&N);
27c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  }
28c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
29c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  static inline void ger(gene_matrix & A, gene_vector & X, gene_vector & Y, int N){
30c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    BLAS_FUNC(ger)(&N,&N,&fone,X,&intone,Y,&intone,A,&N);
31c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  }
32c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
33c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  static inline void rot(gene_vector & A,  gene_vector & B, SCALAR c, SCALAR s, int N){
34c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    BLAS_FUNC(rot)(&N,A,&intone,B,&intone,&c,&s);
35c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  }
36c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
37c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
38c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    BLAS_FUNC(gemv)(&trans,&N,&N,&fone,A,&N,B,&intone,&fzero,X,&intone);
39c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  }
40c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
41c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  static inline void matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N){
42c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    BLAS_FUNC(gemm)(&notrans,&notrans,&N,&N,&N,&fone,A,&N,B,&N,&fzero,X,&N);
43c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  }
44c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
45c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  static inline void transposed_matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N){
46c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    BLAS_FUNC(gemm)(&notrans,&notrans,&N,&N,&N,&fone,A,&N,B,&N,&fzero,X,&N);
47c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  }
48c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
49c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath//   static inline void ata_product(gene_matrix & A, gene_matrix & X, int N){
50c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath//     ssyrk_(&lower,&trans,&N,&N,&fone,A,&N,&fzero,X,&N);
51c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath//   }
52c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
53c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  static inline void aat_product(gene_matrix & A, gene_matrix & X, int N){
54c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    BLAS_FUNC(syrk)(&lower,&notrans,&N,&N,&fone,A,&N,&fzero,X,&N);
55c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  }
56c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
57c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  static inline void axpy(SCALAR coef, const gene_vector & X, gene_vector & Y, int N){
58c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    BLAS_FUNC(axpy)(&N,&coef,X,&intone,Y,&intone);
59c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  }
60c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
61c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  static inline void axpby(SCALAR a, const gene_vector & X, SCALAR b, gene_vector & Y, int N){
62c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    BLAS_FUNC(scal)(&N,&b,Y,&intone);
63c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    BLAS_FUNC(axpy)(&N,&a,X,&intone,Y,&intone);
64c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  }
65c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
66c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  static inline void cholesky(const gene_matrix & X, gene_matrix & C, int N){
67c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    int N2 = N*N;
68c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    BLAS_FUNC(copy)(&N2, X, &intone, C, &intone);
69c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    char uplo = 'L';
70c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    int info = 0;
71c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    BLAS_FUNC(potrf)(&uplo, &N, C, &N, &info);
72c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    if(info!=0) std::cerr << "potrf_ error " << info << "\n";
73c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  }
74c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
75c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  static inline void partial_lu_decomp(const gene_matrix & X, gene_matrix & C, int N){
76c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    int N2 = N*N;
77c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    BLAS_FUNC(copy)(&N2, X, &intone, C, &intone);
78c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    char uplo = 'L';
79c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    int info = 0;
80c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    int * ipiv = (int*)alloca(sizeof(int)*N);
81c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    BLAS_FUNC(getrf)(&N, &N, C, &N, ipiv, &info);
82c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    if(info!=0) std::cerr << "getrf_ error " << info << "\n";
83c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  }
84c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
85c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  static inline void trisolve_lower(const gene_matrix & L, const gene_vector& B, gene_vector & X, int N){
86c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    BLAS_FUNC(copy)(&N, B, &intone, X, &intone);
87c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    BLAS_FUNC(trsv)(&lower, &notrans, &nonunit, &N, L, &N, X, &intone);
88c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  }
89c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
90c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  static inline void trisolve_lower_matrix(const gene_matrix & L, const gene_matrix& B, gene_matrix & X, int N){
91c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    BLAS_FUNC(copy)(&N, B, &intone, X, &intone);
92c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    BLAS_FUNC(trsm)(&right, &lower, &notrans, &nonunit, &N, &N, &fone, L, &N, X, &N);
93c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  }
94c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
95c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  static inline void trmm(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N){
96c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    BLAS_FUNC(trmm)(&left, &lower, &notrans,&nonunit, &N,&N,&fone,A,&N,B,&N);
97c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  }
98c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
99c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  #ifdef HAS_LAPACK
100c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
101c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  static inline void lu_decomp(const gene_matrix & X, gene_matrix & C, int N){
102c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    int N2 = N*N;
103c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    BLAS_FUNC(copy)(&N2, X, &intone, C, &intone);
104c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    char uplo = 'L';
105c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    int info = 0;
106c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    int * ipiv = (int*)alloca(sizeof(int)*N);
107c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    int * jpiv = (int*)alloca(sizeof(int)*N);
108c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    BLAS_FUNC(getc2)(&N, C, &N, ipiv, jpiv, &info);
109c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  }
110c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
111c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
112c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
113c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  static inline void hessenberg(const gene_matrix & X, gene_matrix & C, int N){
114c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    {
115c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      int N2 = N*N;
116c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      int inc = 1;
117c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      BLAS_FUNC(copy)(&N2, X, &inc, C, &inc);
118c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    }
119c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    int info = 0;
120c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    int ilo = 1;
121c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    int ihi = N;
122c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    int bsize = 64;
123c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    int worksize = N*bsize;
124c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    SCALAR* d = new SCALAR[N+worksize];
125c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    BLAS_FUNC(gehrd)(&N, &ilo, &ihi, C, &N, d, d+N, &worksize, &info);
126c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    delete[] d;
127c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  }
128c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
129c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  static inline void tridiagonalization(const gene_matrix & X, gene_matrix & C, int N){
130c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    {
131c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      int N2 = N*N;
132c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      int inc = 1;
133c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      BLAS_FUNC(copy)(&N2, X, &inc, C, &inc);
134c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    }
135c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    char uplo = 'U';
136c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    int info = 0;
137c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    int ilo = 1;
138c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    int ihi = N;
139c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    int bsize = 64;
140c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    int worksize = N*bsize;
141c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    SCALAR* d = new SCALAR[3*N+worksize];
142c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    BLAS_FUNC(sytrd)(&uplo, &N, C, &N, d, d+N, d+2*N, d+3*N, &worksize, &info);
143c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    delete[] d;
144c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  }
145c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
146c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  #endif // HAS_LAPACK
147c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
148c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath};
149c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
150c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathSCALAR blas_interface<SCALAR>::fone = SCALAR(1);
151c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathSCALAR blas_interface<SCALAR>::fzero = SCALAR(0);
152