rsCpuIntrinsicBLAS.cpp revision 64c682b65cd04ac83b51251b40dca14423df351a
1/*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17
18#include "rsCpuIntrinsic.h"
19#include "rsCpuIntrinsicInlines.h"
20#include "cblas.h"
21
22using namespace android;
23using namespace android::renderscript;
24
25namespace android {
26namespace renderscript {
27
28
29class RsdCpuScriptIntrinsicBLAS : public RsdCpuScriptIntrinsic {
30public:
31    virtual void invokeForEach(uint32_t slot,
32                               const Allocation ** ain,
33                               uint32_t inLen,
34                               Allocation * aout,
35                               const void * usr,
36                               uint32_t usrLen,
37                               const RsScriptCall *sc);
38
39    virtual void populateScript(Script *);
40    virtual ~RsdCpuScriptIntrinsicBLAS();
41    RsdCpuScriptIntrinsicBLAS(RsdCpuReferenceImpl *ctx, const Script *s);
42
43protected:
44
45
46};
47
48}
49}
50
51void RsdCpuScriptIntrinsicBLAS::populateScript(Script *s) {
52    s->mHal.info.exportedVariableCount = 0;
53}
54
55static void initABC(const Allocation ** ain,
56                    size_t size,
57                    void** A,
58                    void** B,
59                    void** C,
60                    int* lda,
61                    int* ldb,
62                    int* ldc)
63{
64    if (ain[0]) {
65        *A = ain[0]->mHal.drvState.lod[0].mallocPtr;
66        *lda = (int)(ain[0]->mHal.drvState.lod[0].stride/size);
67    }
68    if (ain[1]) {
69        *B = ain[1]->mHal.drvState.lod[0].mallocPtr;
70        *ldb = (int)(ain[1]->mHal.drvState.lod[0].stride/size);
71    }
72    if (ain[2]) {
73        *C = ain[2]->mHal.drvState.lod[0].mallocPtr;
74        *ldc = (int)(ain[2]->mHal.drvState.lod[0].stride/size);
75    }
76
77
78}
79
80void RsdCpuScriptIntrinsicBLAS::invokeForEach(uint32_t slot,
81                                              const Allocation ** ain,
82                                              uint32_t inLen,
83                                              Allocation * aout,
84                                              const void * usr,
85                                              uint32_t usrLen,
86                                              const RsScriptCall *sc) {
87    RsBlasCall* call = (RsBlasCall*) usr;
88    // setup BLAS enum args
89    enum CBLAS_TRANSPOSE TransA = (enum CBLAS_TRANSPOSE)call->transA;
90    enum CBLAS_TRANSPOSE TransB = (enum CBLAS_TRANSPOSE)call->transB;
91    enum CBLAS_UPLO Uplo = (enum CBLAS_UPLO)call->uplo;
92    enum CBLAS_DIAG Diag = (enum CBLAS_DIAG)call->diag;
93    enum CBLAS_SIDE Side = (enum CBLAS_SIDE)call->side;
94
95    void *A = nullptr;
96    void *B = nullptr;
97    void *C = nullptr;
98    void *X = nullptr;
99    void *Y = nullptr;
100
101    int lda = 0, ldb = 0, ldc = 0;
102
103    switch (call->func) {
104
105    // Level 1 BLAS: returns into a 1D Allocation
106
107
108    // Level 2 BLAS
109    case (RsBlas_sgemv):
110        initABC(ain, sizeof(float), &A, &X, &C, &lda, &ldb, &ldc);
111        cblas_sgemv(CblasRowMajor, TransA, call->M, call->N, call->alpha.f, (float*)A,
112                    lda, (float*)X, call->incX, call->beta.f, (float*)Y, call->incY);
113        break;
114    case (RsBlas_sgbmv):
115        initABC(ain, sizeof(float), &A, &X, &C, &lda, &ldb, &ldc);
116        cblas_sgbmv(CblasRowMajor, TransA, call->M, call->N, call->KL, call->KU,
117                    call->alpha.f, (float*)A, lda, (float*)X, call->incX,
118                    call->beta.f, (float*)Y, call->incY);
119        break;
120    case (RsBlas_strmv):
121        initABC(ain, sizeof(float), &A, &X, nullptr, &lda, &ldb, nullptr);
122        cblas_strmv(CblasRowMajor, Uplo, TransA, Diag, call->N, (float*)A,
123                    lda, (float*)X, call->incX);
124        break;
125    case (RsBlas_stbmv):
126        initABC(ain, sizeof(float), &A, &X, nullptr, &lda, &ldb, nullptr);
127        cblas_stbmv(CblasRowMajor, Uplo, TransA, Diag, call->N, call->K, (float*)A,
128                    lda, (float*)X, call->incX);
129        break;
130    // stpmv takes a packed 1D Allocation only
131    case (RsBlas_stpmv):
132        initABC(ain, sizeof(float), &A, &X, nullptr, &lda, &ldb, nullptr);
133        cblas_stpmv(CblasRowMajor, Uplo, TransA, Diag, call->N, (float*)A,
134                    (float*)X, call->incX);
135        break;
136    case (RsBlas_strsv):
137        initABC(ain, sizeof(float), &A, &X, nullptr, &lda, &ldb, nullptr);
138        cblas_strsv(CblasRowMajor, Uplo, TransA, Diag, call->N, (float*)A, lda,
139                    (float*)X, call->incX);
140        break;
141    case (RsBlas_stbsv):
142        initABC(ain, sizeof(float), &A, &X, nullptr, &lda, &ldb, nullptr);
143        cblas_stbsv(CblasRowMajor, Uplo, TransA, Diag, call->N, call->K, (float*)A,
144                    lda, (float*)X, call->incX);
145        break;
146    case (RsBlas_stpsv):
147        initABC(ain, sizeof(float), &A, &X, nullptr, &lda, &ldb, nullptr);
148        cblas_stpsv(CblasRowMajor, Uplo, TransA, Diag, call->N, (float*)A,
149                    (float*)X, call->incX);
150        break;
151    case (RsBlas_dgemv):
152        initABC(ain, sizeof(double), &A, &X, &C, &lda, &ldb, &ldc);
153        cblas_dgemv(CblasRowMajor, TransA, call->M, call->N, call->alpha.d, (double*)A,
154                    lda, (double*)X, call->incX, call->beta.d, (double*)Y, call->incY);
155        break;
156    case (RsBlas_dgbmv):
157        initABC(ain, sizeof(double), &A, &X, &C, &lda, &ldb, &ldc);
158        cblas_dgbmv(CblasRowMajor, TransA, call->M, call->N, call->KL, call->KU,
159                    call->alpha.d, (double*)A, lda, (double*)X, call->incX,
160                    call->beta.d, (double*)Y, call->incY);
161        break;
162    case (RsBlas_dtrmv):
163        initABC(ain, sizeof(double), &A, &X, nullptr, &lda, &ldb, nullptr);
164        cblas_dtrmv(CblasRowMajor, Uplo, TransA, Diag, call->N, (double*)A,
165                    lda, (double*)X, call->incX);
166        break;
167    case (RsBlas_dtbmv):
168        initABC(ain, sizeof(double), &A, &X, nullptr, &lda, &ldb, nullptr);
169        cblas_dtbmv(CblasRowMajor, Uplo, TransA, Diag, call->N, call->K, (double*)A,
170                    lda, (double*)X, call->incX);
171        break;
172    // stpmv takes a packed 1D Allocation only
173    case (RsBlas_dtpmv):
174        initABC(ain, sizeof(double), &A, &X, nullptr, &lda, &ldb, nullptr);
175        cblas_dtpmv(CblasRowMajor, Uplo, TransA, Diag, call->N, (double*)A,
176                    (double*)X, call->incX);
177        break;
178    case (RsBlas_dtrsv):
179        initABC(ain, sizeof(double), &A, &X, nullptr, &lda, &ldb, nullptr);
180        cblas_dtrsv(CblasRowMajor, Uplo, TransA, Diag, call->N, (double*)A, lda,
181                    (double*)X, call->incX);
182        break;
183    case (RsBlas_dtbsv):
184        initABC(ain, sizeof(double), &A, &X, nullptr, &lda, &ldb, nullptr);
185        cblas_dtbsv(CblasRowMajor, Uplo, TransA, Diag, call->N, call->K, (double*)A,
186                    lda, (double*)X, call->incX);
187        break;
188    case (RsBlas_dtpsv):
189        initABC(ain, sizeof(double), &A, &X, nullptr, &lda, &ldb, nullptr);
190        cblas_dtpsv(CblasRowMajor, Uplo, TransA, Diag, call->N, (double*)A,
191                    (double*)X, call->incX);
192        break;
193    case (RsBlas_cgemv):
194        initABC(ain, sizeof(float)*2, &A, &X, &C, &lda, &ldb, &ldc);
195        cblas_cgemv(CblasRowMajor, TransA, call->M, call->N, (void*)&call->alpha.c, (void*)A,
196                    lda, (void*)X, call->incX, (void*)&call->beta.c, (void*)Y, call->incY);
197        break;
198    case (RsBlas_cgbmv):
199        initABC(ain, sizeof(float)*2, &A, &X, &C, &lda, &ldb, &ldc);
200        cblas_cgbmv(CblasRowMajor, TransA, call->M, call->N, call->KL, call->KU,
201                    (void*)&call->alpha.c, (void*)A, lda, (void*)X, call->incX,
202                    (void*)&call->beta.c, (void*)Y, call->incY);
203        break;
204    case (RsBlas_ctrmv):
205        initABC(ain, sizeof(float)*2, &A, &X, nullptr, &lda, &ldb, nullptr);
206        cblas_ctrmv(CblasRowMajor, Uplo, TransA, Diag, call->N, (void*)A,
207                    lda, (void*)X, call->incX);
208        break;
209    case (RsBlas_ctbmv):
210        initABC(ain, sizeof(float)*2, &A, &X, nullptr, &lda, &ldb, nullptr);
211        cblas_ctbmv(CblasRowMajor, Uplo, TransA, Diag, call->N, call->K, (void*)A,
212                    lda, (void*)X, call->incX);
213        break;
214    // stpmv takes a packed 1D Allocation only
215    case (RsBlas_ctpmv):
216        initABC(ain, sizeof(float)*2, &A, &X, nullptr, &lda, &ldb, nullptr);
217        cblas_ctpmv(CblasRowMajor, Uplo, TransA, Diag, call->N, (void*)A,
218                    (void*)X, call->incX);
219        break;
220    case (RsBlas_ctrsv):
221        initABC(ain, sizeof(float)*2, &A, &X, nullptr, &lda, &ldb, nullptr);
222        cblas_ctrsv(CblasRowMajor, Uplo, TransA, Diag, call->N, (void*)A, lda,
223                    (void*)X, call->incX);
224        break;
225    case (RsBlas_ctbsv):
226        initABC(ain, sizeof(float)*2, &A, &X, nullptr, &lda, &ldb, nullptr);
227        cblas_ctbsv(CblasRowMajor, Uplo, TransA, Diag, call->N, call->K, (void*)A,
228                    lda, (void*)X, call->incX);
229        break;
230    case (RsBlas_ctpsv):
231        initABC(ain, sizeof(float)*2, &A, &X, nullptr, &lda, &ldb, nullptr);
232        cblas_ctpsv(CblasRowMajor, Uplo, TransA, Diag, call->N, (void*)A,
233                    (void*)X, call->incX);
234        break;
235    case (RsBlas_zgemv):
236        initABC(ain, sizeof(double)*2, &A, &X, &C, &lda, &ldb, &ldc);
237        cblas_zgemv(CblasRowMajor, TransA, call->M, call->N, (void*)&call->alpha.z, (void*)A,
238                    lda, (void*)X, call->incX, (void*)&call->beta.z, (void*)Y, call->incY);
239        break;
240    case (RsBlas_zgbmv):
241        initABC(ain, sizeof(double)*2, &A, &X, &C, &lda, &ldb, &ldc);
242        cblas_zgbmv(CblasRowMajor, TransA, call->M, call->N, call->KL, call->KU,
243                    (void*)&call->alpha.z, (void*)A, lda, (void*)X, call->incX,
244                    (void*)&call->beta.z, (void*)Y, call->incY);
245        break;
246    case (RsBlas_ztrmv):
247        initABC(ain, sizeof(double)*2, &A, &X, nullptr, &lda, &ldb, nullptr);
248        cblas_ztrmv(CblasRowMajor, Uplo, TransA, Diag, call->N, (void*)A,
249                    lda, (void*)X, call->incX);
250        break;
251    case (RsBlas_ztbmv):
252        initABC(ain, sizeof(double)*2, &A, &X, nullptr, &lda, &ldb, nullptr);
253        cblas_ztbmv(CblasRowMajor, Uplo, TransA, Diag, call->N, call->K, (void*)A,
254                    lda, (void*)X, call->incX);
255        break;
256    // stpmv takes a packed 1D Allocation only
257    case (RsBlas_ztpmv):
258        initABC(ain, sizeof(double)*2, &A, &X, nullptr, &lda, &ldb, nullptr);
259        cblas_ztpmv(CblasRowMajor, Uplo, TransA, Diag, call->N, (void*)A,
260                    (void*)X, call->incX);
261        break;
262    case (RsBlas_ztrsv):
263        initABC(ain, sizeof(double)*2, &A, &X, nullptr, &lda, &ldb, nullptr);
264        cblas_ztrsv(CblasRowMajor, Uplo, TransA, Diag, call->N, (void*)A, lda,
265                    (void*)X, call->incX);
266        break;
267    case (RsBlas_ztbsv):
268        initABC(ain, sizeof(double)*2, &A, &X, nullptr, &lda, &ldb, nullptr);
269        cblas_ztbsv(CblasRowMajor, Uplo, TransA, Diag, call->N, call->K, (void*)A,
270                    lda, (void*)X, call->incX);
271        break;
272    case (RsBlas_ztpsv):
273        initABC(ain, sizeof(double)*2, &A, &X, nullptr, &lda, &ldb, nullptr);
274        cblas_ztpsv(CblasRowMajor, Uplo, TransA, Diag, call->N, (void*)A,
275                    (void*)X, call->incX);
276        break;
277
278
279    // S and D only
280    case (RsBlas_ssymv):
281        initABC(ain, sizeof(float), &A, &X, &Y, &lda, &ldb, &ldc);
282        cblas_ssymv(CblasRowMajor, Uplo, call->N, call->alpha.f, (float*)A, lda,
283                    (float*)X, call->incX, call->beta.f, (float*)Y, call->incY);
284        break;
285    case (RsBlas_ssbmv):
286        initABC(ain, sizeof(float), &A, &X, &Y, &lda, &ldb, &ldc);
287        cblas_ssbmv(CblasRowMajor, Uplo, call->N, call->K, call->alpha.f,
288                    (float*)A, lda, (float*)X, call->incX, call->beta.f,
289                    (float*)Y, call->incY);
290        break;
291    //sspmv requires a packed 1D Allocation
292    case (RsBlas_sspmv):
293        initABC(ain, sizeof(float), &A, &X, &Y, &lda, &ldb, &ldc);
294        cblas_sspmv(CblasRowMajor, Uplo, call->N, call->alpha.f, (float*)A,
295                    (float*)X, call->incX, call->beta.f, (float*)Y, call->incY);
296        break;
297    // following calls have init reordered because A is output matrix
298    case (RsBlas_sger):
299        initABC(ain, sizeof(float), &X, &Y, &A, &ldb, &ldc, &lda);
300        cblas_sger(CblasRowMajor, call->M, call->N, call->alpha.f, (float*)X,
301                   call->incX, (float*)Y, call->incY, (float*)A, lda);
302        break;
303    case (RsBlas_ssyr):
304        initABC(ain, sizeof(float), &X, &A, nullptr, &ldb, &lda, nullptr);
305        cblas_ssyr(CblasRowMajor, Uplo, call->N, call->alpha.f, (float*)X, call->incX,
306                   (float*)A, lda);
307        break;
308    // sspr is packed 1D Allocation A only
309    case (RsBlas_sspr):
310        initABC(ain, sizeof(float), &X, &A, nullptr, &ldb, &lda, nullptr);
311        cblas_sspr(CblasRowMajor, Uplo, call->N, call->alpha.f, (float*)X, call->incX,
312                   (float*)A);
313        break;
314    case (RsBlas_ssyr2):
315        initABC(ain, sizeof(float), &X, &Y, &A, &ldb, &ldc, &lda);
316        cblas_ssyr2(CblasRowMajor, Uplo, call->N, call->alpha.f, (float*)X, call->incX,
317                    (float*)Y, call->incY, (float*)A, lda);
318        break;
319    // sspr2 is packed 1D Allocation A only
320    case (RsBlas_sspr2):
321        initABC(ain, sizeof(float), &X, &Y, &A, &ldb, &ldc, &lda);
322        cblas_sspr2(CblasRowMajor, Uplo, call->N, call->alpha.f, (float*)X, call->incX,
323                    (float*)Y, call->incY, (float*)A);
324        break;
325    case (RsBlas_dsymv):
326        initABC(ain, sizeof(double), &A, &X, &Y, &lda, &ldb, &ldc);
327        cblas_dsymv(CblasRowMajor, Uplo, call->N, call->alpha.d, (double*)A, lda,
328                    (double*)X, call->incX, call->beta.d, (double*)Y, call->incY);
329        break;
330    case (RsBlas_dsbmv):
331        initABC(ain, sizeof(double), &A, &X, &Y, &lda, &ldb, &ldc);
332        cblas_dsbmv(CblasRowMajor, Uplo, call->N, call->K, call->alpha.d,
333                    (double*)A, lda, (double*)X, call->incX, call->beta.d,
334                    (double*)Y, call->incY);
335        break;
336    // dspmv requires a packed 1D Allocation
337    case (RsBlas_dspmv):
338        initABC(ain, sizeof(double), &A, &X, &Y, &lda, &ldb, &ldc);
339        cblas_dspmv(CblasRowMajor, Uplo, call->N, call->alpha.d, (double*)A,
340                    (double*)X, call->incX, call->beta.d, (double*)Y, call->incY);
341        break;
342    // following calls have init reordered because A is output matrix
343    case (RsBlas_dger):
344        initABC(ain, sizeof(double), &X, &Y, &A, &ldb, &ldc, &lda);
345        cblas_dger(CblasRowMajor, call->M, call->N, call->alpha.d, (double*)X,
346                   call->incX, (double*)Y, call->incY, (double*)A, lda);
347        break;
348    case (RsBlas_dsyr):
349        initABC(ain, sizeof(double), &X, &A, nullptr, &ldb, &lda, nullptr);
350        cblas_dsyr(CblasRowMajor, Uplo, call->N, call->alpha.d, (double*)X, call->incX,
351                   (double*)A, lda);
352        break;
353    // dspr is packed 1D Allocation A only
354    case (RsBlas_dspr):
355        initABC(ain, sizeof(double), &X, &A, nullptr, &ldb, &lda, nullptr);
356        cblas_dspr(CblasRowMajor, Uplo, call->N, call->alpha.d, (double*)X, call->incX,
357                   (double*)A);
358        break;
359    case (RsBlas_dsyr2):
360        initABC(ain, sizeof(double), &X, &Y, &A, &ldb, &ldc, &lda);
361        cblas_dsyr2(CblasRowMajor, Uplo, call->N, call->alpha.d, (double*)X, call->incX,
362                    (double*)Y, call->incY, (double*)A, lda);
363        break;
364    // dspr2 is packed 1D Allocation A only
365    case (RsBlas_dspr2):
366        initABC(ain, sizeof(double), &X, &Y, &A, &ldb, &ldc, &lda);
367        cblas_dspr2(CblasRowMajor, Uplo, call->N, call->alpha.d, (double*)X, call->incX,
368                    (double*)Y, call->incY, (double*)A);
369        break;
370
371    // C and Z only
372    case (RsBlas_chemv):
373        initABC(ain, sizeof(float)*2, &A, &X, &Y, &lda, &ldb, &ldc);
374        cblas_chemv(CblasRowMajor, Uplo, call->N, (void*)&call->alpha.c, A, lda,
375                    X, call->incX, (void*)&call->beta.c, Y, call->incY);
376        break;
377    case (RsBlas_chbmv):
378        initABC(ain, sizeof(float)*2, &A, &X, &Y, &lda, &ldb, &ldc);
379        cblas_chbmv(CblasRowMajor, Uplo, call->N, call->K, (void*)&call->alpha.c,
380                    A, lda, X, call->incX, (void*)&call->beta.c, Y, call->incY);
381        break;
382    case (RsBlas_chpmv):
383        initABC(ain, sizeof(float)*2, &A, &X, &Y, &lda, &ldb, &ldc);
384        cblas_chpmv(CblasRowMajor, Uplo, call->N, (void*)&call->alpha.c, A,
385                    X, call->incX, (void*)&call->beta.c, Y, call->incY);
386        break;
387    case (RsBlas_cgeru):
388        initABC(ain, sizeof(float)*2, &X, &Y, &A, &ldb, &ldc, &lda);
389        cblas_cgeru(CblasRowMajor, call->M, call->N, (void*)&call->alpha.c,
390                    X, call->incX, Y, call->incY, A, lda);
391        break;
392    case (RsBlas_cgerc):
393        initABC(ain, sizeof(float)*2, &X, &Y, &A, &ldb, &ldc, &lda);
394        cblas_cgerc(CblasRowMajor, call->M, call->N, (void*)&call->alpha.c,
395                    X, call->incX, Y, call->incY, A, lda);
396        break;
397    case (RsBlas_cher):
398        initABC(ain, sizeof(float)*2, &X, &A, nullptr, &ldb, &lda, nullptr);
399        cblas_cher(CblasRowMajor, Uplo, call->N, call->alpha.f,
400                   X, call->incX, A, lda);
401        break;
402    // packed 1D Allocations only
403    case (RsBlas_chpr):
404        initABC(ain, sizeof(float)*2, &X, &A, nullptr, &ldb, &lda, nullptr);
405        cblas_chpr(CblasRowMajor, Uplo, call->N, call->alpha.f, X,
406                   call->incX, A);
407        break;
408    case (RsBlas_cher2):
409        initABC(ain, sizeof(float)*2, &X, &Y, &A, &ldb, &ldc, &lda);
410        cblas_cher2(CblasRowMajor, Uplo, call->N, (void*)&call->alpha.c,
411                   X, call->incX, Y, call->incY, A, lda);
412        break;
413    // packed 1D Allocations only
414    case (RsBlas_chpr2):
415        initABC(ain, sizeof(float)*2, &X, &Y, &A, &ldb, &ldc, &lda);
416        cblas_chpr2(CblasRowMajor, Uplo, call->N, (void*)&call->alpha.c, X,
417                   call->incX, Y, call->incY, A);
418        break;
419    case (RsBlas_zhemv):
420        initABC(ain, sizeof(double)*2, &A, &X, &Y, &lda, &ldb, &ldc);
421        cblas_zhemv(CblasRowMajor, Uplo, call->N, (void*)&call->alpha.z, A, lda,
422                    X, call->incX, (void*)&call->beta.z, Y, call->incY);
423        break;
424    case (RsBlas_zhbmv):
425        initABC(ain, sizeof(double)*2, &A, &X, &Y, &lda, &ldb, &ldc);
426        cblas_zhbmv(CblasRowMajor, Uplo, call->N, call->K, (void*)&call->alpha.z,
427                    A, lda, X, call->incX, (void*)&call->beta.z, Y, call->incY);
428        break;
429    case (RsBlas_zhpmv):
430        initABC(ain, sizeof(double)*2, &A, &X, &Y, &lda, &ldb, &ldc);
431        cblas_zhpmv(CblasRowMajor, Uplo, call->N, (void*)&call->alpha.z, A,
432                    X, call->incX, (void*)&call->beta.z, Y, call->incY);
433        break;
434    case (RsBlas_zgeru):
435        initABC(ain, sizeof(double)*2, &X, &Y, &A, &ldb, &ldc, &lda);
436        cblas_zgeru(CblasRowMajor, call->M, call->N, (void*)&call->alpha.z,
437                    X, call->incX, Y, call->incY, A, lda);
438        break;
439    case (RsBlas_zgerc):
440        initABC(ain, sizeof(double)*2, &X, &Y, &A, &ldb, &ldc, &lda);
441        cblas_zgerc(CblasRowMajor, call->M, call->N, (void*)&call->alpha.z,
442                    X, call->incX, Y, call->incY, A, lda);
443        break;
444    case (RsBlas_zher):
445        initABC(ain, sizeof(double)*2, &X, &A, nullptr, &ldb, &lda, nullptr);
446        cblas_zher(CblasRowMajor, Uplo, call->N, call->alpha.d,
447                   X, call->incX, A, lda);
448        break;
449    // packed 1D Allocations only
450    case (RsBlas_zhpr):
451        initABC(ain, sizeof(double)*2, &X, &A, nullptr, &ldb, &lda, nullptr);
452        cblas_zhpr(CblasRowMajor, Uplo, call->N, call->alpha.d, X,
453                   call->incX, A);
454        break;
455    case (RsBlas_zher2):
456        initABC(ain, sizeof(double)*2, &X, &Y, &A, &ldb, &ldc, &lda);
457        cblas_zher2(CblasRowMajor, Uplo, call->N, (void*)&call->alpha.z,
458                   X, call->incX, Y, call->incY, A, lda);
459        break;
460    // packed 1D Allocations only
461    case (RsBlas_zhpr2):
462        initABC(ain, sizeof(double)*2, &X, &Y, &A, &ldb, &ldc, &lda);
463        cblas_zhpr2(CblasRowMajor, Uplo, call->N, (void*)&call->alpha.z, X,
464                   call->incX, Y, call->incY, A);
465        break;
466
467    // Level 3 BLAS
468    case (RsBlas_sgemm):
469        initABC(ain, sizeof(float), &A, &B, &C, &lda, &ldb, &ldc);
470        ALOGE("call->M = %d, call->N = %d, call->K = %d, lda = %d, ldb = %d, ldc = %d", call->M, call->N, call->K, lda, ldb, ldc);
471        cblas_sgemm(CblasRowMajor, TransA, TransB, call->M, call->N, call->K, call->alpha.f,
472                    (float*)A, lda, (float*)B, ldb, call->beta.f, (float*)C, ldc);
473        break;
474    case (RsBlas_ssymm):
475        initABC(ain, sizeof(float), &A, &B, &C, &lda, &ldb, &ldc);
476        cblas_ssymm(CblasRowMajor, Side, Uplo, call->M, call->N, call->alpha.f, (float*)A,
477                    lda, (float*)B, ldb, call->beta.f, (float*)C, ldc);
478        break;
479    case (RsBlas_ssyrk):
480        initABC(ain, sizeof(float), &A, nullptr, &C, &lda, nullptr, &ldc);
481        cblas_ssyrk(CblasRowMajor, Uplo, TransA, call->N, call->K, call->alpha.f, (float*)A,
482                    lda, call->beta.f, (float*)C, ldc);
483        break;
484    case (RsBlas_ssyr2k):
485        initABC(ain, sizeof(float), &A, &B, &C, &lda, &ldb, &ldc);
486        cblas_ssyr2k(CblasRowMajor, Uplo, TransA, call->N, call->K, call->alpha.f, (float*)A,
487                     lda, (float*)B, ldb, call->beta.f, (float*)C, ldc);
488        break;
489    case (RsBlas_strmm):
490        initABC(ain, sizeof(float), &A, &B, nullptr, &lda, &ldb, nullptr);
491        cblas_strmm(CblasRowMajor, Side, Uplo, TransA, Diag, call->M, call->N, call->alpha.f,
492                    (float*)A, lda, (float*)B, ldb);
493        break;
494    case (RsBlas_strsm):
495        initABC(ain, sizeof(float), &A, &B, nullptr, &lda, &ldb, nullptr);
496        cblas_strsm(CblasRowMajor, Side, Uplo, TransA, Diag, call->M, call->N, call->alpha.f,
497                    (float*)A, lda, (float*)B, ldb);
498        break;
499
500
501    case (RsBlas_dgemm):
502        initABC(ain, sizeof(double), &A, &B, &C, &lda, &ldb, &ldc);
503        cblas_dgemm(CblasRowMajor, TransA, TransB, call->M, call->N, call->K, call->alpha.d,
504                    (double*)A, lda, (double*)B, ldb, call->beta.d, (double*)C, ldc);
505        break;
506    case (RsBlas_dsymm):
507        initABC(ain, sizeof(double), &A, &B, &C, &lda, &ldb, &ldc);
508        cblas_dsymm(CblasRowMajor, Side, Uplo, call->M, call->N, call->alpha.d, (double*)A,
509                    lda, (double*)B, ldb, call->beta.d, (double*)C, ldc);
510        break;
511    case (RsBlas_dsyrk):
512        initABC(ain, sizeof(double), &A, nullptr, &C, &lda, nullptr, &ldc);
513        cblas_dsyrk(CblasRowMajor, Uplo, TransA, call->N, call->K, call->alpha.d, (double*)A,
514                    lda, call->beta.d, (double*)C, ldc);
515        break;
516    case (RsBlas_dsyr2k):
517        initABC(ain, sizeof(double), &A, &B, &C, &lda, &ldb, &ldc);
518        cblas_dsyr2k(CblasRowMajor, Uplo, TransA, call->N, call->K, call->alpha.d, (double*)A,
519                     lda, (double*)B, ldb, call->beta.d, (double*)C, ldc);
520        break;
521    case (RsBlas_dtrmm):
522        initABC(ain, sizeof(double), &A, &B, nullptr, &lda, &ldb, nullptr);
523        cblas_dtrmm(CblasRowMajor, Side, Uplo, TransA, Diag, call->M, call->N, call->alpha.d,
524                    (double*)A, lda, (double*)B, ldb);
525        break;
526    case (RsBlas_dtrsm):
527        initABC(ain, sizeof(double), &A, &B, nullptr, &lda, &ldb, nullptr);
528        cblas_dtrsm(CblasRowMajor, Side, Uplo, TransA, Diag, call->M, call->N, call->alpha.d,
529                    (double*)A, lda, (double*)B, ldb);
530        break;
531
532    case (RsBlas_cgemm):
533        initABC(ain, sizeof(float)*2, &A, &B, &C, &lda, &ldb, &ldc);
534        cblas_cgemm(CblasRowMajor, TransA, TransB, call->M, call->N, call->K, (void*)&call->alpha.c,
535                    A, lda, B, ldb, (void*)&call->beta.c, C, ldc);
536        break;
537    case (RsBlas_csymm):
538        initABC(ain, sizeof(float)*2, &A, &B, &C, &lda, &ldb, &ldc);
539        cblas_csymm(CblasRowMajor, Side, Uplo, call->M, call->N, (void*)&call->alpha.c, A,
540                    lda, B, ldb, (void*)&call->beta.c, C, ldc);
541        break;
542    case (RsBlas_csyrk):
543        initABC(ain, sizeof(float)*2, &A, nullptr, &C, &lda, nullptr, &ldc);
544        cblas_csyrk(CblasRowMajor, Uplo, TransA, call->N, call->K, (void*)&call->alpha.c, A,
545                    lda, (void*)&call->beta.c, C, ldc);
546        break;
547    case (RsBlas_csyr2k):
548        initABC(ain, sizeof(float)*2, &A, &B, &C, &lda, &ldb, &ldc);
549        cblas_csyr2k(CblasRowMajor, Uplo, TransA, call->N, call->K, (void*)&call->alpha.c, A,
550                     lda, B, ldb, (void*)&call->beta.c, C, ldc);
551        break;
552    case (RsBlas_ctrmm):
553        initABC(ain, sizeof(float)*2, &A, &B, nullptr, &lda, &ldb, nullptr);
554        cblas_ctrmm(CblasRowMajor, Side, Uplo, TransA, Diag, call->M, call->N, (void*)&call->alpha.c,
555                    A, lda, B, ldb);
556        break;
557    case (RsBlas_ctrsm):
558        initABC(ain, sizeof(float)*2, &A, &B, nullptr, &lda, &ldb, nullptr);
559        cblas_ctrsm(CblasRowMajor, Side, Uplo, TransA, Diag, call->M, call->N, (void*)&call->alpha.c,
560                    A, lda, B, ldb);
561        break;
562
563    case (RsBlas_zgemm):
564        initABC(ain, sizeof(double)*2, &A, &B, &C, &lda, &ldb, &ldc);
565        cblas_zgemm(CblasRowMajor, TransA, TransB, call->M, call->N, call->K, (void*)&call->alpha.z,
566                    A, lda, B, ldb, (void*)&call->beta.z, C, ldc);
567        break;
568    case (RsBlas_zsymm):
569        initABC(ain, sizeof(double)*2, &A, &B, &C, &lda, &ldb, &ldc);
570        cblas_zsymm(CblasRowMajor, Side, Uplo, call->M, call->N, (void*)&call->alpha.z, A,
571                    lda, B, ldb, (void*)&call->beta.z, C, ldc);
572        break;
573    case (RsBlas_zsyrk):
574        initABC(ain, sizeof(double)*2, &A, nullptr, &C, &lda, nullptr, &ldc);
575        cblas_zsyrk(CblasRowMajor, Uplo, TransA, call->N, call->K, (void*)&call->alpha.z, A,
576                    lda, (void*)&call->beta.z, C, ldc);
577        break;
578    case (RsBlas_zsyr2k):
579        initABC(ain, sizeof(double)*2, &A, &B, &C, &lda, &ldb, &ldc);
580        cblas_zsyr2k(CblasRowMajor, Uplo, TransA, call->N, call->K, (void*)&call->alpha.z, A,
581                     lda, B, ldb, (void*)&call->beta.z, C, ldc);
582        break;
583    case (RsBlas_ztrmm):
584        initABC(ain, sizeof(double)*2, &A, &B, nullptr, &lda, &ldb, nullptr);
585        cblas_ztrmm(CblasRowMajor, Side, Uplo, TransA, Diag, call->M, call->N, (void*)&call->alpha.z,
586                    A, lda, B, ldb);
587        break;
588    case (RsBlas_ztrsm):
589        initABC(ain, sizeof(double)*2, &A, &B, nullptr, &lda, &ldb, nullptr);
590        cblas_ztrsm(CblasRowMajor, Side, Uplo, TransA, Diag, call->M, call->N, (void*)&call->alpha.z,
591                    A, lda, B, ldb);
592        break;
593
594    // Level 3 C and Z only
595    case (RsBlas_chemm):
596        initABC(ain, sizeof(float)*2, &A, &B, &C, &lda, &ldb, &ldc);
597        cblas_chemm(CblasRowMajor, Side, Uplo, call->M, call->N, (void*)&call->alpha.c, A, lda,
598                    B, ldb, (void*)&call->beta.c, C, ldc);
599        break;
600    case (RsBlas_cherk):
601        initABC(ain, sizeof(float)*2, &A, nullptr, &C, &lda, nullptr, &ldc);
602        cblas_cherk(CblasRowMajor, Uplo, TransA, call->N, call->K, call->alpha.f, A, lda,
603                    call->beta.f, C, ldc);
604        break;
605    case (RsBlas_cher2k):
606        initABC(ain, sizeof(float)*2, &A, &B, &C, &lda, &ldb, &ldc);
607        cblas_cher2k(CblasRowMajor, Uplo, TransA, call->N, call->K, (void*)&call->alpha.c, A, lda,
608                     B, ldb, call->beta.f, C, ldc);
609        break;
610
611    case (RsBlas_zhemm):
612        initABC(ain, sizeof(double)*2, &A, &B, &C, &lda, &ldb, &ldc);
613        cblas_zhemm(CblasRowMajor, Side, Uplo, call->M, call->N, (void*)&call->alpha.z, A, lda,
614                    B, ldb, (void*)&call->beta.z, C, ldc);
615        break;
616    case (RsBlas_zherk):
617        initABC(ain, sizeof(double)*2, &A, nullptr, &C, &lda, nullptr, &ldc);
618        cblas_zherk(CblasRowMajor, Uplo, TransA, call->N, call->K, call->alpha.d, A, lda,
619                    call->beta.d, C, ldc);
620        break;
621    case (RsBlas_zher2k):
622        initABC(ain, sizeof(double)*2, &A, &B, &C, &lda, &ldb, &ldc);
623        cblas_zher2k(CblasRowMajor, Uplo, TransA, call->N, call->K, (void*)&call->alpha.z, A, lda,
624                     B, ldb, call->beta.d, C, ldc);
625        break;
626
627    default:
628        ALOGE("unimplemented\n");
629    }
630
631
632}
633
634
635RsdCpuScriptIntrinsicBLAS::RsdCpuScriptIntrinsicBLAS(RsdCpuReferenceImpl *ctx,
636                                                   const Script *s)
637            : RsdCpuScriptIntrinsic(ctx, s, nullptr, RS_SCRIPT_INTRINSIC_ID_BLAS) {
638
639
640}
641
642RsdCpuScriptIntrinsicBLAS::~RsdCpuScriptIntrinsicBLAS() {
643}
644
645
646
647
648
649RsdCpuScriptImpl * rsdIntrinsic_BLAS(RsdCpuReferenceImpl *ctx,
650                                    const Script *s, const Element *e) {
651
652    return new RsdCpuScriptIntrinsicBLAS(ctx, s);
653}
654