rsCpuIntrinsicBLAS.cpp revision 64c682b65cd04ac83b51251b40dca14423df351a
1/* 2 * Copyright (C) 2012 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 18#include "rsCpuIntrinsic.h" 19#include "rsCpuIntrinsicInlines.h" 20#include "cblas.h" 21 22using namespace android; 23using namespace android::renderscript; 24 25namespace android { 26namespace renderscript { 27 28 29class RsdCpuScriptIntrinsicBLAS : public RsdCpuScriptIntrinsic { 30public: 31 virtual void invokeForEach(uint32_t slot, 32 const Allocation ** ain, 33 uint32_t inLen, 34 Allocation * aout, 35 const void * usr, 36 uint32_t usrLen, 37 const RsScriptCall *sc); 38 39 virtual void populateScript(Script *); 40 virtual ~RsdCpuScriptIntrinsicBLAS(); 41 RsdCpuScriptIntrinsicBLAS(RsdCpuReferenceImpl *ctx, const Script *s); 42 43protected: 44 45 46}; 47 48} 49} 50 51void RsdCpuScriptIntrinsicBLAS::populateScript(Script *s) { 52 s->mHal.info.exportedVariableCount = 0; 53} 54 55static void initABC(const Allocation ** ain, 56 size_t size, 57 void** A, 58 void** B, 59 void** C, 60 int* lda, 61 int* ldb, 62 int* ldc) 63{ 64 if (ain[0]) { 65 *A = ain[0]->mHal.drvState.lod[0].mallocPtr; 66 *lda = (int)(ain[0]->mHal.drvState.lod[0].stride/size); 67 } 68 if (ain[1]) { 69 *B = ain[1]->mHal.drvState.lod[0].mallocPtr; 70 *ldb = (int)(ain[1]->mHal.drvState.lod[0].stride/size); 71 } 72 if (ain[2]) { 73 *C = ain[2]->mHal.drvState.lod[0].mallocPtr; 74 *ldc = (int)(ain[2]->mHal.drvState.lod[0].stride/size); 75 } 76 77 78} 79 80void RsdCpuScriptIntrinsicBLAS::invokeForEach(uint32_t slot, 81 const Allocation ** ain, 82 uint32_t inLen, 83 Allocation * aout, 84 const void * usr, 85 uint32_t usrLen, 86 const RsScriptCall *sc) { 87 RsBlasCall* call = (RsBlasCall*) usr; 88 // setup BLAS enum args 89 enum CBLAS_TRANSPOSE TransA = (enum CBLAS_TRANSPOSE)call->transA; 90 enum CBLAS_TRANSPOSE TransB = (enum CBLAS_TRANSPOSE)call->transB; 91 enum CBLAS_UPLO Uplo = (enum CBLAS_UPLO)call->uplo; 92 enum CBLAS_DIAG Diag = (enum CBLAS_DIAG)call->diag; 93 enum CBLAS_SIDE Side = (enum CBLAS_SIDE)call->side; 94 95 void *A = nullptr; 96 void *B = nullptr; 97 void *C = nullptr; 98 void *X = nullptr; 99 void *Y = nullptr; 100 101 int lda = 0, ldb = 0, ldc = 0; 102 103 switch (call->func) { 104 105 // Level 1 BLAS: returns into a 1D Allocation 106 107 108 // Level 2 BLAS 109 case (RsBlas_sgemv): 110 initABC(ain, sizeof(float), &A, &X, &C, &lda, &ldb, &ldc); 111 cblas_sgemv(CblasRowMajor, TransA, call->M, call->N, call->alpha.f, (float*)A, 112 lda, (float*)X, call->incX, call->beta.f, (float*)Y, call->incY); 113 break; 114 case (RsBlas_sgbmv): 115 initABC(ain, sizeof(float), &A, &X, &C, &lda, &ldb, &ldc); 116 cblas_sgbmv(CblasRowMajor, TransA, call->M, call->N, call->KL, call->KU, 117 call->alpha.f, (float*)A, lda, (float*)X, call->incX, 118 call->beta.f, (float*)Y, call->incY); 119 break; 120 case (RsBlas_strmv): 121 initABC(ain, sizeof(float), &A, &X, nullptr, &lda, &ldb, nullptr); 122 cblas_strmv(CblasRowMajor, Uplo, TransA, Diag, call->N, (float*)A, 123 lda, (float*)X, call->incX); 124 break; 125 case (RsBlas_stbmv): 126 initABC(ain, sizeof(float), &A, &X, nullptr, &lda, &ldb, nullptr); 127 cblas_stbmv(CblasRowMajor, Uplo, TransA, Diag, call->N, call->K, (float*)A, 128 lda, (float*)X, call->incX); 129 break; 130 // stpmv takes a packed 1D Allocation only 131 case (RsBlas_stpmv): 132 initABC(ain, sizeof(float), &A, &X, nullptr, &lda, &ldb, nullptr); 133 cblas_stpmv(CblasRowMajor, Uplo, TransA, Diag, call->N, (float*)A, 134 (float*)X, call->incX); 135 break; 136 case (RsBlas_strsv): 137 initABC(ain, sizeof(float), &A, &X, nullptr, &lda, &ldb, nullptr); 138 cblas_strsv(CblasRowMajor, Uplo, TransA, Diag, call->N, (float*)A, lda, 139 (float*)X, call->incX); 140 break; 141 case (RsBlas_stbsv): 142 initABC(ain, sizeof(float), &A, &X, nullptr, &lda, &ldb, nullptr); 143 cblas_stbsv(CblasRowMajor, Uplo, TransA, Diag, call->N, call->K, (float*)A, 144 lda, (float*)X, call->incX); 145 break; 146 case (RsBlas_stpsv): 147 initABC(ain, sizeof(float), &A, &X, nullptr, &lda, &ldb, nullptr); 148 cblas_stpsv(CblasRowMajor, Uplo, TransA, Diag, call->N, (float*)A, 149 (float*)X, call->incX); 150 break; 151 case (RsBlas_dgemv): 152 initABC(ain, sizeof(double), &A, &X, &C, &lda, &ldb, &ldc); 153 cblas_dgemv(CblasRowMajor, TransA, call->M, call->N, call->alpha.d, (double*)A, 154 lda, (double*)X, call->incX, call->beta.d, (double*)Y, call->incY); 155 break; 156 case (RsBlas_dgbmv): 157 initABC(ain, sizeof(double), &A, &X, &C, &lda, &ldb, &ldc); 158 cblas_dgbmv(CblasRowMajor, TransA, call->M, call->N, call->KL, call->KU, 159 call->alpha.d, (double*)A, lda, (double*)X, call->incX, 160 call->beta.d, (double*)Y, call->incY); 161 break; 162 case (RsBlas_dtrmv): 163 initABC(ain, sizeof(double), &A, &X, nullptr, &lda, &ldb, nullptr); 164 cblas_dtrmv(CblasRowMajor, Uplo, TransA, Diag, call->N, (double*)A, 165 lda, (double*)X, call->incX); 166 break; 167 case (RsBlas_dtbmv): 168 initABC(ain, sizeof(double), &A, &X, nullptr, &lda, &ldb, nullptr); 169 cblas_dtbmv(CblasRowMajor, Uplo, TransA, Diag, call->N, call->K, (double*)A, 170 lda, (double*)X, call->incX); 171 break; 172 // stpmv takes a packed 1D Allocation only 173 case (RsBlas_dtpmv): 174 initABC(ain, sizeof(double), &A, &X, nullptr, &lda, &ldb, nullptr); 175 cblas_dtpmv(CblasRowMajor, Uplo, TransA, Diag, call->N, (double*)A, 176 (double*)X, call->incX); 177 break; 178 case (RsBlas_dtrsv): 179 initABC(ain, sizeof(double), &A, &X, nullptr, &lda, &ldb, nullptr); 180 cblas_dtrsv(CblasRowMajor, Uplo, TransA, Diag, call->N, (double*)A, lda, 181 (double*)X, call->incX); 182 break; 183 case (RsBlas_dtbsv): 184 initABC(ain, sizeof(double), &A, &X, nullptr, &lda, &ldb, nullptr); 185 cblas_dtbsv(CblasRowMajor, Uplo, TransA, Diag, call->N, call->K, (double*)A, 186 lda, (double*)X, call->incX); 187 break; 188 case (RsBlas_dtpsv): 189 initABC(ain, sizeof(double), &A, &X, nullptr, &lda, &ldb, nullptr); 190 cblas_dtpsv(CblasRowMajor, Uplo, TransA, Diag, call->N, (double*)A, 191 (double*)X, call->incX); 192 break; 193 case (RsBlas_cgemv): 194 initABC(ain, sizeof(float)*2, &A, &X, &C, &lda, &ldb, &ldc); 195 cblas_cgemv(CblasRowMajor, TransA, call->M, call->N, (void*)&call->alpha.c, (void*)A, 196 lda, (void*)X, call->incX, (void*)&call->beta.c, (void*)Y, call->incY); 197 break; 198 case (RsBlas_cgbmv): 199 initABC(ain, sizeof(float)*2, &A, &X, &C, &lda, &ldb, &ldc); 200 cblas_cgbmv(CblasRowMajor, TransA, call->M, call->N, call->KL, call->KU, 201 (void*)&call->alpha.c, (void*)A, lda, (void*)X, call->incX, 202 (void*)&call->beta.c, (void*)Y, call->incY); 203 break; 204 case (RsBlas_ctrmv): 205 initABC(ain, sizeof(float)*2, &A, &X, nullptr, &lda, &ldb, nullptr); 206 cblas_ctrmv(CblasRowMajor, Uplo, TransA, Diag, call->N, (void*)A, 207 lda, (void*)X, call->incX); 208 break; 209 case (RsBlas_ctbmv): 210 initABC(ain, sizeof(float)*2, &A, &X, nullptr, &lda, &ldb, nullptr); 211 cblas_ctbmv(CblasRowMajor, Uplo, TransA, Diag, call->N, call->K, (void*)A, 212 lda, (void*)X, call->incX); 213 break; 214 // stpmv takes a packed 1D Allocation only 215 case (RsBlas_ctpmv): 216 initABC(ain, sizeof(float)*2, &A, &X, nullptr, &lda, &ldb, nullptr); 217 cblas_ctpmv(CblasRowMajor, Uplo, TransA, Diag, call->N, (void*)A, 218 (void*)X, call->incX); 219 break; 220 case (RsBlas_ctrsv): 221 initABC(ain, sizeof(float)*2, &A, &X, nullptr, &lda, &ldb, nullptr); 222 cblas_ctrsv(CblasRowMajor, Uplo, TransA, Diag, call->N, (void*)A, lda, 223 (void*)X, call->incX); 224 break; 225 case (RsBlas_ctbsv): 226 initABC(ain, sizeof(float)*2, &A, &X, nullptr, &lda, &ldb, nullptr); 227 cblas_ctbsv(CblasRowMajor, Uplo, TransA, Diag, call->N, call->K, (void*)A, 228 lda, (void*)X, call->incX); 229 break; 230 case (RsBlas_ctpsv): 231 initABC(ain, sizeof(float)*2, &A, &X, nullptr, &lda, &ldb, nullptr); 232 cblas_ctpsv(CblasRowMajor, Uplo, TransA, Diag, call->N, (void*)A, 233 (void*)X, call->incX); 234 break; 235 case (RsBlas_zgemv): 236 initABC(ain, sizeof(double)*2, &A, &X, &C, &lda, &ldb, &ldc); 237 cblas_zgemv(CblasRowMajor, TransA, call->M, call->N, (void*)&call->alpha.z, (void*)A, 238 lda, (void*)X, call->incX, (void*)&call->beta.z, (void*)Y, call->incY); 239 break; 240 case (RsBlas_zgbmv): 241 initABC(ain, sizeof(double)*2, &A, &X, &C, &lda, &ldb, &ldc); 242 cblas_zgbmv(CblasRowMajor, TransA, call->M, call->N, call->KL, call->KU, 243 (void*)&call->alpha.z, (void*)A, lda, (void*)X, call->incX, 244 (void*)&call->beta.z, (void*)Y, call->incY); 245 break; 246 case (RsBlas_ztrmv): 247 initABC(ain, sizeof(double)*2, &A, &X, nullptr, &lda, &ldb, nullptr); 248 cblas_ztrmv(CblasRowMajor, Uplo, TransA, Diag, call->N, (void*)A, 249 lda, (void*)X, call->incX); 250 break; 251 case (RsBlas_ztbmv): 252 initABC(ain, sizeof(double)*2, &A, &X, nullptr, &lda, &ldb, nullptr); 253 cblas_ztbmv(CblasRowMajor, Uplo, TransA, Diag, call->N, call->K, (void*)A, 254 lda, (void*)X, call->incX); 255 break; 256 // stpmv takes a packed 1D Allocation only 257 case (RsBlas_ztpmv): 258 initABC(ain, sizeof(double)*2, &A, &X, nullptr, &lda, &ldb, nullptr); 259 cblas_ztpmv(CblasRowMajor, Uplo, TransA, Diag, call->N, (void*)A, 260 (void*)X, call->incX); 261 break; 262 case (RsBlas_ztrsv): 263 initABC(ain, sizeof(double)*2, &A, &X, nullptr, &lda, &ldb, nullptr); 264 cblas_ztrsv(CblasRowMajor, Uplo, TransA, Diag, call->N, (void*)A, lda, 265 (void*)X, call->incX); 266 break; 267 case (RsBlas_ztbsv): 268 initABC(ain, sizeof(double)*2, &A, &X, nullptr, &lda, &ldb, nullptr); 269 cblas_ztbsv(CblasRowMajor, Uplo, TransA, Diag, call->N, call->K, (void*)A, 270 lda, (void*)X, call->incX); 271 break; 272 case (RsBlas_ztpsv): 273 initABC(ain, sizeof(double)*2, &A, &X, nullptr, &lda, &ldb, nullptr); 274 cblas_ztpsv(CblasRowMajor, Uplo, TransA, Diag, call->N, (void*)A, 275 (void*)X, call->incX); 276 break; 277 278 279 // S and D only 280 case (RsBlas_ssymv): 281 initABC(ain, sizeof(float), &A, &X, &Y, &lda, &ldb, &ldc); 282 cblas_ssymv(CblasRowMajor, Uplo, call->N, call->alpha.f, (float*)A, lda, 283 (float*)X, call->incX, call->beta.f, (float*)Y, call->incY); 284 break; 285 case (RsBlas_ssbmv): 286 initABC(ain, sizeof(float), &A, &X, &Y, &lda, &ldb, &ldc); 287 cblas_ssbmv(CblasRowMajor, Uplo, call->N, call->K, call->alpha.f, 288 (float*)A, lda, (float*)X, call->incX, call->beta.f, 289 (float*)Y, call->incY); 290 break; 291 //sspmv requires a packed 1D Allocation 292 case (RsBlas_sspmv): 293 initABC(ain, sizeof(float), &A, &X, &Y, &lda, &ldb, &ldc); 294 cblas_sspmv(CblasRowMajor, Uplo, call->N, call->alpha.f, (float*)A, 295 (float*)X, call->incX, call->beta.f, (float*)Y, call->incY); 296 break; 297 // following calls have init reordered because A is output matrix 298 case (RsBlas_sger): 299 initABC(ain, sizeof(float), &X, &Y, &A, &ldb, &ldc, &lda); 300 cblas_sger(CblasRowMajor, call->M, call->N, call->alpha.f, (float*)X, 301 call->incX, (float*)Y, call->incY, (float*)A, lda); 302 break; 303 case (RsBlas_ssyr): 304 initABC(ain, sizeof(float), &X, &A, nullptr, &ldb, &lda, nullptr); 305 cblas_ssyr(CblasRowMajor, Uplo, call->N, call->alpha.f, (float*)X, call->incX, 306 (float*)A, lda); 307 break; 308 // sspr is packed 1D Allocation A only 309 case (RsBlas_sspr): 310 initABC(ain, sizeof(float), &X, &A, nullptr, &ldb, &lda, nullptr); 311 cblas_sspr(CblasRowMajor, Uplo, call->N, call->alpha.f, (float*)X, call->incX, 312 (float*)A); 313 break; 314 case (RsBlas_ssyr2): 315 initABC(ain, sizeof(float), &X, &Y, &A, &ldb, &ldc, &lda); 316 cblas_ssyr2(CblasRowMajor, Uplo, call->N, call->alpha.f, (float*)X, call->incX, 317 (float*)Y, call->incY, (float*)A, lda); 318 break; 319 // sspr2 is packed 1D Allocation A only 320 case (RsBlas_sspr2): 321 initABC(ain, sizeof(float), &X, &Y, &A, &ldb, &ldc, &lda); 322 cblas_sspr2(CblasRowMajor, Uplo, call->N, call->alpha.f, (float*)X, call->incX, 323 (float*)Y, call->incY, (float*)A); 324 break; 325 case (RsBlas_dsymv): 326 initABC(ain, sizeof(double), &A, &X, &Y, &lda, &ldb, &ldc); 327 cblas_dsymv(CblasRowMajor, Uplo, call->N, call->alpha.d, (double*)A, lda, 328 (double*)X, call->incX, call->beta.d, (double*)Y, call->incY); 329 break; 330 case (RsBlas_dsbmv): 331 initABC(ain, sizeof(double), &A, &X, &Y, &lda, &ldb, &ldc); 332 cblas_dsbmv(CblasRowMajor, Uplo, call->N, call->K, call->alpha.d, 333 (double*)A, lda, (double*)X, call->incX, call->beta.d, 334 (double*)Y, call->incY); 335 break; 336 // dspmv requires a packed 1D Allocation 337 case (RsBlas_dspmv): 338 initABC(ain, sizeof(double), &A, &X, &Y, &lda, &ldb, &ldc); 339 cblas_dspmv(CblasRowMajor, Uplo, call->N, call->alpha.d, (double*)A, 340 (double*)X, call->incX, call->beta.d, (double*)Y, call->incY); 341 break; 342 // following calls have init reordered because A is output matrix 343 case (RsBlas_dger): 344 initABC(ain, sizeof(double), &X, &Y, &A, &ldb, &ldc, &lda); 345 cblas_dger(CblasRowMajor, call->M, call->N, call->alpha.d, (double*)X, 346 call->incX, (double*)Y, call->incY, (double*)A, lda); 347 break; 348 case (RsBlas_dsyr): 349 initABC(ain, sizeof(double), &X, &A, nullptr, &ldb, &lda, nullptr); 350 cblas_dsyr(CblasRowMajor, Uplo, call->N, call->alpha.d, (double*)X, call->incX, 351 (double*)A, lda); 352 break; 353 // dspr is packed 1D Allocation A only 354 case (RsBlas_dspr): 355 initABC(ain, sizeof(double), &X, &A, nullptr, &ldb, &lda, nullptr); 356 cblas_dspr(CblasRowMajor, Uplo, call->N, call->alpha.d, (double*)X, call->incX, 357 (double*)A); 358 break; 359 case (RsBlas_dsyr2): 360 initABC(ain, sizeof(double), &X, &Y, &A, &ldb, &ldc, &lda); 361 cblas_dsyr2(CblasRowMajor, Uplo, call->N, call->alpha.d, (double*)X, call->incX, 362 (double*)Y, call->incY, (double*)A, lda); 363 break; 364 // dspr2 is packed 1D Allocation A only 365 case (RsBlas_dspr2): 366 initABC(ain, sizeof(double), &X, &Y, &A, &ldb, &ldc, &lda); 367 cblas_dspr2(CblasRowMajor, Uplo, call->N, call->alpha.d, (double*)X, call->incX, 368 (double*)Y, call->incY, (double*)A); 369 break; 370 371 // C and Z only 372 case (RsBlas_chemv): 373 initABC(ain, sizeof(float)*2, &A, &X, &Y, &lda, &ldb, &ldc); 374 cblas_chemv(CblasRowMajor, Uplo, call->N, (void*)&call->alpha.c, A, lda, 375 X, call->incX, (void*)&call->beta.c, Y, call->incY); 376 break; 377 case (RsBlas_chbmv): 378 initABC(ain, sizeof(float)*2, &A, &X, &Y, &lda, &ldb, &ldc); 379 cblas_chbmv(CblasRowMajor, Uplo, call->N, call->K, (void*)&call->alpha.c, 380 A, lda, X, call->incX, (void*)&call->beta.c, Y, call->incY); 381 break; 382 case (RsBlas_chpmv): 383 initABC(ain, sizeof(float)*2, &A, &X, &Y, &lda, &ldb, &ldc); 384 cblas_chpmv(CblasRowMajor, Uplo, call->N, (void*)&call->alpha.c, A, 385 X, call->incX, (void*)&call->beta.c, Y, call->incY); 386 break; 387 case (RsBlas_cgeru): 388 initABC(ain, sizeof(float)*2, &X, &Y, &A, &ldb, &ldc, &lda); 389 cblas_cgeru(CblasRowMajor, call->M, call->N, (void*)&call->alpha.c, 390 X, call->incX, Y, call->incY, A, lda); 391 break; 392 case (RsBlas_cgerc): 393 initABC(ain, sizeof(float)*2, &X, &Y, &A, &ldb, &ldc, &lda); 394 cblas_cgerc(CblasRowMajor, call->M, call->N, (void*)&call->alpha.c, 395 X, call->incX, Y, call->incY, A, lda); 396 break; 397 case (RsBlas_cher): 398 initABC(ain, sizeof(float)*2, &X, &A, nullptr, &ldb, &lda, nullptr); 399 cblas_cher(CblasRowMajor, Uplo, call->N, call->alpha.f, 400 X, call->incX, A, lda); 401 break; 402 // packed 1D Allocations only 403 case (RsBlas_chpr): 404 initABC(ain, sizeof(float)*2, &X, &A, nullptr, &ldb, &lda, nullptr); 405 cblas_chpr(CblasRowMajor, Uplo, call->N, call->alpha.f, X, 406 call->incX, A); 407 break; 408 case (RsBlas_cher2): 409 initABC(ain, sizeof(float)*2, &X, &Y, &A, &ldb, &ldc, &lda); 410 cblas_cher2(CblasRowMajor, Uplo, call->N, (void*)&call->alpha.c, 411 X, call->incX, Y, call->incY, A, lda); 412 break; 413 // packed 1D Allocations only 414 case (RsBlas_chpr2): 415 initABC(ain, sizeof(float)*2, &X, &Y, &A, &ldb, &ldc, &lda); 416 cblas_chpr2(CblasRowMajor, Uplo, call->N, (void*)&call->alpha.c, X, 417 call->incX, Y, call->incY, A); 418 break; 419 case (RsBlas_zhemv): 420 initABC(ain, sizeof(double)*2, &A, &X, &Y, &lda, &ldb, &ldc); 421 cblas_zhemv(CblasRowMajor, Uplo, call->N, (void*)&call->alpha.z, A, lda, 422 X, call->incX, (void*)&call->beta.z, Y, call->incY); 423 break; 424 case (RsBlas_zhbmv): 425 initABC(ain, sizeof(double)*2, &A, &X, &Y, &lda, &ldb, &ldc); 426 cblas_zhbmv(CblasRowMajor, Uplo, call->N, call->K, (void*)&call->alpha.z, 427 A, lda, X, call->incX, (void*)&call->beta.z, Y, call->incY); 428 break; 429 case (RsBlas_zhpmv): 430 initABC(ain, sizeof(double)*2, &A, &X, &Y, &lda, &ldb, &ldc); 431 cblas_zhpmv(CblasRowMajor, Uplo, call->N, (void*)&call->alpha.z, A, 432 X, call->incX, (void*)&call->beta.z, Y, call->incY); 433 break; 434 case (RsBlas_zgeru): 435 initABC(ain, sizeof(double)*2, &X, &Y, &A, &ldb, &ldc, &lda); 436 cblas_zgeru(CblasRowMajor, call->M, call->N, (void*)&call->alpha.z, 437 X, call->incX, Y, call->incY, A, lda); 438 break; 439 case (RsBlas_zgerc): 440 initABC(ain, sizeof(double)*2, &X, &Y, &A, &ldb, &ldc, &lda); 441 cblas_zgerc(CblasRowMajor, call->M, call->N, (void*)&call->alpha.z, 442 X, call->incX, Y, call->incY, A, lda); 443 break; 444 case (RsBlas_zher): 445 initABC(ain, sizeof(double)*2, &X, &A, nullptr, &ldb, &lda, nullptr); 446 cblas_zher(CblasRowMajor, Uplo, call->N, call->alpha.d, 447 X, call->incX, A, lda); 448 break; 449 // packed 1D Allocations only 450 case (RsBlas_zhpr): 451 initABC(ain, sizeof(double)*2, &X, &A, nullptr, &ldb, &lda, nullptr); 452 cblas_zhpr(CblasRowMajor, Uplo, call->N, call->alpha.d, X, 453 call->incX, A); 454 break; 455 case (RsBlas_zher2): 456 initABC(ain, sizeof(double)*2, &X, &Y, &A, &ldb, &ldc, &lda); 457 cblas_zher2(CblasRowMajor, Uplo, call->N, (void*)&call->alpha.z, 458 X, call->incX, Y, call->incY, A, lda); 459 break; 460 // packed 1D Allocations only 461 case (RsBlas_zhpr2): 462 initABC(ain, sizeof(double)*2, &X, &Y, &A, &ldb, &ldc, &lda); 463 cblas_zhpr2(CblasRowMajor, Uplo, call->N, (void*)&call->alpha.z, X, 464 call->incX, Y, call->incY, A); 465 break; 466 467 // Level 3 BLAS 468 case (RsBlas_sgemm): 469 initABC(ain, sizeof(float), &A, &B, &C, &lda, &ldb, &ldc); 470 ALOGE("call->M = %d, call->N = %d, call->K = %d, lda = %d, ldb = %d, ldc = %d", call->M, call->N, call->K, lda, ldb, ldc); 471 cblas_sgemm(CblasRowMajor, TransA, TransB, call->M, call->N, call->K, call->alpha.f, 472 (float*)A, lda, (float*)B, ldb, call->beta.f, (float*)C, ldc); 473 break; 474 case (RsBlas_ssymm): 475 initABC(ain, sizeof(float), &A, &B, &C, &lda, &ldb, &ldc); 476 cblas_ssymm(CblasRowMajor, Side, Uplo, call->M, call->N, call->alpha.f, (float*)A, 477 lda, (float*)B, ldb, call->beta.f, (float*)C, ldc); 478 break; 479 case (RsBlas_ssyrk): 480 initABC(ain, sizeof(float), &A, nullptr, &C, &lda, nullptr, &ldc); 481 cblas_ssyrk(CblasRowMajor, Uplo, TransA, call->N, call->K, call->alpha.f, (float*)A, 482 lda, call->beta.f, (float*)C, ldc); 483 break; 484 case (RsBlas_ssyr2k): 485 initABC(ain, sizeof(float), &A, &B, &C, &lda, &ldb, &ldc); 486 cblas_ssyr2k(CblasRowMajor, Uplo, TransA, call->N, call->K, call->alpha.f, (float*)A, 487 lda, (float*)B, ldb, call->beta.f, (float*)C, ldc); 488 break; 489 case (RsBlas_strmm): 490 initABC(ain, sizeof(float), &A, &B, nullptr, &lda, &ldb, nullptr); 491 cblas_strmm(CblasRowMajor, Side, Uplo, TransA, Diag, call->M, call->N, call->alpha.f, 492 (float*)A, lda, (float*)B, ldb); 493 break; 494 case (RsBlas_strsm): 495 initABC(ain, sizeof(float), &A, &B, nullptr, &lda, &ldb, nullptr); 496 cblas_strsm(CblasRowMajor, Side, Uplo, TransA, Diag, call->M, call->N, call->alpha.f, 497 (float*)A, lda, (float*)B, ldb); 498 break; 499 500 501 case (RsBlas_dgemm): 502 initABC(ain, sizeof(double), &A, &B, &C, &lda, &ldb, &ldc); 503 cblas_dgemm(CblasRowMajor, TransA, TransB, call->M, call->N, call->K, call->alpha.d, 504 (double*)A, lda, (double*)B, ldb, call->beta.d, (double*)C, ldc); 505 break; 506 case (RsBlas_dsymm): 507 initABC(ain, sizeof(double), &A, &B, &C, &lda, &ldb, &ldc); 508 cblas_dsymm(CblasRowMajor, Side, Uplo, call->M, call->N, call->alpha.d, (double*)A, 509 lda, (double*)B, ldb, call->beta.d, (double*)C, ldc); 510 break; 511 case (RsBlas_dsyrk): 512 initABC(ain, sizeof(double), &A, nullptr, &C, &lda, nullptr, &ldc); 513 cblas_dsyrk(CblasRowMajor, Uplo, TransA, call->N, call->K, call->alpha.d, (double*)A, 514 lda, call->beta.d, (double*)C, ldc); 515 break; 516 case (RsBlas_dsyr2k): 517 initABC(ain, sizeof(double), &A, &B, &C, &lda, &ldb, &ldc); 518 cblas_dsyr2k(CblasRowMajor, Uplo, TransA, call->N, call->K, call->alpha.d, (double*)A, 519 lda, (double*)B, ldb, call->beta.d, (double*)C, ldc); 520 break; 521 case (RsBlas_dtrmm): 522 initABC(ain, sizeof(double), &A, &B, nullptr, &lda, &ldb, nullptr); 523 cblas_dtrmm(CblasRowMajor, Side, Uplo, TransA, Diag, call->M, call->N, call->alpha.d, 524 (double*)A, lda, (double*)B, ldb); 525 break; 526 case (RsBlas_dtrsm): 527 initABC(ain, sizeof(double), &A, &B, nullptr, &lda, &ldb, nullptr); 528 cblas_dtrsm(CblasRowMajor, Side, Uplo, TransA, Diag, call->M, call->N, call->alpha.d, 529 (double*)A, lda, (double*)B, ldb); 530 break; 531 532 case (RsBlas_cgemm): 533 initABC(ain, sizeof(float)*2, &A, &B, &C, &lda, &ldb, &ldc); 534 cblas_cgemm(CblasRowMajor, TransA, TransB, call->M, call->N, call->K, (void*)&call->alpha.c, 535 A, lda, B, ldb, (void*)&call->beta.c, C, ldc); 536 break; 537 case (RsBlas_csymm): 538 initABC(ain, sizeof(float)*2, &A, &B, &C, &lda, &ldb, &ldc); 539 cblas_csymm(CblasRowMajor, Side, Uplo, call->M, call->N, (void*)&call->alpha.c, A, 540 lda, B, ldb, (void*)&call->beta.c, C, ldc); 541 break; 542 case (RsBlas_csyrk): 543 initABC(ain, sizeof(float)*2, &A, nullptr, &C, &lda, nullptr, &ldc); 544 cblas_csyrk(CblasRowMajor, Uplo, TransA, call->N, call->K, (void*)&call->alpha.c, A, 545 lda, (void*)&call->beta.c, C, ldc); 546 break; 547 case (RsBlas_csyr2k): 548 initABC(ain, sizeof(float)*2, &A, &B, &C, &lda, &ldb, &ldc); 549 cblas_csyr2k(CblasRowMajor, Uplo, TransA, call->N, call->K, (void*)&call->alpha.c, A, 550 lda, B, ldb, (void*)&call->beta.c, C, ldc); 551 break; 552 case (RsBlas_ctrmm): 553 initABC(ain, sizeof(float)*2, &A, &B, nullptr, &lda, &ldb, nullptr); 554 cblas_ctrmm(CblasRowMajor, Side, Uplo, TransA, Diag, call->M, call->N, (void*)&call->alpha.c, 555 A, lda, B, ldb); 556 break; 557 case (RsBlas_ctrsm): 558 initABC(ain, sizeof(float)*2, &A, &B, nullptr, &lda, &ldb, nullptr); 559 cblas_ctrsm(CblasRowMajor, Side, Uplo, TransA, Diag, call->M, call->N, (void*)&call->alpha.c, 560 A, lda, B, ldb); 561 break; 562 563 case (RsBlas_zgemm): 564 initABC(ain, sizeof(double)*2, &A, &B, &C, &lda, &ldb, &ldc); 565 cblas_zgemm(CblasRowMajor, TransA, TransB, call->M, call->N, call->K, (void*)&call->alpha.z, 566 A, lda, B, ldb, (void*)&call->beta.z, C, ldc); 567 break; 568 case (RsBlas_zsymm): 569 initABC(ain, sizeof(double)*2, &A, &B, &C, &lda, &ldb, &ldc); 570 cblas_zsymm(CblasRowMajor, Side, Uplo, call->M, call->N, (void*)&call->alpha.z, A, 571 lda, B, ldb, (void*)&call->beta.z, C, ldc); 572 break; 573 case (RsBlas_zsyrk): 574 initABC(ain, sizeof(double)*2, &A, nullptr, &C, &lda, nullptr, &ldc); 575 cblas_zsyrk(CblasRowMajor, Uplo, TransA, call->N, call->K, (void*)&call->alpha.z, A, 576 lda, (void*)&call->beta.z, C, ldc); 577 break; 578 case (RsBlas_zsyr2k): 579 initABC(ain, sizeof(double)*2, &A, &B, &C, &lda, &ldb, &ldc); 580 cblas_zsyr2k(CblasRowMajor, Uplo, TransA, call->N, call->K, (void*)&call->alpha.z, A, 581 lda, B, ldb, (void*)&call->beta.z, C, ldc); 582 break; 583 case (RsBlas_ztrmm): 584 initABC(ain, sizeof(double)*2, &A, &B, nullptr, &lda, &ldb, nullptr); 585 cblas_ztrmm(CblasRowMajor, Side, Uplo, TransA, Diag, call->M, call->N, (void*)&call->alpha.z, 586 A, lda, B, ldb); 587 break; 588 case (RsBlas_ztrsm): 589 initABC(ain, sizeof(double)*2, &A, &B, nullptr, &lda, &ldb, nullptr); 590 cblas_ztrsm(CblasRowMajor, Side, Uplo, TransA, Diag, call->M, call->N, (void*)&call->alpha.z, 591 A, lda, B, ldb); 592 break; 593 594 // Level 3 C and Z only 595 case (RsBlas_chemm): 596 initABC(ain, sizeof(float)*2, &A, &B, &C, &lda, &ldb, &ldc); 597 cblas_chemm(CblasRowMajor, Side, Uplo, call->M, call->N, (void*)&call->alpha.c, A, lda, 598 B, ldb, (void*)&call->beta.c, C, ldc); 599 break; 600 case (RsBlas_cherk): 601 initABC(ain, sizeof(float)*2, &A, nullptr, &C, &lda, nullptr, &ldc); 602 cblas_cherk(CblasRowMajor, Uplo, TransA, call->N, call->K, call->alpha.f, A, lda, 603 call->beta.f, C, ldc); 604 break; 605 case (RsBlas_cher2k): 606 initABC(ain, sizeof(float)*2, &A, &B, &C, &lda, &ldb, &ldc); 607 cblas_cher2k(CblasRowMajor, Uplo, TransA, call->N, call->K, (void*)&call->alpha.c, A, lda, 608 B, ldb, call->beta.f, C, ldc); 609 break; 610 611 case (RsBlas_zhemm): 612 initABC(ain, sizeof(double)*2, &A, &B, &C, &lda, &ldb, &ldc); 613 cblas_zhemm(CblasRowMajor, Side, Uplo, call->M, call->N, (void*)&call->alpha.z, A, lda, 614 B, ldb, (void*)&call->beta.z, C, ldc); 615 break; 616 case (RsBlas_zherk): 617 initABC(ain, sizeof(double)*2, &A, nullptr, &C, &lda, nullptr, &ldc); 618 cblas_zherk(CblasRowMajor, Uplo, TransA, call->N, call->K, call->alpha.d, A, lda, 619 call->beta.d, C, ldc); 620 break; 621 case (RsBlas_zher2k): 622 initABC(ain, sizeof(double)*2, &A, &B, &C, &lda, &ldb, &ldc); 623 cblas_zher2k(CblasRowMajor, Uplo, TransA, call->N, call->K, (void*)&call->alpha.z, A, lda, 624 B, ldb, call->beta.d, C, ldc); 625 break; 626 627 default: 628 ALOGE("unimplemented\n"); 629 } 630 631 632} 633 634 635RsdCpuScriptIntrinsicBLAS::RsdCpuScriptIntrinsicBLAS(RsdCpuReferenceImpl *ctx, 636 const Script *s) 637 : RsdCpuScriptIntrinsic(ctx, s, nullptr, RS_SCRIPT_INTRINSIC_ID_BLAS) { 638 639 640} 641 642RsdCpuScriptIntrinsicBLAS::~RsdCpuScriptIntrinsicBLAS() { 643} 644 645 646 647 648 649RsdCpuScriptImpl * rsdIntrinsic_BLAS(RsdCpuReferenceImpl *ctx, 650 const Script *s, const Element *e) { 651 652 return new RsdCpuScriptIntrinsicBLAS(ctx, s); 653} 654