1/* 2 * Copyright (C) 2015 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17package android.support.v8.renderscript; 18 19import android.support.annotation.IntDef; 20import java.lang.annotation.Retention; 21import java.lang.annotation.RetentionPolicy; 22 23/** 24 * 25 * ScriptIntrinsicBLAS class provides high performance RenderScript APIs to BLAS. 26 * 27 * The BLAS (Basic Linear Algebra Subprograms) are routines that provide standard 28 * building blocks for performing basic vector and matrix operations. 
29 * 30 * For detailed description of BLAS, please refer to http://www.netlib.org/blas/ 31 * 32 **/ 33public final class ScriptIntrinsicBLAS extends ScriptIntrinsic { 34 private Allocation mLUT; 35 private static final int INTRINSIC_API_LEVEL = 23; 36 37 private ScriptIntrinsicBLAS(long id, RenderScript rs) { 38 super(id, rs); 39 } 40 41 private static final int RsBlas_sdsdot = 1; 42 private static final int RsBlas_dsdot = 2; 43 private static final int RsBlas_sdot = 3; 44 private static final int RsBlas_ddot = 4; 45 private static final int RsBlas_cdotu_sub = 5; 46 private static final int RsBlas_cdotc_sub = 6; 47 private static final int RsBlas_zdotu_sub = 7; 48 private static final int RsBlas_zdotc_sub = 8; 49 private static final int RsBlas_snrm2 = 9; 50 private static final int RsBlas_sasum = 10; 51 private static final int RsBlas_dnrm2 = 11; 52 private static final int RsBlas_dasum = 12; 53 private static final int RsBlas_scnrm2 = 13; 54 private static final int RsBlas_scasum = 14; 55 private static final int RsBlas_dznrm2 = 15; 56 private static final int RsBlas_dzasum = 16; 57 private static final int RsBlas_isamax = 17; 58 private static final int RsBlas_idamax = 18; 59 private static final int RsBlas_icamax = 19; 60 private static final int RsBlas_izamax = 20; 61 private static final int RsBlas_sswap = 21; 62 private static final int RsBlas_scopy = 22; 63 private static final int RsBlas_saxpy = 23; 64 private static final int RsBlas_dswap = 24; 65 private static final int RsBlas_dcopy = 25; 66 private static final int RsBlas_daxpy = 26; 67 private static final int RsBlas_cswap = 27; 68 private static final int RsBlas_ccopy = 28; 69 private static final int RsBlas_caxpy = 29; 70 private static final int RsBlas_zswap = 30; 71 private static final int RsBlas_zcopy = 31; 72 private static final int RsBlas_zaxpy = 32; 73 private static final int RsBlas_srotg = 33; 74 private static final int RsBlas_srotmg = 34; 75 private static final int RsBlas_srot = 35; 76 
private static final int RsBlas_srotm = 36; 77 private static final int RsBlas_drotg = 37; 78 private static final int RsBlas_drotmg = 38; 79 private static final int RsBlas_drot = 39; 80 private static final int RsBlas_drotm = 40; 81 private static final int RsBlas_sscal = 41; 82 private static final int RsBlas_dscal = 42; 83 private static final int RsBlas_cscal = 43; 84 private static final int RsBlas_zscal = 44; 85 private static final int RsBlas_csscal = 45; 86 private static final int RsBlas_zdscal = 46; 87 private static final int RsBlas_sgemv = 47; 88 private static final int RsBlas_sgbmv = 48; 89 private static final int RsBlas_strmv = 49; 90 private static final int RsBlas_stbmv = 50; 91 private static final int RsBlas_stpmv = 51; 92 private static final int RsBlas_strsv = 52; 93 private static final int RsBlas_stbsv = 53; 94 private static final int RsBlas_stpsv = 54; 95 private static final int RsBlas_dgemv = 55; 96 private static final int RsBlas_dgbmv = 56; 97 private static final int RsBlas_dtrmv = 57; 98 private static final int RsBlas_dtbmv = 58; 99 private static final int RsBlas_dtpmv = 59; 100 private static final int RsBlas_dtrsv = 60; 101 private static final int RsBlas_dtbsv = 61; 102 private static final int RsBlas_dtpsv = 62; 103 private static final int RsBlas_cgemv = 63; 104 private static final int RsBlas_cgbmv = 64; 105 private static final int RsBlas_ctrmv = 65; 106 private static final int RsBlas_ctbmv = 66; 107 private static final int RsBlas_ctpmv = 67; 108 private static final int RsBlas_ctrsv = 68; 109 private static final int RsBlas_ctbsv = 69; 110 private static final int RsBlas_ctpsv = 70; 111 private static final int RsBlas_zgemv = 71; 112 private static final int RsBlas_zgbmv = 72; 113 private static final int RsBlas_ztrmv = 73; 114 private static final int RsBlas_ztbmv = 74; 115 private static final int RsBlas_ztpmv = 75; 116 private static final int RsBlas_ztrsv = 76; 117 private static final int RsBlas_ztbsv = 77; 118 
private static final int RsBlas_ztpsv = 78; 119 private static final int RsBlas_ssymv = 79; 120 private static final int RsBlas_ssbmv = 80; 121 private static final int RsBlas_sspmv = 81; 122 private static final int RsBlas_sger = 82; 123 private static final int RsBlas_ssyr = 83; 124 private static final int RsBlas_sspr = 84; 125 private static final int RsBlas_ssyr2 = 85; 126 private static final int RsBlas_sspr2 = 86; 127 private static final int RsBlas_dsymv = 87; 128 private static final int RsBlas_dsbmv = 88; 129 private static final int RsBlas_dspmv = 89; 130 private static final int RsBlas_dger = 90; 131 private static final int RsBlas_dsyr = 91; 132 private static final int RsBlas_dspr = 92; 133 private static final int RsBlas_dsyr2 = 93; 134 private static final int RsBlas_dspr2 = 94; 135 private static final int RsBlas_chemv = 95; 136 private static final int RsBlas_chbmv = 96; 137 private static final int RsBlas_chpmv = 97; 138 private static final int RsBlas_cgeru = 98; 139 private static final int RsBlas_cgerc = 99; 140 private static final int RsBlas_cher = 100; 141 private static final int RsBlas_chpr = 101; 142 private static final int RsBlas_cher2 = 102; 143 private static final int RsBlas_chpr2 = 103; 144 private static final int RsBlas_zhemv = 104; 145 private static final int RsBlas_zhbmv = 105; 146 private static final int RsBlas_zhpmv = 106; 147 private static final int RsBlas_zgeru = 107; 148 private static final int RsBlas_zgerc = 108; 149 private static final int RsBlas_zher = 109; 150 private static final int RsBlas_zhpr = 110; 151 private static final int RsBlas_zher2 = 111; 152 private static final int RsBlas_zhpr2 = 112; 153 private static final int RsBlas_sgemm = 113; 154 private static final int RsBlas_ssymm = 114; 155 private static final int RsBlas_ssyrk = 115; 156 private static final int RsBlas_ssyr2k = 116; 157 private static final int RsBlas_strmm = 117; 158 private static final int RsBlas_strsm = 118; 159 private static final 
int RsBlas_dgemm = 119; 160 private static final int RsBlas_dsymm = 120; 161 private static final int RsBlas_dsyrk = 121; 162 private static final int RsBlas_dsyr2k = 122; 163 private static final int RsBlas_dtrmm = 123; 164 private static final int RsBlas_dtrsm = 124; 165 private static final int RsBlas_cgemm = 125; 166 private static final int RsBlas_csymm = 126; 167 private static final int RsBlas_csyrk = 127; 168 private static final int RsBlas_csyr2k = 128; 169 private static final int RsBlas_ctrmm = 129; 170 private static final int RsBlas_ctrsm = 130; 171 private static final int RsBlas_zgemm = 131; 172 private static final int RsBlas_zsymm = 132; 173 private static final int RsBlas_zsyrk = 133; 174 private static final int RsBlas_zsyr2k = 134; 175 private static final int RsBlas_ztrmm = 135; 176 private static final int RsBlas_ztrsm = 136; 177 private static final int RsBlas_chemm = 137; 178 private static final int RsBlas_cherk = 138; 179 private static final int RsBlas_cher2k = 139; 180 private static final int RsBlas_zhemm = 140; 181 private static final int RsBlas_zherk = 141; 182 private static final int RsBlas_zher2k = 142; 183 184 // BLAS extensions start here 185 private static final int RsBlas_bnnm = 1000; 186 187 /** 188 * Create an intrinsic to access BLAS subroutines. 
189 * 190 * @param rs The RenderScript context 191 * @return ScriptIntrinsicBLAS 192 */ 193 public static ScriptIntrinsicBLAS create(RenderScript rs) { 194 long id; 195 boolean mUseIncSupp = rs.isUseNative() && 196 android.os.Build.VERSION.SDK_INT < INTRINSIC_API_LEVEL; 197 198 id = rs.nScriptIntrinsicCreate(13, Element.U32(rs).getID(rs), mUseIncSupp); 199 ScriptIntrinsicBLAS si = new ScriptIntrinsicBLAS(id, rs); 200 si.setIncSupp(mUseIncSupp); 201 return si; 202 } 203 204 /** 205 * @hide 206 */ 207 @IntDef({NO_TRANSPOSE, TRANSPOSE, CONJ_TRANSPOSE}) 208 @Retention(RetentionPolicy.SOURCE) 209 public @interface Transpose {} 210 211 /** 212 * @hide 213 */ 214 @IntDef({UPPER, LOWER}) 215 @Retention(RetentionPolicy.SOURCE) 216 public @interface Uplo {} 217 218 /** 219 * @hide 220 */ 221 @IntDef({NON_UNIT, UNIT}) 222 @Retention(RetentionPolicy.SOURCE) 223 public @interface Diag {} 224 225 /** 226 * @hide 227 */ 228 @IntDef({LEFT, RIGHT}) 229 @Retention(RetentionPolicy.SOURCE) 230 public @interface Side {} 231 232 public static final int NO_TRANSPOSE = 111; 233 public static final int TRANSPOSE = 112; 234 public static final int CONJ_TRANSPOSE = 113; 235 236 public static final int UPPER = 121; 237 public static final int LOWER = 122; 238 239 public static final int NON_UNIT = 131; 240 public static final int UNIT = 132; 241 242 public static final int LEFT = 141; 243 public static final int RIGHT = 142; 244 245 static void validateSide(@Side int Side) { 246 if (Side != LEFT && Side != RIGHT) { 247 throw new RSRuntimeException("Invalid side passed to BLAS"); 248 } 249 } 250 251 static void validateTranspose(@Transpose int Trans) { 252 if (Trans != NO_TRANSPOSE && Trans != TRANSPOSE && 253 Trans != CONJ_TRANSPOSE) { 254 throw new RSRuntimeException("Invalid transpose passed to BLAS"); 255 } 256 } 257 258 static void validateConjTranspose(@Transpose int Trans) { 259 if (Trans != NO_TRANSPOSE && 260 Trans != CONJ_TRANSPOSE) { 261 throw new RSRuntimeException("Invalid 
transpose passed to BLAS"); 262 } 263 } 264 265 static void validateDiag(@Diag int Diag) { 266 if (Diag != NON_UNIT && Diag != UNIT) { 267 throw new RSRuntimeException("Invalid diag passed to BLAS"); 268 } 269 } 270 271 static void validateUplo(@Uplo int Uplo) { 272 if (Uplo != UPPER && Uplo != LOWER) { 273 throw new RSRuntimeException("Invalid uplo passed to BLAS"); 274 } 275 } 276 277 278 /** 279 * Level 2 BLAS 280 */ 281 282 static void validateGEMV(Element e, int TransA, Allocation A, Allocation X, int incX, Allocation Y, int incY) { 283 validateTranspose(TransA); 284 int M = A.getType().getY(); 285 int N = A.getType().getX(); 286 if (!A.getType().getElement().isCompatible(e) || 287 !X.getType().getElement().isCompatible(e) || 288 !Y.getType().getElement().isCompatible(e)) { 289 throw new RSRuntimeException("Called BLAS with wrong Element type"); 290 } 291 if (X.getType().getY() > 1 || Y.getType().getY() > 1) { 292 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 293 } 294 295 if (incX <= 0 || incY <= 0) { 296 throw new RSRuntimeException("Vector increments must be greater than 0"); 297 } 298 int expectedXDim = -1, expectedYDim = -1; 299 if (TransA == NO_TRANSPOSE) { 300 expectedXDim = 1 + (N - 1) * incX; 301 expectedYDim = 1 + (M - 1) * incY; 302 } else { 303 expectedXDim = 1 + (M - 1) * incX; 304 expectedYDim = 1 + (N - 1) * incY; 305 } 306 if (X.getType().getX() != expectedXDim || 307 Y.getType().getX() != expectedYDim) { 308 throw new RSRuntimeException("Incorrect vector dimensions for GEMV"); 309 } 310 } 311 312 /** 313 * SGEMV performs one of the matrix-vector operations 314 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y 315 * 316 * Details: http://www.netlib.org/lapack/explore-html/db/d58/sgemv_8f.html 317 * 318 * @param TransA The type of transpose applied to matrix A. 319 * @param alpha The scalar alpha. 320 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 
321 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 322 * @param incX The increment for the elements of vector x, must be larger than zero. 323 * @param beta The scalar beta. 324 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}. 325 * @param incY The increment for the elements of vector y, must be larger than zero. 326 */ 327 public void SGEMV(@Transpose int TransA, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) { 328 validateGEMV(Element.F32(mRS), TransA, A, X, incX, Y, incY); 329 int M = A.getType().getY(); 330 int N = A.getType().getX(); 331 332 boolean mUseIncSupp = isIncSupp(); 333 long aID = A.getID(mRS); 334 long xID = X.getID(mRS); 335 long yID = Y.getID(mRS); 336 if (mUseIncSupp) { 337 aID = getDummyAlloc(A); 338 xID = getDummyAlloc(X); 339 yID = getDummyAlloc(Y); 340 } 341 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha, aID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp); 342 } 343 344 /** 345 * DGEMV performs one of the matrix-vector operations 346 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y 347 * 348 * Details: http://www.netlib.org/lapack/explore-html/dc/da8/dgemv_8f.html 349 * 350 * @param TransA The type of transpose applied to matrix A. 351 * @param alpha The scalar alpha. 352 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 353 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 354 * @param incX The increment for the elements of vector x, must be larger than zero. 355 * @param beta The scalar beta. 356 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. 357 * @param incY The increment for the elements of vector y, must be larger than zero. 
358 */ 359 public void DGEMV(@Transpose int TransA, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) { 360 validateGEMV(Element.F64(mRS), TransA, A, X, incX, Y, incY); 361 int M = A.getType().getY(); 362 int N = A.getType().getX(); 363 364 boolean mUseIncSupp = isIncSupp(); 365 long aID = A.getID(mRS); 366 long xID = X.getID(mRS); 367 long yID = Y.getID(mRS); 368 if (mUseIncSupp) { 369 aID = getDummyAlloc(A); 370 xID = getDummyAlloc(X); 371 yID = getDummyAlloc(Y); 372 } 373 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha, aID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp); 374 } 375 376 /** 377 * CGEMV performs one of the matrix-vector operations 378 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y or y := alpha*A**H*x + beta*y 379 * 380 * Details: http://www.netlib.org/lapack/explore-html/d4/d8a/cgemv_8f.html 381 * 382 * @param TransA The type of transpose applied to matrix A. 383 * @param alpha The scalar alpha. 384 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 385 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 386 * @param incX The increment for the elements of vector x, must be larger than zero. 387 * @param beta The scalar beta. 388 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. 389 * @param incY The increment for the elements of vector y, must be larger than zero. 
390 */ 391 public void CGEMV(@Transpose int TransA, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) { 392 validateGEMV(Element.F32_2(mRS), TransA, A, X, incX, Y, incY); 393 int M = A.getType().getY(); 394 int N = A.getType().getX(); 395 396 boolean mUseIncSupp = isIncSupp(); 397 long aID = A.getID(mRS); 398 long xID = X.getID(mRS); 399 long yID = Y.getID(mRS); 400 if (mUseIncSupp) { 401 aID = getDummyAlloc(A); 402 xID = getDummyAlloc(X); 403 yID = getDummyAlloc(Y); 404 } 405 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp); 406 } 407 408 /** 409 * ZGEMV performs one of the matrix-vector operations 410 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y or y := alpha*A**H*x + beta*y 411 * 412 * Details: http://www.netlib.org/lapack/explore-html/db/d40/zgemv_8f.html 413 * 414 * @param TransA The type of transpose applied to matrix A. 415 * @param alpha The scalar alpha. 416 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 417 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 418 * @param incX The increment for the elements of vector x, must be larger than zero. 419 * @param beta The scalar beta. 420 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. 421 * @param incY The increment for the elements of vector y, must be larger than zero. 
422 */ 423 public void ZGEMV(@Transpose int TransA, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) { 424 validateGEMV(Element.F64_2(mRS), TransA, A, X, incX, Y, incY); 425 int M = A.getType().getY(); 426 int N = A.getType().getX(); 427 428 boolean mUseIncSupp = isIncSupp(); 429 long aID = A.getID(mRS); 430 long xID = X.getID(mRS); 431 long yID = Y.getID(mRS); 432 if (mUseIncSupp) { 433 aID = getDummyAlloc(A); 434 xID = getDummyAlloc(X); 435 yID = getDummyAlloc(Y); 436 } 437 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp); 438 } 439 440 /** 441 * SGBMV performs one of the matrix-vector operations 442 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y 443 * 444 * Details: http://www.netlib.org/lapack/explore-html/d6/d46/sgbmv_8f.html 445 * 446 * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N), 447 * but only the region M*(KL+KU+1) will be referenced. The following subroutine can is an 448 * example showing how to convert the original matrix 'a' to row-based band matrix 'b'. 449 * for i in range(0, m): 450 * for j in range(max(0, i-kl), min(i+ku+1, n)): 451 * b[i, j-i+kl] = a[i, j] 452 * 453 * @param TransA The type of transpose applied to matrix A. 454 * @param KL The number of sub-diagonals of the matrix A. 455 * @param KU The number of super-diagonals of the matrix A. 456 * @param alpha The scalar alpha. 457 * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F32}. 458 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 459 * @param incX The increment for the elements of vector x, must be larger than zero. 460 * @param beta The scalar beta. 461 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}. 
462 * @param incY The increment for the elements of vector y, must be larger than zero. 463 */ 464 public void SGBMV(@Transpose int TransA, int KL, int KU, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) { 465 // GBMV has the same validation requirements as GEMV + KL and KU >= 0 466 validateGEMV(Element.F32(mRS), TransA, A, X, incX, Y, incY); 467 if (KL < 0 || KU < 0) { 468 throw new RSRuntimeException("KL and KU must be greater than or equal to 0"); 469 } 470 int M = A.getType().getY(); 471 int N = A.getType().getX(); 472 473 boolean mUseIncSupp = isIncSupp(); 474 long aID = A.getID(mRS); 475 long xID = X.getID(mRS); 476 long yID = Y.getID(mRS); 477 if (mUseIncSupp) { 478 aID = getDummyAlloc(A); 479 xID = getDummyAlloc(X); 480 yID = getDummyAlloc(Y); 481 } 482 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha, aID, xID, beta, yID, incX, incY, KL, KU, mUseIncSupp); 483 } 484 485 /** 486 * DGBMV performs one of the matrix-vector operations 487 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y 488 * 489 * Details: http://www.netlib.org/lapack/explore-html/d2/d3f/dgbmv_8f.html 490 * 491 * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N), 492 * but only the region M*(KL+KU+1) will be referenced. The following subroutine can is an 493 * example showing how to convert the original matrix 'a' to row-based band matrix 'b'. 494 * for i in range(0, m): 495 * for j in range(max(0, i-kl), min(i+ku+1, n)): 496 * b[i, j-i+kl] = a[i, j] 497 * 498 * @param TransA The type of transpose applied to matrix A. 499 * @param KL The number of sub-diagonals of the matrix A. 500 * @param KU The number of super-diagonals of the matrix A. 501 * @param alpha The scalar alpha. 502 * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F64}. 
503 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 504 * @param incX The increment for the elements of vector x, must be larger than zero. 505 * @param beta The scalar beta. 506 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. 507 * @param incY The increment for the elements of vector y, must be larger than zero. 508 */ 509 public void DGBMV(@Transpose int TransA, int KL, int KU, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) { 510 // GBMV has the same validation requirements as GEMV + KL and KU >= 0 511 validateGEMV(Element.F64(mRS), TransA, A, X, incX, Y, incY); 512 if (KL < 0 || KU < 0) { 513 throw new RSRuntimeException("KL and KU must be greater than or equal to 0"); 514 } 515 int M = A.getType().getY(); 516 int N = A.getType().getX(); 517 518 boolean mUseIncSupp = isIncSupp(); 519 long aID = A.getID(mRS); 520 long xID = X.getID(mRS); 521 long yID = Y.getID(mRS); 522 if (mUseIncSupp) { 523 aID = getDummyAlloc(A); 524 xID = getDummyAlloc(X); 525 yID = getDummyAlloc(Y); 526 } 527 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha, aID, xID, beta, yID, incX, incY, KL, KU, mUseIncSupp); 528 } 529 530 /** 531 * CGBMV performs one of the matrix-vector operations 532 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y or y := alpha*A**H*x + beta*y 533 * 534 * Details: http://www.netlib.org/lapack/explore-html/d0/d75/cgbmv_8f.html 535 * 536 * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N), 537 * but only the region M*(KL+KU+1) will be referenced. The following subroutine can is an 538 * example showing how to convert the original matrix 'a' to row-based band matrix 'b'. 
539 * for i in range(0, m): 540 * for j in range(max(0, i-kl), min(i+ku+1, n)): 541 * b[i, j-i+kl] = a[i, j] 542 * 543 * @param TransA The type of transpose applied to matrix A. 544 * @param KL The number of sub-diagonals of the matrix A. 545 * @param KU The number of super-diagonals of the matrix A. 546 * @param alpha The scalar alpha. 547 * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F32_2}. 548 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 549 * @param incX The increment for the elements of vector x, must be larger than zero. 550 * @param beta The scalar beta. 551 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. 552 * @param incY The increment for the elements of vector y, must be larger than zero. 553 */ 554 public void CGBMV(@Transpose int TransA, int KL, int KU, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) { 555 // GBMV has the same validation requirements as GEMV + KL and KU >= 0 556 validateGEMV(Element.F32_2(mRS), TransA, A, X, incX, Y, incY); 557 if (KL < 0 || KU < 0) { 558 throw new RSRuntimeException("KL and KU must be greater than or equal to 0"); 559 } 560 int M = A.getType().getY(); 561 int N = A.getType().getX(); 562 563 boolean mUseIncSupp = isIncSupp(); 564 long aID = A.getID(mRS); 565 long xID = X.getID(mRS); 566 long yID = Y.getID(mRS); 567 if (mUseIncSupp) { 568 aID = getDummyAlloc(A); 569 xID = getDummyAlloc(X); 570 yID = getDummyAlloc(Y); 571 } 572 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, KL, KU, mUseIncSupp); 573 } 574 575 /** 576 * ZGBMV performs one of the matrix-vector operations 577 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y or y := alpha*A**H*x + beta*y 578 * 579 * Details: 
http://www.netlib.org/lapack/explore-html/d9/d46/zgbmv_8f.html
     *
     * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),
     *       but only the region M*(KL+KU+1) will be referenced. The following subroutine is an
     *       example showing how to convert the original matrix 'a' to row-based band matrix 'b'.
     *           for i in range(0, m):
     *              for j in range(max(0, i-kl), min(i+ku+1, n)):
     *                  b[i, j-i+kl] = a[i, j]
     *
     * @param TransA The type of transpose applied to matrix A.
     * @param KL The number of sub-diagonals of the matrix A.
     * @param KU The number of super-diagonals of the matrix A.
     * @param alpha The scalar alpha.
     * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F64_2}.
     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
     * @param incX The increment for the elements of vector x, must be larger than zero.
     * @param beta The scalar beta.
     * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
     * @param incY The increment for the elements of vector y, must be larger than zero.
598 */ 599 public void ZGBMV(@Transpose int TransA, int KL, int KU, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) { 600 // GBMV has the same validation requirements as GEMV + KL and KU >= 0 601 validateGEMV(Element.F64_2(mRS), TransA, A, X, incX, Y, incY); 602 if (KL < 0 || KU < 0) { 603 throw new RSRuntimeException("KL and KU must be greater than or equal to 0"); 604 } 605 int M = A.getType().getY(); 606 int N = A.getType().getX(); 607 608 boolean mUseIncSupp = isIncSupp(); 609 long aID = A.getID(mRS); 610 long xID = X.getID(mRS); 611 long yID = Y.getID(mRS); 612 if (mUseIncSupp) { 613 aID = getDummyAlloc(A); 614 xID = getDummyAlloc(X); 615 yID = getDummyAlloc(Y); 616 } 617 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, KL, KU, mUseIncSupp); 618 } 619 620 static void validateTRMV(Element e, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 621 validateTranspose(TransA); 622 validateUplo(Uplo); 623 validateDiag(Diag); 624 int N = A.getType().getY(); 625 if (A.getType().getX() != N) { 626 throw new RSRuntimeException("A must be a square matrix for TRMV"); 627 } 628 if (!A.getType().getElement().isCompatible(e) || 629 !X.getType().getElement().isCompatible(e)) { 630 throw new RSRuntimeException("Called BLAS with wrong Element type"); 631 } 632 if (X.getType().getY() > 1) { 633 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 634 } 635 636 if (incX <= 0) { 637 throw new RSRuntimeException("Vector increments must be greater than 0"); 638 } 639 int expectedXDim = 1 + (N - 1) * incX; 640 if (X.getType().getX() != expectedXDim) { 641 throw new RSRuntimeException("Incorrect vector dimensions for TRMV"); 642 } 643 } 644 645 static int validateTPMV(Element e, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 646 
validateTranspose(TransA); 647 validateUplo(Uplo); 648 validateDiag(Diag); 649 if (!Ap.getType().getElement().isCompatible(e) || 650 !X.getType().getElement().isCompatible(e)) { 651 throw new RSRuntimeException("Called BLAS with wrong Element type"); 652 } 653 if (X.getType().getY() > 1) { 654 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 655 } 656 657 if (Ap.getType().getY() > 1) { 658 throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1"); 659 } 660 661 int N = (int)Math.sqrt((double)Ap.getType().getX() * 2); 662 //is it really doing anything? 663 if (Ap.getType().getX() != ((N * (N+1)) / 2)) { 664 throw new RSRuntimeException("Invalid dimension for Ap"); 665 } 666 if (incX <= 0) { 667 throw new RSRuntimeException("Vector increments must be greater than 0"); 668 } 669 int expectedXDim = 1 + (N - 1) * incX; 670 if (X.getType().getX() != expectedXDim) { 671 throw new RSRuntimeException("Incorrect vector dimensions for TPMV"); 672 } 673 674 return N; 675 } 676 677 /** 678 * STRMV performs one of the matrix-vector operations 679 * x := A*x or x := A**T*x 680 * 681 * Details: http://www.netlib.org/lapack/explore-html/de/d45/strmv_8f.html 682 * 683 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 684 * @param TransA The type of transpose applied to matrix A. 685 * @param Diag Specifies whether or not A is unit triangular. 686 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 687 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 688 * @param incX The increment for the elements of vector x, must be larger than zero. 
689 */ 690 public void STRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 691 validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX); 692 int N = A.getType().getY(); 693 694 boolean mUseIncSupp = isIncSupp(); 695 long aID = A.getID(mRS); 696 long xID = X.getID(mRS); 697 if (mUseIncSupp) { 698 aID = getDummyAlloc(A); 699 xID = getDummyAlloc(X); 700 } 701 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp); 702 } 703 704 /** 705 * DTRMV performs one of the matrix-vector operations 706 * x := A*x or x := A**T*x 707 * 708 * Details: http://www.netlib.org/lapack/explore-html/dc/d7e/dtrmv_8f.html 709 * 710 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 711 * @param TransA The type of transpose applied to matrix A. 712 * @param Diag Specifies whether or not A is unit triangular. 713 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 714 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 715 * @param incX The increment for the elements of vector x, must be larger than zero. 
716 */ 717 public void DTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 718 validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX); 719 int N = A.getType().getY(); 720 721 boolean mUseIncSupp = isIncSupp(); 722 long aID = A.getID(mRS); 723 long xID = X.getID(mRS); 724 if (mUseIncSupp) { 725 aID = getDummyAlloc(A); 726 xID = getDummyAlloc(X); 727 } 728 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp); 729 } 730 731 /** 732 * CTRMV performs one of the matrix-vector operations 733 * x := A*x or x := A**T*x or x := A**H*x 734 * 735 * Details: http://www.netlib.org/lapack/explore-html/df/d78/ctrmv_8f.html 736 * 737 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 738 * @param TransA The type of transpose applied to matrix A. 739 * @param Diag Specifies whether or not A is unit triangular. 740 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 741 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 742 * @param incX The increment for the elements of vector x, must be larger than zero. 
743 */ 744 public void CTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 745 validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX); 746 int N = A.getType().getY(); 747 748 boolean mUseIncSupp = isIncSupp(); 749 long aID = A.getID(mRS); 750 long xID = X.getID(mRS); 751 if (mUseIncSupp) { 752 aID = getDummyAlloc(A); 753 xID = getDummyAlloc(X); 754 } 755 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp); 756 } 757 758 /** 759 * ZTRMV performs one of the matrix-vector operations 760 * x := A*x or x := A**T*x or x := A**H*x 761 * 762 * Details: http://www.netlib.org/lapack/explore-html/d0/dd1/ztrmv_8f.html 763 * 764 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 765 * @param TransA The type of transpose applied to matrix A. 766 * @param Diag Specifies whether or not A is unit triangular. 767 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 768 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 769 * @param incX The increment for the elements of vector x, must be larger than zero. 
770 */ 771 public void ZTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 772 validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX); 773 int N = A.getType().getY(); 774 775 boolean mUseIncSupp = isIncSupp(); 776 long aID = A.getID(mRS); 777 long xID = X.getID(mRS); 778 if (mUseIncSupp) { 779 aID = getDummyAlloc(A); 780 xID = getDummyAlloc(X); 781 } 782 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp); 783 } 784 785 /** 786 * STBMV performs one of the matrix-vector operations 787 * x := A*x or x := A**T*x 788 * 789 * Details: http://www.netlib.org/lapack/explore-html/d6/d7d/stbmv_8f.html 790 * 791 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 792 * but only the region N*(K+1) will be referenced. The following subroutine can is an 793 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 794 * for i in range(0, n): 795 * for j in range(i, min(i+k+1, n)): 796 * b[i, j-i] = a[i, j] 797 * 798 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 799 * @param TransA The type of transpose applied to matrix A. 800 * @param Diag Specifies whether or not A is unit triangular. 801 * @param K The number of off-diagonals of the matrix A 802 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 803 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 804 * @param incX The increment for the elements of vector x, must be larger than zero. 
805 */ 806 public void STBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { 807 // TBMV has the same requirements as TRMV + K >= 0 808 if (K < 0) { 809 throw new RSRuntimeException("K must be greater than or equal to 0"); 810 } 811 validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX); 812 int N = A.getType().getY(); 813 814 boolean mUseIncSupp = isIncSupp(); 815 long aID = A.getID(mRS); 816 long xID = X.getID(mRS); 817 if (mUseIncSupp) { 818 aID = getDummyAlloc(A); 819 xID = getDummyAlloc(X); 820 } 821 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp); 822 } 823 824 /** 825 * DTBMV performs one of the matrix-vector operations 826 * x := A*x or x := A**T*x 827 * 828 * Details: http://www.netlib.org/lapack/explore-html/df/d29/dtbmv_8f.html 829 * 830 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 831 * but only the region N*(K+1) will be referenced. The following subroutine can is an 832 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 833 * for i in range(0, n): 834 * for j in range(i, min(i+k+1, n)): 835 * b[i, j-i] = a[i, j] 836 * 837 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 838 * @param TransA The type of transpose applied to matrix A. 839 * @param Diag Specifies whether or not A is unit triangular. 840 * @param K The number of off-diagonals of the matrix A 841 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 842 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 843 * @param incX The increment for the elements of vector x, must be larger than zero. 
844 */ 845 public void DTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { 846 // TBMV has the same requirements as TRMV + K >= 0 847 if (K < 0) { 848 throw new RSRuntimeException("K must be greater than or equal to 0"); 849 } 850 validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX); 851 int N = A.getType().getY(); 852 853 boolean mUseIncSupp = isIncSupp(); 854 long aID = A.getID(mRS); 855 long xID = X.getID(mRS); 856 if (mUseIncSupp) { 857 aID = getDummyAlloc(A); 858 xID = getDummyAlloc(X); 859 } 860 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp); 861 } 862 863 /** 864 * CTBMV performs one of the matrix-vector operations 865 * x := A*x or x := A**T*x or x := A**H*x 866 * 867 * Details: http://www.netlib.org/lapack/explore-html/d3/dcd/ctbmv_8f.html 868 * 869 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 870 * but only the region N*(K+1) will be referenced. The following subroutine can is an 871 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 872 * for i in range(0, n): 873 * for j in range(i, min(i+k+1, n)): 874 * b[i, j-i] = a[i, j] 875 * 876 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 877 * @param TransA The type of transpose applied to matrix A. 878 * @param Diag Specifies whether or not A is unit triangular. 879 * @param K The number of off-diagonals of the matrix A 880 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 881 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 882 * @param incX The increment for the elements of vector x, must be larger than zero. 
883 */ 884 public void CTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { 885 // TBMV has the same requirements as TRMV + K >= 0 886 if (K < 0) { 887 throw new RSRuntimeException("K must be greater than or equal to 0"); 888 } 889 validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX); 890 int N = A.getType().getY(); 891 892 boolean mUseIncSupp = isIncSupp(); 893 long aID = A.getID(mRS); 894 long xID = X.getID(mRS); 895 if (mUseIncSupp) { 896 aID = getDummyAlloc(A); 897 xID = getDummyAlloc(X); 898 } 899 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp); 900 } 901 902 /** 903 * ZTBMV performs one of the matrix-vector operations 904 * x := A*x or x := A**T*x or x := A**H*x 905 * 906 * Details: http://www.netlib.org/lapack/explore-html/d3/d39/ztbmv_8f.html 907 * 908 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 909 * but only the region N*(K+1) will be referenced. The following subroutine can is an 910 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 911 * for i in range(0, n): 912 * for j in range(i, min(i+k+1, n)): 913 * b[i, j-i] = a[i, j] 914 * 915 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 916 * @param TransA The type of transpose applied to matrix A. 917 * @param Diag Specifies whether or not A is unit triangular. 918 * @param K The number of off-diagonals of the matrix A 919 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 920 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 921 * @param incX The increment for the elements of vector x, must be larger than zero. 
922 */ 923 public void ZTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { 924 // TBMV has the same requirements as TRMV + K >= 0 925 if (K < 0) { 926 throw new RSRuntimeException("K must be greater than or equal to 0"); 927 } 928 validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX); 929 int N = A.getType().getY(); 930 931 boolean mUseIncSupp = isIncSupp(); 932 long aID = A.getID(mRS); 933 long xID = X.getID(mRS); 934 if (mUseIncSupp) { 935 aID = getDummyAlloc(A); 936 xID = getDummyAlloc(X); 937 } 938 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp); 939 } 940 941 /** 942 * STPMV performs one of the matrix-vector operations 943 * x := A*x or x := A**T*x 944 * 945 * Details: http://www.netlib.org/lapack/explore-html/db/db1/stpmv_8f.html 946 * 947 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 948 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 949 * 'a' to packed matrix 'b'. 950 * k = 0 951 * for i in range(0, n): 952 * for j in range(i, n): 953 * b[k++] = a[i, j] 954 * 955 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 956 * @param TransA The type of transpose applied to matrix A. 957 * @param Diag Specifies whether or not A is unit triangular. 958 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32}. 959 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 960 * @param incX The increment for the elements of vector x, must be larger than zero. 
961 */ 962 public void STPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 963 int N = validateTPMV(Element.F32(mRS), Uplo, TransA, Diag, Ap, X, incX); 964 965 boolean mUseIncSupp = isIncSupp(); 966 long apID = Ap.getID(mRS); 967 long xID = X.getID(mRS); 968 if (mUseIncSupp) { 969 apID = getDummyAlloc(Ap); 970 xID = getDummyAlloc(X); 971 } 972 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, apID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp); 973 } 974 975 /** 976 * DTPMV performs one of the matrix-vector operations 977 * x := A*x or x := A**T*x 978 * 979 * Details: http://www.netlib.org/lapack/explore-html/dc/dcd/dtpmv_8f.html 980 * 981 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 982 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 983 * 'a' to packed matrix 'b'. 984 * k = 0 985 * for i in range(0, n): 986 * for j in range(i, n): 987 * b[k++] = a[i, j] 988 * 989 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 990 * @param TransA The type of transpose applied to matrix A. 991 * @param Diag Specifies whether or not A is unit triangular. 992 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64}. 993 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 994 * @param incX The increment for the elements of vector x, must be larger than zero. 
995 */ 996 public void DTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 997 int N = validateTPMV(Element.F64(mRS), Uplo, TransA, Diag, Ap, X, incX); 998 999 boolean mUseIncSupp = isIncSupp(); 1000 long apID = Ap.getID(mRS); 1001 long xID = X.getID(mRS); 1002 if (mUseIncSupp) { 1003 apID = getDummyAlloc(Ap); 1004 xID = getDummyAlloc(X); 1005 } 1006 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, apID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1007 } 1008 1009 /** 1010 * CTPMV performs one of the matrix-vector operations 1011 * x := A*x or x := A**T*x or x := A**H*x 1012 * 1013 * Details: http://www.netlib.org/lapack/explore-html/d4/dbb/ctpmv_8f.html 1014 * 1015 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1016 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1017 * 'a' to packed matrix 'b'. 1018 * k = 0 1019 * for i in range(0, n): 1020 * for j in range(i, n): 1021 * b[k++] = a[i, j] 1022 * 1023 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1024 * @param TransA The type of transpose applied to matrix A. 1025 * @param Diag Specifies whether or not A is unit triangular. 1026 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32_2}. 1027 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 1028 * @param incX The increment for the elements of vector x, must be larger than zero. 
1029 */ 1030 public void CTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 1031 int N = validateTPMV(Element.F32_2(mRS), Uplo, TransA, Diag, Ap, X, incX); 1032 1033 boolean mUseIncSupp = isIncSupp(); 1034 long apID = Ap.getID(mRS); 1035 long xID = X.getID(mRS); 1036 if (mUseIncSupp) { 1037 apID = getDummyAlloc(Ap); 1038 xID = getDummyAlloc(X); 1039 } 1040 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, apID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1041 } 1042 1043 /** 1044 * ZTPMV performs one of the matrix-vector operations 1045 * x := A*x or x := A**T*x or x := A**H*x 1046 * 1047 * Details: http://www.netlib.org/lapack/explore-html/d2/d9e/ztpmv_8f.html 1048 * 1049 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1050 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1051 * 'a' to packed matrix 'b'. 1052 * k = 0 1053 * for i in range(0, n): 1054 * for j in range(i, n): 1055 * b[k++] = a[i, j] 1056 * 1057 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1058 * @param TransA The type of transpose applied to matrix A. 1059 * @param Diag Specifies whether or not A is unit triangular. 1060 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64_2}. 1061 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 1062 * @param incX The increment for the elements of vector x, must be larger than zero. 
1063 */ 1064 public void ZTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 1065 int N = validateTPMV(Element.F64_2(mRS), Uplo, TransA, Diag, Ap, X, incX); 1066 1067 boolean mUseIncSupp = isIncSupp(); 1068 long apID = Ap.getID(mRS); 1069 long xID = X.getID(mRS); 1070 if (mUseIncSupp) { 1071 apID = getDummyAlloc(Ap); 1072 xID = getDummyAlloc(X); 1073 } 1074 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, apID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1075 } 1076 1077 /** 1078 * STRSV solves one of the systems of equations 1079 * A*x = b or A**T*x = b 1080 * 1081 * Details: http://www.netlib.org/lapack/explore-html/d0/d2a/strsv_8f.html 1082 * 1083 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1084 * @param TransA The type of transpose applied to matrix A. 1085 * @param Diag Specifies whether or not A is unit triangular. 1086 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 1087 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1088 * @param incX The increment for the elements of vector x, must be larger than zero. 
1089 */ 1090 public void STRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 1091 // TRSV is the same as TRMV 1092 validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX); 1093 int N = A.getType().getY(); 1094 1095 boolean mUseIncSupp = isIncSupp(); 1096 long aID = A.getID(mRS); 1097 long xID = X.getID(mRS); 1098 if (mUseIncSupp) { 1099 aID = getDummyAlloc(A); 1100 xID = getDummyAlloc(X); 1101 } 1102 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1103 1104 } 1105 1106 /** 1107 * DTRSV solves one of the systems of equations 1108 * A*x = b or A**T*x = b 1109 * 1110 * Details: http://www.netlib.org/lapack/explore-html/d6/d96/dtrsv_8f.html 1111 * 1112 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1113 * @param TransA The type of transpose applied to matrix A. 1114 * @param Diag Specifies whether or not A is unit triangular. 1115 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 1116 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 1117 * @param incX The increment for the elements of vector x, must be larger than zero. 
1118 */ 1119 public void DTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 1120 // TRSV is the same as TRMV 1121 validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX); 1122 int N = A.getType().getY(); 1123 1124 boolean mUseIncSupp = isIncSupp(); 1125 long aID = A.getID(mRS); 1126 long xID = X.getID(mRS); 1127 if (mUseIncSupp) { 1128 aID = getDummyAlloc(A); 1129 xID = getDummyAlloc(X); 1130 } 1131 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1132 1133 } 1134 1135 /** 1136 * CTRSV solves one of the systems of equations 1137 * A*x = b or A**T*x = b or A**H*x = b 1138 * 1139 * Details: http://www.netlib.org/lapack/explore-html/d4/dc8/ctrsv_8f.html 1140 * 1141 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1142 * @param TransA The type of transpose applied to matrix A. 1143 * @param Diag Specifies whether or not A is unit triangular. 1144 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 1145 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 1146 * @param incX The increment for the elements of vector x, must be larger than zero. 
1147 */ 1148 public void CTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 1149 // TRSV is the same as TRMV 1150 validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX); 1151 int N = A.getType().getY(); 1152 1153 boolean mUseIncSupp = isIncSupp(); 1154 long aID = A.getID(mRS); 1155 long xID = X.getID(mRS); 1156 if (mUseIncSupp) { 1157 aID = getDummyAlloc(A); 1158 xID = getDummyAlloc(X); 1159 } 1160 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1161 1162 } 1163 1164 /** 1165 * ZTRSV solves one of the systems of equations 1166 * A*x = b or A**T*x = b or A**H*x = b 1167 * 1168 * Details: http://www.netlib.org/lapack/explore-html/d1/d2f/ztrsv_8f.html 1169 * 1170 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1171 * @param TransA The type of transpose applied to matrix A. 1172 * @param Diag Specifies whether or not A is unit triangular. 1173 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 1174 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 1175 * @param incX The increment for the elements of vector x, must be larger than zero. 
1176 */ 1177 public void ZTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 1178 // TRSV is the same as TRMV 1179 validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX); 1180 int N = A.getType().getY(); 1181 1182 boolean mUseIncSupp = isIncSupp(); 1183 long aID = A.getID(mRS); 1184 long xID = X.getID(mRS); 1185 if (mUseIncSupp) { 1186 aID = getDummyAlloc(A); 1187 xID = getDummyAlloc(X); 1188 } 1189 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1190 1191 } 1192 1193 /** 1194 * STBSV solves one of the systems of equations 1195 * A*x = b or A**T*x = b 1196 * 1197 * Details: http://www.netlib.org/lapack/explore-html/d0/d1f/stbsv_8f.html 1198 * 1199 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 1200 * but only the region N*(K+1) will be referenced. The following subroutine can is an 1201 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 1202 * for i in range(0, n): 1203 * for j in range(i, min(i+k+1, n)): 1204 * b[i, j-i] = a[i, j] 1205 * 1206 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1207 * @param TransA The type of transpose applied to matrix A. 1208 * @param Diag Specifies whether or not A is unit triangular. 1209 * @param K The number of off-diagonals of the matrix A 1210 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 1211 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1212 * @param incX The increment for the elements of vector x, must be larger than zero. 
1213 */ 1214 public void STBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { 1215 // TBSV is the same as TRMV + K >= 0 1216 validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX); 1217 int N = A.getType().getY(); 1218 if (K < 0) { 1219 throw new RSRuntimeException("Number of diagonals must be positive"); 1220 } 1221 1222 boolean mUseIncSupp = isIncSupp(); 1223 long aID = A.getID(mRS); 1224 long xID = X.getID(mRS); 1225 if (mUseIncSupp) { 1226 aID = getDummyAlloc(A); 1227 xID = getDummyAlloc(X); 1228 } 1229 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1230 } 1231 1232 /** 1233 * DTBSV solves one of the systems of equations 1234 * A*x = b or A**T*x = b 1235 * 1236 * Details: http://www.netlib.org/lapack/explore-html/d4/dcf/dtbsv_8f.html 1237 * 1238 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 1239 * but only the region N*(K+1) will be referenced. The following subroutine can is an 1240 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 1241 * for i in range(0, n): 1242 * for j in range(i, min(i+k+1, n)): 1243 * b[i, j-i] = a[i, j] 1244 * 1245 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1246 * @param TransA The type of transpose applied to matrix A. 1247 * @param Diag Specifies whether or not A is unit triangular. 1248 * @param K The number of off-diagonals of the matrix A 1249 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 1250 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 1251 * @param incX The increment for the elements of vector x, must be larger than zero. 
1252 */ 1253 public void DTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { 1254 // TBSV is the same as TRMV + K >= 0 1255 validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX); 1256 int N = A.getType().getY(); 1257 if (K < 0) { 1258 throw new RSRuntimeException("Number of diagonals must be positive"); 1259 } 1260 1261 boolean mUseIncSupp = isIncSupp(); 1262 long aID = A.getID(mRS); 1263 long xID = X.getID(mRS); 1264 if (mUseIncSupp) { 1265 aID = getDummyAlloc(A); 1266 xID = getDummyAlloc(X); 1267 } 1268 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1269 } 1270 1271 /** 1272 * CTBSV solves one of the systems of equations 1273 * A*x = b or A**T*x = b or A**H*x = b 1274 * 1275 * Details: http://www.netlib.org/lapack/explore-html/d9/d5f/ctbsv_8f.html 1276 * 1277 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 1278 * but only the region N*(K+1) will be referenced. The following subroutine can is an 1279 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 1280 * for i in range(0, n): 1281 * for j in range(i, min(i+k+1, n)): 1282 * b[i, j-i] = a[i, j] 1283 * 1284 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1285 * @param TransA The type of transpose applied to matrix A. 1286 * @param Diag Specifies whether or not A is unit triangular. 1287 * @param K The number of off-diagonals of the matrix A 1288 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 1289 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 1290 * @param incX The increment for the elements of vector x, must be larger than zero. 
1291 */ 1292 public void CTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { 1293 // TBSV is the same as TRMV + K >= 0 1294 validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX); 1295 int N = A.getType().getY(); 1296 if (K < 0) { 1297 throw new RSRuntimeException("Number of diagonals must be positive"); 1298 } 1299 1300 boolean mUseIncSupp = isIncSupp(); 1301 long aID = A.getID(mRS); 1302 long xID = X.getID(mRS); 1303 if (mUseIncSupp) { 1304 aID = getDummyAlloc(A); 1305 xID = getDummyAlloc(X); 1306 } 1307 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1308 } 1309 1310 /** 1311 * ZTBSV solves one of the systems of equations 1312 * A*x = b or A**T*x = b or A**H*x = b 1313 * 1314 * Details: http://www.netlib.org/lapack/explore-html/d4/d5a/ztbsv_8f.html 1315 * 1316 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 1317 * but only the region N*(K+1) will be referenced. The following subroutine can is an 1318 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 1319 * for i in range(0, n): 1320 * for j in range(i, min(i+k+1, n)): 1321 * b[i, j-i] = a[i, j] 1322 * 1323 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1324 * @param TransA The type of transpose applied to matrix A. 1325 * @param Diag Specifies whether or not A is unit triangular. 1326 * @param K The number of off-diagonals of the matrix A 1327 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 1328 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 1329 * @param incX The increment for the elements of vector x, must be larger than zero. 
1330 */ 1331 public void ZTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { 1332 // TBSV is the same as TRMV + K >= 0 1333 validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX); 1334 int N = A.getType().getY(); 1335 if (K < 0) { 1336 throw new RSRuntimeException("Number of diagonals must be positive"); 1337 } 1338 1339 boolean mUseIncSupp = isIncSupp(); 1340 long aID = A.getID(mRS); 1341 long xID = X.getID(mRS); 1342 if (mUseIncSupp) { 1343 aID = getDummyAlloc(A); 1344 xID = getDummyAlloc(X); 1345 } 1346 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1347 } 1348 1349 /** 1350 * STPSV solves one of the systems of equations 1351 * A*x = b or A**T*x = b 1352 * 1353 * Details: http://www.netlib.org/lapack/explore-html/d0/d7c/stpsv_8f.html 1354 * 1355 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1356 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1357 * 'a' to packed matrix 'b'. 1358 * k = 0 1359 * for i in range(0, n): 1360 * for j in range(i, n): 1361 * b[k++] = a[i, j] 1362 * 1363 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1364 * @param TransA The type of transpose applied to matrix A. 1365 * @param Diag Specifies whether or not A is unit triangular. 1366 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32}. 1367 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1368 * @param incX The increment for the elements of vector x, must be larger than zero. 
1369 */ 1370 public void STPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 1371 // TPSV is same as TPMV 1372 int N = validateTPMV(Element.F32(mRS), Uplo, TransA, Diag, Ap, X, incX); 1373 1374 boolean mUseIncSupp = isIncSupp(); 1375 long apID = Ap.getID(mRS); 1376 long xID = X.getID(mRS); 1377 if (mUseIncSupp) { 1378 apID = getDummyAlloc(Ap); 1379 xID = getDummyAlloc(X); 1380 } 1381 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, apID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1382 } 1383 1384 /** 1385 * DTPSV solves one of the systems of equations 1386 * A*x = b or A**T*x = b 1387 * 1388 * Details: http://www.netlib.org/lapack/explore-html/d9/d84/dtpsv_8f.html 1389 * 1390 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1391 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1392 * 'a' to packed matrix 'b'. 1393 * k = 0 1394 * for i in range(0, n): 1395 * for j in range(i, n): 1396 * b[k++] = a[i, j] 1397 * 1398 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1399 * @param TransA The type of transpose applied to matrix A. 1400 * @param Diag Specifies whether or not A is unit triangular. 1401 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64}. 1402 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 1403 * @param incX The increment for the elements of vector x, must be larger than zero. 
1404 */ 1405 public void DTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 1406 // TPSV is same as TPMV 1407 int N = validateTPMV(Element.F64(mRS), Uplo, TransA, Diag, Ap, X, incX); 1408 1409 boolean mUseIncSupp = isIncSupp(); 1410 long apID = Ap.getID(mRS); 1411 long xID = X.getID(mRS); 1412 if (mUseIncSupp) { 1413 apID = getDummyAlloc(Ap); 1414 xID = getDummyAlloc(X); 1415 } 1416 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, apID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1417 } 1418 1419 /** 1420 * CTPSV solves one of the systems of equations 1421 * A*x = b or A**T*x = b or A**H*x = b 1422 * 1423 * Details: http://www.netlib.org/lapack/explore-html/d8/d56/ctpsv_8f.html 1424 * 1425 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1426 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1427 * 'a' to packed matrix 'b'. 1428 * k = 0 1429 * for i in range(0, n): 1430 * for j in range(i, n): 1431 * b[k++] = a[i, j] 1432 * 1433 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1434 * @param TransA The type of transpose applied to matrix A. 1435 * @param Diag Specifies whether or not A is unit triangular. 1436 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32_2}. 1437 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 1438 * @param incX The increment for the elements of vector x, must be larger than zero. 
1439 */ 1440 public void CTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 1441 // TPSV is same as TPMV 1442 int N = validateTPMV(Element.F32_2(mRS), Uplo, TransA, Diag, Ap, X, incX); 1443 1444 boolean mUseIncSupp = isIncSupp(); 1445 long apID = Ap.getID(mRS); 1446 long xID = X.getID(mRS); 1447 if (mUseIncSupp) { 1448 apID = getDummyAlloc(Ap); 1449 xID = getDummyAlloc(X); 1450 } 1451 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, apID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1452 } 1453 1454 /** 1455 * ZTPSV solves one of the systems of equations 1456 * A*x = b or A**T*x = b or A**H*x = b 1457 * 1458 * Details: http://www.netlib.org/lapack/explore-html/da/d57/ztpsv_8f.html 1459 * 1460 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1461 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1462 * 'a' to packed matrix 'b'. 1463 * k = 0 1464 * for i in range(0, n): 1465 * for j in range(i, n): 1466 * b[k++] = a[i, j] 1467 * 1468 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1469 * @param TransA The type of transpose applied to matrix A. 1470 * @param Diag Specifies whether or not A is unit triangular. 1471 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64_2}. 1472 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 1473 * @param incX The increment for the elements of vector x, must be larger than zero. 
1474 */ 1475 public void ZTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 1476 // TPSV is same as TPMV 1477 int N = validateTPMV(Element.F64_2(mRS), Uplo, TransA, Diag, Ap, X, incX); 1478 1479 boolean mUseIncSupp = isIncSupp(); 1480 long apID = Ap.getID(mRS); 1481 long xID = X.getID(mRS); 1482 if (mUseIncSupp) { 1483 apID = getDummyAlloc(Ap); 1484 xID = getDummyAlloc(X); 1485 } 1486 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, apID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1487 } 1488 1489 /** 1490 * Level 2, S and D only 1491 */ 1492 static int validateSYMV(Element e, @Uplo int Uplo, Allocation A, Allocation X, Allocation Y, int incX, int incY) { 1493 validateUplo(Uplo); 1494 int N = A.getType().getY(); 1495 if (A.getType().getX() != N) { 1496 throw new RSRuntimeException("A must be a square matrix for SYMV"); 1497 } 1498 if (!A.getType().getElement().isCompatible(e) || 1499 !X.getType().getElement().isCompatible(e) || 1500 !Y.getType().getElement().isCompatible(e) ) { 1501 throw new RSRuntimeException("Called BLAS with wrong Element type"); 1502 } 1503 if (X.getType().getY() > 1 || Y.getType().getY() > 1) { 1504 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 1505 } 1506 1507 if (incX <= 0 || incY <= 0) { 1508 throw new RSRuntimeException("Vector increments must be greater than 0"); 1509 } 1510 int expectedXDim = 1 + (N - 1) * incX; 1511 if (X.getType().getX() != expectedXDim) { 1512 throw new RSRuntimeException("Incorrect vector dimensions for SYMV"); 1513 } 1514 int expectedYDim = 1 + (N - 1) * incY; 1515 if (Y.getType().getX() != expectedYDim) { 1516 throw new RSRuntimeException("Incorrect vector dimensions for SYMV"); 1517 } 1518 return N; 1519 } 1520 static int validateSPMV(Element e, @Uplo int Uplo, Allocation Ap, Allocation X, int incX, Allocation Y, int incY) { 1521 validateUplo(Uplo); 1522 if 
(!Ap.getType().getElement().isCompatible(e) || 1523 !X.getType().getElement().isCompatible(e) || 1524 !Y.getType().getElement().isCompatible(e)) { 1525 throw new RSRuntimeException("Called BLAS with wrong Element type"); 1526 } 1527 if (X.getType().getY() > 1 || Y.getType().getY() > 1) { 1528 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 1529 } 1530 1531 if (Ap.getType().getY() > 1) { 1532 throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1"); 1533 } 1534 1535 int N = (int)Math.sqrt((double)Ap.getType().getX() * 2); 1536 if (Ap.getType().getX() != ((N * (N+1)) / 2)) { 1537 throw new RSRuntimeException("Invalid dimension for Ap"); 1538 } 1539 if (incX <= 0 || incY <= 0) { 1540 throw new RSRuntimeException("Vector increments must be greater than 0"); 1541 } 1542 int expectedXDim = 1 + (N - 1) * incX; 1543 if (X.getType().getX() != expectedXDim) { 1544 throw new RSRuntimeException("Incorrect vector dimensions for SPMV"); 1545 } 1546 int expectedYDim = 1 + (N - 1) * incY; 1547 if (Y.getType().getX() != expectedYDim) { 1548 throw new RSRuntimeException("Incorrect vector dimensions for SPMV"); 1549 } 1550 1551 return N; 1552 } 1553 static void validateGER(Element e, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 1554 if (!A.getType().getElement().isCompatible(e) || 1555 !X.getType().getElement().isCompatible(e) || 1556 !Y.getType().getElement().isCompatible(e) ) { 1557 throw new RSRuntimeException("Called BLAS with wrong Element type"); 1558 } 1559 1560 if (X.getType().getY() > 1 || Y.getType().getY() > 1) { 1561 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 1562 } 1563 1564 int M = A.getType().getY(); 1565 int N = A.getType().getX(); 1566 1567 if (N < 1 || M < 1) { 1568 throw new RSRuntimeException("M and N must be 1 or greater for GER"); 1569 } 1570 if (incX <= 0 || incY <= 0) { 1571 throw new RSRuntimeException("Vector increments must be greater than 0"); 1572 } 1573 int 
expectedXDim = 1 + (M - 1) * incX; 1574 if (X.getType().getX() != expectedXDim) { 1575 throw new RSRuntimeException("Incorrect vector dimensions for GER"); 1576 } 1577 int expectedYDim = 1 + (N - 1) * incY; 1578 if (Y.getType().getX() != expectedYDim) { 1579 throw new RSRuntimeException("Incorrect vector dimensions for GER"); 1580 } 1581 1582 1583 } 1584 static int validateSYR(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation A) { 1585 validateUplo(Uplo); 1586 if (!A.getType().getElement().isCompatible(e) || 1587 !X.getType().getElement().isCompatible(e)) { 1588 throw new RSRuntimeException("Called BLAS with wrong Element type"); 1589 } 1590 1591 int N = A.getType().getX(); 1592 1593 if (X.getType().getY() > 1) { 1594 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 1595 } 1596 if (N != A.getType().getY()) { 1597 throw new RSRuntimeException("A must be a symmetric matrix"); 1598 } 1599 if (incX <= 0) { 1600 throw new RSRuntimeException("Vector increments must be greater than 0"); 1601 } 1602 int expectedXDim = 1 + (N - 1) * incX; 1603 if (X.getType().getX() != expectedXDim) { 1604 throw new RSRuntimeException("Incorrect vector dimensions for SYR"); 1605 } 1606 return N; 1607 } 1608 static int validateSPR(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Ap) { 1609 validateUplo(Uplo); 1610 if (!Ap.getType().getElement().isCompatible(e) || 1611 !X.getType().getElement().isCompatible(e)) { 1612 throw new RSRuntimeException("Called BLAS with wrong Element type"); 1613 } 1614 if (X.getType().getY() > 1) { 1615 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 1616 } 1617 1618 if (Ap.getType().getY() > 1) { 1619 throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1"); 1620 } 1621 1622 int N = (int)Math.sqrt((double)Ap.getType().getX() * 2); 1623 if (Ap.getType().getX() != ((N * (N+1)) / 2)) { 1624 throw new RSRuntimeException("Invalid dimension for Ap"); 1625 } 1626 if (incX <= 
0) { 1627 throw new RSRuntimeException("Vector increments must be greater than 0"); 1628 } 1629 int expectedXDim = 1 + (N - 1) * incX; 1630 if (X.getType().getX() != expectedXDim) { 1631 throw new RSRuntimeException("Incorrect vector dimensions for SPR"); 1632 } 1633 1634 return N; 1635 } 1636 1637 static int validateSYR2(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 1638 validateUplo(Uplo); 1639 if (!A.getType().getElement().isCompatible(e) || 1640 !X.getType().getElement().isCompatible(e) || 1641 !Y.getType().getElement().isCompatible(e)) { 1642 throw new RSRuntimeException("Called BLAS with wrong Element type"); 1643 } 1644 1645 if (X.getType().getY() > 1 || Y.getType().getY() > 1) { 1646 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 1647 } 1648 1649 int N = A.getType().getX(); 1650 1651 if (N != A.getType().getY()) { 1652 throw new RSRuntimeException("A must be a symmetric matrix"); 1653 } 1654 if (incX <= 0 || incY <= 0) { 1655 throw new RSRuntimeException("Vector increments must be greater than 0"); 1656 } 1657 int expectedXDim = 1 + (N - 1) * incX; 1658 int expectedYDim = 1 + (N - 1) * incY; 1659 if (X.getType().getX() != expectedXDim || Y.getType().getX() != expectedYDim) { 1660 throw new RSRuntimeException("Incorrect vector dimensions for SYR"); 1661 } 1662 return N; 1663 1664 } 1665 static int validateSPR2(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) { 1666 validateUplo(Uplo); 1667 if (!Ap.getType().getElement().isCompatible(e) || 1668 !X.getType().getElement().isCompatible(e) || 1669 !Y.getType().getElement().isCompatible(e)) { 1670 throw new RSRuntimeException("Called BLAS with wrong Element type"); 1671 } 1672 if (X.getType().getY() > 1 || Y.getType().getY() > 1) { 1673 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 1674 } 1675 1676 if (Ap.getType().getY() > 1) { 1677 throw new RSRuntimeException("Ap 
must have a Y dimension of 0 or 1"); 1678 } 1679 1680 int N = (int)Math.sqrt((double)Ap.getType().getX() * 2); 1681 if (Ap.getType().getX() != ((N * (N+1)) / 2)) { 1682 throw new RSRuntimeException("Invalid dimension for Ap"); 1683 } 1684 if (incX <= 0 || incY <= 0) { 1685 throw new RSRuntimeException("Vector increments must be greater than 0"); 1686 } 1687 int expectedXDim = 1 + (N - 1) * incX; 1688 int expectedYDim = 1 + (N - 1) * incY; 1689 if (X.getType().getX() != expectedXDim || Y.getType().getX() != expectedYDim) { 1690 throw new RSRuntimeException("Incorrect vector dimensions for SPR2"); 1691 } 1692 1693 return N; 1694 } 1695 1696 /** 1697 * SSYMV performs the matrix-vector operation 1698 * y := alpha*A*x + beta*y 1699 * 1700 * Details: http://www.netlib.org/lapack/explore-html/d2/d94/ssymv_8f.html 1701 * 1702 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 1703 * @param alpha The scalar alpha. 1704 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 1705 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1706 * @param incX The increment for the elements of vector x, must be larger than zero. 1707 * @param beta The scalar beta. 1708 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}. 1709 * @param incY The increment for the elements of vector y, must be larger than zero. 
1710 */ 1711 public void SSYMV(@Uplo int Uplo, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) { 1712 int N = validateSYMV(Element.F32(mRS), Uplo, A, X, Y, incX, incY); 1713 1714 boolean mUseIncSupp = isIncSupp(); 1715 long aID = A.getID(mRS); 1716 long xID = X.getID(mRS); 1717 long yID = Y.getID(mRS); 1718 if (mUseIncSupp) { 1719 aID = getDummyAlloc(A); 1720 xID = getDummyAlloc(X); 1721 yID = getDummyAlloc(Y); 1722 } 1723 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssymv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, aID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp); 1724 } 1725 1726 /** 1727 * SSBMV performs the matrix-vector operation 1728 * y := alpha*A*x + beta*y 1729 * 1730 * Details: http://www.netlib.org/lapack/explore-html/d3/da1/ssbmv_8f.html 1731 * 1732 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 1733 * but only the region N*(K+1) will be referenced. The following subroutine can is an 1734 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 1735 * for i in range(0, n): 1736 * for j in range(i, min(i+k+1, n)): 1737 * b[i, j-i] = a[i, j] 1738 * 1739 * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied. 1740 * @param K The number of off-diagonals of the matrix A 1741 * @param alpha The scalar alpha. 1742 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 1743 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1744 * @param incX The increment for the elements of vector x, must be larger than zero. 1745 * @param beta The scalar beta. 1746 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}. 1747 * @param incY The increment for the elements of vector y, must be larger than zero. 
1748 */ 1749 public void SSBMV(@Uplo int Uplo, int K, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) { 1750 // SBMV is the same as SYMV + K >= 0 1751 if (K < 0) { 1752 throw new RSRuntimeException("K must be greater than or equal to 0"); 1753 } 1754 int N = validateSYMV(Element.F32(mRS), Uplo, A, X, Y, incX, incY); 1755 1756 boolean mUseIncSupp = isIncSupp(); 1757 long aID = A.getID(mRS); 1758 long xID = X.getID(mRS); 1759 long yID = Y.getID(mRS); 1760 if (mUseIncSupp) { 1761 aID = getDummyAlloc(A); 1762 xID = getDummyAlloc(X); 1763 yID = getDummyAlloc(Y); 1764 } 1765 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha, aID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp); 1766 } 1767 1768 /** 1769 * SSPMV performs the matrix-vector operation 1770 * y := alpha*A*x + beta*y 1771 * 1772 * Details: http://www.netlib.org/lapack/explore-html/d8/d68/sspmv_8f.html 1773 * 1774 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1775 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1776 * 'a' to packed matrix 'b'. 1777 * k = 0 1778 * for i in range(0, n): 1779 * for j in range(i, n): 1780 * b[k++] = a[i, j] 1781 * 1782 * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form. 1783 * @param alpha The scalar alpha. 1784 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32}. 1785 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1786 * @param incX The increment for the elements of vector x, must be larger than zero. 1787 * @param beta The scalar beta. 1788 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}. 1789 * @param incY The increment for the elements of vector y, must be larger than zero. 
1790 */ 1791 public void SSPMV(@Uplo int Uplo, float alpha, Allocation Ap, Allocation X, int incX, float beta, Allocation Y, int incY) { 1792 int N = validateSPMV(Element.F32(mRS), Uplo, Ap, X, incX, Y, incY); 1793 1794 boolean mUseIncSupp = isIncSupp(); 1795 long apID = Ap.getID(mRS); 1796 long xID = X.getID(mRS); 1797 long yID = Y.getID(mRS); 1798 if (mUseIncSupp) { 1799 apID = getDummyAlloc(Ap); 1800 xID = getDummyAlloc(X); 1801 yID = getDummyAlloc(Y); 1802 } 1803 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sspmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, apID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp); 1804 } 1805 1806 /** 1807 * SGER performs the rank 1 operation 1808 * A := alpha*x*y**T + A 1809 * 1810 * Details: http://www.netlib.org/lapack/explore-html/db/d5c/sger_8f.html 1811 * 1812 * @param alpha The scalar alpha. 1813 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1814 * @param incX The increment for the elements of vector x, must be larger than zero. 1815 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}. 1816 * @param incY The increment for the elements of vector y, must be larger than zero. 1817 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 
1818 */ 1819 public void SGER(float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 1820 int M = A.getType().getY(); 1821 int N = A.getType().getX(); 1822 validateGER(Element.F32(mRS), X, incX, Y, incY, A); 1823 1824 boolean mUseIncSupp = isIncSupp(); 1825 long aID = A.getID(mRS); 1826 long xID = X.getID(mRS); 1827 long yID = Y.getID(mRS); 1828 if (mUseIncSupp) { 1829 aID = getDummyAlloc(A); 1830 xID = getDummyAlloc(X); 1831 yID = getDummyAlloc(Y); 1832 } 1833 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sger, 0, 0, 0, 0, 0, M, N, 0, alpha, xID, yID, 0.f, aID, incX, incY, 0, 0, mUseIncSupp); 1834 } 1835 1836 /** 1837 * SSYR performs the rank 1 operation 1838 * A := alpha*x*x**T + A 1839 * 1840 * Details: http://www.netlib.org/lapack/explore-html/d6/dac/ssyr_8f.html 1841 * 1842 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 1843 * @param alpha The scalar alpha. 1844 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1845 * @param incX The increment for the elements of vector x, must be larger than zero. 1846 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 
1847 */ 1848 public void SSYR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation A) { 1849 int N = validateSYR(Element.F32(mRS), Uplo, X, incX, A); 1850 1851 boolean mUseIncSupp = isIncSupp(); 1852 long aID = A.getID(mRS); 1853 long xID = X.getID(mRS); 1854 if (mUseIncSupp) { 1855 aID = getDummyAlloc(A); 1856 xID = getDummyAlloc(X); 1857 } 1858 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, aID, 0.f, 0, incX, 0, 0, 0, mUseIncSupp); 1859 } 1860 1861 /** 1862 * SSPR performs the rank 1 operation 1863 * A := alpha*x*x**T + A 1864 * 1865 * Details: http://www.netlib.org/lapack/explore-html/d2/d9b/sspr_8f.html 1866 * 1867 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1868 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1869 * 'a' to packed matrix 'b'. 1870 * k = 0 1871 * for i in range(0, n): 1872 * for j in range(i, n): 1873 * b[k++] = a[i, j] 1874 * 1875 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. 1876 * @param alpha The scalar alpha. 1877 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1878 * @param incX The increment for the elements of vector x, must be larger than zero. 1879 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32}. 
1880 */ 1881 public void SSPR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Ap) { 1882 int N = validateSPR(Element.F32(mRS), Uplo, X, incX, Ap); 1883 1884 boolean mUseIncSupp = isIncSupp(); 1885 long apID = Ap.getID(mRS); 1886 long xID = X.getID(mRS); 1887 if (mUseIncSupp) { 1888 apID = getDummyAlloc(Ap); 1889 xID = getDummyAlloc(X); 1890 } 1891 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sspr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, apID, 0.f, 0, incX, 0, 0, 0, mUseIncSupp); 1892 } 1893 1894 /** 1895 * SSYR2 performs the symmetric rank 2 operation 1896 * A := alpha*x*y**T + alpha*y*x**T + A 1897 * 1898 * Details: http://www.netlib.org/lapack/explore-html/db/d99/ssyr2_8f.html 1899 * 1900 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 1901 * @param alpha The scalar alpha. 1902 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1903 * @param incX The increment for the elements of vector x, must be larger than zero. 1904 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}. 1905 * @param incY The increment for the elements of vector y, must be larger than zero. 1906 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 
1907 */ 1908 public void SSYR2(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 1909 int N = validateSYR2(Element.F32(mRS), Uplo, X, incX, Y, incY, A); 1910 1911 boolean mUseIncSupp = isIncSupp(); 1912 long aID = A.getID(mRS); 1913 long xID = X.getID(mRS); 1914 long yID = Y.getID(mRS); 1915 if (mUseIncSupp) { 1916 aID = getDummyAlloc(A); 1917 xID = getDummyAlloc(X); 1918 yID = getDummyAlloc(Y); 1919 } 1920 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, yID, 0, aID, incX, incY, 0, 0, mUseIncSupp); 1921 } 1922 1923 /** 1924 * SSPR2 performs the symmetric rank 2 operation 1925 * A := alpha*x*y**T + alpha*y*x**T + A 1926 * 1927 * Details: http://www.netlib.org/lapack/explore-html/db/d3e/sspr2_8f.html 1928 * 1929 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1930 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1931 * 'a' to packed matrix 'b'. 1932 * k = 0 1933 * for i in range(0, n): 1934 * for j in range(i, n): 1935 * b[k++] = a[i, j] 1936 * 1937 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. 1938 * @param alpha The scalar alpha. 1939 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1940 * @param incX The increment for the elements of vector x, must be larger than zero. 1941 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}. 1942 * @param incY The increment for the elements of vector y, must be larger than zero. 1943 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32}. 
1944 */ 1945 public void SSPR2(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) { 1946 int N = validateSPR2(Element.F32(mRS), Uplo, X, incX, Y, incY, Ap); 1947 1948 boolean mUseIncSupp = isIncSupp(); 1949 long apID = Ap.getID(mRS); 1950 long xID = X.getID(mRS); 1951 long yID = Y.getID(mRS); 1952 if (mUseIncSupp) { 1953 apID = getDummyAlloc(Ap); 1954 xID = getDummyAlloc(X); 1955 yID = getDummyAlloc(Y); 1956 } 1957 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sspr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, yID, 0, apID, incX, incY, 0, 0, mUseIncSupp); 1958 } 1959 1960 /** 1961 * DSYMV performs the matrix-vector operation 1962 * y := alpha*A*x + beta*y 1963 * 1964 * Details: http://www.netlib.org/lapack/explore-html/d8/dbe/dsymv_8f.html 1965 * 1966 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 1967 * @param alpha The scalar alpha. 1968 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 1969 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 1970 * @param incX The increment for the elements of vector x, must be larger than zero. 1971 * @param beta The scalar beta. 1972 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. 1973 * @param incY The increment for the elements of vector y, must be larger than zero. 
1974 */ 1975 public void DSYMV(@Uplo int Uplo, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) { 1976 int N = validateSYMV(Element.F64(mRS), Uplo, A, X, Y, incX, incY); 1977 1978 boolean mUseIncSupp = isIncSupp(); 1979 long aID = A.getID(mRS); 1980 long xID = X.getID(mRS); 1981 long yID = Y.getID(mRS); 1982 if (mUseIncSupp) { 1983 aID = getDummyAlloc(A); 1984 xID = getDummyAlloc(X); 1985 yID = getDummyAlloc(Y); 1986 } 1987 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsymv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, aID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp); 1988 } 1989 1990 /** 1991 * DSBMV performs the matrix-vector operation 1992 * y := alpha*A*x + beta*y 1993 * 1994 * Details: http://www.netlib.org/lapack/explore-html/d8/d1e/dsbmv_8f.html 1995 * 1996 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 1997 * but only the region N*(K+1) will be referenced. The following subroutine can is an 1998 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 1999 * for i in range(0, n): 2000 * for j in range(i, min(i+k+1, n)): 2001 * b[i, j-i] = a[i, j] 2002 * 2003 * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied. 2004 * @param K The number of off-diagonals of the matrix A 2005 * @param alpha The scalar alpha. 2006 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 2007 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 2008 * @param incX The increment for the elements of vector x, must be larger than zero. 2009 * @param beta The scalar beta. 2010 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. 2011 * @param incY The increment for the elements of vector y, must be larger than zero. 
2012 */ 2013 public void DSBMV(@Uplo int Uplo, int K, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) { 2014 // SBMV is the same as SYMV + K >= 0 2015 if (K < 0) { 2016 throw new RSRuntimeException("K must be greater than or equal to 0"); 2017 } 2018 int N = validateSYMV(Element.F64(mRS), Uplo, A, X, Y, incX, incY); 2019 2020 boolean mUseIncSupp = isIncSupp(); 2021 long aID = A.getID(mRS); 2022 long xID = X.getID(mRS); 2023 long yID = Y.getID(mRS); 2024 if (mUseIncSupp) { 2025 aID = getDummyAlloc(A); 2026 xID = getDummyAlloc(X); 2027 yID = getDummyAlloc(Y); 2028 } 2029 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha, aID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp); 2030 } 2031 2032 /** 2033 * DSPMV performs the matrix-vector operation 2034 * y := alpha*A*x + beta*y 2035 * 2036 * Details: http://www.netlib.org/lapack/explore-html/d4/d85/dspmv_8f.html 2037 * 2038 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 2039 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 2040 * 'a' to packed matrix 'b'. 2041 * k = 0 2042 * for i in range(0, n): 2043 * for j in range(i, n): 2044 * b[k++] = a[i, j] 2045 * 2046 * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form. 2047 * @param alpha The scalar alpha. 2048 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64}. 2049 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 2050 * @param incX The increment for the elements of vector x, must be larger than zero. 2051 * @param beta The scalar beta. 2052 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. 2053 * @param incY The increment for the elements of vector y, must be larger than zero. 
2054 */ 2055 public void DSPMV(@Uplo int Uplo, double alpha, Allocation Ap, Allocation X, int incX, double beta, Allocation Y, int incY) { 2056 int N = validateSPMV(Element.F64(mRS), Uplo, Ap, X, incX, Y, incY); 2057 2058 boolean mUseIncSupp = isIncSupp(); 2059 long apID = Ap.getID(mRS); 2060 long xID = X.getID(mRS); 2061 long yID = Y.getID(mRS); 2062 if (mUseIncSupp) { 2063 apID = getDummyAlloc(Ap); 2064 xID = getDummyAlloc(X); 2065 yID = getDummyAlloc(Y); 2066 } 2067 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dspmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, apID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp); 2068 } 2069 2070 /** 2071 * DGER performs the rank 1 operation 2072 * A := alpha*x*y**T + A 2073 * 2074 * Details: http://www.netlib.org/lapack/explore-html/dc/da8/dger_8f.html 2075 * 2076 * @param alpha The scalar alpha. 2077 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 2078 * @param incX The increment for the elements of vector x, must be larger than zero. 2079 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. 2080 * @param incY The increment for the elements of vector y, must be larger than zero. 2081 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 
2082 */ 2083 public void DGER(double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 2084 int M = A.getType().getY(); 2085 int N = A.getType().getX(); 2086 validateGER(Element.F64(mRS), X, incX, Y, incY, A); 2087 2088 boolean mUseIncSupp = isIncSupp(); 2089 long aID = A.getID(mRS); 2090 long xID = X.getID(mRS); 2091 long yID = Y.getID(mRS); 2092 if (mUseIncSupp) { 2093 aID = getDummyAlloc(A); 2094 xID = getDummyAlloc(X); 2095 yID = getDummyAlloc(Y); 2096 } 2097 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dger, 0, 0, 0, 0, 0, M, N, 0, alpha, xID, yID, 0.f, aID, incX, incY, 0, 0, mUseIncSupp); 2098 } 2099 2100 /** 2101 * DSYR performs the rank 1 operation 2102 * A := alpha*x*x**T + A 2103 * 2104 * Details: http://www.netlib.org/lapack/explore-html/d3/d60/dsyr_8f.html 2105 * 2106 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 2107 * @param alpha The scalar alpha. 2108 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 2109 * @param incX The increment for the elements of vector x, must be larger than zero. 2110 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 
2111 */ 2112 public void DSYR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation A) { 2113 int N = validateSYR(Element.F64(mRS), Uplo, X, incX, A); 2114 2115 boolean mUseIncSupp = isIncSupp(); 2116 long aID = A.getID(mRS); 2117 long xID = X.getID(mRS); 2118 if (mUseIncSupp) { 2119 aID = getDummyAlloc(A); 2120 xID = getDummyAlloc(X); 2121 } 2122 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, aID, 0.f, 0, incX, 0, 0, 0, mUseIncSupp); 2123 } 2124 2125 /** 2126 * DSPR performs the rank 1 operation 2127 * A := alpha*x*x**T + A 2128 * 2129 * Details: http://www.netlib.org/lapack/explore-html/dd/dba/dspr_8f.html 2130 * 2131 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 2132 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 2133 * 'a' to packed matrix 'b'. 2134 * k = 0 2135 * for i in range(0, n): 2136 * for j in range(i, n): 2137 * b[k++] = a[i, j] 2138 * 2139 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. 2140 * @param alpha The scalar alpha. 2141 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 2142 * @param incX The increment for the elements of vector x, must be larger than zero. 2143 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64}. 
2144 */ 2145 public void DSPR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Ap) { 2146 int N = validateSPR(Element.F64(mRS), Uplo, X, incX, Ap); 2147 2148 boolean mUseIncSupp = isIncSupp(); 2149 long apID = Ap.getID(mRS); 2150 long xID = X.getID(mRS); 2151 if (mUseIncSupp) { 2152 apID = getDummyAlloc(Ap); 2153 xID = getDummyAlloc(X); 2154 } 2155 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dspr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, apID, 0.f, 0, incX, 0, 0, 0, mUseIncSupp); 2156 } 2157 2158 /** 2159 * DSYR2 performs the symmetric rank 2 operation 2160 * A := alpha*x*y**T + alpha*y*x**T + A 2161 * 2162 * Details: http://www.netlib.org/lapack/explore-html/de/d41/dsyr2_8f.html 2163 * 2164 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 2165 * @param alpha The scalar alpha. 2166 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 2167 * @param incX The increment for the elements of vector x, must be larger than zero. 2168 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. 2169 * @param incY The increment for the elements of vector y, must be larger than zero. 2170 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 
2171 */ 2172 public void DSYR2(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 2173 int N = validateSYR2(Element.F64(mRS), Uplo, X, incX, Y, incY, A); 2174 2175 boolean mUseIncSupp = isIncSupp(); 2176 long aID = A.getID(mRS); 2177 long xID = X.getID(mRS); 2178 long yID = Y.getID(mRS); 2179 if (mUseIncSupp) { 2180 aID = getDummyAlloc(A); 2181 xID = getDummyAlloc(X); 2182 yID = getDummyAlloc(Y); 2183 } 2184 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, yID, 0, aID, incX, incY, 0, 0, mUseIncSupp); 2185 } 2186 2187 /** 2188 * DSPR2 performs the symmetric rank 2 operation 2189 * A := alpha*x*y**T + alpha*y*x**T + A 2190 * 2191 * Details: http://www.netlib.org/lapack/explore-html/dd/d9e/dspr2_8f.html 2192 * 2193 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 2194 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 2195 * 'a' to packed matrix 'b'. 2196 * k = 0 2197 * for i in range(0, n): 2198 * for j in range(i, n): 2199 * b[k++] = a[i, j] 2200 * 2201 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. 2202 * @param alpha The scalar alpha. 2203 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 2204 * @param incX The increment for the elements of vector x, must be larger than zero. 2205 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. 2206 * @param incY The increment for the elements of vector y, must be larger than zero. 2207 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64}. 
2208 */ 2209 public void DSPR2(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) { 2210 int N = validateSPR2(Element.F64(mRS), Uplo, X, incX, Y, incY, Ap); 2211 2212 boolean mUseIncSupp = isIncSupp(); 2213 long apID = Ap.getID(mRS); 2214 long xID = X.getID(mRS); 2215 long yID = Y.getID(mRS); 2216 if (mUseIncSupp) { 2217 apID = getDummyAlloc(Ap); 2218 xID = getDummyAlloc(X); 2219 yID = getDummyAlloc(Y); 2220 } 2221 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dspr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, yID, 0, apID, incX, incY, 0, 0, mUseIncSupp); 2222 } 2223 2224 2225 /** 2226 * Level 2, C and Z only 2227 */ 2228 2229 static void validateGERU(Element e, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 2230 if (!A.getType().getElement().isCompatible(e) || 2231 !X.getType().getElement().isCompatible(e) || 2232 !Y.getType().getElement().isCompatible(e)) { 2233 throw new RSRuntimeException("Called BLAS with wrong Element type"); 2234 } 2235 if (X.getType().getY() > 1 || Y.getType().getY() > 1) { 2236 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 2237 } 2238 2239 int M = A.getType().getY(); 2240 int N = A.getType().getX(); 2241 if (incX <= 0 || incY <= 0) { 2242 throw new RSRuntimeException("Vector increments must be greater than 0"); 2243 } 2244 int expectedXDim = 1 + (M - 1) * incX; 2245 if (X.getType().getX() != expectedXDim) { 2246 throw new RSRuntimeException("Incorrect vector dimensions for GERU"); 2247 } 2248 int expectedYDim = 1 + (N - 1) * incY; 2249 if (Y.getType().getX() != expectedYDim) { 2250 throw new RSRuntimeException("Incorrect vector dimensions for GERU"); 2251 } 2252 2253 } 2254 2255 /** 2256 * CHEMV performs the matrix-vector operation 2257 * y := alpha*A*x + beta*y 2258 * 2259 * Details: http://www.netlib.org/lapack/explore-html/d7/d51/chemv_8f.html 2260 * 2261 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 
2262 * @param alpha The scalar alpha. 2263 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 2264 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 2265 * @param incX The increment for the elements of vector x, must be larger than zero. 2266 * @param beta The scalar beta. 2267 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. 2268 * @param incY The increment for the elements of vector y, must be larger than zero. 2269 */ 2270 public void CHEMV(@Uplo int Uplo, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) { 2271 // HEMV is the same as SYR2 validation-wise 2272 int N = validateSYR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, A); 2273 2274 boolean mUseIncSupp = isIncSupp(); 2275 long aID = A.getID(mRS); 2276 long xID = X.getID(mRS); 2277 long yID = Y.getID(mRS); 2278 if (mUseIncSupp) { 2279 aID = getDummyAlloc(A); 2280 xID = getDummyAlloc(X); 2281 yID = getDummyAlloc(Y); 2282 } 2283 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chemv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp); 2284 } 2285 2286 /** 2287 * CHBMV performs the matrix-vector operation 2288 * y := alpha*A*x + beta*y 2289 * 2290 * Details: http://www.netlib.org/lapack/explore-html/db/dc2/chbmv_8f.html 2291 * 2292 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 2293 * but only the region N*(K+1) will be referenced. The following subroutine can is an 2294 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 2295 * for i in range(0, n): 2296 * for j in range(i, min(i+k+1, n)): 2297 * b[i, j-i] = a[i, j] 2298 * 2299 * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied. 
2300 * @param K The number of off-diagonals of the matrix A 2301 * @param alpha The scalar alpha. 2302 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 2303 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 2304 * @param incX The increment for the elements of vector x, must be larger than zero. 2305 * @param beta The scalar beta. 2306 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. 2307 * @param incY The increment for the elements of vector y, must be larger than zero. 2308 */ 2309 public void CHBMV(@Uplo int Uplo, int K, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) { 2310 // HBMV is the same as SYR2 validation-wise 2311 int N = validateSYR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, A); 2312 if (K < 0) { 2313 throw new RSRuntimeException("K must be 0 or greater for HBMV"); 2314 } 2315 2316 boolean mUseIncSupp = isIncSupp(); 2317 long aID = A.getID(mRS); 2318 long xID = X.getID(mRS); 2319 long yID = Y.getID(mRS); 2320 if (mUseIncSupp) { 2321 aID = getDummyAlloc(A); 2322 xID = getDummyAlloc(X); 2323 yID = getDummyAlloc(Y); 2324 } 2325 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp); 2326 } 2327 2328 /** 2329 * CHPMV performs the matrix-vector operation 2330 * y := alpha*A*x + beta*y 2331 * 2332 * Details: http://www.netlib.org/lapack/explore-html/d2/d06/chpmv_8f.html 2333 * 2334 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 2335 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 2336 * 'a' to packed matrix 'b'. 
2337 * k = 0 2338 * for i in range(0, n): 2339 * for j in range(i, n): 2340 * b[k++] = a[i, j] 2341 * 2342 * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form. 2343 * @param alpha The scalar alpha. 2344 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 2345 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 2346 * @param incX The increment for the elements of vector x, must be larger than zero. 2347 * @param beta The scalar beta. 2348 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. 2349 * @param incY The increment for the elements of vector y, must be larger than zero. 2350 */ 2351 public void CHPMV(@Uplo int Uplo, Float2 alpha, Allocation Ap, Allocation X, int incX, Float2 beta, Allocation Y, int incY) { 2352 // HPMV is the same as SPR2 2353 int N = validateSPR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, Ap); 2354 2355 boolean mUseIncSupp = isIncSupp(); 2356 long apID = Ap.getID(mRS); 2357 long xID = X.getID(mRS); 2358 long yID = Y.getID(mRS); 2359 if (mUseIncSupp) { 2360 apID = getDummyAlloc(Ap); 2361 xID = getDummyAlloc(X); 2362 yID = getDummyAlloc(Y); 2363 } 2364 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chpmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, apID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp); 2365 } 2366 2367 /** 2368 * CGERU performs the rank 1 operation 2369 * A := alpha*x*y**T + A 2370 * 2371 * Details: http://www.netlib.org/lapack/explore-html/db/d5f/cgeru_8f.html 2372 * 2373 * @param alpha The scalar alpha. 2374 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 2375 * @param incX The increment for the elements of vector x, must be larger than zero. 2376 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. 
2377 * @param incY The increment for the elements of vector y, must be larger than zero. 2378 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 2379 */ 2380 public void CGERU(Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 2381 validateGERU(Element.F32_2(mRS), X, incX, Y, incY, A); 2382 int M = A.getType().getY(); 2383 int N = A.getType().getX(); 2384 2385 boolean mUseIncSupp = isIncSupp(); 2386 long aID = A.getID(mRS); 2387 long xID = X.getID(mRS); 2388 long yID = Y.getID(mRS); 2389 if (mUseIncSupp) { 2390 aID = getDummyAlloc(A); 2391 xID = getDummyAlloc(X); 2392 yID = getDummyAlloc(Y); 2393 } 2394 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgeru, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, aID, incX, incY, 0, 0, mUseIncSupp); 2395 } 2396 2397 /** 2398 * CGERC performs the rank 1 operation 2399 * A := alpha*x*y**H + A 2400 * 2401 * Details: http://www.netlib.org/lapack/explore-html/dd/d84/cgerc_8f.html 2402 * 2403 * @param alpha The scalar alpha. 2404 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 2405 * @param incX The increment for the elements of vector x, must be larger than zero. 2406 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. 2407 * @param incY The increment for the elements of vector y, must be larger than zero. 2408 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 
2409 */ 2410 public void CGERC(Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 2411 // same as GERU 2412 validateGERU(Element.F32_2(mRS), X, incX, Y, incY, A); 2413 int M = A.getType().getY(); 2414 int N = A.getType().getX(); 2415 2416 boolean mUseIncSupp = isIncSupp(); 2417 long aID = A.getID(mRS); 2418 long xID = X.getID(mRS); 2419 long yID = Y.getID(mRS); 2420 if (mUseIncSupp) { 2421 aID = getDummyAlloc(A); 2422 xID = getDummyAlloc(X); 2423 yID = getDummyAlloc(Y); 2424 } 2425 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgerc, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, aID, incX, incY, 0, 0, mUseIncSupp); 2426 } 2427 2428 /** 2429 * CHER performs the rank 1 operation 2430 * A := alpha*x*x**H + A 2431 * 2432 * Details: http://www.netlib.org/lapack/explore-html/d3/d6d/cher_8f.html 2433 * 2434 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 2435 * @param alpha The scalar alpha. 2436 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 2437 * @param incX The increment for the elements of vector x, must be larger than zero. 2438 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 
2439 */ 2440 public void CHER(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation A) { 2441 // same as SYR 2442 int N = validateSYR(Element.F32_2(mRS), Uplo, X, incX, A); 2443 2444 boolean mUseIncSupp = isIncSupp(); 2445 long aID = A.getID(mRS); 2446 long xID = X.getID(mRS); 2447 if (mUseIncSupp) { 2448 aID = getDummyAlloc(A); 2449 xID = getDummyAlloc(X); 2450 } 2451 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cher, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, xID, 0, 0, 0, aID, incX, 0, 0, 0, mUseIncSupp); 2452 } 2453 2454 /** 2455 * CHPR performs the rank 1 operation 2456 * A := alpha*x*x**H + A 2457 * 2458 * Details: http://www.netlib.org/lapack/explore-html/db/dcd/chpr_8f.html 2459 * 2460 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 2461 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 2462 * 'a' to packed matrix 'b'. 2463 * k = 0 2464 * for i in range(0, n): 2465 * for j in range(i, n): 2466 * b[k++] = a[i, j] 2467 * 2468 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. 2469 * @param alpha The scalar alpha. 2470 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 2471 * @param incX The increment for the elements of vector x, must be larger than zero. 2472 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 
2473 */ 2474 public void CHPR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Ap) { 2475 // equivalent to SPR for validation 2476 int N = validateSPR(Element.F32_2(mRS), Uplo, X, incX, Ap); 2477 2478 boolean mUseIncSupp = isIncSupp(); 2479 long apID = Ap.getID(mRS); 2480 long xID = X.getID(mRS); 2481 if (mUseIncSupp) { 2482 apID = getDummyAlloc(Ap); 2483 xID = getDummyAlloc(X); 2484 } 2485 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chpr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, xID, 0, 0, 0, apID, incX, 0, 0, 0, mUseIncSupp); 2486 } 2487 2488 /** 2489 * CHER2 performs the symmetric rank 2 operation 2490 * A := alpha*x*y**H + alpha*y*x**H + A 2491 * 2492 * Details: http://www.netlib.org/lapack/explore-html/db/d87/cher2_8f.html 2493 * 2494 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 2495 * @param alpha The scalar alpha. 2496 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 2497 * @param incX The increment for the elements of vector x, must be larger than zero. 2498 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. 2499 * @param incY The increment for the elements of vector y, must be larger than zero. 2500 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 
2501 */ 2502 public void CHER2(@Uplo int Uplo, Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 2503 // same as SYR2 2504 int N = validateSYR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, A); 2505 2506 boolean mUseIncSupp = isIncSupp(); 2507 long aID = A.getID(mRS); 2508 long xID = X.getID(mRS); 2509 long yID = Y.getID(mRS); 2510 if (mUseIncSupp) { 2511 aID = getDummyAlloc(A); 2512 xID = getDummyAlloc(X); 2513 yID = getDummyAlloc(Y); 2514 } 2515 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cher2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, aID, incX, incY, 0, 0, mUseIncSupp); 2516 } 2517 2518 /** 2519 * CHPR2 performs the symmetric rank 2 operation 2520 * A := alpha*x*y**H + alpha*y*x**H + A 2521 * 2522 * Details: http://www.netlib.org/lapack/explore-html/d6/d44/chpr2_8f.html 2523 * 2524 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 2525 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 2526 * 'a' to packed matrix 'b'. 2527 * k = 0 2528 * for i in range(0, n): 2529 * for j in range(i, n): 2530 * b[k++] = a[i, j] 2531 * 2532 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. 2533 * @param alpha The scalar alpha. 2534 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 2535 * @param incX The increment for the elements of vector x, must be larger than zero. 2536 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. 2537 * @param incY The increment for the elements of vector y, must be larger than zero. 2538 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 
2539 */ 2540 public void CHPR2(@Uplo int Uplo, Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) { 2541 // same as SPR2 2542 int N = validateSPR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, Ap); 2543 2544 boolean mUseIncSupp = isIncSupp(); 2545 long apID = Ap.getID(mRS); 2546 long xID = X.getID(mRS); 2547 long yID = Y.getID(mRS); 2548 if (mUseIncSupp) { 2549 apID = getDummyAlloc(Ap); 2550 xID = getDummyAlloc(X); 2551 yID = getDummyAlloc(Y); 2552 } 2553 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chpr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, apID, incX, incY, 0, 0, mUseIncSupp); 2554 } 2555 2556 /** 2557 * ZHEMV performs the matrix-vector operation 2558 * y := alpha*A*x + beta*y 2559 * 2560 * Details: http://www.netlib.org/lapack/explore-html/d0/ddd/zhemv_8f.html 2561 * 2562 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 2563 * @param alpha The scalar alpha. 2564 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2565 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2566 * @param incX The increment for the elements of vector x, must be larger than zero. 2567 * @param beta The scalar beta. 2568 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. 2569 * @param incY The increment for the elements of vector y, must be larger than zero. 
2570 */ 2571 public void ZHEMV(@Uplo int Uplo, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) { 2572 // HEMV is the same as SYR2 validation-wise 2573 int N = validateSYR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, A); 2574 2575 boolean mUseIncSupp = isIncSupp(); 2576 long aID = A.getID(mRS); 2577 long xID = X.getID(mRS); 2578 long yID = Y.getID(mRS); 2579 if (mUseIncSupp) { 2580 aID = getDummyAlloc(A); 2581 xID = getDummyAlloc(X); 2582 yID = getDummyAlloc(Y); 2583 } 2584 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhemv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp); 2585 } 2586 2587 /** 2588 * ZHBMV performs the matrix-vector operation 2589 * y := alpha*A*x + beta*y 2590 * 2591 * Details: http://www.netlib.org/lapack/explore-html/d3/d1a/zhbmv_8f.html 2592 * 2593 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 2594 * but only the region N*(K+1) will be referenced. The following subroutine can is an 2595 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 2596 * for i in range(0, n): 2597 * for j in range(i, min(i+k+1, n)): 2598 * b[i, j-i] = a[i, j] 2599 * 2600 * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied. 2601 * @param K The number of off-diagonals of the matrix A 2602 * @param alpha The scalar alpha. 2603 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2604 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2605 * @param incX The increment for the elements of vector x, must be larger than zero. 2606 * @param beta The scalar beta. 2607 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. 
2608 * @param incY The increment for the elements of vector y, must be larger than zero. 2609 */ 2610 public void ZHBMV(@Uplo int Uplo, int K, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) { 2611 // HBMV is the same as SYR2 validation-wise 2612 int N = validateSYR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, A); 2613 if (K < 0) { 2614 throw new RSRuntimeException("K must be 0 or greater for HBMV"); 2615 } 2616 2617 boolean mUseIncSupp = isIncSupp(); 2618 long aID = A.getID(mRS); 2619 long xID = X.getID(mRS); 2620 long yID = Y.getID(mRS); 2621 if (mUseIncSupp) { 2622 aID = getDummyAlloc(A); 2623 xID = getDummyAlloc(X); 2624 yID = getDummyAlloc(Y); 2625 } 2626 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp); 2627 } 2628 2629 /** 2630 * ZHPMV performs the matrix-vector operation 2631 * y := alpha*A*x + beta*y 2632 * 2633 * Details: http://www.netlib.org/lapack/explore-html/d0/d60/zhpmv_8f.html 2634 * 2635 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 2636 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 2637 * 'a' to packed matrix 'b'. 2638 * k = 0 2639 * for i in range(0, n): 2640 * for j in range(i, n): 2641 * b[k++] = a[i, j] 2642 * 2643 * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form. 2644 * @param alpha The scalar alpha. 2645 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2646 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2647 * @param incX The increment for the elements of vector x, must be larger than zero. 2648 * @param beta The scalar beta. 2649 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. 
2650 * @param incY The increment for the elements of vector y, must be larger than zero. 2651 */ 2652 public void ZHPMV(@Uplo int Uplo, Double2 alpha, Allocation Ap, Allocation X, int incX, Double2 beta, Allocation Y, int incY) { 2653 // HPMV is the same as SPR2 2654 int N = validateSPR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, Ap); 2655 2656 boolean mUseIncSupp = isIncSupp(); 2657 long apID = Ap.getID(mRS); 2658 long xID = X.getID(mRS); 2659 long yID = Y.getID(mRS); 2660 if (mUseIncSupp) { 2661 apID = getDummyAlloc(Ap); 2662 xID = getDummyAlloc(X); 2663 yID = getDummyAlloc(Y); 2664 } 2665 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhpmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, apID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp); 2666 } 2667 2668 /** 2669 * ZGERU performs the rank 1 operation 2670 * A := alpha*x*y**T + A 2671 * 2672 * Details: http://www.netlib.org/lapack/explore-html/d7/d12/zgeru_8f.html 2673 * 2674 * @param alpha The scalar alpha. 2675 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2676 * @param incX The increment for the elements of vector x, must be larger than zero. 2677 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. 2678 * @param incY The increment for the elements of vector y, must be larger than zero. 2679 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 
2680 */ 2681 public void ZGERU(Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 2682 validateGERU(Element.F64_2(mRS), X, incX, Y, incY, A); 2683 int M = A.getType().getY(); 2684 int N = A.getType().getX(); 2685 2686 boolean mUseIncSupp = isIncSupp(); 2687 long aID = A.getID(mRS); 2688 long xID = X.getID(mRS); 2689 long yID = Y.getID(mRS); 2690 if (mUseIncSupp) { 2691 aID = getDummyAlloc(A); 2692 xID = getDummyAlloc(X); 2693 yID = getDummyAlloc(Y); 2694 } 2695 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgeru, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, aID, incX, incY, 0, 0, mUseIncSupp); 2696 } 2697 2698 /** 2699 * ZGERC performs the rank 1 operation 2700 * A := alpha*x*y**H + A 2701 * 2702 * Details: http://www.netlib.org/lapack/explore-html/d3/dad/zgerc_8f.html 2703 * 2704 * @param alpha The scalar alpha. 2705 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2706 * @param incX The increment for the elements of vector x, must be larger than zero. 2707 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. 2708 * @param incY The increment for the elements of vector y, must be larger than zero. 2709 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 
2710 */ 2711 public void ZGERC(Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 2712 // same as GERU 2713 validateGERU(Element.F64_2(mRS), X, incX, Y, incY, A); 2714 int M = A.getType().getY(); 2715 int N = A.getType().getX(); 2716 2717 boolean mUseIncSupp = isIncSupp(); 2718 long aID = A.getID(mRS); 2719 long xID = X.getID(mRS); 2720 long yID = Y.getID(mRS); 2721 if (mUseIncSupp) { 2722 aID = getDummyAlloc(A); 2723 xID = getDummyAlloc(X); 2724 yID = getDummyAlloc(Y); 2725 } 2726 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgerc, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, aID, incX, incY, 0, 0, mUseIncSupp); 2727 } 2728 2729 /** 2730 * ZHER performs the rank 1 operation 2731 * A := alpha*x*x**H + A 2732 * 2733 * Details: http://www.netlib.org/lapack/explore-html/de/d0e/zher_8f.html 2734 * 2735 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 2736 * @param alpha The scalar alpha. 2737 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2738 * @param incX The increment for the elements of vector x, must be larger than zero. 2739 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 
2740 */ 2741 public void ZHER(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation A) { 2742 // same as SYR 2743 int N = validateSYR(Element.F64_2(mRS), Uplo, X, incX, A); 2744 2745 boolean mUseIncSupp = isIncSupp(); 2746 long aID = A.getID(mRS); 2747 long xID = X.getID(mRS); 2748 if (mUseIncSupp) { 2749 aID = getDummyAlloc(A); 2750 xID = getDummyAlloc(X); 2751 } 2752 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zher, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, xID, 0, 0, 0, aID, incX, 0, 0, 0, mUseIncSupp); 2753 } 2754 2755 /** 2756 * ZHPR performs the rank 1 operation 2757 * A := alpha*x*x**H + A 2758 * 2759 * Details: http://www.netlib.org/lapack/explore-html/de/de1/zhpr_8f.html 2760 * 2761 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 2762 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 2763 * 'a' to packed matrix 'b'. 2764 * k = 0 2765 * for i in range(0, n): 2766 * for j in range(i, n): 2767 * b[k++] = a[i, j] 2768 * 2769 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. 2770 * @param alpha The scalar alpha. 2771 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2772 * @param incX The increment for the elements of vector x, must be larger than zero. 2773 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 
2774 */ 2775 public void ZHPR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Ap) { 2776 // equivalent to SPR for validation 2777 int N = validateSPR(Element.F64_2(mRS), Uplo, X, incX, Ap); 2778 2779 boolean mUseIncSupp = isIncSupp(); 2780 long apID = Ap.getID(mRS); 2781 long xID = X.getID(mRS); 2782 if (mUseIncSupp) { 2783 apID = getDummyAlloc(Ap); 2784 xID = getDummyAlloc(X); 2785 } 2786 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhpr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, xID, 0, 0, 0, apID, incX, 0, 0, 0, mUseIncSupp); 2787 } 2788 2789 /** 2790 * ZHER2 performs the symmetric rank 2 operation 2791 * A := alpha*x*y**H + alpha*y*x**H + A 2792 * 2793 * Details: http://www.netlib.org/lapack/explore-html/da/d8a/zher2_8f.html 2794 * 2795 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 2796 * @param alpha The scalar alpha. 2797 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2798 * @param incX The increment for the elements of vector x, must be larger than zero. 2799 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. 2800 * @param incY The increment for the elements of vector y, must be larger than zero. 2801 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 
2802 */ 2803 public void ZHER2(@Uplo int Uplo, Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 2804 // same as SYR2 2805 int N = validateSYR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, A); 2806 2807 boolean mUseIncSupp = isIncSupp(); 2808 long aID = A.getID(mRS); 2809 long xID = X.getID(mRS); 2810 long yID = Y.getID(mRS); 2811 if (mUseIncSupp) { 2812 aID = getDummyAlloc(A); 2813 xID = getDummyAlloc(X); 2814 yID = getDummyAlloc(Y); 2815 } 2816 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zher2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, aID, incX, incY, 0, 0, mUseIncSupp); 2817 } 2818 2819 /** 2820 * ZHPR2 performs the symmetric rank 2 operation 2821 * A := alpha*x*y**H + alpha*y*x**H + A 2822 * 2823 * Details: http://www.netlib.org/lapack/explore-html/d5/d52/zhpr2_8f.html 2824 * 2825 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 2826 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 2827 * 'a' to packed matrix 'b'. 2828 * k = 0 2829 * for i in range(0, n): 2830 * for j in range(i, n): 2831 * b[k++] = a[i, j] 2832 * 2833 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. 2834 * @param alpha The scalar alpha. 2835 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2836 * @param incX The increment for the elements of vector x, must be larger than zero. 2837 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. 2838 * @param incY The increment for the elements of vector y, must be larger than zero. 2839 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 
2840 */ 2841 public void ZHPR2(@Uplo int Uplo, Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) { 2842 // same as SPR2 2843 int N = validateSPR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, Ap); 2844 2845 boolean mUseIncSupp = isIncSupp(); 2846 long apID = Ap.getID(mRS); 2847 long xID = X.getID(mRS); 2848 long yID = Y.getID(mRS); 2849 if (mUseIncSupp) { 2850 apID = getDummyAlloc(Ap); 2851 xID = getDummyAlloc(X); 2852 yID = getDummyAlloc(Y); 2853 } 2854 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhpr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, apID, incX, incY, 0, 0, mUseIncSupp); 2855 } 2856 2857 2858 /** 2859 * Level 3 BLAS 2860 */ 2861 2862 static void validateL3(Element e, int TransA, int TransB, int Side, Allocation A, Allocation B, Allocation C) { 2863 int aM = -1, aN = -1, bM = -1, bN = -1, cM = -1, cN = -1; 2864 if ((A != null && !A.getType().getElement().isCompatible(e)) || 2865 (B != null && !B.getType().getElement().isCompatible(e)) || 2866 (C != null && !C.getType().getElement().isCompatible(e))) { 2867 throw new RSRuntimeException("Called BLAS with wrong Element type"); 2868 } 2869 if (C == null) { 2870 //since matrix C is used to store the result, it cannot be null. 
2871 throw new RSRuntimeException("Allocation C cannot be null"); 2872 } 2873 cM = C.getType().getY(); 2874 cN = C.getType().getX(); 2875 2876 if (Side == RIGHT) { 2877 if ((A == null && B != null) || (A != null && B == null)) { 2878 throw new RSRuntimeException("Provided Matrix A without Matrix B, or vice versa"); 2879 } 2880 if (B != null) { 2881 bM = A.getType().getY(); 2882 bN = A.getType().getX(); 2883 } 2884 if (A != null) { 2885 aM = B.getType().getY(); 2886 aN = B.getType().getX(); 2887 } 2888 } else { 2889 if (A != null) { 2890 if (TransA == TRANSPOSE || TransA == CONJ_TRANSPOSE) { 2891 aN = A.getType().getY(); 2892 aM = A.getType().getX(); 2893 } else { 2894 aM = A.getType().getY(); 2895 aN = A.getType().getX(); 2896 } 2897 } 2898 if (B != null) { 2899 if (TransB == TRANSPOSE || TransB == CONJ_TRANSPOSE) { 2900 bN = B.getType().getY(); 2901 bM = B.getType().getX(); 2902 } else { 2903 bM = B.getType().getY(); 2904 bN = B.getType().getX(); 2905 } 2906 } 2907 } 2908 if (A != null && B != null && C != null) { 2909 if (aN != bM || aM != cM || bN != cN) { 2910 throw new RSRuntimeException("Called BLAS with invalid dimensions"); 2911 } 2912 } else if (A != null && C != null) { 2913 // A and C only, for SYRK 2914 if (cM != cN) { 2915 throw new RSRuntimeException("Matrix C is not symmetric"); 2916 } 2917 if (aM != cM) { 2918 throw new RSRuntimeException("Called BLAS with invalid dimensions"); 2919 } 2920 } else if (A != null && B != null) { 2921 // A and B only 2922 if (aN != bM) { 2923 throw new RSRuntimeException("Called BLAS with invalid dimensions"); 2924 } 2925 } 2926 2927 } 2928 2929 /** 2930 * SGEMM performs one of the matrix-matrix operations 2931 * C := alpha*op(A)*op(B) + beta*C where op(X) is one of op(X) = X or op(X) = X**T 2932 * 2933 * Details: http://www.netlib.org/lapack/explore-html/d4/de2/sgemm_8f.html 2934 * 2935 * @param TransA The type of transpose applied to matrix A. 2936 * @param TransB The type of transpose applied to matrix B. 
2937 * @param alpha The scalar alpha. 2938 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 2939 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}. 2940 * @param beta The scalar beta. 2941 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}. 2942 */ 2943 public void SGEMM(@Transpose int TransA, @Transpose int TransB, float alpha, Allocation A, 2944 Allocation B, float beta, Allocation C) { 2945 validateTranspose(TransA); 2946 validateTranspose(TransB); 2947 validateL3(Element.F32(mRS), TransA, TransB, 0, A, B, C); 2948 2949 int M = -1, N = -1, K = -1; 2950 if (TransA != NO_TRANSPOSE) { 2951 M = A.getType().getX(); 2952 K = A.getType().getY(); 2953 } else { 2954 M = A.getType().getY(); 2955 K = A.getType().getX(); 2956 } 2957 if (TransB != NO_TRANSPOSE) { 2958 N = B.getType().getY(); 2959 } else { 2960 N = B.getType().getX(); 2961 } 2962 2963 boolean mUseIncSupp = isIncSupp(); 2964 long aID = A.getID(mRS); 2965 long bID = B.getID(mRS); 2966 long cID = C.getID(mRS); 2967 if (mUseIncSupp) { 2968 aID = getDummyAlloc(A); 2969 bID = getDummyAlloc(B); 2970 cID = getDummyAlloc(C); 2971 } 2972 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sgemm, TransA, TransB, 0, 0, 0, M, N, K, alpha, aID, bID, 2973 beta, cID, 0, 0, 0, 0, mUseIncSupp); 2974 } 2975 2976 /** 2977 * DGEMM performs one of the matrix-matrix operations 2978 * C := alpha*op(A)*op(B) + beta*C where op(X) is one of op(X) = X or op(X) = X**T 2979 * 2980 * Details: http://www.netlib.org/lapack/explore-html/d7/d2b/dgemm_8f.html 2981 * 2982 * @param TransA The type of transpose applied to matrix A. 2983 * @param TransB The type of transpose applied to matrix B. 2984 * @param alpha The scalar alpha. 2985 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 
2986 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}. 2987 * @param beta The scalar beta. 2988 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}. 2989 */ 2990 public void DGEMM(@Transpose int TransA, @Transpose int TransB, double alpha, Allocation A, 2991 Allocation B, double beta, Allocation C) { 2992 validateTranspose(TransA); 2993 validateTranspose(TransB); 2994 validateL3(Element.F64(mRS), TransA, TransB, 0, A, B, C); 2995 int M = -1, N = -1, K = -1; 2996 if (TransA != NO_TRANSPOSE) { 2997 M = A.getType().getX(); 2998 K = A.getType().getY(); 2999 } else { 3000 M = A.getType().getY(); 3001 K = A.getType().getX(); 3002 } 3003 if (TransB != NO_TRANSPOSE) { 3004 N = B.getType().getY(); 3005 } else { 3006 N = B.getType().getX(); 3007 } 3008 3009 boolean mUseIncSupp = isIncSupp(); 3010 long aID = A.getID(mRS); 3011 long bID = B.getID(mRS); 3012 long cID = C.getID(mRS); 3013 if (mUseIncSupp) { 3014 aID = getDummyAlloc(A); 3015 bID = getDummyAlloc(B); 3016 cID = getDummyAlloc(C); 3017 } 3018 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dgemm, TransA, TransB, 0, 0, 0, M, N, K, alpha, aID, bID, 3019 beta, cID, 0, 0, 0, 0, mUseIncSupp); 3020 } 3021 3022 /** 3023 * CGEMM performs one of the matrix-matrix operations 3024 * C := alpha*op(A)*op(B) + beta*C where op(X) is one of op(X) = X or op(X) = X**T or op(X) = X**H 3025 * 3026 * Details: http://www.netlib.org/lapack/explore-html/d6/d5b/cgemm_8f.html 3027 * 3028 * @param TransA The type of transpose applied to matrix A. 3029 * @param TransB The type of transpose applied to matrix B. 3030 * @param alpha The scalar alpha. 3031 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 3032 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}. 3033 * @param beta The scalar beta. 
3034 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}. 3035 */ 3036 public void CGEMM(@Transpose int TransA, @Transpose int TransB, Float2 alpha, Allocation A, 3037 Allocation B, Float2 beta, Allocation C) { 3038 validateTranspose(TransA); 3039 validateTranspose(TransB); 3040 validateL3(Element.F32_2(mRS), TransA, TransB, 0, A, B, C); 3041 int M = -1, N = -1, K = -1; 3042 if (TransA != NO_TRANSPOSE) { 3043 M = A.getType().getX(); 3044 K = A.getType().getY(); 3045 } else { 3046 M = A.getType().getY(); 3047 K = A.getType().getX(); 3048 } 3049 if (TransB != NO_TRANSPOSE) { 3050 N = B.getType().getY(); 3051 } else { 3052 N = B.getType().getX(); 3053 } 3054 3055 boolean mUseIncSupp = isIncSupp(); 3056 long aID = A.getID(mRS); 3057 long bID = B.getID(mRS); 3058 long cID = C.getID(mRS); 3059 if (mUseIncSupp) { 3060 aID = getDummyAlloc(A); 3061 bID = getDummyAlloc(B); 3062 cID = getDummyAlloc(C); 3063 } 3064 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgemm, TransA, TransB, 0, 0, 0, M, N, K, alpha.x, alpha.y, aID, bID, 3065 beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp); 3066 } 3067 3068 /** 3069 * ZGEMM performs one of the matrix-matrix operations 3070 * C := alpha*op(A)*op(B) + beta*C where op(X) is one of op(X) = X or op(X) = X**T or op(X) = X**H 3071 * 3072 * Details: http://www.netlib.org/lapack/explore-html/d7/d76/zgemm_8f.html 3073 * 3074 * @param TransA The type of transpose applied to matrix A. 3075 * @param TransB The type of transpose applied to matrix B. 3076 * @param alpha The scalar alpha. 3077 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2 3078 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2 3079 * @param beta The scalar beta. 
3080 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2 3081 */ 3082 public void ZGEMM(@Transpose int TransA, @Transpose int TransB, Double2 alpha, Allocation A, 3083 Allocation B, Double2 beta, Allocation C) { 3084 validateTranspose(TransA); 3085 validateTranspose(TransB); 3086 validateL3(Element.F64_2(mRS), TransA, TransB, 0, A, B, C); 3087 int M = -1, N = -1, K = -1; 3088 if (TransA != NO_TRANSPOSE) { 3089 M = A.getType().getX(); 3090 K = A.getType().getY(); 3091 } else { 3092 M = A.getType().getY(); 3093 K = A.getType().getX(); 3094 } 3095 if (TransB != NO_TRANSPOSE) { 3096 N = B.getType().getY(); 3097 } else { 3098 N = B.getType().getX(); 3099 } 3100 3101 boolean mUseIncSupp = isIncSupp(); 3102 long aID = A.getID(mRS); 3103 long bID = B.getID(mRS); 3104 long cID = C.getID(mRS); 3105 if (mUseIncSupp) { 3106 aID = getDummyAlloc(A); 3107 bID = getDummyAlloc(B); 3108 cID = getDummyAlloc(C); 3109 } 3110 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgemm, TransA, TransB, 0, 0, 0, M, N, K, alpha.x, alpha.y, aID, bID, 3111 beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp); 3112 } 3113 3114 /** 3115 * SSYMM performs one of the matrix-matrix operations 3116 * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C 3117 * 3118 * Details: http://www.netlib.org/lapack/explore-html/d7/d42/ssymm_8f.html 3119 * 3120 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3121 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 3122 * @param alpha The scalar alpha. 3123 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 3124 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}. 3125 * @param beta The scalar beta. 3126 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}. 
3127 */ 3128 public void SSYMM(@Side int Side, @Uplo int Uplo, float alpha, Allocation A, 3129 Allocation B, float beta, Allocation C) { 3130 validateSide(Side); 3131 validateUplo(Uplo); 3132 //For SYMM, Matrix A should be symmetric 3133 if (A.getType().getX() != A.getType().getY()) { 3134 throw new RSRuntimeException("Matrix A is not symmetric"); 3135 } 3136 validateL3(Element.F32(mRS), 0, 0, Side, A, B, C); 3137 3138 boolean mUseIncSupp = isIncSupp(); 3139 long aID = A.getID(mRS); 3140 long bID = B.getID(mRS); 3141 long cID = C.getID(mRS); 3142 if (mUseIncSupp) { 3143 aID = getDummyAlloc(A); 3144 bID = getDummyAlloc(B); 3145 cID = getDummyAlloc(C); 3146 } 3147 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha, aID, bID, 3148 beta, cID, 0, 0, 0, 0, mUseIncSupp); 3149 } 3150 3151 /** 3152 * DSYMM performs one of the matrix-matrix operations 3153 * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C 3154 * 3155 * Details: http://www.netlib.org/lapack/explore-html/d8/db0/dsymm_8f.html 3156 * 3157 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3158 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 3159 * @param alpha The scalar alpha. 3160 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 3161 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}. 3162 * @param beta The scalar beta. 3163 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}. 
3164 */ 3165 public void DSYMM(@Side int Side, @Uplo int Uplo, double alpha, Allocation A, 3166 Allocation B, double beta, Allocation C) { 3167 validateSide(Side); 3168 validateUplo(Uplo); 3169 if (A.getType().getX() != A.getType().getY()) { 3170 throw new RSRuntimeException("Matrix A is not symmetric"); 3171 } 3172 validateL3(Element.F64(mRS), 0, 0, Side, A, B, C); 3173 3174 boolean mUseIncSupp = isIncSupp(); 3175 long aID = A.getID(mRS); 3176 long bID = B.getID(mRS); 3177 long cID = C.getID(mRS); 3178 if (mUseIncSupp) { 3179 aID = getDummyAlloc(A); 3180 bID = getDummyAlloc(B); 3181 cID = getDummyAlloc(C); 3182 } 3183 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha, aID, bID, 3184 beta, cID, 0, 0, 0, 0, mUseIncSupp); 3185 } 3186 3187 /** 3188 * CSYMM performs one of the matrix-matrix operations 3189 * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C 3190 * 3191 * Details: http://www.netlib.org/lapack/explore-html/db/d59/csymm_8f.html 3192 * 3193 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3194 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 3195 * @param alpha The scalar alpha. 3196 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 3197 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}. 3198 * @param beta The scalar beta. 3199 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}. 
3200 */ 3201 public void CSYMM(@Side int Side, @Uplo int Uplo, Float2 alpha, Allocation A, 3202 Allocation B, Float2 beta, Allocation C) { 3203 validateSide(Side); 3204 validateUplo(Uplo); 3205 if (A.getType().getX() != A.getType().getY()) { 3206 throw new RSRuntimeException("Matrix A is not symmetric"); 3207 } 3208 validateL3(Element.F32_2(mRS), 0, 0, Side, A, B, C); 3209 3210 boolean mUseIncSupp = isIncSupp(); 3211 long aID = A.getID(mRS); 3212 long bID = B.getID(mRS); 3213 long cID = C.getID(mRS); 3214 if (mUseIncSupp) { 3215 aID = getDummyAlloc(A); 3216 bID = getDummyAlloc(B); 3217 cID = getDummyAlloc(C); 3218 } 3219 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_csymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha.x, alpha.y, aID, bID, 3220 beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp); 3221 } 3222 3223 /** 3224 * ZSYMM performs one of the matrix-matrix operations 3225 * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C 3226 * 3227 * Details: http://www.netlib.org/lapack/explore-html/df/d51/zsymm_8f.html 3228 * 3229 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3230 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 3231 * @param alpha The scalar alpha. 3232 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 3233 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}. 3234 * @param beta The scalar beta. 3235 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}. 
3236 */ 3237 public void ZSYMM(@Side int Side, @Uplo int Uplo, Double2 alpha, Allocation A, 3238 Allocation B, Double2 beta, Allocation C) { 3239 validateSide(Side); 3240 validateUplo(Uplo); 3241 if (A.getType().getX() != A.getType().getY()) { 3242 throw new RSRuntimeException("Matrix A is not symmetric"); 3243 } 3244 validateL3(Element.F64_2(mRS), 0, 0, Side, A, B, C); 3245 3246 boolean mUseIncSupp = isIncSupp(); 3247 long aID = A.getID(mRS); 3248 long bID = B.getID(mRS); 3249 long cID = C.getID(mRS); 3250 if (mUseIncSupp) { 3251 aID = getDummyAlloc(A); 3252 bID = getDummyAlloc(B); 3253 cID = getDummyAlloc(C); 3254 } 3255 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zsymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha.x, alpha.y, aID, bID, 3256 beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp); 3257 } 3258 3259 /** 3260 * SSYRK performs one of the symmetric rank k operations 3261 * C := alpha*A*A**T + beta*C or C := alpha*A**T*A + beta*C 3262 * 3263 * Details: http://www.netlib.org/lapack/explore-html/d0/d40/ssyrk_8f.html 3264 * 3265 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 3266 * @param Trans The type of transpose applied to the operation. 3267 * @param alpha The scalar alpha. 3268 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 3269 * @param beta The scalar beta. 3270 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}. 
3271 */ 3272 public void SSYRK(@Uplo int Uplo, @Transpose int Trans, float alpha, Allocation A, float beta, Allocation C) { 3273 validateTranspose(Trans); 3274 validateUplo(Uplo); 3275 validateL3(Element.F32(mRS), Trans, 0, 0, A, null, C); 3276 int K = -1; 3277 if (Trans != NO_TRANSPOSE) { 3278 K = A.getType().getY(); 3279 } else { 3280 K = A.getType().getX(); 3281 } 3282 3283 boolean mUseIncSupp = isIncSupp(); 3284 long aID = A.getID(mRS); 3285 long cID = C.getID(mRS); 3286 if (mUseIncSupp) { 3287 aID = getDummyAlloc(A); 3288 cID = getDummyAlloc(C); 3289 } 3290 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, aID, 0, beta, cID, 0, 0, 0, 0, mUseIncSupp); 3291 } 3292 3293 /** 3294 * DSYRK performs one of the symmetric rank k operations 3295 * C := alpha*A*A**T + beta*C or C := alpha*A**T*A + beta*C 3296 * 3297 * Details: http://www.netlib.org/lapack/explore-html/dc/d05/dsyrk_8f.html 3298 * 3299 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 3300 * @param Trans The type of transpose applied to the operation. 3301 * @param alpha The scalar alpha. 3302 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 3303 * @param beta The scalar beta. 3304 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}. 
3305 */ 3306 public void DSYRK(@Uplo int Uplo, @Transpose int Trans, double alpha, Allocation A, double beta, Allocation C) { 3307 validateTranspose(Trans); 3308 validateUplo(Uplo); 3309 validateL3(Element.F64(mRS), Trans, 0, 0, A, null, C); 3310 int K = -1; 3311 if (Trans != NO_TRANSPOSE) { 3312 K = A.getType().getY(); 3313 } else { 3314 K = A.getType().getX(); 3315 } 3316 3317 boolean mUseIncSupp = isIncSupp(); 3318 long aID = A.getID(mRS); 3319 long cID = C.getID(mRS); 3320 if (mUseIncSupp) { 3321 aID = getDummyAlloc(A); 3322 cID = getDummyAlloc(C); 3323 } 3324 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, aID, 0, beta, cID, 0, 0, 0, 0, mUseIncSupp); 3325 } 3326 3327 /** 3328 * CSYRK performs one of the symmetric rank k operations 3329 * C := alpha*A*A**T + beta*C or C := alpha*A**T*A + beta*C 3330 * 3331 * Details: http://www.netlib.org/lapack/explore-html/d3/d6a/csyrk_8f.html 3332 * 3333 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 3334 * @param Trans The type of transpose applied to the operation. 3335 * @param alpha The scalar alpha. 3336 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 3337 * @param beta The scalar beta. 3338 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}. 
3339 */ 3340 public void CSYRK(@Uplo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Float2 beta, Allocation C) { 3341 validateTranspose(Trans); 3342 validateUplo(Uplo); 3343 validateL3(Element.F32_2(mRS), Trans, 0, 0, A, null, C); 3344 int K = -1; 3345 if (Trans != NO_TRANSPOSE) { 3346 K = A.getType().getY(); 3347 } else { 3348 K = A.getType().getX(); 3349 } 3350 3351 boolean mUseIncSupp = isIncSupp(); 3352 long aID = A.getID(mRS); 3353 long cID = C.getID(mRS); 3354 if (mUseIncSupp) { 3355 aID = getDummyAlloc(A); 3356 cID = getDummyAlloc(C); 3357 } 3358 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_csyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, aID, 0, beta.x, beta.y, 3359 C.getID(mRS), 0, 0, 0, 0, mUseIncSupp); 3360 } 3361 3362 /** 3363 * ZSYRK performs one of the symmetric rank k operations 3364 * C := alpha*A*A**T + beta*C or C := alpha*A**T*A + beta*C 3365 * 3366 * Details: http://www.netlib.org/lapack/explore-html/de/d54/zsyrk_8f.html 3367 * 3368 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 3369 * @param Trans The type of transpose applied to the operation. 3370 * @param alpha The scalar alpha. 3371 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 3372 * @param beta The scalar beta. 3373 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}. 
3374 */ 3375 public void ZSYRK(@Uplo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Double2 beta, Allocation C) { 3376 validateTranspose(Trans); 3377 validateUplo(Uplo); 3378 validateL3(Element.F64_2(mRS), Trans, 0, 0, A, null, C); 3379 int K = -1; 3380 if (Trans != NO_TRANSPOSE) { 3381 K = A.getType().getY(); 3382 } else { 3383 K = A.getType().getX(); 3384 } 3385 3386 boolean mUseIncSupp = isIncSupp(); 3387 long aID = A.getID(mRS); 3388 long cID = C.getID(mRS); 3389 if (mUseIncSupp) { 3390 aID = getDummyAlloc(A); 3391 cID = getDummyAlloc(C); 3392 } 3393 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zsyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, aID, 0, beta.x, beta.y, 3394 C.getID(mRS), 0, 0, 0, 0, mUseIncSupp); 3395 } 3396 3397 static void validateSYR2K(Element e, @Transpose int Trans, Allocation A, Allocation B, Allocation C) { 3398 validateTranspose(Trans); 3399 if (!A.getType().getElement().isCompatible(e) || 3400 !B.getType().getElement().isCompatible(e) || 3401 !C.getType().getElement().isCompatible(e)) { 3402 throw new RSRuntimeException("Called BLAS with wrong Element type"); 3403 } 3404 int Cdim = -1; 3405 // A is n x k if no transpose, k x n if transpose 3406 // C is n x n 3407 if (Trans == TRANSPOSE) { 3408 // check columns versus C 3409 Cdim = A.getType().getX(); 3410 } else { 3411 // check rows versus C 3412 Cdim = A.getType().getY(); 3413 } 3414 if (C.getType().getX() != Cdim || C.getType().getY() != Cdim) { 3415 throw new RSRuntimeException("Invalid symmetric matrix in SYR2K"); 3416 } 3417 // A dims == B dims 3418 if (A.getType().getX() != B.getType().getX() || A.getType().getY() != B.getType().getY()) { 3419 throw new RSRuntimeException("Invalid A and B in SYR2K"); 3420 } 3421 } 3422 3423 /** 3424 * SSYR2K performs one of the symmetric rank 2k operations 3425 * C := alpha*A*B**T + alpha*B*A**T + beta*C or C := alpha*A**T*B + alpha*B**T*A + beta*C 3426 * 3427 * Details: 
http://www.netlib.org/lapack/explore-html/df/d3d/ssyr2k_8f.html 3428 * 3429 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 3430 * @param Trans The type of transpose applied to the operation. 3431 * @param alpha The scalar alpha. 3432 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 3433 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}. 3434 * @param beta The scalar beta. 3435 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}. 3436 */ 3437 public void SSYR2K(@Uplo int Uplo, @Transpose int Trans, float alpha, Allocation A, Allocation B, float beta, Allocation C) { 3438 validateUplo(Uplo); 3439 validateSYR2K(Element.F32(mRS), Trans, A, B, C); 3440 int K = -1; 3441 if (Trans != NO_TRANSPOSE) { 3442 K = A.getType().getY(); 3443 } else { 3444 K = A.getType().getX(); 3445 } 3446 3447 boolean mUseIncSupp = isIncSupp(); 3448 long aID = A.getID(mRS); 3449 long bID = B.getID(mRS); 3450 long cID = C.getID(mRS); 3451 if (mUseIncSupp) { 3452 aID = getDummyAlloc(A); 3453 bID = getDummyAlloc(B); 3454 cID = getDummyAlloc(C); 3455 } 3456 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, aID, bID, beta, cID, 0, 0, 0, 0, mUseIncSupp); 3457 } 3458 3459 /** 3460 * DSYR2K performs one of the symmetric rank 2k operations 3461 * C := alpha*A*B**T + alpha*B*A**T + beta*C or C := alpha*A**T*B + alpha*B**T*A + beta*C 3462 * 3463 * Details: http://www.netlib.org/lapack/explore-html/d1/dec/dsyr2k_8f.html 3464 * 3465 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 3466 * @param Trans The type of transpose applied to the operation. 3467 * @param alpha The scalar alpha. 3468 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 
3469 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}. 3470 * @param beta The scalar beta. 3471 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}. 3472 */ 3473 public void DSYR2K(@Uplo int Uplo, @Transpose int Trans, double alpha, Allocation A, Allocation B, double beta, Allocation C) { 3474 validateUplo(Uplo); 3475 validateSYR2K(Element.F64(mRS), Trans, A, B, C); 3476 int K = -1; 3477 if (Trans != NO_TRANSPOSE) { 3478 K = A.getType().getY(); 3479 } else { 3480 K = A.getType().getX(); 3481 } 3482 3483 boolean mUseIncSupp = isIncSupp(); 3484 long aID = A.getID(mRS); 3485 long bID = B.getID(mRS); 3486 long cID = C.getID(mRS); 3487 if (mUseIncSupp) { 3488 aID = getDummyAlloc(A); 3489 bID = getDummyAlloc(B); 3490 cID = getDummyAlloc(C); 3491 } 3492 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, aID, bID, beta, cID, 0, 0, 0, 0, mUseIncSupp); 3493 } 3494 3495 /** 3496 * CSYR2K performs one of the symmetric rank 2k operations 3497 * C := alpha*A*B**T + alpha*B*A**T + beta*C or C := alpha*A**T*B + alpha*B**T*A + beta*C 3498 * 3499 * Details: http://www.netlib.org/lapack/explore-html/de/d7e/csyr2k_8f.html 3500 * 3501 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 3502 * @param Trans The type of transpose applied to the operation. 3503 * @param alpha The scalar alpha. 3504 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 3505 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}. 3506 * @param beta The scalar beta. 3507 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}. 
3508 */ 3509 public void CSYR2K(@Uplo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C) { 3510 validateUplo(Uplo); 3511 validateSYR2K(Element.F32_2(mRS), Trans, A, B, C); 3512 int K = -1; 3513 if (Trans != NO_TRANSPOSE) { 3514 K = A.getType().getY(); 3515 } else { 3516 K = A.getType().getX(); 3517 } 3518 3519 boolean mUseIncSupp = isIncSupp(); 3520 long aID = A.getID(mRS); 3521 long bID = B.getID(mRS); 3522 long cID = C.getID(mRS); 3523 if (mUseIncSupp) { 3524 aID = getDummyAlloc(A); 3525 bID = getDummyAlloc(B); 3526 cID = getDummyAlloc(C); 3527 } 3528 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_csyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, aID, bID, beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp); 3529 } 3530 3531 /** 3532 * ZSYR2K performs one of the symmetric rank 2k operations 3533 * C := alpha*A*B**T + alpha*B*A**T + beta*C or C := alpha*A**T*B + alpha*B**T*A + beta*C 3534 * 3535 * Details: http://www.netlib.org/lapack/explore-html/df/d20/zsyr2k_8f.html 3536 * 3537 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 3538 * @param Trans The type of transpose applied to the operation. 3539 * @param alpha The scalar alpha. 3540 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 3541 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}. 3542 * @param beta The scalar beta. 3543 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}. 
3544 */ 3545 public void ZSYR2K(@Uplo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C) { 3546 validateUplo(Uplo); 3547 validateSYR2K(Element.F64_2(mRS), Trans, A, B, C); 3548 int K = -1; 3549 if (Trans != NO_TRANSPOSE) { 3550 K = A.getType().getY(); 3551 } else { 3552 K = A.getType().getX(); 3553 } 3554 3555 boolean mUseIncSupp = isIncSupp(); 3556 long aID = A.getID(mRS); 3557 long bID = B.getID(mRS); 3558 long cID = C.getID(mRS); 3559 if (mUseIncSupp) { 3560 aID = getDummyAlloc(A); 3561 bID = getDummyAlloc(B); 3562 cID = getDummyAlloc(C); 3563 } 3564 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zsyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, aID, bID, beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp); 3565 } 3566 3567 static void validateTRMM(Element e, @Side int Side, @Transpose int TransA, Allocation A, Allocation B) { 3568 validateSide(Side); 3569 validateTranspose(TransA); 3570 int aM = -1, aN = -1, bM = -1, bN = -1; 3571 if (!A.getType().getElement().isCompatible(e) || 3572 !B.getType().getElement().isCompatible(e)) { 3573 throw new RSRuntimeException("Called BLAS with wrong Element type"); 3574 } 3575 3576 aM = A.getType().getY(); 3577 aN = A.getType().getX(); 3578 if (aM != aN) { 3579 throw new RSRuntimeException("Called TRMM with a non-symmetric matrix A"); 3580 } 3581 3582 bM = B.getType().getY(); 3583 bN = B.getType().getX(); 3584 if (Side == LEFT) { 3585 if (aN != bM) { 3586 throw new RSRuntimeException("Called TRMM with invalid matrices"); 3587 } 3588 } else { 3589 if (bN != aM) { 3590 throw new RSRuntimeException("Called TRMM with invalid matrices"); 3591 } 3592 } 3593 } 3594 3595 /** 3596 * STRMM performs one of the matrix-matrix operations 3597 * B := alpha*op(A)*B or B := alpha*B*op(A) 3598 * op(A) is one of op(A) = A or op(A) = A**T 3599 * 3600 * Details: http://www.netlib.org/lapack/explore-html/df/d01/strmm_8f.html 3601 * 3602 * @param Side Specifies whether the 
symmetric matrix A appears on the left or right. 3603 * @param Uplo Specifies whether matrix A is upper or lower triangular. 3604 * @param TransA The type of transpose applied to matrix A. 3605 * @param Diag Specifies whether or not A is unit triangular. 3606 * @param alpha The scalar alpha. 3607 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 3608 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}. 3609 */ 3610 public void STRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, float alpha, Allocation A, Allocation B) { 3611 validateUplo(Uplo); 3612 validateDiag(Diag); 3613 validateTRMM(Element.F32(mRS), Side, TransA, A, B); 3614 3615 boolean mUseIncSupp = isIncSupp(); 3616 long aID = A.getID(mRS); 3617 long bID = B.getID(mRS); 3618 if (mUseIncSupp) { 3619 aID = getDummyAlloc(A); 3620 bID = getDummyAlloc(B); 3621 } 3622 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, 3623 alpha, aID, bID, 0.f, 0, 0, 0, 0, 0, mUseIncSupp); 3624 } 3625 3626 /** 3627 * DTRMM performs one of the matrix-matrix operations 3628 * B := alpha*op(A)*B or B := alpha*B*op(A) 3629 * op(A) is one of op(A) = A or op(A) = A**T 3630 * 3631 * Details: http://www.netlib.org/lapack/explore-html/dd/d19/dtrmm_8f.html 3632 * 3633 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3634 * @param Uplo Specifies whether matrix A is upper or lower triangular. 3635 * @param TransA The type of transpose applied to matrix A. 3636 * @param Diag Specifies whether or not A is unit triangular. 3637 * @param alpha The scalar alpha. 3638 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 3639 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}. 
3640 */ 3641 public void DTRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, double alpha, Allocation A, Allocation B) { 3642 validateUplo(Uplo); 3643 validateDiag(Diag); 3644 validateTRMM(Element.F64(mRS), Side, TransA, A, B); 3645 3646 boolean mUseIncSupp = isIncSupp(); 3647 long aID = A.getID(mRS); 3648 long bID = B.getID(mRS); 3649 if (mUseIncSupp) { 3650 aID = getDummyAlloc(A); 3651 bID = getDummyAlloc(B); 3652 } 3653 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, 3654 alpha, aID, bID, 0, 0, 0, 0, 0, 0, mUseIncSupp); 3655 } 3656 3657 /** 3658 * CTRMM performs one of the matrix-matrix operations 3659 * B := alpha*op(A)*B or B := alpha*B*op(A) 3660 * op(A) is one of op(A) = A or op(A) = A**T or op(A) = A**H 3661 * 3662 * Details: http://www.netlib.org/lapack/explore-html/d4/d9b/ctrmm_8f.html 3663 * 3664 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3665 * @param Uplo Specifies whether matrix A is upper or lower triangular. 3666 * @param TransA The type of transpose applied to matrix A. 3667 * @param Diag Specifies whether or not A is unit triangular. 3668 * @param alpha The scalar alpha. 3669 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 3670 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}. 
3671 */ 3672 public void CTRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Float2 alpha, Allocation A, Allocation B) { 3673 validateUplo(Uplo); 3674 validateDiag(Diag); 3675 validateTRMM(Element.F32_2(mRS), Side, TransA, A, B); 3676 3677 boolean mUseIncSupp = isIncSupp(); 3678 long aID = A.getID(mRS); 3679 long bID = B.getID(mRS); 3680 if (mUseIncSupp) { 3681 aID = getDummyAlloc(A); 3682 bID = getDummyAlloc(B); 3683 } 3684 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, 3685 alpha.x, alpha.y, aID, bID, 0, 0, 0, 0, 0, 0, 0, mUseIncSupp); 3686 } 3687 3688 /** 3689 * ZTRMM performs one of the matrix-matrix operations 3690 * B := alpha*op(A)*B or B := alpha*B*op(A) 3691 * op(A) is one of op(A) = A or op(A) = A**T or op(A) = A**H 3692 * 3693 * Details: http://www.netlib.org/lapack/explore-html/d8/de1/ztrmm_8f.html 3694 * 3695 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3696 * @param Uplo Specifies whether matrix A is upper or lower triangular. 3697 * @param TransA The type of transpose applied to matrix A. 3698 * @param Diag Specifies whether or not A is unit triangular. 3699 * @param alpha The scalar alpha. 3700 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 3701 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}. 
3702 */ 3703 public void ZTRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Double2 alpha, Allocation A, Allocation B) { 3704 validateUplo(Uplo); 3705 validateDiag(Diag); 3706 validateTRMM(Element.F64_2(mRS), Side, TransA, A, B); 3707 3708 boolean mUseIncSupp = isIncSupp(); 3709 long aID = A.getID(mRS); 3710 long bID = B.getID(mRS); 3711 if (mUseIncSupp) { 3712 aID = getDummyAlloc(A); 3713 bID = getDummyAlloc(B); 3714 } 3715 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, 3716 alpha.x, alpha.y, aID, bID, 0, 0, 0, 0, 0, 0, 0, mUseIncSupp); 3717 } 3718 3719 static void validateTRSM(Element e, @Side int Side, @Transpose int TransA, Allocation A, Allocation B) { 3720 int adim = -1, bM = -1, bN = -1; 3721 validateSide(Side); 3722 validateTranspose(TransA); 3723 if (!A.getType().getElement().isCompatible(e) || 3724 !B.getType().getElement().isCompatible(e)) { 3725 throw new RSRuntimeException("Called BLAS with wrong Element type"); 3726 } 3727 adim = A.getType().getX(); 3728 if (adim != A.getType().getY()) { 3729 // this may be unnecessary, the restriction could potentially be relaxed 3730 // A needs to contain at least that symmetric matrix but could theoretically be larger 3731 // for now we assume adapters are sufficient, will reevaluate in the future 3732 throw new RSRuntimeException("Called TRSM with a non-symmetric matrix A"); 3733 } 3734 bM = B.getType().getY(); 3735 bN = B.getType().getX(); 3736 if (Side == LEFT) { 3737 // A is M*M 3738 if (adim != bM) { 3739 throw new RSRuntimeException("Called TRSM with invalid matrix dimensions"); 3740 } 3741 } else { 3742 // A is N*N 3743 if (adim != bN) { 3744 throw new RSRuntimeException("Called TRSM with invalid matrix dimensions"); 3745 } 3746 } 3747 } 3748 3749 /** 3750 * STRSM solves one of the matrix equations 3751 * op(A)*X := alpha*B or X*op(A) := alpha*B 3752 * op(A) is one of op(A) = A or op(A) = A**T 3753 * 3754 
* Details: http://www.netlib.org/lapack/explore-html/d2/d8b/strsm_8f.html 3755 * 3756 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3757 * @param Uplo Specifies whether matrix A is upper or lower triangular. 3758 * @param TransA The type of transpose applied to matrix A. 3759 * @param Diag Specifies whether or not A is unit triangular. 3760 * @param alpha The scalar alpha. 3761 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 3762 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}. 3763 */ 3764 public void STRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, float alpha, Allocation A, Allocation B) { 3765 validateUplo(Uplo); 3766 validateDiag(Diag); 3767 validateTRSM(Element.F32(mRS), Side, TransA, A, B); 3768 3769 boolean mUseIncSupp = isIncSupp(); 3770 long aID = A.getID(mRS); 3771 long bID = B.getID(mRS); 3772 if (mUseIncSupp) { 3773 aID = getDummyAlloc(A); 3774 bID = getDummyAlloc(B); 3775 } 3776 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, 3777 alpha, aID, bID, 0, 0, 0, 0, 0, 0, mUseIncSupp); 3778 } 3779 3780 /** 3781 * DTRSM solves one of the matrix equations 3782 * op(A)*X := alpha*B or X*op(A) := alpha*B 3783 * op(A) is one of op(A) = A or op(A) = A**T 3784 * 3785 * Details: http://www.netlib.org/lapack/explore-html/de/da7/dtrsm_8f.html 3786 * 3787 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3788 * @param Uplo Specifies whether matrix A is upper or lower triangular. 3789 * @param TransA The type of transpose applied to matrix A. 3790 * @param Diag Specifies whether or not A is unit triangular. 3791 * @param alpha The scalar alpha. 3792 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 
3793 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}. 3794 */ 3795 public void DTRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, double alpha, Allocation A, Allocation B) { 3796 validateUplo(Uplo); 3797 validateDiag(Diag); 3798 validateTRSM(Element.F64(mRS), Side, TransA, A, B); 3799 3800 boolean mUseIncSupp = isIncSupp(); 3801 long aID = A.getID(mRS); 3802 long bID = B.getID(mRS); 3803 if (mUseIncSupp) { 3804 aID = getDummyAlloc(A); 3805 bID = getDummyAlloc(B); 3806 } 3807 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, 3808 alpha, aID, bID, 0, 0, 0, 0, 0, 0, mUseIncSupp); 3809 } 3810 3811 /** 3812 * CTRSM solves one of the matrix equations 3813 * op(A)*X := alpha*B or X*op(A) := alpha*B 3814 * op(A) is one of op(A) = A or op(A) = A**T or op(A) = A**H 3815 * 3816 * Details: http://www.netlib.org/lapack/explore-html/de/d30/ctrsm_8f.html 3817 * 3818 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3819 * @param Uplo Specifies whether matrix A is upper or lower triangular. 3820 * @param TransA The type of transpose applied to matrix A. 3821 * @param Diag Specifies whether or not A is unit triangular. 3822 * @param alpha The scalar alpha. 3823 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 3824 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}. 
3825 */ 3826 public void CTRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Float2 alpha, Allocation A, Allocation B) { 3827 validateUplo(Uplo); 3828 validateDiag(Diag); 3829 validateTRSM(Element.F32_2(mRS), Side, TransA, A, B); 3830 3831 boolean mUseIncSupp = isIncSupp(); 3832 long aID = A.getID(mRS); 3833 long bID = B.getID(mRS); 3834 if (mUseIncSupp) { 3835 aID = getDummyAlloc(A); 3836 bID = getDummyAlloc(B); 3837 } 3838 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, 3839 alpha.x, alpha.y, aID, bID, 0, 0, 0, 0, 0, 0, 0, mUseIncSupp); 3840 } 3841 3842 /** 3843 * ZTRSM solves one of the matrix equations 3844 * op(A)*X := alpha*B or X*op(A) := alpha*B 3845 * op(A) is one of op(A) = A or op(A) = A**T or op(A) = A**H 3846 * 3847 * Details: http://www.netlib.org/lapack/explore-html/d1/d39/ztrsm_8f.html 3848 * 3849 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3850 * @param Uplo Specifies whether matrix A is upper or lower triangular. 3851 * @param TransA The type of transpose applied to matrix A. 3852 * @param Diag Specifies whether or not A is unit triangular. 3853 * @param alpha The scalar alpha. 3854 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 3855 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}. 
3856 */ 3857 public void ZTRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Double2 alpha, Allocation A, Allocation B) { 3858 validateUplo(Uplo); 3859 validateDiag(Diag); 3860 validateTRSM(Element.F64_2(mRS), Side, TransA, A, B); 3861 3862 boolean mUseIncSupp = isIncSupp(); 3863 long aID = A.getID(mRS); 3864 long bID = B.getID(mRS); 3865 if (mUseIncSupp) { 3866 aID = getDummyAlloc(A); 3867 bID = getDummyAlloc(B); 3868 } 3869 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, 3870 alpha.x, alpha.y, aID, bID, 0, 0, 0, 0, 0, 0, 0, mUseIncSupp); 3871 } 3872 3873 static void validateHEMM(Element e, @Side int Side, Allocation A, Allocation B, Allocation C) { 3874 validateSide(Side); 3875 3876 if (!A.getType().getElement().isCompatible(e) || 3877 !B.getType().getElement().isCompatible(e) || 3878 !C.getType().getElement().isCompatible(e)) { 3879 throw new RSRuntimeException("Called BLAS with wrong Element type"); 3880 } 3881 3882 // A must be square; can potentially be relaxed similar to TRSM 3883 int adim = A.getType().getX(); 3884 if (adim != A.getType().getY()) { 3885 throw new RSRuntimeException("Called HEMM with non-square A"); 3886 } 3887 if ((Side == LEFT && adim != B.getType().getY()) || 3888 (Side == RIGHT && adim != B.getType().getX())) { 3889 throw new RSRuntimeException("Called HEMM with invalid B"); 3890 } 3891 if (B.getType().getX() != C.getType().getX() || 3892 B.getType().getY() != C.getType().getY()) { 3893 throw new RSRuntimeException("Called HEMM with mismatched B and C"); 3894 } 3895 } 3896 3897 /** 3898 * CHEMM performs one of the matrix-matrix operations 3899 * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C 3900 * 3901 * Details: http://www.netlib.org/lapack/explore-html/d3/d66/chemm_8f.html 3902 * 3903 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 
3904 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 3905 * @param alpha The scalar alpha. 3906 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 3907 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}. 3908 * @param beta The scalar beta. 3909 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}. 3910 */ 3911 public void CHEMM(@Side int Side, @Uplo int Uplo, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C) { 3912 validateUplo(Uplo); 3913 validateHEMM(Element.F32_2(mRS), Side, A, B, C); 3914 3915 boolean mUseIncSupp = isIncSupp(); 3916 long aID = A.getID(mRS); 3917 long bID = B.getID(mRS); 3918 long cID = C.getID(mRS); 3919 if (mUseIncSupp) { 3920 aID = getDummyAlloc(A); 3921 bID = getDummyAlloc(B); 3922 cID = getDummyAlloc(C); 3923 } 3924 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chemm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, 3925 alpha.x, alpha.y, aID, bID, beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp); 3926 } 3927 3928 /** 3929 * ZHEMM performs one of the matrix-matrix operations 3930 * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C 3931 * 3932 * Details: http://www.netlib.org/lapack/explore-html/d6/d3e/zhemm_8f.html 3933 * 3934 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3935 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 3936 * @param alpha The scalar alpha. 3937 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 3938 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}. 3939 * @param beta The scalar beta. 3940 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}. 
3941 */ 3942 public void ZHEMM(@Side int Side, @Uplo int Uplo, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C) { 3943 validateUplo(Uplo); 3944 validateHEMM(Element.F64_2(mRS), Side, A, B, C); 3945 3946 boolean mUseIncSupp = isIncSupp(); 3947 long aID = A.getID(mRS); 3948 long bID = B.getID(mRS); 3949 long cID = C.getID(mRS); 3950 if (mUseIncSupp) { 3951 aID = getDummyAlloc(A); 3952 bID = getDummyAlloc(B); 3953 cID = getDummyAlloc(C); 3954 } 3955 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhemm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, 3956 alpha.x, alpha.y, aID, bID, beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp); 3957 } 3958 3959 static void validateHERK(Element e, @Transpose int Trans, Allocation A, Allocation C) { 3960 if (!A.getType().getElement().isCompatible(e) || 3961 !C.getType().getElement().isCompatible(e)) { 3962 throw new RSRuntimeException("Called BLAS with wrong Element type"); 3963 } 3964 validateConjTranspose(Trans); 3965 int cdim = C.getType().getX(); 3966 if (cdim != C.getType().getY()) { 3967 throw new RSRuntimeException("Called HERK with non-square C"); 3968 } 3969 if (Trans == NO_TRANSPOSE) { 3970 if (cdim != A.getType().getY()) { 3971 throw new RSRuntimeException("Called HERK with invalid A"); 3972 } 3973 } else { 3974 if (cdim != A.getType().getX()) { 3975 throw new RSRuntimeException("Called HERK with invalid A"); 3976 } 3977 } 3978 } 3979 3980 /** 3981 * CHERK performs one of the hermitian rank k operations 3982 * C := alpha*A*A**H + beta*C or C := alpha*A**H*A + beta*C 3983 * 3984 * Details: http://www.netlib.org/lapack/explore-html/d8/d52/cherk_8f.html 3985 * 3986 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 3987 * @param Trans The type of transpose applied to the operation. 3988 * @param alpha The scalar alpha. 3989 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 
3990 * @param beta The scalar beta. 3991 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}. 3992 */ 3993 public void CHERK(@Uplo int Uplo, @Transpose int Trans, float alpha, Allocation A, float beta, Allocation C) { 3994 validateUplo(Uplo); 3995 validateHERK(Element.F32_2(mRS), Trans, A, C); 3996 int k = 0; 3997 if (Trans == CONJ_TRANSPOSE) { 3998 k = A.getType().getY(); 3999 } else { 4000 k = A.getType().getX(); 4001 } 4002 4003 boolean mUseIncSupp = isIncSupp(); 4004 long aID = A.getID(mRS); 4005 long cID = C.getID(mRS); 4006 if (mUseIncSupp) { 4007 aID = getDummyAlloc(A); 4008 cID = getDummyAlloc(C); 4009 } 4010 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cherk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k, 4011 alpha, 0, aID, 0, beta, 0, cID, 0, 0, 0, 0, mUseIncSupp); 4012 } 4013 4014 /** 4015 * ZHERK performs one of the hermitian rank k operations 4016 * C := alpha*A*A**H + beta*C or C := alpha*A**H*A + beta*C 4017 * 4018 * Details: http://www.netlib.org/lapack/explore-html/d1/db1/zherk_8f.html 4019 * 4020 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 4021 * @param Trans The type of transpose applied to the operation. 4022 * @param alpha The scalar alpha. 4023 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 4024 * @param beta The scalar beta. 4025 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}. 
4026 */ 4027 public void ZHERK(@Uplo int Uplo, @Transpose int Trans, double alpha, Allocation A, double beta, Allocation C) { 4028 validateUplo(Uplo); 4029 validateHERK(Element.F64_2(mRS), Trans, A, C); 4030 int k = 0; 4031 if (Trans == CONJ_TRANSPOSE) { 4032 k = A.getType().getY(); 4033 } else { 4034 k = A.getType().getX(); 4035 } 4036 4037 boolean mUseIncSupp = isIncSupp(); 4038 long aID = A.getID(mRS); 4039 long cID = C.getID(mRS); 4040 if (mUseIncSupp) { 4041 aID = getDummyAlloc(A); 4042 cID = getDummyAlloc(C); 4043 } 4044 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zherk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k, 4045 alpha, 0, aID, 0, beta, 0, cID, 0, 0, 0, 0, mUseIncSupp); 4046 } 4047 4048 static void validateHER2K(Element e, @Transpose int Trans, Allocation A, Allocation B, Allocation C) { 4049 if (!A.getType().getElement().isCompatible(e) || 4050 !B.getType().getElement().isCompatible(e) || 4051 !C.getType().getElement().isCompatible(e)) { 4052 throw new RSRuntimeException("Called BLAS with wrong Element type"); 4053 } 4054 validateConjTranspose(Trans); 4055 int cdim = C.getType().getX(); 4056 if (cdim != C.getType().getY()) { 4057 throw new RSRuntimeException("Called HER2K with non-square C"); 4058 } 4059 if (Trans == NO_TRANSPOSE) { 4060 if (A.getType().getY() != cdim) { 4061 throw new RSRuntimeException("Called HER2K with invalid matrices"); 4062 } 4063 } else { 4064 if (A.getType().getX() != cdim) { 4065 throw new RSRuntimeException("Called HER2K with invalid matrices"); 4066 } 4067 } 4068 if (A.getType().getX() != B.getType().getX() || A.getType().getY() != B.getType().getY()) { 4069 throw new RSRuntimeException("Called HER2K with invalid A and B matrices"); 4070 } 4071 } 4072 4073 /** 4074 * CHER2K performs one of the hermitian rank 2k operations 4075 * C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C or C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C 4076 * 4077 * Details: http://www.netlib.org/lapack/explore-html/d1/d82/cher2k_8f.html 
4078 * 4079 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 4080 * @param Trans The type of transpose applied to the operation. 4081 * @param alpha The scalar alpha. 4082 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 4083 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}. 4084 * @param beta The scalar beta. 4085 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}. 4086 */ 4087 public void CHER2K(@Uplo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Allocation B, float beta, Allocation C) { 4088 validateUplo(Uplo); 4089 validateHER2K(Element.F32_2(mRS), Trans, A, B, C); 4090 int k = 0; 4091 if (Trans == NO_TRANSPOSE) { 4092 k = A.getType().getX(); 4093 } else { 4094 k = A.getType().getY(); 4095 } 4096 4097 boolean mUseIncSupp = isIncSupp(); 4098 long aID = A.getID(mRS); 4099 long bID = B.getID(mRS); 4100 long cID = C.getID(mRS); 4101 if (mUseIncSupp) { 4102 aID = getDummyAlloc(A); 4103 bID = getDummyAlloc(B); 4104 cID = getDummyAlloc(C); 4105 } 4106 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cher2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k, alpha.x, alpha.y, 4107 A.getID(mRS), bID, beta, 0, cID, 0, 0, 0, 0, mUseIncSupp); 4108 } 4109 4110 /** 4111 * ZHER2K performs one of the hermitian rank 2k operations 4112 * C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C or C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C 4113 * 4114 * Details: http://www.netlib.org/lapack/explore-html/d7/dfa/zher2k_8f.html 4115 * 4116 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 4117 * @param Trans The type of transpose applied to the operation. 4118 * @param alpha The scalar alpha. 4119 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 
4120 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}. 4121 * @param beta The scalar beta. 4122 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}. 4123 */ 4124 public void ZHER2K(@Uplo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Allocation B, double beta, Allocation C) { 4125 validateUplo(Uplo); 4126 validateHER2K(Element.F64_2(mRS), Trans, A, B, C); 4127 int k = 0; 4128 if (Trans == NO_TRANSPOSE) { 4129 k = A.getType().getX(); 4130 } else { 4131 k = A.getType().getY(); 4132 } 4133 4134 boolean mUseIncSupp = isIncSupp(); 4135 long aID = A.getID(mRS); 4136 long bID = B.getID(mRS); 4137 long cID = C.getID(mRS); 4138 if (mUseIncSupp) { 4139 aID = getDummyAlloc(A); 4140 bID = getDummyAlloc(B); 4141 cID = getDummyAlloc(C); 4142 } 4143 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zher2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k, alpha.x, alpha.y, 4144 A.getID(mRS), bID, beta, 0, cID, 0, 0, 0, 0, mUseIncSupp); 4145 } 4146 4147 4148 /** 4149 * 8-bit GEMM-like operation for neural networks: C = A * Transpose(B) 4150 * Calculations are done in 1.10.21 fixed-point format for the final output, 4151 * just before there's a shift down to drop the fractional parts. The output 4152 * values are gated to 0 to 255 to fit in a byte, but the 10-bit format 4153 * gives some headroom to avoid wrapping around on small overflows. 4154 * 4155 * @param A The input allocation contains matrix A, supported elements type {@link Element#U8}. 4156 * @param a_offset The offset for all values in matrix A, e.g A[i,j] = A[i,j] - a_offset. Value should be from 0 to 255. 4157 * @param B The input allocation contains matrix B, supported elements type {@link Element#U8}. 4158 * @param b_offset The offset for all values in matrix B, e.g B[i,j] = B[i,j] - b_offset. Value should be from 0 to 255. 
4159 * @param C The input allocation contains matrix C, supported elements type {@link Element#U8}. 4160 * @param c_offset The offset for all values in matrix C. 4161 * @param c_mult The multiplier for all values in matrix C, e.g C[i,j] = (C[i,j] + c_offset) * c_mult. 4162 **/ 4163 public void BNNM(Allocation A, int a_offset, Allocation B, int b_offset, Allocation C, int c_offset, int c_mult) { 4164 validateL3(Element.U8(mRS), NO_TRANSPOSE, TRANSPOSE, 0, A, B, C); 4165 4166 if (a_offset < 0 || a_offset > 255) { 4167 throw new RSRuntimeException("Invalid a_offset passed to BNNM"); 4168 } 4169 if (b_offset < 0 || b_offset > 255) { 4170 throw new RSRuntimeException("Invalid b_offset passed to BNNM"); 4171 } 4172 int M = -1, N = -1, K = -1; 4173 M = A.getType().getY(); 4174 N = B.getType().getY(); 4175 K = A.getType().getX(); 4176 4177 boolean mUseIncSupp = isIncSupp(); 4178 long aID = A.getID(mRS); 4179 long bID = B.getID(mRS); 4180 long cID = C.getID(mRS); 4181 if (mUseIncSupp) { 4182 aID = getDummyAlloc(A); 4183 bID = getDummyAlloc(B); 4184 cID = getDummyAlloc(C); 4185 } 4186 mRS.nScriptIntrinsicBLAS_BNNM(getID(mRS), M, N, K, aID, a_offset, bID, b_offset, cID, c_offset, c_mult, mUseIncSupp); 4187 4188 } 4189 4190} 4191