ScriptIntrinsicBLAS.java revision ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5f
1/* 2 * Copyright (C) 2015 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17package android.support.v8.renderscript; 18 19import android.support.annotation.IntDef; 20import java.lang.annotation.Retention; 21import java.lang.annotation.RetentionPolicy; 22 23/** 24 * @hide 25 * ScriptIntrinsicBLAS class provides high performance RenderScript APIs to BLAS. 26 * 27 * The BLAS (Basic Linear Algebra Subprograms) are routines that provide standard 28 * building blocks for performing basic vector and matrix operations. 29 * 30 * For detailed description of BLAS, please refer to http://www.netlib.org/blas/ 31 * 32 **/ 33public final class ScriptIntrinsicBLAS extends ScriptIntrinsic { 34 private Allocation mLUT; 35 private static final int INTRINSIC_API_LEVEL = 23; 36 37 private ScriptIntrinsicBLAS(long id, RenderScript rs) { 38 super(id, rs); 39 } 40 41 private static final int RsBlas_sdsdot = 1; 42 private static final int RsBlas_dsdot = 2; 43 private static final int RsBlas_sdot = 3; 44 private static final int RsBlas_ddot = 4; 45 private static final int RsBlas_cdotu_sub = 5; 46 private static final int RsBlas_cdotc_sub = 6; 47 private static final int RsBlas_zdotu_sub = 7; 48 private static final int RsBlas_zdotc_sub = 8; 49 private static final int RsBlas_snrm2 = 9; 50 private static final int RsBlas_sasum = 10; 51 private static final int RsBlas_dnrm2 = 11; 52 private static final int RsBlas_dasum = 12; 53 private static final int RsBlas_scnrm2 = 13; 54 private static final int RsBlas_scasum = 14; 55 private static final int RsBlas_dznrm2 = 15; 56 private static final int RsBlas_dzasum = 16; 57 private static final int RsBlas_isamax = 17; 58 private static final int RsBlas_idamax = 18; 59 private static final int RsBlas_icamax = 19; 60 private static final int RsBlas_izamax = 20; 61 private static final int RsBlas_sswap = 21; 62 private static final int RsBlas_scopy = 22; 63 private static final int RsBlas_saxpy = 23; 64 private static final int RsBlas_dswap = 24; 65 private static final int RsBlas_dcopy = 25; 66 private static final int RsBlas_daxpy = 26; 67 private static final int RsBlas_cswap = 27; 68 private static final int RsBlas_ccopy = 28; 69 private static final int RsBlas_caxpy = 29; 70 private static final int RsBlas_zswap = 30; 71 private static final int RsBlas_zcopy = 31; 72 private static final int RsBlas_zaxpy = 32; 73 private static final int RsBlas_srotg = 33; 74 private static final int RsBlas_srotmg = 34; 75 private static final int RsBlas_srot = 35; 76 private static final int RsBlas_srotm = 36; 77 private static final int RsBlas_drotg = 37; 78 private static final int RsBlas_drotmg = 38; 79 private static final int RsBlas_drot = 39; 80 private static final int RsBlas_drotm = 40; 81 private static final int RsBlas_sscal = 41; 82 private static final int RsBlas_dscal = 42; 83 private static final int RsBlas_cscal = 43; 84 private static final int RsBlas_zscal = 44; 85 private static final int RsBlas_csscal = 45; 86 private static final int RsBlas_zdscal = 46; 87 private static final int RsBlas_sgemv = 47; 88 private static final int RsBlas_sgbmv = 48; 89 private static final int RsBlas_strmv = 49; 90 private static final int RsBlas_stbmv = 50; 91 private static final int RsBlas_stpmv = 51; 92 private static final int RsBlas_strsv = 52; 93 private static final int RsBlas_stbsv = 53; 94 private static final int RsBlas_stpsv = 54; 95 private static final int RsBlas_dgemv = 55; 96 private static final int RsBlas_dgbmv = 56; 97 private static final int RsBlas_dtrmv = 57; 98 private static final int RsBlas_dtbmv = 58; 99 private static final int RsBlas_dtpmv = 59; 100 private static final int RsBlas_dtrsv = 60; 101 private static final int RsBlas_dtbsv = 61; 102 private static final int RsBlas_dtpsv = 62; 103 private static final int RsBlas_cgemv = 63; 104 private static final int RsBlas_cgbmv = 64; 105 private static final int RsBlas_ctrmv = 65; 106 private static final int RsBlas_ctbmv = 66; 107 private static final int RsBlas_ctpmv = 67; 108 private static final int RsBlas_ctrsv = 68; 109 private static final int RsBlas_ctbsv = 69; 110 private static final int RsBlas_ctpsv = 70; 111 private static final int RsBlas_zgemv = 71; 112 private static final int RsBlas_zgbmv = 72; 113 private static final int RsBlas_ztrmv = 73; 114 private static final int RsBlas_ztbmv = 74; 115 private static final int RsBlas_ztpmv = 75; 116 private static final int RsBlas_ztrsv = 76; 117 private static final int RsBlas_ztbsv = 77; 118 private static final int RsBlas_ztpsv = 78; 119 private static final int RsBlas_ssymv = 79; 120 private static final int RsBlas_ssbmv = 80; 121 private static final int RsBlas_sspmv = 81; 122 private static final int RsBlas_sger = 82; 123 private static final int RsBlas_ssyr = 83; 124 private static final int RsBlas_sspr = 84; 125 private static final int RsBlas_ssyr2 = 85; 126 private static final int RsBlas_sspr2 = 86; 127 private static final int RsBlas_dsymv = 87; 128 private static final int RsBlas_dsbmv = 88; 129 private static final int RsBlas_dspmv = 89; 130 private static final int RsBlas_dger = 90; 131 private static final int RsBlas_dsyr = 91; 132 private static final int RsBlas_dspr = 92; 133 private static final int RsBlas_dsyr2 = 93; 134 private static final int RsBlas_dspr2 = 94; 135 private static final int RsBlas_chemv = 95; 136 private static final int RsBlas_chbmv = 96; 137 private static final int RsBlas_chpmv = 97; 138 private static final int RsBlas_cgeru = 98; 139 private static final int RsBlas_cgerc = 99; 140 private static final int RsBlas_cher = 100; 141 private static final int RsBlas_chpr = 101; 142 private static final int RsBlas_cher2 = 102; 143 private static final int RsBlas_chpr2 = 103; 144 private static final int RsBlas_zhemv = 104; 145 private static final int RsBlas_zhbmv = 105; 146 private static final int RsBlas_zhpmv = 106; 147 private static final int RsBlas_zgeru = 107; 148 private static final int RsBlas_zgerc = 108; 149 private static final int RsBlas_zher = 109; 150 private static final int RsBlas_zhpr = 110; 151 private static final int RsBlas_zher2 = 111; 152 private static final int RsBlas_zhpr2 = 112; 153 private static final int RsBlas_sgemm = 113; 154 private static final int RsBlas_ssymm = 114; 155 private static final int RsBlas_ssyrk = 115; 156 private static final int RsBlas_ssyr2k = 116; 157 private static final int RsBlas_strmm = 117; 158 private static final int RsBlas_strsm = 118; 159 private static final int RsBlas_dgemm = 119; 160 private static final int RsBlas_dsymm = 120; 161 private static final int RsBlas_dsyrk = 121; 162 private static final int RsBlas_dsyr2k = 122; 163 private static final int RsBlas_dtrmm = 123; 164 private static final int RsBlas_dtrsm = 124; 165 private static final int RsBlas_cgemm = 125; 166 private static final int RsBlas_csymm = 126; 167 private static final int RsBlas_csyrk = 127; 168 private static final int RsBlas_csyr2k = 128; 169 private static final int RsBlas_ctrmm = 129; 170 private static final int RsBlas_ctrsm = 130; 171 private static final int RsBlas_zgemm = 131; 172 private static final int RsBlas_zsymm = 132; 173 private static final int RsBlas_zsyrk = 133; 174 private static final int RsBlas_zsyr2k = 134; 175 private static final int RsBlas_ztrmm = 135; 176 private static final int RsBlas_ztrsm = 136; 177 private static final int RsBlas_chemm = 137; 178 private static final int RsBlas_cherk = 138; 179 private static final int RsBlas_cher2k = 139; 180 private static final int RsBlas_zhemm = 140; 181 private static final int RsBlas_zherk = 141; 182 private static final int RsBlas_zher2k = 142; 183 184 // BLAS extensions start here 185 private static final int RsBlas_bnnm = 1000; 186 187 /** 188 * Create an intrinsic to access BLAS subroutines. 189 * 190 * @param rs The RenderScript context 191 * @return ScriptIntrinsicBLAS 192 */ 193 public static ScriptIntrinsicBLAS create(RenderScript rs) { 194 long id; 195 boolean mUseIncSupp = rs.isUseNative() && 196 android.os.Build.VERSION.SDK_INT < INTRINSIC_API_LEVEL; 197 198 id = rs.nScriptIntrinsicCreate(13, Element.U32(rs).getID(rs), mUseIncSupp); 199 ScriptIntrinsicBLAS si = new ScriptIntrinsicBLAS(id, rs); 200 si.setIncSupp(mUseIncSupp); 201 return si; 202 } 203 204 @IntDef({NO_TRANSPOSE, TRANSPOSE, CONJ_TRANSPOSE}) 205 @Retention(RetentionPolicy.SOURCE) 206 public @interface Transpose {} 207 208 @IntDef({UPPER, LOWER}) 209 @Retention(RetentionPolicy.SOURCE) 210 public @interface Uplo {} 211 212 @IntDef({NON_UNIT, UNIT}) 213 @Retention(RetentionPolicy.SOURCE) 214 public @interface Diag {} 215 216 @IntDef({LEFT, RIGHT}) 217 @Retention(RetentionPolicy.SOURCE) 218 public @interface Side {} 219 220 public static final int NO_TRANSPOSE = 111; 221 public static final int TRANSPOSE = 112; 222 public static final int CONJ_TRANSPOSE = 113; 223 224 public static final int UPPER = 121; 225 public static final int LOWER = 122; 226 227 public static final int NON_UNIT = 131; 228 public static final int UNIT = 132; 229 230 public static final int LEFT = 141; 231 public static final int RIGHT = 142; 232 233 static void validateSide(@Side int Side) { 234 if (Side != LEFT && Side != RIGHT) { 235 throw new RSRuntimeException("Invalid side passed to BLAS"); 236 } 237 } 238 239 static void validateTranspose(@Transpose int Trans) { 240 if (Trans != NO_TRANSPOSE && Trans != TRANSPOSE && 241 Trans != CONJ_TRANSPOSE) { 242 throw new RSRuntimeException("Invalid transpose passed to BLAS"); 243 } 244 } 245 246 static void validateConjTranspose(@Transpose int Trans) { 247 if (Trans != NO_TRANSPOSE && 248 Trans != CONJ_TRANSPOSE) { 249 throw new RSRuntimeException("Invalid transpose passed to BLAS"); 250 } 251 } 252 253 static void validateDiag(@Diag int Diag) { 254 if (Diag != NON_UNIT && Diag != UNIT) { 255 throw new RSRuntimeException("Invalid diag passed to BLAS"); 256 } 257 } 258 259 static void validateUplo(@Uplo int Uplo) { 260 if (Uplo != UPPER && Uplo != LOWER) { 261 throw new RSRuntimeException("Invalid uplo passed to BLAS"); 262 } 263 } 264 265 266 /** 267 * Level 2 BLAS 268 */ 269 270 static void validateGEMV(Element e, int TransA, Allocation A, Allocation X, int incX, Allocation Y, int incY) { 271 validateTranspose(TransA); 272 int M = A.getType().getY(); 273 int N = A.getType().getX(); 274 if (!A.getType().getElement().isCompatible(e) || 275 !X.getType().getElement().isCompatible(e) || 276 !Y.getType().getElement().isCompatible(e)) { 277 throw new RSRuntimeException("Called BLAS with wrong Element type"); 278 } 279 if (X.getType().getY() > 1 || Y.getType().getY() > 1) { 280 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 281 } 282 283 if (incX <= 0 || incY <= 0) { 284 throw new RSRuntimeException("Vector increments must be greater than 0"); 285 } 286 int expectedXDim = -1, expectedYDim = -1; 287 if (TransA == NO_TRANSPOSE) { 288 expectedXDim = 1 + (N - 1) * incX; 289 expectedYDim = 1 + (M - 1) * incY; 290 } else { 291 expectedXDim = 1 + (M - 1) * incX; 292 expectedYDim = 1 + (N - 1) * incY; 293 } 294 if (X.getType().getX() != expectedXDim || 295 Y.getType().getX() != expectedYDim) { 296 throw new RSRuntimeException("Incorrect vector dimensions for GEMV"); 297 } 298 } 299 300 /** 301 * SGEMV performs one of the matrix-vector operations 302 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y 303 * 304 * Details: http://www.netlib.org/lapack/explore-html/db/d58/sgemv_8f.html 305 * 306 * @param TransA The type of transpose applied to matrix A. 307 * @param alpha The scalar alpha. 308 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 309 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 310 * @param incX The increment for the elements of vector x, must be larger than zero. 311 * @param beta The scalar beta. 312 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}. 313 * @param incY The increment for the elements of vector y, must be larger than zero. 314 */ 315 public void SGEMV(@Transpose int TransA, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) { 316 validateGEMV(Element.F32(mRS), TransA, A, X, incX, Y, incY); 317 int M = A.getType().getY(); 318 int N = A.getType().getX(); 319 320 boolean mUseIncSupp = isIncSupp(); 321 long aID = A.getID(mRS); 322 long xID = X.getID(mRS); 323 long yID = Y.getID(mRS); 324 if (mUseIncSupp) { 325 aID = getDummyAlloc(A); 326 xID = getDummyAlloc(X); 327 yID = getDummyAlloc(Y); 328 } 329 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha, aID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp); 330 } 331 332 /** 333 * DGEMV performs one of the matrix-vector operations 334 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y 335 * 336 * Details: http://www.netlib.org/lapack/explore-html/dc/da8/dgemv_8f.html 337 * 338 * @param TransA The type of transpose applied to matrix A. 339 * @param alpha The scalar alpha. 340 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 341 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 342 * @param incX The increment for the elements of vector x, must be larger than zero. 343 * @param beta The scalar beta. 344 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. 345 * @param incY The increment for the elements of vector y, must be larger than zero. 346 */ 347 public void DGEMV(@Transpose int TransA, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) { 348 validateGEMV(Element.F64(mRS), TransA, A, X, incX, Y, incY); 349 int M = A.getType().getY(); 350 int N = A.getType().getX(); 351 352 boolean mUseIncSupp = isIncSupp(); 353 long aID = A.getID(mRS); 354 long xID = X.getID(mRS); 355 long yID = Y.getID(mRS); 356 if (mUseIncSupp) { 357 aID = getDummyAlloc(A); 358 xID = getDummyAlloc(X); 359 yID = getDummyAlloc(Y); 360 } 361 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha, aID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp); 362 } 363 364 /** 365 * CGEMV performs one of the matrix-vector operations 366 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y or y := alpha*A**H*x + beta*y 367 * 368 * Details: http://www.netlib.org/lapack/explore-html/d4/d8a/cgemv_8f.html 369 * 370 * @param TransA The type of transpose applied to matrix A. 371 * @param alpha The scalar alpha. 372 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 373 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 374 * @param incX The increment for the elements of vector x, must be larger than zero. 375 * @param beta The scalar beta. 376 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. 377 * @param incY The increment for the elements of vector y, must be larger than zero. 378 */ 379 public void CGEMV(@Transpose int TransA, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) { 380 validateGEMV(Element.F32_2(mRS), TransA, A, X, incX, Y, incY); 381 int M = A.getType().getY(); 382 int N = A.getType().getX(); 383 384 boolean mUseIncSupp = isIncSupp(); 385 long aID = A.getID(mRS); 386 long xID = X.getID(mRS); 387 long yID = Y.getID(mRS); 388 if (mUseIncSupp) { 389 aID = getDummyAlloc(A); 390 xID = getDummyAlloc(X); 391 yID = getDummyAlloc(Y); 392 } 393 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp); 394 } 395 396 /** 397 * ZGEMV performs one of the matrix-vector operations 398 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y or y := alpha*A**H*x + beta*y 399 * 400 * Details: http://www.netlib.org/lapack/explore-html/db/d40/zgemv_8f.html 401 * 402 * @param TransA The type of transpose applied to matrix A. 403 * @param alpha The scalar alpha. 404 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 405 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 406 * @param incX The increment for the elements of vector x, must be larger than zero. 407 * @param beta The scalar beta. 408 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. 409 * @param incY The increment for the elements of vector y, must be larger than zero. 410 */ 411 public void ZGEMV(@Transpose int TransA, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) { 412 validateGEMV(Element.F64_2(mRS), TransA, A, X, incX, Y, incY); 413 int M = A.getType().getY(); 414 int N = A.getType().getX(); 415 416 boolean mUseIncSupp = isIncSupp(); 417 long aID = A.getID(mRS); 418 long xID = X.getID(mRS); 419 long yID = Y.getID(mRS); 420 if (mUseIncSupp) { 421 aID = getDummyAlloc(A); 422 xID = getDummyAlloc(X); 423 yID = getDummyAlloc(Y); 424 } 425 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp); 426 } 427 428 /** 429 * SGBMV performs one of the matrix-vector operations 430 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y 431 * 432 * Details: http://www.netlib.org/lapack/explore-html/d6/d46/sgbmv_8f.html 433 * 434 * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N), 435 * but only the region M*(KL+KU+1) will be referenced. The following subroutine can is an 436 * example showing how to convert the original matrix 'a' to row-based band matrix 'b'. 437 * for i in range(0, m): 438 * for j in range(max(0, i-kl), min(i+ku+1, n)): 439 * b[i, j-i+kl] = a[i, j] 440 * 441 * @param TransA The type of transpose applied to matrix A. 442 * @param KL The number of sub-diagonals of the matrix A. 443 * @param KU The number of super-diagonals of the matrix A. 444 * @param alpha The scalar alpha. 445 * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F32}. 446 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 447 * @param incX The increment for the elements of vector x, must be larger than zero. 448 * @param beta The scalar beta. 449 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}. 450 * @param incY The increment for the elements of vector y, must be larger than zero. 451 */ 452 public void SGBMV(@Transpose int TransA, int KL, int KU, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) { 453 // GBMV has the same validation requirements as GEMV + KL and KU >= 0 454 validateGEMV(Element.F32(mRS), TransA, A, X, incX, Y, incY); 455 if (KL < 0 || KU < 0) { 456 throw new RSRuntimeException("KL and KU must be greater than or equal to 0"); 457 } 458 int M = A.getType().getY(); 459 int N = A.getType().getX(); 460 461 boolean mUseIncSupp = isIncSupp(); 462 long aID = A.getID(mRS); 463 long xID = X.getID(mRS); 464 long yID = Y.getID(mRS); 465 if (mUseIncSupp) { 466 aID = getDummyAlloc(A); 467 xID = getDummyAlloc(X); 468 yID = getDummyAlloc(Y); 469 } 470 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha, aID, xID, beta, yID, incX, incY, KL, KU, mUseIncSupp); 471 } 472 473 /** 474 * DGBMV performs one of the matrix-vector operations 475 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y 476 * 477 * Details: http://www.netlib.org/lapack/explore-html/d2/d3f/dgbmv_8f.html 478 * 479 * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N), 480 * but only the region M*(KL+KU+1) will be referenced. The following subroutine can is an 481 * example showing how to convert the original matrix 'a' to row-based band matrix 'b'. 482 * for i in range(0, m): 483 * for j in range(max(0, i-kl), min(i+ku+1, n)): 484 * b[i, j-i+kl] = a[i, j] 485 * 486 * @param TransA The type of transpose applied to matrix A. 487 * @param KL The number of sub-diagonals of the matrix A. 488 * @param KU The number of super-diagonals of the matrix A. 489 * @param alpha The scalar alpha. 490 * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F64}. 491 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 492 * @param incX The increment for the elements of vector x, must be larger than zero. 493 * @param beta The scalar beta. 494 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. 495 * @param incY The increment for the elements of vector y, must be larger than zero. 496 */ 497 public void DGBMV(@Transpose int TransA, int KL, int KU, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) { 498 // GBMV has the same validation requirements as GEMV + KL and KU >= 0 499 validateGEMV(Element.F64(mRS), TransA, A, X, incX, Y, incY); 500 if (KL < 0 || KU < 0) { 501 throw new RSRuntimeException("KL and KU must be greater than or equal to 0"); 502 } 503 int M = A.getType().getY(); 504 int N = A.getType().getX(); 505 506 boolean mUseIncSupp = isIncSupp(); 507 long aID = A.getID(mRS); 508 long xID = X.getID(mRS); 509 long yID = Y.getID(mRS); 510 if (mUseIncSupp) { 511 aID = getDummyAlloc(A); 512 xID = getDummyAlloc(X); 513 yID = getDummyAlloc(Y); 514 } 515 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha, aID, xID, beta, yID, incX, incY, KL, KU, mUseIncSupp); 516 } 517 518 /** 519 * CGBMV performs one of the matrix-vector operations 520 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y or y := alpha*A**H*x + beta*y 521 * 522 * Details: http://www.netlib.org/lapack/explore-html/d0/d75/cgbmv_8f.html 523 * 524 * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N), 525 * but only the region M*(KL+KU+1) will be referenced. The following subroutine can is an 526 * example showing how to convert the original matrix 'a' to row-based band matrix 'b'. 527 * for i in range(0, m): 528 * for j in range(max(0, i-kl), min(i+ku+1, n)): 529 * b[i, j-i+kl] = a[i, j] 530 * 531 * @param TransA The type of transpose applied to matrix A. 532 * @param KL The number of sub-diagonals of the matrix A. 533 * @param KU The number of super-diagonals of the matrix A. 534 * @param alpha The scalar alpha. 535 * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F32_2}. 536 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 537 * @param incX The increment for the elements of vector x, must be larger than zero. 538 * @param beta The scalar beta. 539 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. 540 * @param incY The increment for the elements of vector y, must be larger than zero. 541 */ 542 public void CGBMV(@Transpose int TransA, int KL, int KU, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) { 543 // GBMV has the same validation requirements as GEMV + KL and KU >= 0 544 validateGEMV(Element.F32_2(mRS), TransA, A, X, incX, Y, incY); 545 if (KL < 0 || KU < 0) { 546 throw new RSRuntimeException("KL and KU must be greater than or equal to 0"); 547 } 548 int M = A.getType().getY(); 549 int N = A.getType().getX(); 550 551 boolean mUseIncSupp = isIncSupp(); 552 long aID = A.getID(mRS); 553 long xID = X.getID(mRS); 554 long yID = Y.getID(mRS); 555 if (mUseIncSupp) { 556 aID = getDummyAlloc(A); 557 xID = getDummyAlloc(X); 558 yID = getDummyAlloc(Y); 559 } 560 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, KL, KU, mUseIncSupp); 561 } 562 563 /** 564 * ZGBMV performs one of the matrix-vector operations 565 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y or y := alpha*A**H*x + beta*y 566 * 567 * Details: http://www.netlib.org/lapack/explore-html/d9/d46/zgbmv_8f.html 568 * 569 * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N), 570 * but only the region M*(KL+KU+1) will be referenced. The following subroutine can is an 571 * example showing how to convert the original matrix 'a' to row-based band matrix 'b'. 572 * for i in range(0, m): 573 * for j in range(max(0, i-kl), min(i+ku+1, n)): 574 * b[i, j-i+kl] = a[i, j] 575 * 576 * @param TransA The type of transpose applied to matrix A. 577 * @param KL The number of sub-diagonals of the matrix A. 578 * @param KU The number of super-diagonals of the matrix A. 579 * @param alpha The scalar alpha. 580 * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F64_2}. 581 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 582 * @param incX The increment for the elements of vector x, must be larger than zero. 583 * @param beta The scalar beta. 584 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. 585 * @param incY The increment for the elements of vector y, must be larger than zero. 586 */ 587 public void ZGBMV(@Transpose int TransA, int KL, int KU, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) { 588 // GBMV has the same validation requirements as GEMV + KL and KU >= 0 589 validateGEMV(Element.F64_2(mRS), TransA, A, X, incX, Y, incY); 590 if (KL < 0 || KU < 0) { 591 throw new RSRuntimeException("KL and KU must be greater than or equal to 0"); 592 } 593 int M = A.getType().getY(); 594 int N = A.getType().getX(); 595 596 boolean mUseIncSupp = isIncSupp(); 597 long aID = A.getID(mRS); 598 long xID = X.getID(mRS); 599 long yID = Y.getID(mRS); 600 if (mUseIncSupp) { 601 aID = getDummyAlloc(A); 602 xID = getDummyAlloc(X); 603 yID = getDummyAlloc(Y); 604 } 605 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, KL, KU, mUseIncSupp); 606 } 607 608 static void validateTRMV(Element e, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 609 validateTranspose(TransA); 610 validateUplo(Uplo); 611 validateDiag(Diag); 612 int N = A.getType().getY(); 613 if (A.getType().getX() != N) { 614 throw new RSRuntimeException("A must be a square matrix for TRMV"); 615 } 616 if (!A.getType().getElement().isCompatible(e) || 617 !X.getType().getElement().isCompatible(e)) { 618 throw new RSRuntimeException("Called BLAS with wrong Element type"); 619 } 620 if (X.getType().getY() > 1) { 621 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 622 } 623 624 if (incX <= 0) { 625 throw new RSRuntimeException("Vector increments must be greater than 0"); 626 } 627 int expectedXDim = 1 + (N - 1) * incX; 628 if (X.getType().getX() != expectedXDim) { 629 throw new RSRuntimeException("Incorrect vector dimensions for TRMV"); 630 } 631 } 632 633 static int validateTPMV(Element e, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 634 validateTranspose(TransA); 635 validateUplo(Uplo); 636 validateDiag(Diag); 637 if (!Ap.getType().getElement().isCompatible(e) || 638 !X.getType().getElement().isCompatible(e)) { 639 throw new RSRuntimeException("Called BLAS with wrong Element type"); 640 } 641 if (X.getType().getY() > 1) { 642 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 643 } 644 645 if (Ap.getType().getY() > 1) { 646 throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1"); 647 } 648 649 int N = (int)Math.sqrt((double)Ap.getType().getX() * 2); 650 //is it really doing anything? 651 if (Ap.getType().getX() != ((N * (N+1)) / 2)) { 652 throw new RSRuntimeException("Invalid dimension for Ap"); 653 } 654 if (incX <= 0) { 655 throw new RSRuntimeException("Vector increments must be greater than 0"); 656 } 657 int expectedXDim = 1 + (N - 1) * incX; 658 if (X.getType().getX() != expectedXDim) { 659 throw new RSRuntimeException("Incorrect vector dimensions for TPMV"); 660 } 661 662 return N; 663 } 664 665 /** 666 * STRMV performs one of the matrix-vector operations 667 * x := A*x or x := A**T*x 668 * 669 * Details: http://www.netlib.org/lapack/explore-html/de/d45/strmv_8f.html 670 * 671 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 672 * @param TransA The type of transpose applied to matrix A. 673 * @param Diag Specifies whether or not A is unit triangular. 674 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 675 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 676 * @param incX The increment for the elements of vector x, must be larger than zero. 677 */ 678 public void STRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 679 validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX); 680 int N = A.getType().getY(); 681 682 boolean mUseIncSupp = isIncSupp(); 683 long aID = A.getID(mRS); 684 long xID = X.getID(mRS); 685 if (mUseIncSupp) { 686 aID = getDummyAlloc(A); 687 xID = getDummyAlloc(X); 688 } 689 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp); 690 } 691 692 /** 693 * DTRMV performs one of the matrix-vector operations 694 * x := A*x or x := A**T*x 695 * 696 * Details: http://www.netlib.org/lapack/explore-html/dc/d7e/dtrmv_8f.html 697 * 698 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 699 * @param TransA The type of transpose applied to matrix A. 700 * @param Diag Specifies whether or not A is unit triangular. 701 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 702 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 703 * @param incX The increment for the elements of vector x, must be larger than zero. 704 */ 705 public void DTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 706 validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX); 707 int N = A.getType().getY(); 708 709 boolean mUseIncSupp = isIncSupp(); 710 long aID = A.getID(mRS); 711 long xID = X.getID(mRS); 712 if (mUseIncSupp) { 713 aID = getDummyAlloc(A); 714 xID = getDummyAlloc(X); 715 } 716 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp); 717 } 718 719 /** 720 * CTRMV performs one of the matrix-vector operations 721 * x := A*x or x := A**T*x or x := A**H*x 722 * 723 * Details: http://www.netlib.org/lapack/explore-html/df/d78/ctrmv_8f.html 724 * 725 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 726 * @param TransA The type of transpose applied to matrix A. 727 * @param Diag Specifies whether or not A is unit triangular. 728 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 729 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 730 * @param incX The increment for the elements of vector x, must be larger than zero. 731 */ 732 public void CTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 733 validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX); 734 int N = A.getType().getY(); 735 736 boolean mUseIncSupp = isIncSupp(); 737 long aID = A.getID(mRS); 738 long xID = X.getID(mRS); 739 if (mUseIncSupp) { 740 aID = getDummyAlloc(A); 741 xID = getDummyAlloc(X); 742 } 743 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp); 744 } 745 746 /** 747 * ZTRMV performs one of the matrix-vector operations 748 * x := A*x or x := A**T*x or x := A**H*x 749 * 750 * Details: http://www.netlib.org/lapack/explore-html/d0/dd1/ztrmv_8f.html 751 * 752 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 753 * @param TransA The type of transpose applied to matrix A. 754 * @param Diag Specifies whether or not A is unit triangular. 755 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 756 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 757 * @param incX The increment for the elements of vector x, must be larger than zero. 758 */ 759 public void ZTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 760 validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX); 761 int N = A.getType().getY(); 762 763 boolean mUseIncSupp = isIncSupp(); 764 long aID = A.getID(mRS); 765 long xID = X.getID(mRS); 766 if (mUseIncSupp) { 767 aID = getDummyAlloc(A); 768 xID = getDummyAlloc(X); 769 } 770 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp); 771 } 772 773 /** 774 * STBMV performs one of the matrix-vector operations 775 * x := A*x or x := A**T*x 776 * 777 * Details: http://www.netlib.org/lapack/explore-html/d6/d7d/stbmv_8f.html 778 * 779 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 780 * but only the region N*(K+1) will be referenced. The following subroutine can is an 781 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 782 * for i in range(0, n): 783 * for j in range(i, min(i+k+1, n)): 784 * b[i, j-i] = a[i, j] 785 * 786 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 787 * @param TransA The type of transpose applied to matrix A. 788 * @param Diag Specifies whether or not A is unit triangular. 789 * @param K The number of off-diagonals of the matrix A 790 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 791 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 792 * @param incX The increment for the elements of vector x, must be larger than zero. 793 */ 794 public void STBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { 795 // TBMV has the same requirements as TRMV + K >= 0 796 if (K < 0) { 797 throw new RSRuntimeException("K must be greater than or equal to 0"); 798 } 799 validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX); 800 int N = A.getType().getY(); 801 802 boolean mUseIncSupp = isIncSupp(); 803 long aID = A.getID(mRS); 804 long xID = X.getID(mRS); 805 if (mUseIncSupp) { 806 aID = getDummyAlloc(A); 807 xID = getDummyAlloc(X); 808 } 809 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp); 810 } 811 812 /** 813 * DTBMV performs one of the matrix-vector operations 814 * x := A*x or x := A**T*x 815 * 816 * Details: http://www.netlib.org/lapack/explore-html/df/d29/dtbmv_8f.html 817 * 818 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 819 * but only the region N*(K+1) will be referenced. The following subroutine can is an 820 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 821 * for i in range(0, n): 822 * for j in range(i, min(i+k+1, n)): 823 * b[i, j-i] = a[i, j] 824 * 825 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 826 * @param TransA The type of transpose applied to matrix A. 827 * @param Diag Specifies whether or not A is unit triangular. 828 * @param K The number of off-diagonals of the matrix A 829 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 830 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 831 * @param incX The increment for the elements of vector x, must be larger than zero. 832 */ 833 public void DTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { 834 // TBMV has the same requirements as TRMV + K >= 0 835 if (K < 0) { 836 throw new RSRuntimeException("K must be greater than or equal to 0"); 837 } 838 validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX); 839 int N = A.getType().getY(); 840 841 boolean mUseIncSupp = isIncSupp(); 842 long aID = A.getID(mRS); 843 long xID = X.getID(mRS); 844 if (mUseIncSupp) { 845 aID = getDummyAlloc(A); 846 xID = getDummyAlloc(X); 847 } 848 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp); 849 } 850 851 /** 852 * CTBMV performs one of the matrix-vector operations 853 * x := A*x or x := A**T*x or x := A**H*x 854 * 855 * Details: http://www.netlib.org/lapack/explore-html/d3/dcd/ctbmv_8f.html 856 * 857 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 858 * but only the region N*(K+1) will be referenced. The following subroutine can is an 859 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 860 * for i in range(0, n): 861 * for j in range(i, min(i+k+1, n)): 862 * b[i, j-i] = a[i, j] 863 * 864 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 865 * @param TransA The type of transpose applied to matrix A. 866 * @param Diag Specifies whether or not A is unit triangular. 867 * @param K The number of off-diagonals of the matrix A 868 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 869 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 870 * @param incX The increment for the elements of vector x, must be larger than zero. 871 */ 872 public void CTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { 873 // TBMV has the same requirements as TRMV + K >= 0 874 if (K < 0) { 875 throw new RSRuntimeException("K must be greater than or equal to 0"); 876 } 877 validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX); 878 int N = A.getType().getY(); 879 880 boolean mUseIncSupp = isIncSupp(); 881 long aID = A.getID(mRS); 882 long xID = X.getID(mRS); 883 if (mUseIncSupp) { 884 aID = getDummyAlloc(A); 885 xID = getDummyAlloc(X); 886 } 887 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp); 888 } 889 890 /** 891 * ZTBMV performs one of the matrix-vector operations 892 * x := A*x or x := A**T*x or x := A**H*x 893 * 894 * Details: http://www.netlib.org/lapack/explore-html/d3/d39/ztbmv_8f.html 895 * 896 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 897 * but only the region N*(K+1) will be referenced. The following subroutine can is an 898 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 899 * for i in range(0, n): 900 * for j in range(i, min(i+k+1, n)): 901 * b[i, j-i] = a[i, j] 902 * 903 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 904 * @param TransA The type of transpose applied to matrix A. 905 * @param Diag Specifies whether or not A is unit triangular. 906 * @param K The number of off-diagonals of the matrix A 907 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 908 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 909 * @param incX The increment for the elements of vector x, must be larger than zero. 910 */ 911 public void ZTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { 912 // TBMV has the same requirements as TRMV + K >= 0 913 if (K < 0) { 914 throw new RSRuntimeException("K must be greater than or equal to 0"); 915 } 916 validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX); 917 int N = A.getType().getY(); 918 919 boolean mUseIncSupp = isIncSupp(); 920 long aID = A.getID(mRS); 921 long xID = X.getID(mRS); 922 if (mUseIncSupp) { 923 aID = getDummyAlloc(A); 924 xID = getDummyAlloc(X); 925 } 926 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp); 927 } 928 929 /** 930 * STPMV performs one of the matrix-vector operations 931 * x := A*x or x := A**T*x 932 * 933 * Details: http://www.netlib.org/lapack/explore-html/db/db1/stpmv_8f.html 934 * 935 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 936 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 937 * 'a' to packed matrix 'b'. 938 * k = 0 939 * for i in range(0, n): 940 * for j in range(i, n): 941 * b[k++] = a[i, j] 942 * 943 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 944 * @param TransA The type of transpose applied to matrix A. 945 * @param Diag Specifies whether or not A is unit triangular. 946 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32}. 947 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 948 * @param incX The increment for the elements of vector x, must be larger than zero. 949 */ 950 public void STPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 951 int N = validateTPMV(Element.F32(mRS), Uplo, TransA, Diag, Ap, X, incX); 952 953 boolean mUseIncSupp = isIncSupp(); 954 long apID = Ap.getID(mRS); 955 long xID = X.getID(mRS); 956 if (mUseIncSupp) { 957 apID = getDummyAlloc(Ap); 958 xID = getDummyAlloc(X); 959 } 960 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, apID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp); 961 } 962 963 /** 964 * DTPMV performs one of the matrix-vector operations 965 * x := A*x or x := A**T*x 966 * 967 * Details: http://www.netlib.org/lapack/explore-html/dc/dcd/dtpmv_8f.html 968 * 969 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 970 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 971 * 'a' to packed matrix 'b'. 972 * k = 0 973 * for i in range(0, n): 974 * for j in range(i, n): 975 * b[k++] = a[i, j] 976 * 977 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 978 * @param TransA The type of transpose applied to matrix A. 979 * @param Diag Specifies whether or not A is unit triangular. 980 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64}. 981 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 982 * @param incX The increment for the elements of vector x, must be larger than zero. 983 */ 984 public void DTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 985 int N = validateTPMV(Element.F64(mRS), Uplo, TransA, Diag, Ap, X, incX); 986 987 boolean mUseIncSupp = isIncSupp(); 988 long apID = Ap.getID(mRS); 989 long xID = X.getID(mRS); 990 if (mUseIncSupp) { 991 apID = getDummyAlloc(Ap); 992 xID = getDummyAlloc(X); 993 } 994 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, apID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp); 995 } 996 997 /** 998 * CTPMV performs one of the matrix-vector operations 999 * x := A*x or x := A**T*x or x := A**H*x 1000 * 1001 * Details: http://www.netlib.org/lapack/explore-html/d4/dbb/ctpmv_8f.html 1002 * 1003 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1004 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1005 * 'a' to packed matrix 'b'. 1006 * k = 0 1007 * for i in range(0, n): 1008 * for j in range(i, n): 1009 * b[k++] = a[i, j] 1010 * 1011 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1012 * @param TransA The type of transpose applied to matrix A. 1013 * @param Diag Specifies whether or not A is unit triangular. 1014 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32_2}. 1015 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 1016 * @param incX The increment for the elements of vector x, must be larger than zero. 1017 */ 1018 public void CTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 1019 int N = validateTPMV(Element.F32_2(mRS), Uplo, TransA, Diag, Ap, X, incX); 1020 1021 boolean mUseIncSupp = isIncSupp(); 1022 long apID = Ap.getID(mRS); 1023 long xID = X.getID(mRS); 1024 if (mUseIncSupp) { 1025 apID = getDummyAlloc(Ap); 1026 xID = getDummyAlloc(X); 1027 } 1028 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, apID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1029 } 1030 1031 /** 1032 * ZTPMV performs one of the matrix-vector operations 1033 * x := A*x or x := A**T*x or x := A**H*x 1034 * 1035 * Details: http://www.netlib.org/lapack/explore-html/d2/d9e/ztpmv_8f.html 1036 * 1037 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1038 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1039 * 'a' to packed matrix 'b'. 1040 * k = 0 1041 * for i in range(0, n): 1042 * for j in range(i, n): 1043 * b[k++] = a[i, j] 1044 * 1045 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1046 * @param TransA The type of transpose applied to matrix A. 1047 * @param Diag Specifies whether or not A is unit triangular. 1048 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64_2}. 1049 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 1050 * @param incX The increment for the elements of vector x, must be larger than zero. 1051 */ 1052 public void ZTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 1053 int N = validateTPMV(Element.F64_2(mRS), Uplo, TransA, Diag, Ap, X, incX); 1054 1055 boolean mUseIncSupp = isIncSupp(); 1056 long apID = Ap.getID(mRS); 1057 long xID = X.getID(mRS); 1058 if (mUseIncSupp) { 1059 apID = getDummyAlloc(Ap); 1060 xID = getDummyAlloc(X); 1061 } 1062 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, apID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1063 } 1064 1065 /** 1066 * STRSV solves one of the systems of equations 1067 * A*x = b or A**T*x = b 1068 * 1069 * Details: http://www.netlib.org/lapack/explore-html/d0/d2a/strsv_8f.html 1070 * 1071 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1072 * @param TransA The type of transpose applied to matrix A. 1073 * @param Diag Specifies whether or not A is unit triangular. 1074 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 1075 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1076 * @param incX The increment for the elements of vector x, must be larger than zero. 1077 */ 1078 public void STRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 1079 // TRSV is the same as TRMV 1080 validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX); 1081 int N = A.getType().getY(); 1082 1083 boolean mUseIncSupp = isIncSupp(); 1084 long aID = A.getID(mRS); 1085 long xID = X.getID(mRS); 1086 if (mUseIncSupp) { 1087 aID = getDummyAlloc(A); 1088 xID = getDummyAlloc(X); 1089 } 1090 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1091 1092 } 1093 1094 /** 1095 * DTRSV solves one of the systems of equations 1096 * A*x = b or A**T*x = b 1097 * 1098 * Details: http://www.netlib.org/lapack/explore-html/d6/d96/dtrsv_8f.html 1099 * 1100 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1101 * @param TransA The type of transpose applied to matrix A. 1102 * @param Diag Specifies whether or not A is unit triangular. 1103 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 1104 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 1105 * @param incX The increment for the elements of vector x, must be larger than zero. 1106 */ 1107 public void DTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 1108 // TRSV is the same as TRMV 1109 validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX); 1110 int N = A.getType().getY(); 1111 1112 boolean mUseIncSupp = isIncSupp(); 1113 long aID = A.getID(mRS); 1114 long xID = X.getID(mRS); 1115 if (mUseIncSupp) { 1116 aID = getDummyAlloc(A); 1117 xID = getDummyAlloc(X); 1118 } 1119 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1120 1121 } 1122 1123 /** 1124 * CTRSV solves one of the systems of equations 1125 * A*x = b or A**T*x = b or A**H*x = b 1126 * 1127 * Details: http://www.netlib.org/lapack/explore-html/d4/dc8/ctrsv_8f.html 1128 * 1129 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1130 * @param TransA The type of transpose applied to matrix A. 1131 * @param Diag Specifies whether or not A is unit triangular. 1132 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 1133 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 1134 * @param incX The increment for the elements of vector x, must be larger than zero. 1135 */ 1136 public void CTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 1137 // TRSV is the same as TRMV 1138 validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX); 1139 int N = A.getType().getY(); 1140 1141 boolean mUseIncSupp = isIncSupp(); 1142 long aID = A.getID(mRS); 1143 long xID = X.getID(mRS); 1144 if (mUseIncSupp) { 1145 aID = getDummyAlloc(A); 1146 xID = getDummyAlloc(X); 1147 } 1148 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1149 1150 } 1151 1152 /** 1153 * ZTRSV solves one of the systems of equations 1154 * A*x = b or A**T*x = b or A**H*x = b 1155 * 1156 * Details: http://www.netlib.org/lapack/explore-html/d1/d2f/ztrsv_8f.html 1157 * 1158 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1159 * @param TransA The type of transpose applied to matrix A. 1160 * @param Diag Specifies whether or not A is unit triangular. 1161 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 1162 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 1163 * @param incX The increment for the elements of vector x, must be larger than zero. 1164 */ 1165 public void ZTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 1166 // TRSV is the same as TRMV 1167 validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX); 1168 int N = A.getType().getY(); 1169 1170 boolean mUseIncSupp = isIncSupp(); 1171 long aID = A.getID(mRS); 1172 long xID = X.getID(mRS); 1173 if (mUseIncSupp) { 1174 aID = getDummyAlloc(A); 1175 xID = getDummyAlloc(X); 1176 } 1177 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1178 1179 } 1180 1181 /** 1182 * STBSV solves one of the systems of equations 1183 * A*x = b or A**T*x = b 1184 * 1185 * Details: http://www.netlib.org/lapack/explore-html/d0/d1f/stbsv_8f.html 1186 * 1187 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 1188 * but only the region N*(K+1) will be referenced. The following subroutine can is an 1189 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 1190 * for i in range(0, n): 1191 * for j in range(i, min(i+k+1, n)): 1192 * b[i, j-i] = a[i, j] 1193 * 1194 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1195 * @param TransA The type of transpose applied to matrix A. 1196 * @param Diag Specifies whether or not A is unit triangular. 1197 * @param K The number of off-diagonals of the matrix A 1198 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 1199 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1200 * @param incX The increment for the elements of vector x, must be larger than zero. 1201 */ 1202 public void STBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { 1203 // TBSV is the same as TRMV + K >= 0 1204 validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX); 1205 int N = A.getType().getY(); 1206 if (K < 0) { 1207 throw new RSRuntimeException("Number of diagonals must be positive"); 1208 } 1209 1210 boolean mUseIncSupp = isIncSupp(); 1211 long aID = A.getID(mRS); 1212 long xID = X.getID(mRS); 1213 if (mUseIncSupp) { 1214 aID = getDummyAlloc(A); 1215 xID = getDummyAlloc(X); 1216 } 1217 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1218 } 1219 1220 /** 1221 * DTBSV solves one of the systems of equations 1222 * A*x = b or A**T*x = b 1223 * 1224 * Details: http://www.netlib.org/lapack/explore-html/d4/dcf/dtbsv_8f.html 1225 * 1226 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 1227 * but only the region N*(K+1) will be referenced. The following subroutine can is an 1228 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 1229 * for i in range(0, n): 1230 * for j in range(i, min(i+k+1, n)): 1231 * b[i, j-i] = a[i, j] 1232 * 1233 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1234 * @param TransA The type of transpose applied to matrix A. 1235 * @param Diag Specifies whether or not A is unit triangular. 1236 * @param K The number of off-diagonals of the matrix A 1237 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 1238 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 1239 * @param incX The increment for the elements of vector x, must be larger than zero. 1240 */ 1241 public void DTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { 1242 // TBSV is the same as TRMV + K >= 0 1243 validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX); 1244 int N = A.getType().getY(); 1245 if (K < 0) { 1246 throw new RSRuntimeException("Number of diagonals must be positive"); 1247 } 1248 1249 boolean mUseIncSupp = isIncSupp(); 1250 long aID = A.getID(mRS); 1251 long xID = X.getID(mRS); 1252 if (mUseIncSupp) { 1253 aID = getDummyAlloc(A); 1254 xID = getDummyAlloc(X); 1255 } 1256 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1257 } 1258 1259 /** 1260 * CTBSV solves one of the systems of equations 1261 * A*x = b or A**T*x = b or A**H*x = b 1262 * 1263 * Details: http://www.netlib.org/lapack/explore-html/d9/d5f/ctbsv_8f.html 1264 * 1265 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 1266 * but only the region N*(K+1) will be referenced. The following subroutine can is an 1267 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 1268 * for i in range(0, n): 1269 * for j in range(i, min(i+k+1, n)): 1270 * b[i, j-i] = a[i, j] 1271 * 1272 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1273 * @param TransA The type of transpose applied to matrix A. 1274 * @param Diag Specifies whether or not A is unit triangular. 1275 * @param K The number of off-diagonals of the matrix A 1276 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 1277 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 1278 * @param incX The increment for the elements of vector x, must be larger than zero. 1279 */ 1280 public void CTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { 1281 // TBSV is the same as TRMV + K >= 0 1282 validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX); 1283 int N = A.getType().getY(); 1284 if (K < 0) { 1285 throw new RSRuntimeException("Number of diagonals must be positive"); 1286 } 1287 1288 boolean mUseIncSupp = isIncSupp(); 1289 long aID = A.getID(mRS); 1290 long xID = X.getID(mRS); 1291 if (mUseIncSupp) { 1292 aID = getDummyAlloc(A); 1293 xID = getDummyAlloc(X); 1294 } 1295 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1296 } 1297 1298 /** 1299 * ZTBSV solves one of the systems of equations 1300 * A*x = b or A**T*x = b or A**H*x = b 1301 * 1302 * Details: http://www.netlib.org/lapack/explore-html/d4/d5a/ztbsv_8f.html 1303 * 1304 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 1305 * but only the region N*(K+1) will be referenced. The following subroutine can is an 1306 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 1307 * for i in range(0, n): 1308 * for j in range(i, min(i+k+1, n)): 1309 * b[i, j-i] = a[i, j] 1310 * 1311 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1312 * @param TransA The type of transpose applied to matrix A. 1313 * @param Diag Specifies whether or not A is unit triangular. 1314 * @param K The number of off-diagonals of the matrix A 1315 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 1316 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 1317 * @param incX The increment for the elements of vector x, must be larger than zero. 1318 */ 1319 public void ZTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { 1320 // TBSV is the same as TRMV + K >= 0 1321 validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX); 1322 int N = A.getType().getY(); 1323 if (K < 0) { 1324 throw new RSRuntimeException("Number of diagonals must be positive"); 1325 } 1326 1327 boolean mUseIncSupp = isIncSupp(); 1328 long aID = A.getID(mRS); 1329 long xID = X.getID(mRS); 1330 if (mUseIncSupp) { 1331 aID = getDummyAlloc(A); 1332 xID = getDummyAlloc(X); 1333 } 1334 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1335 } 1336 1337 /** 1338 * STPSV solves one of the systems of equations 1339 * A*x = b or A**T*x = b 1340 * 1341 * Details: http://www.netlib.org/lapack/explore-html/d0/d7c/stpsv_8f.html 1342 * 1343 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1344 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1345 * 'a' to packed matrix 'b'. 1346 * k = 0 1347 * for i in range(0, n): 1348 * for j in range(i, n): 1349 * b[k++] = a[i, j] 1350 * 1351 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1352 * @param TransA The type of transpose applied to matrix A. 1353 * @param Diag Specifies whether or not A is unit triangular. 1354 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32}. 1355 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1356 * @param incX The increment for the elements of vector x, must be larger than zero. 1357 */ 1358 public void STPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 1359 // TPSV is same as TPMV 1360 int N = validateTPMV(Element.F32(mRS), Uplo, TransA, Diag, Ap, X, incX); 1361 1362 boolean mUseIncSupp = isIncSupp(); 1363 long apID = Ap.getID(mRS); 1364 long xID = X.getID(mRS); 1365 if (mUseIncSupp) { 1366 apID = getDummyAlloc(Ap); 1367 xID = getDummyAlloc(X); 1368 } 1369 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, apID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1370 } 1371 1372 /** 1373 * DTPSV solves one of the systems of equations 1374 * A*x = b or A**T*x = b 1375 * 1376 * Details: http://www.netlib.org/lapack/explore-html/d9/d84/dtpsv_8f.html 1377 * 1378 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1379 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1380 * 'a' to packed matrix 'b'. 1381 * k = 0 1382 * for i in range(0, n): 1383 * for j in range(i, n): 1384 * b[k++] = a[i, j] 1385 * 1386 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1387 * @param TransA The type of transpose applied to matrix A. 1388 * @param Diag Specifies whether or not A is unit triangular. 1389 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64}. 1390 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 1391 * @param incX The increment for the elements of vector x, must be larger than zero. 1392 */ 1393 public void DTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 1394 // TPSV is same as TPMV 1395 int N = validateTPMV(Element.F64(mRS), Uplo, TransA, Diag, Ap, X, incX); 1396 1397 boolean mUseIncSupp = isIncSupp(); 1398 long apID = Ap.getID(mRS); 1399 long xID = X.getID(mRS); 1400 if (mUseIncSupp) { 1401 apID = getDummyAlloc(Ap); 1402 xID = getDummyAlloc(X); 1403 } 1404 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, apID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1405 } 1406 1407 /** 1408 * CTPSV solves one of the systems of equations 1409 * A*x = b or A**T*x = b or A**H*x = b 1410 * 1411 * Details: http://www.netlib.org/lapack/explore-html/d8/d56/ctpsv_8f.html 1412 * 1413 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1414 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1415 * 'a' to packed matrix 'b'. 1416 * k = 0 1417 * for i in range(0, n): 1418 * for j in range(i, n): 1419 * b[k++] = a[i, j] 1420 * 1421 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1422 * @param TransA The type of transpose applied to matrix A. 1423 * @param Diag Specifies whether or not A is unit triangular. 1424 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32_2}. 1425 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 1426 * @param incX The increment for the elements of vector x, must be larger than zero. 1427 */ 1428 public void CTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 1429 // TPSV is same as TPMV 1430 int N = validateTPMV(Element.F32_2(mRS), Uplo, TransA, Diag, Ap, X, incX); 1431 1432 boolean mUseIncSupp = isIncSupp(); 1433 long apID = Ap.getID(mRS); 1434 long xID = X.getID(mRS); 1435 if (mUseIncSupp) { 1436 apID = getDummyAlloc(Ap); 1437 xID = getDummyAlloc(X); 1438 } 1439 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, apID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1440 } 1441 1442 /** 1443 * ZTPSV solves one of the systems of equations 1444 * A*x = b or A**T*x = b or A**H*x = b 1445 * 1446 * Details: http://www.netlib.org/lapack/explore-html/da/d57/ztpsv_8f.html 1447 * 1448 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1449 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1450 * 'a' to packed matrix 'b'. 1451 * k = 0 1452 * for i in range(0, n): 1453 * for j in range(i, n): 1454 * b[k++] = a[i, j] 1455 * 1456 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1457 * @param TransA The type of transpose applied to matrix A. 1458 * @param Diag Specifies whether or not A is unit triangular. 1459 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64_2}. 1460 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 1461 * @param incX The increment for the elements of vector x, must be larger than zero. 1462 */ 1463 public void ZTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 1464 // TPSV is same as TPMV 1465 int N = validateTPMV(Element.F64_2(mRS), Uplo, TransA, Diag, Ap, X, incX); 1466 1467 boolean mUseIncSupp = isIncSupp(); 1468 long apID = Ap.getID(mRS); 1469 long xID = X.getID(mRS); 1470 if (mUseIncSupp) { 1471 apID = getDummyAlloc(Ap); 1472 xID = getDummyAlloc(X); 1473 } 1474 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, apID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1475 } 1476 1477 /** 1478 * Level 2, S and D only 1479 */ 1480 static int validateSYMV(Element e, @Uplo int Uplo, Allocation A, Allocation X, Allocation Y, int incX, int incY) { 1481 validateUplo(Uplo); 1482 int N = A.getType().getY(); 1483 if (A.getType().getX() != N) { 1484 throw new RSRuntimeException("A must be a square matrix for SYMV"); 1485 } 1486 if (!A.getType().getElement().isCompatible(e) || 1487 !X.getType().getElement().isCompatible(e) || 1488 !Y.getType().getElement().isCompatible(e) ) { 1489 throw new RSRuntimeException("Called BLAS with wrong Element type"); 1490 } 1491 if (X.getType().getY() > 1 || Y.getType().getY() > 1) { 1492 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 1493 } 1494 1495 if (incX <= 0 || incY <= 0) { 1496 throw new RSRuntimeException("Vector increments must be greater than 0"); 1497 } 1498 int expectedXDim = 1 + (N - 1) * incX; 1499 if (X.getType().getX() != expectedXDim) { 1500 throw new RSRuntimeException("Incorrect vector dimensions for SYMV"); 1501 } 1502 int expectedYDim = 1 + (N - 1) * incY; 1503 if (Y.getType().getX() != expectedYDim) { 1504 throw new RSRuntimeException("Incorrect vector dimensions for SYMV"); 1505 } 1506 return N; 1507 } 1508 static int validateSPMV(Element e, @Uplo int Uplo, Allocation Ap, Allocation X, int incX, Allocation Y, int incY) { 1509 validateUplo(Uplo); 1510 if (!Ap.getType().getElement().isCompatible(e) || 1511 !X.getType().getElement().isCompatible(e) || 1512 !Y.getType().getElement().isCompatible(e)) { 1513 throw new RSRuntimeException("Called BLAS with wrong Element type"); 1514 } 1515 if (X.getType().getY() > 1 || Y.getType().getY() > 1) { 1516 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 1517 } 1518 1519 if (Ap.getType().getY() > 1) { 1520 throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1"); 1521 } 1522 1523 int N = (int)Math.sqrt((double)Ap.getType().getX() * 2); 1524 if (Ap.getType().getX() != ((N * (N+1)) / 2)) { 1525 throw new RSRuntimeException("Invalid dimension for Ap"); 1526 } 1527 if (incX <= 0 || incY <= 0) { 1528 throw new RSRuntimeException("Vector increments must be greater than 0"); 1529 } 1530 int expectedXDim = 1 + (N - 1) * incX; 1531 if (X.getType().getX() != expectedXDim) { 1532 throw new RSRuntimeException("Incorrect vector dimensions for SPMV"); 1533 } 1534 int expectedYDim = 1 + (N - 1) * incY; 1535 if (Y.getType().getX() != expectedYDim) { 1536 throw new RSRuntimeException("Incorrect vector dimensions for SPMV"); 1537 } 1538 1539 return N; 1540 } 1541 static void validateGER(Element e, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 1542 if (!A.getType().getElement().isCompatible(e) || 1543 !X.getType().getElement().isCompatible(e) || 1544 !Y.getType().getElement().isCompatible(e) ) { 1545 throw new RSRuntimeException("Called BLAS with wrong Element type"); 1546 } 1547 1548 if (X.getType().getY() > 1 || Y.getType().getY() > 1) { 1549 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 1550 } 1551 1552 int M = A.getType().getY(); 1553 int N = A.getType().getX(); 1554 1555 if (N < 1 || M < 1) { 1556 throw new RSRuntimeException("M and N must be 1 or greater for GER"); 1557 } 1558 if (incX <= 0 || incY <= 0) { 1559 throw new RSRuntimeException("Vector increments must be greater than 0"); 1560 } 1561 int expectedXDim = 1 + (M - 1) * incX; 1562 if (X.getType().getX() != expectedXDim) { 1563 throw new RSRuntimeException("Incorrect vector dimensions for GER"); 1564 } 1565 int expectedYDim = 1 + (N - 1) * incY; 1566 if (Y.getType().getX() != expectedYDim) { 1567 throw new RSRuntimeException("Incorrect vector dimensions for GER"); 1568 } 1569 1570 1571 } 1572 static int validateSYR(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation A) { 1573 validateUplo(Uplo); 1574 if (!A.getType().getElement().isCompatible(e) || 1575 !X.getType().getElement().isCompatible(e)) { 1576 throw new RSRuntimeException("Called BLAS with wrong Element type"); 1577 } 1578 1579 int N = A.getType().getX(); 1580 1581 if (X.getType().getY() > 1) { 1582 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 1583 } 1584 if (N != A.getType().getY()) { 1585 throw new RSRuntimeException("A must be a symmetric matrix"); 1586 } 1587 if (incX <= 0) { 1588 throw new RSRuntimeException("Vector increments must be greater than 0"); 1589 } 1590 int expectedXDim = 1 + (N - 1) * incX; 1591 if (X.getType().getX() != expectedXDim) { 1592 throw new RSRuntimeException("Incorrect vector dimensions for SYR"); 1593 } 1594 return N; 1595 } 1596 static int validateSPR(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Ap) { 1597 validateUplo(Uplo); 1598 if (!Ap.getType().getElement().isCompatible(e) || 1599 !X.getType().getElement().isCompatible(e)) { 1600 throw new RSRuntimeException("Called BLAS with wrong Element type"); 1601 } 1602 if (X.getType().getY() > 1) { 1603 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 1604 } 1605 1606 if (Ap.getType().getY() > 1) { 1607 throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1"); 1608 } 1609 1610 int N = (int)Math.sqrt((double)Ap.getType().getX() * 2); 1611 if (Ap.getType().getX() != ((N * (N+1)) / 2)) { 1612 throw new RSRuntimeException("Invalid dimension for Ap"); 1613 } 1614 if (incX <= 0) { 1615 throw new RSRuntimeException("Vector increments must be greater than 0"); 1616 } 1617 int expectedXDim = 1 + (N - 1) * incX; 1618 if (X.getType().getX() != expectedXDim) { 1619 throw new RSRuntimeException("Incorrect vector dimensions for SPR"); 1620 } 1621 1622 return N; 1623 } 1624 1625 static int validateSYR2(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 1626 validateUplo(Uplo); 1627 if (!A.getType().getElement().isCompatible(e) || 1628 !X.getType().getElement().isCompatible(e) || 1629 !Y.getType().getElement().isCompatible(e)) { 1630 throw new RSRuntimeException("Called BLAS with wrong Element type"); 1631 } 1632 1633 if (X.getType().getY() > 1 || Y.getType().getY() > 1) { 1634 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 1635 } 1636 1637 int N = A.getType().getX(); 1638 1639 if (N != A.getType().getY()) { 1640 throw new RSRuntimeException("A must be a symmetric matrix"); 1641 } 1642 if (incX <= 0 || incY <= 0) { 1643 throw new RSRuntimeException("Vector increments must be greater than 0"); 1644 } 1645 int expectedXDim = 1 + (N - 1) * incX; 1646 int expectedYDim = 1 + (N - 1) * incY; 1647 if (X.getType().getX() != expectedXDim || Y.getType().getX() != expectedYDim) { 1648 throw new RSRuntimeException("Incorrect vector dimensions for SYR"); 1649 } 1650 return N; 1651 1652 } 1653 static int validateSPR2(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) { 1654 validateUplo(Uplo); 1655 if (!Ap.getType().getElement().isCompatible(e) || 1656 !X.getType().getElement().isCompatible(e) || 1657 !Y.getType().getElement().isCompatible(e)) { 1658 throw new RSRuntimeException("Called BLAS with wrong Element type"); 1659 } 1660 if (X.getType().getY() > 1 || Y.getType().getY() > 1) { 1661 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 1662 } 1663 1664 if (Ap.getType().getY() > 1) { 1665 throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1"); 1666 } 1667 1668 int N = (int)Math.sqrt((double)Ap.getType().getX() * 2); 1669 if (Ap.getType().getX() != ((N * (N+1)) / 2)) { 1670 throw new RSRuntimeException("Invalid dimension for Ap"); 1671 } 1672 if (incX <= 0 || incY <= 0) { 1673 throw new RSRuntimeException("Vector increments must be greater than 0"); 1674 } 1675 int expectedXDim = 1 + (N - 1) * incX; 1676 int expectedYDim = 1 + (N - 1) * incY; 1677 if (X.getType().getX() != expectedXDim || Y.getType().getX() != expectedYDim) { 1678 throw new RSRuntimeException("Incorrect vector dimensions for SPR2"); 1679 } 1680 1681 return N; 1682 } 1683 1684 /** 1685 * SSYMV performs the matrix-vector operation 1686 * y := alpha*A*x + beta*y 1687 * 1688 * Details: http://www.netlib.org/lapack/explore-html/d2/d94/ssymv_8f.html 1689 * 1690 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 1691 * @param alpha The scalar alpha. 1692 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 1693 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1694 * @param incX The increment for the elements of vector x, must be larger than zero. 1695 * @param beta The scalar beta. 1696 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}. 1697 * @param incY The increment for the elements of vector y, must be larger than zero. 1698 */ 1699 public void SSYMV(@Uplo int Uplo, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) { 1700 int N = validateSYMV(Element.F32(mRS), Uplo, A, X, Y, incX, incY); 1701 1702 boolean mUseIncSupp = isIncSupp(); 1703 long aID = A.getID(mRS); 1704 long xID = X.getID(mRS); 1705 long yID = Y.getID(mRS); 1706 if (mUseIncSupp) { 1707 aID = getDummyAlloc(A); 1708 xID = getDummyAlloc(X); 1709 yID = getDummyAlloc(Y); 1710 } 1711 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssymv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, aID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp); 1712 } 1713 1714 /** 1715 * SSBMV performs the matrix-vector operation 1716 * y := alpha*A*x + beta*y 1717 * 1718 * Details: http://www.netlib.org/lapack/explore-html/d3/da1/ssbmv_8f.html 1719 * 1720 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 1721 * but only the region N*(K+1) will be referenced. The following subroutine can is an 1722 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 1723 * for i in range(0, n): 1724 * for j in range(i, min(i+k+1, n)): 1725 * b[i, j-i] = a[i, j] 1726 * 1727 * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied. 1728 * @param K The number of off-diagonals of the matrix A 1729 * @param alpha The scalar alpha. 1730 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 1731 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1732 * @param incX The increment for the elements of vector x, must be larger than zero. 1733 * @param beta The scalar beta. 1734 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}. 1735 * @param incY The increment for the elements of vector y, must be larger than zero. 1736 */ 1737 public void SSBMV(@Uplo int Uplo, int K, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) { 1738 // SBMV is the same as SYMV + K >= 0 1739 if (K < 0) { 1740 throw new RSRuntimeException("K must be greater than or equal to 0"); 1741 } 1742 int N = validateSYMV(Element.F32(mRS), Uplo, A, X, Y, incX, incY); 1743 1744 boolean mUseIncSupp = isIncSupp(); 1745 long aID = A.getID(mRS); 1746 long xID = X.getID(mRS); 1747 long yID = Y.getID(mRS); 1748 if (mUseIncSupp) { 1749 aID = getDummyAlloc(A); 1750 xID = getDummyAlloc(X); 1751 yID = getDummyAlloc(Y); 1752 } 1753 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha, aID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp); 1754 } 1755 1756 /** 1757 * SSPMV performs the matrix-vector operation 1758 * y := alpha*A*x + beta*y 1759 * 1760 * Details: http://www.netlib.org/lapack/explore-html/d8/d68/sspmv_8f.html 1761 * 1762 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1763 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1764 * 'a' to packed matrix 'b'. 1765 * k = 0 1766 * for i in range(0, n): 1767 * for j in range(i, n): 1768 * b[k++] = a[i, j] 1769 * 1770 * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form. 1771 * @param alpha The scalar alpha. 1772 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32}. 1773 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1774 * @param incX The increment for the elements of vector x, must be larger than zero. 1775 * @param beta The scalar beta. 1776 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}. 1777 * @param incY The increment for the elements of vector y, must be larger than zero. 1778 */ 1779 public void SSPMV(@Uplo int Uplo, float alpha, Allocation Ap, Allocation X, int incX, float beta, Allocation Y, int incY) { 1780 int N = validateSPMV(Element.F32(mRS), Uplo, Ap, X, incX, Y, incY); 1781 1782 boolean mUseIncSupp = isIncSupp(); 1783 long apID = Ap.getID(mRS); 1784 long xID = X.getID(mRS); 1785 long yID = Y.getID(mRS); 1786 if (mUseIncSupp) { 1787 apID = getDummyAlloc(Ap); 1788 xID = getDummyAlloc(X); 1789 yID = getDummyAlloc(Y); 1790 } 1791 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sspmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, apID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp); 1792 } 1793 1794 /** 1795 * SGER performs the rank 1 operation 1796 * A := alpha*x*y**T + A 1797 * 1798 * Details: http://www.netlib.org/lapack/explore-html/db/d5c/sger_8f.html 1799 * 1800 * @param alpha The scalar alpha. 1801 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1802 * @param incX The increment for the elements of vector x, must be larger than zero. 1803 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}. 1804 * @param incY The increment for the elements of vector y, must be larger than zero. 1805 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 1806 */ 1807 public void SGER(float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 1808 int M = A.getType().getY(); 1809 int N = A.getType().getX(); 1810 validateGER(Element.F32(mRS), X, incX, Y, incY, A); 1811 1812 boolean mUseIncSupp = isIncSupp(); 1813 long aID = A.getID(mRS); 1814 long xID = X.getID(mRS); 1815 long yID = Y.getID(mRS); 1816 if (mUseIncSupp) { 1817 aID = getDummyAlloc(A); 1818 xID = getDummyAlloc(X); 1819 yID = getDummyAlloc(Y); 1820 } 1821 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sger, 0, 0, 0, 0, 0, M, N, 0, alpha, xID, yID, 0.f, aID, incX, incY, 0, 0, mUseIncSupp); 1822 } 1823 1824 /** 1825 * SSYR performs the rank 1 operation 1826 * A := alpha*x*x**T + A 1827 * 1828 * Details: http://www.netlib.org/lapack/explore-html/d6/dac/ssyr_8f.html 1829 * 1830 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 1831 * @param alpha The scalar alpha. 1832 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1833 * @param incX The increment for the elements of vector x, must be larger than zero. 1834 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 1835 */ 1836 public void SSYR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation A) { 1837 int N = validateSYR(Element.F32(mRS), Uplo, X, incX, A); 1838 1839 boolean mUseIncSupp = isIncSupp(); 1840 long aID = A.getID(mRS); 1841 long xID = X.getID(mRS); 1842 if (mUseIncSupp) { 1843 aID = getDummyAlloc(A); 1844 xID = getDummyAlloc(X); 1845 } 1846 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, aID, 0.f, 0, incX, 0, 0, 0, mUseIncSupp); 1847 } 1848 1849 /** 1850 * SSPR performs the rank 1 operation 1851 * A := alpha*x*x**T + A 1852 * 1853 * Details: http://www.netlib.org/lapack/explore-html/d2/d9b/sspr_8f.html 1854 * 1855 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1856 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1857 * 'a' to packed matrix 'b'. 1858 * k = 0 1859 * for i in range(0, n): 1860 * for j in range(i, n): 1861 * b[k++] = a[i, j] 1862 * 1863 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. 1864 * @param alpha The scalar alpha. 1865 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1866 * @param incX The increment for the elements of vector x, must be larger than zero. 1867 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32}. 1868 */ 1869 public void SSPR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Ap) { 1870 int N = validateSPR(Element.F32(mRS), Uplo, X, incX, Ap); 1871 1872 boolean mUseIncSupp = isIncSupp(); 1873 long apID = Ap.getID(mRS); 1874 long xID = X.getID(mRS); 1875 if (mUseIncSupp) { 1876 apID = getDummyAlloc(Ap); 1877 xID = getDummyAlloc(X); 1878 } 1879 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sspr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, apID, 0.f, 0, incX, 0, 0, 0, mUseIncSupp); 1880 } 1881 1882 /** 1883 * SSYR2 performs the symmetric rank 2 operation 1884 * A := alpha*x*y**T + alpha*y*x**T + A 1885 * 1886 * Details: http://www.netlib.org/lapack/explore-html/db/d99/ssyr2_8f.html 1887 * 1888 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 1889 * @param alpha The scalar alpha. 1890 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1891 * @param incX The increment for the elements of vector x, must be larger than zero. 1892 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}. 1893 * @param incY The increment for the elements of vector y, must be larger than zero. 1894 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 1895 */ 1896 public void SSYR2(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 1897 int N = validateSYR2(Element.F32(mRS), Uplo, X, incX, Y, incY, A); 1898 1899 boolean mUseIncSupp = isIncSupp(); 1900 long aID = A.getID(mRS); 1901 long xID = X.getID(mRS); 1902 long yID = Y.getID(mRS); 1903 if (mUseIncSupp) { 1904 aID = getDummyAlloc(A); 1905 xID = getDummyAlloc(X); 1906 yID = getDummyAlloc(Y); 1907 } 1908 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, yID, 0, aID, incX, incY, 0, 0, mUseIncSupp); 1909 } 1910 1911 /** 1912 * SSPR2 performs the symmetric rank 2 operation 1913 * A := alpha*x*y**T + alpha*y*x**T + A 1914 * 1915 * Details: http://www.netlib.org/lapack/explore-html/db/d3e/sspr2_8f.html 1916 * 1917 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1918 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1919 * 'a' to packed matrix 'b'. 1920 * k = 0 1921 * for i in range(0, n): 1922 * for j in range(i, n): 1923 * b[k++] = a[i, j] 1924 * 1925 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. 1926 * @param alpha The scalar alpha. 1927 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1928 * @param incX The increment for the elements of vector x, must be larger than zero. 1929 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}. 1930 * @param incY The increment for the elements of vector y, must be larger than zero. 1931 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32}. 1932 */ 1933 public void SSPR2(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) { 1934 int N = validateSPR2(Element.F32(mRS), Uplo, X, incX, Y, incY, Ap); 1935 1936 boolean mUseIncSupp = isIncSupp(); 1937 long apID = Ap.getID(mRS); 1938 long xID = X.getID(mRS); 1939 long yID = Y.getID(mRS); 1940 if (mUseIncSupp) { 1941 apID = getDummyAlloc(Ap); 1942 xID = getDummyAlloc(X); 1943 yID = getDummyAlloc(Y); 1944 } 1945 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sspr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, yID, 0, apID, incX, incY, 0, 0, mUseIncSupp); 1946 } 1947 1948 /** 1949 * DSYMV performs the matrix-vector operation 1950 * y := alpha*A*x + beta*y 1951 * 1952 * Details: http://www.netlib.org/lapack/explore-html/d8/dbe/dsymv_8f.html 1953 * 1954 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 1955 * @param alpha The scalar alpha. 1956 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 1957 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 1958 * @param incX The increment for the elements of vector x, must be larger than zero. 1959 * @param beta The scalar beta. 1960 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. 1961 * @param incY The increment for the elements of vector y, must be larger than zero. 1962 */ 1963 public void DSYMV(@Uplo int Uplo, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) { 1964 int N = validateSYMV(Element.F64(mRS), Uplo, A, X, Y, incX, incY); 1965 1966 boolean mUseIncSupp = isIncSupp(); 1967 long aID = A.getID(mRS); 1968 long xID = X.getID(mRS); 1969 long yID = Y.getID(mRS); 1970 if (mUseIncSupp) { 1971 aID = getDummyAlloc(A); 1972 xID = getDummyAlloc(X); 1973 yID = getDummyAlloc(Y); 1974 } 1975 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsymv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, aID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp); 1976 } 1977 1978 /** 1979 * DSBMV performs the matrix-vector operation 1980 * y := alpha*A*x + beta*y 1981 * 1982 * Details: http://www.netlib.org/lapack/explore-html/d8/d1e/dsbmv_8f.html 1983 * 1984 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 1985 * but only the region N*(K+1) will be referenced. The following subroutine can is an 1986 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 1987 * for i in range(0, n): 1988 * for j in range(i, min(i+k+1, n)): 1989 * b[i, j-i] = a[i, j] 1990 * 1991 * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied. 1992 * @param K The number of off-diagonals of the matrix A 1993 * @param alpha The scalar alpha. 1994 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 1995 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 1996 * @param incX The increment for the elements of vector x, must be larger than zero. 1997 * @param beta The scalar beta. 1998 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. 1999 * @param incY The increment for the elements of vector y, must be larger than zero. 2000 */ 2001 public void DSBMV(@Uplo int Uplo, int K, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) { 2002 // SBMV is the same as SYMV + K >= 0 2003 if (K < 0) { 2004 throw new RSRuntimeException("K must be greater than or equal to 0"); 2005 } 2006 int N = validateSYMV(Element.F64(mRS), Uplo, A, X, Y, incX, incY); 2007 2008 boolean mUseIncSupp = isIncSupp(); 2009 long aID = A.getID(mRS); 2010 long xID = X.getID(mRS); 2011 long yID = Y.getID(mRS); 2012 if (mUseIncSupp) { 2013 aID = getDummyAlloc(A); 2014 xID = getDummyAlloc(X); 2015 yID = getDummyAlloc(Y); 2016 } 2017 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha, aID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp); 2018 } 2019 2020 /** 2021 * DSPMV performs the matrix-vector operation 2022 * y := alpha*A*x + beta*y 2023 * 2024 * Details: http://www.netlib.org/lapack/explore-html/d4/d85/dspmv_8f.html 2025 * 2026 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 2027 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 2028 * 'a' to packed matrix 'b'. 2029 * k = 0 2030 * for i in range(0, n): 2031 * for j in range(i, n): 2032 * b[k++] = a[i, j] 2033 * 2034 * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form. 2035 * @param alpha The scalar alpha. 2036 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64}. 2037 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 2038 * @param incX The increment for the elements of vector x, must be larger than zero. 2039 * @param beta The scalar beta. 2040 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. 2041 * @param incY The increment for the elements of vector y, must be larger than zero. 2042 */ 2043 public void DSPMV(@Uplo int Uplo, double alpha, Allocation Ap, Allocation X, int incX, double beta, Allocation Y, int incY) { 2044 int N = validateSPMV(Element.F64(mRS), Uplo, Ap, X, incX, Y, incY); 2045 2046 boolean mUseIncSupp = isIncSupp(); 2047 long apID = Ap.getID(mRS); 2048 long xID = X.getID(mRS); 2049 long yID = Y.getID(mRS); 2050 if (mUseIncSupp) { 2051 apID = getDummyAlloc(Ap); 2052 xID = getDummyAlloc(X); 2053 yID = getDummyAlloc(Y); 2054 } 2055 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dspmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, apID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp); 2056 } 2057 2058 /** 2059 * DGER performs the rank 1 operation 2060 * A := alpha*x*y**T + A 2061 * 2062 * Details: http://www.netlib.org/lapack/explore-html/dc/da8/dger_8f.html 2063 * 2064 * @param alpha The scalar alpha. 2065 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 2066 * @param incX The increment for the elements of vector x, must be larger than zero. 2067 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. 2068 * @param incY The increment for the elements of vector y, must be larger than zero. 2069 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 2070 */ 2071 public void DGER(double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 2072 int M = A.getType().getY(); 2073 int N = A.getType().getX(); 2074 validateGER(Element.F64(mRS), X, incX, Y, incY, A); 2075 2076 boolean mUseIncSupp = isIncSupp(); 2077 long aID = A.getID(mRS); 2078 long xID = X.getID(mRS); 2079 long yID = Y.getID(mRS); 2080 if (mUseIncSupp) { 2081 aID = getDummyAlloc(A); 2082 xID = getDummyAlloc(X); 2083 yID = getDummyAlloc(Y); 2084 } 2085 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dger, 0, 0, 0, 0, 0, M, N, 0, alpha, xID, yID, 0.f, aID, incX, incY, 0, 0, mUseIncSupp); 2086 } 2087 2088 /** 2089 * DSYR performs the rank 1 operation 2090 * A := alpha*x*x**T + A 2091 * 2092 * Details: http://www.netlib.org/lapack/explore-html/d3/d60/dsyr_8f.html 2093 * 2094 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 2095 * @param alpha The scalar alpha. 2096 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 2097 * @param incX The increment for the elements of vector x, must be larger than zero. 2098 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 2099 */ 2100 public void DSYR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation A) { 2101 int N = validateSYR(Element.F64(mRS), Uplo, X, incX, A); 2102 2103 boolean mUseIncSupp = isIncSupp(); 2104 long aID = A.getID(mRS); 2105 long xID = X.getID(mRS); 2106 if (mUseIncSupp) { 2107 aID = getDummyAlloc(A); 2108 xID = getDummyAlloc(X); 2109 } 2110 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, aID, 0.f, 0, incX, 0, 0, 0, mUseIncSupp); 2111 } 2112 2113 /** 2114 * DSPR performs the rank 1 operation 2115 * A := alpha*x*x**T + A 2116 * 2117 * Details: http://www.netlib.org/lapack/explore-html/dd/dba/dspr_8f.html 2118 * 2119 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 2120 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 2121 * 'a' to packed matrix 'b'. 2122 * k = 0 2123 * for i in range(0, n): 2124 * for j in range(i, n): 2125 * b[k++] = a[i, j] 2126 * 2127 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. 2128 * @param alpha The scalar alpha. 2129 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 2130 * @param incX The increment for the elements of vector x, must be larger than zero. 2131 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64}. 2132 */ 2133 public void DSPR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Ap) { 2134 int N = validateSPR(Element.F64(mRS), Uplo, X, incX, Ap); 2135 2136 boolean mUseIncSupp = isIncSupp(); 2137 long apID = Ap.getID(mRS); 2138 long xID = X.getID(mRS); 2139 if (mUseIncSupp) { 2140 apID = getDummyAlloc(Ap); 2141 xID = getDummyAlloc(X); 2142 } 2143 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dspr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, apID, 0.f, 0, incX, 0, 0, 0, mUseIncSupp); 2144 } 2145 2146 /** 2147 * DSYR2 performs the symmetric rank 2 operation 2148 * A := alpha*x*y**T + alpha*y*x**T + A 2149 * 2150 * Details: http://www.netlib.org/lapack/explore-html/de/d41/dsyr2_8f.html 2151 * 2152 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 2153 * @param alpha The scalar alpha. 2154 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 2155 * @param incX The increment for the elements of vector x, must be larger than zero. 2156 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. 2157 * @param incY The increment for the elements of vector y, must be larger than zero. 2158 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 2159 */ 2160 public void DSYR2(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 2161 int N = validateSYR2(Element.F64(mRS), Uplo, X, incX, Y, incY, A); 2162 2163 boolean mUseIncSupp = isIncSupp(); 2164 long aID = A.getID(mRS); 2165 long xID = X.getID(mRS); 2166 long yID = Y.getID(mRS); 2167 if (mUseIncSupp) { 2168 aID = getDummyAlloc(A); 2169 xID = getDummyAlloc(X); 2170 yID = getDummyAlloc(Y); 2171 } 2172 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, yID, 0, aID, incX, incY, 0, 0, mUseIncSupp); 2173 } 2174 2175 /** 2176 * DSPR2 performs the symmetric rank 2 operation 2177 * A := alpha*x*y**T + alpha*y*x**T + A 2178 * 2179 * Details: http://www.netlib.org/lapack/explore-html/dd/d9e/dspr2_8f.html 2180 * 2181 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 2182 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 2183 * 'a' to packed matrix 'b'. 2184 * k = 0 2185 * for i in range(0, n): 2186 * for j in range(i, n): 2187 * b[k++] = a[i, j] 2188 * 2189 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. 2190 * @param alpha The scalar alpha. 2191 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 2192 * @param incX The increment for the elements of vector x, must be larger than zero. 2193 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. 2194 * @param incY The increment for the elements of vector y, must be larger than zero. 2195 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64}. 2196 */ 2197 public void DSPR2(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) { 2198 int N = validateSPR2(Element.F64(mRS), Uplo, X, incX, Y, incY, Ap); 2199 2200 boolean mUseIncSupp = isIncSupp(); 2201 long apID = Ap.getID(mRS); 2202 long xID = X.getID(mRS); 2203 long yID = Y.getID(mRS); 2204 if (mUseIncSupp) { 2205 apID = getDummyAlloc(Ap); 2206 xID = getDummyAlloc(X); 2207 yID = getDummyAlloc(Y); 2208 } 2209 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dspr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, yID, 0, apID, incX, incY, 0, 0, mUseIncSupp); 2210 } 2211 2212 2213 /** 2214 * Level 2, C and Z only 2215 */ 2216 2217 static void validateGERU(Element e, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 2218 if (!A.getType().getElement().isCompatible(e) || 2219 !X.getType().getElement().isCompatible(e) || 2220 !Y.getType().getElement().isCompatible(e)) { 2221 throw new RSRuntimeException("Called BLAS with wrong Element type"); 2222 } 2223 if (X.getType().getY() > 1 || Y.getType().getY() > 1) { 2224 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 2225 } 2226 2227 int M = A.getType().getY(); 2228 int N = A.getType().getX(); 2229 if (incX <= 0 || incY <= 0) { 2230 throw new RSRuntimeException("Vector increments must be greater than 0"); 2231 } 2232 int expectedXDim = 1 + (M - 1) * incX; 2233 if (X.getType().getX() != expectedXDim) { 2234 throw new RSRuntimeException("Incorrect vector dimensions for GERU"); 2235 } 2236 int expectedYDim = 1 + (N - 1) * incY; 2237 if (Y.getType().getX() != expectedYDim) { 2238 throw new RSRuntimeException("Incorrect vector dimensions for GERU"); 2239 } 2240 2241 } 2242 2243 /** 2244 * CHEMV performs the matrix-vector operation 2245 * y := alpha*A*x + beta*y 2246 * 2247 * Details: http://www.netlib.org/lapack/explore-html/d7/d51/chemv_8f.html 2248 * 2249 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 2250 * @param alpha The scalar alpha. 2251 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 2252 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 2253 * @param incX The increment for the elements of vector x, must be larger than zero. 2254 * @param beta The scalar beta. 2255 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. 2256 * @param incY The increment for the elements of vector y, must be larger than zero. 2257 */ 2258 public void CHEMV(@Uplo int Uplo, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) { 2259 // HEMV is the same as SYR2 validation-wise 2260 int N = validateSYR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, A); 2261 2262 boolean mUseIncSupp = isIncSupp(); 2263 long aID = A.getID(mRS); 2264 long xID = X.getID(mRS); 2265 long yID = Y.getID(mRS); 2266 if (mUseIncSupp) { 2267 aID = getDummyAlloc(A); 2268 xID = getDummyAlloc(X); 2269 yID = getDummyAlloc(Y); 2270 } 2271 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chemv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp); 2272 } 2273 2274 /** 2275 * CHBMV performs the matrix-vector operation 2276 * y := alpha*A*x + beta*y 2277 * 2278 * Details: http://www.netlib.org/lapack/explore-html/db/dc2/chbmv_8f.html 2279 * 2280 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 2281 * but only the region N*(K+1) will be referenced. The following subroutine can is an 2282 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 2283 * for i in range(0, n): 2284 * for j in range(i, min(i+k+1, n)): 2285 * b[i, j-i] = a[i, j] 2286 * 2287 * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied. 2288 * @param K The number of off-diagonals of the matrix A 2289 * @param alpha The scalar alpha. 2290 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 2291 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 2292 * @param incX The increment for the elements of vector x, must be larger than zero. 2293 * @param beta The scalar beta. 2294 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. 2295 * @param incY The increment for the elements of vector y, must be larger than zero. 2296 */ 2297 public void CHBMV(@Uplo int Uplo, int K, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) { 2298 // HBMV is the same as SYR2 validation-wise 2299 int N = validateSYR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, A); 2300 if (K < 0) { 2301 throw new RSRuntimeException("K must be 0 or greater for HBMV"); 2302 } 2303 2304 boolean mUseIncSupp = isIncSupp(); 2305 long aID = A.getID(mRS); 2306 long xID = X.getID(mRS); 2307 long yID = Y.getID(mRS); 2308 if (mUseIncSupp) { 2309 aID = getDummyAlloc(A); 2310 xID = getDummyAlloc(X); 2311 yID = getDummyAlloc(Y); 2312 } 2313 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp); 2314 } 2315 2316 /** 2317 * CHPMV performs the matrix-vector operation 2318 * y := alpha*A*x + beta*y 2319 * 2320 * Details: http://www.netlib.org/lapack/explore-html/d2/d06/chpmv_8f.html 2321 * 2322 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 2323 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 2324 * 'a' to packed matrix 'b'. 2325 * k = 0 2326 * for i in range(0, n): 2327 * for j in range(i, n): 2328 * b[k++] = a[i, j] 2329 * 2330 * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form. 2331 * @param alpha The scalar alpha. 2332 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 2333 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 2334 * @param incX The increment for the elements of vector x, must be larger than zero. 2335 * @param beta The scalar beta. 2336 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. 2337 * @param incY The increment for the elements of vector y, must be larger than zero. 2338 */ 2339 public void CHPMV(@Uplo int Uplo, Float2 alpha, Allocation Ap, Allocation X, int incX, Float2 beta, Allocation Y, int incY) { 2340 // HPMV is the same as SPR2 2341 int N = validateSPR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, Ap); 2342 2343 boolean mUseIncSupp = isIncSupp(); 2344 long apID = Ap.getID(mRS); 2345 long xID = X.getID(mRS); 2346 long yID = Y.getID(mRS); 2347 if (mUseIncSupp) { 2348 apID = getDummyAlloc(Ap); 2349 xID = getDummyAlloc(X); 2350 yID = getDummyAlloc(Y); 2351 } 2352 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chpmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, apID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp); 2353 } 2354 2355 /** 2356 * CGERU performs the rank 1 operation 2357 * A := alpha*x*y**T + A 2358 * 2359 * Details: http://www.netlib.org/lapack/explore-html/db/d5f/cgeru_8f.html 2360 * 2361 * @param alpha The scalar alpha. 2362 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 2363 * @param incX The increment for the elements of vector x, must be larger than zero. 2364 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. 2365 * @param incY The increment for the elements of vector y, must be larger than zero. 2366 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 2367 */ 2368 public void CGERU(Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 2369 validateGERU(Element.F32_2(mRS), X, incX, Y, incY, A); 2370 int M = A.getType().getY(); 2371 int N = A.getType().getX(); 2372 2373 boolean mUseIncSupp = isIncSupp(); 2374 long aID = A.getID(mRS); 2375 long xID = X.getID(mRS); 2376 long yID = Y.getID(mRS); 2377 if (mUseIncSupp) { 2378 aID = getDummyAlloc(A); 2379 xID = getDummyAlloc(X); 2380 yID = getDummyAlloc(Y); 2381 } 2382 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgeru, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, aID, incX, incY, 0, 0, mUseIncSupp); 2383 } 2384 2385 /** 2386 * CGERC performs the rank 1 operation 2387 * A := alpha*x*y**H + A 2388 * 2389 * Details: http://www.netlib.org/lapack/explore-html/dd/d84/cgerc_8f.html 2390 * 2391 * @param alpha The scalar alpha. 2392 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 2393 * @param incX The increment for the elements of vector x, must be larger than zero. 2394 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. 2395 * @param incY The increment for the elements of vector y, must be larger than zero. 2396 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 2397 */ 2398 public void CGERC(Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 2399 // same as GERU 2400 validateGERU(Element.F32_2(mRS), X, incX, Y, incY, A); 2401 int M = A.getType().getY(); 2402 int N = A.getType().getX(); 2403 2404 boolean mUseIncSupp = isIncSupp(); 2405 long aID = A.getID(mRS); 2406 long xID = X.getID(mRS); 2407 long yID = Y.getID(mRS); 2408 if (mUseIncSupp) { 2409 aID = getDummyAlloc(A); 2410 xID = getDummyAlloc(X); 2411 yID = getDummyAlloc(Y); 2412 } 2413 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgerc, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, aID, incX, incY, 0, 0, mUseIncSupp); 2414 } 2415 2416 /** 2417 * CHER performs the rank 1 operation 2418 * A := alpha*x*x**H + A 2419 * 2420 * Details: http://www.netlib.org/lapack/explore-html/d3/d6d/cher_8f.html 2421 * 2422 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 2423 * @param alpha The scalar alpha. 2424 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 2425 * @param incX The increment for the elements of vector x, must be larger than zero. 2426 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 2427 */ 2428 public void CHER(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation A) { 2429 // same as SYR 2430 int N = validateSYR(Element.F32_2(mRS), Uplo, X, incX, A); 2431 2432 boolean mUseIncSupp = isIncSupp(); 2433 long aID = A.getID(mRS); 2434 long xID = X.getID(mRS); 2435 if (mUseIncSupp) { 2436 aID = getDummyAlloc(A); 2437 xID = getDummyAlloc(X); 2438 } 2439 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cher, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, xID, 0, 0, 0, aID, incX, 0, 0, 0, mUseIncSupp); 2440 } 2441 2442 /** 2443 * CHPR performs the rank 1 operation 2444 * A := alpha*x*x**H + A 2445 * 2446 * Details: http://www.netlib.org/lapack/explore-html/db/dcd/chpr_8f.html 2447 * 2448 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 2449 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 2450 * 'a' to packed matrix 'b'. 2451 * k = 0 2452 * for i in range(0, n): 2453 * for j in range(i, n): 2454 * b[k++] = a[i, j] 2455 * 2456 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. 2457 * @param alpha The scalar alpha. 2458 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 2459 * @param incX The increment for the elements of vector x, must be larger than zero. 2460 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 2461 */ 2462 public void CHPR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Ap) { 2463 // equivalent to SPR for validation 2464 int N = validateSPR(Element.F32_2(mRS), Uplo, X, incX, Ap); 2465 2466 boolean mUseIncSupp = isIncSupp(); 2467 long apID = Ap.getID(mRS); 2468 long xID = X.getID(mRS); 2469 if (mUseIncSupp) { 2470 apID = getDummyAlloc(Ap); 2471 xID = getDummyAlloc(X); 2472 } 2473 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chpr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, xID, 0, 0, 0, apID, incX, 0, 0, 0, mUseIncSupp); 2474 } 2475 2476 /** 2477 * CHER2 performs the symmetric rank 2 operation 2478 * A := alpha*x*y**H + alpha*y*x**H + A 2479 * 2480 * Details: http://www.netlib.org/lapack/explore-html/db/d87/cher2_8f.html 2481 * 2482 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 2483 * @param alpha The scalar alpha. 2484 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 2485 * @param incX The increment for the elements of vector x, must be larger than zero. 2486 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. 2487 * @param incY The increment for the elements of vector y, must be larger than zero. 2488 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 2489 */ 2490 public void CHER2(@Uplo int Uplo, Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 2491 // same as SYR2 2492 int N = validateSYR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, A); 2493 2494 boolean mUseIncSupp = isIncSupp(); 2495 long aID = A.getID(mRS); 2496 long xID = X.getID(mRS); 2497 long yID = Y.getID(mRS); 2498 if (mUseIncSupp) { 2499 aID = getDummyAlloc(A); 2500 xID = getDummyAlloc(X); 2501 yID = getDummyAlloc(Y); 2502 } 2503 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cher2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, aID, incX, incY, 0, 0, mUseIncSupp); 2504 } 2505 2506 /** 2507 * CHPR2 performs the symmetric rank 2 operation 2508 * A := alpha*x*y**H + alpha*y*x**H + A 2509 * 2510 * Details: http://www.netlib.org/lapack/explore-html/d6/d44/chpr2_8f.html 2511 * 2512 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 2513 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 2514 * 'a' to packed matrix 'b'. 2515 * k = 0 2516 * for i in range(0, n): 2517 * for j in range(i, n): 2518 * b[k++] = a[i, j] 2519 * 2520 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. 2521 * @param alpha The scalar alpha. 2522 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 2523 * @param incX The increment for the elements of vector x, must be larger than zero. 2524 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. 2525 * @param incY The increment for the elements of vector y, must be larger than zero. 2526 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 2527 */ 2528 public void CHPR2(@Uplo int Uplo, Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) { 2529 // same as SPR2 2530 int N = validateSPR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, Ap); 2531 2532 boolean mUseIncSupp = isIncSupp(); 2533 long apID = Ap.getID(mRS); 2534 long xID = X.getID(mRS); 2535 long yID = Y.getID(mRS); 2536 if (mUseIncSupp) { 2537 apID = getDummyAlloc(Ap); 2538 xID = getDummyAlloc(X); 2539 yID = getDummyAlloc(Y); 2540 } 2541 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chpr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, apID, incX, incY, 0, 0, mUseIncSupp); 2542 } 2543 2544 /** 2545 * ZHEMV performs the matrix-vector operation 2546 * y := alpha*A*x + beta*y 2547 * 2548 * Details: http://www.netlib.org/lapack/explore-html/d0/ddd/zhemv_8f.html 2549 * 2550 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 2551 * @param alpha The scalar alpha. 2552 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2553 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2554 * @param incX The increment for the elements of vector x, must be larger than zero. 2555 * @param beta The scalar beta. 2556 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. 2557 * @param incY The increment for the elements of vector y, must be larger than zero. 2558 */ 2559 public void ZHEMV(@Uplo int Uplo, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) { 2560 // HEMV is the same as SYR2 validation-wise 2561 int N = validateSYR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, A); 2562 2563 boolean mUseIncSupp = isIncSupp(); 2564 long aID = A.getID(mRS); 2565 long xID = X.getID(mRS); 2566 long yID = Y.getID(mRS); 2567 if (mUseIncSupp) { 2568 aID = getDummyAlloc(A); 2569 xID = getDummyAlloc(X); 2570 yID = getDummyAlloc(Y); 2571 } 2572 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhemv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp); 2573 } 2574 2575 /** 2576 * ZHBMV performs the matrix-vector operation 2577 * y := alpha*A*x + beta*y 2578 * 2579 * Details: http://www.netlib.org/lapack/explore-html/d3/d1a/zhbmv_8f.html 2580 * 2581 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 2582 * but only the region N*(K+1) will be referenced. The following subroutine can is an 2583 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 2584 * for i in range(0, n): 2585 * for j in range(i, min(i+k+1, n)): 2586 * b[i, j-i] = a[i, j] 2587 * 2588 * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied. 2589 * @param K The number of off-diagonals of the matrix A 2590 * @param alpha The scalar alpha. 2591 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2592 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2593 * @param incX The increment for the elements of vector x, must be larger than zero. 2594 * @param beta The scalar beta. 2595 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. 2596 * @param incY The increment for the elements of vector y, must be larger than zero. 2597 */ 2598 public void ZHBMV(@Uplo int Uplo, int K, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) { 2599 // HBMV is the same as SYR2 validation-wise 2600 int N = validateSYR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, A); 2601 if (K < 0) { 2602 throw new RSRuntimeException("K must be 0 or greater for HBMV"); 2603 } 2604 2605 boolean mUseIncSupp = isIncSupp(); 2606 long aID = A.getID(mRS); 2607 long xID = X.getID(mRS); 2608 long yID = Y.getID(mRS); 2609 if (mUseIncSupp) { 2610 aID = getDummyAlloc(A); 2611 xID = getDummyAlloc(X); 2612 yID = getDummyAlloc(Y); 2613 } 2614 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp); 2615 } 2616 2617 /** 2618 * ZHPMV performs the matrix-vector operation 2619 * y := alpha*A*x + beta*y 2620 * 2621 * Details: http://www.netlib.org/lapack/explore-html/d0/d60/zhpmv_8f.html 2622 * 2623 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 2624 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 2625 * 'a' to packed matrix 'b'. 2626 * k = 0 2627 * for i in range(0, n): 2628 * for j in range(i, n): 2629 * b[k++] = a[i, j] 2630 * 2631 * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form. 2632 * @param alpha The scalar alpha. 2633 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2634 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2635 * @param incX The increment for the elements of vector x, must be larger than zero. 2636 * @param beta The scalar beta. 2637 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. 2638 * @param incY The increment for the elements of vector y, must be larger than zero. 2639 */ 2640 public void ZHPMV(@Uplo int Uplo, Double2 alpha, Allocation Ap, Allocation X, int incX, Double2 beta, Allocation Y, int incY) { 2641 // HPMV is the same as SPR2 2642 int N = validateSPR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, Ap); 2643 2644 boolean mUseIncSupp = isIncSupp(); 2645 long apID = Ap.getID(mRS); 2646 long xID = X.getID(mRS); 2647 long yID = Y.getID(mRS); 2648 if (mUseIncSupp) { 2649 apID = getDummyAlloc(Ap); 2650 xID = getDummyAlloc(X); 2651 yID = getDummyAlloc(Y); 2652 } 2653 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhpmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, apID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp); 2654 } 2655 2656 /** 2657 * ZGERU performs the rank 1 operation 2658 * A := alpha*x*y**T + A 2659 * 2660 * Details: http://www.netlib.org/lapack/explore-html/d7/d12/zgeru_8f.html 2661 * 2662 * @param alpha The scalar alpha. 2663 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2664 * @param incX The increment for the elements of vector x, must be larger than zero. 2665 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. 2666 * @param incY The increment for the elements of vector y, must be larger than zero. 2667 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2668 */ 2669 public void ZGERU(Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 2670 validateGERU(Element.F64_2(mRS), X, incX, Y, incY, A); 2671 int M = A.getType().getY(); 2672 int N = A.getType().getX(); 2673 2674 boolean mUseIncSupp = isIncSupp(); 2675 long aID = A.getID(mRS); 2676 long xID = X.getID(mRS); 2677 long yID = Y.getID(mRS); 2678 if (mUseIncSupp) { 2679 aID = getDummyAlloc(A); 2680 xID = getDummyAlloc(X); 2681 yID = getDummyAlloc(Y); 2682 } 2683 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgeru, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, aID, incX, incY, 0, 0, mUseIncSupp); 2684 } 2685 2686 /** 2687 * ZGERC performs the rank 1 operation 2688 * A := alpha*x*y**H + A 2689 * 2690 * Details: http://www.netlib.org/lapack/explore-html/d3/dad/zgerc_8f.html 2691 * 2692 * @param alpha The scalar alpha. 2693 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2694 * @param incX The increment for the elements of vector x, must be larger than zero. 2695 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. 2696 * @param incY The increment for the elements of vector y, must be larger than zero. 2697 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2698 */ 2699 public void ZGERC(Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 2700 // same as GERU 2701 validateGERU(Element.F64_2(mRS), X, incX, Y, incY, A); 2702 int M = A.getType().getY(); 2703 int N = A.getType().getX(); 2704 2705 boolean mUseIncSupp = isIncSupp(); 2706 long aID = A.getID(mRS); 2707 long xID = X.getID(mRS); 2708 long yID = Y.getID(mRS); 2709 if (mUseIncSupp) { 2710 aID = getDummyAlloc(A); 2711 xID = getDummyAlloc(X); 2712 yID = getDummyAlloc(Y); 2713 } 2714 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgerc, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, aID, incX, incY, 0, 0, mUseIncSupp); 2715 } 2716 2717 /** 2718 * ZHER performs the rank 1 operation 2719 * A := alpha*x*x**H + A 2720 * 2721 * Details: http://www.netlib.org/lapack/explore-html/de/d0e/zher_8f.html 2722 * 2723 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 2724 * @param alpha The scalar alpha. 2725 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2726 * @param incX The increment for the elements of vector x, must be larger than zero. 2727 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2728 */ 2729 public void ZHER(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation A) { 2730 // same as SYR 2731 int N = validateSYR(Element.F64_2(mRS), Uplo, X, incX, A); 2732 2733 boolean mUseIncSupp = isIncSupp(); 2734 long aID = A.getID(mRS); 2735 long xID = X.getID(mRS); 2736 if (mUseIncSupp) { 2737 aID = getDummyAlloc(A); 2738 xID = getDummyAlloc(X); 2739 } 2740 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zher, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, xID, 0, 0, 0, aID, incX, 0, 0, 0, mUseIncSupp); 2741 } 2742 2743 /** 2744 * ZHPR performs the rank 1 operation 2745 * A := alpha*x*x**H + A 2746 * 2747 * Details: http://www.netlib.org/lapack/explore-html/de/de1/zhpr_8f.html 2748 * 2749 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 2750 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 2751 * 'a' to packed matrix 'b'. 2752 * k = 0 2753 * for i in range(0, n): 2754 * for j in range(i, n): 2755 * b[k++] = a[i, j] 2756 * 2757 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. 2758 * @param alpha The scalar alpha. 2759 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2760 * @param incX The increment for the elements of vector x, must be larger than zero. 2761 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2762 */ 2763 public void ZHPR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Ap) { 2764 // equivalent to SPR for validation 2765 int N = validateSPR(Element.F64_2(mRS), Uplo, X, incX, Ap); 2766 2767 boolean mUseIncSupp = isIncSupp(); 2768 long apID = Ap.getID(mRS); 2769 long xID = X.getID(mRS); 2770 if (mUseIncSupp) { 2771 apID = getDummyAlloc(Ap); 2772 xID = getDummyAlloc(X); 2773 } 2774 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhpr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, xID, 0, 0, 0, apID, incX, 0, 0, 0, mUseIncSupp); 2775 } 2776 2777 /** 2778 * ZHER2 performs the symmetric rank 2 operation 2779 * A := alpha*x*y**H + alpha*y*x**H + A 2780 * 2781 * Details: http://www.netlib.org/lapack/explore-html/da/d8a/zher2_8f.html 2782 * 2783 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 2784 * @param alpha The scalar alpha. 2785 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2786 * @param incX The increment for the elements of vector x, must be larger than zero. 2787 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. 2788 * @param incY The increment for the elements of vector y, must be larger than zero. 2789 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2790 */ 2791 public void ZHER2(@Uplo int Uplo, Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 2792 // same as SYR2 2793 int N = validateSYR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, A); 2794 2795 boolean mUseIncSupp = isIncSupp(); 2796 long aID = A.getID(mRS); 2797 long xID = X.getID(mRS); 2798 long yID = Y.getID(mRS); 2799 if (mUseIncSupp) { 2800 aID = getDummyAlloc(A); 2801 xID = getDummyAlloc(X); 2802 yID = getDummyAlloc(Y); 2803 } 2804 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zher2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, aID, incX, incY, 0, 0, mUseIncSupp); 2805 } 2806 2807 /** 2808 * ZHPR2 performs the symmetric rank 2 operation 2809 * A := alpha*x*y**H + alpha*y*x**H + A 2810 * 2811 * Details: http://www.netlib.org/lapack/explore-html/d5/d52/zhpr2_8f.html 2812 * 2813 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 2814 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 2815 * 'a' to packed matrix 'b'. 2816 * k = 0 2817 * for i in range(0, n): 2818 * for j in range(i, n): 2819 * b[k++] = a[i, j] 2820 * 2821 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. 2822 * @param alpha The scalar alpha. 2823 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2824 * @param incX The increment for the elements of vector x, must be larger than zero. 2825 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. 2826 * @param incY The increment for the elements of vector y, must be larger than zero. 2827 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2828 */ 2829 public void ZHPR2(@Uplo int Uplo, Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) { 2830 // same as SPR2 2831 int N = validateSPR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, Ap); 2832 2833 boolean mUseIncSupp = isIncSupp(); 2834 long apID = Ap.getID(mRS); 2835 long xID = X.getID(mRS); 2836 long yID = Y.getID(mRS); 2837 if (mUseIncSupp) { 2838 apID = getDummyAlloc(Ap); 2839 xID = getDummyAlloc(X); 2840 yID = getDummyAlloc(Y); 2841 } 2842 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhpr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, apID, incX, incY, 0, 0, mUseIncSupp); 2843 } 2844 2845 2846 /** 2847 * Level 3 BLAS 2848 */ 2849 2850 static void validateL3(Element e, int TransA, int TransB, int Side, Allocation A, Allocation B, Allocation C) { 2851 int aM = -1, aN = -1, bM = -1, bN = -1, cM = -1, cN = -1; 2852 if ((A != null && !A.getType().getElement().isCompatible(e)) || 2853 (B != null && !B.getType().getElement().isCompatible(e)) || 2854 (C != null && !C.getType().getElement().isCompatible(e))) { 2855 throw new RSRuntimeException("Called BLAS with wrong Element type"); 2856 } 2857 if (C == null) { 2858 //since matrix C is used to store the result, it cannot be null. 2859 throw new RSRuntimeException("Allocation C cannot be null"); 2860 } 2861 cM = C.getType().getY(); 2862 cN = C.getType().getX(); 2863 2864 if (Side == RIGHT) { 2865 if ((A == null && B != null) || (A != null && B == null)) { 2866 throw new RSRuntimeException("Provided Matrix A without Matrix B, or vice versa"); 2867 } 2868 if (B != null) { 2869 bM = A.getType().getY(); 2870 bN = A.getType().getX(); 2871 } 2872 if (A != null) { 2873 aM = B.getType().getY(); 2874 aN = B.getType().getX(); 2875 } 2876 } else { 2877 if (A != null) { 2878 if (TransA == TRANSPOSE || TransA == CONJ_TRANSPOSE) { 2879 aN = A.getType().getY(); 2880 aM = A.getType().getX(); 2881 } else { 2882 aM = A.getType().getY(); 2883 aN = A.getType().getX(); 2884 } 2885 } 2886 if (B != null) { 2887 if (TransB == TRANSPOSE || TransB == CONJ_TRANSPOSE) { 2888 bN = B.getType().getY(); 2889 bM = B.getType().getX(); 2890 } else { 2891 bM = B.getType().getY(); 2892 bN = B.getType().getX(); 2893 } 2894 } 2895 } 2896 if (A != null && B != null && C != null) { 2897 if (aN != bM || aM != cM || bN != cN) { 2898 throw new RSRuntimeException("Called BLAS with invalid dimensions"); 2899 } 2900 } else if (A != null && C != null) { 2901 // A and C only, for SYRK 2902 if (cM != cN) { 2903 throw new RSRuntimeException("Matrix C is not symmetric"); 2904 } 2905 if (aM != cM) { 2906 throw new RSRuntimeException("Called BLAS with invalid dimensions"); 2907 } 2908 } else if (A != null && B != null) { 2909 // A and B only 2910 if (aN != bM) { 2911 throw new RSRuntimeException("Called BLAS with invalid dimensions"); 2912 } 2913 } 2914 2915 } 2916 2917 /** 2918 * SGEMM performs one of the matrix-matrix operations 2919 * C := alpha*op(A)*op(B) + beta*C where op(X) is one of op(X) = X or op(X) = X**T 2920 * 2921 * Details: http://www.netlib.org/lapack/explore-html/d4/de2/sgemm_8f.html 2922 * 2923 * @param TransA The type of transpose applied to matrix A. 2924 * @param TransB The type of transpose applied to matrix B. 2925 * @param alpha The scalar alpha. 2926 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 2927 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}. 2928 * @param beta The scalar beta. 2929 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}. 2930 */ 2931 public void SGEMM(@Transpose int TransA, @Transpose int TransB, float alpha, Allocation A, 2932 Allocation B, float beta, Allocation C) { 2933 validateTranspose(TransA); 2934 validateTranspose(TransB); 2935 validateL3(Element.F32(mRS), TransA, TransB, 0, A, B, C); 2936 2937 int M = -1, N = -1, K = -1; 2938 if (TransA != NO_TRANSPOSE) { 2939 M = A.getType().getX(); 2940 K = A.getType().getY(); 2941 } else { 2942 M = A.getType().getY(); 2943 K = A.getType().getX(); 2944 } 2945 if (TransB != NO_TRANSPOSE) { 2946 N = B.getType().getY(); 2947 } else { 2948 N = B.getType().getX(); 2949 } 2950 2951 boolean mUseIncSupp = isIncSupp(); 2952 long aID = A.getID(mRS); 2953 long bID = B.getID(mRS); 2954 long cID = C.getID(mRS); 2955 if (mUseIncSupp) { 2956 aID = getDummyAlloc(A); 2957 bID = getDummyAlloc(B); 2958 cID = getDummyAlloc(C); 2959 } 2960 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sgemm, TransA, TransB, 0, 0, 0, M, N, K, alpha, aID, bID, 2961 beta, cID, 0, 0, 0, 0, mUseIncSupp); 2962 } 2963 2964 /** 2965 * DGEMM performs one of the matrix-matrix operations 2966 * C := alpha*op(A)*op(B) + beta*C where op(X) is one of op(X) = X or op(X) = X**T 2967 * 2968 * Details: http://www.netlib.org/lapack/explore-html/d7/d2b/dgemm_8f.html 2969 * 2970 * @param TransA The type of transpose applied to matrix A. 2971 * @param TransB The type of transpose applied to matrix B. 2972 * @param alpha The scalar alpha. 2973 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 2974 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}. 2975 * @param beta The scalar beta. 2976 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}. 2977 */ 2978 public void DGEMM(@Transpose int TransA, @Transpose int TransB, double alpha, Allocation A, 2979 Allocation B, double beta, Allocation C) { 2980 validateTranspose(TransA); 2981 validateTranspose(TransB); 2982 validateL3(Element.F64(mRS), TransA, TransB, 0, A, B, C); 2983 int M = -1, N = -1, K = -1; 2984 if (TransA != NO_TRANSPOSE) { 2985 M = A.getType().getX(); 2986 K = A.getType().getY(); 2987 } else { 2988 M = A.getType().getY(); 2989 K = A.getType().getX(); 2990 } 2991 if (TransB != NO_TRANSPOSE) { 2992 N = B.getType().getY(); 2993 } else { 2994 N = B.getType().getX(); 2995 } 2996 2997 boolean mUseIncSupp = isIncSupp(); 2998 long aID = A.getID(mRS); 2999 long bID = B.getID(mRS); 3000 long cID = C.getID(mRS); 3001 if (mUseIncSupp) { 3002 aID = getDummyAlloc(A); 3003 bID = getDummyAlloc(B); 3004 cID = getDummyAlloc(C); 3005 } 3006 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dgemm, TransA, TransB, 0, 0, 0, M, N, K, alpha, aID, bID, 3007 beta, cID, 0, 0, 0, 0, mUseIncSupp); 3008 } 3009 3010 /** 3011 * CGEMM performs one of the matrix-matrix operations 3012 * C := alpha*op(A)*op(B) + beta*C where op(X) is one of op(X) = X or op(X) = X**T or op(X) = X**H 3013 * 3014 * Details: http://www.netlib.org/lapack/explore-html/d6/d5b/cgemm_8f.html 3015 * 3016 * @param TransA The type of transpose applied to matrix A. 3017 * @param TransB The type of transpose applied to matrix B. 3018 * @param alpha The scalar alpha. 3019 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 3020 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}. 3021 * @param beta The scalar beta. 3022 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}. 3023 */ 3024 public void CGEMM(@Transpose int TransA, @Transpose int TransB, Float2 alpha, Allocation A, 3025 Allocation B, Float2 beta, Allocation C) { 3026 validateTranspose(TransA); 3027 validateTranspose(TransB); 3028 validateL3(Element.F32_2(mRS), TransA, TransB, 0, A, B, C); 3029 int M = -1, N = -1, K = -1; 3030 if (TransA != NO_TRANSPOSE) { 3031 M = A.getType().getX(); 3032 K = A.getType().getY(); 3033 } else { 3034 M = A.getType().getY(); 3035 K = A.getType().getX(); 3036 } 3037 if (TransB != NO_TRANSPOSE) { 3038 N = B.getType().getY(); 3039 } else { 3040 N = B.getType().getX(); 3041 } 3042 3043 boolean mUseIncSupp = isIncSupp(); 3044 long aID = A.getID(mRS); 3045 long bID = B.getID(mRS); 3046 long cID = C.getID(mRS); 3047 if (mUseIncSupp) { 3048 aID = getDummyAlloc(A); 3049 bID = getDummyAlloc(B); 3050 cID = getDummyAlloc(C); 3051 } 3052 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgemm, TransA, TransB, 0, 0, 0, M, N, K, alpha.x, alpha.y, aID, bID, 3053 beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp); 3054 } 3055 3056 /** 3057 * ZGEMM performs one of the matrix-matrix operations 3058 * C := alpha*op(A)*op(B) + beta*C where op(X) is one of op(X) = X or op(X) = X**T or op(X) = X**H 3059 * 3060 * Details: http://www.netlib.org/lapack/explore-html/d7/d76/zgemm_8f.html 3061 * 3062 * @param TransA The type of transpose applied to matrix A. 3063 * @param TransB The type of transpose applied to matrix B. 3064 * @param alpha The scalar alpha. 3065 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2 3066 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2 3067 * @param beta The scalar beta. 3068 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2 3069 */ 3070 public void ZGEMM(@Transpose int TransA, @Transpose int TransB, Double2 alpha, Allocation A, 3071 Allocation B, Double2 beta, Allocation C) { 3072 validateTranspose(TransA); 3073 validateTranspose(TransB); 3074 validateL3(Element.F64_2(mRS), TransA, TransB, 0, A, B, C); 3075 int M = -1, N = -1, K = -1; 3076 if (TransA != NO_TRANSPOSE) { 3077 M = A.getType().getX(); 3078 K = A.getType().getY(); 3079 } else { 3080 M = A.getType().getY(); 3081 K = A.getType().getX(); 3082 } 3083 if (TransB != NO_TRANSPOSE) { 3084 N = B.getType().getY(); 3085 } else { 3086 N = B.getType().getX(); 3087 } 3088 3089 boolean mUseIncSupp = isIncSupp(); 3090 long aID = A.getID(mRS); 3091 long bID = B.getID(mRS); 3092 long cID = C.getID(mRS); 3093 if (mUseIncSupp) { 3094 aID = getDummyAlloc(A); 3095 bID = getDummyAlloc(B); 3096 cID = getDummyAlloc(C); 3097 } 3098 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgemm, TransA, TransB, 0, 0, 0, M, N, K, alpha.x, alpha.y, aID, bID, 3099 beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp); 3100 } 3101 3102 /** 3103 * SSYMM performs one of the matrix-matrix operations 3104 * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C 3105 * 3106 * Details: http://www.netlib.org/lapack/explore-html/d7/d42/ssymm_8f.html 3107 * 3108 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3109 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 3110 * @param alpha The scalar alpha. 3111 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 3112 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}. 3113 * @param beta The scalar beta. 3114 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}. 3115 */ 3116 public void SSYMM(@Side int Side, @Uplo int Uplo, float alpha, Allocation A, 3117 Allocation B, float beta, Allocation C) { 3118 validateSide(Side); 3119 validateUplo(Uplo); 3120 //For SYMM, Matrix A should be symmetric 3121 if (A.getType().getX() != A.getType().getY()) { 3122 throw new RSRuntimeException("Matrix A is not symmetric"); 3123 } 3124 validateL3(Element.F32(mRS), 0, 0, Side, A, B, C); 3125 3126 boolean mUseIncSupp = isIncSupp(); 3127 long aID = A.getID(mRS); 3128 long bID = B.getID(mRS); 3129 long cID = C.getID(mRS); 3130 if (mUseIncSupp) { 3131 aID = getDummyAlloc(A); 3132 bID = getDummyAlloc(B); 3133 cID = getDummyAlloc(C); 3134 } 3135 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha, aID, bID, 3136 beta, cID, 0, 0, 0, 0, mUseIncSupp); 3137 } 3138 3139 /** 3140 * DSYMM performs one of the matrix-matrix operations 3141 * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C 3142 * 3143 * Details: http://www.netlib.org/lapack/explore-html/d8/db0/dsymm_8f.html 3144 * 3145 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3146 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 3147 * @param alpha The scalar alpha. 3148 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 3149 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}. 3150 * @param beta The scalar beta. 3151 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}. 3152 */ 3153 public void DSYMM(@Side int Side, @Uplo int Uplo, double alpha, Allocation A, 3154 Allocation B, double beta, Allocation C) { 3155 validateSide(Side); 3156 validateUplo(Uplo); 3157 if (A.getType().getX() != A.getType().getY()) { 3158 throw new RSRuntimeException("Matrix A is not symmetric"); 3159 } 3160 validateL3(Element.F64(mRS), 0, 0, Side, A, B, C); 3161 3162 boolean mUseIncSupp = isIncSupp(); 3163 long aID = A.getID(mRS); 3164 long bID = B.getID(mRS); 3165 long cID = C.getID(mRS); 3166 if (mUseIncSupp) { 3167 aID = getDummyAlloc(A); 3168 bID = getDummyAlloc(B); 3169 cID = getDummyAlloc(C); 3170 } 3171 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha, aID, bID, 3172 beta, cID, 0, 0, 0, 0, mUseIncSupp); 3173 } 3174 3175 /** 3176 * CSYMM performs one of the matrix-matrix operations 3177 * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C 3178 * 3179 * Details: http://www.netlib.org/lapack/explore-html/db/d59/csymm_8f.html 3180 * 3181 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3182 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 3183 * @param alpha The scalar alpha. 3184 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 3185 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}. 3186 * @param beta The scalar beta. 3187 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}. 3188 */ 3189 public void CSYMM(@Side int Side, @Uplo int Uplo, Float2 alpha, Allocation A, 3190 Allocation B, Float2 beta, Allocation C) { 3191 validateSide(Side); 3192 validateUplo(Uplo); 3193 if (A.getType().getX() != A.getType().getY()) { 3194 throw new RSRuntimeException("Matrix A is not symmetric"); 3195 } 3196 validateL3(Element.F32_2(mRS), 0, 0, Side, A, B, C); 3197 3198 boolean mUseIncSupp = isIncSupp(); 3199 long aID = A.getID(mRS); 3200 long bID = B.getID(mRS); 3201 long cID = C.getID(mRS); 3202 if (mUseIncSupp) { 3203 aID = getDummyAlloc(A); 3204 bID = getDummyAlloc(B); 3205 cID = getDummyAlloc(C); 3206 } 3207 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_csymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha.x, alpha.y, aID, bID, 3208 beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp); 3209 } 3210 3211 /** 3212 * ZSYMM performs one of the matrix-matrix operations 3213 * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C 3214 * 3215 * Details: http://www.netlib.org/lapack/explore-html/df/d51/zsymm_8f.html 3216 * 3217 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3218 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 3219 * @param alpha The scalar alpha. 3220 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 3221 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}. 3222 * @param beta The scalar beta. 3223 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}. 3224 */ 3225 public void ZSYMM(@Side int Side, @Uplo int Uplo, Double2 alpha, Allocation A, 3226 Allocation B, Double2 beta, Allocation C) { 3227 validateSide(Side); 3228 validateUplo(Uplo); 3229 if (A.getType().getX() != A.getType().getY()) { 3230 throw new RSRuntimeException("Matrix A is not symmetric"); 3231 } 3232 validateL3(Element.F64_2(mRS), 0, 0, Side, A, B, C); 3233 3234 boolean mUseIncSupp = isIncSupp(); 3235 long aID = A.getID(mRS); 3236 long bID = B.getID(mRS); 3237 long cID = C.getID(mRS); 3238 if (mUseIncSupp) { 3239 aID = getDummyAlloc(A); 3240 bID = getDummyAlloc(B); 3241 cID = getDummyAlloc(C); 3242 } 3243 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zsymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha.x, alpha.y, aID, bID, 3244 beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp); 3245 } 3246 3247 /** 3248 * SSYRK performs one of the symmetric rank k operations 3249 * C := alpha*A*A**T + beta*C or C := alpha*A**T*A + beta*C 3250 * 3251 * Details: http://www.netlib.org/lapack/explore-html/d0/d40/ssyrk_8f.html 3252 * 3253 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 3254 * @param Trans The type of transpose applied to the operation. 3255 * @param alpha The scalar alpha. 3256 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 3257 * @param beta The scalar beta. 3258 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}. 3259 */ 3260 public void SSYRK(@Uplo int Uplo, @Transpose int Trans, float alpha, Allocation A, float beta, Allocation C) { 3261 validateTranspose(Trans); 3262 validateUplo(Uplo); 3263 validateL3(Element.F32(mRS), Trans, 0, 0, A, null, C); 3264 int K = -1; 3265 if (Trans != NO_TRANSPOSE) { 3266 K = A.getType().getY(); 3267 } else { 3268 K = A.getType().getX(); 3269 } 3270 3271 boolean mUseIncSupp = isIncSupp(); 3272 long aID = A.getID(mRS); 3273 long cID = C.getID(mRS); 3274 if (mUseIncSupp) { 3275 aID = getDummyAlloc(A); 3276 cID = getDummyAlloc(C); 3277 } 3278 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, aID, 0, beta, cID, 0, 0, 0, 0, mUseIncSupp); 3279 } 3280 3281 /** 3282 * DSYRK performs one of the symmetric rank k operations 3283 * C := alpha*A*A**T + beta*C or C := alpha*A**T*A + beta*C 3284 * 3285 * Details: http://www.netlib.org/lapack/explore-html/dc/d05/dsyrk_8f.html 3286 * 3287 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 3288 * @param Trans The type of transpose applied to the operation. 3289 * @param alpha The scalar alpha. 3290 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 3291 * @param beta The scalar beta. 3292 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}. 3293 */ 3294 public void DSYRK(@Uplo int Uplo, @Transpose int Trans, double alpha, Allocation A, double beta, Allocation C) { 3295 validateTranspose(Trans); 3296 validateUplo(Uplo); 3297 validateL3(Element.F64(mRS), Trans, 0, 0, A, null, C); 3298 int K = -1; 3299 if (Trans != NO_TRANSPOSE) { 3300 K = A.getType().getY(); 3301 } else { 3302 K = A.getType().getX(); 3303 } 3304 3305 boolean mUseIncSupp = isIncSupp(); 3306 long aID = A.getID(mRS); 3307 long cID = C.getID(mRS); 3308 if (mUseIncSupp) { 3309 aID = getDummyAlloc(A); 3310 cID = getDummyAlloc(C); 3311 } 3312 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, aID, 0, beta, cID, 0, 0, 0, 0, mUseIncSupp); 3313 } 3314 3315 /** 3316 * CSYRK performs one of the symmetric rank k operations 3317 * C := alpha*A*A**T + beta*C or C := alpha*A**T*A + beta*C 3318 * 3319 * Details: http://www.netlib.org/lapack/explore-html/d3/d6a/csyrk_8f.html 3320 * 3321 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 3322 * @param Trans The type of transpose applied to the operation. 3323 * @param alpha The scalar alpha. 3324 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 3325 * @param beta The scalar beta. 3326 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}. 3327 */ 3328 public void CSYRK(@Uplo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Float2 beta, Allocation C) { 3329 validateTranspose(Trans); 3330 validateUplo(Uplo); 3331 validateL3(Element.F32_2(mRS), Trans, 0, 0, A, null, C); 3332 int K = -1; 3333 if (Trans != NO_TRANSPOSE) { 3334 K = A.getType().getY(); 3335 } else { 3336 K = A.getType().getX(); 3337 } 3338 3339 boolean mUseIncSupp = isIncSupp(); 3340 long aID = A.getID(mRS); 3341 long cID = C.getID(mRS); 3342 if (mUseIncSupp) { 3343 aID = getDummyAlloc(A); 3344 cID = getDummyAlloc(C); 3345 } 3346 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_csyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, aID, 0, beta.x, beta.y, 3347 C.getID(mRS), 0, 0, 0, 0, mUseIncSupp); 3348 } 3349 3350 /** 3351 * ZSYRK performs one of the symmetric rank k operations 3352 * C := alpha*A*A**T + beta*C or C := alpha*A**T*A + beta*C 3353 * 3354 * Details: http://www.netlib.org/lapack/explore-html/de/d54/zsyrk_8f.html 3355 * 3356 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 3357 * @param Trans The type of transpose applied to the operation. 3358 * @param alpha The scalar alpha. 3359 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 3360 * @param beta The scalar beta. 3361 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}. 3362 */ 3363 public void ZSYRK(@Uplo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Double2 beta, Allocation C) { 3364 validateTranspose(Trans); 3365 validateUplo(Uplo); 3366 validateL3(Element.F64_2(mRS), Trans, 0, 0, A, null, C); 3367 int K = -1; 3368 if (Trans != NO_TRANSPOSE) { 3369 K = A.getType().getY(); 3370 } else { 3371 K = A.getType().getX(); 3372 } 3373 3374 boolean mUseIncSupp = isIncSupp(); 3375 long aID = A.getID(mRS); 3376 long cID = C.getID(mRS); 3377 if (mUseIncSupp) { 3378 aID = getDummyAlloc(A); 3379 cID = getDummyAlloc(C); 3380 } 3381 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zsyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, aID, 0, beta.x, beta.y, 3382 C.getID(mRS), 0, 0, 0, 0, mUseIncSupp); 3383 } 3384 3385 static void validateSYR2K(Element e, @Transpose int Trans, Allocation A, Allocation B, Allocation C) { 3386 validateTranspose(Trans); 3387 if (!A.getType().getElement().isCompatible(e) || 3388 !B.getType().getElement().isCompatible(e) || 3389 !C.getType().getElement().isCompatible(e)) { 3390 throw new RSRuntimeException("Called BLAS with wrong Element type"); 3391 } 3392 int Cdim = -1; 3393 // A is n x k if no transpose, k x n if transpose 3394 // C is n x n 3395 if (Trans == TRANSPOSE) { 3396 // check columns versus C 3397 Cdim = A.getType().getX(); 3398 } else { 3399 // check rows versus C 3400 Cdim = A.getType().getY(); 3401 } 3402 if (C.getType().getX() != Cdim || C.getType().getY() != Cdim) { 3403 throw new RSRuntimeException("Invalid symmetric matrix in SYR2K"); 3404 } 3405 // A dims == B dims 3406 if (A.getType().getX() != B.getType().getX() || A.getType().getY() != B.getType().getY()) { 3407 throw new RSRuntimeException("Invalid A and B in SYR2K"); 3408 } 3409 } 3410 3411 /** 3412 * SSYR2K performs one of the symmetric rank 2k operations 3413 * C := alpha*A*B**T + alpha*B*A**T + beta*C or C := alpha*A**T*B + alpha*B**T*A + beta*C 3414 * 3415 * Details: http://www.netlib.org/lapack/explore-html/df/d3d/ssyr2k_8f.html 3416 * 3417 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 3418 * @param Trans The type of transpose applied to the operation. 3419 * @param alpha The scalar alpha. 3420 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 3421 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}. 3422 * @param beta The scalar beta. 3423 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}. 3424 */ 3425 public void SSYR2K(@Uplo int Uplo, @Transpose int Trans, float alpha, Allocation A, Allocation B, float beta, Allocation C) { 3426 validateUplo(Uplo); 3427 validateSYR2K(Element.F32(mRS), Trans, A, B, C); 3428 int K = -1; 3429 if (Trans != NO_TRANSPOSE) { 3430 K = A.getType().getY(); 3431 } else { 3432 K = A.getType().getX(); 3433 } 3434 3435 boolean mUseIncSupp = isIncSupp(); 3436 long aID = A.getID(mRS); 3437 long bID = B.getID(mRS); 3438 long cID = C.getID(mRS); 3439 if (mUseIncSupp) { 3440 aID = getDummyAlloc(A); 3441 bID = getDummyAlloc(B); 3442 cID = getDummyAlloc(C); 3443 } 3444 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, aID, bID, beta, cID, 0, 0, 0, 0, mUseIncSupp); 3445 } 3446 3447 /** 3448 * DSYR2K performs one of the symmetric rank 2k operations 3449 * C := alpha*A*B**T + alpha*B*A**T + beta*C or C := alpha*A**T*B + alpha*B**T*A + beta*C 3450 * 3451 * Details: http://www.netlib.org/lapack/explore-html/d1/dec/dsyr2k_8f.html 3452 * 3453 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 3454 * @param Trans The type of transpose applied to the operation. 3455 * @param alpha The scalar alpha. 3456 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 3457 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}. 3458 * @param beta The scalar beta. 3459 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}. 3460 */ 3461 public void DSYR2K(@Uplo int Uplo, @Transpose int Trans, double alpha, Allocation A, Allocation B, double beta, Allocation C) { 3462 validateUplo(Uplo); 3463 validateSYR2K(Element.F64(mRS), Trans, A, B, C); 3464 int K = -1; 3465 if (Trans != NO_TRANSPOSE) { 3466 K = A.getType().getY(); 3467 } else { 3468 K = A.getType().getX(); 3469 } 3470 3471 boolean mUseIncSupp = isIncSupp(); 3472 long aID = A.getID(mRS); 3473 long bID = B.getID(mRS); 3474 long cID = C.getID(mRS); 3475 if (mUseIncSupp) { 3476 aID = getDummyAlloc(A); 3477 bID = getDummyAlloc(B); 3478 cID = getDummyAlloc(C); 3479 } 3480 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, aID, bID, beta, cID, 0, 0, 0, 0, mUseIncSupp); 3481 } 3482 3483 /** 3484 * CSYR2K performs one of the symmetric rank 2k operations 3485 * C := alpha*A*B**T + alpha*B*A**T + beta*C or C := alpha*A**T*B + alpha*B**T*A + beta*C 3486 * 3487 * Details: http://www.netlib.org/lapack/explore-html/de/d7e/csyr2k_8f.html 3488 * 3489 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 3490 * @param Trans The type of transpose applied to the operation. 3491 * @param alpha The scalar alpha. 3492 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 3493 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}. 3494 * @param beta The scalar beta. 3495 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}. 3496 */ 3497 public void CSYR2K(@Uplo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C) { 3498 validateUplo(Uplo); 3499 validateSYR2K(Element.F32_2(mRS), Trans, A, B, C); 3500 int K = -1; 3501 if (Trans != NO_TRANSPOSE) { 3502 K = A.getType().getY(); 3503 } else { 3504 K = A.getType().getX(); 3505 } 3506 3507 boolean mUseIncSupp = isIncSupp(); 3508 long aID = A.getID(mRS); 3509 long bID = B.getID(mRS); 3510 long cID = C.getID(mRS); 3511 if (mUseIncSupp) { 3512 aID = getDummyAlloc(A); 3513 bID = getDummyAlloc(B); 3514 cID = getDummyAlloc(C); 3515 } 3516 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_csyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, aID, bID, beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp); 3517 } 3518 3519 /** 3520 * ZSYR2K performs one of the symmetric rank 2k operations 3521 * C := alpha*A*B**T + alpha*B*A**T + beta*C or C := alpha*A**T*B + alpha*B**T*A + beta*C 3522 * 3523 * Details: http://www.netlib.org/lapack/explore-html/df/d20/zsyr2k_8f.html 3524 * 3525 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 3526 * @param Trans The type of transpose applied to the operation. 3527 * @param alpha The scalar alpha. 3528 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 3529 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}. 3530 * @param beta The scalar beta. 3531 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}. 3532 */ 3533 public void ZSYR2K(@Uplo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C) { 3534 validateUplo(Uplo); 3535 validateSYR2K(Element.F64_2(mRS), Trans, A, B, C); 3536 int K = -1; 3537 if (Trans != NO_TRANSPOSE) { 3538 K = A.getType().getY(); 3539 } else { 3540 K = A.getType().getX(); 3541 } 3542 3543 boolean mUseIncSupp = isIncSupp(); 3544 long aID = A.getID(mRS); 3545 long bID = B.getID(mRS); 3546 long cID = C.getID(mRS); 3547 if (mUseIncSupp) { 3548 aID = getDummyAlloc(A); 3549 bID = getDummyAlloc(B); 3550 cID = getDummyAlloc(C); 3551 } 3552 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zsyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, aID, bID, beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp); 3553 } 3554 3555 static void validateTRMM(Element e, @Side int Side, @Transpose int TransA, Allocation A, Allocation B) { 3556 validateSide(Side); 3557 validateTranspose(TransA); 3558 int aM = -1, aN = -1, bM = -1, bN = -1; 3559 if (!A.getType().getElement().isCompatible(e) || 3560 !B.getType().getElement().isCompatible(e)) { 3561 throw new RSRuntimeException("Called BLAS with wrong Element type"); 3562 } 3563 3564 aM = A.getType().getY(); 3565 aN = A.getType().getX(); 3566 if (aM != aN) { 3567 throw new RSRuntimeException("Called TRMM with a non-symmetric matrix A"); 3568 } 3569 3570 bM = B.getType().getY(); 3571 bN = B.getType().getX(); 3572 if (Side == LEFT) { 3573 if (aN != bM) { 3574 throw new RSRuntimeException("Called TRMM with invalid matrices"); 3575 } 3576 } else { 3577 if (bN != aM) { 3578 throw new RSRuntimeException("Called TRMM with invalid matrices"); 3579 } 3580 } 3581 } 3582 3583 /** 3584 * STRMM performs one of the matrix-matrix operations 3585 * B := alpha*op(A)*B or B := alpha*B*op(A) 3586 * op(A) is one of op(A) = A or op(A) = A**T 3587 * 3588 * Details: http://www.netlib.org/lapack/explore-html/df/d01/strmm_8f.html 3589 * 3590 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3591 * @param Uplo Specifies whether matrix A is upper or lower triangular. 3592 * @param TransA The type of transpose applied to matrix A. 3593 * @param Diag Specifies whether or not A is unit triangular. 3594 * @param alpha The scalar alpha. 3595 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 3596 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}. 3597 */ 3598 public void STRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, float alpha, Allocation A, Allocation B) { 3599 validateUplo(Uplo); 3600 validateDiag(Diag); 3601 validateTRMM(Element.F32(mRS), Side, TransA, A, B); 3602 3603 boolean mUseIncSupp = isIncSupp(); 3604 long aID = A.getID(mRS); 3605 long bID = B.getID(mRS); 3606 if (mUseIncSupp) { 3607 aID = getDummyAlloc(A); 3608 bID = getDummyAlloc(B); 3609 } 3610 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, 3611 alpha, aID, bID, 0.f, 0, 0, 0, 0, 0, mUseIncSupp); 3612 } 3613 3614 /** 3615 * DTRMM performs one of the matrix-matrix operations 3616 * B := alpha*op(A)*B or B := alpha*B*op(A) 3617 * op(A) is one of op(A) = A or op(A) = A**T 3618 * 3619 * Details: http://www.netlib.org/lapack/explore-html/dd/d19/dtrmm_8f.html 3620 * 3621 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3622 * @param Uplo Specifies whether matrix A is upper or lower triangular. 3623 * @param TransA The type of transpose applied to matrix A. 3624 * @param Diag Specifies whether or not A is unit triangular. 3625 * @param alpha The scalar alpha. 3626 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 3627 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}. 3628 */ 3629 public void DTRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, double alpha, Allocation A, Allocation B) { 3630 validateUplo(Uplo); 3631 validateDiag(Diag); 3632 validateTRMM(Element.F64(mRS), Side, TransA, A, B); 3633 3634 boolean mUseIncSupp = isIncSupp(); 3635 long aID = A.getID(mRS); 3636 long bID = B.getID(mRS); 3637 if (mUseIncSupp) { 3638 aID = getDummyAlloc(A); 3639 bID = getDummyAlloc(B); 3640 } 3641 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, 3642 alpha, aID, bID, 0, 0, 0, 0, 0, 0, mUseIncSupp); 3643 } 3644 3645 /** 3646 * CTRMM performs one of the matrix-matrix operations 3647 * B := alpha*op(A)*B or B := alpha*B*op(A) 3648 * op(A) is one of op(A) = A or op(A) = A**T or op(A) = A**H 3649 * 3650 * Details: http://www.netlib.org/lapack/explore-html/d4/d9b/ctrmm_8f.html 3651 * 3652 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3653 * @param Uplo Specifies whether matrix A is upper or lower triangular. 3654 * @param TransA The type of transpose applied to matrix A. 3655 * @param Diag Specifies whether or not A is unit triangular. 3656 * @param alpha The scalar alpha. 3657 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 3658 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}. 3659 */ 3660 public void CTRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Float2 alpha, Allocation A, Allocation B) { 3661 validateUplo(Uplo); 3662 validateDiag(Diag); 3663 validateTRMM(Element.F32_2(mRS), Side, TransA, A, B); 3664 3665 boolean mUseIncSupp = isIncSupp(); 3666 long aID = A.getID(mRS); 3667 long bID = B.getID(mRS); 3668 if (mUseIncSupp) { 3669 aID = getDummyAlloc(A); 3670 bID = getDummyAlloc(B); 3671 } 3672 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, 3673 alpha.x, alpha.y, aID, bID, 0, 0, 0, 0, 0, 0, 0, mUseIncSupp); 3674 } 3675 3676 /** 3677 * ZTRMM performs one of the matrix-matrix operations 3678 * B := alpha*op(A)*B or B := alpha*B*op(A) 3679 * op(A) is one of op(A) = A or op(A) = A**T or op(A) = A**H 3680 * 3681 * Details: http://www.netlib.org/lapack/explore-html/d8/de1/ztrmm_8f.html 3682 * 3683 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3684 * @param Uplo Specifies whether matrix A is upper or lower triangular. 3685 * @param TransA The type of transpose applied to matrix A. 3686 * @param Diag Specifies whether or not A is unit triangular. 3687 * @param alpha The scalar alpha. 3688 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 3689 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}. 3690 */ 3691 public void ZTRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Double2 alpha, Allocation A, Allocation B) { 3692 validateUplo(Uplo); 3693 validateDiag(Diag); 3694 validateTRMM(Element.F64_2(mRS), Side, TransA, A, B); 3695 3696 boolean mUseIncSupp = isIncSupp(); 3697 long aID = A.getID(mRS); 3698 long bID = B.getID(mRS); 3699 if (mUseIncSupp) { 3700 aID = getDummyAlloc(A); 3701 bID = getDummyAlloc(B); 3702 } 3703 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, 3704 alpha.x, alpha.y, aID, bID, 0, 0, 0, 0, 0, 0, 0, mUseIncSupp); 3705 } 3706 3707 static void validateTRSM(Element e, @Side int Side, @Transpose int TransA, Allocation A, Allocation B) { 3708 int adim = -1, bM = -1, bN = -1; 3709 validateSide(Side); 3710 validateTranspose(TransA); 3711 if (!A.getType().getElement().isCompatible(e) || 3712 !B.getType().getElement().isCompatible(e)) { 3713 throw new RSRuntimeException("Called BLAS with wrong Element type"); 3714 } 3715 adim = A.getType().getX(); 3716 if (adim != A.getType().getY()) { 3717 // this may be unnecessary, the restriction could potentially be relaxed 3718 // A needs to contain at least that symmetric matrix but could theoretically be larger 3719 // for now we assume adapters are sufficient, will reevaluate in the future 3720 throw new RSRuntimeException("Called TRSM with a non-symmetric matrix A"); 3721 } 3722 bM = B.getType().getY(); 3723 bN = B.getType().getX(); 3724 if (Side == LEFT) { 3725 // A is M*M 3726 if (adim != bM) { 3727 throw new RSRuntimeException("Called TRSM with invalid matrix dimensions"); 3728 } 3729 } else { 3730 // A is N*N 3731 if (adim != bN) { 3732 throw new RSRuntimeException("Called TRSM with invalid matrix dimensions"); 3733 } 3734 } 3735 } 3736 3737 /** 3738 * STRSM solves one of the matrix equations 3739 * op(A)*X := alpha*B or X*op(A) := alpha*B 3740 * op(A) is one of op(A) = A or op(A) = A**T 3741 * 3742 * Details: http://www.netlib.org/lapack/explore-html/d2/d8b/strsm_8f.html 3743 * 3744 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3745 * @param Uplo Specifies whether matrix A is upper or lower triangular. 3746 * @param TransA The type of transpose applied to matrix A. 3747 * @param Diag Specifies whether or not A is unit triangular. 3748 * @param alpha The scalar alpha. 3749 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 3750 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}. 3751 */ 3752 public void STRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, float alpha, Allocation A, Allocation B) { 3753 validateUplo(Uplo); 3754 validateDiag(Diag); 3755 validateTRSM(Element.F32(mRS), Side, TransA, A, B); 3756 3757 boolean mUseIncSupp = isIncSupp(); 3758 long aID = A.getID(mRS); 3759 long bID = B.getID(mRS); 3760 if (mUseIncSupp) { 3761 aID = getDummyAlloc(A); 3762 bID = getDummyAlloc(B); 3763 } 3764 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, 3765 alpha, aID, bID, 0, 0, 0, 0, 0, 0, mUseIncSupp); 3766 } 3767 3768 /** 3769 * DTRSM solves one of the matrix equations 3770 * op(A)*X := alpha*B or X*op(A) := alpha*B 3771 * op(A) is one of op(A) = A or op(A) = A**T 3772 * 3773 * Details: http://www.netlib.org/lapack/explore-html/de/da7/dtrsm_8f.html 3774 * 3775 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3776 * @param Uplo Specifies whether matrix A is upper or lower triangular. 3777 * @param TransA The type of transpose applied to matrix A. 3778 * @param Diag Specifies whether or not A is unit triangular. 3779 * @param alpha The scalar alpha. 3780 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 3781 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}. 3782 */ 3783 public void DTRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, double alpha, Allocation A, Allocation B) { 3784 validateUplo(Uplo); 3785 validateDiag(Diag); 3786 validateTRSM(Element.F64(mRS), Side, TransA, A, B); 3787 3788 boolean mUseIncSupp = isIncSupp(); 3789 long aID = A.getID(mRS); 3790 long bID = B.getID(mRS); 3791 if (mUseIncSupp) { 3792 aID = getDummyAlloc(A); 3793 bID = getDummyAlloc(B); 3794 } 3795 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, 3796 alpha, aID, bID, 0, 0, 0, 0, 0, 0, mUseIncSupp); 3797 } 3798 3799 /** 3800 * CTRSM solves one of the matrix equations 3801 * op(A)*X := alpha*B or X*op(A) := alpha*B 3802 * op(A) is one of op(A) = A or op(A) = A**T or op(A) = A**H 3803 * 3804 * Details: http://www.netlib.org/lapack/explore-html/de/d30/ctrsm_8f.html 3805 * 3806 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3807 * @param Uplo Specifies whether matrix A is upper or lower triangular. 3808 * @param TransA The type of transpose applied to matrix A. 3809 * @param Diag Specifies whether or not A is unit triangular. 3810 * @param alpha The scalar alpha. 3811 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 3812 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}. 3813 */ 3814 public void CTRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Float2 alpha, Allocation A, Allocation B) { 3815 validateUplo(Uplo); 3816 validateDiag(Diag); 3817 validateTRSM(Element.F32_2(mRS), Side, TransA, A, B); 3818 3819 boolean mUseIncSupp = isIncSupp(); 3820 long aID = A.getID(mRS); 3821 long bID = B.getID(mRS); 3822 if (mUseIncSupp) { 3823 aID = getDummyAlloc(A); 3824 bID = getDummyAlloc(B); 3825 } 3826 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, 3827 alpha.x, alpha.y, aID, bID, 0, 0, 0, 0, 0, 0, 0, mUseIncSupp); 3828 } 3829 3830 /** 3831 * ZTRSM solves one of the matrix equations 3832 * op(A)*X := alpha*B or X*op(A) := alpha*B 3833 * op(A) is one of op(A) = A or op(A) = A**T or op(A) = A**H 3834 * 3835 * Details: http://www.netlib.org/lapack/explore-html/d1/d39/ztrsm_8f.html 3836 * 3837 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3838 * @param Uplo Specifies whether matrix A is upper or lower triangular. 3839 * @param TransA The type of transpose applied to matrix A. 3840 * @param Diag Specifies whether or not A is unit triangular. 3841 * @param alpha The scalar alpha. 3842 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 3843 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}. 3844 */ 3845 public void ZTRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Double2 alpha, Allocation A, Allocation B) { 3846 validateUplo(Uplo); 3847 validateDiag(Diag); 3848 validateTRSM(Element.F64_2(mRS), Side, TransA, A, B); 3849 3850 boolean mUseIncSupp = isIncSupp(); 3851 long aID = A.getID(mRS); 3852 long bID = B.getID(mRS); 3853 if (mUseIncSupp) { 3854 aID = getDummyAlloc(A); 3855 bID = getDummyAlloc(B); 3856 } 3857 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, 3858 alpha.x, alpha.y, aID, bID, 0, 0, 0, 0, 0, 0, 0, mUseIncSupp); 3859 } 3860 3861 static void validateHEMM(Element e, @Side int Side, Allocation A, Allocation B, Allocation C) { 3862 validateSide(Side); 3863 3864 if (!A.getType().getElement().isCompatible(e) || 3865 !B.getType().getElement().isCompatible(e) || 3866 !C.getType().getElement().isCompatible(e)) { 3867 throw new RSRuntimeException("Called BLAS with wrong Element type"); 3868 } 3869 3870 // A must be square; can potentially be relaxed similar to TRSM 3871 int adim = A.getType().getX(); 3872 if (adim != A.getType().getY()) { 3873 throw new RSRuntimeException("Called HEMM with non-square A"); 3874 } 3875 if ((Side == LEFT && adim != B.getType().getY()) || 3876 (Side == RIGHT && adim != B.getType().getX())) { 3877 throw new RSRuntimeException("Called HEMM with invalid B"); 3878 } 3879 if (B.getType().getX() != C.getType().getX() || 3880 B.getType().getY() != C.getType().getY()) { 3881 throw new RSRuntimeException("Called HEMM with mismatched B and C"); 3882 } 3883 } 3884 3885 /** 3886 * CHEMM performs one of the matrix-matrix operations 3887 * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C 3888 * 3889 * Details: http://www.netlib.org/lapack/explore-html/d3/d66/chemm_8f.html 3890 * 3891 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3892 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 3893 * @param alpha The scalar alpha. 3894 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 3895 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}. 3896 * @param beta The scalar beta. 3897 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}. 3898 */ 3899 public void CHEMM(@Side int Side, @Uplo int Uplo, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C) { 3900 validateUplo(Uplo); 3901 validateHEMM(Element.F32_2(mRS), Side, A, B, C); 3902 3903 boolean mUseIncSupp = isIncSupp(); 3904 long aID = A.getID(mRS); 3905 long bID = B.getID(mRS); 3906 long cID = C.getID(mRS); 3907 if (mUseIncSupp) { 3908 aID = getDummyAlloc(A); 3909 bID = getDummyAlloc(B); 3910 cID = getDummyAlloc(C); 3911 } 3912 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chemm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, 3913 alpha.x, alpha.y, aID, bID, beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp); 3914 } 3915 3916 /** 3917 * ZHEMM performs one of the matrix-matrix operations 3918 * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C 3919 * 3920 * Details: http://www.netlib.org/lapack/explore-html/d6/d3e/zhemm_8f.html 3921 * 3922 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3923 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 3924 * @param alpha The scalar alpha. 3925 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 3926 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}. 3927 * @param beta The scalar beta. 3928 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}. 3929 */ 3930 public void ZHEMM(@Side int Side, @Uplo int Uplo, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C) { 3931 validateUplo(Uplo); 3932 validateHEMM(Element.F64_2(mRS), Side, A, B, C); 3933 3934 boolean mUseIncSupp = isIncSupp(); 3935 long aID = A.getID(mRS); 3936 long bID = B.getID(mRS); 3937 long cID = C.getID(mRS); 3938 if (mUseIncSupp) { 3939 aID = getDummyAlloc(A); 3940 bID = getDummyAlloc(B); 3941 cID = getDummyAlloc(C); 3942 } 3943 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhemm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, 3944 alpha.x, alpha.y, aID, bID, beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp); 3945 } 3946 3947 static void validateHERK(Element e, @Transpose int Trans, Allocation A, Allocation C) { 3948 if (!A.getType().getElement().isCompatible(e) || 3949 !C.getType().getElement().isCompatible(e)) { 3950 throw new RSRuntimeException("Called BLAS with wrong Element type"); 3951 } 3952 validateConjTranspose(Trans); 3953 int cdim = C.getType().getX(); 3954 if (cdim != C.getType().getY()) { 3955 throw new RSRuntimeException("Called HERK with non-square C"); 3956 } 3957 if (Trans == NO_TRANSPOSE) { 3958 if (cdim != A.getType().getY()) { 3959 throw new RSRuntimeException("Called HERK with invalid A"); 3960 } 3961 } else { 3962 if (cdim != A.getType().getX()) { 3963 throw new RSRuntimeException("Called HERK with invalid A"); 3964 } 3965 } 3966 } 3967 3968 /** 3969 * CHERK performs one of the hermitian rank k operations 3970 * C := alpha*A*A**H + beta*C or C := alpha*A**H*A + beta*C 3971 * 3972 * Details: http://www.netlib.org/lapack/explore-html/d8/d52/cherk_8f.html 3973 * 3974 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 3975 * @param Trans The type of transpose applied to the operation. 3976 * @param alpha The scalar alpha. 3977 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 3978 * @param beta The scalar beta. 3979 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}. 3980 */ 3981 public void CHERK(@Uplo int Uplo, @Transpose int Trans, float alpha, Allocation A, float beta, Allocation C) { 3982 validateUplo(Uplo); 3983 validateHERK(Element.F32_2(mRS), Trans, A, C); 3984 int k = 0; 3985 if (Trans == CONJ_TRANSPOSE) { 3986 k = A.getType().getY(); 3987 } else { 3988 k = A.getType().getX(); 3989 } 3990 3991 boolean mUseIncSupp = isIncSupp(); 3992 long aID = A.getID(mRS); 3993 long cID = C.getID(mRS); 3994 if (mUseIncSupp) { 3995 aID = getDummyAlloc(A); 3996 cID = getDummyAlloc(C); 3997 } 3998 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cherk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k, 3999 alpha, 0, aID, 0, beta, 0, cID, 0, 0, 0, 0, mUseIncSupp); 4000 } 4001 4002 /** 4003 * ZHERK performs one of the hermitian rank k operations 4004 * C := alpha*A*A**H + beta*C or C := alpha*A**H*A + beta*C 4005 * 4006 * Details: http://www.netlib.org/lapack/explore-html/d1/db1/zherk_8f.html 4007 * 4008 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 4009 * @param Trans The type of transpose applied to the operation. 4010 * @param alpha The scalar alpha. 4011 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 4012 * @param beta The scalar beta. 4013 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}. 4014 */ 4015 public void ZHERK(@Uplo int Uplo, @Transpose int Trans, double alpha, Allocation A, double beta, Allocation C) { 4016 validateUplo(Uplo); 4017 validateHERK(Element.F64_2(mRS), Trans, A, C); 4018 int k = 0; 4019 if (Trans == CONJ_TRANSPOSE) { 4020 k = A.getType().getY(); 4021 } else { 4022 k = A.getType().getX(); 4023 } 4024 4025 boolean mUseIncSupp = isIncSupp(); 4026 long aID = A.getID(mRS); 4027 long cID = C.getID(mRS); 4028 if (mUseIncSupp) { 4029 aID = getDummyAlloc(A); 4030 cID = getDummyAlloc(C); 4031 } 4032 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zherk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k, 4033 alpha, 0, aID, 0, beta, 0, cID, 0, 0, 0, 0, mUseIncSupp); 4034 } 4035 4036 static void validateHER2K(Element e, @Transpose int Trans, Allocation A, Allocation B, Allocation C) { 4037 if (!A.getType().getElement().isCompatible(e) || 4038 !B.getType().getElement().isCompatible(e) || 4039 !C.getType().getElement().isCompatible(e)) { 4040 throw new RSRuntimeException("Called BLAS with wrong Element type"); 4041 } 4042 validateConjTranspose(Trans); 4043 int cdim = C.getType().getX(); 4044 if (cdim != C.getType().getY()) { 4045 throw new RSRuntimeException("Called HER2K with non-square C"); 4046 } 4047 if (Trans == NO_TRANSPOSE) { 4048 if (A.getType().getY() != cdim) { 4049 throw new RSRuntimeException("Called HER2K with invalid matrices"); 4050 } 4051 } else { 4052 if (A.getType().getX() != cdim) { 4053 throw new RSRuntimeException("Called HER2K with invalid matrices"); 4054 } 4055 } 4056 if (A.getType().getX() != B.getType().getX() || A.getType().getY() != B.getType().getY()) { 4057 throw new RSRuntimeException("Called HER2K with invalid A and B matrices"); 4058 } 4059 } 4060 4061 /** 4062 * CHER2K performs one of the hermitian rank 2k operations 4063 * C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C or C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C 4064 * 4065 * Details: http://www.netlib.org/lapack/explore-html/d1/d82/cher2k_8f.html 4066 * 4067 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 4068 * @param Trans The type of transpose applied to the operation. 4069 * @param alpha The scalar alpha. 4070 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 4071 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}. 4072 * @param beta The scalar beta. 4073 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}. 4074 */ 4075 public void CHER2K(@Uplo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Allocation B, float beta, Allocation C) { 4076 validateUplo(Uplo); 4077 validateHER2K(Element.F32_2(mRS), Trans, A, B, C); 4078 int k = 0; 4079 if (Trans == NO_TRANSPOSE) { 4080 k = A.getType().getX(); 4081 } else { 4082 k = A.getType().getY(); 4083 } 4084 4085 boolean mUseIncSupp = isIncSupp(); 4086 long aID = A.getID(mRS); 4087 long bID = B.getID(mRS); 4088 long cID = C.getID(mRS); 4089 if (mUseIncSupp) { 4090 aID = getDummyAlloc(A); 4091 bID = getDummyAlloc(B); 4092 cID = getDummyAlloc(C); 4093 } 4094 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cher2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k, alpha.x, alpha.y, 4095 A.getID(mRS), bID, beta, 0, cID, 0, 0, 0, 0, mUseIncSupp); 4096 } 4097 4098 /** 4099 * ZHER2K performs one of the hermitian rank 2k operations 4100 * C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C or C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C 4101 * 4102 * Details: http://www.netlib.org/lapack/explore-html/d7/dfa/zher2k_8f.html 4103 * 4104 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 4105 * @param Trans The type of transpose applied to the operation. 4106 * @param alpha The scalar alpha. 4107 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 4108 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}. 4109 * @param beta The scalar beta. 4110 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}. 4111 */ 4112 public void ZHER2K(@Uplo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Allocation B, double beta, Allocation C) { 4113 validateUplo(Uplo); 4114 validateHER2K(Element.F64_2(mRS), Trans, A, B, C); 4115 int k = 0; 4116 if (Trans == NO_TRANSPOSE) { 4117 k = A.getType().getX(); 4118 } else { 4119 k = A.getType().getY(); 4120 } 4121 4122 boolean mUseIncSupp = isIncSupp(); 4123 long aID = A.getID(mRS); 4124 long bID = B.getID(mRS); 4125 long cID = C.getID(mRS); 4126 if (mUseIncSupp) { 4127 aID = getDummyAlloc(A); 4128 bID = getDummyAlloc(B); 4129 cID = getDummyAlloc(C); 4130 } 4131 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zher2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k, alpha.x, alpha.y, 4132 A.getID(mRS), bID, beta, 0, cID, 0, 0, 0, 0, mUseIncSupp); 4133 } 4134 4135 4136 /** 4137 * 8-bit GEMM-like operation for neural networks: C = B.transposed() * A 4138 * Calculations are done in 1.10.21 fixed-point format for the final output, 4139 * just before there's a shift down to drop the fractional parts. The output 4140 * values are gated to 0 to 255 to fit in a byte, but the 10-bit format 4141 * gives some headroom to avoid wrapping around on small overflows. 4142 * 4143 * @param A The input allocation contains matrix A, supported elements type {@link Element#U8}. 4144 * @param a_offset The offset for all values in matrix A, e.g A[i,j] = A[i,j] - a_offset. 4145 * @param B The input allocation contains matrix B, supported elements type {@link Element#U8}. 4146 * @param b_offset The offset for all values in matrix B, e.g B[i,j] = B[i,j] - b_offset. 4147 * @param C The input allocation contains matrix C, supported elements type {@link Element#U8}. 4148 * @param c_offset The offset for all values in matrix C. 4149 * @param c_mult The multiplier for all values in matrix C, e.g C[i,j] = (C[i,j] + c_offset) * c_mult. 4150 **/ 4151 public void BNNM(Allocation A, int a_offset, Allocation B, int b_offset, Allocation C, int c_offset, int c_mult) { 4152 validateL3(Element.U8(mRS), NO_TRANSPOSE, TRANSPOSE, 0, A, B, C); 4153 4154 int M = -1, N = -1, K = -1; 4155 M = A.getType().getY(); 4156 N = B.getType().getY(); 4157 K = A.getType().getX(); 4158 4159 boolean mUseIncSupp = isIncSupp(); 4160 long aID = A.getID(mRS); 4161 long bID = B.getID(mRS); 4162 long cID = C.getID(mRS); 4163 if (mUseIncSupp) { 4164 aID = getDummyAlloc(A); 4165 bID = getDummyAlloc(B); 4166 cID = getDummyAlloc(C); 4167 } 4168 mRS.nScriptIntrinsicBLAS_BNNM(getID(mRS), M, N, K, aID, a_offset, bID, b_offset, cID, c_offset, c_mult, mUseIncSupp); 4169 4170 } 4171 4172} 4173