1/*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package android.support.v8.renderscript;
18
19import android.support.annotation.IntDef;
20import java.lang.annotation.Retention;
21import java.lang.annotation.RetentionPolicy;
22
23/**
24 *
25 * ScriptIntrinsicBLAS class provides high performance RenderScript APIs to BLAS.
26 *
27 * The BLAS (Basic Linear Algebra Subprograms) are routines that provide standard
28 * building blocks for performing basic vector and matrix operations.
29 *
30 * For detailed description of BLAS, please refer to http://www.netlib.org/blas/
31 *
32 **/
33public final class ScriptIntrinsicBLAS extends ScriptIntrinsic {
34    private Allocation mLUT;
35    private static final int INTRINSIC_API_LEVEL = 23;
36
37    private ScriptIntrinsicBLAS(long id, RenderScript rs) {
38        super(id, rs);
39    }
40
41    private static final int RsBlas_sdsdot = 1;
42    private static final int RsBlas_dsdot = 2;
43    private static final int RsBlas_sdot = 3;
44    private static final int RsBlas_ddot = 4;
45    private static final int RsBlas_cdotu_sub = 5;
46    private static final int RsBlas_cdotc_sub = 6;
47    private static final int RsBlas_zdotu_sub = 7;
48    private static final int RsBlas_zdotc_sub = 8;
49    private static final int RsBlas_snrm2 = 9;
50    private static final int RsBlas_sasum = 10;
51    private static final int RsBlas_dnrm2 = 11;
52    private static final int RsBlas_dasum = 12;
53    private static final int RsBlas_scnrm2 = 13;
54    private static final int RsBlas_scasum = 14;
55    private static final int RsBlas_dznrm2 = 15;
56    private static final int RsBlas_dzasum = 16;
57    private static final int RsBlas_isamax = 17;
58    private static final int RsBlas_idamax = 18;
59    private static final int RsBlas_icamax = 19;
60    private static final int RsBlas_izamax = 20;
61    private static final int RsBlas_sswap = 21;
62    private static final int RsBlas_scopy = 22;
63    private static final int RsBlas_saxpy = 23;
64    private static final int RsBlas_dswap = 24;
65    private static final int RsBlas_dcopy = 25;
66    private static final int RsBlas_daxpy = 26;
67    private static final int RsBlas_cswap = 27;
68    private static final int RsBlas_ccopy = 28;
69    private static final int RsBlas_caxpy = 29;
70    private static final int RsBlas_zswap = 30;
71    private static final int RsBlas_zcopy = 31;
72    private static final int RsBlas_zaxpy = 32;
73    private static final int RsBlas_srotg = 33;
74    private static final int RsBlas_srotmg = 34;
75    private static final int RsBlas_srot = 35;
76    private static final int RsBlas_srotm = 36;
77    private static final int RsBlas_drotg = 37;
78    private static final int RsBlas_drotmg = 38;
79    private static final int RsBlas_drot = 39;
80    private static final int RsBlas_drotm = 40;
81    private static final int RsBlas_sscal = 41;
82    private static final int RsBlas_dscal = 42;
83    private static final int RsBlas_cscal = 43;
84    private static final int RsBlas_zscal = 44;
85    private static final int RsBlas_csscal = 45;
86    private static final int RsBlas_zdscal = 46;
87    private static final int RsBlas_sgemv = 47;
88    private static final int RsBlas_sgbmv = 48;
89    private static final int RsBlas_strmv = 49;
90    private static final int RsBlas_stbmv = 50;
91    private static final int RsBlas_stpmv = 51;
92    private static final int RsBlas_strsv = 52;
93    private static final int RsBlas_stbsv = 53;
94    private static final int RsBlas_stpsv = 54;
95    private static final int RsBlas_dgemv = 55;
96    private static final int RsBlas_dgbmv = 56;
97    private static final int RsBlas_dtrmv = 57;
98    private static final int RsBlas_dtbmv = 58;
99    private static final int RsBlas_dtpmv = 59;
100    private static final int RsBlas_dtrsv = 60;
101    private static final int RsBlas_dtbsv = 61;
102    private static final int RsBlas_dtpsv = 62;
103    private static final int RsBlas_cgemv = 63;
104    private static final int RsBlas_cgbmv = 64;
105    private static final int RsBlas_ctrmv = 65;
106    private static final int RsBlas_ctbmv = 66;
107    private static final int RsBlas_ctpmv = 67;
108    private static final int RsBlas_ctrsv = 68;
109    private static final int RsBlas_ctbsv = 69;
110    private static final int RsBlas_ctpsv = 70;
111    private static final int RsBlas_zgemv = 71;
112    private static final int RsBlas_zgbmv = 72;
113    private static final int RsBlas_ztrmv = 73;
114    private static final int RsBlas_ztbmv = 74;
115    private static final int RsBlas_ztpmv = 75;
116    private static final int RsBlas_ztrsv = 76;
117    private static final int RsBlas_ztbsv = 77;
118    private static final int RsBlas_ztpsv = 78;
119    private static final int RsBlas_ssymv = 79;
120    private static final int RsBlas_ssbmv = 80;
121    private static final int RsBlas_sspmv = 81;
122    private static final int RsBlas_sger = 82;
123    private static final int RsBlas_ssyr = 83;
124    private static final int RsBlas_sspr = 84;
125    private static final int RsBlas_ssyr2 = 85;
126    private static final int RsBlas_sspr2 = 86;
127    private static final int RsBlas_dsymv = 87;
128    private static final int RsBlas_dsbmv = 88;
129    private static final int RsBlas_dspmv = 89;
130    private static final int RsBlas_dger = 90;
131    private static final int RsBlas_dsyr = 91;
132    private static final int RsBlas_dspr = 92;
133    private static final int RsBlas_dsyr2 = 93;
134    private static final int RsBlas_dspr2 = 94;
135    private static final int RsBlas_chemv = 95;
136    private static final int RsBlas_chbmv = 96;
137    private static final int RsBlas_chpmv = 97;
138    private static final int RsBlas_cgeru = 98;
139    private static final int RsBlas_cgerc = 99;
140    private static final int RsBlas_cher = 100;
141    private static final int RsBlas_chpr = 101;
142    private static final int RsBlas_cher2 = 102;
143    private static final int RsBlas_chpr2 = 103;
144    private static final int RsBlas_zhemv = 104;
145    private static final int RsBlas_zhbmv = 105;
146    private static final int RsBlas_zhpmv = 106;
147    private static final int RsBlas_zgeru = 107;
148    private static final int RsBlas_zgerc = 108;
149    private static final int RsBlas_zher = 109;
150    private static final int RsBlas_zhpr = 110;
151    private static final int RsBlas_zher2 = 111;
152    private static final int RsBlas_zhpr2 = 112;
153    private static final int RsBlas_sgemm = 113;
154    private static final int RsBlas_ssymm = 114;
155    private static final int RsBlas_ssyrk = 115;
156    private static final int RsBlas_ssyr2k = 116;
157    private static final int RsBlas_strmm = 117;
158    private static final int RsBlas_strsm = 118;
159    private static final int RsBlas_dgemm = 119;
160    private static final int RsBlas_dsymm = 120;
161    private static final int RsBlas_dsyrk = 121;
162    private static final int RsBlas_dsyr2k = 122;
163    private static final int RsBlas_dtrmm = 123;
164    private static final int RsBlas_dtrsm = 124;
165    private static final int RsBlas_cgemm = 125;
166    private static final int RsBlas_csymm = 126;
167    private static final int RsBlas_csyrk = 127;
168    private static final int RsBlas_csyr2k = 128;
169    private static final int RsBlas_ctrmm = 129;
170    private static final int RsBlas_ctrsm = 130;
171    private static final int RsBlas_zgemm = 131;
172    private static final int RsBlas_zsymm = 132;
173    private static final int RsBlas_zsyrk = 133;
174    private static final int RsBlas_zsyr2k = 134;
175    private static final int RsBlas_ztrmm = 135;
176    private static final int RsBlas_ztrsm = 136;
177    private static final int RsBlas_chemm = 137;
178    private static final int RsBlas_cherk = 138;
179    private static final int RsBlas_cher2k = 139;
180    private static final int RsBlas_zhemm = 140;
181    private static final int RsBlas_zherk = 141;
182    private static final int RsBlas_zher2k = 142;
183
184    // BLAS extensions start here
185    private static final int RsBlas_bnnm = 1000;
186
187    /**
188     * Create an intrinsic to access BLAS subroutines.
189     *
190     * @param rs The RenderScript context
191     * @return ScriptIntrinsicBLAS
192     */
193    public static ScriptIntrinsicBLAS create(RenderScript rs) {
194        long id;
195        boolean mUseIncSupp = rs.isUseNative() &&
196                              android.os.Build.VERSION.SDK_INT < INTRINSIC_API_LEVEL;
197
198        id = rs.nScriptIntrinsicCreate(13, Element.U32(rs).getID(rs), mUseIncSupp);
199        ScriptIntrinsicBLAS si = new ScriptIntrinsicBLAS(id, rs);
200        si.setIncSupp(mUseIncSupp);
201        return si;
202    }
203
204    /**
205     * @hide
206     */
207    @IntDef({NO_TRANSPOSE, TRANSPOSE, CONJ_TRANSPOSE})
208    @Retention(RetentionPolicy.SOURCE)
209    public @interface Transpose {}
210
211    /**
212     * @hide
213     */
214    @IntDef({UPPER, LOWER})
215    @Retention(RetentionPolicy.SOURCE)
216    public @interface Uplo {}
217
218    /**
219     * @hide
220     */
221    @IntDef({NON_UNIT, UNIT})
222    @Retention(RetentionPolicy.SOURCE)
223    public @interface Diag {}
224
225    /**
226     * @hide
227     */
228    @IntDef({LEFT, RIGHT})
229    @Retention(RetentionPolicy.SOURCE)
230    public @interface Side {}
231
232    public static final int NO_TRANSPOSE = 111;
233    public static final int TRANSPOSE = 112;
234    public static final int CONJ_TRANSPOSE = 113;
235
236    public static final int UPPER = 121;
237    public static final int LOWER = 122;
238
239    public static final int NON_UNIT = 131;
240    public static final int UNIT = 132;
241
242    public static final int LEFT = 141;
243    public static final int RIGHT = 142;
244
245    static void validateSide(@Side int Side) {
246        if (Side != LEFT && Side != RIGHT) {
247            throw new RSRuntimeException("Invalid side passed to BLAS");
248        }
249    }
250
251    static void validateTranspose(@Transpose int Trans) {
252        if (Trans != NO_TRANSPOSE && Trans != TRANSPOSE &&
253            Trans != CONJ_TRANSPOSE) {
254            throw new RSRuntimeException("Invalid transpose passed to BLAS");
255        }
256    }
257
258    static void validateConjTranspose(@Transpose int Trans) {
259        if (Trans != NO_TRANSPOSE &&
260            Trans != CONJ_TRANSPOSE) {
261            throw new RSRuntimeException("Invalid transpose passed to BLAS");
262        }
263    }
264
265    static void validateDiag(@Diag int Diag) {
266        if (Diag != NON_UNIT && Diag != UNIT) {
267            throw new RSRuntimeException("Invalid diag passed to BLAS");
268        }
269    }
270
271    static void validateUplo(@Uplo int Uplo) {
272        if (Uplo != UPPER && Uplo != LOWER) {
273            throw new RSRuntimeException("Invalid uplo passed to BLAS");
274        }
275    }
276
277
278    /**
279     * Level 2 BLAS
280     */
281
282    static void validateGEMV(Element e, int TransA, Allocation A, Allocation X, int incX, Allocation Y, int incY) {
283        validateTranspose(TransA);
284        int M = A.getType().getY();
285        int N = A.getType().getX();
286        if (!A.getType().getElement().isCompatible(e) ||
287            !X.getType().getElement().isCompatible(e) ||
288            !Y.getType().getElement().isCompatible(e)) {
289            throw new RSRuntimeException("Called BLAS with wrong Element type");
290        }
291        if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
292            throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
293        }
294
295        if (incX <= 0 || incY <= 0) {
296            throw new RSRuntimeException("Vector increments must be greater than 0");
297        }
298        int expectedXDim = -1, expectedYDim = -1;
299        if (TransA == NO_TRANSPOSE) {
300            expectedXDim = 1 + (N - 1) * incX;
301            expectedYDim = 1 + (M - 1) * incY;
302        } else {
303            expectedXDim = 1 + (M - 1) * incX;
304            expectedYDim = 1 + (N - 1) * incY;
305        }
306        if (X.getType().getX() != expectedXDim ||
307            Y.getType().getX() != expectedYDim) {
308            throw new RSRuntimeException("Incorrect vector dimensions for GEMV");
309        }
310    }
311
312    /**
313     * SGEMV performs one of the matrix-vector operations
314     * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y
315     *
316     * Details: http://www.netlib.org/lapack/explore-html/db/d58/sgemv_8f.html
317     *
318     * @param TransA The type of transpose applied to matrix A.
319     * @param alpha The scalar alpha.
320     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
321     * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
322     * @param incX The increment for the elements of vector x, must be larger than zero.
323     * @param beta The scalar beta.
324     * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
325     * @param incY The increment for the elements of vector y, must be larger than zero.
326     */
327    public void SGEMV(@Transpose int TransA, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) {
328        validateGEMV(Element.F32(mRS), TransA, A, X, incX, Y, incY);
329        int M = A.getType().getY();
330        int N = A.getType().getX();
331
332        boolean mUseIncSupp = isIncSupp();
333        long aID = A.getID(mRS);
334        long xID = X.getID(mRS);
335        long yID = Y.getID(mRS);
336        if (mUseIncSupp) {
337            aID = getDummyAlloc(A);
338            xID = getDummyAlloc(X);
339            yID = getDummyAlloc(Y);
340        }
341        mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha, aID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp);
342    }
343
344    /**
345     * DGEMV performs one of the matrix-vector operations
346     * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y
347     *
348     * Details: http://www.netlib.org/lapack/explore-html/dc/da8/dgemv_8f.html
349     *
350     * @param TransA The type of transpose applied to matrix A.
351     * @param alpha The scalar alpha.
352     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
353     * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
354     * @param incX The increment for the elements of vector x, must be larger than zero.
355     * @param beta The scalar beta.
356     * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
357     * @param incY The increment for the elements of vector y, must be larger than zero.
358     */
359    public void DGEMV(@Transpose int TransA, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) {
360        validateGEMV(Element.F64(mRS), TransA, A, X, incX, Y, incY);
361        int M = A.getType().getY();
362        int N = A.getType().getX();
363
364        boolean mUseIncSupp = isIncSupp();
365        long aID = A.getID(mRS);
366        long xID = X.getID(mRS);
367        long yID = Y.getID(mRS);
368        if (mUseIncSupp) {
369            aID = getDummyAlloc(A);
370            xID = getDummyAlloc(X);
371            yID = getDummyAlloc(Y);
372        }
373        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha, aID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp);
374    }
375
376    /**
377     * CGEMV performs one of the matrix-vector operations
378     * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y   or   y := alpha*A**H*x + beta*y
379     *
380     * Details: http://www.netlib.org/lapack/explore-html/d4/d8a/cgemv_8f.html
381     *
382     * @param TransA The type of transpose applied to matrix A.
383     * @param alpha The scalar alpha.
384     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
385     * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
386     * @param incX The increment for the elements of vector x, must be larger than zero.
387     * @param beta The scalar beta.
388     * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
389     * @param incY The increment for the elements of vector y, must be larger than zero.
390     */
391    public void CGEMV(@Transpose int TransA, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) {
392        validateGEMV(Element.F32_2(mRS), TransA, A, X, incX, Y, incY);
393        int M = A.getType().getY();
394        int N = A.getType().getX();
395
396        boolean mUseIncSupp = isIncSupp();
397        long aID = A.getID(mRS);
398        long xID = X.getID(mRS);
399        long yID = Y.getID(mRS);
400        if (mUseIncSupp) {
401            aID = getDummyAlloc(A);
402            xID = getDummyAlloc(X);
403            yID = getDummyAlloc(Y);
404        }
405        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp);
406    }
407
408    /**
409     * ZGEMV performs one of the matrix-vector operations
410     * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y   or   y := alpha*A**H*x + beta*y
411     *
412     * Details: http://www.netlib.org/lapack/explore-html/db/d40/zgemv_8f.html
413     *
414     * @param TransA The type of transpose applied to matrix A.
415     * @param alpha The scalar alpha.
416     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
417     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
418     * @param incX The increment for the elements of vector x, must be larger than zero.
419     * @param beta The scalar beta.
420     * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
421     * @param incY The increment for the elements of vector y, must be larger than zero.
422     */
423    public void ZGEMV(@Transpose int TransA, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) {
424        validateGEMV(Element.F64_2(mRS), TransA, A, X, incX, Y, incY);
425        int M = A.getType().getY();
426        int N = A.getType().getX();
427
428        boolean mUseIncSupp = isIncSupp();
429        long aID = A.getID(mRS);
430        long xID = X.getID(mRS);
431        long yID = Y.getID(mRS);
432        if (mUseIncSupp) {
433            aID = getDummyAlloc(A);
434            xID = getDummyAlloc(X);
435            yID = getDummyAlloc(Y);
436        }
437        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp);
438    }
439
440    /**
441     * SGBMV performs one of the matrix-vector operations
442     * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y
443     *
444     * Details: http://www.netlib.org/lapack/explore-html/d6/d46/sgbmv_8f.html
445     *
446     * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),
447     *       but only the region M*(KL+KU+1) will be referenced. The following subroutine can is an
448     *       example showing how to convert the original matrix 'a' to row-based band matrix 'b'.
449     *           for i in range(0, m):
450     *              for j in range(max(0, i-kl), min(i+ku+1, n)):
451     *                  b[i, j-i+kl] = a[i, j]
452     *
453     * @param TransA The type of transpose applied to matrix A.
454     * @param KL The number of sub-diagonals of the matrix A.
455     * @param KU The number of super-diagonals of the matrix A.
456     * @param alpha The scalar alpha.
457     * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F32}.
458     * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
459     * @param incX The increment for the elements of vector x, must be larger than zero.
460     * @param beta The scalar beta.
461     * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
462     * @param incY The increment for the elements of vector y, must be larger than zero.
463     */
464    public void SGBMV(@Transpose int TransA, int KL, int KU, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) {
465        // GBMV has the same validation requirements as GEMV + KL and KU >= 0
466        validateGEMV(Element.F32(mRS), TransA, A, X, incX, Y, incY);
467        if (KL < 0 || KU < 0) {
468            throw new RSRuntimeException("KL and KU must be greater than or equal to 0");
469        }
470        int M = A.getType().getY();
471        int N = A.getType().getX();
472
473        boolean mUseIncSupp = isIncSupp();
474        long aID = A.getID(mRS);
475        long xID = X.getID(mRS);
476        long yID = Y.getID(mRS);
477        if (mUseIncSupp) {
478            aID = getDummyAlloc(A);
479            xID = getDummyAlloc(X);
480            yID = getDummyAlloc(Y);
481        }
482        mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha, aID, xID, beta, yID, incX, incY, KL, KU, mUseIncSupp);
483    }
484
485    /**
486     * DGBMV performs one of the matrix-vector operations
487     * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y
488     *
489     * Details: http://www.netlib.org/lapack/explore-html/d2/d3f/dgbmv_8f.html
490     *
491     * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),
492     *       but only the region M*(KL+KU+1) will be referenced. The following subroutine can is an
493     *       example showing how to convert the original matrix 'a' to row-based band matrix 'b'.
494     *           for i in range(0, m):
495     *              for j in range(max(0, i-kl), min(i+ku+1, n)):
496     *                  b[i, j-i+kl] = a[i, j]
497     *
498     * @param TransA The type of transpose applied to matrix A.
499     * @param KL The number of sub-diagonals of the matrix A.
500     * @param KU The number of super-diagonals of the matrix A.
501     * @param alpha The scalar alpha.
502     * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F64}.
503     * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
504     * @param incX The increment for the elements of vector x, must be larger than zero.
505     * @param beta The scalar beta.
506     * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
507     * @param incY The increment for the elements of vector y, must be larger than zero.
508     */
509    public void DGBMV(@Transpose int TransA, int KL, int KU, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) {
510        // GBMV has the same validation requirements as GEMV + KL and KU >= 0
511        validateGEMV(Element.F64(mRS), TransA, A, X, incX, Y, incY);
512        if (KL < 0 || KU < 0) {
513            throw new RSRuntimeException("KL and KU must be greater than or equal to 0");
514        }
515        int M = A.getType().getY();
516        int N = A.getType().getX();
517
518        boolean mUseIncSupp = isIncSupp();
519        long aID = A.getID(mRS);
520        long xID = X.getID(mRS);
521        long yID = Y.getID(mRS);
522        if (mUseIncSupp) {
523            aID = getDummyAlloc(A);
524            xID = getDummyAlloc(X);
525            yID = getDummyAlloc(Y);
526        }
527        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha, aID, xID, beta, yID, incX, incY, KL, KU, mUseIncSupp);
528    }
529
530    /**
531     * CGBMV performs one of the matrix-vector operations
532     * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y   or   y := alpha*A**H*x + beta*y
533     *
534     * Details: http://www.netlib.org/lapack/explore-html/d0/d75/cgbmv_8f.html
535     *
536     * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),
537     *       but only the region M*(KL+KU+1) will be referenced. The following subroutine can is an
538     *       example showing how to convert the original matrix 'a' to row-based band matrix 'b'.
539     *           for i in range(0, m):
540     *              for j in range(max(0, i-kl), min(i+ku+1, n)):
541     *                  b[i, j-i+kl] = a[i, j]
542     *
543     * @param TransA The type of transpose applied to matrix A.
544     * @param KL The number of sub-diagonals of the matrix A.
545     * @param KU The number of super-diagonals of the matrix A.
546     * @param alpha The scalar alpha.
547     * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F32_2}.
548     * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
549     * @param incX The increment for the elements of vector x, must be larger than zero.
550     * @param beta The scalar beta.
551     * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
552     * @param incY The increment for the elements of vector y, must be larger than zero.
553     */
554    public void CGBMV(@Transpose int TransA, int KL, int KU, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) {
555        // GBMV has the same validation requirements as GEMV + KL and KU >= 0
556        validateGEMV(Element.F32_2(mRS), TransA, A, X, incX, Y, incY);
557        if (KL < 0 || KU < 0) {
558            throw new RSRuntimeException("KL and KU must be greater than or equal to 0");
559        }
560        int M = A.getType().getY();
561        int N = A.getType().getX();
562
563        boolean mUseIncSupp = isIncSupp();
564        long aID = A.getID(mRS);
565        long xID = X.getID(mRS);
566        long yID = Y.getID(mRS);
567        if (mUseIncSupp) {
568            aID = getDummyAlloc(A);
569            xID = getDummyAlloc(X);
570            yID = getDummyAlloc(Y);
571        }
572        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, KL, KU, mUseIncSupp);
573    }
574
575    /**
576     * ZGBMV performs one of the matrix-vector operations
577     * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y   or   y := alpha*A**H*x + beta*y
578     *
579     * Details: http://www.netlib.org/lapack/explore-html/d9/d46/zgbmv_8f.html
580     *
581     * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),
582     *       but only the region M*(KL+KU+1) will be referenced. The following subroutine can is an
583     *       example showing how to convert the original matrix 'a' to row-based band matrix 'b'.
584     *           for i in range(0, m):
585     *              for j in range(max(0, i-kl), min(i+ku+1, n)):
586     *                  b[i, j-i+kl] = a[i, j]
587     *
588     * @param TransA The type of transpose applied to matrix A.
589     * @param KL The number of sub-diagonals of the matrix A.
590     * @param KU The number of super-diagonals of the matrix A.
591     * @param alpha The scalar alpha.
592     * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F64_2}.
593     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
594     * @param incX The increment for the elements of vector x, must be larger than zero.
595     * @param beta The scalar beta.
596     * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
597     * @param incY The increment for the elements of vector y, must be larger than zero.
598     */
599    public void ZGBMV(@Transpose int TransA, int KL, int KU, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) {
600        // GBMV has the same validation requirements as GEMV + KL and KU >= 0
601        validateGEMV(Element.F64_2(mRS), TransA, A, X, incX, Y, incY);
602        if (KL < 0 || KU < 0) {
603            throw new RSRuntimeException("KL and KU must be greater than or equal to 0");
604        }
605        int M = A.getType().getY();
606        int N = A.getType().getX();
607
608        boolean mUseIncSupp = isIncSupp();
609        long aID = A.getID(mRS);
610        long xID = X.getID(mRS);
611        long yID = Y.getID(mRS);
612        if (mUseIncSupp) {
613            aID = getDummyAlloc(A);
614            xID = getDummyAlloc(X);
615            yID = getDummyAlloc(Y);
616        }
617        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, KL, KU, mUseIncSupp);
618    }
619
620    static void validateTRMV(Element e, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {
621        validateTranspose(TransA);
622        validateUplo(Uplo);
623        validateDiag(Diag);
624        int N = A.getType().getY();
625        if (A.getType().getX() != N) {
626            throw new RSRuntimeException("A must be a square matrix for TRMV");
627        }
628        if (!A.getType().getElement().isCompatible(e) ||
629            !X.getType().getElement().isCompatible(e)) {
630            throw new RSRuntimeException("Called BLAS with wrong Element type");
631        }
632        if (X.getType().getY() > 1) {
633            throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
634        }
635
636        if (incX <= 0) {
637            throw new RSRuntimeException("Vector increments must be greater than 0");
638        }
639        int expectedXDim = 1 + (N - 1) * incX;
640        if (X.getType().getX() != expectedXDim) {
641            throw new RSRuntimeException("Incorrect vector dimensions for TRMV");
642        }
643    }
644
645    static int validateTPMV(Element e, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) {
646        validateTranspose(TransA);
647        validateUplo(Uplo);
648        validateDiag(Diag);
649        if (!Ap.getType().getElement().isCompatible(e) ||
650            !X.getType().getElement().isCompatible(e)) {
651            throw new RSRuntimeException("Called BLAS with wrong Element type");
652        }
653        if (X.getType().getY() > 1) {
654            throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
655        }
656
657        if (Ap.getType().getY() > 1) {
658            throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1");
659        }
660
661        int N = (int)Math.sqrt((double)Ap.getType().getX() * 2);
662        //is it really doing anything?
663        if (Ap.getType().getX() != ((N * (N+1)) / 2)) {
664            throw new RSRuntimeException("Invalid dimension for Ap");
665        }
666        if (incX <= 0) {
667            throw new RSRuntimeException("Vector increments must be greater than 0");
668        }
669        int expectedXDim = 1 + (N - 1) * incX;
670        if (X.getType().getX() != expectedXDim) {
671            throw new RSRuntimeException("Incorrect vector dimensions for TPMV");
672        }
673
674        return N;
675    }
676
677    /**
678     * STRMV performs one of the matrix-vector operations
679     * x := A*x   or   x := A**T*x
680     *
681     * Details: http://www.netlib.org/lapack/explore-html/de/d45/strmv_8f.html
682     *
683     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
684     * @param TransA The type of transpose applied to matrix A.
685     * @param Diag Specifies whether or not A is unit triangular.
686     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
687     * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
688     * @param incX The increment for the elements of vector x, must be larger than zero.
689     */
690    public void STRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {
691        validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX);
692        int N = A.getType().getY();
693
694        boolean mUseIncSupp = isIncSupp();
695        long aID = A.getID(mRS);
696        long xID = X.getID(mRS);
697        if (mUseIncSupp) {
698            aID = getDummyAlloc(A);
699            xID = getDummyAlloc(X);
700        }
701        mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp);
702    }
703
704    /**
705     * DTRMV performs one of the matrix-vector operations
706     * x := A*x   or   x := A**T*x
707     *
708     * Details: http://www.netlib.org/lapack/explore-html/dc/d7e/dtrmv_8f.html
709     *
710     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
711     * @param TransA The type of transpose applied to matrix A.
712     * @param Diag Specifies whether or not A is unit triangular.
713     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
714     * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
715     * @param incX The increment for the elements of vector x, must be larger than zero.
716     */
717    public void DTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {
718        validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX);
719        int N = A.getType().getY();
720
721        boolean mUseIncSupp = isIncSupp();
722        long aID = A.getID(mRS);
723        long xID = X.getID(mRS);
724        if (mUseIncSupp) {
725            aID = getDummyAlloc(A);
726            xID = getDummyAlloc(X);
727        }
728        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp);
729    }
730
731    /**
732     * CTRMV performs one of the matrix-vector operations
733     * x := A*x   or   x := A**T*x   or   x := A**H*x
734     *
735     * Details: http://www.netlib.org/lapack/explore-html/df/d78/ctrmv_8f.html
736     *
737     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
738     * @param TransA The type of transpose applied to matrix A.
739     * @param Diag Specifies whether or not A is unit triangular.
740     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
741     * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
742     * @param incX The increment for the elements of vector x, must be larger than zero.
743     */
744    public void CTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {
745        validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX);
746        int N = A.getType().getY();
747
748        boolean mUseIncSupp = isIncSupp();
749        long aID = A.getID(mRS);
750        long xID = X.getID(mRS);
751        if (mUseIncSupp) {
752            aID = getDummyAlloc(A);
753            xID = getDummyAlloc(X);
754        }
755        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp);
756    }
757
758    /**
759     * ZTRMV performs one of the matrix-vector operations
760     * x := A*x   or   x := A**T*x   or   x := A**H*x
761     *
762     * Details: http://www.netlib.org/lapack/explore-html/d0/dd1/ztrmv_8f.html
763     *
764     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
765     * @param TransA The type of transpose applied to matrix A.
766     * @param Diag Specifies whether or not A is unit triangular.
767     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
768     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
769     * @param incX The increment for the elements of vector x, must be larger than zero.
770     */
771    public void ZTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {
772        validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX);
773        int N = A.getType().getY();
774
775        boolean mUseIncSupp = isIncSupp();
776        long aID = A.getID(mRS);
777        long xID = X.getID(mRS);
778        if (mUseIncSupp) {
779            aID = getDummyAlloc(A);
780            xID = getDummyAlloc(X);
781        }
782        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp);
783    }
784
785    /**
786     * STBMV performs one of the matrix-vector operations
787     * x := A*x   or   x := A**T*x
788     *
789     * Details: http://www.netlib.org/lapack/explore-html/d6/d7d/stbmv_8f.html
790     *
791     * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
792     *       but only the region N*(K+1) will be referenced. The following subroutine can is an
793     *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
794     *           for i in range(0, n):
795     *              for j in range(i, min(i+k+1, n)):
796     *                  b[i, j-i] = a[i, j]
797     *
798     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
799     * @param TransA The type of transpose applied to matrix A.
800     * @param Diag Specifies whether or not A is unit triangular.
801     * @param K The number of off-diagonals of the matrix A
802     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
803     * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
804     * @param incX The increment for the elements of vector x, must be larger than zero.
805     */
806    public void STBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
807        // TBMV has the same requirements as TRMV + K >= 0
808        if (K < 0) {
809            throw new RSRuntimeException("K must be greater than or equal to 0");
810        }
811        validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX);
812        int N = A.getType().getY();
813
814        boolean mUseIncSupp = isIncSupp();
815        long aID = A.getID(mRS);
816        long xID = X.getID(mRS);
817        if (mUseIncSupp) {
818            aID = getDummyAlloc(A);
819            xID = getDummyAlloc(X);
820        }
821        mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp);
822    }
823
824    /**
825     * DTBMV performs one of the matrix-vector operations
826     * x := A*x   or   x := A**T*x
827     *
828     * Details: http://www.netlib.org/lapack/explore-html/df/d29/dtbmv_8f.html
829     *
830     * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
831     *       but only the region N*(K+1) will be referenced. The following subroutine can is an
832     *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
833     *           for i in range(0, n):
834     *              for j in range(i, min(i+k+1, n)):
835     *                  b[i, j-i] = a[i, j]
836     *
837     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
838     * @param TransA The type of transpose applied to matrix A.
839     * @param Diag Specifies whether or not A is unit triangular.
840     * @param K The number of off-diagonals of the matrix A
841     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
842     * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
843     * @param incX The increment for the elements of vector x, must be larger than zero.
844     */
845    public void DTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
846        // TBMV has the same requirements as TRMV + K >= 0
847        if (K < 0) {
848            throw new RSRuntimeException("K must be greater than or equal to 0");
849        }
850        validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX);
851        int N = A.getType().getY();
852
853        boolean mUseIncSupp = isIncSupp();
854        long aID = A.getID(mRS);
855        long xID = X.getID(mRS);
856        if (mUseIncSupp) {
857            aID = getDummyAlloc(A);
858            xID = getDummyAlloc(X);
859        }
860        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp);
861    }
862
863    /**
864     * CTBMV performs one of the matrix-vector operations
865     * x := A*x   or   x := A**T*x   or   x := A**H*x
866     *
867     * Details: http://www.netlib.org/lapack/explore-html/d3/dcd/ctbmv_8f.html
868     *
869     * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
870     *       but only the region N*(K+1) will be referenced. The following subroutine can is an
871     *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
872     *           for i in range(0, n):
873     *              for j in range(i, min(i+k+1, n)):
874     *                  b[i, j-i] = a[i, j]
875     *
876     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
877     * @param TransA The type of transpose applied to matrix A.
878     * @param Diag Specifies whether or not A is unit triangular.
879     * @param K The number of off-diagonals of the matrix A
880     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
881     * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
882     * @param incX The increment for the elements of vector x, must be larger than zero.
883     */
884    public void CTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
885        // TBMV has the same requirements as TRMV + K >= 0
886        if (K < 0) {
887            throw new RSRuntimeException("K must be greater than or equal to 0");
888        }
889        validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX);
890        int N = A.getType().getY();
891
892        boolean mUseIncSupp = isIncSupp();
893        long aID = A.getID(mRS);
894        long xID = X.getID(mRS);
895        if (mUseIncSupp) {
896            aID = getDummyAlloc(A);
897            xID = getDummyAlloc(X);
898        }
899        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp);
900    }
901
902    /**
903     * ZTBMV performs one of the matrix-vector operations
904     * x := A*x   or   x := A**T*x   or   x := A**H*x
905     *
906     * Details: http://www.netlib.org/lapack/explore-html/d3/d39/ztbmv_8f.html
907     *
908     * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
909     *       but only the region N*(K+1) will be referenced. The following subroutine can is an
910     *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
911     *           for i in range(0, n):
912     *              for j in range(i, min(i+k+1, n)):
913     *                  b[i, j-i] = a[i, j]
914     *
915     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
916     * @param TransA The type of transpose applied to matrix A.
917     * @param Diag Specifies whether or not A is unit triangular.
918     * @param K The number of off-diagonals of the matrix A
919     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
920     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
921     * @param incX The increment for the elements of vector x, must be larger than zero.
922     */
923    public void ZTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
924        // TBMV has the same requirements as TRMV + K >= 0
925        if (K < 0) {
926            throw new RSRuntimeException("K must be greater than or equal to 0");
927        }
928        validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX);
929        int N = A.getType().getY();
930
931        boolean mUseIncSupp = isIncSupp();
932        long aID = A.getID(mRS);
933        long xID = X.getID(mRS);
934        if (mUseIncSupp) {
935            aID = getDummyAlloc(A);
936            xID = getDummyAlloc(X);
937        }
938        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp);
939    }
940
941    /**
942     * STPMV performs one of the matrix-vector operations
943     * x := A*x   or   x := A**T*x
944     *
945     * Details: http://www.netlib.org/lapack/explore-html/db/db1/stpmv_8f.html
946     *
947     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
948     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
949     *       'a' to packed matrix 'b'.
950     *           k = 0
951     *           for i in range(0, n):
952     *              for j in range(i, n):
953     *                  b[k++] = a[i, j]
954     *
955     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
956     * @param TransA The type of transpose applied to matrix A.
957     * @param Diag Specifies whether or not A is unit triangular.
958     * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32}.
959     * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
960     * @param incX The increment for the elements of vector x, must be larger than zero.
961     */
962    public void STPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
963        int N = validateTPMV(Element.F32(mRS), Uplo, TransA, Diag, Ap, X, incX);
964
965        boolean mUseIncSupp = isIncSupp();
966        long apID = Ap.getID(mRS);
967        long xID = X.getID(mRS);
968        if (mUseIncSupp) {
969            apID = getDummyAlloc(Ap);
970            xID = getDummyAlloc(X);
971        }
972        mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, apID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp);
973    }
974
975    /**
976     * DTPMV performs one of the matrix-vector operations
977     * x := A*x   or   x := A**T*x
978     *
979     * Details: http://www.netlib.org/lapack/explore-html/dc/dcd/dtpmv_8f.html
980     *
981     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
982     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
983     *       'a' to packed matrix 'b'.
984     *           k = 0
985     *           for i in range(0, n):
986     *              for j in range(i, n):
987     *                  b[k++] = a[i, j]
988     *
989     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
990     * @param TransA The type of transpose applied to matrix A.
991     * @param Diag Specifies whether or not A is unit triangular.
992     * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64}.
993     * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
994     * @param incX The increment for the elements of vector x, must be larger than zero.
995     */
996    public void DTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
997        int N = validateTPMV(Element.F64(mRS), Uplo, TransA, Diag, Ap, X, incX);
998
999        boolean mUseIncSupp = isIncSupp();
1000        long apID = Ap.getID(mRS);
1001        long xID = X.getID(mRS);
1002        if (mUseIncSupp) {
1003            apID = getDummyAlloc(Ap);
1004            xID = getDummyAlloc(X);
1005        }
1006        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, apID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp);
1007    }
1008
1009    /**
1010     * CTPMV performs one of the matrix-vector operations
1011     * x := A*x   or   x := A**T*x   or   x := A**H*x
1012     *
1013     * Details: http://www.netlib.org/lapack/explore-html/d4/dbb/ctpmv_8f.html
1014     *
1015     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
1016     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
1017     *       'a' to packed matrix 'b'.
1018     *           k = 0
1019     *           for i in range(0, n):
1020     *              for j in range(i, n):
1021     *                  b[k++] = a[i, j]
1022     *
1023     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1024     * @param TransA The type of transpose applied to matrix A.
1025     * @param Diag Specifies whether or not A is unit triangular.
1026     * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32_2}.
1027     * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
1028     * @param incX The increment for the elements of vector x, must be larger than zero.
1029     */
1030    public void CTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
1031        int N = validateTPMV(Element.F32_2(mRS), Uplo, TransA, Diag, Ap, X, incX);
1032
1033        boolean mUseIncSupp = isIncSupp();
1034        long apID = Ap.getID(mRS);
1035        long xID = X.getID(mRS);
1036        if (mUseIncSupp) {
1037            apID = getDummyAlloc(Ap);
1038            xID = getDummyAlloc(X);
1039        }
1040        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, apID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp);
1041    }
1042
1043    /**
1044     * ZTPMV performs one of the matrix-vector operations
1045     * x := A*x   or   x := A**T*x   or   x := A**H*x
1046     *
1047     * Details: http://www.netlib.org/lapack/explore-html/d2/d9e/ztpmv_8f.html
1048     *
1049     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
1050     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
1051     *       'a' to packed matrix 'b'.
1052     *           k = 0
1053     *           for i in range(0, n):
1054     *              for j in range(i, n):
1055     *                  b[k++] = a[i, j]
1056     *
1057     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1058     * @param TransA The type of transpose applied to matrix A.
1059     * @param Diag Specifies whether or not A is unit triangular.
1060     * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64_2}.
1061     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
1062     * @param incX The increment for the elements of vector x, must be larger than zero.
1063     */
1064    public void ZTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
1065        int N = validateTPMV(Element.F64_2(mRS), Uplo, TransA, Diag, Ap, X, incX);
1066
1067        boolean mUseIncSupp = isIncSupp();
1068        long apID = Ap.getID(mRS);
1069        long xID = X.getID(mRS);
1070        if (mUseIncSupp) {
1071            apID = getDummyAlloc(Ap);
1072            xID = getDummyAlloc(X);
1073        }
1074        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, apID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp);
1075    }
1076
1077    /**
1078     * STRSV solves one of the systems of equations
1079     * A*x = b   or   A**T*x = b
1080     *
1081     * Details: http://www.netlib.org/lapack/explore-html/d0/d2a/strsv_8f.html
1082     *
1083     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1084     * @param TransA The type of transpose applied to matrix A.
1085     * @param Diag Specifies whether or not A is unit triangular.
1086     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
1087     * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1088     * @param incX The increment for the elements of vector x, must be larger than zero.
1089     */
1090    public void STRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation A,  Allocation X,  int incX) {
1091        // TRSV is the same as TRMV
1092        validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX);
1093        int N = A.getType().getY();
1094
1095        boolean mUseIncSupp = isIncSupp();
1096        long aID = A.getID(mRS);
1097        long xID = X.getID(mRS);
1098        if (mUseIncSupp) {
1099            aID = getDummyAlloc(A);
1100            xID = getDummyAlloc(X);
1101        }
1102        mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp);
1103
1104    }
1105
1106    /**
1107     * DTRSV solves one of the systems of equations
1108     * A*x = b   or   A**T*x = b
1109     *
1110     * Details: http://www.netlib.org/lapack/explore-html/d6/d96/dtrsv_8f.html
1111     *
1112     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1113     * @param TransA The type of transpose applied to matrix A.
1114     * @param Diag Specifies whether or not A is unit triangular.
1115     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
1116     * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
1117     * @param incX The increment for the elements of vector x, must be larger than zero.
1118     */
1119    public void DTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation A,  Allocation X,  int incX) {
1120        // TRSV is the same as TRMV
1121        validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX);
1122        int N = A.getType().getY();
1123
1124        boolean mUseIncSupp = isIncSupp();
1125        long aID = A.getID(mRS);
1126        long xID = X.getID(mRS);
1127        if (mUseIncSupp) {
1128            aID = getDummyAlloc(A);
1129            xID = getDummyAlloc(X);
1130        }
1131        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp);
1132
1133    }
1134
1135    /**
1136     * CTRSV solves one of the systems of equations
1137     * A*x = b   or   A**T*x = b   or   A**H*x = b
1138     *
1139     * Details: http://www.netlib.org/lapack/explore-html/d4/dc8/ctrsv_8f.html
1140     *
1141     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1142     * @param TransA The type of transpose applied to matrix A.
1143     * @param Diag Specifies whether or not A is unit triangular.
1144     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
1145     * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
1146     * @param incX The increment for the elements of vector x, must be larger than zero.
1147     */
1148    public void CTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation A,  Allocation X,  int incX) {
1149        // TRSV is the same as TRMV
1150        validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX);
1151        int N = A.getType().getY();
1152
1153        boolean mUseIncSupp = isIncSupp();
1154        long aID = A.getID(mRS);
1155        long xID = X.getID(mRS);
1156        if (mUseIncSupp) {
1157            aID = getDummyAlloc(A);
1158            xID = getDummyAlloc(X);
1159        }
1160        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp);
1161
1162    }
1163
1164    /**
1165     * ZTRSV solves one of the systems of equations
1166     * A*x = b   or   A**T*x = b   or   A**H*x = b
1167     *
1168     * Details: http://www.netlib.org/lapack/explore-html/d1/d2f/ztrsv_8f.html
1169     *
1170     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1171     * @param TransA The type of transpose applied to matrix A.
1172     * @param Diag Specifies whether or not A is unit triangular.
1173     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
1174     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
1175     * @param incX The increment for the elements of vector x, must be larger than zero.
1176     */
1177    public void ZTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation A,  Allocation X,  int incX) {
1178        // TRSV is the same as TRMV
1179        validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX);
1180        int N = A.getType().getY();
1181
1182        boolean mUseIncSupp = isIncSupp();
1183        long aID = A.getID(mRS);
1184        long xID = X.getID(mRS);
1185        if (mUseIncSupp) {
1186            aID = getDummyAlloc(A);
1187            xID = getDummyAlloc(X);
1188        }
1189        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp);
1190
1191    }
1192
1193    /**
1194     * STBSV solves one of the systems of equations
1195     * A*x = b   or   A**T*x = b
1196     *
1197     * Details: http://www.netlib.org/lapack/explore-html/d0/d1f/stbsv_8f.html
1198     *
1199     * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
1200     *       but only the region N*(K+1) will be referenced. The following subroutine can is an
1201     *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
1202     *           for i in range(0, n):
1203     *              for j in range(i, min(i+k+1, n)):
1204     *                  b[i, j-i] = a[i, j]
1205     *
1206     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1207     * @param TransA The type of transpose applied to matrix A.
1208     * @param Diag Specifies whether or not A is unit triangular.
1209     * @param K The number of off-diagonals of the matrix A
1210     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
1211     * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1212     * @param incX The increment for the elements of vector x, must be larger than zero.
1213     */
1214    public void STBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
1215        // TBSV is the same as TRMV + K >= 0
1216        validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX);
1217        int N = A.getType().getY();
1218        if (K < 0) {
1219            throw new RSRuntimeException("Number of diagonals must be positive");
1220        }
1221
1222        boolean mUseIncSupp = isIncSupp();
1223        long aID = A.getID(mRS);
1224        long xID = X.getID(mRS);
1225        if (mUseIncSupp) {
1226            aID = getDummyAlloc(A);
1227            xID = getDummyAlloc(X);
1228        }
1229        mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp);
1230    }
1231
1232    /**
1233     * DTBSV solves one of the systems of equations
1234     * A*x = b   or   A**T*x = b
1235     *
1236     * Details: http://www.netlib.org/lapack/explore-html/d4/dcf/dtbsv_8f.html
1237     *
1238     * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
1239     *       but only the region N*(K+1) will be referenced. The following subroutine can is an
1240     *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
1241     *           for i in range(0, n):
1242     *              for j in range(i, min(i+k+1, n)):
1243     *                  b[i, j-i] = a[i, j]
1244     *
1245     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1246     * @param TransA The type of transpose applied to matrix A.
1247     * @param Diag Specifies whether or not A is unit triangular.
1248     * @param K The number of off-diagonals of the matrix A
1249     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
1250     * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
1251     * @param incX The increment for the elements of vector x, must be larger than zero.
1252     */
1253    public void DTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
1254        // TBSV is the same as TRMV + K >= 0
1255        validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX);
1256        int N = A.getType().getY();
1257        if (K < 0) {
1258            throw new RSRuntimeException("Number of diagonals must be positive");
1259        }
1260
1261        boolean mUseIncSupp = isIncSupp();
1262        long aID = A.getID(mRS);
1263        long xID = X.getID(mRS);
1264        if (mUseIncSupp) {
1265            aID = getDummyAlloc(A);
1266            xID = getDummyAlloc(X);
1267        }
1268        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp);
1269    }
1270
1271    /**
1272     * CTBSV solves one of the systems of equations
1273     * A*x = b   or   A**T*x = b   or   A**H*x = b
1274     *
1275     * Details: http://www.netlib.org/lapack/explore-html/d9/d5f/ctbsv_8f.html
1276     *
1277     * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
1278     *       but only the region N*(K+1) will be referenced. The following subroutine can is an
1279     *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
1280     *           for i in range(0, n):
1281     *              for j in range(i, min(i+k+1, n)):
1282     *                  b[i, j-i] = a[i, j]
1283     *
1284     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1285     * @param TransA The type of transpose applied to matrix A.
1286     * @param Diag Specifies whether or not A is unit triangular.
1287     * @param K The number of off-diagonals of the matrix A
1288     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
1289     * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
1290     * @param incX The increment for the elements of vector x, must be larger than zero.
1291     */
1292    public void CTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
1293        // TBSV is the same as TRMV + K >= 0
1294        validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX);
1295        int N = A.getType().getY();
1296        if (K < 0) {
1297            throw new RSRuntimeException("Number of diagonals must be positive");
1298        }
1299
1300        boolean mUseIncSupp = isIncSupp();
1301        long aID = A.getID(mRS);
1302        long xID = X.getID(mRS);
1303        if (mUseIncSupp) {
1304            aID = getDummyAlloc(A);
1305            xID = getDummyAlloc(X);
1306        }
1307        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp);
1308    }
1309
1310    /**
1311     * ZTBSV solves one of the systems of equations
1312     * A*x = b   or   A**T*x = b   or   A**H*x = b
1313     *
1314     * Details: http://www.netlib.org/lapack/explore-html/d4/d5a/ztbsv_8f.html
1315     *
1316     * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
1317     *       but only the region N*(K+1) will be referenced. The following subroutine can is an
1318     *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
1319     *           for i in range(0, n):
1320     *              for j in range(i, min(i+k+1, n)):
1321     *                  b[i, j-i] = a[i, j]
1322     *
1323     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1324     * @param TransA The type of transpose applied to matrix A.
1325     * @param Diag Specifies whether or not A is unit triangular.
1326     * @param K The number of off-diagonals of the matrix A
1327     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
1328     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
1329     * @param incX The increment for the elements of vector x, must be larger than zero.
1330     */
1331    public void ZTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
1332        // TBSV is the same as TRMV + K >= 0
1333        validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX);
1334        int N = A.getType().getY();
1335        if (K < 0) {
1336            throw new RSRuntimeException("Number of diagonals must be positive");
1337        }
1338
1339        boolean mUseIncSupp = isIncSupp();
1340        long aID = A.getID(mRS);
1341        long xID = X.getID(mRS);
1342        if (mUseIncSupp) {
1343            aID = getDummyAlloc(A);
1344            xID = getDummyAlloc(X);
1345        }
1346        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp);
1347    }
1348
1349    /**
1350     * STPSV solves one of the systems of equations
1351     * A*x = b   or   A**T*x = b
1352     *
1353     * Details: http://www.netlib.org/lapack/explore-html/d0/d7c/stpsv_8f.html
1354     *
1355     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
1356     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
1357     *       'a' to packed matrix 'b'.
1358     *           k = 0
1359     *           for i in range(0, n):
1360     *              for j in range(i, n):
1361     *                  b[k++] = a[i, j]
1362     *
1363     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1364     * @param TransA The type of transpose applied to matrix A.
1365     * @param Diag Specifies whether or not A is unit triangular.
1366     * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32}.
1367     * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1368     * @param incX The increment for the elements of vector x, must be larger than zero.
1369     */
1370    public void STPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
1371        // TPSV is same as TPMV
1372        int N = validateTPMV(Element.F32(mRS), Uplo, TransA, Diag, Ap, X, incX);
1373
1374        boolean mUseIncSupp = isIncSupp();
1375        long apID = Ap.getID(mRS);
1376        long xID = X.getID(mRS);
1377        if (mUseIncSupp) {
1378            apID = getDummyAlloc(Ap);
1379            xID = getDummyAlloc(X);
1380        }
1381        mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, apID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp);
1382    }
1383
1384    /**
1385     * DTPSV solves one of the systems of equations
1386     * A*x = b   or   A**T*x = b
1387     *
1388     * Details: http://www.netlib.org/lapack/explore-html/d9/d84/dtpsv_8f.html
1389     *
1390     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
1391     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
1392     *       'a' to packed matrix 'b'.
1393     *           k = 0
1394     *           for i in range(0, n):
1395     *              for j in range(i, n):
1396     *                  b[k++] = a[i, j]
1397     *
1398     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1399     * @param TransA The type of transpose applied to matrix A.
1400     * @param Diag Specifies whether or not A is unit triangular.
1401     * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64}.
1402     * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
1403     * @param incX The increment for the elements of vector x, must be larger than zero.
1404     */
1405    public void DTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
1406        // TPSV is same as TPMV
1407        int N = validateTPMV(Element.F64(mRS), Uplo, TransA, Diag, Ap, X, incX);
1408
1409        boolean mUseIncSupp = isIncSupp();
1410        long apID = Ap.getID(mRS);
1411        long xID = X.getID(mRS);
1412        if (mUseIncSupp) {
1413            apID = getDummyAlloc(Ap);
1414            xID = getDummyAlloc(X);
1415        }
1416        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, apID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp);
1417    }
1418
1419    /**
1420     * CTPSV solves one of the systems of equations
1421     * A*x = b   or   A**T*x = b   or   A**H*x = b
1422     *
1423     * Details: http://www.netlib.org/lapack/explore-html/d8/d56/ctpsv_8f.html
1424     *
1425     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
1426     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
1427     *       'a' to packed matrix 'b'.
1428     *           k = 0
1429     *           for i in range(0, n):
1430     *              for j in range(i, n):
1431     *                  b[k++] = a[i, j]
1432     *
1433     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1434     * @param TransA The type of transpose applied to matrix A.
1435     * @param Diag Specifies whether or not A is unit triangular.
1436     * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32_2}.
1437     * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
1438     * @param incX The increment for the elements of vector x, must be larger than zero.
1439     */
1440    public void CTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
1441        // TPSV is same as TPMV
1442        int N = validateTPMV(Element.F32_2(mRS), Uplo, TransA, Diag, Ap, X, incX);
1443
1444        boolean mUseIncSupp = isIncSupp();
1445        long apID = Ap.getID(mRS);
1446        long xID = X.getID(mRS);
1447        if (mUseIncSupp) {
1448            apID = getDummyAlloc(Ap);
1449            xID = getDummyAlloc(X);
1450        }
1451        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, apID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp);
1452    }
1453
1454    /**
1455     * ZTPSV solves one of the systems of equations
1456     * A*x = b   or   A**T*x = b   or   A**H*x = b
1457     *
1458     * Details: http://www.netlib.org/lapack/explore-html/da/d57/ztpsv_8f.html
1459     *
1460     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
1461     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
1462     *       'a' to packed matrix 'b'.
1463     *           k = 0
1464     *           for i in range(0, n):
1465     *              for j in range(i, n):
1466     *                  b[k++] = a[i, j]
1467     *
1468     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1469     * @param TransA The type of transpose applied to matrix A.
1470     * @param Diag Specifies whether or not A is unit triangular.
1471     * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64_2}.
1472     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
1473     * @param incX The increment for the elements of vector x, must be larger than zero.
1474     */
1475    public void ZTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
1476        // TPSV is same as TPMV
1477        int N = validateTPMV(Element.F64_2(mRS), Uplo, TransA, Diag, Ap, X, incX);
1478
1479        boolean mUseIncSupp = isIncSupp();
1480        long apID = Ap.getID(mRS);
1481        long xID = X.getID(mRS);
1482        if (mUseIncSupp) {
1483            apID = getDummyAlloc(Ap);
1484            xID = getDummyAlloc(X);
1485        }
1486        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, apID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp);
1487    }
1488
1489    /**
1490     * Level 2, S and D only
1491     */
1492    static int validateSYMV(Element e, @Uplo int Uplo, Allocation A, Allocation X, Allocation Y, int incX, int incY) {
1493        validateUplo(Uplo);
1494        int N = A.getType().getY();
1495        if (A.getType().getX() != N) {
1496            throw new RSRuntimeException("A must be a square matrix for SYMV");
1497        }
1498        if (!A.getType().getElement().isCompatible(e) ||
1499            !X.getType().getElement().isCompatible(e) ||
1500            !Y.getType().getElement().isCompatible(e) ) {
1501            throw new RSRuntimeException("Called BLAS with wrong Element type");
1502        }
1503        if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
1504            throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
1505        }
1506
1507        if (incX <= 0 || incY <= 0) {
1508            throw new RSRuntimeException("Vector increments must be greater than 0");
1509        }
1510        int expectedXDim = 1 + (N - 1) * incX;
1511        if (X.getType().getX() != expectedXDim) {
1512            throw new RSRuntimeException("Incorrect vector dimensions for SYMV");
1513        }
1514        int expectedYDim = 1 + (N - 1) * incY;
1515        if (Y.getType().getX() != expectedYDim) {
1516            throw new RSRuntimeException("Incorrect vector dimensions for SYMV");
1517        }
1518        return N;
1519    }
1520    static int validateSPMV(Element e, @Uplo int Uplo, Allocation Ap, Allocation X, int incX, Allocation Y, int incY) {
1521        validateUplo(Uplo);
1522        if (!Ap.getType().getElement().isCompatible(e) ||
1523            !X.getType().getElement().isCompatible(e) ||
1524            !Y.getType().getElement().isCompatible(e)) {
1525            throw new RSRuntimeException("Called BLAS with wrong Element type");
1526        }
1527        if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
1528            throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
1529        }
1530
1531        if (Ap.getType().getY() > 1) {
1532            throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1");
1533        }
1534
1535        int N = (int)Math.sqrt((double)Ap.getType().getX() * 2);
1536        if (Ap.getType().getX() != ((N * (N+1)) / 2)) {
1537            throw new RSRuntimeException("Invalid dimension for Ap");
1538        }
1539        if (incX <= 0 || incY <= 0) {
1540            throw new RSRuntimeException("Vector increments must be greater than 0");
1541        }
1542        int expectedXDim = 1 + (N - 1) * incX;
1543        if (X.getType().getX() != expectedXDim) {
1544            throw new RSRuntimeException("Incorrect vector dimensions for SPMV");
1545        }
1546        int expectedYDim = 1 + (N - 1) * incY;
1547        if (Y.getType().getX() != expectedYDim) {
1548            throw new RSRuntimeException("Incorrect vector dimensions for SPMV");
1549        }
1550
1551        return N;
1552    }
1553    static void validateGER(Element e, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
1554        if (!A.getType().getElement().isCompatible(e) ||
1555            !X.getType().getElement().isCompatible(e) ||
1556            !Y.getType().getElement().isCompatible(e) ) {
1557            throw new RSRuntimeException("Called BLAS with wrong Element type");
1558        }
1559
1560        if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
1561            throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
1562        }
1563
1564        int M = A.getType().getY();
1565        int N = A.getType().getX();
1566
1567        if (N < 1 || M < 1) {
1568            throw new RSRuntimeException("M and N must be 1 or greater for GER");
1569        }
1570        if (incX <= 0 || incY <= 0) {
1571            throw new RSRuntimeException("Vector increments must be greater than 0");
1572        }
1573        int expectedXDim = 1 + (M - 1) * incX;
1574        if (X.getType().getX() != expectedXDim) {
1575            throw new RSRuntimeException("Incorrect vector dimensions for GER");
1576        }
1577        int expectedYDim = 1 + (N - 1) * incY;
1578        if (Y.getType().getX() != expectedYDim) {
1579            throw new RSRuntimeException("Incorrect vector dimensions for GER");
1580        }
1581
1582
1583    }
1584    static int validateSYR(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation A) {
1585        validateUplo(Uplo);
1586        if (!A.getType().getElement().isCompatible(e) ||
1587            !X.getType().getElement().isCompatible(e)) {
1588            throw new RSRuntimeException("Called BLAS with wrong Element type");
1589        }
1590
1591        int N = A.getType().getX();
1592
1593        if (X.getType().getY() > 1) {
1594            throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
1595        }
1596        if (N != A.getType().getY()) {
1597            throw new RSRuntimeException("A must be a symmetric matrix");
1598        }
1599        if (incX <= 0) {
1600            throw new RSRuntimeException("Vector increments must be greater than 0");
1601        }
1602        int expectedXDim = 1 + (N - 1) * incX;
1603        if (X.getType().getX() != expectedXDim) {
1604            throw new RSRuntimeException("Incorrect vector dimensions for SYR");
1605        }
1606        return N;
1607    }
1608    static int validateSPR(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Ap) {
1609        validateUplo(Uplo);
1610        if (!Ap.getType().getElement().isCompatible(e) ||
1611            !X.getType().getElement().isCompatible(e)) {
1612            throw new RSRuntimeException("Called BLAS with wrong Element type");
1613        }
1614        if (X.getType().getY() > 1) {
1615            throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
1616        }
1617
1618        if (Ap.getType().getY() > 1) {
1619            throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1");
1620        }
1621
1622        int N = (int)Math.sqrt((double)Ap.getType().getX() * 2);
1623        if (Ap.getType().getX() != ((N * (N+1)) / 2)) {
1624            throw new RSRuntimeException("Invalid dimension for Ap");
1625        }
1626        if (incX <= 0) {
1627            throw new RSRuntimeException("Vector increments must be greater than 0");
1628        }
1629        int expectedXDim = 1 + (N - 1) * incX;
1630        if (X.getType().getX() != expectedXDim) {
1631            throw new RSRuntimeException("Incorrect vector dimensions for SPR");
1632        }
1633
1634        return N;
1635    }
1636
1637    static int validateSYR2(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
1638        validateUplo(Uplo);
1639        if (!A.getType().getElement().isCompatible(e) ||
1640            !X.getType().getElement().isCompatible(e) ||
1641            !Y.getType().getElement().isCompatible(e)) {
1642            throw new RSRuntimeException("Called BLAS with wrong Element type");
1643        }
1644
1645        if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
1646            throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
1647        }
1648
1649        int N = A.getType().getX();
1650
1651        if (N != A.getType().getY()) {
1652            throw new RSRuntimeException("A must be a symmetric matrix");
1653        }
1654        if (incX <= 0 || incY <= 0) {
1655            throw new RSRuntimeException("Vector increments must be greater than 0");
1656        }
1657        int expectedXDim = 1 + (N - 1) * incX;
1658        int expectedYDim = 1 + (N - 1) * incY;
1659        if (X.getType().getX() != expectedXDim || Y.getType().getX() != expectedYDim) {
1660            throw new RSRuntimeException("Incorrect vector dimensions for SYR");
1661        }
1662        return N;
1663
1664    }
1665    static int validateSPR2(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) {
1666        validateUplo(Uplo);
1667        if (!Ap.getType().getElement().isCompatible(e) ||
1668            !X.getType().getElement().isCompatible(e) ||
1669            !Y.getType().getElement().isCompatible(e)) {
1670            throw new RSRuntimeException("Called BLAS with wrong Element type");
1671        }
1672        if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
1673            throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
1674        }
1675
1676        if (Ap.getType().getY() > 1) {
1677            throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1");
1678        }
1679
1680        int N = (int)Math.sqrt((double)Ap.getType().getX() * 2);
1681        if (Ap.getType().getX() != ((N * (N+1)) / 2)) {
1682            throw new RSRuntimeException("Invalid dimension for Ap");
1683        }
1684        if (incX <= 0 || incY <= 0) {
1685            throw new RSRuntimeException("Vector increments must be greater than 0");
1686        }
1687        int expectedXDim = 1 + (N - 1) * incX;
1688        int expectedYDim = 1 + (N - 1) * incY;
1689        if (X.getType().getX() != expectedXDim || Y.getType().getX() != expectedYDim) {
1690            throw new RSRuntimeException("Incorrect vector dimensions for SPR2");
1691        }
1692
1693        return N;
1694    }
1695
1696    /**
1697     * SSYMV performs the matrix-vector operation
1698     * y := alpha*A*x + beta*y
1699     *
1700     * Details: http://www.netlib.org/lapack/explore-html/d2/d94/ssymv_8f.html
1701     *
1702     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
1703     * @param alpha The scalar alpha.
1704     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
1705     * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1706     * @param incX The increment for the elements of vector x, must be larger than zero.
1707     * @param beta The scalar beta.
1708     * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
1709     * @param incY The increment for the elements of vector y, must be larger than zero.
1710     */
1711    public void SSYMV(@Uplo int Uplo, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) {
1712        int N = validateSYMV(Element.F32(mRS), Uplo, A, X, Y, incX, incY);
1713
1714        boolean mUseIncSupp = isIncSupp();
1715        long aID = A.getID(mRS);
1716        long xID = X.getID(mRS);
1717        long yID = Y.getID(mRS);
1718        if (mUseIncSupp) {
1719            aID = getDummyAlloc(A);
1720            xID = getDummyAlloc(X);
1721            yID = getDummyAlloc(Y);
1722        }
1723        mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssymv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, aID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp);
1724    }
1725
1726    /**
1727     * SSBMV performs the matrix-vector operation
1728     * y := alpha*A*x + beta*y
1729     *
1730     * Details: http://www.netlib.org/lapack/explore-html/d3/da1/ssbmv_8f.html
1731     *
1732     * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
1733     *       but only the region N*(K+1) will be referenced. The following subroutine can is an
1734     *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
1735     *           for i in range(0, n):
1736     *              for j in range(i, min(i+k+1, n)):
1737     *                  b[i, j-i] = a[i, j]
1738     *
1739     * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied.
1740     * @param K The number of off-diagonals of the matrix A
1741     * @param alpha The scalar alpha.
1742     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
1743     * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1744     * @param incX The increment for the elements of vector x, must be larger than zero.
1745     * @param beta The scalar beta.
1746     * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
1747     * @param incY The increment for the elements of vector y, must be larger than zero.
1748     */
1749    public void SSBMV(@Uplo int Uplo, int K, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) {
1750        // SBMV is the same as SYMV + K >= 0
1751        if (K < 0) {
1752            throw new RSRuntimeException("K must be greater than or equal to 0");
1753        }
1754        int N = validateSYMV(Element.F32(mRS), Uplo, A, X, Y, incX, incY);
1755
1756        boolean mUseIncSupp = isIncSupp();
1757        long aID = A.getID(mRS);
1758        long xID = X.getID(mRS);
1759        long yID = Y.getID(mRS);
1760        if (mUseIncSupp) {
1761            aID = getDummyAlloc(A);
1762            xID = getDummyAlloc(X);
1763            yID = getDummyAlloc(Y);
1764        }
1765        mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha, aID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp);
1766    }
1767
1768    /**
1769     * SSPMV performs the matrix-vector operation
1770     * y := alpha*A*x + beta*y
1771     *
1772     * Details: http://www.netlib.org/lapack/explore-html/d8/d68/sspmv_8f.html
1773     *
1774     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
1775     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
1776     *       'a' to packed matrix 'b'.
1777     *           k = 0
1778     *           for i in range(0, n):
1779     *              for j in range(i, n):
1780     *                  b[k++] = a[i, j]
1781     *
1782     * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form.
1783     * @param alpha The scalar alpha.
1784     * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32}.
1785     * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1786     * @param incX The increment for the elements of vector x, must be larger than zero.
1787     * @param beta The scalar beta.
1788     * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
1789     * @param incY The increment for the elements of vector y, must be larger than zero.
1790     */
1791    public void SSPMV(@Uplo int Uplo, float alpha, Allocation Ap, Allocation X, int incX, float beta, Allocation Y, int incY) {
1792        int N = validateSPMV(Element.F32(mRS), Uplo, Ap, X, incX, Y, incY);
1793
1794        boolean mUseIncSupp = isIncSupp();
1795        long apID = Ap.getID(mRS);
1796        long xID = X.getID(mRS);
1797        long yID = Y.getID(mRS);
1798        if (mUseIncSupp) {
1799            apID = getDummyAlloc(Ap);
1800            xID = getDummyAlloc(X);
1801            yID = getDummyAlloc(Y);
1802        }
1803        mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sspmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, apID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp);
1804    }
1805
1806    /**
1807     * SGER performs the rank 1 operation
1808     * A := alpha*x*y**T + A
1809     *
1810     * Details: http://www.netlib.org/lapack/explore-html/db/d5c/sger_8f.html
1811     *
1812     * @param alpha The scalar alpha.
1813     * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1814     * @param incX The increment for the elements of vector x, must be larger than zero.
1815     * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
1816     * @param incY The increment for the elements of vector y, must be larger than zero.
1817     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
1818     */
1819    public void SGER(float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
1820        int M = A.getType().getY();
1821        int N = A.getType().getX();
1822        validateGER(Element.F32(mRS), X, incX, Y, incY, A);
1823
1824        boolean mUseIncSupp = isIncSupp();
1825        long aID = A.getID(mRS);
1826        long xID = X.getID(mRS);
1827        long yID = Y.getID(mRS);
1828        if (mUseIncSupp) {
1829            aID = getDummyAlloc(A);
1830            xID = getDummyAlloc(X);
1831            yID = getDummyAlloc(Y);
1832        }
1833        mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sger, 0, 0, 0, 0, 0, M, N, 0, alpha, xID, yID, 0.f, aID, incX, incY, 0, 0, mUseIncSupp);
1834    }
1835
1836    /**
1837     * SSYR performs the rank 1 operation
1838     * A := alpha*x*x**T + A
1839     *
1840     * Details: http://www.netlib.org/lapack/explore-html/d6/dac/ssyr_8f.html
1841     *
1842     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
1843     * @param alpha The scalar alpha.
1844     * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1845     * @param incX The increment for the elements of vector x, must be larger than zero.
1846     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
1847     */
1848    public void SSYR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation A) {
1849        int N = validateSYR(Element.F32(mRS), Uplo, X, incX, A);
1850
1851        boolean mUseIncSupp = isIncSupp();
1852        long aID = A.getID(mRS);
1853        long xID = X.getID(mRS);
1854        if (mUseIncSupp) {
1855            aID = getDummyAlloc(A);
1856            xID = getDummyAlloc(X);
1857        }
1858        mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, aID, 0.f, 0, incX, 0, 0, 0, mUseIncSupp);
1859    }
1860
1861    /**
1862     * SSPR performs the rank 1 operation
1863     * A := alpha*x*x**T + A
1864     *
1865     * Details: http://www.netlib.org/lapack/explore-html/d2/d9b/sspr_8f.html
1866     *
1867     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
1868     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
1869     *       'a' to packed matrix 'b'.
1870     *           k = 0
1871     *           for i in range(0, n):
1872     *              for j in range(i, n):
1873     *                  b[k++] = a[i, j]
1874     *
1875     * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
1876     * @param alpha The scalar alpha.
1877     * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1878     * @param incX The increment for the elements of vector x, must be larger than zero.
1879     * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32}.
1880     */
1881    public void SSPR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Ap) {
1882        int N = validateSPR(Element.F32(mRS), Uplo, X, incX, Ap);
1883
1884        boolean mUseIncSupp = isIncSupp();
1885        long apID = Ap.getID(mRS);
1886        long xID = X.getID(mRS);
1887        if (mUseIncSupp) {
1888            apID = getDummyAlloc(Ap);
1889            xID = getDummyAlloc(X);
1890        }
1891        mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sspr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, apID, 0.f, 0, incX, 0, 0, 0, mUseIncSupp);
1892    }
1893
1894    /**
1895     * SSYR2 performs the symmetric rank 2 operation
1896     * A := alpha*x*y**T + alpha*y*x**T + A
1897     *
1898     * Details: http://www.netlib.org/lapack/explore-html/db/d99/ssyr2_8f.html
1899     *
1900     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
1901     * @param alpha The scalar alpha.
1902     * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1903     * @param incX The increment for the elements of vector x, must be larger than zero.
1904     * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
1905     * @param incY The increment for the elements of vector y, must be larger than zero.
1906     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
1907     */
1908    public void SSYR2(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
1909        int N = validateSYR2(Element.F32(mRS), Uplo, X, incX, Y, incY, A);
1910
1911        boolean mUseIncSupp = isIncSupp();
1912        long aID = A.getID(mRS);
1913        long xID = X.getID(mRS);
1914        long yID = Y.getID(mRS);
1915        if (mUseIncSupp) {
1916            aID = getDummyAlloc(A);
1917            xID = getDummyAlloc(X);
1918            yID = getDummyAlloc(Y);
1919        }
1920        mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, yID, 0, aID, incX, incY, 0, 0, mUseIncSupp);
1921    }
1922
1923    /**
1924     * SSPR2 performs the symmetric rank 2 operation
1925     * A := alpha*x*y**T + alpha*y*x**T + A
1926     *
1927     * Details: http://www.netlib.org/lapack/explore-html/db/d3e/sspr2_8f.html
1928     *
1929     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
1930     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
1931     *       'a' to packed matrix 'b'.
1932     *           k = 0
1933     *           for i in range(0, n):
1934     *              for j in range(i, n):
1935     *                  b[k++] = a[i, j]
1936     *
1937     * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
1938     * @param alpha The scalar alpha.
1939     * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1940     * @param incX The increment for the elements of vector x, must be larger than zero.
1941     * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
1942     * @param incY The increment for the elements of vector y, must be larger than zero.
1943     * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32}.
1944     */
1945    public void SSPR2(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) {
1946        int N = validateSPR2(Element.F32(mRS), Uplo, X, incX, Y, incY, Ap);
1947
1948        boolean mUseIncSupp = isIncSupp();
1949        long apID = Ap.getID(mRS);
1950        long xID = X.getID(mRS);
1951        long yID = Y.getID(mRS);
1952        if (mUseIncSupp) {
1953            apID = getDummyAlloc(Ap);
1954            xID = getDummyAlloc(X);
1955            yID = getDummyAlloc(Y);
1956        }
1957        mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sspr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, yID, 0, apID, incX, incY, 0, 0, mUseIncSupp);
1958    }
1959
1960    /**
1961     * DSYMV performs the matrix-vector operation
1962     * y := alpha*A*x + beta*y
1963     *
1964     * Details: http://www.netlib.org/lapack/explore-html/d8/dbe/dsymv_8f.html
1965     *
1966     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
1967     * @param alpha The scalar alpha.
1968     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
1969     * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
1970     * @param incX The increment for the elements of vector x, must be larger than zero.
1971     * @param beta The scalar beta.
1972     * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
1973     * @param incY The increment for the elements of vector y, must be larger than zero.
1974     */
1975    public void DSYMV(@Uplo int Uplo, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) {
1976        int N = validateSYMV(Element.F64(mRS), Uplo, A, X, Y, incX, incY);
1977
1978        boolean mUseIncSupp = isIncSupp();
1979        long aID = A.getID(mRS);
1980        long xID = X.getID(mRS);
1981        long yID = Y.getID(mRS);
1982        if (mUseIncSupp) {
1983            aID = getDummyAlloc(A);
1984            xID = getDummyAlloc(X);
1985            yID = getDummyAlloc(Y);
1986        }
1987        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsymv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, aID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp);
1988    }
1989
1990    /**
1991     * DSBMV performs the matrix-vector operation
1992     * y := alpha*A*x + beta*y
1993     *
1994     * Details: http://www.netlib.org/lapack/explore-html/d8/d1e/dsbmv_8f.html
1995     *
1996     * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
1997     *       but only the region N*(K+1) will be referenced. The following subroutine can is an
1998     *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
1999     *           for i in range(0, n):
2000     *              for j in range(i, min(i+k+1, n)):
2001     *                  b[i, j-i] = a[i, j]
2002     *
2003     * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied.
2004     * @param K The number of off-diagonals of the matrix A
2005     * @param alpha The scalar alpha.
2006     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
2007     * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
2008     * @param incX The increment for the elements of vector x, must be larger than zero.
2009     * @param beta The scalar beta.
2010     * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
2011     * @param incY The increment for the elements of vector y, must be larger than zero.
2012     */
2013    public void DSBMV(@Uplo int Uplo, int K, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) {
2014        // SBMV is the same as SYMV + K >= 0
2015        if (K < 0) {
2016            throw new RSRuntimeException("K must be greater than or equal to 0");
2017        }
2018        int N = validateSYMV(Element.F64(mRS), Uplo, A, X, Y, incX, incY);
2019
2020        boolean mUseIncSupp = isIncSupp();
2021        long aID = A.getID(mRS);
2022        long xID = X.getID(mRS);
2023        long yID = Y.getID(mRS);
2024        if (mUseIncSupp) {
2025            aID = getDummyAlloc(A);
2026            xID = getDummyAlloc(X);
2027            yID = getDummyAlloc(Y);
2028        }
2029        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha, aID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp);
2030    }
2031
2032    /**
2033     * DSPMV performs the matrix-vector operation
2034     * y := alpha*A*x + beta*y
2035     *
2036     * Details: http://www.netlib.org/lapack/explore-html/d4/d85/dspmv_8f.html
2037     *
2038     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
2039     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
2040     *       'a' to packed matrix 'b'.
2041     *           k = 0
2042     *           for i in range(0, n):
2043     *              for j in range(i, n):
2044     *                  b[k++] = a[i, j]
2045     *
2046     * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form.
2047     * @param alpha The scalar alpha.
2048     * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64}.
2049     * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
2050     * @param incX The increment for the elements of vector x, must be larger than zero.
2051     * @param beta The scalar beta.
2052     * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
2053     * @param incY The increment for the elements of vector y, must be larger than zero.
2054     */
2055    public void DSPMV(@Uplo int Uplo, double alpha, Allocation Ap, Allocation X, int incX, double beta, Allocation Y, int incY) {
2056        int N = validateSPMV(Element.F64(mRS), Uplo, Ap, X, incX, Y, incY);
2057
2058        boolean mUseIncSupp = isIncSupp();
2059        long apID = Ap.getID(mRS);
2060        long xID = X.getID(mRS);
2061        long yID = Y.getID(mRS);
2062        if (mUseIncSupp) {
2063            apID = getDummyAlloc(Ap);
2064            xID = getDummyAlloc(X);
2065            yID = getDummyAlloc(Y);
2066        }
2067        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dspmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, apID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp);
2068    }
2069
2070    /**
2071     * DGER performs the rank 1 operation
2072     * A := alpha*x*y**T + A
2073     *
2074     * Details: http://www.netlib.org/lapack/explore-html/dc/da8/dger_8f.html
2075     *
2076     * @param alpha The scalar alpha.
2077     * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
2078     * @param incX The increment for the elements of vector x, must be larger than zero.
2079     * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
2080     * @param incY The increment for the elements of vector y, must be larger than zero.
2081     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
2082     */
2083    public void DGER(double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
2084        int M = A.getType().getY();
2085        int N = A.getType().getX();
2086        validateGER(Element.F64(mRS), X, incX, Y, incY, A);
2087
2088        boolean mUseIncSupp = isIncSupp();
2089        long aID = A.getID(mRS);
2090        long xID = X.getID(mRS);
2091        long yID = Y.getID(mRS);
2092        if (mUseIncSupp) {
2093            aID = getDummyAlloc(A);
2094            xID = getDummyAlloc(X);
2095            yID = getDummyAlloc(Y);
2096        }
2097        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dger, 0, 0, 0, 0, 0, M, N, 0, alpha, xID, yID, 0.f, aID, incX, incY, 0, 0, mUseIncSupp);
2098    }
2099
2100    /**
2101     * DSYR performs the rank 1 operation
2102     * A := alpha*x*x**T + A
2103     *
2104     * Details: http://www.netlib.org/lapack/explore-html/d3/d60/dsyr_8f.html
2105     *
2106     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2107     * @param alpha The scalar alpha.
2108     * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
2109     * @param incX The increment for the elements of vector x, must be larger than zero.
2110     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
2111     */
2112    public void DSYR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation A) {
2113        int N = validateSYR(Element.F64(mRS), Uplo, X, incX, A);
2114
2115        boolean mUseIncSupp = isIncSupp();
2116        long aID = A.getID(mRS);
2117        long xID = X.getID(mRS);
2118        if (mUseIncSupp) {
2119            aID = getDummyAlloc(A);
2120            xID = getDummyAlloc(X);
2121        }
2122        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, aID, 0.f, 0, incX, 0, 0, 0, mUseIncSupp);
2123    }
2124
2125    /**
2126     * DSPR performs the rank 1 operation
2127     * A := alpha*x*x**T + A
2128     *
2129     * Details: http://www.netlib.org/lapack/explore-html/dd/dba/dspr_8f.html
2130     *
2131     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
2132     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
2133     *       'a' to packed matrix 'b'.
2134     *           k = 0
2135     *           for i in range(0, n):
2136     *              for j in range(i, n):
2137     *                  b[k++] = a[i, j]
2138     *
2139     * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
2140     * @param alpha The scalar alpha.
2141     * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
2142     * @param incX The increment for the elements of vector x, must be larger than zero.
2143     * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64}.
2144     */
2145    public void DSPR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Ap) {
2146        int N = validateSPR(Element.F64(mRS), Uplo, X, incX, Ap);
2147
2148        boolean mUseIncSupp = isIncSupp();
2149        long apID = Ap.getID(mRS);
2150        long xID = X.getID(mRS);
2151        if (mUseIncSupp) {
2152            apID = getDummyAlloc(Ap);
2153            xID = getDummyAlloc(X);
2154        }
2155        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dspr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, apID, 0.f, 0, incX, 0, 0, 0, mUseIncSupp);
2156    }
2157
2158    /**
2159     * DSYR2 performs the symmetric rank 2 operation
2160     * A := alpha*x*y**T + alpha*y*x**T + A
2161     *
2162     * Details: http://www.netlib.org/lapack/explore-html/de/d41/dsyr2_8f.html
2163     *
2164     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2165     * @param alpha The scalar alpha.
2166     * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
2167     * @param incX The increment for the elements of vector x, must be larger than zero.
2168     * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
2169     * @param incY The increment for the elements of vector y, must be larger than zero.
2170     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
2171     */
2172    public void DSYR2(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
2173        int N = validateSYR2(Element.F64(mRS), Uplo, X, incX, Y, incY, A);
2174
2175        boolean mUseIncSupp = isIncSupp();
2176        long aID = A.getID(mRS);
2177        long xID = X.getID(mRS);
2178        long yID = Y.getID(mRS);
2179        if (mUseIncSupp) {
2180            aID = getDummyAlloc(A);
2181            xID = getDummyAlloc(X);
2182            yID = getDummyAlloc(Y);
2183        }
2184        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, yID, 0, aID, incX, incY, 0, 0, mUseIncSupp);
2185    }
2186
2187    /**
2188     * DSPR2 performs the symmetric rank 2 operation
2189     * A := alpha*x*y**T + alpha*y*x**T + A
2190     *
2191     * Details: http://www.netlib.org/lapack/explore-html/dd/d9e/dspr2_8f.html
2192     *
2193     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
2194     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
2195     *       'a' to packed matrix 'b'.
2196     *           k = 0
2197     *           for i in range(0, n):
2198     *              for j in range(i, n):
2199     *                  b[k++] = a[i, j]
2200     *
2201     * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
2202     * @param alpha The scalar alpha.
2203     * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
2204     * @param incX The increment for the elements of vector x, must be larger than zero.
2205     * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
2206     * @param incY The increment for the elements of vector y, must be larger than zero.
2207     * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64}.
2208     */
2209    public void DSPR2(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) {
2210        int N = validateSPR2(Element.F64(mRS), Uplo, X, incX, Y, incY, Ap);
2211
2212        boolean mUseIncSupp = isIncSupp();
2213        long apID = Ap.getID(mRS);
2214        long xID = X.getID(mRS);
2215        long yID = Y.getID(mRS);
2216        if (mUseIncSupp) {
2217            apID = getDummyAlloc(Ap);
2218            xID = getDummyAlloc(X);
2219            yID = getDummyAlloc(Y);
2220        }
2221        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dspr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, yID, 0, apID, incX, incY, 0, 0, mUseIncSupp);
2222    }
2223
2224
2225    /**
2226     * Level 2, C and Z only
2227     */
2228
2229    static void validateGERU(Element e, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
2230        if (!A.getType().getElement().isCompatible(e) ||
2231            !X.getType().getElement().isCompatible(e) ||
2232            !Y.getType().getElement().isCompatible(e)) {
2233            throw new RSRuntimeException("Called BLAS with wrong Element type");
2234        }
2235        if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
2236            throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
2237        }
2238
2239        int M = A.getType().getY();
2240        int N = A.getType().getX();
2241        if (incX <= 0 || incY <= 0) {
2242            throw new RSRuntimeException("Vector increments must be greater than 0");
2243        }
2244        int expectedXDim = 1 + (M - 1) * incX;
2245        if (X.getType().getX() != expectedXDim) {
2246            throw new RSRuntimeException("Incorrect vector dimensions for GERU");
2247        }
2248        int expectedYDim = 1 + (N - 1) * incY;
2249        if (Y.getType().getX() != expectedYDim) {
2250            throw new RSRuntimeException("Incorrect vector dimensions for GERU");
2251        }
2252
2253    }
2254
2255    /**
2256     * CHEMV performs the matrix-vector operation
2257     * y := alpha*A*x + beta*y
2258     *
2259     * Details: http://www.netlib.org/lapack/explore-html/d7/d51/chemv_8f.html
2260     *
2261     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2262     * @param alpha The scalar alpha.
2263     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
2264     * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
2265     * @param incX The increment for the elements of vector x, must be larger than zero.
2266     * @param beta The scalar beta.
2267     * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
2268     * @param incY The increment for the elements of vector y, must be larger than zero.
2269     */
2270    public void CHEMV(@Uplo int Uplo, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) {
2271        // HEMV is the same as SYR2 validation-wise
2272        int N = validateSYR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, A);
2273
2274        boolean mUseIncSupp = isIncSupp();
2275        long aID = A.getID(mRS);
2276        long xID = X.getID(mRS);
2277        long yID = Y.getID(mRS);
2278        if (mUseIncSupp) {
2279            aID = getDummyAlloc(A);
2280            xID = getDummyAlloc(X);
2281            yID = getDummyAlloc(Y);
2282        }
2283        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chemv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp);
2284    }
2285
2286    /**
2287     * CHBMV performs the matrix-vector operation
2288     * y := alpha*A*x + beta*y
2289     *
2290     * Details: http://www.netlib.org/lapack/explore-html/db/dc2/chbmv_8f.html
2291     *
2292     * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
2293     *       but only the region N*(K+1) will be referenced. The following subroutine can is an
2294     *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
2295     *           for i in range(0, n):
2296     *              for j in range(i, min(i+k+1, n)):
2297     *                  b[i, j-i] = a[i, j]
2298     *
2299     * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied.
2300     * @param K The number of off-diagonals of the matrix A
2301     * @param alpha The scalar alpha.
2302     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
2303     * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
2304     * @param incX The increment for the elements of vector x, must be larger than zero.
2305     * @param beta The scalar beta.
2306     * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
2307     * @param incY The increment for the elements of vector y, must be larger than zero.
2308     */
2309    public void CHBMV(@Uplo int Uplo, int K, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) {
2310        // HBMV is the same as SYR2 validation-wise
2311        int N = validateSYR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, A);
2312        if (K < 0) {
2313            throw new RSRuntimeException("K must be 0 or greater for HBMV");
2314        }
2315
2316        boolean mUseIncSupp = isIncSupp();
2317        long aID = A.getID(mRS);
2318        long xID = X.getID(mRS);
2319        long yID = Y.getID(mRS);
2320        if (mUseIncSupp) {
2321            aID = getDummyAlloc(A);
2322            xID = getDummyAlloc(X);
2323            yID = getDummyAlloc(Y);
2324        }
2325        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp);
2326    }
2327
2328    /**
2329     * CHPMV performs the matrix-vector operation
2330     * y := alpha*A*x + beta*y
2331     *
2332     * Details: http://www.netlib.org/lapack/explore-html/d2/d06/chpmv_8f.html
2333     *
2334     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
2335     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
2336     *       'a' to packed matrix 'b'.
2337     *           k = 0
2338     *           for i in range(0, n):
2339     *              for j in range(i, n):
2340     *                  b[k++] = a[i, j]
2341     *
2342     * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form.
2343     * @param alpha The scalar alpha.
2344     * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
2345     * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
2346     * @param incX The increment for the elements of vector x, must be larger than zero.
2347     * @param beta The scalar beta.
2348     * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
2349     * @param incY The increment for the elements of vector y, must be larger than zero.
2350     */
2351    public void CHPMV(@Uplo int Uplo, Float2 alpha, Allocation Ap, Allocation X, int incX, Float2 beta, Allocation Y, int incY) {
2352        // HPMV is the same as SPR2
2353        int N = validateSPR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, Ap);
2354
2355        boolean mUseIncSupp = isIncSupp();
2356        long apID = Ap.getID(mRS);
2357        long xID = X.getID(mRS);
2358        long yID = Y.getID(mRS);
2359        if (mUseIncSupp) {
2360            apID = getDummyAlloc(Ap);
2361            xID = getDummyAlloc(X);
2362            yID = getDummyAlloc(Y);
2363        }
2364        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chpmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, apID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp);
2365    }
2366
2367    /**
2368     * CGERU performs the rank 1 operation
2369     * A := alpha*x*y**T + A
2370     *
2371     * Details: http://www.netlib.org/lapack/explore-html/db/d5f/cgeru_8f.html
2372     *
2373     * @param alpha The scalar alpha.
2374     * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
2375     * @param incX The increment for the elements of vector x, must be larger than zero.
2376     * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
2377     * @param incY The increment for the elements of vector y, must be larger than zero.
2378     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
2379     */
2380    public void CGERU(Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
2381        validateGERU(Element.F32_2(mRS), X, incX, Y, incY, A);
2382        int M = A.getType().getY();
2383        int N = A.getType().getX();
2384
2385        boolean mUseIncSupp = isIncSupp();
2386        long aID = A.getID(mRS);
2387        long xID = X.getID(mRS);
2388        long yID = Y.getID(mRS);
2389        if (mUseIncSupp) {
2390            aID = getDummyAlloc(A);
2391            xID = getDummyAlloc(X);
2392            yID = getDummyAlloc(Y);
2393        }
2394        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgeru, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, aID, incX, incY, 0, 0, mUseIncSupp);
2395    }
2396
2397    /**
2398     * CGERC performs the rank 1 operation
2399     * A := alpha*x*y**H + A
2400     *
2401     * Details: http://www.netlib.org/lapack/explore-html/dd/d84/cgerc_8f.html
2402     *
2403     * @param alpha The scalar alpha.
2404     * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
2405     * @param incX The increment for the elements of vector x, must be larger than zero.
2406     * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
2407     * @param incY The increment for the elements of vector y, must be larger than zero.
2408     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
2409     */
2410    public void CGERC(Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
2411        // same as GERU
2412        validateGERU(Element.F32_2(mRS), X, incX, Y, incY, A);
2413        int M = A.getType().getY();
2414        int N = A.getType().getX();
2415
2416        boolean mUseIncSupp = isIncSupp();
2417        long aID = A.getID(mRS);
2418        long xID = X.getID(mRS);
2419        long yID = Y.getID(mRS);
2420        if (mUseIncSupp) {
2421            aID = getDummyAlloc(A);
2422            xID = getDummyAlloc(X);
2423            yID = getDummyAlloc(Y);
2424        }
2425        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgerc, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, aID, incX, incY, 0, 0, mUseIncSupp);
2426    }
2427
2428    /**
2429     * CHER performs the rank 1 operation
2430     * A := alpha*x*x**H + A
2431     *
2432     * Details: http://www.netlib.org/lapack/explore-html/d3/d6d/cher_8f.html
2433     *
2434     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2435     * @param alpha The scalar alpha.
2436     * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
2437     * @param incX The increment for the elements of vector x, must be larger than zero.
2438     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
2439     */
2440    public void CHER(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation A) {
2441        // same as SYR
2442        int N = validateSYR(Element.F32_2(mRS), Uplo, X, incX, A);
2443
2444        boolean mUseIncSupp = isIncSupp();
2445        long aID = A.getID(mRS);
2446        long xID = X.getID(mRS);
2447        if (mUseIncSupp) {
2448            aID = getDummyAlloc(A);
2449            xID = getDummyAlloc(X);
2450        }
2451        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cher, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, xID, 0, 0, 0, aID, incX, 0, 0, 0, mUseIncSupp);
2452    }
2453
2454    /**
2455     * CHPR performs the rank 1 operation
2456     * A := alpha*x*x**H + A
2457     *
2458     * Details: http://www.netlib.org/lapack/explore-html/db/dcd/chpr_8f.html
2459     *
2460     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
2461     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
2462     *       'a' to packed matrix 'b'.
2463     *           k = 0
2464     *           for i in range(0, n):
2465     *              for j in range(i, n):
2466     *                  b[k++] = a[i, j]
2467     *
2468     * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
2469     * @param alpha The scalar alpha.
2470     * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
2471     * @param incX The increment for the elements of vector x, must be larger than zero.
2472     * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
2473     */
2474    public void CHPR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Ap) {
2475        // equivalent to SPR for validation
2476        int N = validateSPR(Element.F32_2(mRS), Uplo, X, incX, Ap);
2477
2478        boolean mUseIncSupp = isIncSupp();
2479        long apID = Ap.getID(mRS);
2480        long xID = X.getID(mRS);
2481        if (mUseIncSupp) {
2482            apID = getDummyAlloc(Ap);
2483            xID = getDummyAlloc(X);
2484        }
2485        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chpr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, xID, 0, 0, 0, apID, incX, 0, 0, 0, mUseIncSupp);
2486    }
2487
2488    /**
2489     * CHER2 performs the symmetric rank 2 operation
2490     * A := alpha*x*y**H + alpha*y*x**H + A
2491     *
2492     * Details: http://www.netlib.org/lapack/explore-html/db/d87/cher2_8f.html
2493     *
2494     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2495     * @param alpha The scalar alpha.
2496     * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
2497     * @param incX The increment for the elements of vector x, must be larger than zero.
2498     * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
2499     * @param incY The increment for the elements of vector y, must be larger than zero.
2500     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
2501     */
2502    public void CHER2(@Uplo int Uplo, Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
2503        // same as SYR2
2504        int N = validateSYR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, A);
2505
2506        boolean mUseIncSupp = isIncSupp();
2507        long aID = A.getID(mRS);
2508        long xID = X.getID(mRS);
2509        long yID = Y.getID(mRS);
2510        if (mUseIncSupp) {
2511            aID = getDummyAlloc(A);
2512            xID = getDummyAlloc(X);
2513            yID = getDummyAlloc(Y);
2514        }
2515        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cher2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, aID, incX, incY, 0, 0, mUseIncSupp);
2516    }
2517
2518    /**
2519     * CHPR2 performs the symmetric rank 2 operation
2520     * A := alpha*x*y**H + alpha*y*x**H + A
2521     *
2522     * Details: http://www.netlib.org/lapack/explore-html/d6/d44/chpr2_8f.html
2523     *
2524     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
2525     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
2526     *       'a' to packed matrix 'b'.
2527     *           k = 0
2528     *           for i in range(0, n):
2529     *              for j in range(i, n):
2530     *                  b[k++] = a[i, j]
2531     *
2532     * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
2533     * @param alpha The scalar alpha.
2534     * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
2535     * @param incX The increment for the elements of vector x, must be larger than zero.
2536     * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
2537     * @param incY The increment for the elements of vector y, must be larger than zero.
2538     * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
2539     */
2540    public void CHPR2(@Uplo int Uplo, Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) {
2541        // same as SPR2
2542        int N = validateSPR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, Ap);
2543
2544        boolean mUseIncSupp = isIncSupp();
2545        long apID = Ap.getID(mRS);
2546        long xID = X.getID(mRS);
2547        long yID = Y.getID(mRS);
2548        if (mUseIncSupp) {
2549            apID = getDummyAlloc(Ap);
2550            xID = getDummyAlloc(X);
2551            yID = getDummyAlloc(Y);
2552        }
2553        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chpr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, apID, incX, incY, 0, 0, mUseIncSupp);
2554    }
2555
2556    /**
2557     * ZHEMV performs the matrix-vector operation
2558     * y := alpha*A*x + beta*y
2559     *
2560     * Details: http://www.netlib.org/lapack/explore-html/d0/ddd/zhemv_8f.html
2561     *
2562     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2563     * @param alpha The scalar alpha.
2564     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2565     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2566     * @param incX The increment for the elements of vector x, must be larger than zero.
2567     * @param beta The scalar beta.
2568     * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
2569     * @param incY The increment for the elements of vector y, must be larger than zero.
2570     */
2571    public void ZHEMV(@Uplo int Uplo, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) {
2572        // HEMV is the same as SYR2 validation-wise
2573        int N = validateSYR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, A);
2574
2575        boolean mUseIncSupp = isIncSupp();
2576        long aID = A.getID(mRS);
2577        long xID = X.getID(mRS);
2578        long yID = Y.getID(mRS);
2579        if (mUseIncSupp) {
2580            aID = getDummyAlloc(A);
2581            xID = getDummyAlloc(X);
2582            yID = getDummyAlloc(Y);
2583        }
2584        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhemv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp);
2585    }
2586
2587    /**
2588     * ZHBMV performs the matrix-vector operation
2589     * y := alpha*A*x + beta*y
2590     *
2591     * Details: http://www.netlib.org/lapack/explore-html/d3/d1a/zhbmv_8f.html
2592     *
2593     * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
2594     *       but only the region N*(K+1) will be referenced. The following subroutine can is an
2595     *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
2596     *           for i in range(0, n):
2597     *              for j in range(i, min(i+k+1, n)):
2598     *                  b[i, j-i] = a[i, j]
2599     *
2600     * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied.
2601     * @param K The number of off-diagonals of the matrix A
2602     * @param alpha The scalar alpha.
2603     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2604     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2605     * @param incX The increment for the elements of vector x, must be larger than zero.
2606     * @param beta The scalar beta.
2607     * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
2608     * @param incY The increment for the elements of vector y, must be larger than zero.
2609     */
2610    public void ZHBMV(@Uplo int Uplo, int K, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) {
2611        // HBMV is the same as SYR2 validation-wise
2612        int N = validateSYR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, A);
2613        if (K < 0) {
2614            throw new RSRuntimeException("K must be 0 or greater for HBMV");
2615        }
2616
2617        boolean mUseIncSupp = isIncSupp();
2618        long aID = A.getID(mRS);
2619        long xID = X.getID(mRS);
2620        long yID = Y.getID(mRS);
2621        if (mUseIncSupp) {
2622            aID = getDummyAlloc(A);
2623            xID = getDummyAlloc(X);
2624            yID = getDummyAlloc(Y);
2625        }
2626        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp);
2627    }
2628
2629    /**
2630     * ZHPMV performs the matrix-vector operation
2631     * y := alpha*A*x + beta*y
2632     *
2633     * Details: http://www.netlib.org/lapack/explore-html/d0/d60/zhpmv_8f.html
2634     *
2635     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
2636     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
2637     *       'a' to packed matrix 'b'.
2638     *           k = 0
2639     *           for i in range(0, n):
2640     *              for j in range(i, n):
2641     *                  b[k++] = a[i, j]
2642     *
2643     * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form.
2644     * @param alpha The scalar alpha.
2645     * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2646     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2647     * @param incX The increment for the elements of vector x, must be larger than zero.
2648     * @param beta The scalar beta.
2649     * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
2650     * @param incY The increment for the elements of vector y, must be larger than zero.
2651     */
2652    public void ZHPMV(@Uplo int Uplo, Double2 alpha, Allocation Ap, Allocation X, int incX, Double2 beta, Allocation Y, int incY) {
2653        // HPMV is the same as SPR2
2654        int N = validateSPR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, Ap);
2655
2656        boolean mUseIncSupp = isIncSupp();
2657        long apID = Ap.getID(mRS);
2658        long xID = X.getID(mRS);
2659        long yID = Y.getID(mRS);
2660        if (mUseIncSupp) {
2661            apID = getDummyAlloc(Ap);
2662            xID = getDummyAlloc(X);
2663            yID = getDummyAlloc(Y);
2664        }
2665        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhpmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, apID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp);
2666    }
2667
2668    /**
2669     * ZGERU performs the rank 1 operation
2670     * A := alpha*x*y**T + A
2671     *
2672     * Details: http://www.netlib.org/lapack/explore-html/d7/d12/zgeru_8f.html
2673     *
2674     * @param alpha The scalar alpha.
2675     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2676     * @param incX The increment for the elements of vector x, must be larger than zero.
2677     * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
2678     * @param incY The increment for the elements of vector y, must be larger than zero.
2679     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2680     */
2681    public void ZGERU(Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
2682        validateGERU(Element.F64_2(mRS), X, incX, Y, incY, A);
2683        int M = A.getType().getY();
2684        int N = A.getType().getX();
2685
2686        boolean mUseIncSupp = isIncSupp();
2687        long aID = A.getID(mRS);
2688        long xID = X.getID(mRS);
2689        long yID = Y.getID(mRS);
2690        if (mUseIncSupp) {
2691            aID = getDummyAlloc(A);
2692            xID = getDummyAlloc(X);
2693            yID = getDummyAlloc(Y);
2694        }
2695        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgeru, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, aID, incX, incY, 0, 0, mUseIncSupp);
2696    }
2697
2698    /**
2699     * ZGERC performs the rank 1 operation
2700     * A := alpha*x*y**H + A
2701     *
2702     * Details: http://www.netlib.org/lapack/explore-html/d3/dad/zgerc_8f.html
2703     *
2704     * @param alpha The scalar alpha.
2705     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2706     * @param incX The increment for the elements of vector x, must be larger than zero.
2707     * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
2708     * @param incY The increment for the elements of vector y, must be larger than zero.
2709     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2710     */
2711    public void ZGERC(Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
2712        // same as GERU
2713        validateGERU(Element.F64_2(mRS), X, incX, Y, incY, A);
2714        int M = A.getType().getY();
2715        int N = A.getType().getX();
2716
2717        boolean mUseIncSupp = isIncSupp();
2718        long aID = A.getID(mRS);
2719        long xID = X.getID(mRS);
2720        long yID = Y.getID(mRS);
2721        if (mUseIncSupp) {
2722            aID = getDummyAlloc(A);
2723            xID = getDummyAlloc(X);
2724            yID = getDummyAlloc(Y);
2725        }
2726        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgerc, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, aID, incX, incY, 0, 0, mUseIncSupp);
2727    }
2728
2729    /**
2730     * ZHER performs the rank 1 operation
2731     * A := alpha*x*x**H + A
2732     *
2733     * Details: http://www.netlib.org/lapack/explore-html/de/d0e/zher_8f.html
2734     *
2735     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2736     * @param alpha The scalar alpha.
2737     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2738     * @param incX The increment for the elements of vector x, must be larger than zero.
2739     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2740     */
2741    public void ZHER(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation A) {
2742        // same as SYR
2743        int N = validateSYR(Element.F64_2(mRS), Uplo, X, incX, A);
2744
2745        boolean mUseIncSupp = isIncSupp();
2746        long aID = A.getID(mRS);
2747        long xID = X.getID(mRS);
2748        if (mUseIncSupp) {
2749            aID = getDummyAlloc(A);
2750            xID = getDummyAlloc(X);
2751        }
2752        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zher, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, xID, 0, 0, 0, aID, incX, 0, 0, 0, mUseIncSupp);
2753    }
2754
2755    /**
2756     * ZHPR performs the rank 1 operation
2757     * A := alpha*x*x**H + A
2758     *
2759     * Details: http://www.netlib.org/lapack/explore-html/de/de1/zhpr_8f.html
2760     *
2761     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
2762     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
2763     *       'a' to packed matrix 'b'.
2764     *           k = 0
2765     *           for i in range(0, n):
2766     *              for j in range(i, n):
2767     *                  b[k++] = a[i, j]
2768     *
2769     * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
2770     * @param alpha The scalar alpha.
2771     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2772     * @param incX The increment for the elements of vector x, must be larger than zero.
2773     * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2774     */
2775    public void ZHPR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Ap) {
2776        // equivalent to SPR for validation
2777        int N = validateSPR(Element.F64_2(mRS), Uplo, X, incX, Ap);
2778
2779        boolean mUseIncSupp = isIncSupp();
2780        long apID = Ap.getID(mRS);
2781        long xID = X.getID(mRS);
2782        if (mUseIncSupp) {
2783            apID = getDummyAlloc(Ap);
2784            xID = getDummyAlloc(X);
2785        }
2786        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhpr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, xID, 0, 0, 0, apID, incX, 0, 0, 0, mUseIncSupp);
2787    }
2788
2789    /**
2790     * ZHER2 performs the symmetric rank 2 operation
2791     * A := alpha*x*y**H + alpha*y*x**H + A
2792     *
2793     * Details: http://www.netlib.org/lapack/explore-html/da/d8a/zher2_8f.html
2794     *
2795     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2796     * @param alpha The scalar alpha.
2797     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2798     * @param incX The increment for the elements of vector x, must be larger than zero.
2799     * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
2800     * @param incY The increment for the elements of vector y, must be larger than zero.
2801     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2802     */
2803    public void ZHER2(@Uplo int Uplo, Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
2804        // same as SYR2
2805        int N = validateSYR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, A);
2806
2807        boolean mUseIncSupp = isIncSupp();
2808        long aID = A.getID(mRS);
2809        long xID = X.getID(mRS);
2810        long yID = Y.getID(mRS);
2811        if (mUseIncSupp) {
2812            aID = getDummyAlloc(A);
2813            xID = getDummyAlloc(X);
2814            yID = getDummyAlloc(Y);
2815        }
2816        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zher2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, aID, incX, incY, 0, 0, mUseIncSupp);
2817    }
2818
2819    /**
2820     * ZHPR2 performs the symmetric rank 2 operation
2821     * A := alpha*x*y**H + alpha*y*x**H + A
2822     *
2823     * Details: http://www.netlib.org/lapack/explore-html/d5/d52/zhpr2_8f.html
2824     *
2825     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
2826     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
2827     *       'a' to packed matrix 'b'.
2828     *           k = 0
2829     *           for i in range(0, n):
2830     *              for j in range(i, n):
2831     *                  b[k++] = a[i, j]
2832     *
2833     * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
2834     * @param alpha The scalar alpha.
2835     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2836     * @param incX The increment for the elements of vector x, must be larger than zero.
2837     * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
2838     * @param incY The increment for the elements of vector y, must be larger than zero.
2839     * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2840     */
2841    public void ZHPR2(@Uplo int Uplo, Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) {
2842        // same as SPR2
2843        int N = validateSPR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, Ap);
2844
2845        boolean mUseIncSupp = isIncSupp();
2846        long apID = Ap.getID(mRS);
2847        long xID = X.getID(mRS);
2848        long yID = Y.getID(mRS);
2849        if (mUseIncSupp) {
2850            apID = getDummyAlloc(Ap);
2851            xID = getDummyAlloc(X);
2852            yID = getDummyAlloc(Y);
2853        }
2854        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhpr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, apID, incX, incY, 0, 0, mUseIncSupp);
2855    }
2856
2857
2858    /**
2859     * Level 3 BLAS
2860     */
2861
2862    static void validateL3(Element e, int TransA, int TransB, int Side, Allocation A, Allocation B, Allocation C) {
2863        int aM = -1, aN = -1, bM = -1, bN = -1, cM = -1, cN = -1;
2864        if ((A != null && !A.getType().getElement().isCompatible(e)) ||
2865            (B != null && !B.getType().getElement().isCompatible(e)) ||
2866            (C != null && !C.getType().getElement().isCompatible(e))) {
2867            throw new RSRuntimeException("Called BLAS with wrong Element type");
2868        }
2869        if (C == null) {
2870            //since matrix C is used to store the result, it cannot be null.
2871            throw new RSRuntimeException("Allocation C cannot be null");
2872        }
2873        cM = C.getType().getY();
2874        cN = C.getType().getX();
2875
2876        if (Side == RIGHT) {
2877            if ((A == null && B != null) || (A != null && B == null)) {
2878                throw new RSRuntimeException("Provided Matrix A without Matrix B, or vice versa");
2879            }
2880            if (B != null) {
2881                bM = A.getType().getY();
2882                bN = A.getType().getX();
2883            }
2884            if (A != null) {
2885                aM = B.getType().getY();
2886                aN = B.getType().getX();
2887            }
2888        } else {
2889            if (A != null) {
2890                if (TransA == TRANSPOSE || TransA == CONJ_TRANSPOSE) {
2891                    aN = A.getType().getY();
2892                    aM = A.getType().getX();
2893                } else {
2894                    aM = A.getType().getY();
2895                    aN = A.getType().getX();
2896                }
2897            }
2898            if (B != null) {
2899                if (TransB == TRANSPOSE || TransB == CONJ_TRANSPOSE) {
2900                    bN = B.getType().getY();
2901                    bM = B.getType().getX();
2902                } else {
2903                    bM = B.getType().getY();
2904                    bN = B.getType().getX();
2905                }
2906            }
2907        }
2908        if (A != null && B != null && C != null) {
2909            if (aN != bM || aM != cM || bN != cN) {
2910                throw new RSRuntimeException("Called BLAS with invalid dimensions");
2911            }
2912        } else if (A != null && C != null) {
2913            // A and C only, for SYRK
2914            if (cM != cN) {
2915                throw new RSRuntimeException("Matrix C is not symmetric");
2916            }
2917            if (aM != cM) {
2918                throw new RSRuntimeException("Called BLAS with invalid dimensions");
2919            }
2920        } else if (A != null && B != null) {
2921            // A and B only
2922            if (aN != bM) {
2923                throw new RSRuntimeException("Called BLAS with invalid dimensions");
2924            }
2925        }
2926
2927    }
2928
2929    /**
2930     * SGEMM performs one of the matrix-matrix operations
2931     * C := alpha*op(A)*op(B) + beta*C   where op(X) is one of op(X) = X  or  op(X) = X**T
2932     *
2933     * Details: http://www.netlib.org/lapack/explore-html/d4/de2/sgemm_8f.html
2934     *
2935     * @param TransA The type of transpose applied to matrix A.
2936     * @param TransB The type of transpose applied to matrix B.
2937     * @param alpha The scalar alpha.
2938     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
2939     * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}.
2940     * @param beta The scalar beta.
2941     * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}.
2942     */
2943    public void SGEMM(@Transpose int TransA, @Transpose int TransB, float alpha, Allocation A,
2944                      Allocation B, float beta, Allocation C) {
2945        validateTranspose(TransA);
2946        validateTranspose(TransB);
2947        validateL3(Element.F32(mRS), TransA, TransB, 0, A, B, C);
2948
2949        int M = -1, N = -1, K = -1;
2950        if (TransA != NO_TRANSPOSE) {
2951            M = A.getType().getX();
2952            K = A.getType().getY();
2953        } else {
2954            M = A.getType().getY();
2955            K = A.getType().getX();
2956        }
2957        if (TransB != NO_TRANSPOSE) {
2958            N = B.getType().getY();
2959        } else {
2960            N = B.getType().getX();
2961        }
2962
2963        boolean mUseIncSupp = isIncSupp();
2964        long aID = A.getID(mRS);
2965        long bID = B.getID(mRS);
2966        long cID = C.getID(mRS);
2967        if (mUseIncSupp) {
2968            aID = getDummyAlloc(A);
2969            bID = getDummyAlloc(B);
2970            cID = getDummyAlloc(C);
2971        }
2972        mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sgemm, TransA, TransB, 0, 0, 0, M, N, K,  alpha, aID, bID,
2973                                        beta, cID, 0, 0, 0, 0, mUseIncSupp);
2974    }
2975
2976    /**
2977     * DGEMM performs one of the matrix-matrix operations
2978     * C := alpha*op(A)*op(B) + beta*C   where op(X) is one of op(X) = X  or  op(X) = X**T
2979     *
2980     * Details: http://www.netlib.org/lapack/explore-html/d7/d2b/dgemm_8f.html
2981     *
2982     * @param TransA The type of transpose applied to matrix A.
2983     * @param TransB The type of transpose applied to matrix B.
2984     * @param alpha The scalar alpha.
2985     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
2986     * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}.
2987     * @param beta The scalar beta.
2988     * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}.
2989     */
2990    public void DGEMM(@Transpose int TransA, @Transpose int TransB, double alpha, Allocation A,
2991                      Allocation B, double beta, Allocation C) {
2992        validateTranspose(TransA);
2993        validateTranspose(TransB);
2994        validateL3(Element.F64(mRS), TransA, TransB, 0, A, B, C);
2995        int M = -1, N = -1, K = -1;
2996        if (TransA != NO_TRANSPOSE) {
2997            M = A.getType().getX();
2998            K = A.getType().getY();
2999        } else {
3000            M = A.getType().getY();
3001            K = A.getType().getX();
3002        }
3003        if (TransB != NO_TRANSPOSE) {
3004            N = B.getType().getY();
3005        } else {
3006            N = B.getType().getX();
3007        }
3008
3009        boolean mUseIncSupp = isIncSupp();
3010        long aID = A.getID(mRS);
3011        long bID = B.getID(mRS);
3012        long cID = C.getID(mRS);
3013        if (mUseIncSupp) {
3014            aID = getDummyAlloc(A);
3015            bID = getDummyAlloc(B);
3016            cID = getDummyAlloc(C);
3017        }
3018        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dgemm, TransA, TransB, 0, 0, 0, M, N, K,  alpha, aID, bID,
3019                                        beta, cID, 0, 0, 0, 0, mUseIncSupp);
3020    }
3021
3022    /**
3023     * CGEMM performs one of the matrix-matrix operations
3024     * C := alpha*op(A)*op(B) + beta*C   where op(X) is one of op(X) = X  or  op(X) = X**T  or  op(X) = X**H
3025     *
3026     * Details: http://www.netlib.org/lapack/explore-html/d6/d5b/cgemm_8f.html
3027     *
3028     * @param TransA The type of transpose applied to matrix A.
3029     * @param TransB The type of transpose applied to matrix B.
3030     * @param alpha The scalar alpha.
3031     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
3032     * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.
3033     * @param beta The scalar beta.
3034     * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.
3035     */
3036    public void CGEMM(@Transpose int TransA, @Transpose int TransB, Float2 alpha, Allocation A,
3037                      Allocation B, Float2 beta, Allocation C) {
3038        validateTranspose(TransA);
3039        validateTranspose(TransB);
3040        validateL3(Element.F32_2(mRS), TransA, TransB, 0, A, B, C);
3041        int M = -1, N = -1, K = -1;
3042        if (TransA != NO_TRANSPOSE) {
3043            M = A.getType().getX();
3044            K = A.getType().getY();
3045        } else {
3046            M = A.getType().getY();
3047            K = A.getType().getX();
3048        }
3049        if (TransB != NO_TRANSPOSE) {
3050            N = B.getType().getY();
3051        } else {
3052            N = B.getType().getX();
3053        }
3054
3055        boolean mUseIncSupp = isIncSupp();
3056        long aID = A.getID(mRS);
3057        long bID = B.getID(mRS);
3058        long cID = C.getID(mRS);
3059        if (mUseIncSupp) {
3060            aID = getDummyAlloc(A);
3061            bID = getDummyAlloc(B);
3062            cID = getDummyAlloc(C);
3063        }
3064        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgemm, TransA, TransB, 0, 0, 0, M, N, K,  alpha.x, alpha.y, aID, bID,
3065                                         beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp);
3066    }
3067
3068    /**
3069     * ZGEMM performs one of the matrix-matrix operations
3070     * C := alpha*op(A)*op(B) + beta*C   where op(X) is one of op(X) = X  or  op(X) = X**T  or  op(X) = X**H
3071     *
3072     * Details: http://www.netlib.org/lapack/explore-html/d7/d76/zgemm_8f.html
3073     *
3074     * @param TransA The type of transpose applied to matrix A.
3075     * @param TransB The type of transpose applied to matrix B.
3076     * @param alpha The scalar alpha.
3077     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2
3078     * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2
3079     * @param beta The scalar beta.
3080     * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2
3081     */
3082    public void ZGEMM(@Transpose int TransA, @Transpose int TransB, Double2 alpha, Allocation A,
3083                      Allocation B, Double2 beta, Allocation C) {
3084        validateTranspose(TransA);
3085        validateTranspose(TransB);
3086        validateL3(Element.F64_2(mRS), TransA, TransB, 0, A, B, C);
3087        int M = -1, N = -1, K = -1;
3088        if (TransA != NO_TRANSPOSE) {
3089            M = A.getType().getX();
3090            K = A.getType().getY();
3091        } else {
3092            M = A.getType().getY();
3093            K = A.getType().getX();
3094        }
3095        if (TransB != NO_TRANSPOSE) {
3096            N = B.getType().getY();
3097        } else {
3098            N = B.getType().getX();
3099        }
3100
3101        boolean mUseIncSupp = isIncSupp();
3102        long aID = A.getID(mRS);
3103        long bID = B.getID(mRS);
3104        long cID = C.getID(mRS);
3105        if (mUseIncSupp) {
3106            aID = getDummyAlloc(A);
3107            bID = getDummyAlloc(B);
3108            cID = getDummyAlloc(C);
3109        }
3110        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgemm, TransA, TransB, 0, 0, 0, M, N, K,  alpha.x, alpha.y, aID, bID,
3111                                   beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp);
3112    }
3113
3114    /**
3115     * SSYMM performs one of the matrix-matrix operations
3116     * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
3117     *
3118     * Details: http://www.netlib.org/lapack/explore-html/d7/d42/ssymm_8f.html
3119     *
3120     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3121     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
3122     * @param alpha The scalar alpha.
3123     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
3124     * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}.
3125     * @param beta The scalar beta.
3126     * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}.
3127     */
3128    public void SSYMM(@Side int Side, @Uplo int Uplo, float alpha, Allocation A,
3129                      Allocation B, float beta, Allocation C) {
3130        validateSide(Side);
3131        validateUplo(Uplo);
3132        //For SYMM, Matrix A should be symmetric
3133        if (A.getType().getX() != A.getType().getY()) {
3134            throw new RSRuntimeException("Matrix A is not symmetric");
3135        }
3136        validateL3(Element.F32(mRS), 0, 0, Side, A, B, C);
3137
3138        boolean mUseIncSupp = isIncSupp();
3139        long aID = A.getID(mRS);
3140        long bID = B.getID(mRS);
3141        long cID = C.getID(mRS);
3142        if (mUseIncSupp) {
3143            aID = getDummyAlloc(A);
3144            bID = getDummyAlloc(B);
3145            cID = getDummyAlloc(C);
3146        }
3147        mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha, aID, bID,
3148                                        beta, cID, 0, 0, 0, 0, mUseIncSupp);
3149    }
3150
3151    /**
3152     * DSYMM performs one of the matrix-matrix operations
3153     * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
3154     *
3155     * Details: http://www.netlib.org/lapack/explore-html/d8/db0/dsymm_8f.html
3156     *
3157     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3158     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
3159     * @param alpha The scalar alpha.
3160     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
3161     * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}.
3162     * @param beta The scalar beta.
3163     * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}.
3164     */
3165    public void DSYMM(@Side int Side, @Uplo int Uplo, double alpha, Allocation A,
3166                      Allocation B, double beta, Allocation C) {
3167        validateSide(Side);
3168        validateUplo(Uplo);
3169        if (A.getType().getX() != A.getType().getY()) {
3170            throw new RSRuntimeException("Matrix A is not symmetric");
3171        }
3172        validateL3(Element.F64(mRS), 0, 0, Side, A, B, C);
3173
3174        boolean mUseIncSupp = isIncSupp();
3175        long aID = A.getID(mRS);
3176        long bID = B.getID(mRS);
3177        long cID = C.getID(mRS);
3178        if (mUseIncSupp) {
3179            aID = getDummyAlloc(A);
3180            bID = getDummyAlloc(B);
3181            cID = getDummyAlloc(C);
3182        }
3183        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha, aID, bID,
3184                                        beta, cID, 0, 0, 0, 0, mUseIncSupp);
3185    }
3186
3187    /**
3188     * CSYMM performs one of the matrix-matrix operations
3189     * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
3190     *
3191     * Details: http://www.netlib.org/lapack/explore-html/db/d59/csymm_8f.html
3192     *
3193     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3194     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
3195     * @param alpha The scalar alpha.
3196     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
3197     * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.
3198     * @param beta The scalar beta.
3199     * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.
3200     */
3201    public void CSYMM(@Side int Side, @Uplo int Uplo, Float2 alpha, Allocation A,
3202                      Allocation B, Float2 beta, Allocation C) {
3203        validateSide(Side);
3204        validateUplo(Uplo);
3205        if (A.getType().getX() != A.getType().getY()) {
3206            throw new RSRuntimeException("Matrix A is not symmetric");
3207        }
3208        validateL3(Element.F32_2(mRS), 0, 0, Side, A, B, C);
3209
3210        boolean mUseIncSupp = isIncSupp();
3211        long aID = A.getID(mRS);
3212        long bID = B.getID(mRS);
3213        long cID = C.getID(mRS);
3214        if (mUseIncSupp) {
3215            aID = getDummyAlloc(A);
3216            bID = getDummyAlloc(B);
3217            cID = getDummyAlloc(C);
3218        }
3219        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_csymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha.x, alpha.y, aID, bID,
3220                                         beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp);
3221    }
3222
3223    /**
3224     * ZSYMM performs one of the matrix-matrix operations
3225     * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
3226     *
3227     * Details: http://www.netlib.org/lapack/explore-html/df/d51/zsymm_8f.html
3228     *
3229     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3230     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
3231     * @param alpha The scalar alpha.
3232     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
3233     * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.
3234     * @param beta The scalar beta.
3235     * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.
3236     */
3237    public void ZSYMM(@Side int Side, @Uplo int Uplo, Double2 alpha, Allocation A,
3238                      Allocation B, Double2 beta, Allocation C) {
3239        validateSide(Side);
3240        validateUplo(Uplo);
3241        if (A.getType().getX() != A.getType().getY()) {
3242            throw new RSRuntimeException("Matrix A is not symmetric");
3243        }
3244        validateL3(Element.F64_2(mRS), 0, 0, Side, A, B, C);
3245
3246        boolean mUseIncSupp = isIncSupp();
3247        long aID = A.getID(mRS);
3248        long bID = B.getID(mRS);
3249        long cID = C.getID(mRS);
3250        if (mUseIncSupp) {
3251            aID = getDummyAlloc(A);
3252            bID = getDummyAlloc(B);
3253            cID = getDummyAlloc(C);
3254        }
3255        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zsymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha.x, alpha.y, aID, bID,
3256                                   beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp);
3257    }
3258
3259    /**
3260     * SSYRK performs one of the symmetric rank k operations
3261     * C := alpha*A*A**T + beta*C   or   C := alpha*A**T*A + beta*C
3262     *
3263     * Details: http://www.netlib.org/lapack/explore-html/d0/d40/ssyrk_8f.html
3264     *
3265     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3266     * @param Trans The type of transpose applied to the operation.
3267     * @param alpha The scalar alpha.
3268     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
3269     * @param beta The scalar beta.
3270     * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}.
3271     */
3272    public void SSYRK(@Uplo int Uplo, @Transpose int Trans, float alpha, Allocation A, float beta, Allocation C) {
3273        validateTranspose(Trans);
3274        validateUplo(Uplo);
3275        validateL3(Element.F32(mRS), Trans, 0, 0, A, null, C);
3276        int K = -1;
3277        if (Trans != NO_TRANSPOSE) {
3278            K = A.getType().getY();
3279        } else {
3280            K = A.getType().getX();
3281        }
3282
3283        boolean mUseIncSupp = isIncSupp();
3284        long aID = A.getID(mRS);
3285        long cID = C.getID(mRS);
3286        if (mUseIncSupp) {
3287            aID = getDummyAlloc(A);
3288            cID = getDummyAlloc(C);
3289        }
3290        mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, aID, 0, beta, cID, 0, 0, 0, 0, mUseIncSupp);
3291    }
3292
3293    /**
3294     * DSYRK performs one of the symmetric rank k operations
3295     * C := alpha*A*A**T + beta*C   or   C := alpha*A**T*A + beta*C
3296     *
3297     * Details: http://www.netlib.org/lapack/explore-html/dc/d05/dsyrk_8f.html
3298     *
3299     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3300     * @param Trans The type of transpose applied to the operation.
3301     * @param alpha The scalar alpha.
3302     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
3303     * @param beta The scalar beta.
3304     * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}.
3305     */
3306    public void DSYRK(@Uplo int Uplo, @Transpose int Trans, double alpha, Allocation A, double beta, Allocation C) {
3307        validateTranspose(Trans);
3308        validateUplo(Uplo);
3309        validateL3(Element.F64(mRS), Trans, 0, 0, A, null, C);
3310        int K = -1;
3311        if (Trans != NO_TRANSPOSE) {
3312            K = A.getType().getY();
3313        } else {
3314            K = A.getType().getX();
3315        }
3316
3317        boolean mUseIncSupp = isIncSupp();
3318        long aID = A.getID(mRS);
3319        long cID = C.getID(mRS);
3320        if (mUseIncSupp) {
3321            aID = getDummyAlloc(A);
3322            cID = getDummyAlloc(C);
3323        }
3324        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, aID, 0, beta, cID, 0, 0, 0, 0, mUseIncSupp);
3325    }
3326
3327    /**
3328     * CSYRK performs one of the symmetric rank k operations
3329     * C := alpha*A*A**T + beta*C   or   C := alpha*A**T*A + beta*C
3330     *
3331     * Details: http://www.netlib.org/lapack/explore-html/d3/d6a/csyrk_8f.html
3332     *
3333     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3334     * @param Trans The type of transpose applied to the operation.
3335     * @param alpha The scalar alpha.
3336     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
3337     * @param beta The scalar beta.
3338     * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.
3339     */
3340    public void CSYRK(@Uplo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Float2 beta, Allocation C) {
3341        validateTranspose(Trans);
3342        validateUplo(Uplo);
3343        validateL3(Element.F32_2(mRS), Trans, 0, 0, A, null, C);
3344        int K = -1;
3345        if (Trans != NO_TRANSPOSE) {
3346            K = A.getType().getY();
3347        } else {
3348            K = A.getType().getX();
3349        }
3350
3351        boolean mUseIncSupp = isIncSupp();
3352        long aID = A.getID(mRS);
3353        long cID = C.getID(mRS);
3354        if (mUseIncSupp) {
3355            aID = getDummyAlloc(A);
3356            cID = getDummyAlloc(C);
3357        }
3358        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_csyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, aID, 0, beta.x, beta.y,
3359                                         C.getID(mRS), 0, 0, 0, 0, mUseIncSupp);
3360    }
3361
3362    /**
3363     * ZSYRK performs one of the symmetric rank k operations
3364     * C := alpha*A*A**T + beta*C   or   C := alpha*A**T*A + beta*C
3365     *
3366     * Details: http://www.netlib.org/lapack/explore-html/de/d54/zsyrk_8f.html
3367     *
3368     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3369     * @param Trans The type of transpose applied to the operation.
3370     * @param alpha The scalar alpha.
3371     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
3372     * @param beta The scalar beta.
3373     * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.
3374     */
3375    public void ZSYRK(@Uplo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Double2 beta, Allocation C) {
3376        validateTranspose(Trans);
3377        validateUplo(Uplo);
3378        validateL3(Element.F64_2(mRS), Trans, 0, 0, A, null, C);
3379        int K = -1;
3380        if (Trans != NO_TRANSPOSE) {
3381            K = A.getType().getY();
3382        } else {
3383            K = A.getType().getX();
3384        }
3385
3386        boolean mUseIncSupp = isIncSupp();
3387        long aID = A.getID(mRS);
3388        long cID = C.getID(mRS);
3389        if (mUseIncSupp) {
3390            aID = getDummyAlloc(A);
3391            cID = getDummyAlloc(C);
3392        }
3393        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zsyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, aID, 0, beta.x, beta.y,
3394                                   C.getID(mRS), 0, 0, 0, 0, mUseIncSupp);
3395    }
3396
3397    static void validateSYR2K(Element e, @Transpose int Trans, Allocation A, Allocation B, Allocation C) {
3398        validateTranspose(Trans);
3399        if (!A.getType().getElement().isCompatible(e) ||
3400            !B.getType().getElement().isCompatible(e) ||
3401            !C.getType().getElement().isCompatible(e)) {
3402            throw new RSRuntimeException("Called BLAS with wrong Element type");
3403        }
3404        int Cdim = -1;
3405        // A is n x k if no transpose, k x n if transpose
3406        // C is n x n
3407        if (Trans == TRANSPOSE) {
3408            // check columns versus C
3409            Cdim = A.getType().getX();
3410        } else {
3411            // check rows versus C
3412            Cdim = A.getType().getY();
3413        }
3414        if (C.getType().getX() != Cdim || C.getType().getY() != Cdim) {
3415            throw new RSRuntimeException("Invalid symmetric matrix in SYR2K");
3416        }
3417        // A dims == B dims
3418        if (A.getType().getX() != B.getType().getX() || A.getType().getY() != B.getType().getY()) {
3419            throw new RSRuntimeException("Invalid A and B in SYR2K");
3420        }
3421    }
3422
3423    /**
3424     * SSYR2K performs one of the symmetric rank 2k operations
3425     * C := alpha*A*B**T + alpha*B*A**T + beta*C   or   C := alpha*A**T*B + alpha*B**T*A + beta*C
3426     *
3427     * Details: http://www.netlib.org/lapack/explore-html/df/d3d/ssyr2k_8f.html
3428     *
3429     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3430     * @param Trans The type of transpose applied to the operation.
3431     * @param alpha The scalar alpha.
3432     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
3433     * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}.
3434     * @param beta The scalar beta.
3435     * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}.
3436     */
3437    public void SSYR2K(@Uplo int Uplo, @Transpose int Trans, float alpha, Allocation A, Allocation B, float beta, Allocation C) {
3438        validateUplo(Uplo);
3439        validateSYR2K(Element.F32(mRS), Trans, A, B, C);
3440        int K = -1;
3441        if (Trans != NO_TRANSPOSE) {
3442            K = A.getType().getY();
3443        } else {
3444            K = A.getType().getX();
3445        }
3446
3447        boolean mUseIncSupp = isIncSupp();
3448        long aID = A.getID(mRS);
3449        long bID = B.getID(mRS);
3450        long cID = C.getID(mRS);
3451        if (mUseIncSupp) {
3452            aID = getDummyAlloc(A);
3453            bID = getDummyAlloc(B);
3454            cID = getDummyAlloc(C);
3455        }
3456        mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, aID, bID, beta, cID, 0, 0, 0, 0, mUseIncSupp);
3457    }
3458
3459    /**
3460     * DSYR2K performs one of the symmetric rank 2k operations
3461     * C := alpha*A*B**T + alpha*B*A**T + beta*C   or   C := alpha*A**T*B + alpha*B**T*A + beta*C
3462     *
3463     * Details: http://www.netlib.org/lapack/explore-html/d1/dec/dsyr2k_8f.html
3464     *
3465     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3466     * @param Trans The type of transpose applied to the operation.
3467     * @param alpha The scalar alpha.
3468     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
3469     * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}.
3470     * @param beta The scalar beta.
3471     * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}.
3472     */
3473    public void DSYR2K(@Uplo int Uplo, @Transpose int Trans, double alpha, Allocation A, Allocation B, double beta, Allocation C) {
3474        validateUplo(Uplo);
3475        validateSYR2K(Element.F64(mRS), Trans, A, B, C);
3476        int K = -1;
3477        if (Trans != NO_TRANSPOSE) {
3478            K = A.getType().getY();
3479        } else {
3480            K = A.getType().getX();
3481        }
3482
3483        boolean mUseIncSupp = isIncSupp();
3484        long aID = A.getID(mRS);
3485        long bID = B.getID(mRS);
3486        long cID = C.getID(mRS);
3487        if (mUseIncSupp) {
3488            aID = getDummyAlloc(A);
3489            bID = getDummyAlloc(B);
3490            cID = getDummyAlloc(C);
3491        }
3492        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, aID, bID, beta, cID, 0, 0, 0, 0, mUseIncSupp);
3493    }
3494
3495    /**
3496     * CSYR2K performs one of the symmetric rank 2k operations
3497     * C := alpha*A*B**T + alpha*B*A**T + beta*C   or   C := alpha*A**T*B + alpha*B**T*A + beta*C
3498     *
3499     * Details: http://www.netlib.org/lapack/explore-html/de/d7e/csyr2k_8f.html
3500     *
3501     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3502     * @param Trans The type of transpose applied to the operation.
3503     * @param alpha The scalar alpha.
3504     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
3505     * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.
3506     * @param beta The scalar beta.
3507     * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.
3508     */
3509    public void CSYR2K(@Uplo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C) {
3510        validateUplo(Uplo);
3511        validateSYR2K(Element.F32_2(mRS), Trans, A, B, C);
3512        int K = -1;
3513        if (Trans != NO_TRANSPOSE) {
3514            K = A.getType().getY();
3515        } else {
3516            K = A.getType().getX();
3517        }
3518
3519        boolean mUseIncSupp = isIncSupp();
3520        long aID = A.getID(mRS);
3521        long bID = B.getID(mRS);
3522        long cID = C.getID(mRS);
3523        if (mUseIncSupp) {
3524            aID = getDummyAlloc(A);
3525            bID = getDummyAlloc(B);
3526            cID = getDummyAlloc(C);
3527        }
3528        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_csyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, aID, bID, beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp);
3529    }
3530
3531    /**
3532     * ZSYR2K performs one of the symmetric rank 2k operations
3533     * C := alpha*A*B**T + alpha*B*A**T + beta*C   or   C := alpha*A**T*B + alpha*B**T*A + beta*C
3534     *
3535     * Details: http://www.netlib.org/lapack/explore-html/df/d20/zsyr2k_8f.html
3536     *
3537     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3538     * @param Trans The type of transpose applied to the operation.
3539     * @param alpha The scalar alpha.
3540     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
3541     * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.
3542     * @param beta The scalar beta.
3543     * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.
3544     */
3545    public void ZSYR2K(@Uplo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C) {
3546        validateUplo(Uplo);
3547        validateSYR2K(Element.F64_2(mRS), Trans, A, B, C);
3548        int K = -1;
3549        if (Trans != NO_TRANSPOSE) {
3550            K = A.getType().getY();
3551        } else {
3552            K = A.getType().getX();
3553        }
3554
3555        boolean mUseIncSupp = isIncSupp();
3556        long aID = A.getID(mRS);
3557        long bID = B.getID(mRS);
3558        long cID = C.getID(mRS);
3559        if (mUseIncSupp) {
3560            aID = getDummyAlloc(A);
3561            bID = getDummyAlloc(B);
3562            cID = getDummyAlloc(C);
3563        }
3564        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zsyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, aID, bID, beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp);
3565    }
3566
3567    static void validateTRMM(Element e, @Side int Side, @Transpose int TransA, Allocation A, Allocation B) {
3568        validateSide(Side);
3569        validateTranspose(TransA);
3570        int aM = -1, aN = -1, bM = -1, bN = -1;
3571        if (!A.getType().getElement().isCompatible(e) ||
3572            !B.getType().getElement().isCompatible(e)) {
3573            throw new RSRuntimeException("Called BLAS with wrong Element type");
3574        }
3575
3576        aM = A.getType().getY();
3577        aN = A.getType().getX();
3578        if (aM != aN) {
3579            throw new RSRuntimeException("Called TRMM with a non-symmetric matrix A");
3580        }
3581
3582        bM = B.getType().getY();
3583        bN = B.getType().getX();
3584        if (Side == LEFT) {
3585            if (aN != bM) {
3586                throw new RSRuntimeException("Called TRMM with invalid matrices");
3587            }
3588        } else {
3589            if (bN != aM) {
3590                throw new RSRuntimeException("Called TRMM with invalid matrices");
3591            }
3592        }
3593    }
3594
3595    /**
3596     * STRMM performs one of the matrix-matrix operations
3597     * B := alpha*op(A)*B   or   B := alpha*B*op(A)
3598     * op(A) is one of  op(A) = A  or  op(A) = A**T
3599     *
3600     * Details: http://www.netlib.org/lapack/explore-html/df/d01/strmm_8f.html
3601     *
3602     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3603     * @param Uplo Specifies whether matrix A is upper or lower triangular.
3604     * @param TransA The type of transpose applied to matrix A.
3605     * @param Diag Specifies whether or not A is unit triangular.
3606     * @param alpha The scalar alpha.
3607     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
3608     * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}.
3609     */
3610    public void STRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, float alpha, Allocation A, Allocation B) {
3611        validateUplo(Uplo);
3612        validateDiag(Diag);
3613        validateTRMM(Element.F32(mRS), Side, TransA, A, B);
3614
3615        boolean mUseIncSupp = isIncSupp();
3616        long aID = A.getID(mRS);
3617        long bID = B.getID(mRS);
3618        if (mUseIncSupp) {
3619            aID = getDummyAlloc(A);
3620            bID = getDummyAlloc(B);
3621        }
3622        mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
3623                                        alpha, aID, bID, 0.f, 0, 0, 0, 0, 0, mUseIncSupp);
3624    }
3625
3626    /**
3627     * DTRMM performs one of the matrix-matrix operations
3628     * B := alpha*op(A)*B   or   B := alpha*B*op(A)
3629     * op(A) is one of  op(A) = A  or  op(A) = A**T
3630     *
3631     * Details: http://www.netlib.org/lapack/explore-html/dd/d19/dtrmm_8f.html
3632     *
3633     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3634     * @param Uplo Specifies whether matrix A is upper or lower triangular.
3635     * @param TransA The type of transpose applied to matrix A.
3636     * @param Diag Specifies whether or not A is unit triangular.
3637     * @param alpha The scalar alpha.
3638     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
3639     * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}.
3640     */
3641    public void DTRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, double alpha, Allocation A, Allocation B) {
3642        validateUplo(Uplo);
3643        validateDiag(Diag);
3644        validateTRMM(Element.F64(mRS), Side, TransA, A, B);
3645
3646        boolean mUseIncSupp = isIncSupp();
3647        long aID = A.getID(mRS);
3648        long bID = B.getID(mRS);
3649        if (mUseIncSupp) {
3650            aID = getDummyAlloc(A);
3651            bID = getDummyAlloc(B);
3652        }
3653        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
3654                                        alpha, aID, bID, 0, 0, 0, 0, 0, 0, mUseIncSupp);
3655    }
3656
3657    /**
3658     * CTRMM performs one of the matrix-matrix operations
3659     * B := alpha*op(A)*B   or   B := alpha*B*op(A)
3660     * op(A) is one of  op(A) = A  or  op(A) = A**T  or  op(A) = A**H
3661     *
3662     * Details: http://www.netlib.org/lapack/explore-html/d4/d9b/ctrmm_8f.html
3663     *
3664     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3665     * @param Uplo Specifies whether matrix A is upper or lower triangular.
3666     * @param TransA The type of transpose applied to matrix A.
3667     * @param Diag Specifies whether or not A is unit triangular.
3668     * @param alpha The scalar alpha.
3669     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
3670     * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.
3671     */
3672    public void CTRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Float2 alpha, Allocation A, Allocation B) {
3673        validateUplo(Uplo);
3674        validateDiag(Diag);
3675        validateTRMM(Element.F32_2(mRS), Side, TransA, A, B);
3676
3677        boolean mUseIncSupp = isIncSupp();
3678        long aID = A.getID(mRS);
3679        long bID = B.getID(mRS);
3680        if (mUseIncSupp) {
3681            aID = getDummyAlloc(A);
3682            bID = getDummyAlloc(B);
3683        }
3684        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
3685                                         alpha.x, alpha.y, aID, bID, 0, 0, 0, 0, 0, 0, 0, mUseIncSupp);
3686    }
3687
3688    /**
3689     * ZTRMM performs one of the matrix-matrix operations
3690     * B := alpha*op(A)*B   or   B := alpha*B*op(A)
3691     * op(A) is one of  op(A) = A  or  op(A) = A**T  or  op(A) = A**H
3692     *
3693     * Details: http://www.netlib.org/lapack/explore-html/d8/de1/ztrmm_8f.html
3694     *
3695     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3696     * @param Uplo Specifies whether matrix A is upper or lower triangular.
3697     * @param TransA The type of transpose applied to matrix A.
3698     * @param Diag Specifies whether or not A is unit triangular.
3699     * @param alpha The scalar alpha.
3700     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
3701     * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.
3702     */
3703    public void ZTRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Double2 alpha, Allocation A, Allocation B) {
3704        validateUplo(Uplo);
3705        validateDiag(Diag);
3706        validateTRMM(Element.F64_2(mRS), Side, TransA, A, B);
3707
3708        boolean mUseIncSupp = isIncSupp();
3709        long aID = A.getID(mRS);
3710        long bID = B.getID(mRS);
3711        if (mUseIncSupp) {
3712            aID = getDummyAlloc(A);
3713            bID = getDummyAlloc(B);
3714        }
3715        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
3716                                   alpha.x, alpha.y, aID, bID, 0, 0, 0, 0, 0, 0, 0, mUseIncSupp);
3717    }
3718
3719    static void validateTRSM(Element e, @Side int Side, @Transpose int TransA, Allocation A, Allocation B) {
3720        int adim = -1, bM = -1, bN = -1;
3721        validateSide(Side);
3722        validateTranspose(TransA);
3723        if (!A.getType().getElement().isCompatible(e) ||
3724            !B.getType().getElement().isCompatible(e)) {
3725            throw new RSRuntimeException("Called BLAS with wrong Element type");
3726        }
3727        adim = A.getType().getX();
3728        if (adim != A.getType().getY()) {
3729            // this may be unnecessary, the restriction could potentially be relaxed
3730            // A needs to contain at least that symmetric matrix but could theoretically be larger
3731            // for now we assume adapters are sufficient, will reevaluate in the future
3732            throw new RSRuntimeException("Called TRSM with a non-symmetric matrix A");
3733        }
3734        bM = B.getType().getY();
3735        bN = B.getType().getX();
3736        if (Side == LEFT) {
3737            // A is M*M
3738            if (adim != bM) {
3739                throw new RSRuntimeException("Called TRSM with invalid matrix dimensions");
3740            }
3741        } else {
3742            // A is N*N
3743            if (adim != bN) {
3744                throw new RSRuntimeException("Called TRSM with invalid matrix dimensions");
3745            }
3746        }
3747    }
3748
3749    /**
3750     * STRSM solves one of the matrix equations
3751     * op(A)*X := alpha*B   or   X*op(A) := alpha*B
3752     * op(A) is one of  op(A) = A  or  op(A) = A**T
3753     *
3754     * Details: http://www.netlib.org/lapack/explore-html/d2/d8b/strsm_8f.html
3755     *
3756     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3757     * @param Uplo Specifies whether matrix A is upper or lower triangular.
3758     * @param TransA The type of transpose applied to matrix A.
3759     * @param Diag Specifies whether or not A is unit triangular.
3760     * @param alpha The scalar alpha.
3761     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
3762     * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}.
3763     */
3764    public void STRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, float alpha, Allocation A, Allocation B) {
3765        validateUplo(Uplo);
3766        validateDiag(Diag);
3767        validateTRSM(Element.F32(mRS), Side, TransA, A, B);
3768
3769        boolean mUseIncSupp = isIncSupp();
3770        long aID = A.getID(mRS);
3771        long bID = B.getID(mRS);
3772        if (mUseIncSupp) {
3773            aID = getDummyAlloc(A);
3774            bID = getDummyAlloc(B);
3775        }
3776        mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
3777                                        alpha, aID, bID, 0, 0, 0, 0, 0, 0, mUseIncSupp);
3778    }
3779
3780    /**
3781     * DTRSM solves one of the matrix equations
3782     * op(A)*X := alpha*B   or   X*op(A) := alpha*B
3783     * op(A) is one of  op(A) = A  or  op(A) = A**T
3784     *
3785     * Details: http://www.netlib.org/lapack/explore-html/de/da7/dtrsm_8f.html
3786     *
3787     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3788     * @param Uplo Specifies whether matrix A is upper or lower triangular.
3789     * @param TransA The type of transpose applied to matrix A.
3790     * @param Diag Specifies whether or not A is unit triangular.
3791     * @param alpha The scalar alpha.
3792     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
3793     * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}.
3794     */
3795    public void DTRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, double alpha, Allocation A, Allocation B) {
3796        validateUplo(Uplo);
3797        validateDiag(Diag);
3798        validateTRSM(Element.F64(mRS), Side, TransA, A, B);
3799
3800        boolean mUseIncSupp = isIncSupp();
3801        long aID = A.getID(mRS);
3802        long bID = B.getID(mRS);
3803        if (mUseIncSupp) {
3804            aID = getDummyAlloc(A);
3805            bID = getDummyAlloc(B);
3806        }
3807        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
3808                                        alpha, aID, bID, 0, 0, 0, 0, 0, 0, mUseIncSupp);
3809    }
3810
3811    /**
3812     * CTRSM solves one of the matrix equations
3813     * op(A)*X := alpha*B   or   X*op(A) := alpha*B
3814     * op(A) is one of  op(A) = A  or  op(A) = A**T  or  op(A) = A**H
3815     *
3816     * Details: http://www.netlib.org/lapack/explore-html/de/d30/ctrsm_8f.html
3817     *
3818     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3819     * @param Uplo Specifies whether matrix A is upper or lower triangular.
3820     * @param TransA The type of transpose applied to matrix A.
3821     * @param Diag Specifies whether or not A is unit triangular.
3822     * @param alpha The scalar alpha.
3823     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
3824     * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.
3825     */
3826    public void CTRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Float2 alpha, Allocation A, Allocation B) {
3827        validateUplo(Uplo);
3828        validateDiag(Diag);
3829        validateTRSM(Element.F32_2(mRS), Side, TransA, A, B);
3830
3831        boolean mUseIncSupp = isIncSupp();
3832        long aID = A.getID(mRS);
3833        long bID = B.getID(mRS);
3834        if (mUseIncSupp) {
3835            aID = getDummyAlloc(A);
3836            bID = getDummyAlloc(B);
3837        }
3838        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
3839                                         alpha.x, alpha.y, aID, bID, 0, 0, 0, 0, 0, 0, 0, mUseIncSupp);
3840    }
3841
3842    /**
3843     * ZTRSM solves one of the matrix equations
3844     * op(A)*X := alpha*B   or   X*op(A) := alpha*B
3845     * op(A) is one of  op(A) = A  or  op(A) = A**T  or  op(A) = A**H
3846     *
3847     * Details: http://www.netlib.org/lapack/explore-html/d1/d39/ztrsm_8f.html
3848     *
3849     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3850     * @param Uplo Specifies whether matrix A is upper or lower triangular.
3851     * @param TransA The type of transpose applied to matrix A.
3852     * @param Diag Specifies whether or not A is unit triangular.
3853     * @param alpha The scalar alpha.
3854     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
3855     * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.
3856     */
3857    public void ZTRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Double2 alpha, Allocation A, Allocation B) {
3858        validateUplo(Uplo);
3859        validateDiag(Diag);
3860        validateTRSM(Element.F64_2(mRS), Side, TransA, A, B);
3861
3862        boolean mUseIncSupp = isIncSupp();
3863        long aID = A.getID(mRS);
3864        long bID = B.getID(mRS);
3865        if (mUseIncSupp) {
3866            aID = getDummyAlloc(A);
3867            bID = getDummyAlloc(B);
3868        }
3869        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
3870                                   alpha.x, alpha.y, aID, bID, 0, 0, 0, 0, 0, 0, 0, mUseIncSupp);
3871    }
3872
3873    static void validateHEMM(Element e, @Side int Side, Allocation A, Allocation B, Allocation C) {
3874        validateSide(Side);
3875
3876        if (!A.getType().getElement().isCompatible(e) ||
3877            !B.getType().getElement().isCompatible(e) ||
3878            !C.getType().getElement().isCompatible(e)) {
3879            throw new RSRuntimeException("Called BLAS with wrong Element type");
3880        }
3881
3882        // A must be square; can potentially be relaxed similar to TRSM
3883        int adim = A.getType().getX();
3884        if (adim != A.getType().getY()) {
3885            throw new RSRuntimeException("Called HEMM with non-square A");
3886        }
3887        if ((Side == LEFT && adim != B.getType().getY()) ||
3888            (Side == RIGHT && adim != B.getType().getX())) {
3889            throw new RSRuntimeException("Called HEMM with invalid B");
3890        }
3891        if (B.getType().getX() != C.getType().getX() ||
3892            B.getType().getY() != C.getType().getY()) {
3893            throw new RSRuntimeException("Called HEMM with mismatched B and C");
3894        }
3895    }
3896
3897    /**
3898     * CHEMM performs one of the matrix-matrix operations
3899     * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
3900     *
3901     * Details: http://www.netlib.org/lapack/explore-html/d3/d66/chemm_8f.html
3902     *
3903     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3904     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
3905     * @param alpha The scalar alpha.
3906     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
3907     * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.
3908     * @param beta The scalar beta.
3909     * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.
3910     */
3911    public void CHEMM(@Side int Side, @Uplo int Uplo, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C) {
3912        validateUplo(Uplo);
3913        validateHEMM(Element.F32_2(mRS), Side, A, B, C);
3914
3915        boolean mUseIncSupp = isIncSupp();
3916        long aID = A.getID(mRS);
3917        long bID = B.getID(mRS);
3918        long cID = C.getID(mRS);
3919        if (mUseIncSupp) {
3920            aID = getDummyAlloc(A);
3921            bID = getDummyAlloc(B);
3922            cID = getDummyAlloc(C);
3923        }
3924        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chemm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0,
3925                                         alpha.x, alpha.y, aID, bID, beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp);
3926    }
3927
3928    /**
3929     * ZHEMM performs one of the matrix-matrix operations
3930     * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
3931     *
3932     * Details: http://www.netlib.org/lapack/explore-html/d6/d3e/zhemm_8f.html
3933     *
3934     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3935     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
3936     * @param alpha The scalar alpha.
3937     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
3938     * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.
3939     * @param beta The scalar beta.
3940     * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.
3941     */
3942    public void ZHEMM(@Side int Side, @Uplo int Uplo, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C) {
3943        validateUplo(Uplo);
3944        validateHEMM(Element.F64_2(mRS), Side, A, B, C);
3945
3946        boolean mUseIncSupp = isIncSupp();
3947        long aID = A.getID(mRS);
3948        long bID = B.getID(mRS);
3949        long cID = C.getID(mRS);
3950        if (mUseIncSupp) {
3951            aID = getDummyAlloc(A);
3952            bID = getDummyAlloc(B);
3953            cID = getDummyAlloc(C);
3954        }
3955        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhemm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0,
3956                                   alpha.x, alpha.y, aID, bID, beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp);
3957    }
3958
3959    static void validateHERK(Element e, @Transpose int Trans, Allocation A, Allocation C) {
3960        if (!A.getType().getElement().isCompatible(e) ||
3961            !C.getType().getElement().isCompatible(e)) {
3962            throw new RSRuntimeException("Called BLAS with wrong Element type");
3963        }
3964        validateConjTranspose(Trans);
3965        int cdim = C.getType().getX();
3966        if (cdim != C.getType().getY()) {
3967            throw new RSRuntimeException("Called HERK with non-square C");
3968        }
3969        if (Trans == NO_TRANSPOSE) {
3970            if (cdim != A.getType().getY()) {
3971                throw new RSRuntimeException("Called HERK with invalid A");
3972            }
3973        } else {
3974            if (cdim != A.getType().getX()) {
3975                throw new RSRuntimeException("Called HERK with invalid A");
3976            }
3977        }
3978    }
3979
3980    /**
3981     * CHERK performs one of the hermitian rank k operations
3982     * C := alpha*A*A**H + beta*C   or   C := alpha*A**H*A + beta*C
3983     *
3984     * Details: http://www.netlib.org/lapack/explore-html/d8/d52/cherk_8f.html
3985     *
3986     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3987     * @param Trans The type of transpose applied to the operation.
3988     * @param alpha The scalar alpha.
3989     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
3990     * @param beta The scalar beta.
3991     * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.
3992     */
3993    public void CHERK(@Uplo int Uplo, @Transpose int Trans, float alpha, Allocation A, float beta, Allocation C) {
3994        validateUplo(Uplo);
3995        validateHERK(Element.F32_2(mRS), Trans, A, C);
3996        int k = 0;
3997        if (Trans == CONJ_TRANSPOSE) {
3998            k = A.getType().getY();
3999        } else {
4000            k = A.getType().getX();
4001        }
4002
4003        boolean mUseIncSupp = isIncSupp();
4004        long aID = A.getID(mRS);
4005        long cID = C.getID(mRS);
4006        if (mUseIncSupp) {
4007            aID = getDummyAlloc(A);
4008            cID = getDummyAlloc(C);
4009        }
4010        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cherk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k,
4011                                         alpha, 0, aID, 0, beta, 0, cID, 0, 0, 0, 0, mUseIncSupp);
4012    }
4013
4014    /**
4015     * ZHERK performs one of the hermitian rank k operations
4016     * C := alpha*A*A**H + beta*C   or   C := alpha*A**H*A + beta*C
4017     *
4018     * Details: http://www.netlib.org/lapack/explore-html/d1/db1/zherk_8f.html
4019     *
4020     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
4021     * @param Trans The type of transpose applied to the operation.
4022     * @param alpha The scalar alpha.
4023     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
4024     * @param beta The scalar beta.
4025     * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.
4026     */
4027    public void ZHERK(@Uplo int Uplo, @Transpose int Trans, double alpha, Allocation A, double beta, Allocation C) {
4028        validateUplo(Uplo);
4029        validateHERK(Element.F64_2(mRS), Trans, A, C);
4030        int k = 0;
4031        if (Trans == CONJ_TRANSPOSE) {
4032            k = A.getType().getY();
4033        } else {
4034            k = A.getType().getX();
4035        }
4036
4037        boolean mUseIncSupp = isIncSupp();
4038        long aID = A.getID(mRS);
4039        long cID = C.getID(mRS);
4040        if (mUseIncSupp) {
4041            aID = getDummyAlloc(A);
4042            cID = getDummyAlloc(C);
4043        }
4044        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zherk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k,
4045                                   alpha, 0, aID, 0, beta, 0, cID, 0, 0, 0, 0, mUseIncSupp);
4046    }
4047
4048    static void validateHER2K(Element e, @Transpose int Trans, Allocation A, Allocation B, Allocation C) {
4049        if (!A.getType().getElement().isCompatible(e) ||
4050            !B.getType().getElement().isCompatible(e) ||
4051            !C.getType().getElement().isCompatible(e)) {
4052            throw new RSRuntimeException("Called BLAS with wrong Element type");
4053        }
4054        validateConjTranspose(Trans);
4055        int cdim = C.getType().getX();
4056        if (cdim != C.getType().getY()) {
4057            throw new RSRuntimeException("Called HER2K with non-square C");
4058        }
4059        if (Trans == NO_TRANSPOSE) {
4060            if (A.getType().getY() != cdim) {
4061                throw new RSRuntimeException("Called HER2K with invalid matrices");
4062            }
4063        } else {
4064            if (A.getType().getX() != cdim) {
4065                throw new RSRuntimeException("Called HER2K with invalid matrices");
4066            }
4067        }
4068        if (A.getType().getX() != B.getType().getX() || A.getType().getY() != B.getType().getY()) {
4069            throw new RSRuntimeException("Called HER2K with invalid A and B matrices");
4070        }
4071    }
4072
4073    /**
4074     * CHER2K performs one of the hermitian rank 2k operations
4075     * C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C   or   C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C
4076     *
4077     * Details: http://www.netlib.org/lapack/explore-html/d1/d82/cher2k_8f.html
4078     *
4079     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
4080     * @param Trans The type of transpose applied to the operation.
4081     * @param alpha The scalar alpha.
4082     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
4083     * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.
4084     * @param beta The scalar beta.
4085     * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.
4086     */
4087    public void CHER2K(@Uplo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Allocation B, float beta, Allocation C) {
4088        validateUplo(Uplo);
4089        validateHER2K(Element.F32_2(mRS), Trans, A, B, C);
4090        int k = 0;
4091        if (Trans == NO_TRANSPOSE) {
4092            k = A.getType().getX();
4093        } else {
4094            k = A.getType().getY();
4095        }
4096
4097        boolean mUseIncSupp = isIncSupp();
4098        long aID = A.getID(mRS);
4099        long bID = B.getID(mRS);
4100        long cID = C.getID(mRS);
4101        if (mUseIncSupp) {
4102            aID = getDummyAlloc(A);
4103            bID = getDummyAlloc(B);
4104            cID = getDummyAlloc(C);
4105        }
4106        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cher2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k, alpha.x, alpha.y,
4107                                         A.getID(mRS), bID, beta, 0, cID, 0, 0, 0, 0, mUseIncSupp);
4108    }
4109
4110    /**
4111     * ZHER2K performs one of the hermitian rank 2k operations
4112     * C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C   or   C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C
4113     *
4114     * Details: http://www.netlib.org/lapack/explore-html/d7/dfa/zher2k_8f.html
4115     *
4116     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
4117     * @param Trans The type of transpose applied to the operation.
4118     * @param alpha The scalar alpha.
4119     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
4120     * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.
4121     * @param beta The scalar beta.
4122     * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.
4123     */
4124    public void ZHER2K(@Uplo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Allocation B, double beta, Allocation C) {
4125        validateUplo(Uplo);
4126        validateHER2K(Element.F64_2(mRS), Trans, A, B, C);
4127        int k = 0;
4128        if (Trans == NO_TRANSPOSE) {
4129            k = A.getType().getX();
4130        } else {
4131            k = A.getType().getY();
4132        }
4133
4134        boolean mUseIncSupp = isIncSupp();
4135        long aID = A.getID(mRS);
4136        long bID = B.getID(mRS);
4137        long cID = C.getID(mRS);
4138        if (mUseIncSupp) {
4139            aID = getDummyAlloc(A);
4140            bID = getDummyAlloc(B);
4141            cID = getDummyAlloc(C);
4142        }
4143        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zher2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k, alpha.x, alpha.y,
4144                                   A.getID(mRS), bID, beta, 0, cID, 0, 0, 0, 0, mUseIncSupp);
4145    }
4146
4147
4148    /**
4149     * 8-bit GEMM-like operation for neural networks: C = A * Transpose(B)
4150     * Calculations are done in 1.10.21 fixed-point format for the final output,
4151     * just before there's a shift down to drop the fractional parts. The output
4152     * values are gated to 0 to 255 to fit in a byte, but the 10-bit format
4153     * gives some headroom to avoid wrapping around on small overflows.
4154     *
4155     * @param A The input allocation contains matrix A, supported elements type {@link Element#U8}.
4156     * @param a_offset The offset for all values in matrix A, e.g A[i,j] = A[i,j] - a_offset. Value should be from 0 to 255.
4157     * @param B The input allocation contains matrix B, supported elements type {@link Element#U8}.
4158     * @param b_offset The offset for all values in matrix B, e.g B[i,j] = B[i,j] - b_offset. Value should be from 0 to 255.
4159     * @param C The input allocation contains matrix C, supported elements type {@link Element#U8}.
4160     * @param c_offset The offset for all values in matrix C.
4161     * @param c_mult The multiplier for all values in matrix C, e.g C[i,j] = (C[i,j] + c_offset) * c_mult.
4162     **/
4163    public void BNNM(Allocation A, int a_offset, Allocation B, int b_offset, Allocation C, int c_offset, int c_mult) {
4164        validateL3(Element.U8(mRS), NO_TRANSPOSE, TRANSPOSE, 0, A, B, C);
4165
4166        if (a_offset < 0 || a_offset > 255) {
4167            throw new RSRuntimeException("Invalid a_offset passed to BNNM");
4168        }
4169        if (b_offset < 0 || b_offset > 255) {
4170            throw new RSRuntimeException("Invalid b_offset passed to BNNM");
4171        }
4172        int M = -1, N = -1, K = -1;
4173        M = A.getType().getY();
4174        N = B.getType().getY();
4175        K = A.getType().getX();
4176
4177        boolean mUseIncSupp = isIncSupp();
4178        long aID = A.getID(mRS);
4179        long bID = B.getID(mRS);
4180        long cID = C.getID(mRS);
4181        if (mUseIncSupp) {
4182            aID = getDummyAlloc(A);
4183            bID = getDummyAlloc(B);
4184            cID = getDummyAlloc(C);
4185        }
4186        mRS.nScriptIntrinsicBLAS_BNNM(getID(mRS), M, N, K, aID, a_offset, bID, b_offset, cID, c_offset, c_mult, mUseIncSupp);
4187
4188    }
4189
4190}
4191