ScriptIntrinsicBLAS.java revision 5589de8e4724ebc52d7f09f7449484473a4b6ad3
1/*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package android.support.v8.renderscript;
18
19import android.support.annotation.IntDef;
20import java.lang.annotation.Retention;
21import java.lang.annotation.RetentionPolicy;
22
23/**
24 * @hide
25 * ScriptIntrinsicBLAS class provides high performance RenderScript APIs to BLAS.
26 *
27 * The BLAS (Basic Linear Algebra Subprograms) are routines that provide standard
28 * building blocks for performing basic vector and matrix operations.
29 *
30 * For detailed description of BLAS, please refer to http://www.netlib.org/blas/
31 *
32 **/
33public final class ScriptIntrinsicBLAS extends ScriptIntrinsic {
34    private Allocation mLUT;
35    private static final int INTRINSIC_API_LEVEL = 23;
36
37    private ScriptIntrinsicBLAS(long id, RenderScript rs) {
38        super(id, rs);
39    }
40
41    private static final int RsBlas_sdsdot = 1;
42    private static final int RsBlas_dsdot = 2;
43    private static final int RsBlas_sdot = 3;
44    private static final int RsBlas_ddot = 4;
45    private static final int RsBlas_cdotu_sub = 5;
46    private static final int RsBlas_cdotc_sub = 6;
47    private static final int RsBlas_zdotu_sub = 7;
48    private static final int RsBlas_zdotc_sub = 8;
49    private static final int RsBlas_snrm2 = 9;
50    private static final int RsBlas_sasum = 10;
51    private static final int RsBlas_dnrm2 = 11;
52    private static final int RsBlas_dasum = 12;
53    private static final int RsBlas_scnrm2 = 13;
54    private static final int RsBlas_scasum = 14;
55    private static final int RsBlas_dznrm2 = 15;
56    private static final int RsBlas_dzasum = 16;
57    private static final int RsBlas_isamax = 17;
58    private static final int RsBlas_idamax = 18;
59    private static final int RsBlas_icamax = 19;
60    private static final int RsBlas_izamax = 20;
61    private static final int RsBlas_sswap = 21;
62    private static final int RsBlas_scopy = 22;
63    private static final int RsBlas_saxpy = 23;
64    private static final int RsBlas_dswap = 24;
65    private static final int RsBlas_dcopy = 25;
66    private static final int RsBlas_daxpy = 26;
67    private static final int RsBlas_cswap = 27;
68    private static final int RsBlas_ccopy = 28;
69    private static final int RsBlas_caxpy = 29;
70    private static final int RsBlas_zswap = 30;
71    private static final int RsBlas_zcopy = 31;
72    private static final int RsBlas_zaxpy = 32;
73    private static final int RsBlas_srotg = 33;
74    private static final int RsBlas_srotmg = 34;
75    private static final int RsBlas_srot = 35;
76    private static final int RsBlas_srotm = 36;
77    private static final int RsBlas_drotg = 37;
78    private static final int RsBlas_drotmg = 38;
79    private static final int RsBlas_drot = 39;
80    private static final int RsBlas_drotm = 40;
81    private static final int RsBlas_sscal = 41;
82    private static final int RsBlas_dscal = 42;
83    private static final int RsBlas_cscal = 43;
84    private static final int RsBlas_zscal = 44;
85    private static final int RsBlas_csscal = 45;
86    private static final int RsBlas_zdscal = 46;
87    private static final int RsBlas_sgemv = 47;
88    private static final int RsBlas_sgbmv = 48;
89    private static final int RsBlas_strmv = 49;
90    private static final int RsBlas_stbmv = 50;
91    private static final int RsBlas_stpmv = 51;
92    private static final int RsBlas_strsv = 52;
93    private static final int RsBlas_stbsv = 53;
94    private static final int RsBlas_stpsv = 54;
95    private static final int RsBlas_dgemv = 55;
96    private static final int RsBlas_dgbmv = 56;
97    private static final int RsBlas_dtrmv = 57;
98    private static final int RsBlas_dtbmv = 58;
99    private static final int RsBlas_dtpmv = 59;
100    private static final int RsBlas_dtrsv = 60;
101    private static final int RsBlas_dtbsv = 61;
102    private static final int RsBlas_dtpsv = 62;
103    private static final int RsBlas_cgemv = 63;
104    private static final int RsBlas_cgbmv = 64;
105    private static final int RsBlas_ctrmv = 65;
106    private static final int RsBlas_ctbmv = 66;
107    private static final int RsBlas_ctpmv = 67;
108    private static final int RsBlas_ctrsv = 68;
109    private static final int RsBlas_ctbsv = 69;
110    private static final int RsBlas_ctpsv = 70;
111    private static final int RsBlas_zgemv = 71;
112    private static final int RsBlas_zgbmv = 72;
113    private static final int RsBlas_ztrmv = 73;
114    private static final int RsBlas_ztbmv = 74;
115    private static final int RsBlas_ztpmv = 75;
116    private static final int RsBlas_ztrsv = 76;
117    private static final int RsBlas_ztbsv = 77;
118    private static final int RsBlas_ztpsv = 78;
119    private static final int RsBlas_ssymv = 79;
120    private static final int RsBlas_ssbmv = 80;
121    private static final int RsBlas_sspmv = 81;
122    private static final int RsBlas_sger = 82;
123    private static final int RsBlas_ssyr = 83;
124    private static final int RsBlas_sspr = 84;
125    private static final int RsBlas_ssyr2 = 85;
126    private static final int RsBlas_sspr2 = 86;
127    private static final int RsBlas_dsymv = 87;
128    private static final int RsBlas_dsbmv = 88;
129    private static final int RsBlas_dspmv = 89;
130    private static final int RsBlas_dger = 90;
131    private static final int RsBlas_dsyr = 91;
132    private static final int RsBlas_dspr = 92;
133    private static final int RsBlas_dsyr2 = 93;
134    private static final int RsBlas_dspr2 = 94;
135    private static final int RsBlas_chemv = 95;
136    private static final int RsBlas_chbmv = 96;
137    private static final int RsBlas_chpmv = 97;
138    private static final int RsBlas_cgeru = 98;
139    private static final int RsBlas_cgerc = 99;
140    private static final int RsBlas_cher = 100;
141    private static final int RsBlas_chpr = 101;
142    private static final int RsBlas_cher2 = 102;
143    private static final int RsBlas_chpr2 = 103;
144    private static final int RsBlas_zhemv = 104;
145    private static final int RsBlas_zhbmv = 105;
146    private static final int RsBlas_zhpmv = 106;
147    private static final int RsBlas_zgeru = 107;
148    private static final int RsBlas_zgerc = 108;
149    private static final int RsBlas_zher = 109;
150    private static final int RsBlas_zhpr = 110;
151    private static final int RsBlas_zher2 = 111;
152    private static final int RsBlas_zhpr2 = 112;
153    private static final int RsBlas_sgemm = 113;
154    private static final int RsBlas_ssymm = 114;
155    private static final int RsBlas_ssyrk = 115;
156    private static final int RsBlas_ssyr2k = 116;
157    private static final int RsBlas_strmm = 117;
158    private static final int RsBlas_strsm = 118;
159    private static final int RsBlas_dgemm = 119;
160    private static final int RsBlas_dsymm = 120;
161    private static final int RsBlas_dsyrk = 121;
162    private static final int RsBlas_dsyr2k = 122;
163    private static final int RsBlas_dtrmm = 123;
164    private static final int RsBlas_dtrsm = 124;
165    private static final int RsBlas_cgemm = 125;
166    private static final int RsBlas_csymm = 126;
167    private static final int RsBlas_csyrk = 127;
168    private static final int RsBlas_csyr2k = 128;
169    private static final int RsBlas_ctrmm = 129;
170    private static final int RsBlas_ctrsm = 130;
171    private static final int RsBlas_zgemm = 131;
172    private static final int RsBlas_zsymm = 132;
173    private static final int RsBlas_zsyrk = 133;
174    private static final int RsBlas_zsyr2k = 134;
175    private static final int RsBlas_ztrmm = 135;
176    private static final int RsBlas_ztrsm = 136;
177    private static final int RsBlas_chemm = 137;
178    private static final int RsBlas_cherk = 138;
179    private static final int RsBlas_cher2k = 139;
180    private static final int RsBlas_zhemm = 140;
181    private static final int RsBlas_zherk = 141;
182    private static final int RsBlas_zher2k = 142;
183
184    // BLAS extensions start here
185    private static final int RsBlas_bnnm = 1000;
186
187    /**
188     * Create an intrinsic to access BLAS subroutines.
189     *
190     * @param rs The RenderScript context
191     * @return ScriptIntrinsicBLAS
192     */
193    public static ScriptIntrinsicBLAS create(RenderScript rs) {
194        long id;
195        boolean mUseIncSupp = rs.isUseNative() &&
196                              android.os.Build.VERSION.SDK_INT < INTRINSIC_API_LEVEL;
197
198        id = rs.nScriptIntrinsicCreate(13, Element.U32(rs).getID(rs), mUseIncSupp);
199        ScriptIntrinsicBLAS si = new ScriptIntrinsicBLAS(id, rs);
200        si.setIncSupp(mUseIncSupp);
201        return si;
202    }
203
204    @IntDef({NO_TRANSPOSE, TRANSPOSE, CONJ_TRANSPOSE})
205    @Retention(RetentionPolicy.SOURCE)
206    public @interface Transpose {}
207
208    @IntDef({UPPER, LOWER})
209    @Retention(RetentionPolicy.SOURCE)
210    public @interface Uplo {}
211
212    @IntDef({NON_UNIT, UNIT})
213    @Retention(RetentionPolicy.SOURCE)
214    public @interface Diag {}
215
216    @IntDef({LEFT, RIGHT})
217    @Retention(RetentionPolicy.SOURCE)
218    public @interface Side {}
219
220    public static final int NO_TRANSPOSE = 111;
221    public static final int TRANSPOSE = 112;
222    public static final int CONJ_TRANSPOSE = 113;
223
224    public static final int UPPER = 121;
225    public static final int LOWER = 122;
226
227    public static final int NON_UNIT = 131;
228    public static final int UNIT = 132;
229
230    public static final int LEFT = 141;
231    public static final int RIGHT = 142;
232
233    static void validateSide(@Side int Side) {
234        if (Side != LEFT && Side != RIGHT) {
235            throw new RSRuntimeException("Invalid side passed to BLAS");
236        }
237    }
238
239    static void validateTranspose(@Transpose int Trans) {
240        if (Trans != NO_TRANSPOSE && Trans != TRANSPOSE &&
241            Trans != CONJ_TRANSPOSE) {
242            throw new RSRuntimeException("Invalid transpose passed to BLAS");
243        }
244    }
245
246    static void validateConjTranspose(@Transpose int Trans) {
247        if (Trans != NO_TRANSPOSE &&
248            Trans != CONJ_TRANSPOSE) {
249            throw new RSRuntimeException("Invalid transpose passed to BLAS");
250        }
251    }
252
253    static void validateDiag(@Diag int Diag) {
254        if (Diag != NON_UNIT && Diag != UNIT) {
255            throw new RSRuntimeException("Invalid diag passed to BLAS");
256        }
257    }
258
259    static void validateUplo(@Uplo int Uplo) {
260        if (Uplo != UPPER && Uplo != LOWER) {
261            throw new RSRuntimeException("Invalid uplo passed to BLAS");
262        }
263    }
264
265
266    /**
267     * Level 2 BLAS
268     */
269
270    static void validateGEMV(Element e, int TransA, Allocation A, Allocation X, int incX, Allocation Y, int incY) {
271        validateTranspose(TransA);
272        int M = A.getType().getY();
273        int N = A.getType().getX();
274        if (!A.getType().getElement().isCompatible(e) ||
275            !X.getType().getElement().isCompatible(e) ||
276            !Y.getType().getElement().isCompatible(e)) {
277            throw new RSRuntimeException("Called BLAS with wrong Element type");
278        }
279        if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
280            throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
281        }
282
283        if (incX <= 0 || incY <= 0) {
284            throw new RSRuntimeException("Vector increments must be greater than 0");
285        }
286        int expectedXDim = -1, expectedYDim = -1;
287        if (TransA == NO_TRANSPOSE) {
288            expectedXDim = 1 + (N - 1) * incX;
289            expectedYDim = 1 + (M - 1) * incY;
290        } else {
291            expectedXDim = 1 + (M - 1) * incX;
292            expectedYDim = 1 + (N - 1) * incY;
293        }
294        if (X.getType().getX() != expectedXDim ||
295            Y.getType().getX() != expectedYDim) {
296            throw new RSRuntimeException("Incorrect vector dimensions for GEMV");
297        }
298    }
299
300    /**
301     * SGEMV performs one of the matrix-vector operations
302     * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y
303     *
304     * Details: http://www.netlib.org/lapack/explore-html/db/d58/sgemv_8f.html
305     *
306     * @param TransA The type of transpose applied to matrix A.
307     * @param alpha The scalar alpha.
308     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
309     * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
310     * @param incX The increment for the elements of vector x, must be larger than zero.
311     * @param beta The scalar beta.
312     * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
313     * @param incY The increment for the elements of vector y, must be larger than zero.
314     */
315    public void SGEMV(@Transpose int TransA, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) {
316        validateGEMV(Element.F32(mRS), TransA, A, X, incX, Y, incY);
317        int M = A.getType().getY();
318        int N = A.getType().getX();
319
320        boolean mUseIncSupp = isIncSupp();
321        long aID = A.getID(mRS);
322        long xID = X.getID(mRS);
323        long yID = Y.getID(mRS);
324        if (mUseIncSupp) {
325            aID = getDummyAlloc(A);
326            xID = getDummyAlloc(X);
327            yID = getDummyAlloc(Y);
328        }
329        mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha, aID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp);
330    }
331
332    /**
333     * DGEMV performs one of the matrix-vector operations
334     * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y
335     *
336     * Details: http://www.netlib.org/lapack/explore-html/dc/da8/dgemv_8f.html
337     *
338     * @param TransA The type of transpose applied to matrix A.
339     * @param alpha The scalar alpha.
340     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
341     * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
342     * @param incX The increment for the elements of vector x, must be larger than zero.
343     * @param beta The scalar beta.
344     * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
345     * @param incY The increment for the elements of vector y, must be larger than zero.
346     */
347    public void DGEMV(@Transpose int TransA, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) {
348        validateGEMV(Element.F64(mRS), TransA, A, X, incX, Y, incY);
349        int M = A.getType().getY();
350        int N = A.getType().getX();
351
352        boolean mUseIncSupp = isIncSupp();
353        long aID = A.getID(mRS);
354        long xID = X.getID(mRS);
355        long yID = Y.getID(mRS);
356        if (mUseIncSupp) {
357            aID = getDummyAlloc(A);
358            xID = getDummyAlloc(X);
359            yID = getDummyAlloc(Y);
360        }
361        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha, aID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp);
362    }
363
364    /**
365     * CGEMV performs one of the matrix-vector operations
366     * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y   or   y := alpha*A**H*x + beta*y
367     *
368     * Details: http://www.netlib.org/lapack/explore-html/d4/d8a/cgemv_8f.html
369     *
370     * @param TransA The type of transpose applied to matrix A.
371     * @param alpha The scalar alpha.
372     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
373     * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
374     * @param incX The increment for the elements of vector x, must be larger than zero.
375     * @param beta The scalar beta.
376     * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
377     * @param incY The increment for the elements of vector y, must be larger than zero.
378     */
379    public void CGEMV(@Transpose int TransA, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) {
380        validateGEMV(Element.F32_2(mRS), TransA, A, X, incX, Y, incY);
381        int M = A.getType().getY();
382        int N = A.getType().getX();
383
384        boolean mUseIncSupp = isIncSupp();
385        long aID = A.getID(mRS);
386        long xID = X.getID(mRS);
387        long yID = Y.getID(mRS);
388        if (mUseIncSupp) {
389            aID = getDummyAlloc(A);
390            xID = getDummyAlloc(X);
391            yID = getDummyAlloc(Y);
392        }
393        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp);
394    }
395
396    /**
397     * ZGEMV performs one of the matrix-vector operations
398     * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y   or   y := alpha*A**H*x + beta*y
399     *
400     * Details: http://www.netlib.org/lapack/explore-html/db/d40/zgemv_8f.html
401     *
402     * @param TransA The type of transpose applied to matrix A.
403     * @param alpha The scalar alpha.
404     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
405     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
406     * @param incX The increment for the elements of vector x, must be larger than zero.
407     * @param beta The scalar beta.
408     * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
409     * @param incY The increment for the elements of vector y, must be larger than zero.
410     */
411    public void ZGEMV(@Transpose int TransA, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) {
412        validateGEMV(Element.F64_2(mRS), TransA, A, X, incX, Y, incY);
413        int M = A.getType().getY();
414        int N = A.getType().getX();
415
416        boolean mUseIncSupp = isIncSupp();
417        long aID = A.getID(mRS);
418        long xID = X.getID(mRS);
419        long yID = Y.getID(mRS);
420        if (mUseIncSupp) {
421            aID = getDummyAlloc(A);
422            xID = getDummyAlloc(X);
423            yID = getDummyAlloc(Y);
424        }
425        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp);
426    }
427
428    /**
429     * SGBMV performs one of the matrix-vector operations
430     * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y
431     *
432     * Details: http://www.netlib.org/lapack/explore-html/d6/d46/sgbmv_8f.html
433     *
434     * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),
435     *       but only the region M*(KL+KU+1) will be referenced. The following subroutine can is an
436     *       example showing how to convert the original matrix 'a' to row-based band matrix 'b'.
437     *           for i in range(0, m):
438     *              for j in range(max(0, i-kl), min(i+ku+1, n)):
439     *                  b[i, j-i+kl] = a[i, j]
440     *
441     * @param TransA The type of transpose applied to matrix A.
442     * @param KL The number of sub-diagonals of the matrix A.
443     * @param KU The number of super-diagonals of the matrix A.
444     * @param alpha The scalar alpha.
445     * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F32}.
446     * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
447     * @param incX The increment for the elements of vector x, must be larger than zero.
448     * @param beta The scalar beta.
449     * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
450     * @param incY The increment for the elements of vector y, must be larger than zero.
451     */
452    public void SGBMV(@Transpose int TransA, int KL, int KU, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) {
453        // GBMV has the same validation requirements as GEMV + KL and KU >= 0
454        validateGEMV(Element.F32(mRS), TransA, A, X, incX, Y, incY);
455        if (KL < 0 || KU < 0) {
456            throw new RSRuntimeException("KL and KU must be greater than or equal to 0");
457        }
458        int M = A.getType().getY();
459        int N = A.getType().getX();
460
461        boolean mUseIncSupp = isIncSupp();
462        long aID = A.getID(mRS);
463        long xID = X.getID(mRS);
464        long yID = Y.getID(mRS);
465        if (mUseIncSupp) {
466            aID = getDummyAlloc(A);
467            xID = getDummyAlloc(X);
468            yID = getDummyAlloc(Y);
469        }
470        mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha, aID, xID, beta, yID, incX, incY, KL, KU, mUseIncSupp);
471    }
472
473    /**
474     * DGBMV performs one of the matrix-vector operations
475     * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y
476     *
477     * Details: http://www.netlib.org/lapack/explore-html/d2/d3f/dgbmv_8f.html
478     *
479     * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),
480     *       but only the region M*(KL+KU+1) will be referenced. The following subroutine can is an
481     *       example showing how to convert the original matrix 'a' to row-based band matrix 'b'.
482     *           for i in range(0, m):
483     *              for j in range(max(0, i-kl), min(i+ku+1, n)):
484     *                  b[i, j-i+kl] = a[i, j]
485     *
486     * @param TransA The type of transpose applied to matrix A.
487     * @param KL The number of sub-diagonals of the matrix A.
488     * @param KU The number of super-diagonals of the matrix A.
489     * @param alpha The scalar alpha.
490     * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F64}.
491     * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
492     * @param incX The increment for the elements of vector x, must be larger than zero.
493     * @param beta The scalar beta.
494     * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
495     * @param incY The increment for the elements of vector y, must be larger than zero.
496     */
497    public void DGBMV(@Transpose int TransA, int KL, int KU, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) {
498        // GBMV has the same validation requirements as GEMV + KL and KU >= 0
499        validateGEMV(Element.F64(mRS), TransA, A, X, incX, Y, incY);
500        if (KL < 0 || KU < 0) {
501            throw new RSRuntimeException("KL and KU must be greater than or equal to 0");
502        }
503        int M = A.getType().getY();
504        int N = A.getType().getX();
505
506        boolean mUseIncSupp = isIncSupp();
507        long aID = A.getID(mRS);
508        long xID = X.getID(mRS);
509        long yID = Y.getID(mRS);
510        if (mUseIncSupp) {
511            aID = getDummyAlloc(A);
512            xID = getDummyAlloc(X);
513            yID = getDummyAlloc(Y);
514        }
515        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha, aID, xID, beta, yID, incX, incY, KL, KU, mUseIncSupp);
516    }
517
518    /**
519     * CGBMV performs one of the matrix-vector operations
520     * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y   or   y := alpha*A**H*x + beta*y
521     *
522     * Details: http://www.netlib.org/lapack/explore-html/d0/d75/cgbmv_8f.html
523     *
524     * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),
525     *       but only the region M*(KL+KU+1) will be referenced. The following subroutine can is an
526     *       example showing how to convert the original matrix 'a' to row-based band matrix 'b'.
527     *           for i in range(0, m):
528     *              for j in range(max(0, i-kl), min(i+ku+1, n)):
529     *                  b[i, j-i+kl] = a[i, j]
530     *
531     * @param TransA The type of transpose applied to matrix A.
532     * @param KL The number of sub-diagonals of the matrix A.
533     * @param KU The number of super-diagonals of the matrix A.
534     * @param alpha The scalar alpha.
535     * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F32_2}.
536     * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
537     * @param incX The increment for the elements of vector x, must be larger than zero.
538     * @param beta The scalar beta.
539     * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
540     * @param incY The increment for the elements of vector y, must be larger than zero.
541     */
542    public void CGBMV(@Transpose int TransA, int KL, int KU, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) {
543        // GBMV has the same validation requirements as GEMV + KL and KU >= 0
544        validateGEMV(Element.F32_2(mRS), TransA, A, X, incX, Y, incY);
545        if (KL < 0 || KU < 0) {
546            throw new RSRuntimeException("KL and KU must be greater than or equal to 0");
547        }
548        int M = A.getType().getY();
549        int N = A.getType().getX();
550
551        boolean mUseIncSupp = isIncSupp();
552        long aID = A.getID(mRS);
553        long xID = X.getID(mRS);
554        long yID = Y.getID(mRS);
555        if (mUseIncSupp) {
556            aID = getDummyAlloc(A);
557            xID = getDummyAlloc(X);
558            yID = getDummyAlloc(Y);
559        }
560        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, KL, KU, mUseIncSupp);
561    }
562
563    /**
564     * ZGBMV performs one of the matrix-vector operations
565     * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y   or   y := alpha*A**H*x + beta*y
566     *
567     * Details: http://www.netlib.org/lapack/explore-html/d9/d46/zgbmv_8f.html
568     *
569     * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),
570     *       but only the region M*(KL+KU+1) will be referenced. The following subroutine can is an
571     *       example showing how to convert the original matrix 'a' to row-based band matrix 'b'.
572     *           for i in range(0, m):
573     *              for j in range(max(0, i-kl), min(i+ku+1, n)):
574     *                  b[i, j-i+kl] = a[i, j]
575     *
576     * @param TransA The type of transpose applied to matrix A.
577     * @param KL The number of sub-diagonals of the matrix A.
578     * @param KU The number of super-diagonals of the matrix A.
579     * @param alpha The scalar alpha.
580     * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F64_2}.
581     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
582     * @param incX The increment for the elements of vector x, must be larger than zero.
583     * @param beta The scalar beta.
584     * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
585     * @param incY The increment for the elements of vector y, must be larger than zero.
586     */
587    public void ZGBMV(@Transpose int TransA, int KL, int KU, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) {
588        // GBMV has the same validation requirements as GEMV + KL and KU >= 0
589        validateGEMV(Element.F64_2(mRS), TransA, A, X, incX, Y, incY);
590        if (KL < 0 || KU < 0) {
591            throw new RSRuntimeException("KL and KU must be greater than or equal to 0");
592        }
593        int M = A.getType().getY();
594        int N = A.getType().getX();
595
596        boolean mUseIncSupp = isIncSupp();
597        long aID = A.getID(mRS);
598        long xID = X.getID(mRS);
599        long yID = Y.getID(mRS);
600        if (mUseIncSupp) {
601            aID = getDummyAlloc(A);
602            xID = getDummyAlloc(X);
603            yID = getDummyAlloc(Y);
604        }
605        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, KL, KU, mUseIncSupp);
606    }
607
608    static void validateTRMV(Element e, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {
609        validateTranspose(TransA);
610        validateUplo(Uplo);
611        validateDiag(Diag);
612        int N = A.getType().getY();
613        if (A.getType().getX() != N) {
614            throw new RSRuntimeException("A must be a square matrix for TRMV");
615        }
616        if (!A.getType().getElement().isCompatible(e) ||
617            !X.getType().getElement().isCompatible(e)) {
618            throw new RSRuntimeException("Called BLAS with wrong Element type");
619        }
620        if (X.getType().getY() > 1) {
621            throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
622        }
623
624        if (incX <= 0) {
625            throw new RSRuntimeException("Vector increments must be greater than 0");
626        }
627        int expectedXDim = 1 + (N - 1) * incX;
628        if (X.getType().getX() != expectedXDim) {
629            throw new RSRuntimeException("Incorrect vector dimensions for TRMV");
630        }
631    }
632
633    static int validateTPMV(Element e, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) {
634        validateTranspose(TransA);
635        validateUplo(Uplo);
636        validateDiag(Diag);
637        if (!Ap.getType().getElement().isCompatible(e) ||
638            !X.getType().getElement().isCompatible(e)) {
639            throw new RSRuntimeException("Called BLAS with wrong Element type");
640        }
641        if (X.getType().getY() > 1) {
642            throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
643        }
644
645        if (Ap.getType().getY() > 1) {
646            throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1");
647        }
648
649        int N = (int)Math.sqrt((double)Ap.getType().getX() * 2);
650        //is it really doing anything?
651        if (Ap.getType().getX() != ((N * (N+1)) / 2)) {
652            throw new RSRuntimeException("Invalid dimension for Ap");
653        }
654        if (incX <= 0) {
655            throw new RSRuntimeException("Vector increments must be greater than 0");
656        }
657        int expectedXDim = 1 + (N - 1) * incX;
658        if (X.getType().getX() != expectedXDim) {
659            throw new RSRuntimeException("Incorrect vector dimensions for TPMV");
660        }
661
662        return N;
663    }
664
665    /**
666     * STRMV performs one of the matrix-vector operations
667     * x := A*x   or   x := A**T*x
668     *
669     * Details: http://www.netlib.org/lapack/explore-html/de/d45/strmv_8f.html
670     *
671     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
672     * @param TransA The type of transpose applied to matrix A.
673     * @param Diag Specifies whether or not A is unit triangular.
674     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
675     * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
676     * @param incX The increment for the elements of vector x, must be larger than zero.
677     */
678    public void STRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {
679        validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX);
680        int N = A.getType().getY();
681
682        boolean mUseIncSupp = isIncSupp();
683        long aID = A.getID(mRS);
684        long xID = X.getID(mRS);
685        if (mUseIncSupp) {
686            aID = getDummyAlloc(A);
687            xID = getDummyAlloc(X);
688        }
689        mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp);
690    }
691
692    /**
693     * DTRMV performs one of the matrix-vector operations
694     * x := A*x   or   x := A**T*x
695     *
696     * Details: http://www.netlib.org/lapack/explore-html/dc/d7e/dtrmv_8f.html
697     *
698     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
699     * @param TransA The type of transpose applied to matrix A.
700     * @param Diag Specifies whether or not A is unit triangular.
701     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
702     * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
703     * @param incX The increment for the elements of vector x, must be larger than zero.
704     */
705    public void DTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {
706        validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX);
707        int N = A.getType().getY();
708
709        boolean mUseIncSupp = isIncSupp();
710        long aID = A.getID(mRS);
711        long xID = X.getID(mRS);
712        if (mUseIncSupp) {
713            aID = getDummyAlloc(A);
714            xID = getDummyAlloc(X);
715        }
716        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp);
717    }
718
719    /**
720     * CTRMV performs one of the matrix-vector operations
721     * x := A*x   or   x := A**T*x   or   x := A**H*x
722     *
723     * Details: http://www.netlib.org/lapack/explore-html/df/d78/ctrmv_8f.html
724     *
725     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
726     * @param TransA The type of transpose applied to matrix A.
727     * @param Diag Specifies whether or not A is unit triangular.
728     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
729     * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
730     * @param incX The increment for the elements of vector x, must be larger than zero.
731     */
732    public void CTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {
733        validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX);
734        int N = A.getType().getY();
735
736        boolean mUseIncSupp = isIncSupp();
737        long aID = A.getID(mRS);
738        long xID = X.getID(mRS);
739        if (mUseIncSupp) {
740            aID = getDummyAlloc(A);
741            xID = getDummyAlloc(X);
742        }
743        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp);
744    }
745
746    /**
747     * ZTRMV performs one of the matrix-vector operations
748     * x := A*x   or   x := A**T*x   or   x := A**H*x
749     *
750     * Details: http://www.netlib.org/lapack/explore-html/d0/dd1/ztrmv_8f.html
751     *
752     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
753     * @param TransA The type of transpose applied to matrix A.
754     * @param Diag Specifies whether or not A is unit triangular.
755     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
756     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
757     * @param incX The increment for the elements of vector x, must be larger than zero.
758     */
759    public void ZTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {
760        validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX);
761        int N = A.getType().getY();
762
763        boolean mUseIncSupp = isIncSupp();
764        long aID = A.getID(mRS);
765        long xID = X.getID(mRS);
766        if (mUseIncSupp) {
767            aID = getDummyAlloc(A);
768            xID = getDummyAlloc(X);
769        }
770        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp);
771    }
772
773    /**
774     * STBMV performs one of the matrix-vector operations
775     * x := A*x   or   x := A**T*x
776     *
777     * Details: http://www.netlib.org/lapack/explore-html/d6/d7d/stbmv_8f.html
778     *
779     * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
780     *       but only the region N*(K+1) will be referenced. The following subroutine can is an
781     *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
782     *           for i in range(0, n):
783     *              for j in range(i, min(i+k+1, n)):
784     *                  b[i, j-i] = a[i, j]
785     *
786     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
787     * @param TransA The type of transpose applied to matrix A.
788     * @param Diag Specifies whether or not A is unit triangular.
789     * @param K The number of off-diagonals of the matrix A
790     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
791     * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
792     * @param incX The increment for the elements of vector x, must be larger than zero.
793     */
794    public void STBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
795        // TBMV has the same requirements as TRMV + K >= 0
796        if (K < 0) {
797            throw new RSRuntimeException("K must be greater than or equal to 0");
798        }
799        validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX);
800        int N = A.getType().getY();
801
802        boolean mUseIncSupp = isIncSupp();
803        long aID = A.getID(mRS);
804        long xID = X.getID(mRS);
805        if (mUseIncSupp) {
806            aID = getDummyAlloc(A);
807            xID = getDummyAlloc(X);
808        }
809        mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp);
810    }
811
812    /**
813     * DTBMV performs one of the matrix-vector operations
814     * x := A*x   or   x := A**T*x
815     *
816     * Details: http://www.netlib.org/lapack/explore-html/df/d29/dtbmv_8f.html
817     *
818     * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
819     *       but only the region N*(K+1) will be referenced. The following subroutine can is an
820     *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
821     *           for i in range(0, n):
822     *              for j in range(i, min(i+k+1, n)):
823     *                  b[i, j-i] = a[i, j]
824     *
825     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
826     * @param TransA The type of transpose applied to matrix A.
827     * @param Diag Specifies whether or not A is unit triangular.
828     * @param K The number of off-diagonals of the matrix A
829     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
830     * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
831     * @param incX The increment for the elements of vector x, must be larger than zero.
832     */
833    public void DTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
834        // TBMV has the same requirements as TRMV + K >= 0
835        if (K < 0) {
836            throw new RSRuntimeException("K must be greater than or equal to 0");
837        }
838        validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX);
839        int N = A.getType().getY();
840
841        boolean mUseIncSupp = isIncSupp();
842        long aID = A.getID(mRS);
843        long xID = X.getID(mRS);
844        if (mUseIncSupp) {
845            aID = getDummyAlloc(A);
846            xID = getDummyAlloc(X);
847        }
848        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp);
849    }
850
851    /**
852     * CTBMV performs one of the matrix-vector operations
853     * x := A*x   or   x := A**T*x   or   x := A**H*x
854     *
855     * Details: http://www.netlib.org/lapack/explore-html/d3/dcd/ctbmv_8f.html
856     *
857     * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
858     *       but only the region N*(K+1) will be referenced. The following subroutine can is an
859     *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
860     *           for i in range(0, n):
861     *              for j in range(i, min(i+k+1, n)):
862     *                  b[i, j-i] = a[i, j]
863     *
864     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
865     * @param TransA The type of transpose applied to matrix A.
866     * @param Diag Specifies whether or not A is unit triangular.
867     * @param K The number of off-diagonals of the matrix A
868     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
869     * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
870     * @param incX The increment for the elements of vector x, must be larger than zero.
871     */
872    public void CTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
873        // TBMV has the same requirements as TRMV + K >= 0
874        if (K < 0) {
875            throw new RSRuntimeException("K must be greater than or equal to 0");
876        }
877        validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX);
878        int N = A.getType().getY();
879
880        boolean mUseIncSupp = isIncSupp();
881        long aID = A.getID(mRS);
882        long xID = X.getID(mRS);
883        if (mUseIncSupp) {
884            aID = getDummyAlloc(A);
885            xID = getDummyAlloc(X);
886        }
887        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp);
888    }
889
890    /**
891     * ZTBMV performs one of the matrix-vector operations
892     * x := A*x   or   x := A**T*x   or   x := A**H*x
893     *
894     * Details: http://www.netlib.org/lapack/explore-html/d3/d39/ztbmv_8f.html
895     *
896     * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
897     *       but only the region N*(K+1) will be referenced. The following subroutine can is an
898     *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
899     *           for i in range(0, n):
900     *              for j in range(i, min(i+k+1, n)):
901     *                  b[i, j-i] = a[i, j]
902     *
903     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
904     * @param TransA The type of transpose applied to matrix A.
905     * @param Diag Specifies whether or not A is unit triangular.
906     * @param K The number of off-diagonals of the matrix A
907     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
908     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
909     * @param incX The increment for the elements of vector x, must be larger than zero.
910     */
911    public void ZTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
912        // TBMV has the same requirements as TRMV + K >= 0
913        if (K < 0) {
914            throw new RSRuntimeException("K must be greater than or equal to 0");
915        }
916        validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX);
917        int N = A.getType().getY();
918
919        boolean mUseIncSupp = isIncSupp();
920        long aID = A.getID(mRS);
921        long xID = X.getID(mRS);
922        if (mUseIncSupp) {
923            aID = getDummyAlloc(A);
924            xID = getDummyAlloc(X);
925        }
926        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp);
927    }
928
929    /**
930     * STPMV performs one of the matrix-vector operations
931     * x := A*x   or   x := A**T*x
932     *
933     * Details: http://www.netlib.org/lapack/explore-html/db/db1/stpmv_8f.html
934     *
935     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
936     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
937     *       'a' to packed matrix 'b'.
938     *           k = 0
939     *           for i in range(0, n):
940     *              for j in range(i, n):
941     *                  b[k++] = a[i, j]
942     *
943     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
944     * @param TransA The type of transpose applied to matrix A.
945     * @param Diag Specifies whether or not A is unit triangular.
946     * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32}.
947     * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
948     * @param incX The increment for the elements of vector x, must be larger than zero.
949     */
950    public void STPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
951        int N = validateTPMV(Element.F32(mRS), Uplo, TransA, Diag, Ap, X, incX);
952
953        boolean mUseIncSupp = isIncSupp();
954        long apID = Ap.getID(mRS);
955        long xID = X.getID(mRS);
956        if (mUseIncSupp) {
957            apID = getDummyAlloc(Ap);
958            xID = getDummyAlloc(X);
959        }
960        mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, apID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp);
961    }
962
963    /**
964     * DTPMV performs one of the matrix-vector operations
965     * x := A*x   or   x := A**T*x
966     *
967     * Details: http://www.netlib.org/lapack/explore-html/dc/dcd/dtpmv_8f.html
968     *
969     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
970     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
971     *       'a' to packed matrix 'b'.
972     *           k = 0
973     *           for i in range(0, n):
974     *              for j in range(i, n):
975     *                  b[k++] = a[i, j]
976     *
977     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
978     * @param TransA The type of transpose applied to matrix A.
979     * @param Diag Specifies whether or not A is unit triangular.
980     * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64}.
981     * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
982     * @param incX The increment for the elements of vector x, must be larger than zero.
983     */
984    public void DTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
985        int N = validateTPMV(Element.F64(mRS), Uplo, TransA, Diag, Ap, X, incX);
986
987        boolean mUseIncSupp = isIncSupp();
988        long apID = Ap.getID(mRS);
989        long xID = X.getID(mRS);
990        if (mUseIncSupp) {
991            apID = getDummyAlloc(Ap);
992            xID = getDummyAlloc(X);
993        }
994        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, apID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp);
995    }
996
997    /**
998     * CTPMV performs one of the matrix-vector operations
999     * x := A*x   or   x := A**T*x   or   x := A**H*x
1000     *
1001     * Details: http://www.netlib.org/lapack/explore-html/d4/dbb/ctpmv_8f.html
1002     *
1003     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
1004     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
1005     *       'a' to packed matrix 'b'.
1006     *           k = 0
1007     *           for i in range(0, n):
1008     *              for j in range(i, n):
1009     *                  b[k++] = a[i, j]
1010     *
1011     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1012     * @param TransA The type of transpose applied to matrix A.
1013     * @param Diag Specifies whether or not A is unit triangular.
1014     * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32_2}.
1015     * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
1016     * @param incX The increment for the elements of vector x, must be larger than zero.
1017     */
1018    public void CTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
1019        int N = validateTPMV(Element.F32_2(mRS), Uplo, TransA, Diag, Ap, X, incX);
1020
1021        boolean mUseIncSupp = isIncSupp();
1022        long apID = Ap.getID(mRS);
1023        long xID = X.getID(mRS);
1024        if (mUseIncSupp) {
1025            apID = getDummyAlloc(Ap);
1026            xID = getDummyAlloc(X);
1027        }
1028        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, apID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp);
1029    }
1030
1031    /**
1032     * ZTPMV performs one of the matrix-vector operations
1033     * x := A*x   or   x := A**T*x   or   x := A**H*x
1034     *
1035     * Details: http://www.netlib.org/lapack/explore-html/d2/d9e/ztpmv_8f.html
1036     *
1037     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
1038     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
1039     *       'a' to packed matrix 'b'.
1040     *           k = 0
1041     *           for i in range(0, n):
1042     *              for j in range(i, n):
1043     *                  b[k++] = a[i, j]
1044     *
1045     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1046     * @param TransA The type of transpose applied to matrix A.
1047     * @param Diag Specifies whether or not A is unit triangular.
1048     * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64_2}.
1049     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
1050     * @param incX The increment for the elements of vector x, must be larger than zero.
1051     */
1052    public void ZTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
1053        int N = validateTPMV(Element.F64_2(mRS), Uplo, TransA, Diag, Ap, X, incX);
1054
1055        boolean mUseIncSupp = isIncSupp();
1056        long apID = Ap.getID(mRS);
1057        long xID = X.getID(mRS);
1058        if (mUseIncSupp) {
1059            apID = getDummyAlloc(Ap);
1060            xID = getDummyAlloc(X);
1061        }
1062        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, apID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp);
1063    }
1064
1065    /**
1066     * STRSV solves one of the systems of equations
1067     * A*x = b   or   A**T*x = b
1068     *
1069     * Details: http://www.netlib.org/lapack/explore-html/d0/d2a/strsv_8f.html
1070     *
1071     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1072     * @param TransA The type of transpose applied to matrix A.
1073     * @param Diag Specifies whether or not A is unit triangular.
1074     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
1075     * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1076     * @param incX The increment for the elements of vector x, must be larger than zero.
1077     */
1078    public void STRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation A,  Allocation X,  int incX) {
1079        // TRSV is the same as TRMV
1080        validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX);
1081        int N = A.getType().getY();
1082
1083        boolean mUseIncSupp = isIncSupp();
1084        long aID = A.getID(mRS);
1085        long xID = X.getID(mRS);
1086        if (mUseIncSupp) {
1087            aID = getDummyAlloc(A);
1088            xID = getDummyAlloc(X);
1089        }
1090        mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp);
1091
1092    }
1093
1094    /**
1095     * DTRSV solves one of the systems of equations
1096     * A*x = b   or   A**T*x = b
1097     *
1098     * Details: http://www.netlib.org/lapack/explore-html/d6/d96/dtrsv_8f.html
1099     *
1100     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1101     * @param TransA The type of transpose applied to matrix A.
1102     * @param Diag Specifies whether or not A is unit triangular.
1103     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
1104     * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
1105     * @param incX The increment for the elements of vector x, must be larger than zero.
1106     */
1107    public void DTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation A,  Allocation X,  int incX) {
1108        // TRSV is the same as TRMV
1109        validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX);
1110        int N = A.getType().getY();
1111
1112        boolean mUseIncSupp = isIncSupp();
1113        long aID = A.getID(mRS);
1114        long xID = X.getID(mRS);
1115        if (mUseIncSupp) {
1116            aID = getDummyAlloc(A);
1117            xID = getDummyAlloc(X);
1118        }
1119        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp);
1120
1121    }
1122
1123    /**
1124     * CTRSV solves one of the systems of equations
1125     * A*x = b   or   A**T*x = b   or   A**H*x = b
1126     *
1127     * Details: http://www.netlib.org/lapack/explore-html/d4/dc8/ctrsv_8f.html
1128     *
1129     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1130     * @param TransA The type of transpose applied to matrix A.
1131     * @param Diag Specifies whether or not A is unit triangular.
1132     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
1133     * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
1134     * @param incX The increment for the elements of vector x, must be larger than zero.
1135     */
1136    public void CTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation A,  Allocation X,  int incX) {
1137        // TRSV is the same as TRMV
1138        validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX);
1139        int N = A.getType().getY();
1140
1141        boolean mUseIncSupp = isIncSupp();
1142        long aID = A.getID(mRS);
1143        long xID = X.getID(mRS);
1144        if (mUseIncSupp) {
1145            aID = getDummyAlloc(A);
1146            xID = getDummyAlloc(X);
1147        }
1148        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp);
1149
1150    }
1151
1152    /**
1153     * ZTRSV solves one of the systems of equations
1154     * A*x = b   or   A**T*x = b   or   A**H*x = b
1155     *
1156     * Details: http://www.netlib.org/lapack/explore-html/d1/d2f/ztrsv_8f.html
1157     *
1158     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1159     * @param TransA The type of transpose applied to matrix A.
1160     * @param Diag Specifies whether or not A is unit triangular.
1161     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
1162     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
1163     * @param incX The increment for the elements of vector x, must be larger than zero.
1164     */
1165    public void ZTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation A,  Allocation X,  int incX) {
1166        // TRSV is the same as TRMV
1167        validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX);
1168        int N = A.getType().getY();
1169
1170        boolean mUseIncSupp = isIncSupp();
1171        long aID = A.getID(mRS);
1172        long xID = X.getID(mRS);
1173        if (mUseIncSupp) {
1174            aID = getDummyAlloc(A);
1175            xID = getDummyAlloc(X);
1176        }
1177        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp);
1178
1179    }
1180
1181    /**
1182     * STBSV solves one of the systems of equations
1183     * A*x = b   or   A**T*x = b
1184     *
1185     * Details: http://www.netlib.org/lapack/explore-html/d0/d1f/stbsv_8f.html
1186     *
1187     * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
1188     *       but only the region N*(K+1) will be referenced. The following subroutine can is an
1189     *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
1190     *           for i in range(0, n):
1191     *              for j in range(i, min(i+k+1, n)):
1192     *                  b[i, j-i] = a[i, j]
1193     *
1194     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1195     * @param TransA The type of transpose applied to matrix A.
1196     * @param Diag Specifies whether or not A is unit triangular.
1197     * @param K The number of off-diagonals of the matrix A
1198     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
1199     * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1200     * @param incX The increment for the elements of vector x, must be larger than zero.
1201     */
1202    public void STBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
1203        // TBSV is the same as TRMV + K >= 0
1204        validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX);
1205        int N = A.getType().getY();
1206        if (K < 0) {
1207            throw new RSRuntimeException("Number of diagonals must be positive");
1208        }
1209
1210        boolean mUseIncSupp = isIncSupp();
1211        long aID = A.getID(mRS);
1212        long xID = X.getID(mRS);
1213        if (mUseIncSupp) {
1214            aID = getDummyAlloc(A);
1215            xID = getDummyAlloc(X);
1216        }
1217        mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp);
1218    }
1219
1220    /**
1221     * DTBSV solves one of the systems of equations
1222     * A*x = b   or   A**T*x = b
1223     *
1224     * Details: http://www.netlib.org/lapack/explore-html/d4/dcf/dtbsv_8f.html
1225     *
1226     * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
1227     *       but only the region N*(K+1) will be referenced. The following subroutine can is an
1228     *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
1229     *           for i in range(0, n):
1230     *              for j in range(i, min(i+k+1, n)):
1231     *                  b[i, j-i] = a[i, j]
1232     *
1233     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1234     * @param TransA The type of transpose applied to matrix A.
1235     * @param Diag Specifies whether or not A is unit triangular.
1236     * @param K The number of off-diagonals of the matrix A
1237     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
1238     * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
1239     * @param incX The increment for the elements of vector x, must be larger than zero.
1240     */
1241    public void DTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
1242        // TBSV is the same as TRMV + K >= 0
1243        validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX);
1244        int N = A.getType().getY();
1245        if (K < 0) {
1246            throw new RSRuntimeException("Number of diagonals must be positive");
1247        }
1248
1249        boolean mUseIncSupp = isIncSupp();
1250        long aID = A.getID(mRS);
1251        long xID = X.getID(mRS);
1252        if (mUseIncSupp) {
1253            aID = getDummyAlloc(A);
1254            xID = getDummyAlloc(X);
1255        }
1256        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp);
1257    }
1258
1259    /**
1260     * CTBSV solves one of the systems of equations
1261     * A*x = b   or   A**T*x = b   or   A**H*x = b
1262     *
1263     * Details: http://www.netlib.org/lapack/explore-html/d9/d5f/ctbsv_8f.html
1264     *
1265     * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
1266     *       but only the region N*(K+1) will be referenced. The following subroutine can is an
1267     *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
1268     *           for i in range(0, n):
1269     *              for j in range(i, min(i+k+1, n)):
1270     *                  b[i, j-i] = a[i, j]
1271     *
1272     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1273     * @param TransA The type of transpose applied to matrix A.
1274     * @param Diag Specifies whether or not A is unit triangular.
1275     * @param K The number of off-diagonals of the matrix A
1276     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
1277     * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
1278     * @param incX The increment for the elements of vector x, must be larger than zero.
1279     */
1280    public void CTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
1281        // TBSV is the same as TRMV + K >= 0
1282        validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX);
1283        int N = A.getType().getY();
1284        if (K < 0) {
1285            throw new RSRuntimeException("Number of diagonals must be positive");
1286        }
1287
1288        boolean mUseIncSupp = isIncSupp();
1289        long aID = A.getID(mRS);
1290        long xID = X.getID(mRS);
1291        if (mUseIncSupp) {
1292            aID = getDummyAlloc(A);
1293            xID = getDummyAlloc(X);
1294        }
1295        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp);
1296    }
1297
1298    /**
1299     * ZTBSV solves one of the systems of equations
1300     * A*x = b   or   A**T*x = b   or   A**H*x = b
1301     *
1302     * Details: http://www.netlib.org/lapack/explore-html/d4/d5a/ztbsv_8f.html
1303     *
1304     * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
1305     *       but only the region N*(K+1) will be referenced. The following subroutine can is an
1306     *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
1307     *           for i in range(0, n):
1308     *              for j in range(i, min(i+k+1, n)):
1309     *                  b[i, j-i] = a[i, j]
1310     *
1311     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1312     * @param TransA The type of transpose applied to matrix A.
1313     * @param Diag Specifies whether or not A is unit triangular.
1314     * @param K The number of off-diagonals of the matrix A
1315     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
1316     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
1317     * @param incX The increment for the elements of vector x, must be larger than zero.
1318     */
1319    public void ZTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
1320        // TBSV is the same as TRMV + K >= 0
1321        validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX);
1322        int N = A.getType().getY();
1323        if (K < 0) {
1324            throw new RSRuntimeException("Number of diagonals must be positive");
1325        }
1326
1327        boolean mUseIncSupp = isIncSupp();
1328        long aID = A.getID(mRS);
1329        long xID = X.getID(mRS);
1330        if (mUseIncSupp) {
1331            aID = getDummyAlloc(A);
1332            xID = getDummyAlloc(X);
1333        }
1334        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp);
1335    }
1336
1337    /**
1338     * STPSV solves one of the systems of equations
1339     * A*x = b   or   A**T*x = b
1340     *
1341     * Details: http://www.netlib.org/lapack/explore-html/d0/d7c/stpsv_8f.html
1342     *
1343     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
1344     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
1345     *       'a' to packed matrix 'b'.
1346     *           k = 0
1347     *           for i in range(0, n):
1348     *              for j in range(i, n):
1349     *                  b[k++] = a[i, j]
1350     *
1351     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1352     * @param TransA The type of transpose applied to matrix A.
1353     * @param Diag Specifies whether or not A is unit triangular.
1354     * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32}.
1355     * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1356     * @param incX The increment for the elements of vector x, must be larger than zero.
1357     */
1358    public void STPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
1359        // TPSV is same as TPMV
1360        int N = validateTPMV(Element.F32(mRS), Uplo, TransA, Diag, Ap, X, incX);
1361
1362        boolean mUseIncSupp = isIncSupp();
1363        long apID = Ap.getID(mRS);
1364        long xID = X.getID(mRS);
1365        if (mUseIncSupp) {
1366            apID = getDummyAlloc(Ap);
1367            xID = getDummyAlloc(X);
1368        }
1369        mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, apID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp);
1370    }
1371
1372    /**
1373     * DTPSV solves one of the systems of equations
1374     * A*x = b   or   A**T*x = b
1375     *
1376     * Details: http://www.netlib.org/lapack/explore-html/d9/d84/dtpsv_8f.html
1377     *
1378     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
1379     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
1380     *       'a' to packed matrix 'b'.
1381     *           k = 0
1382     *           for i in range(0, n):
1383     *              for j in range(i, n):
1384     *                  b[k++] = a[i, j]
1385     *
1386     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1387     * @param TransA The type of transpose applied to matrix A.
1388     * @param Diag Specifies whether or not A is unit triangular.
1389     * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64}.
1390     * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
1391     * @param incX The increment for the elements of vector x, must be larger than zero.
1392     */
1393    public void DTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
1394        // TPSV is same as TPMV
1395        int N = validateTPMV(Element.F64(mRS), Uplo, TransA, Diag, Ap, X, incX);
1396
1397        boolean mUseIncSupp = isIncSupp();
1398        long apID = Ap.getID(mRS);
1399        long xID = X.getID(mRS);
1400        if (mUseIncSupp) {
1401            apID = getDummyAlloc(Ap);
1402            xID = getDummyAlloc(X);
1403        }
1404        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, apID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp);
1405    }
1406
1407    /**
1408     * CTPSV solves one of the systems of equations
1409     * A*x = b   or   A**T*x = b   or   A**H*x = b
1410     *
1411     * Details: http://www.netlib.org/lapack/explore-html/d8/d56/ctpsv_8f.html
1412     *
1413     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
1414     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
1415     *       'a' to packed matrix 'b'.
1416     *           k = 0
1417     *           for i in range(0, n):
1418     *              for j in range(i, n):
1419     *                  b[k++] = a[i, j]
1420     *
1421     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1422     * @param TransA The type of transpose applied to matrix A.
1423     * @param Diag Specifies whether or not A is unit triangular.
1424     * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32_2}.
1425     * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
1426     * @param incX The increment for the elements of vector x, must be larger than zero.
1427     */
1428    public void CTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
1429        // TPSV is same as TPMV
1430        int N = validateTPMV(Element.F32_2(mRS), Uplo, TransA, Diag, Ap, X, incX);
1431
1432        boolean mUseIncSupp = isIncSupp();
1433        long apID = Ap.getID(mRS);
1434        long xID = X.getID(mRS);
1435        if (mUseIncSupp) {
1436            apID = getDummyAlloc(Ap);
1437            xID = getDummyAlloc(X);
1438        }
1439        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, apID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp);
1440    }
1441
1442    /**
1443     * ZTPSV solves one of the systems of equations
1444     * A*x = b   or   A**T*x = b   or   A**H*x = b
1445     *
1446     * Details: http://www.netlib.org/lapack/explore-html/da/d57/ztpsv_8f.html
1447     *
1448     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
1449     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
1450     *       'a' to packed matrix 'b'.
1451     *           k = 0
1452     *           for i in range(0, n):
1453     *              for j in range(i, n):
1454     *                  b[k++] = a[i, j]
1455     *
1456     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1457     * @param TransA The type of transpose applied to matrix A.
1458     * @param Diag Specifies whether or not A is unit triangular.
1459     * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64_2}.
1460     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
1461     * @param incX The increment for the elements of vector x, must be larger than zero.
1462     */
1463    public void ZTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
1464        // TPSV is same as TPMV
1465        int N = validateTPMV(Element.F64_2(mRS), Uplo, TransA, Diag, Ap, X, incX);
1466
1467        boolean mUseIncSupp = isIncSupp();
1468        long apID = Ap.getID(mRS);
1469        long xID = X.getID(mRS);
1470        if (mUseIncSupp) {
1471            apID = getDummyAlloc(Ap);
1472            xID = getDummyAlloc(X);
1473        }
1474        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, apID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp);
1475    }
1476
1477    /**
1478     * Level 2, S and D only
1479     */
1480    static int validateSYMV(Element e, @Uplo int Uplo, Allocation A, Allocation X, Allocation Y, int incX, int incY) {
1481        validateUplo(Uplo);
1482        int N = A.getType().getY();
1483        if (A.getType().getX() != N) {
1484            throw new RSRuntimeException("A must be a square matrix for SYMV");
1485        }
1486        if (!A.getType().getElement().isCompatible(e) ||
1487            !X.getType().getElement().isCompatible(e) ||
1488            !Y.getType().getElement().isCompatible(e) ) {
1489            throw new RSRuntimeException("Called BLAS with wrong Element type");
1490        }
1491        if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
1492            throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
1493        }
1494
1495        if (incX <= 0 || incY <= 0) {
1496            throw new RSRuntimeException("Vector increments must be greater than 0");
1497        }
1498        int expectedXDim = 1 + (N - 1) * incX;
1499        if (X.getType().getX() != expectedXDim) {
1500            throw new RSRuntimeException("Incorrect vector dimensions for SYMV");
1501        }
1502        int expectedYDim = 1 + (N - 1) * incY;
1503        if (Y.getType().getX() != expectedYDim) {
1504            throw new RSRuntimeException("Incorrect vector dimensions for SYMV");
1505        }
1506        return N;
1507    }
1508    static int validateSPMV(Element e, @Uplo int Uplo, Allocation Ap, Allocation X, int incX, Allocation Y, int incY) {
1509        validateUplo(Uplo);
1510        if (!Ap.getType().getElement().isCompatible(e) ||
1511            !X.getType().getElement().isCompatible(e) ||
1512            !Y.getType().getElement().isCompatible(e)) {
1513            throw new RSRuntimeException("Called BLAS with wrong Element type");
1514        }
1515        if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
1516            throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
1517        }
1518
1519        if (Ap.getType().getY() > 1) {
1520            throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1");
1521        }
1522
1523        int N = (int)Math.sqrt((double)Ap.getType().getX() * 2);
1524        if (Ap.getType().getX() != ((N * (N+1)) / 2)) {
1525            throw new RSRuntimeException("Invalid dimension for Ap");
1526        }
1527        if (incX <= 0 || incY <= 0) {
1528            throw new RSRuntimeException("Vector increments must be greater than 0");
1529        }
1530        int expectedXDim = 1 + (N - 1) * incX;
1531        if (X.getType().getX() != expectedXDim) {
1532            throw new RSRuntimeException("Incorrect vector dimensions for SPMV");
1533        }
1534        int expectedYDim = 1 + (N - 1) * incY;
1535        if (Y.getType().getX() != expectedYDim) {
1536            throw new RSRuntimeException("Incorrect vector dimensions for SPMV");
1537        }
1538
1539        return N;
1540    }
1541    static void validateGER(Element e, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
1542        if (!A.getType().getElement().isCompatible(e) ||
1543            !X.getType().getElement().isCompatible(e) ||
1544            !Y.getType().getElement().isCompatible(e) ) {
1545            throw new RSRuntimeException("Called BLAS with wrong Element type");
1546        }
1547
1548        if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
1549            throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
1550        }
1551
1552        int M = A.getType().getY();
1553        int N = A.getType().getX();
1554
1555        if (N < 1 || M < 1) {
1556            throw new RSRuntimeException("M and N must be 1 or greater for GER");
1557        }
1558        if (incX <= 0 || incY <= 0) {
1559            throw new RSRuntimeException("Vector increments must be greater than 0");
1560        }
1561        int expectedXDim = 1 + (M - 1) * incX;
1562        if (X.getType().getX() != expectedXDim) {
1563            throw new RSRuntimeException("Incorrect vector dimensions for GER");
1564        }
1565        int expectedYDim = 1 + (N - 1) * incY;
1566        if (Y.getType().getX() != expectedYDim) {
1567            throw new RSRuntimeException("Incorrect vector dimensions for GER");
1568        }
1569
1570
1571    }
1572    static int validateSYR(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation A) {
1573        validateUplo(Uplo);
1574        if (!A.getType().getElement().isCompatible(e) ||
1575            !X.getType().getElement().isCompatible(e)) {
1576            throw new RSRuntimeException("Called BLAS with wrong Element type");
1577        }
1578
1579        int N = A.getType().getX();
1580
1581        if (X.getType().getY() > 1) {
1582            throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
1583        }
1584        if (N != A.getType().getY()) {
1585            throw new RSRuntimeException("A must be a symmetric matrix");
1586        }
1587        if (incX <= 0) {
1588            throw new RSRuntimeException("Vector increments must be greater than 0");
1589        }
1590        int expectedXDim = 1 + (N - 1) * incX;
1591        if (X.getType().getX() != expectedXDim) {
1592            throw new RSRuntimeException("Incorrect vector dimensions for SYR");
1593        }
1594        return N;
1595    }
1596    static int validateSPR(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Ap) {
1597        validateUplo(Uplo);
1598        if (!Ap.getType().getElement().isCompatible(e) ||
1599            !X.getType().getElement().isCompatible(e)) {
1600            throw new RSRuntimeException("Called BLAS with wrong Element type");
1601        }
1602        if (X.getType().getY() > 1) {
1603            throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
1604        }
1605
1606        if (Ap.getType().getY() > 1) {
1607            throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1");
1608        }
1609
1610        int N = (int)Math.sqrt((double)Ap.getType().getX() * 2);
1611        if (Ap.getType().getX() != ((N * (N+1)) / 2)) {
1612            throw new RSRuntimeException("Invalid dimension for Ap");
1613        }
1614        if (incX <= 0) {
1615            throw new RSRuntimeException("Vector increments must be greater than 0");
1616        }
1617        int expectedXDim = 1 + (N - 1) * incX;
1618        if (X.getType().getX() != expectedXDim) {
1619            throw new RSRuntimeException("Incorrect vector dimensions for SPR");
1620        }
1621
1622        return N;
1623    }
1624
1625    static int validateSYR2(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
1626        validateUplo(Uplo);
1627        if (!A.getType().getElement().isCompatible(e) ||
1628            !X.getType().getElement().isCompatible(e) ||
1629            !Y.getType().getElement().isCompatible(e)) {
1630            throw new RSRuntimeException("Called BLAS with wrong Element type");
1631        }
1632
1633        if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
1634            throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
1635        }
1636
1637        int N = A.getType().getX();
1638
1639        if (N != A.getType().getY()) {
1640            throw new RSRuntimeException("A must be a symmetric matrix");
1641        }
1642        if (incX <= 0 || incY <= 0) {
1643            throw new RSRuntimeException("Vector increments must be greater than 0");
1644        }
1645        int expectedXDim = 1 + (N - 1) * incX;
1646        int expectedYDim = 1 + (N - 1) * incY;
1647        if (X.getType().getX() != expectedXDim || Y.getType().getX() != expectedYDim) {
1648            throw new RSRuntimeException("Incorrect vector dimensions for SYR");
1649        }
1650        return N;
1651
1652    }
1653    static int validateSPR2(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) {
1654        validateUplo(Uplo);
1655        if (!Ap.getType().getElement().isCompatible(e) ||
1656            !X.getType().getElement().isCompatible(e) ||
1657            !Y.getType().getElement().isCompatible(e)) {
1658            throw new RSRuntimeException("Called BLAS with wrong Element type");
1659        }
1660        if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
1661            throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
1662        }
1663
1664        if (Ap.getType().getY() > 1) {
1665            throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1");
1666        }
1667
1668        int N = (int)Math.sqrt((double)Ap.getType().getX() * 2);
1669        if (Ap.getType().getX() != ((N * (N+1)) / 2)) {
1670            throw new RSRuntimeException("Invalid dimension for Ap");
1671        }
1672        if (incX <= 0 || incY <= 0) {
1673            throw new RSRuntimeException("Vector increments must be greater than 0");
1674        }
1675        int expectedXDim = 1 + (N - 1) * incX;
1676        int expectedYDim = 1 + (N - 1) * incY;
1677        if (X.getType().getX() != expectedXDim || Y.getType().getX() != expectedYDim) {
1678            throw new RSRuntimeException("Incorrect vector dimensions for SPR2");
1679        }
1680
1681        return N;
1682    }
1683
1684    /**
1685     * SSYMV performs the matrix-vector operation
1686     * y := alpha*A*x + beta*y
1687     *
1688     * Details: http://www.netlib.org/lapack/explore-html/d2/d94/ssymv_8f.html
1689     *
1690     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
1691     * @param alpha The scalar alpha.
1692     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
1693     * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1694     * @param incX The increment for the elements of vector x, must be larger than zero.
1695     * @param beta The scalar beta.
1696     * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
1697     * @param incY The increment for the elements of vector y, must be larger than zero.
1698     */
1699    public void SSYMV(@Uplo int Uplo, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) {
1700        int N = validateSYMV(Element.F32(mRS), Uplo, A, X, Y, incX, incY);
1701
1702        boolean mUseIncSupp = isIncSupp();
1703        long aID = A.getID(mRS);
1704        long xID = X.getID(mRS);
1705        long yID = Y.getID(mRS);
1706        if (mUseIncSupp) {
1707            aID = getDummyAlloc(A);
1708            xID = getDummyAlloc(X);
1709            yID = getDummyAlloc(Y);
1710        }
1711        mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssymv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, aID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp);
1712    }
1713
1714    /**
1715     * SSBMV performs the matrix-vector operation
1716     * y := alpha*A*x + beta*y
1717     *
1718     * Details: http://www.netlib.org/lapack/explore-html/d3/da1/ssbmv_8f.html
1719     *
1720     * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
1721     *       but only the region N*(K+1) will be referenced. The following subroutine can is an
1722     *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
1723     *           for i in range(0, n):
1724     *              for j in range(i, min(i+k+1, n)):
1725     *                  b[i, j-i] = a[i, j]
1726     *
1727     * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied.
1728     * @param K The number of off-diagonals of the matrix A
1729     * @param alpha The scalar alpha.
1730     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
1731     * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1732     * @param incX The increment for the elements of vector x, must be larger than zero.
1733     * @param beta The scalar beta.
1734     * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
1735     * @param incY The increment for the elements of vector y, must be larger than zero.
1736     */
1737    public void SSBMV(@Uplo int Uplo, int K, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) {
1738        // SBMV is the same as SYMV + K >= 0
1739        if (K < 0) {
1740            throw new RSRuntimeException("K must be greater than or equal to 0");
1741        }
1742        int N = validateSYMV(Element.F32(mRS), Uplo, A, X, Y, incX, incY);
1743
1744        boolean mUseIncSupp = isIncSupp();
1745        long aID = A.getID(mRS);
1746        long xID = X.getID(mRS);
1747        long yID = Y.getID(mRS);
1748        if (mUseIncSupp) {
1749            aID = getDummyAlloc(A);
1750            xID = getDummyAlloc(X);
1751            yID = getDummyAlloc(Y);
1752        }
1753        mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha, aID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp);
1754    }
1755
1756    /**
1757     * SSPMV performs the matrix-vector operation
1758     * y := alpha*A*x + beta*y
1759     *
1760     * Details: http://www.netlib.org/lapack/explore-html/d8/d68/sspmv_8f.html
1761     *
1762     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
1763     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
1764     *       'a' to packed matrix 'b'.
1765     *           k = 0
1766     *           for i in range(0, n):
1767     *              for j in range(i, n):
1768     *                  b[k++] = a[i, j]
1769     *
1770     * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form.
1771     * @param alpha The scalar alpha.
1772     * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32}.
1773     * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1774     * @param incX The increment for the elements of vector x, must be larger than zero.
1775     * @param beta The scalar beta.
1776     * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
1777     * @param incY The increment for the elements of vector y, must be larger than zero.
1778     */
1779    public void SSPMV(@Uplo int Uplo, float alpha, Allocation Ap, Allocation X, int incX, float beta, Allocation Y, int incY) {
1780        int N = validateSPMV(Element.F32(mRS), Uplo, Ap, X, incX, Y, incY);
1781
1782        boolean mUseIncSupp = isIncSupp();
1783        long apID = Ap.getID(mRS);
1784        long xID = X.getID(mRS);
1785        long yID = Y.getID(mRS);
1786        if (mUseIncSupp) {
1787            apID = getDummyAlloc(Ap);
1788            xID = getDummyAlloc(X);
1789            yID = getDummyAlloc(Y);
1790        }
1791        mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sspmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, apID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp);
1792    }
1793
1794    /**
1795     * SGER performs the rank 1 operation
1796     * A := alpha*x*y**T + A
1797     *
1798     * Details: http://www.netlib.org/lapack/explore-html/db/d5c/sger_8f.html
1799     *
1800     * @param alpha The scalar alpha.
1801     * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1802     * @param incX The increment for the elements of vector x, must be larger than zero.
1803     * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
1804     * @param incY The increment for the elements of vector y, must be larger than zero.
1805     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
1806     */
1807    public void SGER(float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
1808        int M = A.getType().getY();
1809        int N = A.getType().getX();
1810        validateGER(Element.F32(mRS), X, incX, Y, incY, A);
1811
1812        boolean mUseIncSupp = isIncSupp();
1813        long aID = A.getID(mRS);
1814        long xID = X.getID(mRS);
1815        long yID = Y.getID(mRS);
1816        if (mUseIncSupp) {
1817            aID = getDummyAlloc(A);
1818            xID = getDummyAlloc(X);
1819            yID = getDummyAlloc(Y);
1820        }
1821        mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sger, 0, 0, 0, 0, 0, M, N, 0, alpha, xID, yID, 0.f, aID, incX, incY, 0, 0, mUseIncSupp);
1822    }
1823
1824    /**
1825     * SSYR performs the rank 1 operation
1826     * A := alpha*x*x**T + A
1827     *
1828     * Details: http://www.netlib.org/lapack/explore-html/d6/dac/ssyr_8f.html
1829     *
1830     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
1831     * @param alpha The scalar alpha.
1832     * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1833     * @param incX The increment for the elements of vector x, must be larger than zero.
1834     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
1835     */
1836    public void SSYR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation A) {
1837        int N = validateSYR(Element.F32(mRS), Uplo, X, incX, A);
1838
1839        boolean mUseIncSupp = isIncSupp();
1840        long aID = A.getID(mRS);
1841        long xID = X.getID(mRS);
1842        if (mUseIncSupp) {
1843            aID = getDummyAlloc(A);
1844            xID = getDummyAlloc(X);
1845        }
1846        mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, aID, 0.f, 0, incX, 0, 0, 0, mUseIncSupp);
1847    }
1848
1849    /**
1850     * SSPR performs the rank 1 operation
1851     * A := alpha*x*x**T + A
1852     *
1853     * Details: http://www.netlib.org/lapack/explore-html/d2/d9b/sspr_8f.html
1854     *
1855     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
1856     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
1857     *       'a' to packed matrix 'b'.
1858     *           k = 0
1859     *           for i in range(0, n):
1860     *              for j in range(i, n):
1861     *                  b[k++] = a[i, j]
1862     *
1863     * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
1864     * @param alpha The scalar alpha.
1865     * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1866     * @param incX The increment for the elements of vector x, must be larger than zero.
1867     * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32}.
1868     */
1869    public void SSPR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Ap) {
1870        int N = validateSPR(Element.F32(mRS), Uplo, X, incX, Ap);
1871
1872        boolean mUseIncSupp = isIncSupp();
1873        long apID = Ap.getID(mRS);
1874        long xID = X.getID(mRS);
1875        if (mUseIncSupp) {
1876            apID = getDummyAlloc(Ap);
1877            xID = getDummyAlloc(X);
1878        }
1879        mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sspr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, apID, 0.f, 0, incX, 0, 0, 0, mUseIncSupp);
1880    }
1881
1882    /**
1883     * SSYR2 performs the symmetric rank 2 operation
1884     * A := alpha*x*y**T + alpha*y*x**T + A
1885     *
1886     * Details: http://www.netlib.org/lapack/explore-html/db/d99/ssyr2_8f.html
1887     *
1888     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
1889     * @param alpha The scalar alpha.
1890     * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1891     * @param incX The increment for the elements of vector x, must be larger than zero.
1892     * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
1893     * @param incY The increment for the elements of vector y, must be larger than zero.
1894     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
1895     */
1896    public void SSYR2(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
1897        int N = validateSYR2(Element.F32(mRS), Uplo, X, incX, Y, incY, A);
1898
1899        boolean mUseIncSupp = isIncSupp();
1900        long aID = A.getID(mRS);
1901        long xID = X.getID(mRS);
1902        long yID = Y.getID(mRS);
1903        if (mUseIncSupp) {
1904            aID = getDummyAlloc(A);
1905            xID = getDummyAlloc(X);
1906            yID = getDummyAlloc(Y);
1907        }
1908        mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, yID, 0, aID, incX, incY, 0, 0, mUseIncSupp);
1909    }
1910
1911    /**
1912     * SSPR2 performs the symmetric rank 2 operation
1913     * A := alpha*x*y**T + alpha*y*x**T + A
1914     *
1915     * Details: http://www.netlib.org/lapack/explore-html/db/d3e/sspr2_8f.html
1916     *
1917     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
1918     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
1919     *       'a' to packed matrix 'b'.
1920     *           k = 0
1921     *           for i in range(0, n):
1922     *              for j in range(i, n):
1923     *                  b[k++] = a[i, j]
1924     *
1925     * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
1926     * @param alpha The scalar alpha.
1927     * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1928     * @param incX The increment for the elements of vector x, must be larger than zero.
1929     * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
1930     * @param incY The increment for the elements of vector y, must be larger than zero.
1931     * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32}.
1932     */
1933    public void SSPR2(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) {
1934        int N = validateSPR2(Element.F32(mRS), Uplo, X, incX, Y, incY, Ap);
1935
1936        boolean mUseIncSupp = isIncSupp();
1937        long apID = Ap.getID(mRS);
1938        long xID = X.getID(mRS);
1939        long yID = Y.getID(mRS);
1940        if (mUseIncSupp) {
1941            apID = getDummyAlloc(Ap);
1942            xID = getDummyAlloc(X);
1943            yID = getDummyAlloc(Y);
1944        }
1945        mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sspr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, yID, 0, apID, incX, incY, 0, 0, mUseIncSupp);
1946    }
1947
1948    /**
1949     * DSYMV performs the matrix-vector operation
1950     * y := alpha*A*x + beta*y
1951     *
1952     * Details: http://www.netlib.org/lapack/explore-html/d8/dbe/dsymv_8f.html
1953     *
1954     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
1955     * @param alpha The scalar alpha.
1956     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
1957     * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
1958     * @param incX The increment for the elements of vector x, must be larger than zero.
1959     * @param beta The scalar beta.
1960     * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
1961     * @param incY The increment for the elements of vector y, must be larger than zero.
1962     */
1963    public void DSYMV(@Uplo int Uplo, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) {
1964        int N = validateSYMV(Element.F64(mRS), Uplo, A, X, Y, incX, incY);
1965
1966        boolean mUseIncSupp = isIncSupp();
1967        long aID = A.getID(mRS);
1968        long xID = X.getID(mRS);
1969        long yID = Y.getID(mRS);
1970        if (mUseIncSupp) {
1971            aID = getDummyAlloc(A);
1972            xID = getDummyAlloc(X);
1973            yID = getDummyAlloc(Y);
1974        }
1975        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsymv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, aID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp);
1976    }
1977
1978    /**
1979     * DSBMV performs the matrix-vector operation
1980     * y := alpha*A*x + beta*y
1981     *
1982     * Details: http://www.netlib.org/lapack/explore-html/d8/d1e/dsbmv_8f.html
1983     *
1984     * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
1985     *       but only the region N*(K+1) will be referenced. The following subroutine can is an
1986     *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
1987     *           for i in range(0, n):
1988     *              for j in range(i, min(i+k+1, n)):
1989     *                  b[i, j-i] = a[i, j]
1990     *
1991     * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied.
1992     * @param K The number of off-diagonals of the matrix A
1993     * @param alpha The scalar alpha.
1994     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
1995     * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
1996     * @param incX The increment for the elements of vector x, must be larger than zero.
1997     * @param beta The scalar beta.
1998     * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
1999     * @param incY The increment for the elements of vector y, must be larger than zero.
2000     */
2001    public void DSBMV(@Uplo int Uplo, int K, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) {
2002        // SBMV is the same as SYMV + K >= 0
2003        if (K < 0) {
2004            throw new RSRuntimeException("K must be greater than or equal to 0");
2005        }
2006        int N = validateSYMV(Element.F64(mRS), Uplo, A, X, Y, incX, incY);
2007
2008        boolean mUseIncSupp = isIncSupp();
2009        long aID = A.getID(mRS);
2010        long xID = X.getID(mRS);
2011        long yID = Y.getID(mRS);
2012        if (mUseIncSupp) {
2013            aID = getDummyAlloc(A);
2014            xID = getDummyAlloc(X);
2015            yID = getDummyAlloc(Y);
2016        }
2017        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha, aID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp);
2018    }
2019
2020    /**
2021     * DSPMV performs the matrix-vector operation
2022     * y := alpha*A*x + beta*y
2023     *
2024     * Details: http://www.netlib.org/lapack/explore-html/d4/d85/dspmv_8f.html
2025     *
2026     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
2027     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
2028     *       'a' to packed matrix 'b'.
2029     *           k = 0
2030     *           for i in range(0, n):
2031     *              for j in range(i, n):
2032     *                  b[k++] = a[i, j]
2033     *
2034     * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form.
2035     * @param alpha The scalar alpha.
2036     * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64}.
2037     * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
2038     * @param incX The increment for the elements of vector x, must be larger than zero.
2039     * @param beta The scalar beta.
2040     * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
2041     * @param incY The increment for the elements of vector y, must be larger than zero.
2042     */
2043    public void DSPMV(@Uplo int Uplo, double alpha, Allocation Ap, Allocation X, int incX, double beta, Allocation Y, int incY) {
2044        int N = validateSPMV(Element.F64(mRS), Uplo, Ap, X, incX, Y, incY);
2045
2046        boolean mUseIncSupp = isIncSupp();
2047        long apID = Ap.getID(mRS);
2048        long xID = X.getID(mRS);
2049        long yID = Y.getID(mRS);
2050        if (mUseIncSupp) {
2051            apID = getDummyAlloc(Ap);
2052            xID = getDummyAlloc(X);
2053            yID = getDummyAlloc(Y);
2054        }
2055        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dspmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, apID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp);
2056    }
2057
2058    /**
2059     * DGER performs the rank 1 operation
2060     * A := alpha*x*y**T + A
2061     *
2062     * Details: http://www.netlib.org/lapack/explore-html/dc/da8/dger_8f.html
2063     *
2064     * @param alpha The scalar alpha.
2065     * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
2066     * @param incX The increment for the elements of vector x, must be larger than zero.
2067     * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
2068     * @param incY The increment for the elements of vector y, must be larger than zero.
2069     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
2070     */
2071    public void DGER(double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
2072        int M = A.getType().getY();
2073        int N = A.getType().getX();
2074        validateGER(Element.F64(mRS), X, incX, Y, incY, A);
2075
2076        boolean mUseIncSupp = isIncSupp();
2077        long aID = A.getID(mRS);
2078        long xID = X.getID(mRS);
2079        long yID = Y.getID(mRS);
2080        if (mUseIncSupp) {
2081            aID = getDummyAlloc(A);
2082            xID = getDummyAlloc(X);
2083            yID = getDummyAlloc(Y);
2084        }
2085        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dger, 0, 0, 0, 0, 0, M, N, 0, alpha, xID, yID, 0.f, aID, incX, incY, 0, 0, mUseIncSupp);
2086    }
2087
2088    /**
2089     * DSYR performs the rank 1 operation
2090     * A := alpha*x*x**T + A
2091     *
2092     * Details: http://www.netlib.org/lapack/explore-html/d3/d60/dsyr_8f.html
2093     *
2094     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2095     * @param alpha The scalar alpha.
2096     * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
2097     * @param incX The increment for the elements of vector x, must be larger than zero.
2098     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
2099     */
2100    public void DSYR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation A) {
2101        int N = validateSYR(Element.F64(mRS), Uplo, X, incX, A);
2102
2103        boolean mUseIncSupp = isIncSupp();
2104        long aID = A.getID(mRS);
2105        long xID = X.getID(mRS);
2106        if (mUseIncSupp) {
2107            aID = getDummyAlloc(A);
2108            xID = getDummyAlloc(X);
2109        }
2110        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, aID, 0.f, 0, incX, 0, 0, 0, mUseIncSupp);
2111    }
2112
2113    /**
2114     * DSPR performs the rank 1 operation
2115     * A := alpha*x*x**T + A
2116     *
2117     * Details: http://www.netlib.org/lapack/explore-html/dd/dba/dspr_8f.html
2118     *
2119     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
2120     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
2121     *       'a' to packed matrix 'b'.
2122     *           k = 0
2123     *           for i in range(0, n):
2124     *              for j in range(i, n):
2125     *                  b[k++] = a[i, j]
2126     *
2127     * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
2128     * @param alpha The scalar alpha.
2129     * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
2130     * @param incX The increment for the elements of vector x, must be larger than zero.
2131     * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64}.
2132     */
2133    public void DSPR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Ap) {
2134        int N = validateSPR(Element.F64(mRS), Uplo, X, incX, Ap);
2135
2136        boolean mUseIncSupp = isIncSupp();
2137        long apID = Ap.getID(mRS);
2138        long xID = X.getID(mRS);
2139        if (mUseIncSupp) {
2140            apID = getDummyAlloc(Ap);
2141            xID = getDummyAlloc(X);
2142        }
2143        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dspr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, apID, 0.f, 0, incX, 0, 0, 0, mUseIncSupp);
2144    }
2145
2146    /**
2147     * DSYR2 performs the symmetric rank 2 operation
2148     * A := alpha*x*y**T + alpha*y*x**T + A
2149     *
2150     * Details: http://www.netlib.org/lapack/explore-html/de/d41/dsyr2_8f.html
2151     *
2152     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2153     * @param alpha The scalar alpha.
2154     * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
2155     * @param incX The increment for the elements of vector x, must be larger than zero.
2156     * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
2157     * @param incY The increment for the elements of vector y, must be larger than zero.
2158     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
2159     */
2160    public void DSYR2(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
2161        int N = validateSYR2(Element.F64(mRS), Uplo, X, incX, Y, incY, A);
2162
2163        boolean mUseIncSupp = isIncSupp();
2164        long aID = A.getID(mRS);
2165        long xID = X.getID(mRS);
2166        long yID = Y.getID(mRS);
2167        if (mUseIncSupp) {
2168            aID = getDummyAlloc(A);
2169            xID = getDummyAlloc(X);
2170            yID = getDummyAlloc(Y);
2171        }
2172        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, yID, 0, aID, incX, incY, 0, 0, mUseIncSupp);
2173    }
2174
2175    /**
2176     * DSPR2 performs the symmetric rank 2 operation
2177     * A := alpha*x*y**T + alpha*y*x**T + A
2178     *
2179     * Details: http://www.netlib.org/lapack/explore-html/dd/d9e/dspr2_8f.html
2180     *
2181     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
2182     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
2183     *       'a' to packed matrix 'b'.
2184     *           k = 0
2185     *           for i in range(0, n):
2186     *              for j in range(i, n):
2187     *                  b[k++] = a[i, j]
2188     *
2189     * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
2190     * @param alpha The scalar alpha.
2191     * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
2192     * @param incX The increment for the elements of vector x, must be larger than zero.
2193     * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
2194     * @param incY The increment for the elements of vector y, must be larger than zero.
2195     * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64}.
2196     */
2197    public void DSPR2(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) {
2198        int N = validateSPR2(Element.F64(mRS), Uplo, X, incX, Y, incY, Ap);
2199
2200        boolean mUseIncSupp = isIncSupp();
2201        long apID = Ap.getID(mRS);
2202        long xID = X.getID(mRS);
2203        long yID = Y.getID(mRS);
2204        if (mUseIncSupp) {
2205            apID = getDummyAlloc(Ap);
2206            xID = getDummyAlloc(X);
2207            yID = getDummyAlloc(Y);
2208        }
2209        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dspr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, yID, 0, apID, incX, incY, 0, 0, mUseIncSupp);
2210    }
2211
2212
2213    /**
2214     * Level 2, C and Z only
2215     */
2216
2217    static void validateGERU(Element e, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
2218        if (!A.getType().getElement().isCompatible(e) ||
2219            !X.getType().getElement().isCompatible(e) ||
2220            !Y.getType().getElement().isCompatible(e)) {
2221            throw new RSRuntimeException("Called BLAS with wrong Element type");
2222        }
2223        if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
2224            throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
2225        }
2226
2227        int M = A.getType().getY();
2228        int N = A.getType().getX();
2229        if (incX <= 0 || incY <= 0) {
2230            throw new RSRuntimeException("Vector increments must be greater than 0");
2231        }
2232        int expectedXDim = 1 + (M - 1) * incX;
2233        if (X.getType().getX() != expectedXDim) {
2234            throw new RSRuntimeException("Incorrect vector dimensions for GERU");
2235        }
2236        int expectedYDim = 1 + (N - 1) * incY;
2237        if (Y.getType().getX() != expectedYDim) {
2238            throw new RSRuntimeException("Incorrect vector dimensions for GERU");
2239        }
2240
2241    }
2242
2243    /**
2244     * CHEMV performs the matrix-vector operation
2245     * y := alpha*A*x + beta*y
2246     *
2247     * Details: http://www.netlib.org/lapack/explore-html/d7/d51/chemv_8f.html
2248     *
2249     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2250     * @param alpha The scalar alpha.
2251     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
2252     * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
2253     * @param incX The increment for the elements of vector x, must be larger than zero.
2254     * @param beta The scalar beta.
2255     * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
2256     * @param incY The increment for the elements of vector y, must be larger than zero.
2257     */
2258    public void CHEMV(@Uplo int Uplo, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) {
2259        // HEMV is the same as SYR2 validation-wise
2260        int N = validateSYR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, A);
2261
2262        boolean mUseIncSupp = isIncSupp();
2263        long aID = A.getID(mRS);
2264        long xID = X.getID(mRS);
2265        long yID = Y.getID(mRS);
2266        if (mUseIncSupp) {
2267            aID = getDummyAlloc(A);
2268            xID = getDummyAlloc(X);
2269            yID = getDummyAlloc(Y);
2270        }
2271        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chemv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp);
2272    }
2273
2274    /**
2275     * CHBMV performs the matrix-vector operation
2276     * y := alpha*A*x + beta*y
2277     *
2278     * Details: http://www.netlib.org/lapack/explore-html/db/dc2/chbmv_8f.html
2279     *
2280     * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
2281     *       but only the region N*(K+1) will be referenced. The following subroutine can is an
2282     *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
2283     *           for i in range(0, n):
2284     *              for j in range(i, min(i+k+1, n)):
2285     *                  b[i, j-i] = a[i, j]
2286     *
2287     * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied.
2288     * @param K The number of off-diagonals of the matrix A
2289     * @param alpha The scalar alpha.
2290     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
2291     * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
2292     * @param incX The increment for the elements of vector x, must be larger than zero.
2293     * @param beta The scalar beta.
2294     * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
2295     * @param incY The increment for the elements of vector y, must be larger than zero.
2296     */
2297    public void CHBMV(@Uplo int Uplo, int K, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) {
2298        // HBMV is the same as SYR2 validation-wise
2299        int N = validateSYR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, A);
2300        if (K < 0) {
2301            throw new RSRuntimeException("K must be 0 or greater for HBMV");
2302        }
2303
2304        boolean mUseIncSupp = isIncSupp();
2305        long aID = A.getID(mRS);
2306        long xID = X.getID(mRS);
2307        long yID = Y.getID(mRS);
2308        if (mUseIncSupp) {
2309            aID = getDummyAlloc(A);
2310            xID = getDummyAlloc(X);
2311            yID = getDummyAlloc(Y);
2312        }
2313        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp);
2314    }
2315
2316    /**
2317     * CHPMV performs the matrix-vector operation
2318     * y := alpha*A*x + beta*y
2319     *
2320     * Details: http://www.netlib.org/lapack/explore-html/d2/d06/chpmv_8f.html
2321     *
2322     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
2323     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
2324     *       'a' to packed matrix 'b'.
2325     *           k = 0
2326     *           for i in range(0, n):
2327     *              for j in range(i, n):
2328     *                  b[k++] = a[i, j]
2329     *
2330     * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form.
2331     * @param alpha The scalar alpha.
2332     * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
2333     * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
2334     * @param incX The increment for the elements of vector x, must be larger than zero.
2335     * @param beta The scalar beta.
2336     * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
2337     * @param incY The increment for the elements of vector y, must be larger than zero.
2338     */
2339    public void CHPMV(@Uplo int Uplo, Float2 alpha, Allocation Ap, Allocation X, int incX, Float2 beta, Allocation Y, int incY) {
2340        // HPMV is the same as SPR2
2341        int N = validateSPR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, Ap);
2342
2343        boolean mUseIncSupp = isIncSupp();
2344        long apID = Ap.getID(mRS);
2345        long xID = X.getID(mRS);
2346        long yID = Y.getID(mRS);
2347        if (mUseIncSupp) {
2348            apID = getDummyAlloc(Ap);
2349            xID = getDummyAlloc(X);
2350            yID = getDummyAlloc(Y);
2351        }
2352        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chpmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, apID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp);
2353    }
2354
2355    /**
2356     * CGERU performs the rank 1 operation
2357     * A := alpha*x*y**T + A
2358     *
2359     * Details: http://www.netlib.org/lapack/explore-html/db/d5f/cgeru_8f.html
2360     *
2361     * @param alpha The scalar alpha.
2362     * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
2363     * @param incX The increment for the elements of vector x, must be larger than zero.
2364     * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
2365     * @param incY The increment for the elements of vector y, must be larger than zero.
2366     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
2367     */
2368    public void CGERU(Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
2369        validateGERU(Element.F32_2(mRS), X, incX, Y, incY, A);
2370        int M = A.getType().getY();
2371        int N = A.getType().getX();
2372
2373        boolean mUseIncSupp = isIncSupp();
2374        long aID = A.getID(mRS);
2375        long xID = X.getID(mRS);
2376        long yID = Y.getID(mRS);
2377        if (mUseIncSupp) {
2378            aID = getDummyAlloc(A);
2379            xID = getDummyAlloc(X);
2380            yID = getDummyAlloc(Y);
2381        }
2382        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgeru, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, aID, incX, incY, 0, 0, mUseIncSupp);
2383    }
2384
2385    /**
2386     * CGERC performs the rank 1 operation
2387     * A := alpha*x*y**H + A
2388     *
2389     * Details: http://www.netlib.org/lapack/explore-html/dd/d84/cgerc_8f.html
2390     *
2391     * @param alpha The scalar alpha.
2392     * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
2393     * @param incX The increment for the elements of vector x, must be larger than zero.
2394     * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
2395     * @param incY The increment for the elements of vector y, must be larger than zero.
2396     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
2397     */
2398    public void CGERC(Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
2399        // same as GERU
2400        validateGERU(Element.F32_2(mRS), X, incX, Y, incY, A);
2401        int M = A.getType().getY();
2402        int N = A.getType().getX();
2403
2404        boolean mUseIncSupp = isIncSupp();
2405        long aID = A.getID(mRS);
2406        long xID = X.getID(mRS);
2407        long yID = Y.getID(mRS);
2408        if (mUseIncSupp) {
2409            aID = getDummyAlloc(A);
2410            xID = getDummyAlloc(X);
2411            yID = getDummyAlloc(Y);
2412        }
2413        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgerc, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, aID, incX, incY, 0, 0, mUseIncSupp);
2414    }
2415
2416    /**
2417     * CHER performs the rank 1 operation
2418     * A := alpha*x*x**H + A
2419     *
2420     * Details: http://www.netlib.org/lapack/explore-html/d3/d6d/cher_8f.html
2421     *
2422     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2423     * @param alpha The scalar alpha.
2424     * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
2425     * @param incX The increment for the elements of vector x, must be larger than zero.
2426     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
2427     */
2428    public void CHER(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation A) {
2429        // same as SYR
2430        int N = validateSYR(Element.F32_2(mRS), Uplo, X, incX, A);
2431
2432        boolean mUseIncSupp = isIncSupp();
2433        long aID = A.getID(mRS);
2434        long xID = X.getID(mRS);
2435        if (mUseIncSupp) {
2436            aID = getDummyAlloc(A);
2437            xID = getDummyAlloc(X);
2438        }
2439        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cher, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, xID, 0, 0, 0, aID, incX, 0, 0, 0, mUseIncSupp);
2440    }
2441
2442    /**
2443     * CHPR performs the rank 1 operation
2444     * A := alpha*x*x**H + A
2445     *
2446     * Details: http://www.netlib.org/lapack/explore-html/db/dcd/chpr_8f.html
2447     *
2448     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
2449     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
2450     *       'a' to packed matrix 'b'.
2451     *           k = 0
2452     *           for i in range(0, n):
2453     *              for j in range(i, n):
2454     *                  b[k++] = a[i, j]
2455     *
2456     * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
2457     * @param alpha The scalar alpha.
2458     * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
2459     * @param incX The increment for the elements of vector x, must be larger than zero.
2460     * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
2461     */
2462    public void CHPR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Ap) {
2463        // equivalent to SPR for validation
2464        int N = validateSPR(Element.F32_2(mRS), Uplo, X, incX, Ap);
2465
2466        boolean mUseIncSupp = isIncSupp();
2467        long apID = Ap.getID(mRS);
2468        long xID = X.getID(mRS);
2469        if (mUseIncSupp) {
2470            apID = getDummyAlloc(Ap);
2471            xID = getDummyAlloc(X);
2472        }
2473        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chpr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, xID, 0, 0, 0, apID, incX, 0, 0, 0, mUseIncSupp);
2474    }
2475
2476    /**
2477     * CHER2 performs the symmetric rank 2 operation
2478     * A := alpha*x*y**H + alpha*y*x**H + A
2479     *
2480     * Details: http://www.netlib.org/lapack/explore-html/db/d87/cher2_8f.html
2481     *
2482     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2483     * @param alpha The scalar alpha.
2484     * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
2485     * @param incX The increment for the elements of vector x, must be larger than zero.
2486     * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
2487     * @param incY The increment for the elements of vector y, must be larger than zero.
2488     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
2489     */
2490    public void CHER2(@Uplo int Uplo, Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
2491        // same as SYR2
2492        int N = validateSYR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, A);
2493
2494        boolean mUseIncSupp = isIncSupp();
2495        long aID = A.getID(mRS);
2496        long xID = X.getID(mRS);
2497        long yID = Y.getID(mRS);
2498        if (mUseIncSupp) {
2499            aID = getDummyAlloc(A);
2500            xID = getDummyAlloc(X);
2501            yID = getDummyAlloc(Y);
2502        }
2503        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cher2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, aID, incX, incY, 0, 0, mUseIncSupp);
2504    }
2505
2506    /**
2507     * CHPR2 performs the symmetric rank 2 operation
2508     * A := alpha*x*y**H + alpha*y*x**H + A
2509     *
2510     * Details: http://www.netlib.org/lapack/explore-html/d6/d44/chpr2_8f.html
2511     *
2512     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
2513     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
2514     *       'a' to packed matrix 'b'.
2515     *           k = 0
2516     *           for i in range(0, n):
2517     *              for j in range(i, n):
2518     *                  b[k++] = a[i, j]
2519     *
2520     * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
2521     * @param alpha The scalar alpha.
2522     * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
2523     * @param incX The increment for the elements of vector x, must be larger than zero.
2524     * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
2525     * @param incY The increment for the elements of vector y, must be larger than zero.
2526     * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
2527     */
2528    public void CHPR2(@Uplo int Uplo, Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) {
2529        // same as SPR2
2530        int N = validateSPR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, Ap);
2531
2532        boolean mUseIncSupp = isIncSupp();
2533        long apID = Ap.getID(mRS);
2534        long xID = X.getID(mRS);
2535        long yID = Y.getID(mRS);
2536        if (mUseIncSupp) {
2537            apID = getDummyAlloc(Ap);
2538            xID = getDummyAlloc(X);
2539            yID = getDummyAlloc(Y);
2540        }
2541        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chpr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, apID, incX, incY, 0, 0, mUseIncSupp);
2542    }
2543
2544    /**
2545     * ZHEMV performs the matrix-vector operation
2546     * y := alpha*A*x + beta*y
2547     *
2548     * Details: http://www.netlib.org/lapack/explore-html/d0/ddd/zhemv_8f.html
2549     *
2550     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2551     * @param alpha The scalar alpha.
2552     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2553     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2554     * @param incX The increment for the elements of vector x, must be larger than zero.
2555     * @param beta The scalar beta.
2556     * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
2557     * @param incY The increment for the elements of vector y, must be larger than zero.
2558     */
2559    public void ZHEMV(@Uplo int Uplo, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) {
2560        // HEMV is the same as SYR2 validation-wise
2561        int N = validateSYR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, A);
2562
2563        boolean mUseIncSupp = isIncSupp();
2564        long aID = A.getID(mRS);
2565        long xID = X.getID(mRS);
2566        long yID = Y.getID(mRS);
2567        if (mUseIncSupp) {
2568            aID = getDummyAlloc(A);
2569            xID = getDummyAlloc(X);
2570            yID = getDummyAlloc(Y);
2571        }
2572        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhemv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp);
2573    }
2574
2575    /**
2576     * ZHBMV performs the matrix-vector operation
2577     * y := alpha*A*x + beta*y
2578     *
2579     * Details: http://www.netlib.org/lapack/explore-html/d3/d1a/zhbmv_8f.html
2580     *
2581     * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
2582     *       but only the region N*(K+1) will be referenced. The following subroutine can is an
2583     *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
2584     *           for i in range(0, n):
2585     *              for j in range(i, min(i+k+1, n)):
2586     *                  b[i, j-i] = a[i, j]
2587     *
2588     * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied.
2589     * @param K The number of off-diagonals of the matrix A
2590     * @param alpha The scalar alpha.
2591     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2592     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2593     * @param incX The increment for the elements of vector x, must be larger than zero.
2594     * @param beta The scalar beta.
2595     * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
2596     * @param incY The increment for the elements of vector y, must be larger than zero.
2597     */
2598    public void ZHBMV(@Uplo int Uplo, int K, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) {
2599        // HBMV is the same as SYR2 validation-wise
2600        int N = validateSYR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, A);
2601        if (K < 0) {
2602            throw new RSRuntimeException("K must be 0 or greater for HBMV");
2603        }
2604
2605        boolean mUseIncSupp = isIncSupp();
2606        long aID = A.getID(mRS);
2607        long xID = X.getID(mRS);
2608        long yID = Y.getID(mRS);
2609        if (mUseIncSupp) {
2610            aID = getDummyAlloc(A);
2611            xID = getDummyAlloc(X);
2612            yID = getDummyAlloc(Y);
2613        }
2614        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp);
2615    }
2616
2617    /**
2618     * ZHPMV performs the matrix-vector operation
2619     * y := alpha*A*x + beta*y
2620     *
2621     * Details: http://www.netlib.org/lapack/explore-html/d0/d60/zhpmv_8f.html
2622     *
2623     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
2624     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
2625     *       'a' to packed matrix 'b'.
2626     *           k = 0
2627     *           for i in range(0, n):
2628     *              for j in range(i, n):
2629     *                  b[k++] = a[i, j]
2630     *
2631     * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form.
2632     * @param alpha The scalar alpha.
2633     * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2634     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2635     * @param incX The increment for the elements of vector x, must be larger than zero.
2636     * @param beta The scalar beta.
2637     * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
2638     * @param incY The increment for the elements of vector y, must be larger than zero.
2639     */
2640    public void ZHPMV(@Uplo int Uplo, Double2 alpha, Allocation Ap, Allocation X, int incX, Double2 beta, Allocation Y, int incY) {
2641        // HPMV is the same as SPR2
2642        int N = validateSPR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, Ap);
2643
2644        boolean mUseIncSupp = isIncSupp();
2645        long apID = Ap.getID(mRS);
2646        long xID = X.getID(mRS);
2647        long yID = Y.getID(mRS);
2648        if (mUseIncSupp) {
2649            apID = getDummyAlloc(Ap);
2650            xID = getDummyAlloc(X);
2651            yID = getDummyAlloc(Y);
2652        }
2653        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhpmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, apID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp);
2654    }
2655
2656    /**
2657     * ZGERU performs the rank 1 operation
2658     * A := alpha*x*y**T + A
2659     *
2660     * Details: http://www.netlib.org/lapack/explore-html/d7/d12/zgeru_8f.html
2661     *
2662     * @param alpha The scalar alpha.
2663     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2664     * @param incX The increment for the elements of vector x, must be larger than zero.
2665     * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
2666     * @param incY The increment for the elements of vector y, must be larger than zero.
2667     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2668     */
2669    public void ZGERU(Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
2670        validateGERU(Element.F64_2(mRS), X, incX, Y, incY, A);
2671        int M = A.getType().getY();
2672        int N = A.getType().getX();
2673
2674        boolean mUseIncSupp = isIncSupp();
2675        long aID = A.getID(mRS);
2676        long xID = X.getID(mRS);
2677        long yID = Y.getID(mRS);
2678        if (mUseIncSupp) {
2679            aID = getDummyAlloc(A);
2680            xID = getDummyAlloc(X);
2681            yID = getDummyAlloc(Y);
2682        }
2683        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgeru, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, aID, incX, incY, 0, 0, mUseIncSupp);
2684    }
2685
2686    /**
2687     * ZGERC performs the rank 1 operation
2688     * A := alpha*x*y**H + A
2689     *
2690     * Details: http://www.netlib.org/lapack/explore-html/d3/dad/zgerc_8f.html
2691     *
2692     * @param alpha The scalar alpha.
2693     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2694     * @param incX The increment for the elements of vector x, must be larger than zero.
2695     * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
2696     * @param incY The increment for the elements of vector y, must be larger than zero.
2697     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2698     */
2699    public void ZGERC(Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
2700        // same as GERU
2701        validateGERU(Element.F64_2(mRS), X, incX, Y, incY, A);
2702        int M = A.getType().getY();
2703        int N = A.getType().getX();
2704
2705        boolean mUseIncSupp = isIncSupp();
2706        long aID = A.getID(mRS);
2707        long xID = X.getID(mRS);
2708        long yID = Y.getID(mRS);
2709        if (mUseIncSupp) {
2710            aID = getDummyAlloc(A);
2711            xID = getDummyAlloc(X);
2712            yID = getDummyAlloc(Y);
2713        }
2714        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgerc, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, aID, incX, incY, 0, 0, mUseIncSupp);
2715    }
2716
2717    /**
2718     * ZHER performs the rank 1 operation
2719     * A := alpha*x*x**H + A
2720     *
2721     * Details: http://www.netlib.org/lapack/explore-html/de/d0e/zher_8f.html
2722     *
2723     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2724     * @param alpha The scalar alpha.
2725     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2726     * @param incX The increment for the elements of vector x, must be larger than zero.
2727     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2728     */
2729    public void ZHER(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation A) {
2730        // same as SYR
2731        int N = validateSYR(Element.F64_2(mRS), Uplo, X, incX, A);
2732
2733        boolean mUseIncSupp = isIncSupp();
2734        long aID = A.getID(mRS);
2735        long xID = X.getID(mRS);
2736        if (mUseIncSupp) {
2737            aID = getDummyAlloc(A);
2738            xID = getDummyAlloc(X);
2739        }
2740        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zher, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, xID, 0, 0, 0, aID, incX, 0, 0, 0, mUseIncSupp);
2741    }
2742
2743    /**
2744     * ZHPR performs the rank 1 operation
2745     * A := alpha*x*x**H + A
2746     *
2747     * Details: http://www.netlib.org/lapack/explore-html/de/de1/zhpr_8f.html
2748     *
2749     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
2750     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
2751     *       'a' to packed matrix 'b'.
2752     *           k = 0
2753     *           for i in range(0, n):
2754     *              for j in range(i, n):
2755     *                  b[k++] = a[i, j]
2756     *
2757     * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
2758     * @param alpha The scalar alpha.
2759     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2760     * @param incX The increment for the elements of vector x, must be larger than zero.
2761     * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2762     */
2763    public void ZHPR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Ap) {
2764        // equivalent to SPR for validation
2765        int N = validateSPR(Element.F64_2(mRS), Uplo, X, incX, Ap);
2766
2767        boolean mUseIncSupp = isIncSupp();
2768        long apID = Ap.getID(mRS);
2769        long xID = X.getID(mRS);
2770        if (mUseIncSupp) {
2771            apID = getDummyAlloc(Ap);
2772            xID = getDummyAlloc(X);
2773        }
2774        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhpr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, xID, 0, 0, 0, apID, incX, 0, 0, 0, mUseIncSupp);
2775    }
2776
2777    /**
2778     * ZHER2 performs the symmetric rank 2 operation
2779     * A := alpha*x*y**H + alpha*y*x**H + A
2780     *
2781     * Details: http://www.netlib.org/lapack/explore-html/da/d8a/zher2_8f.html
2782     *
2783     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2784     * @param alpha The scalar alpha.
2785     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2786     * @param incX The increment for the elements of vector x, must be larger than zero.
2787     * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
2788     * @param incY The increment for the elements of vector y, must be larger than zero.
2789     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2790     */
2791    public void ZHER2(@Uplo int Uplo, Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
2792        // same as SYR2
2793        int N = validateSYR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, A);
2794
2795        boolean mUseIncSupp = isIncSupp();
2796        long aID = A.getID(mRS);
2797        long xID = X.getID(mRS);
2798        long yID = Y.getID(mRS);
2799        if (mUseIncSupp) {
2800            aID = getDummyAlloc(A);
2801            xID = getDummyAlloc(X);
2802            yID = getDummyAlloc(Y);
2803        }
2804        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zher2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, aID, incX, incY, 0, 0, mUseIncSupp);
2805    }
2806
2807    /**
2808     * ZHPR2 performs the symmetric rank 2 operation
2809     * A := alpha*x*y**H + alpha*y*x**H + A
2810     *
2811     * Details: http://www.netlib.org/lapack/explore-html/d5/d52/zhpr2_8f.html
2812     *
2813     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
2814     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
2815     *       'a' to packed matrix 'b'.
2816     *           k = 0
2817     *           for i in range(0, n):
2818     *              for j in range(i, n):
2819     *                  b[k++] = a[i, j]
2820     *
2821     * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
2822     * @param alpha The scalar alpha.
2823     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2824     * @param incX The increment for the elements of vector x, must be larger than zero.
2825     * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
2826     * @param incY The increment for the elements of vector y, must be larger than zero.
2827     * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2828     */
2829    public void ZHPR2(@Uplo int Uplo, Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) {
2830        // same as SPR2
2831        int N = validateSPR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, Ap);
2832
2833        boolean mUseIncSupp = isIncSupp();
2834        long apID = Ap.getID(mRS);
2835        long xID = X.getID(mRS);
2836        long yID = Y.getID(mRS);
2837        if (mUseIncSupp) {
2838            apID = getDummyAlloc(Ap);
2839            xID = getDummyAlloc(X);
2840            yID = getDummyAlloc(Y);
2841        }
2842        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhpr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, apID, incX, incY, 0, 0, mUseIncSupp);
2843    }
2844
2845
2846    /**
2847     * Level 3 BLAS
2848     */
2849
2850    static void validateL3(Element e, int TransA, int TransB, int Side, Allocation A, Allocation B, Allocation C) {
2851        int aM = -1, aN = -1, bM = -1, bN = -1, cM = -1, cN = -1;
2852        if ((A != null && !A.getType().getElement().isCompatible(e)) ||
2853            (B != null && !B.getType().getElement().isCompatible(e)) ||
2854            (C != null && !C.getType().getElement().isCompatible(e))) {
2855            throw new RSRuntimeException("Called BLAS with wrong Element type");
2856        }
2857        if (C == null) {
2858            //since matrix C is used to store the result, it cannot be null.
2859            throw new RSRuntimeException("Allocation C cannot be null");
2860        }
2861        cM = C.getType().getY();
2862        cN = C.getType().getX();
2863
2864        if (Side == RIGHT) {
2865            if ((A == null && B != null) || (A != null && B == null)) {
2866                throw new RSRuntimeException("Provided Matrix A without Matrix B, or vice versa");
2867            }
2868            if (B != null) {
2869                bM = A.getType().getY();
2870                bN = A.getType().getX();
2871            }
2872            if (A != null) {
2873                aM = B.getType().getY();
2874                aN = B.getType().getX();
2875            }
2876        } else {
2877            if (A != null) {
2878                if (TransA == TRANSPOSE || TransA == CONJ_TRANSPOSE) {
2879                    aN = A.getType().getY();
2880                    aM = A.getType().getX();
2881                } else {
2882                    aM = A.getType().getY();
2883                    aN = A.getType().getX();
2884                }
2885            }
2886            if (B != null) {
2887                if (TransB == TRANSPOSE || TransB == CONJ_TRANSPOSE) {
2888                    bN = B.getType().getY();
2889                    bM = B.getType().getX();
2890                } else {
2891                    bM = B.getType().getY();
2892                    bN = B.getType().getX();
2893                }
2894            }
2895        }
2896        if (A != null && B != null && C != null) {
2897            if (aN != bM || aM != cM || bN != cN) {
2898                throw new RSRuntimeException("Called BLAS with invalid dimensions");
2899            }
2900        } else if (A != null && C != null) {
2901            // A and C only, for SYRK
2902            if (cM != cN) {
2903                throw new RSRuntimeException("Matrix C is not symmetric");
2904            }
2905            if (aM != cM) {
2906                throw new RSRuntimeException("Called BLAS with invalid dimensions");
2907            }
2908        } else if (A != null && B != null) {
2909            // A and B only
2910            if (aN != bM) {
2911                throw new RSRuntimeException("Called BLAS with invalid dimensions");
2912            }
2913        }
2914
2915    }
2916
2917    /**
2918     * SGEMM performs one of the matrix-matrix operations
2919     * C := alpha*op(A)*op(B) + beta*C   where op(X) is one of op(X) = X  or  op(X) = X**T
2920     *
2921     * Details: http://www.netlib.org/lapack/explore-html/d4/de2/sgemm_8f.html
2922     *
2923     * @param TransA The type of transpose applied to matrix A.
2924     * @param TransB The type of transpose applied to matrix B.
2925     * @param alpha The scalar alpha.
2926     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
2927     * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}.
2928     * @param beta The scalar beta.
2929     * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}.
2930     */
2931    public void SGEMM(@Transpose int TransA, @Transpose int TransB, float alpha, Allocation A,
2932                      Allocation B, float beta, Allocation C) {
2933        validateTranspose(TransA);
2934        validateTranspose(TransB);
2935        validateL3(Element.F32(mRS), TransA, TransB, 0, A, B, C);
2936
2937        int M = -1, N = -1, K = -1;
2938        if (TransA != NO_TRANSPOSE) {
2939            M = A.getType().getX();
2940            K = A.getType().getY();
2941        } else {
2942            M = A.getType().getY();
2943            K = A.getType().getX();
2944        }
2945        if (TransB != NO_TRANSPOSE) {
2946            N = B.getType().getY();
2947        } else {
2948            N = B.getType().getX();
2949        }
2950
2951        boolean mUseIncSupp = isIncSupp();
2952        long aID = A.getID(mRS);
2953        long bID = B.getID(mRS);
2954        long cID = C.getID(mRS);
2955        if (mUseIncSupp) {
2956            aID = getDummyAlloc(A);
2957            bID = getDummyAlloc(B);
2958            cID = getDummyAlloc(C);
2959        }
2960        mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sgemm, TransA, TransB, 0, 0, 0, M, N, K,  alpha, aID, bID,
2961                                        beta, cID, 0, 0, 0, 0, mUseIncSupp);
2962    }
2963
2964    /**
2965     * DGEMM performs one of the matrix-matrix operations
2966     * C := alpha*op(A)*op(B) + beta*C   where op(X) is one of op(X) = X  or  op(X) = X**T
2967     *
2968     * Details: http://www.netlib.org/lapack/explore-html/d7/d2b/dgemm_8f.html
2969     *
2970     * @param TransA The type of transpose applied to matrix A.
2971     * @param TransB The type of transpose applied to matrix B.
2972     * @param alpha The scalar alpha.
2973     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
2974     * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}.
2975     * @param beta The scalar beta.
2976     * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}.
2977     */
2978    public void DGEMM(@Transpose int TransA, @Transpose int TransB, double alpha, Allocation A,
2979                      Allocation B, double beta, Allocation C) {
2980        validateTranspose(TransA);
2981        validateTranspose(TransB);
2982        validateL3(Element.F64(mRS), TransA, TransB, 0, A, B, C);
2983        int M = -1, N = -1, K = -1;
2984        if (TransA != NO_TRANSPOSE) {
2985            M = A.getType().getX();
2986            K = A.getType().getY();
2987        } else {
2988            M = A.getType().getY();
2989            K = A.getType().getX();
2990        }
2991        if (TransB != NO_TRANSPOSE) {
2992            N = B.getType().getY();
2993        } else {
2994            N = B.getType().getX();
2995        }
2996
2997        boolean mUseIncSupp = isIncSupp();
2998        long aID = A.getID(mRS);
2999        long bID = B.getID(mRS);
3000        long cID = C.getID(mRS);
3001        if (mUseIncSupp) {
3002            aID = getDummyAlloc(A);
3003            bID = getDummyAlloc(B);
3004            cID = getDummyAlloc(C);
3005        }
3006        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dgemm, TransA, TransB, 0, 0, 0, M, N, K,  alpha, aID, bID,
3007                                        beta, cID, 0, 0, 0, 0, mUseIncSupp);
3008    }
3009
3010    /**
3011     * CGEMM performs one of the matrix-matrix operations
3012     * C := alpha*op(A)*op(B) + beta*C   where op(X) is one of op(X) = X  or  op(X) = X**T  or  op(X) = X**H
3013     *
3014     * Details: http://www.netlib.org/lapack/explore-html/d6/d5b/cgemm_8f.html
3015     *
3016     * @param TransA The type of transpose applied to matrix A.
3017     * @param TransB The type of transpose applied to matrix B.
3018     * @param alpha The scalar alpha.
3019     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
3020     * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.
3021     * @param beta The scalar beta.
3022     * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.
3023     */
3024    public void CGEMM(@Transpose int TransA, @Transpose int TransB, Float2 alpha, Allocation A,
3025                      Allocation B, Float2 beta, Allocation C) {
3026        validateTranspose(TransA);
3027        validateTranspose(TransB);
3028        validateL3(Element.F32_2(mRS), TransA, TransB, 0, A, B, C);
3029        int M = -1, N = -1, K = -1;
3030        if (TransA != NO_TRANSPOSE) {
3031            M = A.getType().getX();
3032            K = A.getType().getY();
3033        } else {
3034            M = A.getType().getY();
3035            K = A.getType().getX();
3036        }
3037        if (TransB != NO_TRANSPOSE) {
3038            N = B.getType().getY();
3039        } else {
3040            N = B.getType().getX();
3041        }
3042
3043        boolean mUseIncSupp = isIncSupp();
3044        long aID = A.getID(mRS);
3045        long bID = B.getID(mRS);
3046        long cID = C.getID(mRS);
3047        if (mUseIncSupp) {
3048            aID = getDummyAlloc(A);
3049            bID = getDummyAlloc(B);
3050            cID = getDummyAlloc(C);
3051        }
3052        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgemm, TransA, TransB, 0, 0, 0, M, N, K,  alpha.x, alpha.y, aID, bID,
3053                                         beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp);
3054    }
3055
3056    /**
3057     * ZGEMM performs one of the matrix-matrix operations
3058     * C := alpha*op(A)*op(B) + beta*C   where op(X) is one of op(X) = X  or  op(X) = X**T  or  op(X) = X**H
3059     *
3060     * Details: http://www.netlib.org/lapack/explore-html/d7/d76/zgemm_8f.html
3061     *
3062     * @param TransA The type of transpose applied to matrix A.
3063     * @param TransB The type of transpose applied to matrix B.
3064     * @param alpha The scalar alpha.
3065     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2
3066     * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2
3067     * @param beta The scalar beta.
3068     * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2
3069     */
3070    public void ZGEMM(@Transpose int TransA, @Transpose int TransB, Double2 alpha, Allocation A,
3071                      Allocation B, Double2 beta, Allocation C) {
3072        validateTranspose(TransA);
3073        validateTranspose(TransB);
3074        validateL3(Element.F64_2(mRS), TransA, TransB, 0, A, B, C);
3075        int M = -1, N = -1, K = -1;
3076        if (TransA != NO_TRANSPOSE) {
3077            M = A.getType().getX();
3078            K = A.getType().getY();
3079        } else {
3080            M = A.getType().getY();
3081            K = A.getType().getX();
3082        }
3083        if (TransB != NO_TRANSPOSE) {
3084            N = B.getType().getY();
3085        } else {
3086            N = B.getType().getX();
3087        }
3088
3089        boolean mUseIncSupp = isIncSupp();
3090        long aID = A.getID(mRS);
3091        long bID = B.getID(mRS);
3092        long cID = C.getID(mRS);
3093        if (mUseIncSupp) {
3094            aID = getDummyAlloc(A);
3095            bID = getDummyAlloc(B);
3096            cID = getDummyAlloc(C);
3097        }
3098        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgemm, TransA, TransB, 0, 0, 0, M, N, K,  alpha.x, alpha.y, aID, bID,
3099                                   beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp);
3100    }
3101
3102    /**
3103     * SSYMM performs one of the matrix-matrix operations
3104     * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
3105     *
3106     * Details: http://www.netlib.org/lapack/explore-html/d7/d42/ssymm_8f.html
3107     *
3108     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3109     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
3110     * @param alpha The scalar alpha.
3111     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
3112     * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}.
3113     * @param beta The scalar beta.
3114     * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}.
3115     */
3116    public void SSYMM(@Side int Side, @Uplo int Uplo, float alpha, Allocation A,
3117                      Allocation B, float beta, Allocation C) {
3118        validateSide(Side);
3119        validateUplo(Uplo);
3120        //For SYMM, Matrix A should be symmetric
3121        if (A.getType().getX() != A.getType().getY()) {
3122            throw new RSRuntimeException("Matrix A is not symmetric");
3123        }
3124        validateL3(Element.F32(mRS), 0, 0, Side, A, B, C);
3125
3126        boolean mUseIncSupp = isIncSupp();
3127        long aID = A.getID(mRS);
3128        long bID = B.getID(mRS);
3129        long cID = C.getID(mRS);
3130        if (mUseIncSupp) {
3131            aID = getDummyAlloc(A);
3132            bID = getDummyAlloc(B);
3133            cID = getDummyAlloc(C);
3134        }
3135        mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha, aID, bID,
3136                                        beta, cID, 0, 0, 0, 0, mUseIncSupp);
3137    }
3138
3139    /**
3140     * DSYMM performs one of the matrix-matrix operations
3141     * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
3142     *
3143     * Details: http://www.netlib.org/lapack/explore-html/d8/db0/dsymm_8f.html
3144     *
3145     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3146     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
3147     * @param alpha The scalar alpha.
3148     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
3149     * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}.
3150     * @param beta The scalar beta.
3151     * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}.
3152     */
3153    public void DSYMM(@Side int Side, @Uplo int Uplo, double alpha, Allocation A,
3154                      Allocation B, double beta, Allocation C) {
3155        validateSide(Side);
3156        validateUplo(Uplo);
3157        if (A.getType().getX() != A.getType().getY()) {
3158            throw new RSRuntimeException("Matrix A is not symmetric");
3159        }
3160        validateL3(Element.F64(mRS), 0, 0, Side, A, B, C);
3161
3162        boolean mUseIncSupp = isIncSupp();
3163        long aID = A.getID(mRS);
3164        long bID = B.getID(mRS);
3165        long cID = C.getID(mRS);
3166        if (mUseIncSupp) {
3167            aID = getDummyAlloc(A);
3168            bID = getDummyAlloc(B);
3169            cID = getDummyAlloc(C);
3170        }
3171        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha, aID, bID,
3172                                        beta, cID, 0, 0, 0, 0, mUseIncSupp);
3173    }
3174
3175    /**
3176     * CSYMM performs one of the matrix-matrix operations
3177     * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
3178     *
3179     * Details: http://www.netlib.org/lapack/explore-html/db/d59/csymm_8f.html
3180     *
3181     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3182     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
3183     * @param alpha The scalar alpha.
3184     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
3185     * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.
3186     * @param beta The scalar beta.
3187     * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.
3188     */
3189    public void CSYMM(@Side int Side, @Uplo int Uplo, Float2 alpha, Allocation A,
3190                      Allocation B, Float2 beta, Allocation C) {
3191        validateSide(Side);
3192        validateUplo(Uplo);
3193        if (A.getType().getX() != A.getType().getY()) {
3194            throw new RSRuntimeException("Matrix A is not symmetric");
3195        }
3196        validateL3(Element.F32_2(mRS), 0, 0, Side, A, B, C);
3197
3198        boolean mUseIncSupp = isIncSupp();
3199        long aID = A.getID(mRS);
3200        long bID = B.getID(mRS);
3201        long cID = C.getID(mRS);
3202        if (mUseIncSupp) {
3203            aID = getDummyAlloc(A);
3204            bID = getDummyAlloc(B);
3205            cID = getDummyAlloc(C);
3206        }
3207        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_csymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha.x, alpha.y, aID, bID,
3208                                         beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp);
3209    }
3210
3211    /**
3212     * ZSYMM performs one of the matrix-matrix operations
3213     * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
3214     *
3215     * Details: http://www.netlib.org/lapack/explore-html/df/d51/zsymm_8f.html
3216     *
3217     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3218     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
3219     * @param alpha The scalar alpha.
3220     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
3221     * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.
3222     * @param beta The scalar beta.
3223     * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.
3224     */
3225    public void ZSYMM(@Side int Side, @Uplo int Uplo, Double2 alpha, Allocation A,
3226                      Allocation B, Double2 beta, Allocation C) {
3227        validateSide(Side);
3228        validateUplo(Uplo);
3229        if (A.getType().getX() != A.getType().getY()) {
3230            throw new RSRuntimeException("Matrix A is not symmetric");
3231        }
3232        validateL3(Element.F64_2(mRS), 0, 0, Side, A, B, C);
3233
3234        boolean mUseIncSupp = isIncSupp();
3235        long aID = A.getID(mRS);
3236        long bID = B.getID(mRS);
3237        long cID = C.getID(mRS);
3238        if (mUseIncSupp) {
3239            aID = getDummyAlloc(A);
3240            bID = getDummyAlloc(B);
3241            cID = getDummyAlloc(C);
3242        }
3243        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zsymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha.x, alpha.y, aID, bID,
3244                                   beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp);
3245    }
3246
3247    /**
3248     * SSYRK performs one of the symmetric rank k operations
3249     * C := alpha*A*A**T + beta*C   or   C := alpha*A**T*A + beta*C
3250     *
3251     * Details: http://www.netlib.org/lapack/explore-html/d0/d40/ssyrk_8f.html
3252     *
3253     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3254     * @param Trans The type of transpose applied to the operation.
3255     * @param alpha The scalar alpha.
3256     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
3257     * @param beta The scalar beta.
3258     * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}.
3259     */
3260    public void SSYRK(@Uplo int Uplo, @Transpose int Trans, float alpha, Allocation A, float beta, Allocation C) {
3261        validateTranspose(Trans);
3262        validateUplo(Uplo);
3263        validateL3(Element.F32(mRS), Trans, 0, 0, A, null, C);
3264        int K = -1;
3265        if (Trans != NO_TRANSPOSE) {
3266            K = A.getType().getY();
3267        } else {
3268            K = A.getType().getX();
3269        }
3270
3271        boolean mUseIncSupp = isIncSupp();
3272        long aID = A.getID(mRS);
3273        long cID = C.getID(mRS);
3274        if (mUseIncSupp) {
3275            aID = getDummyAlloc(A);
3276            cID = getDummyAlloc(C);
3277        }
3278        mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, aID, 0, beta, cID, 0, 0, 0, 0, mUseIncSupp);
3279    }
3280
3281    /**
3282     * DSYRK performs one of the symmetric rank k operations
3283     * C := alpha*A*A**T + beta*C   or   C := alpha*A**T*A + beta*C
3284     *
3285     * Details: http://www.netlib.org/lapack/explore-html/dc/d05/dsyrk_8f.html
3286     *
3287     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3288     * @param Trans The type of transpose applied to the operation.
3289     * @param alpha The scalar alpha.
3290     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
3291     * @param beta The scalar beta.
3292     * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}.
3293     */
3294    public void DSYRK(@Uplo int Uplo, @Transpose int Trans, double alpha, Allocation A, double beta, Allocation C) {
3295        validateTranspose(Trans);
3296        validateUplo(Uplo);
3297        validateL3(Element.F64(mRS), Trans, 0, 0, A, null, C);
3298        int K = -1;
3299        if (Trans != NO_TRANSPOSE) {
3300            K = A.getType().getY();
3301        } else {
3302            K = A.getType().getX();
3303        }
3304
3305        boolean mUseIncSupp = isIncSupp();
3306        long aID = A.getID(mRS);
3307        long cID = C.getID(mRS);
3308        if (mUseIncSupp) {
3309            aID = getDummyAlloc(A);
3310            cID = getDummyAlloc(C);
3311        }
3312        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, aID, 0, beta, cID, 0, 0, 0, 0, mUseIncSupp);
3313    }
3314
3315    /**
3316     * CSYRK performs one of the symmetric rank k operations
3317     * C := alpha*A*A**T + beta*C   or   C := alpha*A**T*A + beta*C
3318     *
3319     * Details: http://www.netlib.org/lapack/explore-html/d3/d6a/csyrk_8f.html
3320     *
3321     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3322     * @param Trans The type of transpose applied to the operation.
3323     * @param alpha The scalar alpha.
3324     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
3325     * @param beta The scalar beta.
3326     * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.
3327     */
3328    public void CSYRK(@Uplo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Float2 beta, Allocation C) {
3329        validateTranspose(Trans);
3330        validateUplo(Uplo);
3331        validateL3(Element.F32_2(mRS), Trans, 0, 0, A, null, C);
3332        int K = -1;
3333        if (Trans != NO_TRANSPOSE) {
3334            K = A.getType().getY();
3335        } else {
3336            K = A.getType().getX();
3337        }
3338
3339        boolean mUseIncSupp = isIncSupp();
3340        long aID = A.getID(mRS);
3341        long cID = C.getID(mRS);
3342        if (mUseIncSupp) {
3343            aID = getDummyAlloc(A);
3344            cID = getDummyAlloc(C);
3345        }
3346        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_csyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, aID, 0, beta.x, beta.y,
3347                                         C.getID(mRS), 0, 0, 0, 0, mUseIncSupp);
3348    }
3349
3350    /**
3351     * ZSYRK performs one of the symmetric rank k operations
3352     * C := alpha*A*A**T + beta*C   or   C := alpha*A**T*A + beta*C
3353     *
3354     * Details: http://www.netlib.org/lapack/explore-html/de/d54/zsyrk_8f.html
3355     *
3356     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3357     * @param Trans The type of transpose applied to the operation.
3358     * @param alpha The scalar alpha.
3359     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
3360     * @param beta The scalar beta.
3361     * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.
3362     */
3363    public void ZSYRK(@Uplo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Double2 beta, Allocation C) {
3364        validateTranspose(Trans);
3365        validateUplo(Uplo);
3366        validateL3(Element.F64_2(mRS), Trans, 0, 0, A, null, C);
3367        int K = -1;
3368        if (Trans != NO_TRANSPOSE) {
3369            K = A.getType().getY();
3370        } else {
3371            K = A.getType().getX();
3372        }
3373
3374        boolean mUseIncSupp = isIncSupp();
3375        long aID = A.getID(mRS);
3376        long cID = C.getID(mRS);
3377        if (mUseIncSupp) {
3378            aID = getDummyAlloc(A);
3379            cID = getDummyAlloc(C);
3380        }
3381        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zsyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, aID, 0, beta.x, beta.y,
3382                                   C.getID(mRS), 0, 0, 0, 0, mUseIncSupp);
3383    }
3384
3385    static void validateSYR2K(Element e, @Transpose int Trans, Allocation A, Allocation B, Allocation C) {
3386        validateTranspose(Trans);
3387        if (!A.getType().getElement().isCompatible(e) ||
3388            !B.getType().getElement().isCompatible(e) ||
3389            !C.getType().getElement().isCompatible(e)) {
3390            throw new RSRuntimeException("Called BLAS with wrong Element type");
3391        }
3392        int Cdim = -1;
3393        // A is n x k if no transpose, k x n if transpose
3394        // C is n x n
3395        if (Trans == TRANSPOSE) {
3396            // check columns versus C
3397            Cdim = A.getType().getX();
3398        } else {
3399            // check rows versus C
3400            Cdim = A.getType().getY();
3401        }
3402        if (C.getType().getX() != Cdim || C.getType().getY() != Cdim) {
3403            throw new RSRuntimeException("Invalid symmetric matrix in SYR2K");
3404        }
3405        // A dims == B dims
3406        if (A.getType().getX() != B.getType().getX() || A.getType().getY() != B.getType().getY()) {
3407            throw new RSRuntimeException("Invalid A and B in SYR2K");
3408        }
3409    }
3410
3411    /**
3412     * SSYR2K performs one of the symmetric rank 2k operations
3413     * C := alpha*A*B**T + alpha*B*A**T + beta*C   or   C := alpha*A**T*B + alpha*B**T*A + beta*C
3414     *
3415     * Details: http://www.netlib.org/lapack/explore-html/df/d3d/ssyr2k_8f.html
3416     *
3417     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3418     * @param Trans The type of transpose applied to the operation.
3419     * @param alpha The scalar alpha.
3420     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
3421     * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}.
3422     * @param beta The scalar beta.
3423     * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}.
3424     */
3425    public void SSYR2K(@Uplo int Uplo, @Transpose int Trans, float alpha, Allocation A, Allocation B, float beta, Allocation C) {
3426        validateUplo(Uplo);
3427        validateSYR2K(Element.F32(mRS), Trans, A, B, C);
3428        int K = -1;
3429        if (Trans != NO_TRANSPOSE) {
3430            K = A.getType().getY();
3431        } else {
3432            K = A.getType().getX();
3433        }
3434
3435        boolean mUseIncSupp = isIncSupp();
3436        long aID = A.getID(mRS);
3437        long bID = B.getID(mRS);
3438        long cID = C.getID(mRS);
3439        if (mUseIncSupp) {
3440            aID = getDummyAlloc(A);
3441            bID = getDummyAlloc(B);
3442            cID = getDummyAlloc(C);
3443        }
3444        mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, aID, bID, beta, cID, 0, 0, 0, 0, mUseIncSupp);
3445    }
3446
3447    /**
3448     * DSYR2K performs one of the symmetric rank 2k operations
3449     * C := alpha*A*B**T + alpha*B*A**T + beta*C   or   C := alpha*A**T*B + alpha*B**T*A + beta*C
3450     *
3451     * Details: http://www.netlib.org/lapack/explore-html/d1/dec/dsyr2k_8f.html
3452     *
3453     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3454     * @param Trans The type of transpose applied to the operation.
3455     * @param alpha The scalar alpha.
3456     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
3457     * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}.
3458     * @param beta The scalar beta.
3459     * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}.
3460     */
3461    public void DSYR2K(@Uplo int Uplo, @Transpose int Trans, double alpha, Allocation A, Allocation B, double beta, Allocation C) {
3462        validateUplo(Uplo);
3463        validateSYR2K(Element.F64(mRS), Trans, A, B, C);
3464        int K = -1;
3465        if (Trans != NO_TRANSPOSE) {
3466            K = A.getType().getY();
3467        } else {
3468            K = A.getType().getX();
3469        }
3470
3471        boolean mUseIncSupp = isIncSupp();
3472        long aID = A.getID(mRS);
3473        long bID = B.getID(mRS);
3474        long cID = C.getID(mRS);
3475        if (mUseIncSupp) {
3476            aID = getDummyAlloc(A);
3477            bID = getDummyAlloc(B);
3478            cID = getDummyAlloc(C);
3479        }
3480        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, aID, bID, beta, cID, 0, 0, 0, 0, mUseIncSupp);
3481    }
3482
3483    /**
3484     * CSYR2K performs one of the symmetric rank 2k operations
3485     * C := alpha*A*B**T + alpha*B*A**T + beta*C   or   C := alpha*A**T*B + alpha*B**T*A + beta*C
3486     *
3487     * Details: http://www.netlib.org/lapack/explore-html/de/d7e/csyr2k_8f.html
3488     *
3489     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3490     * @param Trans The type of transpose applied to the operation.
3491     * @param alpha The scalar alpha.
3492     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
3493     * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.
3494     * @param beta The scalar beta.
3495     * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.
3496     */
3497    public void CSYR2K(@Uplo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C) {
3498        validateUplo(Uplo);
3499        validateSYR2K(Element.F32_2(mRS), Trans, A, B, C);
3500        int K = -1;
3501        if (Trans != NO_TRANSPOSE) {
3502            K = A.getType().getY();
3503        } else {
3504            K = A.getType().getX();
3505        }
3506
3507        boolean mUseIncSupp = isIncSupp();
3508        long aID = A.getID(mRS);
3509        long bID = B.getID(mRS);
3510        long cID = C.getID(mRS);
3511        if (mUseIncSupp) {
3512            aID = getDummyAlloc(A);
3513            bID = getDummyAlloc(B);
3514            cID = getDummyAlloc(C);
3515        }
3516        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_csyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, aID, bID, beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp);
3517    }
3518
3519    /**
3520     * ZSYR2K performs one of the symmetric rank 2k operations
3521     * C := alpha*A*B**T + alpha*B*A**T + beta*C   or   C := alpha*A**T*B + alpha*B**T*A + beta*C
3522     *
3523     * Details: http://www.netlib.org/lapack/explore-html/df/d20/zsyr2k_8f.html
3524     *
3525     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3526     * @param Trans The type of transpose applied to the operation.
3527     * @param alpha The scalar alpha.
3528     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
3529     * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.
3530     * @param beta The scalar beta.
3531     * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.
3532     */
3533    public void ZSYR2K(@Uplo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C) {
3534        validateUplo(Uplo);
3535        validateSYR2K(Element.F64_2(mRS), Trans, A, B, C);
3536        int K = -1;
3537        if (Trans != NO_TRANSPOSE) {
3538            K = A.getType().getY();
3539        } else {
3540            K = A.getType().getX();
3541        }
3542
3543        boolean mUseIncSupp = isIncSupp();
3544        long aID = A.getID(mRS);
3545        long bID = B.getID(mRS);
3546        long cID = C.getID(mRS);
3547        if (mUseIncSupp) {
3548            aID = getDummyAlloc(A);
3549            bID = getDummyAlloc(B);
3550            cID = getDummyAlloc(C);
3551        }
3552        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zsyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, aID, bID, beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp);
3553    }
3554
3555    static void validateTRMM(Element e, @Side int Side, @Transpose int TransA, Allocation A, Allocation B) {
3556        validateSide(Side);
3557        validateTranspose(TransA);
3558        int aM = -1, aN = -1, bM = -1, bN = -1;
3559        if (!A.getType().getElement().isCompatible(e) ||
3560            !B.getType().getElement().isCompatible(e)) {
3561            throw new RSRuntimeException("Called BLAS with wrong Element type");
3562        }
3563
3564        aM = A.getType().getY();
3565        aN = A.getType().getX();
3566        if (aM != aN) {
3567            throw new RSRuntimeException("Called TRMM with a non-symmetric matrix A");
3568        }
3569
3570        bM = B.getType().getY();
3571        bN = B.getType().getX();
3572        if (Side == LEFT) {
3573            if (aN != bM) {
3574                throw new RSRuntimeException("Called TRMM with invalid matrices");
3575            }
3576        } else {
3577            if (bN != aM) {
3578                throw new RSRuntimeException("Called TRMM with invalid matrices");
3579            }
3580        }
3581    }
3582
3583    /**
3584     * STRMM performs one of the matrix-matrix operations
3585     * B := alpha*op(A)*B   or   B := alpha*B*op(A)
3586     * op(A) is one of  op(A) = A  or  op(A) = A**T
3587     *
3588     * Details: http://www.netlib.org/lapack/explore-html/df/d01/strmm_8f.html
3589     *
3590     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3591     * @param Uplo Specifies whether matrix A is upper or lower triangular.
3592     * @param TransA The type of transpose applied to matrix A.
3593     * @param Diag Specifies whether or not A is unit triangular.
3594     * @param alpha The scalar alpha.
3595     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
3596     * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}.
3597     */
3598    public void STRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, float alpha, Allocation A, Allocation B) {
3599        validateUplo(Uplo);
3600        validateDiag(Diag);
3601        validateTRMM(Element.F32(mRS), Side, TransA, A, B);
3602
3603        boolean mUseIncSupp = isIncSupp();
3604        long aID = A.getID(mRS);
3605        long bID = B.getID(mRS);
3606        if (mUseIncSupp) {
3607            aID = getDummyAlloc(A);
3608            bID = getDummyAlloc(B);
3609        }
3610        mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
3611                                        alpha, aID, bID, 0.f, 0, 0, 0, 0, 0, mUseIncSupp);
3612    }
3613
3614    /**
3615     * DTRMM performs one of the matrix-matrix operations
3616     * B := alpha*op(A)*B   or   B := alpha*B*op(A)
3617     * op(A) is one of  op(A) = A  or  op(A) = A**T
3618     *
3619     * Details: http://www.netlib.org/lapack/explore-html/dd/d19/dtrmm_8f.html
3620     *
3621     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3622     * @param Uplo Specifies whether matrix A is upper or lower triangular.
3623     * @param TransA The type of transpose applied to matrix A.
3624     * @param Diag Specifies whether or not A is unit triangular.
3625     * @param alpha The scalar alpha.
3626     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
3627     * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}.
3628     */
3629    public void DTRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, double alpha, Allocation A, Allocation B) {
3630        validateUplo(Uplo);
3631        validateDiag(Diag);
3632        validateTRMM(Element.F64(mRS), Side, TransA, A, B);
3633
3634        boolean mUseIncSupp = isIncSupp();
3635        long aID = A.getID(mRS);
3636        long bID = B.getID(mRS);
3637        if (mUseIncSupp) {
3638            aID = getDummyAlloc(A);
3639            bID = getDummyAlloc(B);
3640        }
3641        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
3642                                        alpha, aID, bID, 0, 0, 0, 0, 0, 0, mUseIncSupp);
3643    }
3644
3645    /**
3646     * CTRMM performs one of the matrix-matrix operations
3647     * B := alpha*op(A)*B   or   B := alpha*B*op(A)
3648     * op(A) is one of  op(A) = A  or  op(A) = A**T  or  op(A) = A**H
3649     *
3650     * Details: http://www.netlib.org/lapack/explore-html/d4/d9b/ctrmm_8f.html
3651     *
3652     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3653     * @param Uplo Specifies whether matrix A is upper or lower triangular.
3654     * @param TransA The type of transpose applied to matrix A.
3655     * @param Diag Specifies whether or not A is unit triangular.
3656     * @param alpha The scalar alpha.
3657     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
3658     * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.
3659     */
3660    public void CTRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Float2 alpha, Allocation A, Allocation B) {
3661        validateUplo(Uplo);
3662        validateDiag(Diag);
3663        validateTRMM(Element.F32_2(mRS), Side, TransA, A, B);
3664
3665        boolean mUseIncSupp = isIncSupp();
3666        long aID = A.getID(mRS);
3667        long bID = B.getID(mRS);
3668        if (mUseIncSupp) {
3669            aID = getDummyAlloc(A);
3670            bID = getDummyAlloc(B);
3671        }
3672        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
3673                                         alpha.x, alpha.y, aID, bID, 0, 0, 0, 0, 0, 0, 0, mUseIncSupp);
3674    }
3675
3676    /**
3677     * ZTRMM performs one of the matrix-matrix operations
3678     * B := alpha*op(A)*B   or   B := alpha*B*op(A)
3679     * op(A) is one of  op(A) = A  or  op(A) = A**T  or  op(A) = A**H
3680     *
3681     * Details: http://www.netlib.org/lapack/explore-html/d8/de1/ztrmm_8f.html
3682     *
3683     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3684     * @param Uplo Specifies whether matrix A is upper or lower triangular.
3685     * @param TransA The type of transpose applied to matrix A.
3686     * @param Diag Specifies whether or not A is unit triangular.
3687     * @param alpha The scalar alpha.
3688     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
3689     * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.
3690     */
3691    public void ZTRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Double2 alpha, Allocation A, Allocation B) {
3692        validateUplo(Uplo);
3693        validateDiag(Diag);
3694        validateTRMM(Element.F64_2(mRS), Side, TransA, A, B);
3695
3696        boolean mUseIncSupp = isIncSupp();
3697        long aID = A.getID(mRS);
3698        long bID = B.getID(mRS);
3699        if (mUseIncSupp) {
3700            aID = getDummyAlloc(A);
3701            bID = getDummyAlloc(B);
3702        }
3703        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
3704                                   alpha.x, alpha.y, aID, bID, 0, 0, 0, 0, 0, 0, 0, mUseIncSupp);
3705    }
3706
3707    static void validateTRSM(Element e, @Side int Side, @Transpose int TransA, Allocation A, Allocation B) {
3708        int adim = -1, bM = -1, bN = -1;
3709        validateSide(Side);
3710        validateTranspose(TransA);
3711        if (!A.getType().getElement().isCompatible(e) ||
3712            !B.getType().getElement().isCompatible(e)) {
3713            throw new RSRuntimeException("Called BLAS with wrong Element type");
3714        }
3715        adim = A.getType().getX();
3716        if (adim != A.getType().getY()) {
3717            // this may be unnecessary, the restriction could potentially be relaxed
3718            // A needs to contain at least that symmetric matrix but could theoretically be larger
3719            // for now we assume adapters are sufficient, will reevaluate in the future
3720            throw new RSRuntimeException("Called TRSM with a non-symmetric matrix A");
3721        }
3722        bM = B.getType().getY();
3723        bN = B.getType().getX();
3724        if (Side == LEFT) {
3725            // A is M*M
3726            if (adim != bM) {
3727                throw new RSRuntimeException("Called TRSM with invalid matrix dimensions");
3728            }
3729        } else {
3730            // A is N*N
3731            if (adim != bN) {
3732                throw new RSRuntimeException("Called TRSM with invalid matrix dimensions");
3733            }
3734        }
3735    }
3736
3737    /**
3738     * STRSM solves one of the matrix equations
3739     * op(A)*X := alpha*B   or   X*op(A) := alpha*B
3740     * op(A) is one of  op(A) = A  or  op(A) = A**T
3741     *
3742     * Details: http://www.netlib.org/lapack/explore-html/d2/d8b/strsm_8f.html
3743     *
3744     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3745     * @param Uplo Specifies whether matrix A is upper or lower triangular.
3746     * @param TransA The type of transpose applied to matrix A.
3747     * @param Diag Specifies whether or not A is unit triangular.
3748     * @param alpha The scalar alpha.
3749     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
3750     * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}.
3751     */
3752    public void STRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, float alpha, Allocation A, Allocation B) {
3753        validateUplo(Uplo);
3754        validateDiag(Diag);
3755        validateTRSM(Element.F32(mRS), Side, TransA, A, B);
3756
3757        boolean mUseIncSupp = isIncSupp();
3758        long aID = A.getID(mRS);
3759        long bID = B.getID(mRS);
3760        if (mUseIncSupp) {
3761            aID = getDummyAlloc(A);
3762            bID = getDummyAlloc(B);
3763        }
3764        mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
3765                                        alpha, aID, bID, 0, 0, 0, 0, 0, 0, mUseIncSupp);
3766    }
3767
3768    /**
3769     * DTRSM solves one of the matrix equations
3770     * op(A)*X := alpha*B   or   X*op(A) := alpha*B
3771     * op(A) is one of  op(A) = A  or  op(A) = A**T
3772     *
3773     * Details: http://www.netlib.org/lapack/explore-html/de/da7/dtrsm_8f.html
3774     *
3775     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3776     * @param Uplo Specifies whether matrix A is upper or lower triangular.
3777     * @param TransA The type of transpose applied to matrix A.
3778     * @param Diag Specifies whether or not A is unit triangular.
3779     * @param alpha The scalar alpha.
3780     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
3781     * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}.
3782     */
3783    public void DTRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, double alpha, Allocation A, Allocation B) {
3784        validateUplo(Uplo);
3785        validateDiag(Diag);
3786        validateTRSM(Element.F64(mRS), Side, TransA, A, B);
3787
3788        boolean mUseIncSupp = isIncSupp();
3789        long aID = A.getID(mRS);
3790        long bID = B.getID(mRS);
3791        if (mUseIncSupp) {
3792            aID = getDummyAlloc(A);
3793            bID = getDummyAlloc(B);
3794        }
3795        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
3796                                        alpha, aID, bID, 0, 0, 0, 0, 0, 0, mUseIncSupp);
3797    }
3798
3799    /**
3800     * CTRSM solves one of the matrix equations
3801     * op(A)*X := alpha*B   or   X*op(A) := alpha*B
3802     * op(A) is one of  op(A) = A  or  op(A) = A**T  or  op(A) = A**H
3803     *
3804     * Details: http://www.netlib.org/lapack/explore-html/de/d30/ctrsm_8f.html
3805     *
3806     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3807     * @param Uplo Specifies whether matrix A is upper or lower triangular.
3808     * @param TransA The type of transpose applied to matrix A.
3809     * @param Diag Specifies whether or not A is unit triangular.
3810     * @param alpha The scalar alpha.
3811     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
3812     * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.
3813     */
3814    public void CTRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Float2 alpha, Allocation A, Allocation B) {
3815        validateUplo(Uplo);
3816        validateDiag(Diag);
3817        validateTRSM(Element.F32_2(mRS), Side, TransA, A, B);
3818
3819        boolean mUseIncSupp = isIncSupp();
3820        long aID = A.getID(mRS);
3821        long bID = B.getID(mRS);
3822        if (mUseIncSupp) {
3823            aID = getDummyAlloc(A);
3824            bID = getDummyAlloc(B);
3825        }
3826        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
3827                                         alpha.x, alpha.y, aID, bID, 0, 0, 0, 0, 0, 0, 0, mUseIncSupp);
3828    }
3829
3830    /**
3831     * ZTRSM solves one of the matrix equations
3832     * op(A)*X := alpha*B   or   X*op(A) := alpha*B
3833     * op(A) is one of  op(A) = A  or  op(A) = A**T  or  op(A) = A**H
3834     *
3835     * Details: http://www.netlib.org/lapack/explore-html/d1/d39/ztrsm_8f.html
3836     *
3837     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3838     * @param Uplo Specifies whether matrix A is upper or lower triangular.
3839     * @param TransA The type of transpose applied to matrix A.
3840     * @param Diag Specifies whether or not A is unit triangular.
3841     * @param alpha The scalar alpha.
3842     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
3843     * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.
3844     */
3845    public void ZTRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Double2 alpha, Allocation A, Allocation B) {
3846        validateUplo(Uplo);
3847        validateDiag(Diag);
3848        validateTRSM(Element.F64_2(mRS), Side, TransA, A, B);
3849
3850        boolean mUseIncSupp = isIncSupp();
3851        long aID = A.getID(mRS);
3852        long bID = B.getID(mRS);
3853        if (mUseIncSupp) {
3854            aID = getDummyAlloc(A);
3855            bID = getDummyAlloc(B);
3856        }
3857        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
3858                                   alpha.x, alpha.y, aID, bID, 0, 0, 0, 0, 0, 0, 0, mUseIncSupp);
3859    }
3860
3861    static void validateHEMM(Element e, @Side int Side, Allocation A, Allocation B, Allocation C) {
3862        validateSide(Side);
3863
3864        if (!A.getType().getElement().isCompatible(e) ||
3865            !B.getType().getElement().isCompatible(e) ||
3866            !C.getType().getElement().isCompatible(e)) {
3867            throw new RSRuntimeException("Called BLAS with wrong Element type");
3868        }
3869
3870        // A must be square; can potentially be relaxed similar to TRSM
3871        int adim = A.getType().getX();
3872        if (adim != A.getType().getY()) {
3873            throw new RSRuntimeException("Called HEMM with non-square A");
3874        }
3875        if ((Side == LEFT && adim != B.getType().getY()) ||
3876            (Side == RIGHT && adim != B.getType().getX())) {
3877            throw new RSRuntimeException("Called HEMM with invalid B");
3878        }
3879        if (B.getType().getX() != C.getType().getX() ||
3880            B.getType().getY() != C.getType().getY()) {
3881            throw new RSRuntimeException("Called HEMM with mismatched B and C");
3882        }
3883    }
3884
3885    /**
3886     * CHEMM performs one of the matrix-matrix operations
3887     * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
3888     *
3889     * Details: http://www.netlib.org/lapack/explore-html/d3/d66/chemm_8f.html
3890     *
3891     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3892     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
3893     * @param alpha The scalar alpha.
3894     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
3895     * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.
3896     * @param beta The scalar beta.
3897     * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.
3898     */
3899    public void CHEMM(@Side int Side, @Uplo int Uplo, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C) {
3900        validateUplo(Uplo);
3901        validateHEMM(Element.F32_2(mRS), Side, A, B, C);
3902
3903        boolean mUseIncSupp = isIncSupp();
3904        long aID = A.getID(mRS);
3905        long bID = B.getID(mRS);
3906        long cID = C.getID(mRS);
3907        if (mUseIncSupp) {
3908            aID = getDummyAlloc(A);
3909            bID = getDummyAlloc(B);
3910            cID = getDummyAlloc(C);
3911        }
3912        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chemm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0,
3913                                         alpha.x, alpha.y, aID, bID, beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp);
3914    }
3915
3916    /**
3917     * ZHEMM performs one of the matrix-matrix operations
3918     * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
3919     *
3920     * Details: http://www.netlib.org/lapack/explore-html/d6/d3e/zhemm_8f.html
3921     *
3922     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3923     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
3924     * @param alpha The scalar alpha.
3925     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
3926     * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.
3927     * @param beta The scalar beta.
3928     * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.
3929     */
3930    public void ZHEMM(@Side int Side, @Uplo int Uplo, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C) {
3931        validateUplo(Uplo);
3932        validateHEMM(Element.F64_2(mRS), Side, A, B, C);
3933
3934        boolean mUseIncSupp = isIncSupp();
3935        long aID = A.getID(mRS);
3936        long bID = B.getID(mRS);
3937        long cID = C.getID(mRS);
3938        if (mUseIncSupp) {
3939            aID = getDummyAlloc(A);
3940            bID = getDummyAlloc(B);
3941            cID = getDummyAlloc(C);
3942        }
3943        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhemm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0,
3944                                   alpha.x, alpha.y, aID, bID, beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp);
3945    }
3946
3947    static void validateHERK(Element e, @Transpose int Trans, Allocation A, Allocation C) {
3948        if (!A.getType().getElement().isCompatible(e) ||
3949            !C.getType().getElement().isCompatible(e)) {
3950            throw new RSRuntimeException("Called BLAS with wrong Element type");
3951        }
3952        validateConjTranspose(Trans);
3953        int cdim = C.getType().getX();
3954        if (cdim != C.getType().getY()) {
3955            throw new RSRuntimeException("Called HERK with non-square C");
3956        }
3957        if (Trans == NO_TRANSPOSE) {
3958            if (cdim != A.getType().getY()) {
3959                throw new RSRuntimeException("Called HERK with invalid A");
3960            }
3961        } else {
3962            if (cdim != A.getType().getX()) {
3963                throw new RSRuntimeException("Called HERK with invalid A");
3964            }
3965        }
3966    }
3967
3968    /**
3969     * CHERK performs one of the hermitian rank k operations
3970     * C := alpha*A*A**H + beta*C   or   C := alpha*A**H*A + beta*C
3971     *
3972     * Details: http://www.netlib.org/lapack/explore-html/d8/d52/cherk_8f.html
3973     *
3974     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3975     * @param Trans The type of transpose applied to the operation.
3976     * @param alpha The scalar alpha.
3977     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
3978     * @param beta The scalar beta.
3979     * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.
3980     */
3981    public void CHERK(@Uplo int Uplo, @Transpose int Trans, float alpha, Allocation A, float beta, Allocation C) {
3982        validateUplo(Uplo);
3983        validateHERK(Element.F32_2(mRS), Trans, A, C);
3984        int k = 0;
3985        if (Trans == CONJ_TRANSPOSE) {
3986            k = A.getType().getY();
3987        } else {
3988            k = A.getType().getX();
3989        }
3990
3991        boolean mUseIncSupp = isIncSupp();
3992        long aID = A.getID(mRS);
3993        long cID = C.getID(mRS);
3994        if (mUseIncSupp) {
3995            aID = getDummyAlloc(A);
3996            cID = getDummyAlloc(C);
3997        }
3998        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cherk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k,
3999                                         alpha, 0, aID, 0, beta, 0, cID, 0, 0, 0, 0, mUseIncSupp);
4000    }
4001
4002    /**
4003     * ZHERK performs one of the hermitian rank k operations
4004     * C := alpha*A*A**H + beta*C   or   C := alpha*A**H*A + beta*C
4005     *
4006     * Details: http://www.netlib.org/lapack/explore-html/d1/db1/zherk_8f.html
4007     *
4008     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
4009     * @param Trans The type of transpose applied to the operation.
4010     * @param alpha The scalar alpha.
4011     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
4012     * @param beta The scalar beta.
4013     * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.
4014     */
4015    public void ZHERK(@Uplo int Uplo, @Transpose int Trans, double alpha, Allocation A, double beta, Allocation C) {
4016        validateUplo(Uplo);
4017        validateHERK(Element.F64_2(mRS), Trans, A, C);
4018        int k = 0;
4019        if (Trans == CONJ_TRANSPOSE) {
4020            k = A.getType().getY();
4021        } else {
4022            k = A.getType().getX();
4023        }
4024
4025        boolean mUseIncSupp = isIncSupp();
4026        long aID = A.getID(mRS);
4027        long cID = C.getID(mRS);
4028        if (mUseIncSupp) {
4029            aID = getDummyAlloc(A);
4030            cID = getDummyAlloc(C);
4031        }
4032        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zherk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k,
4033                                   alpha, 0, aID, 0, beta, 0, cID, 0, 0, 0, 0, mUseIncSupp);
4034    }
4035
4036    static void validateHER2K(Element e, @Transpose int Trans, Allocation A, Allocation B, Allocation C) {
4037        if (!A.getType().getElement().isCompatible(e) ||
4038            !B.getType().getElement().isCompatible(e) ||
4039            !C.getType().getElement().isCompatible(e)) {
4040            throw new RSRuntimeException("Called BLAS with wrong Element type");
4041        }
4042        validateConjTranspose(Trans);
4043        int cdim = C.getType().getX();
4044        if (cdim != C.getType().getY()) {
4045            throw new RSRuntimeException("Called HER2K with non-square C");
4046        }
4047        if (Trans == NO_TRANSPOSE) {
4048            if (A.getType().getY() != cdim) {
4049                throw new RSRuntimeException("Called HER2K with invalid matrices");
4050            }
4051        } else {
4052            if (A.getType().getX() != cdim) {
4053                throw new RSRuntimeException("Called HER2K with invalid matrices");
4054            }
4055        }
4056        if (A.getType().getX() != B.getType().getX() || A.getType().getY() != B.getType().getY()) {
4057            throw new RSRuntimeException("Called HER2K with invalid A and B matrices");
4058        }
4059    }
4060
4061    /**
4062     * CHER2K performs one of the hermitian rank 2k operations
4063     * C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C   or   C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C
4064     *
4065     * Details: http://www.netlib.org/lapack/explore-html/d1/d82/cher2k_8f.html
4066     *
4067     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
4068     * @param Trans The type of transpose applied to the operation.
4069     * @param alpha The scalar alpha.
4070     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
4071     * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.
4072     * @param beta The scalar beta.
4073     * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.
4074     */
4075    public void CHER2K(@Uplo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Allocation B, float beta, Allocation C) {
4076        validateUplo(Uplo);
4077        validateHER2K(Element.F32_2(mRS), Trans, A, B, C);
4078        int k = 0;
4079        if (Trans == NO_TRANSPOSE) {
4080            k = A.getType().getX();
4081        } else {
4082            k = A.getType().getY();
4083        }
4084
4085        boolean mUseIncSupp = isIncSupp();
4086        long aID = A.getID(mRS);
4087        long bID = B.getID(mRS);
4088        long cID = C.getID(mRS);
4089        if (mUseIncSupp) {
4090            aID = getDummyAlloc(A);
4091            bID = getDummyAlloc(B);
4092            cID = getDummyAlloc(C);
4093        }
4094        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cher2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k, alpha.x, alpha.y,
4095                                         A.getID(mRS), bID, beta, 0, cID, 0, 0, 0, 0, mUseIncSupp);
4096    }
4097
4098    /**
4099     * ZHER2K performs one of the hermitian rank 2k operations
4100     * C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C   or   C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C
4101     *
4102     * Details: http://www.netlib.org/lapack/explore-html/d7/dfa/zher2k_8f.html
4103     *
4104     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
4105     * @param Trans The type of transpose applied to the operation.
4106     * @param alpha The scalar alpha.
4107     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
4108     * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.
4109     * @param beta The scalar beta.
4110     * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.
4111     */
4112    public void ZHER2K(@Uplo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Allocation B, double beta, Allocation C) {
4113        validateUplo(Uplo);
4114        validateHER2K(Element.F64_2(mRS), Trans, A, B, C);
4115        int k = 0;
4116        if (Trans == NO_TRANSPOSE) {
4117            k = A.getType().getX();
4118        } else {
4119            k = A.getType().getY();
4120        }
4121
4122        boolean mUseIncSupp = isIncSupp();
4123        long aID = A.getID(mRS);
4124        long bID = B.getID(mRS);
4125        long cID = C.getID(mRS);
4126        if (mUseIncSupp) {
4127            aID = getDummyAlloc(A);
4128            bID = getDummyAlloc(B);
4129            cID = getDummyAlloc(C);
4130        }
4131        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zher2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k, alpha.x, alpha.y,
4132                                   A.getID(mRS), bID, beta, 0, cID, 0, 0, 0, 0, mUseIncSupp);
4133    }
4134
4135
4136    /**
4137     * 8-bit GEMM-like operation for neural networks: C = B.transposed() * A
4138     * Calculations are done in 1.10.21 fixed-point format for the final output,
4139     * just before there's a shift down to drop the fractional parts. The output
4140     * values are gated to 0 to 255 to fit in a byte, but the 10-bit format
4141     * gives some headroom to avoid wrapping around on small overflows.
4142     *
4143     * @param A The input allocation contains matrix A, supported elements type {@link Element#U8}.
4144     * @param a_offset The offset for all values in matrix A, e.g A[i,j] = A[i,j] - a_offset.
4145     * @param B The input allocation contains matrix B, supported elements type {@link Element#U8}.
4146     * @param b_offset The offset for all values in matrix B, e.g B[i,j] = B[i,j] - b_offset.
4147     * @param C The input allocation contains matrix C, supported elements type {@link Element#U8}.
4148     * @param c_offset The offset for all values in matrix C.
4149     * @param c_mult The multiplier for all values in matrix C, e.g C[i,j] = (C[i,j] + c_offset) * c_mult.
4150     **/
4151    public void BNNM(Allocation A, int a_offset, Allocation B, int b_offset, Allocation C, int c_offset, int c_mult) {
4152        validateL3(Element.U8(mRS), NO_TRANSPOSE, TRANSPOSE, 0, A, B, C);
4153
4154        int M = -1, N = -1, K = -1;
4155        M = A.getType().getY();
4156        N = B.getType().getY();
4157        K = A.getType().getX();
4158
4159        boolean mUseIncSupp = isIncSupp();
4160        long aID = A.getID(mRS);
4161        long bID = B.getID(mRS);
4162        long cID = C.getID(mRS);
4163        if (mUseIncSupp) {
4164            aID = getDummyAlloc(A);
4165            bID = getDummyAlloc(B);
4166            cID = getDummyAlloc(C);
4167        }
4168        mRS.nScriptIntrinsicBLAS_BNNM(getID(mRS), M, N, K, aID, a_offset, bID, b_offset, cID, c_offset, c_mult, mUseIncSupp);
4169
4170    }
4171
4172}
4173