1/*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package android.renderscript;
18
19import android.annotation.IntDef;
20import java.lang.annotation.Retention;
21import java.lang.annotation.RetentionPolicy;
22
23/**
24 *
25 * ScriptIntrinsicBLAS class provides high performance RenderScript APIs to BLAS.
26 *
27 * The BLAS (Basic Linear Algebra Subprograms) are routines that provide standard
28 * building blocks for performing basic vector and matrix operations.
29 *
30 * For detailed description of BLAS, please refer to http://www.netlib.org/blas/
31 *
32 **/
33public final class ScriptIntrinsicBLAS extends ScriptIntrinsic {
34    private Allocation mLUT;
35
36    private ScriptIntrinsicBLAS(long id, RenderScript rs) {
37        super(id, rs);
38    }
39
40    private static final int RsBlas_sdsdot = 1;
41    private static final int RsBlas_dsdot = 2;
42    private static final int RsBlas_sdot = 3;
43    private static final int RsBlas_ddot = 4;
44    private static final int RsBlas_cdotu_sub = 5;
45    private static final int RsBlas_cdotc_sub = 6;
46    private static final int RsBlas_zdotu_sub = 7;
47    private static final int RsBlas_zdotc_sub = 8;
48    private static final int RsBlas_snrm2 = 9;
49    private static final int RsBlas_sasum = 10;
50    private static final int RsBlas_dnrm2 = 11;
51    private static final int RsBlas_dasum = 12;
52    private static final int RsBlas_scnrm2 = 13;
53    private static final int RsBlas_scasum = 14;
54    private static final int RsBlas_dznrm2 = 15;
55    private static final int RsBlas_dzasum = 16;
56    private static final int RsBlas_isamax = 17;
57    private static final int RsBlas_idamax = 18;
58    private static final int RsBlas_icamax = 19;
59    private static final int RsBlas_izamax = 20;
60    private static final int RsBlas_sswap = 21;
61    private static final int RsBlas_scopy = 22;
62    private static final int RsBlas_saxpy = 23;
63    private static final int RsBlas_dswap = 24;
64    private static final int RsBlas_dcopy = 25;
65    private static final int RsBlas_daxpy = 26;
66    private static final int RsBlas_cswap = 27;
67    private static final int RsBlas_ccopy = 28;
68    private static final int RsBlas_caxpy = 29;
69    private static final int RsBlas_zswap = 30;
70    private static final int RsBlas_zcopy = 31;
71    private static final int RsBlas_zaxpy = 32;
72    private static final int RsBlas_srotg = 33;
73    private static final int RsBlas_srotmg = 34;
74    private static final int RsBlas_srot = 35;
75    private static final int RsBlas_srotm = 36;
76    private static final int RsBlas_drotg = 37;
77    private static final int RsBlas_drotmg = 38;
78    private static final int RsBlas_drot = 39;
79    private static final int RsBlas_drotm = 40;
80    private static final int RsBlas_sscal = 41;
81    private static final int RsBlas_dscal = 42;
82    private static final int RsBlas_cscal = 43;
83    private static final int RsBlas_zscal = 44;
84    private static final int RsBlas_csscal = 45;
85    private static final int RsBlas_zdscal = 46;
86    private static final int RsBlas_sgemv = 47;
87    private static final int RsBlas_sgbmv = 48;
88    private static final int RsBlas_strmv = 49;
89    private static final int RsBlas_stbmv = 50;
90    private static final int RsBlas_stpmv = 51;
91    private static final int RsBlas_strsv = 52;
92    private static final int RsBlas_stbsv = 53;
93    private static final int RsBlas_stpsv = 54;
94    private static final int RsBlas_dgemv = 55;
95    private static final int RsBlas_dgbmv = 56;
96    private static final int RsBlas_dtrmv = 57;
97    private static final int RsBlas_dtbmv = 58;
98    private static final int RsBlas_dtpmv = 59;
99    private static final int RsBlas_dtrsv = 60;
100    private static final int RsBlas_dtbsv = 61;
101    private static final int RsBlas_dtpsv = 62;
102    private static final int RsBlas_cgemv = 63;
103    private static final int RsBlas_cgbmv = 64;
104    private static final int RsBlas_ctrmv = 65;
105    private static final int RsBlas_ctbmv = 66;
106    private static final int RsBlas_ctpmv = 67;
107    private static final int RsBlas_ctrsv = 68;
108    private static final int RsBlas_ctbsv = 69;
109    private static final int RsBlas_ctpsv = 70;
110    private static final int RsBlas_zgemv = 71;
111    private static final int RsBlas_zgbmv = 72;
112    private static final int RsBlas_ztrmv = 73;
113    private static final int RsBlas_ztbmv = 74;
114    private static final int RsBlas_ztpmv = 75;
115    private static final int RsBlas_ztrsv = 76;
116    private static final int RsBlas_ztbsv = 77;
117    private static final int RsBlas_ztpsv = 78;
118    private static final int RsBlas_ssymv = 79;
119    private static final int RsBlas_ssbmv = 80;
120    private static final int RsBlas_sspmv = 81;
121    private static final int RsBlas_sger = 82;
122    private static final int RsBlas_ssyr = 83;
123    private static final int RsBlas_sspr = 84;
124    private static final int RsBlas_ssyr2 = 85;
125    private static final int RsBlas_sspr2 = 86;
126    private static final int RsBlas_dsymv = 87;
127    private static final int RsBlas_dsbmv = 88;
128    private static final int RsBlas_dspmv = 89;
129    private static final int RsBlas_dger = 90;
130    private static final int RsBlas_dsyr = 91;
131    private static final int RsBlas_dspr = 92;
132    private static final int RsBlas_dsyr2 = 93;
133    private static final int RsBlas_dspr2 = 94;
134    private static final int RsBlas_chemv = 95;
135    private static final int RsBlas_chbmv = 96;
136    private static final int RsBlas_chpmv = 97;
137    private static final int RsBlas_cgeru = 98;
138    private static final int RsBlas_cgerc = 99;
139    private static final int RsBlas_cher = 100;
140    private static final int RsBlas_chpr = 101;
141    private static final int RsBlas_cher2 = 102;
142    private static final int RsBlas_chpr2 = 103;
143    private static final int RsBlas_zhemv = 104;
144    private static final int RsBlas_zhbmv = 105;
145    private static final int RsBlas_zhpmv = 106;
146    private static final int RsBlas_zgeru = 107;
147    private static final int RsBlas_zgerc = 108;
148    private static final int RsBlas_zher = 109;
149    private static final int RsBlas_zhpr = 110;
150    private static final int RsBlas_zher2 = 111;
151    private static final int RsBlas_zhpr2 = 112;
152    private static final int RsBlas_sgemm = 113;
153    private static final int RsBlas_ssymm = 114;
154    private static final int RsBlas_ssyrk = 115;
155    private static final int RsBlas_ssyr2k = 116;
156    private static final int RsBlas_strmm = 117;
157    private static final int RsBlas_strsm = 118;
158    private static final int RsBlas_dgemm = 119;
159    private static final int RsBlas_dsymm = 120;
160    private static final int RsBlas_dsyrk = 121;
161    private static final int RsBlas_dsyr2k = 122;
162    private static final int RsBlas_dtrmm = 123;
163    private static final int RsBlas_dtrsm = 124;
164    private static final int RsBlas_cgemm = 125;
165    private static final int RsBlas_csymm = 126;
166    private static final int RsBlas_csyrk = 127;
167    private static final int RsBlas_csyr2k = 128;
168    private static final int RsBlas_ctrmm = 129;
169    private static final int RsBlas_ctrsm = 130;
170    private static final int RsBlas_zgemm = 131;
171    private static final int RsBlas_zsymm = 132;
172    private static final int RsBlas_zsyrk = 133;
173    private static final int RsBlas_zsyr2k = 134;
174    private static final int RsBlas_ztrmm = 135;
175    private static final int RsBlas_ztrsm = 136;
176    private static final int RsBlas_chemm = 137;
177    private static final int RsBlas_cherk = 138;
178    private static final int RsBlas_cher2k = 139;
179    private static final int RsBlas_zhemm = 140;
180    private static final int RsBlas_zherk = 141;
181    private static final int RsBlas_zher2k = 142;
182
183    // BLAS extensions start here
184    private static final int RsBlas_bnnm = 1000;
185
186    /**
187     * Create an intrinsic to access BLAS subroutines.
188     *
189     * @param rs The RenderScript context
190     * @return ScriptIntrinsicBLAS
191     */
192    public static ScriptIntrinsicBLAS create(RenderScript rs) {
193        long id = rs.nScriptIntrinsicCreate(13, Element.U32(rs).getID(rs));
194        return new ScriptIntrinsicBLAS(id, rs);
195    }
196
197    /**
198     * @hide
199     */
200    @IntDef({NO_TRANSPOSE, TRANSPOSE, CONJ_TRANSPOSE})
201    @Retention(RetentionPolicy.SOURCE)
202    public @interface Transpose {}
203
204    /**
205     * @hide
206     */
207    @IntDef({UPPER, LOWER})
208    @Retention(RetentionPolicy.SOURCE)
209    public @interface Uplo {}
210
211    /**
212     * @hide
213     */
214    @IntDef({NON_UNIT, UNIT})
215    @Retention(RetentionPolicy.SOURCE)
216    public @interface Diag {}
217
218    /**
219     * @hide
220     */
221    @IntDef({LEFT, RIGHT})
222    @Retention(RetentionPolicy.SOURCE)
223    public @interface Side {}
224
225    public static final int NO_TRANSPOSE = 111;
226    public static final int TRANSPOSE = 112;
227    public static final int CONJ_TRANSPOSE = 113;
228
229    public static final int UPPER = 121;
230    public static final int LOWER = 122;
231
232    public static final int NON_UNIT = 131;
233    public static final int UNIT = 132;
234
235    public static final int LEFT = 141;
236    public static final int RIGHT = 142;
237
238    static void validateSide(@Side int Side) {
239        if (Side != LEFT && Side != RIGHT) {
240            throw new RSRuntimeException("Invalid side passed to BLAS");
241        }
242    }
243
244    static void validateTranspose(@Transpose int Trans) {
245        if (Trans != NO_TRANSPOSE && Trans != TRANSPOSE &&
246            Trans != CONJ_TRANSPOSE) {
247            throw new RSRuntimeException("Invalid transpose passed to BLAS");
248        }
249    }
250
251    static void validateConjTranspose(@Transpose int Trans) {
252        if (Trans != NO_TRANSPOSE &&
253            Trans != CONJ_TRANSPOSE) {
254            throw new RSRuntimeException("Invalid transpose passed to BLAS");
255        }
256    }
257
258    static void validateDiag(@Diag int Diag) {
259        if (Diag != NON_UNIT && Diag != UNIT) {
260            throw new RSRuntimeException("Invalid diag passed to BLAS");
261        }
262    }
263
264    static void validateUplo(@Uplo int Uplo) {
265        if (Uplo != UPPER && Uplo != LOWER) {
266            throw new RSRuntimeException("Invalid uplo passed to BLAS");
267        }
268    }
269
270
271    /**
272     * Level 2 BLAS
273     */
274
275    static void validateGEMV(Element e, int TransA, Allocation A, Allocation X, int incX, Allocation Y, int incY) {
276        validateTranspose(TransA);
277        int M = A.getType().getY();
278        int N = A.getType().getX();
279        if (!A.getType().getElement().isCompatible(e) ||
280            !X.getType().getElement().isCompatible(e) ||
281            !Y.getType().getElement().isCompatible(e)) {
282            throw new RSRuntimeException("Called BLAS with wrong Element type");
283        }
284        if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
285            throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
286        }
287
288        if (incX <= 0 || incY <= 0) {
289            throw new RSRuntimeException("Vector increments must be greater than 0");
290        }
291        int expectedXDim = -1, expectedYDim = -1;
292        if (TransA == NO_TRANSPOSE) {
293            expectedXDim = 1 + (N - 1) * incX;
294            expectedYDim = 1 + (M - 1) * incY;
295        } else {
296            expectedXDim = 1 + (M - 1) * incX;
297            expectedYDim = 1 + (N - 1) * incY;
298        }
299        if (X.getType().getX() != expectedXDim ||
300            Y.getType().getX() != expectedYDim) {
301            throw new RSRuntimeException("Incorrect vector dimensions for GEMV");
302        }
303    }
304
305    /**
306     * SGEMV performs one of the matrix-vector operations
307     * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y
308     *
309     * Details: http://www.netlib.org/lapack/explore-html/db/d58/sgemv_8f.html
310     *
311     * @param TransA The type of transpose applied to matrix A.
312     * @param alpha The scalar alpha.
313     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
314     * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
315     * @param incX The increment for the elements of vector x, must be larger than zero.
316     * @param beta The scalar beta.
317     * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
318     * @param incY The increment for the elements of vector y, must be larger than zero.
319     */
320    public void SGEMV(@Transpose int TransA, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) {
321        validateGEMV(Element.F32(mRS), TransA, A, X, incX, Y, incY);
322        int M = A.getType().getY();
323        int N = A.getType().getX();
324        mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0);
325    }
326
327    /**
328     * DGEMV performs one of the matrix-vector operations
329     * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y
330     *
331     * Details: http://www.netlib.org/lapack/explore-html/dc/da8/dgemv_8f.html
332     *
333     * @param TransA The type of transpose applied to matrix A.
334     * @param alpha The scalar alpha.
335     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
336     * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
337     * @param incX The increment for the elements of vector x, must be larger than zero.
338     * @param beta The scalar beta.
339     * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
340     * @param incY The increment for the elements of vector y, must be larger than zero.
341     */
342    public void DGEMV(@Transpose int TransA, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) {
343        validateGEMV(Element.F64(mRS), TransA, A, X, incX, Y, incY);
344        int M = A.getType().getY();
345        int N = A.getType().getX();
346        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0);
347    }
348
349    /**
350     * CGEMV performs one of the matrix-vector operations
351     * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y   or   y := alpha*A**H*x + beta*y
352     *
353     * Details: http://www.netlib.org/lapack/explore-html/d4/d8a/cgemv_8f.html
354     *
355     * @param TransA The type of transpose applied to matrix A.
356     * @param alpha The scalar alpha.
357     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
358     * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
359     * @param incX The increment for the elements of vector x, must be larger than zero.
360     * @param beta The scalar beta.
361     * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
362     * @param incY The increment for the elements of vector y, must be larger than zero.
363     */
364    public void CGEMV(@Transpose int TransA, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) {
365        validateGEMV(Element.F32_2(mRS), TransA, A, X, incX, Y, incY);
366        int M = A.getType().getY();
367        int N = A.getType().getX();
368        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0);
369    }
370
371    /**
372     * ZGEMV performs one of the matrix-vector operations
373     * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y   or   y := alpha*A**H*x + beta*y
374     *
375     * Details: http://www.netlib.org/lapack/explore-html/db/d40/zgemv_8f.html
376     *
377     * @param TransA The type of transpose applied to matrix A.
378     * @param alpha The scalar alpha.
379     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
380     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
381     * @param incX The increment for the elements of vector x, must be larger than zero.
382     * @param beta The scalar beta.
383     * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
384     * @param incY The increment for the elements of vector y, must be larger than zero.
385     */
386    public void ZGEMV(@Transpose int TransA, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) {
387        validateGEMV(Element.F64_2(mRS), TransA, A, X, incX, Y, incY);
388        int M = A.getType().getY();
389        int N = A.getType().getX();
390        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0);
391    }
392
393    /**
394     * SGBMV performs one of the matrix-vector operations
395     * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y
396     *
397     * Details: http://www.netlib.org/lapack/explore-html/d6/d46/sgbmv_8f.html
398     *
399     * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),
400     *       but only the region M*(KL+KU+1) will be referenced. The following subroutine can is an
401     *       example showing how to convert the original matrix 'a' to row-based band matrix 'b'.
402     *           for i in range(0, m):
403     *              for j in range(max(0, i-kl), min(i+ku+1, n)):
404     *                  b[i, j-i+kl] = a[i, j]
405     *
406     * @param TransA The type of transpose applied to matrix A.
407     * @param KL The number of sub-diagonals of the matrix A.
408     * @param KU The number of super-diagonals of the matrix A.
409     * @param alpha The scalar alpha.
410     * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F32}.
411     * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
412     * @param incX The increment for the elements of vector x, must be larger than zero.
413     * @param beta The scalar beta.
414     * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
415     * @param incY The increment for the elements of vector y, must be larger than zero.
416     */
417    public void SGBMV(@Transpose int TransA, int KL, int KU, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) {
418        // GBMV has the same validation requirements as GEMV + KL and KU >= 0
419        validateGEMV(Element.F32(mRS), TransA, A, X, incX, Y, incY);
420        if (KL < 0 || KU < 0) {
421            throw new RSRuntimeException("KL and KU must be greater than or equal to 0");
422        }
423        int M = A.getType().getY();
424        int N = A.getType().getX();
425        mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, KL, KU);
426    }
427
428    /**
429     * DGBMV performs one of the matrix-vector operations
430     * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y
431     *
432     * Details: http://www.netlib.org/lapack/explore-html/d2/d3f/dgbmv_8f.html
433     *
434     * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),
435     *       but only the region M*(KL+KU+1) will be referenced. The following subroutine can is an
436     *       example showing how to convert the original matrix 'a' to row-based band matrix 'b'.
437     *           for i in range(0, m):
438     *              for j in range(max(0, i-kl), min(i+ku+1, n)):
439     *                  b[i, j-i+kl] = a[i, j]
440     *
441     * @param TransA The type of transpose applied to matrix A.
442     * @param KL The number of sub-diagonals of the matrix A.
443     * @param KU The number of super-diagonals of the matrix A.
444     * @param alpha The scalar alpha.
445     * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F64}.
446     * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
447     * @param incX The increment for the elements of vector x, must be larger than zero.
448     * @param beta The scalar beta.
449     * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
450     * @param incY The increment for the elements of vector y, must be larger than zero.
451     */
452    public void DGBMV(@Transpose int TransA, int KL, int KU, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) {
453        // GBMV has the same validation requirements as GEMV + KL and KU >= 0
454        validateGEMV(Element.F64(mRS), TransA, A, X, incX, Y, incY);
455        if (KL < 0 || KU < 0) {
456            throw new RSRuntimeException("KL and KU must be greater than or equal to 0");
457        }
458        int M = A.getType().getY();
459        int N = A.getType().getX();
460        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, KL, KU);
461    }
462
463    /**
464     * CGBMV performs one of the matrix-vector operations
465     * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y   or   y := alpha*A**H*x + beta*y
466     *
467     * Details: http://www.netlib.org/lapack/explore-html/d0/d75/cgbmv_8f.html
468     *
469     * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),
470     *       but only the region M*(KL+KU+1) will be referenced. The following subroutine can is an
471     *       example showing how to convert the original matrix 'a' to row-based band matrix 'b'.
472     *           for i in range(0, m):
473     *              for j in range(max(0, i-kl), min(i+ku+1, n)):
474     *                  b[i, j-i+kl] = a[i, j]
475     *
476     * @param TransA The type of transpose applied to matrix A.
477     * @param KL The number of sub-diagonals of the matrix A.
478     * @param KU The number of super-diagonals of the matrix A.
479     * @param alpha The scalar alpha.
480     * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F32_2}.
481     * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
482     * @param incX The increment for the elements of vector x, must be larger than zero.
483     * @param beta The scalar beta.
484     * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
485     * @param incY The increment for the elements of vector y, must be larger than zero.
486     */
487    public void CGBMV(@Transpose int TransA, int KL, int KU, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) {
488        // GBMV has the same validation requirements as GEMV + KL and KU >= 0
489        validateGEMV(Element.F32_2(mRS), TransA, A, X, incX, Y, incY);
490        if (KL < 0 || KU < 0) {
491            throw new RSRuntimeException("KL and KU must be greater than or equal to 0");
492        }
493        int M = A.getType().getY();
494        int N = A.getType().getX();
495        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, KL, KU);
496    }
497
498    /**
499     * ZGBMV performs one of the matrix-vector operations
500     * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y   or   y := alpha*A**H*x + beta*y
501     *
502     * Details: http://www.netlib.org/lapack/explore-html/d9/d46/zgbmv_8f.html
503     *
504     * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),
505     *       but only the region M*(KL+KU+1) will be referenced. The following subroutine can is an
506     *       example showing how to convert the original matrix 'a' to row-based band matrix 'b'.
507     *           for i in range(0, m):
508     *              for j in range(max(0, i-kl), min(i+ku+1, n)):
509     *                  b[i, j-i+kl] = a[i, j]
510     *
511     * @param TransA The type of transpose applied to matrix A.
512     * @param KL The number of sub-diagonals of the matrix A.
513     * @param KU The number of super-diagonals of the matrix A.
514     * @param alpha The scalar alpha.
515     * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F64_2}.
516     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
517     * @param incX The increment for the elements of vector x, must be larger than zero.
518     * @param beta The scalar beta.
519     * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
520     * @param incY The increment for the elements of vector y, must be larger than zero.
521     */
522    public void ZGBMV(@Transpose int TransA, int KL, int KU, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) {
523        // GBMV has the same validation requirements as GEMV + KL and KU >= 0
524        validateGEMV(Element.F64_2(mRS), TransA, A, X, incX, Y, incY);
525        if (KL < 0 || KU < 0) {
526            throw new RSRuntimeException("KL and KU must be greater than or equal to 0");
527        }
528        int M = A.getType().getY();
529        int N = A.getType().getX();
530        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, KL, KU);
531    }
532
533    static void validateTRMV(Element e, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {
534        validateTranspose(TransA);
535        validateUplo(Uplo);
536        validateDiag(Diag);
537        int N = A.getType().getY();
538        if (A.getType().getX() != N) {
539            throw new RSRuntimeException("A must be a square matrix for TRMV");
540        }
541        if (!A.getType().getElement().isCompatible(e) ||
542            !X.getType().getElement().isCompatible(e)) {
543            throw new RSRuntimeException("Called BLAS with wrong Element type");
544        }
545        if (X.getType().getY() > 1) {
546            throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
547        }
548
549        if (incX <= 0) {
550            throw new RSRuntimeException("Vector increments must be greater than 0");
551        }
552        int expectedXDim = 1 + (N - 1) * incX;
553        if (X.getType().getX() != expectedXDim) {
554            throw new RSRuntimeException("Incorrect vector dimensions for TRMV");
555        }
556    }
557
558    static int validateTPMV(Element e, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) {
559        validateTranspose(TransA);
560        validateUplo(Uplo);
561        validateDiag(Diag);
562        if (!Ap.getType().getElement().isCompatible(e) ||
563            !X.getType().getElement().isCompatible(e)) {
564            throw new RSRuntimeException("Called BLAS with wrong Element type");
565        }
566        if (X.getType().getY() > 1) {
567            throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
568        }
569
570        if (Ap.getType().getY() > 1) {
571            throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1");
572        }
573
574        int N = (int)Math.sqrt((double)Ap.getType().getX() * 2);
575        //is it really doing anything?
576        if (Ap.getType().getX() != ((N * (N+1)) / 2)) {
577            throw new RSRuntimeException("Invalid dimension for Ap");
578        }
579        if (incX <= 0) {
580            throw new RSRuntimeException("Vector increments must be greater than 0");
581        }
582        int expectedXDim = 1 + (N - 1) * incX;
583        if (X.getType().getX() != expectedXDim) {
584            throw new RSRuntimeException("Incorrect vector dimensions for TPMV");
585        }
586
587        return N;
588    }
589
590    /**
591     * STRMV performs one of the matrix-vector operations
592     * x := A*x   or   x := A**T*x
593     *
594     * Details: http://www.netlib.org/lapack/explore-html/de/d45/strmv_8f.html
595     *
596     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
597     * @param TransA The type of transpose applied to matrix A.
598     * @param Diag Specifies whether or not A is unit triangular.
599     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
600     * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
601     * @param incX The increment for the elements of vector x, must be larger than zero.
602     */
603    public void STRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {
604        validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX);
605        int N = A.getType().getY();
606        mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);
607    }
608
609    /**
610     * DTRMV performs one of the matrix-vector operations
611     * x := A*x   or   x := A**T*x
612     *
613     * Details: http://www.netlib.org/lapack/explore-html/dc/d7e/dtrmv_8f.html
614     *
615     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
616     * @param TransA The type of transpose applied to matrix A.
617     * @param Diag Specifies whether or not A is unit triangular.
618     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
619     * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
620     * @param incX The increment for the elements of vector x, must be larger than zero.
621     */
622    public void DTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {
623        validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX);
624        int N = A.getType().getY();
625        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);
626    }
627
628    /**
629     * CTRMV performs one of the matrix-vector operations
630     * x := A*x   or   x := A**T*x   or   x := A**H*x
631     *
632     * Details: http://www.netlib.org/lapack/explore-html/df/d78/ctrmv_8f.html
633     *
634     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
635     * @param TransA The type of transpose applied to matrix A.
636     * @param Diag Specifies whether or not A is unit triangular.
637     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
638     * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
639     * @param incX The increment for the elements of vector x, must be larger than zero.
640     */
641    public void CTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {
642        validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX);
643        int N = A.getType().getY();
644        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);
645    }
646
647    /**
648     * ZTRMV performs one of the matrix-vector operations
649     * x := A*x   or   x := A**T*x   or   x := A**H*x
650     *
651     * Details: http://www.netlib.org/lapack/explore-html/d0/dd1/ztrmv_8f.html
652     *
653     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
654     * @param TransA The type of transpose applied to matrix A.
655     * @param Diag Specifies whether or not A is unit triangular.
656     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
657     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
658     * @param incX The increment for the elements of vector x, must be larger than zero.
659     */
660    public void ZTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {
661        validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX);
662        int N = A.getType().getY();
663        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);
664    }
665
666    /**
667     * STBMV performs one of the matrix-vector operations
668     * x := A*x   or   x := A**T*x
669     *
670     * Details: http://www.netlib.org/lapack/explore-html/d6/d7d/stbmv_8f.html
671     *
672     * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
673     *       but only the region N*(K+1) will be referenced. The following subroutine can is an
674     *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
675     *           for i in range(0, n):
676     *              for j in range(i, min(i+k+1, n)):
677     *                  b[i, j-i] = a[i, j]
678     *
679     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
680     * @param TransA The type of transpose applied to matrix A.
681     * @param Diag Specifies whether or not A is unit triangular.
682     * @param K The number of off-diagonals of the matrix A
683     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
684     * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
685     * @param incX The increment for the elements of vector x, must be larger than zero.
686     */
687    public void STBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
688        // TBMV has the same requirements as TRMV + K >= 0
689        if (K < 0) {
690            throw new RSRuntimeException("K must be greater than or equal to 0");
691        }
692        validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX);
693        int N = A.getType().getY();
694        mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);
695    }
696
697    /**
698     * DTBMV performs one of the matrix-vector operations
699     * x := A*x   or   x := A**T*x
700     *
701     * Details: http://www.netlib.org/lapack/explore-html/df/d29/dtbmv_8f.html
702     *
703     * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
704     *       but only the region N*(K+1) will be referenced. The following subroutine can is an
705     *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
706     *           for i in range(0, n):
707     *              for j in range(i, min(i+k+1, n)):
708     *                  b[i, j-i] = a[i, j]
709     *
710     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
711     * @param TransA The type of transpose applied to matrix A.
712     * @param Diag Specifies whether or not A is unit triangular.
713     * @param K The number of off-diagonals of the matrix A
714     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
715     * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
716     * @param incX The increment for the elements of vector x, must be larger than zero.
717     */
718    public void DTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
719        // TBMV has the same requirements as TRMV + K >= 0
720        if (K < 0) {
721            throw new RSRuntimeException("K must be greater than or equal to 0");
722        }
723        validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX);
724        int N = A.getType().getY();
725        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);
726    }
727
728    /**
729     * CTBMV performs one of the matrix-vector operations
730     * x := A*x   or   x := A**T*x   or   x := A**H*x
731     *
732     * Details: http://www.netlib.org/lapack/explore-html/d3/dcd/ctbmv_8f.html
733     *
734     * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
735     *       but only the region N*(K+1) will be referenced. The following subroutine can is an
736     *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
737     *           for i in range(0, n):
738     *              for j in range(i, min(i+k+1, n)):
739     *                  b[i, j-i] = a[i, j]
740     *
741     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
742     * @param TransA The type of transpose applied to matrix A.
743     * @param Diag Specifies whether or not A is unit triangular.
744     * @param K The number of off-diagonals of the matrix A
745     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
746     * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
747     * @param incX The increment for the elements of vector x, must be larger than zero.
748     */
749    public void CTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
750        // TBMV has the same requirements as TRMV + K >= 0
751        if (K < 0) {
752            throw new RSRuntimeException("K must be greater than or equal to 0");
753        }
754        validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX);
755        int N = A.getType().getY();
756        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);
757    }
758
759    /**
760     * ZTBMV performs one of the matrix-vector operations
761     * x := A*x   or   x := A**T*x   or   x := A**H*x
762     *
763     * Details: http://www.netlib.org/lapack/explore-html/d3/d39/ztbmv_8f.html
764     *
765     * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
766     *       but only the region N*(K+1) will be referenced. The following subroutine can is an
767     *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
768     *           for i in range(0, n):
769     *              for j in range(i, min(i+k+1, n)):
770     *                  b[i, j-i] = a[i, j]
771     *
772     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
773     * @param TransA The type of transpose applied to matrix A.
774     * @param Diag Specifies whether or not A is unit triangular.
775     * @param K The number of off-diagonals of the matrix A
776     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
777     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
778     * @param incX The increment for the elements of vector x, must be larger than zero.
779     */
780    public void ZTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
781        // TBMV has the same requirements as TRMV + K >= 0
782        if (K < 0) {
783            throw new RSRuntimeException("K must be greater than or equal to 0");
784        }
785        validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX);
786        int N = A.getType().getY();
787        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);
788    }
789
790    /**
791     * STPMV performs one of the matrix-vector operations
792     * x := A*x   or   x := A**T*x
793     *
794     * Details: http://www.netlib.org/lapack/explore-html/db/db1/stpmv_8f.html
795     *
796     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
797     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
798     *       'a' to packed matrix 'b'.
799     *           k = 0
800     *           for i in range(0, n):
801     *              for j in range(i, n):
802     *                  b[k++] = a[i, j]
803     *
804     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
805     * @param TransA The type of transpose applied to matrix A.
806     * @param Diag Specifies whether or not A is unit triangular.
807     * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32}.
808     * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
809     * @param incX The increment for the elements of vector x, must be larger than zero.
810     */
811    public void STPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
812        int N = validateTPMV(Element.F32(mRS), Uplo, TransA, Diag, Ap, X, incX);
813        mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);
814    }
815
816    /**
817     * DTPMV performs one of the matrix-vector operations
818     * x := A*x   or   x := A**T*x
819     *
820     * Details: http://www.netlib.org/lapack/explore-html/dc/dcd/dtpmv_8f.html
821     *
822     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
823     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
824     *       'a' to packed matrix 'b'.
825     *           k = 0
826     *           for i in range(0, n):
827     *              for j in range(i, n):
828     *                  b[k++] = a[i, j]
829     *
830     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
831     * @param TransA The type of transpose applied to matrix A.
832     * @param Diag Specifies whether or not A is unit triangular.
833     * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64}.
834     * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
835     * @param incX The increment for the elements of vector x, must be larger than zero.
836     */
837    public void DTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
838        int N = validateTPMV(Element.F64(mRS), Uplo, TransA, Diag, Ap, X, incX);
839        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);
840    }
841
842    /**
843     * CTPMV performs one of the matrix-vector operations
844     * x := A*x   or   x := A**T*x   or   x := A**H*x
845     *
846     * Details: http://www.netlib.org/lapack/explore-html/d4/dbb/ctpmv_8f.html
847     *
848     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
849     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
850     *       'a' to packed matrix 'b'.
851     *           k = 0
852     *           for i in range(0, n):
853     *              for j in range(i, n):
854     *                  b[k++] = a[i, j]
855     *
856     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
857     * @param TransA The type of transpose applied to matrix A.
858     * @param Diag Specifies whether or not A is unit triangular.
859     * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32_2}.
860     * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
861     * @param incX The increment for the elements of vector x, must be larger than zero.
862     */
863    public void CTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
864        int N = validateTPMV(Element.F32_2(mRS), Uplo, TransA, Diag, Ap, X, incX);
865        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);
866    }
867
868    /**
869     * ZTPMV performs one of the matrix-vector operations
870     * x := A*x   or   x := A**T*x   or   x := A**H*x
871     *
872     * Details: http://www.netlib.org/lapack/explore-html/d2/d9e/ztpmv_8f.html
873     *
874     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
875     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
876     *       'a' to packed matrix 'b'.
877     *           k = 0
878     *           for i in range(0, n):
879     *              for j in range(i, n):
880     *                  b[k++] = a[i, j]
881     *
882     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
883     * @param TransA The type of transpose applied to matrix A.
884     * @param Diag Specifies whether or not A is unit triangular.
885     * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64_2}.
886     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
887     * @param incX The increment for the elements of vector x, must be larger than zero.
888     */
889    public void ZTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
890        int N = validateTPMV(Element.F64_2(mRS), Uplo, TransA, Diag, Ap, X, incX);
891        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);
892    }
893
894    /**
895     * STRSV solves one of the systems of equations
896     * A*x = b   or   A**T*x = b
897     *
898     * Details: http://www.netlib.org/lapack/explore-html/d0/d2a/strsv_8f.html
899     *
900     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
901     * @param TransA The type of transpose applied to matrix A.
902     * @param Diag Specifies whether or not A is unit triangular.
903     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
904     * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
905     * @param incX The increment for the elements of vector x, must be larger than zero.
906     */
907    public void STRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation A,  Allocation X,  int incX) {
908        // TRSV is the same as TRMV
909        validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX);
910        int N = A.getType().getY();
911        mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);
912
913    }
914
915    /**
916     * DTRSV solves one of the systems of equations
917     * A*x = b   or   A**T*x = b
918     *
919     * Details: http://www.netlib.org/lapack/explore-html/d6/d96/dtrsv_8f.html
920     *
921     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
922     * @param TransA The type of transpose applied to matrix A.
923     * @param Diag Specifies whether or not A is unit triangular.
924     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
925     * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
926     * @param incX The increment for the elements of vector x, must be larger than zero.
927     */
928    public void DTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation A,  Allocation X,  int incX) {
929        // TRSV is the same as TRMV
930        validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX);
931        int N = A.getType().getY();
932        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);
933
934    }
935
936    /**
937     * CTRSV solves one of the systems of equations
938     * A*x = b   or   A**T*x = b   or   A**H*x = b
939     *
940     * Details: http://www.netlib.org/lapack/explore-html/d4/dc8/ctrsv_8f.html
941     *
942     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
943     * @param TransA The type of transpose applied to matrix A.
944     * @param Diag Specifies whether or not A is unit triangular.
945     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
946     * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
947     * @param incX The increment for the elements of vector x, must be larger than zero.
948     */
949    public void CTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation A,  Allocation X,  int incX) {
950        // TRSV is the same as TRMV
951        validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX);
952        int N = A.getType().getY();
953        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);
954
955    }
956
957    /**
958     * ZTRSV solves one of the systems of equations
959     * A*x = b   or   A**T*x = b   or   A**H*x = b
960     *
961     * Details: http://www.netlib.org/lapack/explore-html/d1/d2f/ztrsv_8f.html
962     *
963     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
964     * @param TransA The type of transpose applied to matrix A.
965     * @param Diag Specifies whether or not A is unit triangular.
966     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
967     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
968     * @param incX The increment for the elements of vector x, must be larger than zero.
969     */
970    public void ZTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation A,  Allocation X,  int incX) {
971        // TRSV is the same as TRMV
972        validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX);
973        int N = A.getType().getY();
974        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);
975
976    }
977
978    /**
979     * STBSV solves one of the systems of equations
980     * A*x = b   or   A**T*x = b
981     *
982     * Details: http://www.netlib.org/lapack/explore-html/d0/d1f/stbsv_8f.html
983     *
984     * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
985     *       but only the region N*(K+1) will be referenced. The following subroutine can is an
986     *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
987     *           for i in range(0, n):
988     *              for j in range(i, min(i+k+1, n)):
989     *                  b[i, j-i] = a[i, j]
990     *
991     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
992     * @param TransA The type of transpose applied to matrix A.
993     * @param Diag Specifies whether or not A is unit triangular.
994     * @param K The number of off-diagonals of the matrix A
995     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
996     * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
997     * @param incX The increment for the elements of vector x, must be larger than zero.
998     */
999    public void STBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
1000        // TBSV is the same as TRMV + K >= 0
1001        validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX);
1002        int N = A.getType().getY();
1003        if (K < 0) {
1004            throw new RSRuntimeException("Number of diagonals must be positive");
1005        }
1006        mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);
1007    }
1008
1009    /**
1010     * DTBSV solves one of the systems of equations
1011     * A*x = b   or   A**T*x = b
1012     *
1013     * Details: http://www.netlib.org/lapack/explore-html/d4/dcf/dtbsv_8f.html
1014     *
1015     * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
1016     *       but only the region N*(K+1) will be referenced. The following subroutine can is an
1017     *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
1018     *           for i in range(0, n):
1019     *              for j in range(i, min(i+k+1, n)):
1020     *                  b[i, j-i] = a[i, j]
1021     *
1022     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1023     * @param TransA The type of transpose applied to matrix A.
1024     * @param Diag Specifies whether or not A is unit triangular.
1025     * @param K The number of off-diagonals of the matrix A
1026     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
1027     * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
1028     * @param incX The increment for the elements of vector x, must be larger than zero.
1029     */
1030    public void DTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
1031        // TBSV is the same as TRMV + K >= 0
1032        validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX);
1033        int N = A.getType().getY();
1034        if (K < 0) {
1035            throw new RSRuntimeException("Number of diagonals must be positive");
1036        }
1037        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);
1038    }
1039
1040    /**
1041     * CTBSV solves one of the systems of equations
1042     * A*x = b   or   A**T*x = b   or   A**H*x = b
1043     *
1044     * Details: http://www.netlib.org/lapack/explore-html/d9/d5f/ctbsv_8f.html
1045     *
1046     * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
1047     *       but only the region N*(K+1) will be referenced. The following subroutine can is an
1048     *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
1049     *           for i in range(0, n):
1050     *              for j in range(i, min(i+k+1, n)):
1051     *                  b[i, j-i] = a[i, j]
1052     *
1053     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1054     * @param TransA The type of transpose applied to matrix A.
1055     * @param Diag Specifies whether or not A is unit triangular.
1056     * @param K The number of off-diagonals of the matrix A
1057     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
1058     * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
1059     * @param incX The increment for the elements of vector x, must be larger than zero.
1060     */
1061    public void CTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
1062        // TBSV is the same as TRMV + K >= 0
1063        validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX);
1064        int N = A.getType().getY();
1065        if (K < 0) {
1066            throw new RSRuntimeException("Number of diagonals must be positive");
1067        }
1068        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);
1069    }
1070
1071    /**
1072     * ZTBSV solves one of the systems of equations
1073     * A*x = b   or   A**T*x = b   or   A**H*x = b
1074     *
1075     * Details: http://www.netlib.org/lapack/explore-html/d4/d5a/ztbsv_8f.html
1076     *
1077     * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
1078     *       but only the region N*(K+1) will be referenced. The following subroutine can is an
1079     *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
1080     *           for i in range(0, n):
1081     *              for j in range(i, min(i+k+1, n)):
1082     *                  b[i, j-i] = a[i, j]
1083     *
1084     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1085     * @param TransA The type of transpose applied to matrix A.
1086     * @param Diag Specifies whether or not A is unit triangular.
1087     * @param K The number of off-diagonals of the matrix A
1088     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
1089     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
1090     * @param incX The increment for the elements of vector x, must be larger than zero.
1091     */
1092    public void ZTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
1093        // TBSV is the same as TRMV + K >= 0
1094        validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX);
1095        int N = A.getType().getY();
1096        if (K < 0) {
1097            throw new RSRuntimeException("Number of diagonals must be positive");
1098        }
1099        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);
1100    }
1101
1102    /**
1103     * STPSV solves one of the systems of equations
1104     * A*x = b   or   A**T*x = b
1105     *
1106     * Details: http://www.netlib.org/lapack/explore-html/d0/d7c/stpsv_8f.html
1107     *
1108     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
1109     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
1110     *       'a' to packed matrix 'b'.
1111     *           k = 0
1112     *           for i in range(0, n):
1113     *              for j in range(i, n):
1114     *                  b[k++] = a[i, j]
1115     *
1116     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1117     * @param TransA The type of transpose applied to matrix A.
1118     * @param Diag Specifies whether or not A is unit triangular.
1119     * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32}.
1120     * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1121     * @param incX The increment for the elements of vector x, must be larger than zero.
1122     */
1123    public void STPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
1124        // TPSV is same as TPMV
1125        int N = validateTPMV(Element.F32(mRS), Uplo, TransA, Diag, Ap, X, incX);
1126        mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);
1127    }
1128
1129    /**
1130     * DTPSV solves one of the systems of equations
1131     * A*x = b   or   A**T*x = b
1132     *
1133     * Details: http://www.netlib.org/lapack/explore-html/d9/d84/dtpsv_8f.html
1134     *
1135     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
1136     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
1137     *       'a' to packed matrix 'b'.
1138     *           k = 0
1139     *           for i in range(0, n):
1140     *              for j in range(i, n):
1141     *                  b[k++] = a[i, j]
1142     *
1143     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1144     * @param TransA The type of transpose applied to matrix A.
1145     * @param Diag Specifies whether or not A is unit triangular.
1146     * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64}.
1147     * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
1148     * @param incX The increment for the elements of vector x, must be larger than zero.
1149     */
1150    public void DTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
1151        // TPSV is same as TPMV
1152        int N = validateTPMV(Element.F64(mRS), Uplo, TransA, Diag, Ap, X, incX);
1153        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);
1154    }
1155
1156    /**
1157     * CTPSV solves one of the systems of equations
1158     * A*x = b   or   A**T*x = b   or   A**H*x = b
1159     *
1160     * Details: http://www.netlib.org/lapack/explore-html/d8/d56/ctpsv_8f.html
1161     *
1162     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
1163     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
1164     *       'a' to packed matrix 'b'.
1165     *           k = 0
1166     *           for i in range(0, n):
1167     *              for j in range(i, n):
1168     *                  b[k++] = a[i, j]
1169     *
1170     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1171     * @param TransA The type of transpose applied to matrix A.
1172     * @param Diag Specifies whether or not A is unit triangular.
1173     * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32_2}.
1174     * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
1175     * @param incX The increment for the elements of vector x, must be larger than zero.
1176     */
1177    public void CTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
1178        // TPSV is same as TPMV
1179        int N = validateTPMV(Element.F32_2(mRS), Uplo, TransA, Diag, Ap, X, incX);
1180        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);
1181    }
1182
1183    /**
1184     * ZTPSV solves one of the systems of equations
1185     * A*x = b   or   A**T*x = b   or   A**H*x = b
1186     *
1187     * Details: http://www.netlib.org/lapack/explore-html/da/d57/ztpsv_8f.html
1188     *
1189     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
1190     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
1191     *       'a' to packed matrix 'b'.
1192     *           k = 0
1193     *           for i in range(0, n):
1194     *              for j in range(i, n):
1195     *                  b[k++] = a[i, j]
1196     *
1197     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1198     * @param TransA The type of transpose applied to matrix A.
1199     * @param Diag Specifies whether or not A is unit triangular.
1200     * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64_2}.
1201     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
1202     * @param incX The increment for the elements of vector x, must be larger than zero.
1203     */
1204    public void ZTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
1205        // TPSV is same as TPMV
1206        int N = validateTPMV(Element.F64_2(mRS), Uplo, TransA, Diag, Ap, X, incX);
1207        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);
1208    }
1209
1210    /**
1211     * Level 2, S and D only
1212     */
1213    static int validateSYMV(Element e, @Uplo int Uplo, Allocation A, Allocation X, Allocation Y, int incX, int incY) {
1214        validateUplo(Uplo);
1215        int N = A.getType().getY();
1216        if (A.getType().getX() != N) {
1217            throw new RSRuntimeException("A must be a square matrix for SYMV");
1218        }
1219        if (!A.getType().getElement().isCompatible(e) ||
1220            !X.getType().getElement().isCompatible(e) ||
1221            !Y.getType().getElement().isCompatible(e) ) {
1222            throw new RSRuntimeException("Called BLAS with wrong Element type");
1223        }
1224        if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
1225            throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
1226        }
1227
1228        if (incX <= 0 || incY <= 0) {
1229            throw new RSRuntimeException("Vector increments must be greater than 0");
1230        }
1231        int expectedXDim = 1 + (N - 1) * incX;
1232        if (X.getType().getX() != expectedXDim) {
1233            throw new RSRuntimeException("Incorrect vector dimensions for SYMV");
1234        }
1235        int expectedYDim = 1 + (N - 1) * incY;
1236        if (Y.getType().getX() != expectedYDim) {
1237            throw new RSRuntimeException("Incorrect vector dimensions for SYMV");
1238        }
1239        return N;
1240    }
1241    static int validateSPMV(Element e, @Uplo int Uplo, Allocation Ap, Allocation X, int incX, Allocation Y, int incY) {
1242        validateUplo(Uplo);
1243        if (!Ap.getType().getElement().isCompatible(e) ||
1244            !X.getType().getElement().isCompatible(e) ||
1245            !Y.getType().getElement().isCompatible(e)) {
1246            throw new RSRuntimeException("Called BLAS with wrong Element type");
1247        }
1248        if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
1249            throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
1250        }
1251
1252        if (Ap.getType().getY() > 1) {
1253            throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1");
1254        }
1255
1256        int N = (int)Math.sqrt((double)Ap.getType().getX() * 2);
1257        if (Ap.getType().getX() != ((N * (N+1)) / 2)) {
1258            throw new RSRuntimeException("Invalid dimension for Ap");
1259        }
1260        if (incX <= 0 || incY <= 0) {
1261            throw new RSRuntimeException("Vector increments must be greater than 0");
1262        }
1263        int expectedXDim = 1 + (N - 1) * incX;
1264        if (X.getType().getX() != expectedXDim) {
1265            throw new RSRuntimeException("Incorrect vector dimensions for SPMV");
1266        }
1267        int expectedYDim = 1 + (N - 1) * incY;
1268        if (Y.getType().getX() != expectedYDim) {
1269            throw new RSRuntimeException("Incorrect vector dimensions for SPMV");
1270        }
1271
1272        return N;
1273    }
1274    static void validateGER(Element e, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
1275        if (!A.getType().getElement().isCompatible(e) ||
1276            !X.getType().getElement().isCompatible(e) ||
1277            !Y.getType().getElement().isCompatible(e) ) {
1278            throw new RSRuntimeException("Called BLAS with wrong Element type");
1279        }
1280
1281        if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
1282            throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
1283        }
1284
1285        int M = A.getType().getY();
1286        int N = A.getType().getX();
1287
1288        if (N < 1 || M < 1) {
1289            throw new RSRuntimeException("M and N must be 1 or greater for GER");
1290        }
1291        if (incX <= 0 || incY <= 0) {
1292            throw new RSRuntimeException("Vector increments must be greater than 0");
1293        }
1294        int expectedXDim = 1 + (M - 1) * incX;
1295        if (X.getType().getX() != expectedXDim) {
1296            throw new RSRuntimeException("Incorrect vector dimensions for GER");
1297        }
1298        int expectedYDim = 1 + (N - 1) * incY;
1299        if (Y.getType().getX() != expectedYDim) {
1300            throw new RSRuntimeException("Incorrect vector dimensions for GER");
1301        }
1302
1303
1304    }
1305    static int validateSYR(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation A) {
1306        validateUplo(Uplo);
1307        if (!A.getType().getElement().isCompatible(e) ||
1308            !X.getType().getElement().isCompatible(e)) {
1309            throw new RSRuntimeException("Called BLAS with wrong Element type");
1310        }
1311
1312        int N = A.getType().getX();
1313
1314        if (X.getType().getY() > 1) {
1315            throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
1316        }
1317        if (N != A.getType().getY()) {
1318            throw new RSRuntimeException("A must be a symmetric matrix");
1319        }
1320        if (incX <= 0) {
1321            throw new RSRuntimeException("Vector increments must be greater than 0");
1322        }
1323        int expectedXDim = 1 + (N - 1) * incX;
1324        if (X.getType().getX() != expectedXDim) {
1325            throw new RSRuntimeException("Incorrect vector dimensions for SYR");
1326        }
1327        return N;
1328    }
1329    static int validateSPR(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Ap) {
1330        validateUplo(Uplo);
1331        if (!Ap.getType().getElement().isCompatible(e) ||
1332            !X.getType().getElement().isCompatible(e)) {
1333            throw new RSRuntimeException("Called BLAS with wrong Element type");
1334        }
1335        if (X.getType().getY() > 1) {
1336            throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
1337        }
1338
1339        if (Ap.getType().getY() > 1) {
1340            throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1");
1341        }
1342
1343        int N = (int)Math.sqrt((double)Ap.getType().getX() * 2);
1344        if (Ap.getType().getX() != ((N * (N+1)) / 2)) {
1345            throw new RSRuntimeException("Invalid dimension for Ap");
1346        }
1347        if (incX <= 0) {
1348            throw new RSRuntimeException("Vector increments must be greater than 0");
1349        }
1350        int expectedXDim = 1 + (N - 1) * incX;
1351        if (X.getType().getX() != expectedXDim) {
1352            throw new RSRuntimeException("Incorrect vector dimensions for SPR");
1353        }
1354
1355        return N;
1356    }
1357
1358    static int validateSYR2(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
1359        validateUplo(Uplo);
1360        if (!A.getType().getElement().isCompatible(e) ||
1361            !X.getType().getElement().isCompatible(e) ||
1362            !Y.getType().getElement().isCompatible(e)) {
1363            throw new RSRuntimeException("Called BLAS with wrong Element type");
1364        }
1365
1366        if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
1367            throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
1368        }
1369
1370        int N = A.getType().getX();
1371
1372        if (N != A.getType().getY()) {
1373            throw new RSRuntimeException("A must be a symmetric matrix");
1374        }
1375        if (incX <= 0 || incY <= 0) {
1376            throw new RSRuntimeException("Vector increments must be greater than 0");
1377        }
1378        int expectedXDim = 1 + (N - 1) * incX;
1379        int expectedYDim = 1 + (N - 1) * incY;
1380        if (X.getType().getX() != expectedXDim || Y.getType().getX() != expectedYDim) {
1381            throw new RSRuntimeException("Incorrect vector dimensions for SYR");
1382        }
1383        return N;
1384
1385    }
1386    static int validateSPR2(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) {
1387        validateUplo(Uplo);
1388        if (!Ap.getType().getElement().isCompatible(e) ||
1389            !X.getType().getElement().isCompatible(e) ||
1390            !Y.getType().getElement().isCompatible(e)) {
1391            throw new RSRuntimeException("Called BLAS with wrong Element type");
1392        }
1393        if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
1394            throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
1395        }
1396
1397        if (Ap.getType().getY() > 1) {
1398            throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1");
1399        }
1400
1401        int N = (int)Math.sqrt((double)Ap.getType().getX() * 2);
1402        if (Ap.getType().getX() != ((N * (N+1)) / 2)) {
1403            throw new RSRuntimeException("Invalid dimension for Ap");
1404        }
1405        if (incX <= 0 || incY <= 0) {
1406            throw new RSRuntimeException("Vector increments must be greater than 0");
1407        }
1408        int expectedXDim = 1 + (N - 1) * incX;
1409        int expectedYDim = 1 + (N - 1) * incY;
1410        if (X.getType().getX() != expectedXDim || Y.getType().getX() != expectedYDim) {
1411            throw new RSRuntimeException("Incorrect vector dimensions for SPR2");
1412        }
1413
1414        return N;
1415    }
1416
1417    /**
1418     * SSYMV performs the matrix-vector operation
1419     * y := alpha*A*x + beta*y
1420     *
1421     * Details: http://www.netlib.org/lapack/explore-html/d2/d94/ssymv_8f.html
1422     *
1423     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
1424     * @param alpha The scalar alpha.
1425     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
1426     * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1427     * @param incX The increment for the elements of vector x, must be larger than zero.
1428     * @param beta The scalar beta.
1429     * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
1430     * @param incY The increment for the elements of vector y, must be larger than zero.
1431     */
1432    public void SSYMV(@Uplo int Uplo, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) {
1433        int N = validateSYMV(Element.F32(mRS), Uplo, A, X, Y, incX, incY);
1434        mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssymv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0);
1435    }
1436
1437    /**
1438     * SSBMV performs the matrix-vector operation
1439     * y := alpha*A*x + beta*y
1440     *
1441     * Details: http://www.netlib.org/lapack/explore-html/d3/da1/ssbmv_8f.html
1442     *
1443     * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
1444     *       but only the region N*(K+1) will be referenced. The following subroutine can is an
1445     *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
1446     *           for i in range(0, n):
1447     *              for j in range(i, min(i+k+1, n)):
1448     *                  b[i, j-i] = a[i, j]
1449     *
1450     * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied.
1451     * @param K The number of off-diagonals of the matrix A
1452     * @param alpha The scalar alpha.
1453     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
1454     * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1455     * @param incX The increment for the elements of vector x, must be larger than zero.
1456     * @param beta The scalar beta.
1457     * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
1458     * @param incY The increment for the elements of vector y, must be larger than zero.
1459     */
1460    public void SSBMV(@Uplo int Uplo, int K, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) {
1461        // SBMV is the same as SYMV + K >= 0
1462        if (K < 0) {
1463            throw new RSRuntimeException("K must be greater than or equal to 0");
1464        }
1465        int N = validateSYMV(Element.F32(mRS), Uplo, A, X, Y, incX, incY);
1466        mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0);
1467    }
1468
1469    /**
1470     * SSPMV performs the matrix-vector operation
1471     * y := alpha*A*x + beta*y
1472     *
1473     * Details: http://www.netlib.org/lapack/explore-html/d8/d68/sspmv_8f.html
1474     *
1475     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
1476     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
1477     *       'a' to packed matrix 'b'.
1478     *           k = 0
1479     *           for i in range(0, n):
1480     *              for j in range(i, n):
1481     *                  b[k++] = a[i, j]
1482     *
1483     * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form.
1484     * @param alpha The scalar alpha.
1485     * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32}.
1486     * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1487     * @param incX The increment for the elements of vector x, must be larger than zero.
1488     * @param beta The scalar beta.
1489     * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
1490     * @param incY The increment for the elements of vector y, must be larger than zero.
1491     */
1492    public void SSPMV(@Uplo int Uplo, float alpha, Allocation Ap, Allocation X, int incX, float beta, Allocation Y, int incY) {
1493        int N = validateSPMV(Element.F32(mRS), Uplo, Ap, X, incX, Y, incY);
1494        mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sspmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, Ap.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0);
1495    }
1496
1497    /**
1498     * SGER performs the rank 1 operation
1499     * A := alpha*x*y**T + A
1500     *
1501     * Details: http://www.netlib.org/lapack/explore-html/db/d5c/sger_8f.html
1502     *
1503     * @param alpha The scalar alpha.
1504     * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1505     * @param incX The increment for the elements of vector x, must be larger than zero.
1506     * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
1507     * @param incY The increment for the elements of vector y, must be larger than zero.
1508     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
1509     */
1510    public void SGER(float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
1511        int M = A.getType().getY();
1512        int N = A.getType().getX();
1513        validateGER(Element.F32(mRS), X, incX, Y, incY, A);
1514        mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sger, 0, 0, 0, 0, 0, M, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0.f, A.getID(mRS), incX, incY, 0, 0);
1515    }
1516
1517    /**
1518     * SSYR performs the rank 1 operation
1519     * A := alpha*x*x**T + A
1520     *
1521     * Details: http://www.netlib.org/lapack/explore-html/d6/dac/ssyr_8f.html
1522     *
1523     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
1524     * @param alpha The scalar alpha.
1525     * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1526     * @param incX The increment for the elements of vector x, must be larger than zero.
1527     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
1528     */
1529    public void SSYR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation A) {
1530        int N = validateSYR(Element.F32(mRS), Uplo, X, incX, A);
1531        mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), A.getID(mRS), 0.f, 0, incX, 0, 0, 0);
1532    }
1533
1534    /**
1535     * SSPR performs the rank 1 operation
1536     * A := alpha*x*x**T + A
1537     *
1538     * Details: http://www.netlib.org/lapack/explore-html/d2/d9b/sspr_8f.html
1539     *
1540     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
1541     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
1542     *       'a' to packed matrix 'b'.
1543     *           k = 0
1544     *           for i in range(0, n):
1545     *              for j in range(i, n):
1546     *                  b[k++] = a[i, j]
1547     *
1548     * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
1549     * @param alpha The scalar alpha.
1550     * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1551     * @param incX The increment for the elements of vector x, must be larger than zero.
1552     * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32}.
1553     */
1554    public void SSPR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Ap) {
1555        int N = validateSPR(Element.F32(mRS), Uplo, X, incX, Ap);
1556        mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sspr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Ap.getID(mRS), 0.f, 0, incX, 0, 0, 0);
1557    }
1558
1559    /**
1560     * SSYR2 performs the symmetric rank 2 operation
1561     * A := alpha*x*y**T + alpha*y*x**T + A
1562     *
1563     * Details: http://www.netlib.org/lapack/explore-html/db/d99/ssyr2_8f.html
1564     *
1565     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
1566     * @param alpha The scalar alpha.
1567     * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1568     * @param incX The increment for the elements of vector x, must be larger than zero.
1569     * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
1570     * @param incY The increment for the elements of vector y, must be larger than zero.
1571     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
1572     */
1573    public void SSYR2(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
1574        int N = validateSYR2(Element.F32(mRS), Uplo, X, incX, Y, incY, A);
1575        mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0, A.getID(mRS), incX, incY, 0, 0);
1576    }
1577
1578    /**
1579     * SSPR2 performs the symmetric rank 2 operation
1580     * A := alpha*x*y**T + alpha*y*x**T + A
1581     *
1582     * Details: http://www.netlib.org/lapack/explore-html/db/d3e/sspr2_8f.html
1583     *
1584     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
1585     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
1586     *       'a' to packed matrix 'b'.
1587     *           k = 0
1588     *           for i in range(0, n):
1589     *              for j in range(i, n):
1590     *                  b[k++] = a[i, j]
1591     *
1592     * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
1593     * @param alpha The scalar alpha.
1594     * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1595     * @param incX The increment for the elements of vector x, must be larger than zero.
1596     * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
1597     * @param incY The increment for the elements of vector y, must be larger than zero.
1598     * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32}.
1599     */
1600    public void SSPR2(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) {
1601        int N = validateSPR2(Element.F32(mRS), Uplo, X, incX, Y, incY, Ap);
1602        mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sspr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0, Ap.getID(mRS), incX, incY, 0, 0);
1603    }
1604
1605    /**
1606     * DSYMV performs the matrix-vector operation
1607     * y := alpha*A*x + beta*y
1608     *
1609     * Details: http://www.netlib.org/lapack/explore-html/d8/dbe/dsymv_8f.html
1610     *
1611     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
1612     * @param alpha The scalar alpha.
1613     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
1614     * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
1615     * @param incX The increment for the elements of vector x, must be larger than zero.
1616     * @param beta The scalar beta.
1617     * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
1618     * @param incY The increment for the elements of vector y, must be larger than zero.
1619     */
1620    public void DSYMV(@Uplo int Uplo, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) {
1621        int N = validateSYMV(Element.F64(mRS), Uplo, A, X, Y, incX, incY);
1622        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsymv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0);
1623    }
1624
1625    /**
1626     * DSBMV performs the matrix-vector operation
1627     * y := alpha*A*x + beta*y
1628     *
1629     * Details: http://www.netlib.org/lapack/explore-html/d8/d1e/dsbmv_8f.html
1630     *
1631     * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
1632     *       but only the region N*(K+1) will be referenced. The following subroutine can is an
1633     *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
1634     *           for i in range(0, n):
1635     *              for j in range(i, min(i+k+1, n)):
1636     *                  b[i, j-i] = a[i, j]
1637     *
1638     * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied.
1639     * @param K The number of off-diagonals of the matrix A
1640     * @param alpha The scalar alpha.
1641     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
1642     * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
1643     * @param incX The increment for the elements of vector x, must be larger than zero.
1644     * @param beta The scalar beta.
1645     * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
1646     * @param incY The increment for the elements of vector y, must be larger than zero.
1647     */
1648    public void DSBMV(@Uplo int Uplo, int K, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) {
1649        // SBMV is the same as SYMV + K >= 0
1650        if (K < 0) {
1651            throw new RSRuntimeException("K must be greater than or equal to 0");
1652        }
1653        int N = validateSYMV(Element.F64(mRS), Uplo, A, X, Y, incX, incY);
1654        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0);
1655    }
1656
1657    /**
1658     * DSPMV performs the matrix-vector operation
1659     * y := alpha*A*x + beta*y
1660     *
1661     * Details: http://www.netlib.org/lapack/explore-html/d4/d85/dspmv_8f.html
1662     *
1663     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
1664     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
1665     *       'a' to packed matrix 'b'.
1666     *           k = 0
1667     *           for i in range(0, n):
1668     *              for j in range(i, n):
1669     *                  b[k++] = a[i, j]
1670     *
1671     * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form.
1672     * @param alpha The scalar alpha.
1673     * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64}.
1674     * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
1675     * @param incX The increment for the elements of vector x, must be larger than zero.
1676     * @param beta The scalar beta.
1677     * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
1678     * @param incY The increment for the elements of vector y, must be larger than zero.
1679     */
1680    public void DSPMV(@Uplo int Uplo, double alpha, Allocation Ap, Allocation X, int incX, double beta, Allocation Y, int incY) {
1681        int N = validateSPMV(Element.F64(mRS), Uplo, Ap, X, incX, Y, incY);
1682        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dspmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, Ap.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0);
1683    }
1684
1685    /**
1686     * DGER performs the rank 1 operation
1687     * A := alpha*x*y**T + A
1688     *
1689     * Details: http://www.netlib.org/lapack/explore-html/dc/da8/dger_8f.html
1690     *
1691     * @param alpha The scalar alpha.
1692     * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
1693     * @param incX The increment for the elements of vector x, must be larger than zero.
1694     * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
1695     * @param incY The increment for the elements of vector y, must be larger than zero.
1696     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
1697     */
1698    public void DGER(double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
1699        int M = A.getType().getY();
1700        int N = A.getType().getX();
1701        validateGER(Element.F64(mRS), X, incX, Y, incY, A);
1702        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dger, 0, 0, 0, 0, 0, M, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0.f, A.getID(mRS), incX, incY, 0, 0);
1703    }
1704
1705    /**
1706     * DSYR performs the rank 1 operation
1707     * A := alpha*x*x**T + A
1708     *
1709     * Details: http://www.netlib.org/lapack/explore-html/d3/d60/dsyr_8f.html
1710     *
1711     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
1712     * @param alpha The scalar alpha.
1713     * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
1714     * @param incX The increment for the elements of vector x, must be larger than zero.
1715     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
1716     */
1717    public void DSYR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation A) {
1718        int N = validateSYR(Element.F64(mRS), Uplo, X, incX, A);
1719        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), A.getID(mRS), 0.f, 0, incX, 0, 0, 0);
1720    }
1721
1722    /**
1723     * DSPR performs the rank 1 operation
1724     * A := alpha*x*x**T + A
1725     *
1726     * Details: http://www.netlib.org/lapack/explore-html/dd/dba/dspr_8f.html
1727     *
1728     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
1729     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
1730     *       'a' to packed matrix 'b'.
1731     *           k = 0
1732     *           for i in range(0, n):
1733     *              for j in range(i, n):
1734     *                  b[k++] = a[i, j]
1735     *
1736     * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
1737     * @param alpha The scalar alpha.
1738     * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
1739     * @param incX The increment for the elements of vector x, must be larger than zero.
1740     * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64}.
1741     */
1742    public void DSPR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Ap) {
1743        int N = validateSPR(Element.F64(mRS), Uplo, X, incX, Ap);
1744        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dspr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Ap.getID(mRS), 0.f, 0, incX, 0, 0, 0);
1745    }
1746
1747    /**
1748     * DSYR2 performs the symmetric rank 2 operation
1749     * A := alpha*x*y**T + alpha*y*x**T + A
1750     *
1751     * Details: http://www.netlib.org/lapack/explore-html/de/d41/dsyr2_8f.html
1752     *
1753     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
1754     * @param alpha The scalar alpha.
1755     * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
1756     * @param incX The increment for the elements of vector x, must be larger than zero.
1757     * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
1758     * @param incY The increment for the elements of vector y, must be larger than zero.
1759     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
1760     */
1761    public void DSYR2(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
1762        int N = validateSYR2(Element.F64(mRS), Uplo, X, incX, Y, incY, A);
1763        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0, A.getID(mRS), incX, incY, 0, 0);
1764    }
1765
1766    /**
1767     * DSPR2 performs the symmetric rank 2 operation
1768     * A := alpha*x*y**T + alpha*y*x**T + A
1769     *
1770     * Details: http://www.netlib.org/lapack/explore-html/dd/d9e/dspr2_8f.html
1771     *
1772     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
1773     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
1774     *       'a' to packed matrix 'b'.
1775     *           k = 0
1776     *           for i in range(0, n):
1777     *              for j in range(i, n):
1778     *                  b[k++] = a[i, j]
1779     *
1780     * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
1781     * @param alpha The scalar alpha.
1782     * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
1783     * @param incX The increment for the elements of vector x, must be larger than zero.
1784     * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
1785     * @param incY The increment for the elements of vector y, must be larger than zero.
1786     * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64}.
1787     */
1788    public void DSPR2(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) {
1789        int N = validateSPR2(Element.F64(mRS), Uplo, X, incX, Y, incY, Ap);
1790        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dspr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0, Ap.getID(mRS), incX, incY, 0, 0);
1791    }
1792
1793
1794    /**
1795     * Level 2, C and Z only
1796     */
1797
1798    static void validateGERU(Element e, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
1799        if (!A.getType().getElement().isCompatible(e) ||
1800            !X.getType().getElement().isCompatible(e) ||
1801            !Y.getType().getElement().isCompatible(e)) {
1802            throw new RSRuntimeException("Called BLAS with wrong Element type");
1803        }
1804        if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
1805            throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
1806        }
1807
1808        int M = A.getType().getY();
1809        int N = A.getType().getX();
1810        if (incX <= 0 || incY <= 0) {
1811            throw new RSRuntimeException("Vector increments must be greater than 0");
1812        }
1813        int expectedXDim = 1 + (M - 1) * incX;
1814        if (X.getType().getX() != expectedXDim) {
1815            throw new RSRuntimeException("Incorrect vector dimensions for GERU");
1816        }
1817        int expectedYDim = 1 + (N - 1) * incY;
1818        if (Y.getType().getX() != expectedYDim) {
1819            throw new RSRuntimeException("Incorrect vector dimensions for GERU");
1820        }
1821
1822    }
1823
1824    /**
1825     * CHEMV performs the matrix-vector operation
1826     * y := alpha*A*x + beta*y
1827     *
1828     * Details: http://www.netlib.org/lapack/explore-html/d7/d51/chemv_8f.html
1829     *
1830     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
1831     * @param alpha The scalar alpha.
1832     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
1833     * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
1834     * @param incX The increment for the elements of vector x, must be larger than zero.
1835     * @param beta The scalar beta.
1836     * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
1837     * @param incY The increment for the elements of vector y, must be larger than zero.
1838     */
1839    public void CHEMV(@Uplo int Uplo, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) {
1840        // HEMV is the same as SYR2 validation-wise
1841        int N = validateSYR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, A);
1842        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chemv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0);
1843    }
1844
1845    /**
1846     * CHBMV performs the matrix-vector operation
1847     * y := alpha*A*x + beta*y
1848     *
1849     * Details: http://www.netlib.org/lapack/explore-html/db/dc2/chbmv_8f.html
1850     *
1851     * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
1852     *       but only the region N*(K+1) will be referenced. The following subroutine can is an
1853     *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
1854     *           for i in range(0, n):
1855     *              for j in range(i, min(i+k+1, n)):
1856     *                  b[i, j-i] = a[i, j]
1857     *
1858     * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied.
1859     * @param K The number of off-diagonals of the matrix A
1860     * @param alpha The scalar alpha.
1861     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
1862     * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
1863     * @param incX The increment for the elements of vector x, must be larger than zero.
1864     * @param beta The scalar beta.
1865     * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
1866     * @param incY The increment for the elements of vector y, must be larger than zero.
1867     */
1868    public void CHBMV(@Uplo int Uplo, int K, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) {
1869        // HBMV is the same as SYR2 validation-wise
1870        int N = validateSYR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, A);
1871        if (K < 0) {
1872            throw new RSRuntimeException("K must be 0 or greater for HBMV");
1873        }
1874        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0);
1875    }
1876
1877    /**
1878     * CHPMV performs the matrix-vector operation
1879     * y := alpha*A*x + beta*y
1880     *
1881     * Details: http://www.netlib.org/lapack/explore-html/d2/d06/chpmv_8f.html
1882     *
1883     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
1884     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
1885     *       'a' to packed matrix 'b'.
1886     *           k = 0
1887     *           for i in range(0, n):
1888     *              for j in range(i, n):
1889     *                  b[k++] = a[i, j]
1890     *
1891     * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form.
1892     * @param alpha The scalar alpha.
1893     * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
1894     * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
1895     * @param incX The increment for the elements of vector x, must be larger than zero.
1896     * @param beta The scalar beta.
1897     * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
1898     * @param incY The increment for the elements of vector y, must be larger than zero.
1899     */
1900    public void CHPMV(@Uplo int Uplo, Float2 alpha, Allocation Ap, Allocation X, int incX, Float2 beta, Allocation Y, int incY) {
1901        // HPMV is the same as SPR2
1902        int N = validateSPR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, Ap);
1903        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chpmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, Ap.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0);
1904    }
1905
1906    /**
1907     * CGERU performs the rank 1 operation
1908     * A := alpha*x*y**T + A
1909     *
1910     * Details: http://www.netlib.org/lapack/explore-html/db/d5f/cgeru_8f.html
1911     *
1912     * @param alpha The scalar alpha.
1913     * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
1914     * @param incX The increment for the elements of vector x, must be larger than zero.
1915     * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
1916     * @param incY The increment for the elements of vector y, must be larger than zero.
1917     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
1918     */
1919    public void CGERU(Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
1920        validateGERU(Element.F32_2(mRS), X, incX, Y, incY, A);
1921        int M = A.getType().getY();
1922        int N = A.getType().getX();
1923        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgeru, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0);
1924    }
1925
1926    /**
1927     * CGERC performs the rank 1 operation
1928     * A := alpha*x*y**H + A
1929     *
1930     * Details: http://www.netlib.org/lapack/explore-html/dd/d84/cgerc_8f.html
1931     *
1932     * @param alpha The scalar alpha.
1933     * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
1934     * @param incX The increment for the elements of vector x, must be larger than zero.
1935     * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
1936     * @param incY The increment for the elements of vector y, must be larger than zero.
1937     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
1938     */
1939    public void CGERC(Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
1940        // same as GERU
1941        validateGERU(Element.F32_2(mRS), X, incX, Y, incY, A);
1942        int M = A.getType().getY();
1943        int N = A.getType().getX();
1944        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgerc, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0);
1945    }
1946
1947    /**
1948     * CHER performs the rank 1 operation
1949     * A := alpha*x*x**H + A
1950     *
1951     * Details: http://www.netlib.org/lapack/explore-html/d3/d6d/cher_8f.html
1952     *
1953     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
1954     * @param alpha The scalar alpha.
1955     * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
1956     * @param incX The increment for the elements of vector x, must be larger than zero.
1957     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
1958     */
1959    public void CHER(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation A) {
1960        // same as SYR
1961        int N = validateSYR(Element.F32_2(mRS), Uplo, X, incX, A);
1962        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cher, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, X.getID(mRS), 0, 0, 0, A.getID(mRS), incX, 0, 0, 0);
1963    }
1964
1965    /**
1966     * CHPR performs the rank 1 operation
1967     * A := alpha*x*x**H + A
1968     *
1969     * Details: http://www.netlib.org/lapack/explore-html/db/dcd/chpr_8f.html
1970     *
1971     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
1972     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
1973     *       'a' to packed matrix 'b'.
1974     *           k = 0
1975     *           for i in range(0, n):
1976     *              for j in range(i, n):
1977     *                  b[k++] = a[i, j]
1978     *
1979     * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
1980     * @param alpha The scalar alpha.
1981     * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
1982     * @param incX The increment for the elements of vector x, must be larger than zero.
1983     * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
1984     */
1985    public void CHPR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Ap) {
1986        // equivalent to SPR for validation
1987        int N = validateSPR(Element.F32_2(mRS), Uplo, X, incX, Ap);
1988        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chpr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, X.getID(mRS), 0, 0, 0, Ap.getID(mRS), incX, 0, 0, 0);
1989    }
1990
1991    /**
1992     * CHER2 performs the symmetric rank 2 operation
1993     * A := alpha*x*y**H + alpha*y*x**H + A
1994     *
1995     * Details: http://www.netlib.org/lapack/explore-html/db/d87/cher2_8f.html
1996     *
1997     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
1998     * @param alpha The scalar alpha.
1999     * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
2000     * @param incX The increment for the elements of vector x, must be larger than zero.
2001     * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
2002     * @param incY The increment for the elements of vector y, must be larger than zero.
2003     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
2004     */
2005    public void CHER2(@Uplo int Uplo, Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
2006        // same as SYR2
2007        int N = validateSYR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, A);
2008        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cher2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0);
2009    }
2010
2011    /**
2012     * CHPR2 performs the symmetric rank 2 operation
2013     * A := alpha*x*y**H + alpha*y*x**H + A
2014     *
2015     * Details: http://www.netlib.org/lapack/explore-html/d6/d44/chpr2_8f.html
2016     *
2017     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
2018     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
2019     *       'a' to packed matrix 'b'.
2020     *           k = 0
2021     *           for i in range(0, n):
2022     *              for j in range(i, n):
2023     *                  b[k++] = a[i, j]
2024     *
2025     * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
2026     * @param alpha The scalar alpha.
2027     * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
2028     * @param incX The increment for the elements of vector x, must be larger than zero.
2029     * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
2030     * @param incY The increment for the elements of vector y, must be larger than zero.
2031     * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
2032     */
2033    public void CHPR2(@Uplo int Uplo, Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) {
2034        // same as SPR2
2035        int N = validateSPR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, Ap);
2036        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chpr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, Ap.getID(mRS), incX, incY, 0, 0);
2037    }
2038
2039    /**
2040     * ZHEMV performs the matrix-vector operation
2041     * y := alpha*A*x + beta*y
2042     *
2043     * Details: http://www.netlib.org/lapack/explore-html/d0/ddd/zhemv_8f.html
2044     *
2045     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2046     * @param alpha The scalar alpha.
2047     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2048     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2049     * @param incX The increment for the elements of vector x, must be larger than zero.
2050     * @param beta The scalar beta.
2051     * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
2052     * @param incY The increment for the elements of vector y, must be larger than zero.
2053     */
2054    public void ZHEMV(@Uplo int Uplo, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) {
2055        // HEMV is the same as SYR2 validation-wise
2056        int N = validateSYR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, A);
2057        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhemv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0);
2058    }
2059
2060    /**
2061     * ZHBMV performs the matrix-vector operation
2062     * y := alpha*A*x + beta*y
2063     *
2064     * Details: http://www.netlib.org/lapack/explore-html/d3/d1a/zhbmv_8f.html
2065     *
2066     * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
2067     *       but only the region N*(K+1) will be referenced. The following subroutine can is an
2068     *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
2069     *           for i in range(0, n):
2070     *              for j in range(i, min(i+k+1, n)):
2071     *                  b[i, j-i] = a[i, j]
2072     *
2073     * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied.
2074     * @param K The number of off-diagonals of the matrix A
2075     * @param alpha The scalar alpha.
2076     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2077     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2078     * @param incX The increment for the elements of vector x, must be larger than zero.
2079     * @param beta The scalar beta.
2080     * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
2081     * @param incY The increment for the elements of vector y, must be larger than zero.
2082     */
2083    public void ZHBMV(@Uplo int Uplo, int K, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) {
2084        // HBMV is the same as SYR2 validation-wise
2085        int N = validateSYR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, A);
2086        if (K < 0) {
2087            throw new RSRuntimeException("K must be 0 or greater for HBMV");
2088        }
2089        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0);
2090    }
2091
2092    /**
2093     * ZHPMV performs the matrix-vector operation
2094     * y := alpha*A*x + beta*y
2095     *
2096     * Details: http://www.netlib.org/lapack/explore-html/d0/d60/zhpmv_8f.html
2097     *
2098     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
2099     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
2100     *       'a' to packed matrix 'b'.
2101     *           k = 0
2102     *           for i in range(0, n):
2103     *              for j in range(i, n):
2104     *                  b[k++] = a[i, j]
2105     *
2106     * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form.
2107     * @param alpha The scalar alpha.
2108     * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2109     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2110     * @param incX The increment for the elements of vector x, must be larger than zero.
2111     * @param beta The scalar beta.
2112     * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
2113     * @param incY The increment for the elements of vector y, must be larger than zero.
2114     */
2115    public void ZHPMV(@Uplo int Uplo, Double2 alpha, Allocation Ap, Allocation X, int incX, Double2 beta, Allocation Y, int incY) {
2116        // HPMV is the same as SPR2
2117        int N = validateSPR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, Ap);
2118        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhpmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, Ap.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0);
2119    }
2120
2121    /**
2122     * ZGERU performs the rank 1 operation
2123     * A := alpha*x*y**T + A
2124     *
2125     * Details: http://www.netlib.org/lapack/explore-html/d7/d12/zgeru_8f.html
2126     *
2127     * @param alpha The scalar alpha.
2128     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2129     * @param incX The increment for the elements of vector x, must be larger than zero.
2130     * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
2131     * @param incY The increment for the elements of vector y, must be larger than zero.
2132     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2133     */
2134    public void ZGERU(Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
2135        validateGERU(Element.F64_2(mRS), X, incX, Y, incY, A);
2136        int M = A.getType().getY();
2137        int N = A.getType().getX();
2138        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgeru, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0);
2139    }
2140
2141    /**
2142     * ZGERC performs the rank 1 operation
2143     * A := alpha*x*y**H + A
2144     *
2145     * Details: http://www.netlib.org/lapack/explore-html/d3/dad/zgerc_8f.html
2146     *
2147     * @param alpha The scalar alpha.
2148     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2149     * @param incX The increment for the elements of vector x, must be larger than zero.
2150     * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
2151     * @param incY The increment for the elements of vector y, must be larger than zero.
2152     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2153     */
2154    public void ZGERC(Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
2155        // same as GERU
2156        validateGERU(Element.F64_2(mRS), X, incX, Y, incY, A);
2157        int M = A.getType().getY();
2158        int N = A.getType().getX();
2159        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgerc, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0);
2160    }
2161
2162    /**
2163     * ZHER performs the rank 1 operation
2164     * A := alpha*x*x**H + A
2165     *
2166     * Details: http://www.netlib.org/lapack/explore-html/de/d0e/zher_8f.html
2167     *
2168     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2169     * @param alpha The scalar alpha.
2170     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2171     * @param incX The increment for the elements of vector x, must be larger than zero.
2172     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2173     */
2174    public void ZHER(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation A) {
2175        // same as SYR
2176        int N = validateSYR(Element.F64_2(mRS), Uplo, X, incX, A);
2177        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zher, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, X.getID(mRS), 0, 0, 0, A.getID(mRS), incX, 0, 0, 0);
2178    }
2179
2180    /**
2181     * ZHPR performs the rank 1 operation
2182     * A := alpha*x*x**H + A
2183     *
2184     * Details: http://www.netlib.org/lapack/explore-html/de/de1/zhpr_8f.html
2185     *
2186     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
2187     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
2188     *       'a' to packed matrix 'b'.
2189     *           k = 0
2190     *           for i in range(0, n):
2191     *              for j in range(i, n):
2192     *                  b[k++] = a[i, j]
2193     *
2194     * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
2195     * @param alpha The scalar alpha.
2196     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2197     * @param incX The increment for the elements of vector x, must be larger than zero.
2198     * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2199     */
2200    public void ZHPR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Ap) {
2201        // equivalent to SPR for validation
2202        int N = validateSPR(Element.F64_2(mRS), Uplo, X, incX, Ap);
2203        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhpr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, X.getID(mRS), 0, 0, 0, Ap.getID(mRS), incX, 0, 0, 0);
2204    }
2205
2206    /**
2207     * ZHER2 performs the symmetric rank 2 operation
2208     * A := alpha*x*y**H + alpha*y*x**H + A
2209     *
2210     * Details: http://www.netlib.org/lapack/explore-html/da/d8a/zher2_8f.html
2211     *
2212     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2213     * @param alpha The scalar alpha.
2214     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2215     * @param incX The increment for the elements of vector x, must be larger than zero.
2216     * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
2217     * @param incY The increment for the elements of vector y, must be larger than zero.
2218     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2219     */
2220    public void ZHER2(@Uplo int Uplo, Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
2221        // same as SYR2
2222        int N = validateSYR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, A);
2223        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zher2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0);
2224    }
2225
2226    /**
2227     * ZHPR2 performs the symmetric rank 2 operation
2228     * A := alpha*x*y**H + alpha*y*x**H + A
2229     *
2230     * Details: http://www.netlib.org/lapack/explore-html/d5/d52/zhpr2_8f.html
2231     *
2232     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
2233     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
2234     *       'a' to packed matrix 'b'.
2235     *           k = 0
2236     *           for i in range(0, n):
2237     *              for j in range(i, n):
2238     *                  b[k++] = a[i, j]
2239     *
2240     * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
2241     * @param alpha The scalar alpha.
2242     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2243     * @param incX The increment for the elements of vector x, must be larger than zero.
2244     * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
2245     * @param incY The increment for the elements of vector y, must be larger than zero.
2246     * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2247     */
2248    public void ZHPR2(@Uplo int Uplo, Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) {
2249        // same as SPR2
2250        int N = validateSPR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, Ap);
2251        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhpr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, Ap.getID(mRS), incX, incY, 0, 0);
2252    }
2253
2254
2255    /**
2256     * Level 3 BLAS
2257     */
2258
2259    static void validateL3(Element e, int TransA, int TransB, int Side, Allocation A, Allocation B, Allocation C) {
2260        int aM = -1, aN = -1, bM = -1, bN = -1, cM = -1, cN = -1;
2261        if ((A != null && !A.getType().getElement().isCompatible(e)) ||
2262            (B != null && !B.getType().getElement().isCompatible(e)) ||
2263            (C != null && !C.getType().getElement().isCompatible(e))) {
2264            throw new RSRuntimeException("Called BLAS with wrong Element type");
2265        }
2266        if (C == null) {
2267            //since matrix C is used to store the result, it cannot be null.
2268            throw new RSRuntimeException("Allocation C cannot be null");
2269        }
2270        cM = C.getType().getY();
2271        cN = C.getType().getX();
2272
2273        if (Side == RIGHT) {
2274            if ((A == null && B != null) || (A != null && B == null)) {
2275                throw new RSRuntimeException("Provided Matrix A without Matrix B, or vice versa");
2276            }
2277            if (B != null) {
2278                bM = A.getType().getY();
2279                bN = A.getType().getX();
2280            }
2281            if (A != null) {
2282                aM = B.getType().getY();
2283                aN = B.getType().getX();
2284            }
2285        } else {
2286            if (A != null) {
2287                if (TransA == TRANSPOSE || TransA == CONJ_TRANSPOSE) {
2288                    aN = A.getType().getY();
2289                    aM = A.getType().getX();
2290                } else {
2291                    aM = A.getType().getY();
2292                    aN = A.getType().getX();
2293                }
2294            }
2295            if (B != null) {
2296                if (TransB == TRANSPOSE || TransB == CONJ_TRANSPOSE) {
2297                    bN = B.getType().getY();
2298                    bM = B.getType().getX();
2299                } else {
2300                    bM = B.getType().getY();
2301                    bN = B.getType().getX();
2302                }
2303            }
2304        }
2305        if (A != null && B != null && C != null) {
2306            if (aN != bM || aM != cM || bN != cN) {
2307                throw new RSRuntimeException("Called BLAS with invalid dimensions");
2308            }
2309        } else if (A != null && C != null) {
2310            // A and C only, for SYRK
2311            if (cM != cN) {
2312                throw new RSRuntimeException("Matrix C is not symmetric");
2313            }
2314            if (aM != cM) {
2315                throw new RSRuntimeException("Called BLAS with invalid dimensions");
2316            }
2317        } else if (A != null && B != null) {
2318            // A and B only
2319            if (aN != bM) {
2320                throw new RSRuntimeException("Called BLAS with invalid dimensions");
2321            }
2322        }
2323
2324    }
2325
2326    /**
2327     * SGEMM performs one of the matrix-matrix operations
2328     * C := alpha*op(A)*op(B) + beta*C   where op(X) is one of op(X) = X  or  op(X) = X**T
2329     *
2330     * Details: http://www.netlib.org/lapack/explore-html/d4/de2/sgemm_8f.html
2331     *
2332     * @param TransA The type of transpose applied to matrix A.
2333     * @param TransB The type of transpose applied to matrix B.
2334     * @param alpha The scalar alpha.
2335     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
2336     * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}.
2337     * @param beta The scalar beta.
2338     * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}.
2339     */
2340    public void SGEMM(@Transpose int TransA, @Transpose int TransB, float alpha, Allocation A,
2341                      Allocation B, float beta, Allocation C) {
2342        validateTranspose(TransA);
2343        validateTranspose(TransB);
2344        validateL3(Element.F32(mRS), TransA, TransB, 0, A, B, C);
2345
2346        int M = -1, N = -1, K = -1;
2347        if (TransA != NO_TRANSPOSE) {
2348            M = A.getType().getX();
2349            K = A.getType().getY();
2350        } else {
2351            M = A.getType().getY();
2352            K = A.getType().getX();
2353        }
2354        if (TransB != NO_TRANSPOSE) {
2355            N = B.getType().getY();
2356        } else {
2357            N = B.getType().getX();
2358        }
2359        mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sgemm, TransA, TransB, 0, 0, 0, M, N, K,  alpha, A.getID(mRS), B.getID(mRS),
2360                                        beta, C.getID(mRS), 0, 0, 0, 0);
2361    }
2362
2363    /**
2364     * DGEMM performs one of the matrix-matrix operations
2365     * C := alpha*op(A)*op(B) + beta*C   where op(X) is one of op(X) = X  or  op(X) = X**T
2366     *
2367     * Details: http://www.netlib.org/lapack/explore-html/d7/d2b/dgemm_8f.html
2368     *
2369     * @param TransA The type of transpose applied to matrix A.
2370     * @param TransB The type of transpose applied to matrix B.
2371     * @param alpha The scalar alpha.
2372     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
2373     * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}.
2374     * @param beta The scalar beta.
2375     * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}.
2376     */
2377    public void DGEMM(@Transpose int TransA, @Transpose int TransB, double alpha, Allocation A,
2378                      Allocation B, double beta, Allocation C) {
2379        validateTranspose(TransA);
2380        validateTranspose(TransB);
2381        validateL3(Element.F64(mRS), TransA, TransB, 0, A, B, C);
2382        int M = -1, N = -1, K = -1;
2383        if (TransA != NO_TRANSPOSE) {
2384            M = A.getType().getX();
2385            K = A.getType().getY();
2386        } else {
2387            M = A.getType().getY();
2388            K = A.getType().getX();
2389        }
2390        if (TransB != NO_TRANSPOSE) {
2391            N = B.getType().getY();
2392        } else {
2393            N = B.getType().getX();
2394        }
2395        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dgemm, TransA, TransB, 0, 0, 0, M, N, K,  alpha, A.getID(mRS), B.getID(mRS),
2396                                        beta, C.getID(mRS), 0, 0, 0, 0);
2397    }
2398
2399    /**
2400     * CGEMM performs one of the matrix-matrix operations
2401     * C := alpha*op(A)*op(B) + beta*C   where op(X) is one of op(X) = X  or  op(X) = X**T  or  op(X) = X**H
2402     *
2403     * Details: http://www.netlib.org/lapack/explore-html/d6/d5b/cgemm_8f.html
2404     *
2405     * @param TransA The type of transpose applied to matrix A.
2406     * @param TransB The type of transpose applied to matrix B.
2407     * @param alpha The scalar alpha.
2408     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
2409     * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.
2410     * @param beta The scalar beta.
2411     * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.
2412     */
2413    public void CGEMM(@Transpose int TransA, @Transpose int TransB, Float2 alpha, Allocation A,
2414                      Allocation B, Float2 beta, Allocation C) {
2415        validateTranspose(TransA);
2416        validateTranspose(TransB);
2417        validateL3(Element.F32_2(mRS), TransA, TransB, 0, A, B, C);
2418        int M = -1, N = -1, K = -1;
2419        if (TransA != NO_TRANSPOSE) {
2420            M = A.getType().getX();
2421            K = A.getType().getY();
2422        } else {
2423            M = A.getType().getY();
2424            K = A.getType().getX();
2425        }
2426        if (TransB != NO_TRANSPOSE) {
2427            N = B.getType().getY();
2428        } else {
2429            N = B.getType().getX();
2430        }
2431        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgemm, TransA, TransB, 0, 0, 0, M, N, K,  alpha.x, alpha.y, A.getID(mRS), B.getID(mRS),
2432                                         beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0);
2433    }
2434
2435    /**
2436     * ZGEMM performs one of the matrix-matrix operations
2437     * C := alpha*op(A)*op(B) + beta*C   where op(X) is one of op(X) = X  or  op(X) = X**T  or  op(X) = X**H
2438     *
2439     * Details: http://www.netlib.org/lapack/explore-html/d7/d76/zgemm_8f.html
2440     *
2441     * @param TransA The type of transpose applied to matrix A.
2442     * @param TransB The type of transpose applied to matrix B.
2443     * @param alpha The scalar alpha.
2444     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2445     * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.
2446     * @param beta The scalar beta.
2447     * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.
2448     */
2449    public void ZGEMM(@Transpose int TransA, @Transpose int TransB, Double2 alpha, Allocation A,
2450                      Allocation B, Double2 beta, Allocation C) {
2451        validateTranspose(TransA);
2452        validateTranspose(TransB);
2453        validateL3(Element.F64_2(mRS), TransA, TransB, 0, A, B, C);
2454        int M = -1, N = -1, K = -1;
2455        if (TransA != NO_TRANSPOSE) {
2456            M = A.getType().getX();
2457            K = A.getType().getY();
2458        } else {
2459            M = A.getType().getY();
2460            K = A.getType().getX();
2461        }
2462        if (TransB != NO_TRANSPOSE) {
2463            N = B.getType().getY();
2464        } else {
2465            N = B.getType().getX();
2466        }
2467        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgemm, TransA, TransB, 0, 0, 0, M, N, K,  alpha.x, alpha.y, A.getID(mRS), B.getID(mRS),
2468                                   beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0);
2469    }
2470
2471    /**
2472     * SSYMM performs one of the matrix-matrix operations
2473     * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
2474     *
2475     * Details: http://www.netlib.org/lapack/explore-html/d7/d42/ssymm_8f.html
2476     *
2477     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
2478     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2479     * @param alpha The scalar alpha.
2480     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
2481     * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}.
2482     * @param beta The scalar beta.
2483     * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}.
2484     */
2485    public void SSYMM(@Side int Side, @Uplo int Uplo, float alpha, Allocation A,
2486                      Allocation B, float beta, Allocation C) {
2487        validateSide(Side);
2488        validateUplo(Uplo);
2489        //For SYMM, Matrix A should be symmetric
2490        if (A.getType().getX() != A.getType().getY()) {
2491            throw new RSRuntimeException("Matrix A is not symmetric");
2492        }
2493        validateL3(Element.F32(mRS), 0, 0, Side, A, B, C);
2494        mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha, A.getID(mRS), B.getID(mRS),
2495                                        beta, C.getID(mRS), 0, 0, 0, 0);
2496    }
2497
2498    /**
2499     * DSYMM performs one of the matrix-matrix operations
2500     * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
2501     *
2502     * Details: http://www.netlib.org/lapack/explore-html/d8/db0/dsymm_8f.html
2503     *
2504     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
2505     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2506     * @param alpha The scalar alpha.
2507     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
2508     * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}.
2509     * @param beta The scalar beta.
2510     * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}.
2511     */
2512    public void DSYMM(@Side int Side, @Uplo int Uplo, double alpha, Allocation A,
2513                      Allocation B, double beta, Allocation C) {
2514        validateSide(Side);
2515        validateUplo(Uplo);
2516        if (A.getType().getX() != A.getType().getY()) {
2517            throw new RSRuntimeException("Matrix A is not symmetric");
2518        }
2519        validateL3(Element.F64(mRS), 0, 0, Side, A, B, C);
2520        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha, A.getID(mRS), B.getID(mRS),
2521                                        beta, C.getID(mRS), 0, 0, 0, 0);
2522    }
2523
2524    /**
2525     * CSYMM performs one of the matrix-matrix operations
2526     * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
2527     *
2528     * Details: http://www.netlib.org/lapack/explore-html/db/d59/csymm_8f.html
2529     *
2530     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
2531     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2532     * @param alpha The scalar alpha.
2533     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
2534     * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.
2535     * @param beta The scalar beta.
2536     * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.
2537     */
2538    public void CSYMM(@Side int Side, @Uplo int Uplo, Float2 alpha, Allocation A,
2539                      Allocation B, Float2 beta, Allocation C) {
2540        validateSide(Side);
2541        validateUplo(Uplo);
2542        if (A.getType().getX() != A.getType().getY()) {
2543            throw new RSRuntimeException("Matrix A is not symmetric");
2544        }
2545        validateL3(Element.F32_2(mRS), 0, 0, Side, A, B, C);
2546        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_csymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS),
2547                                         beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0);
2548    }
2549
2550    /**
2551     * ZSYMM performs one of the matrix-matrix operations
2552     * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
2553     *
2554     * Details: http://www.netlib.org/lapack/explore-html/df/d51/zsymm_8f.html
2555     *
2556     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
2557     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2558     * @param alpha The scalar alpha.
2559     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2560     * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.
2561     * @param beta The scalar beta.
2562     * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.
2563     */
2564    public void ZSYMM(@Side int Side, @Uplo int Uplo, Double2 alpha, Allocation A,
2565                      Allocation B, Double2 beta, Allocation C) {
2566        validateSide(Side);
2567        validateUplo(Uplo);
2568        if (A.getType().getX() != A.getType().getY()) {
2569            throw new RSRuntimeException("Matrix A is not symmetric");
2570        }
2571        validateL3(Element.F64_2(mRS), 0, 0, Side, A, B, C);
2572        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zsymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS),
2573                                   beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0);
2574    }
2575
2576    /**
2577     * SSYRK performs one of the symmetric rank k operations
2578     * C := alpha*A*A**T + beta*C   or   C := alpha*A**T*A + beta*C
2579     *
2580     * Details: http://www.netlib.org/lapack/explore-html/d0/d40/ssyrk_8f.html
2581     *
2582     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
2583     * @param Trans The type of transpose applied to the operation.
2584     * @param alpha The scalar alpha.
2585     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
2586     * @param beta The scalar beta.
2587     * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}.
2588     */
2589    public void SSYRK(@Uplo int Uplo, @Transpose int Trans, float alpha, Allocation A, float beta, Allocation C) {
2590        validateTranspose(Trans);
2591        validateUplo(Uplo);
2592        validateL3(Element.F32(mRS), Trans, 0, 0, A, null, C);
2593        int K = -1;
2594        if (Trans != NO_TRANSPOSE) {
2595            K = A.getType().getY();
2596        } else {
2597            K = A.getType().getX();
2598        }
2599
2600        mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, A.getID(mRS), 0, beta, C.getID(mRS), 0, 0, 0, 0);
2601    }
2602
2603    /**
2604     * DSYRK performs one of the symmetric rank k operations
2605     * C := alpha*A*A**T + beta*C   or   C := alpha*A**T*A + beta*C
2606     *
2607     * Details: http://www.netlib.org/lapack/explore-html/dc/d05/dsyrk_8f.html
2608     *
2609     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
2610     * @param Trans The type of transpose applied to the operation.
2611     * @param alpha The scalar alpha.
2612     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
2613     * @param beta The scalar beta.
2614     * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}.
2615     */
2616    public void DSYRK(@Uplo int Uplo, @Transpose int Trans, double alpha, Allocation A, double beta, Allocation C) {
2617        validateTranspose(Trans);
2618        validateUplo(Uplo);
2619        validateL3(Element.F64(mRS), Trans, 0, 0, A, null, C);
2620        int K = -1;
2621        if (Trans != NO_TRANSPOSE) {
2622            K = A.getType().getY();
2623        } else {
2624            K = A.getType().getX();
2625        }
2626        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, A.getID(mRS), 0, beta, C.getID(mRS), 0, 0, 0, 0);
2627    }
2628
2629    /**
2630     * CSYRK performs one of the symmetric rank k operations
2631     * C := alpha*A*A**T + beta*C   or   C := alpha*A**T*A + beta*C
2632     *
2633     * Details: http://www.netlib.org/lapack/explore-html/d3/d6a/csyrk_8f.html
2634     *
2635     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
2636     * @param Trans The type of transpose applied to the operation.
2637     * @param alpha The scalar alpha.
2638     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
2639     * @param beta The scalar beta.
2640     * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.
2641     */
2642    public void CSYRK(@Uplo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Float2 beta, Allocation C) {
2643        validateTranspose(Trans);
2644        validateUplo(Uplo);
2645        validateL3(Element.F32_2(mRS), Trans, 0, 0, A, null, C);
2646        int K = -1;
2647        if (Trans != NO_TRANSPOSE) {
2648            K = A.getType().getY();
2649        } else {
2650            K = A.getType().getX();
2651        }
2652        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_csyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, A.getID(mRS), 0, beta.x, beta.y,
2653                                         C.getID(mRS), 0, 0, 0, 0);
2654    }
2655
2656    /**
2657     * ZSYRK performs one of the symmetric rank k operations
2658     * C := alpha*A*A**T + beta*C   or   C := alpha*A**T*A + beta*C
2659     *
2660     * Details: http://www.netlib.org/lapack/explore-html/de/d54/zsyrk_8f.html
2661     *
2662     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
2663     * @param Trans The type of transpose applied to the operation.
2664     * @param alpha The scalar alpha.
2665     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2666     * @param beta The scalar beta.
2667     * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.
2668     */
2669    public void ZSYRK(@Uplo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Double2 beta, Allocation C) {
2670        validateTranspose(Trans);
2671        validateUplo(Uplo);
2672        validateL3(Element.F64_2(mRS), Trans, 0, 0, A, null, C);
2673        int K = -1;
2674        if (Trans != NO_TRANSPOSE) {
2675            K = A.getType().getY();
2676        } else {
2677            K = A.getType().getX();
2678        }
2679        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zsyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, A.getID(mRS), 0, beta.x, beta.y,
2680                                   C.getID(mRS), 0, 0, 0, 0);
2681    }
2682
2683    static void validateSYR2K(Element e, @Transpose int Trans, Allocation A, Allocation B, Allocation C) {
2684        validateTranspose(Trans);
2685        if (!A.getType().getElement().isCompatible(e) ||
2686            !B.getType().getElement().isCompatible(e) ||
2687            !C.getType().getElement().isCompatible(e)) {
2688            throw new RSRuntimeException("Called BLAS with wrong Element type");
2689        }
2690        int Cdim = -1;
2691        // A is n x k if no transpose, k x n if transpose
2692        // C is n x n
2693        if (Trans == TRANSPOSE) {
2694            // check columns versus C
2695            Cdim = A.getType().getX();
2696        } else {
2697            // check rows versus C
2698            Cdim = A.getType().getY();
2699        }
2700        if (C.getType().getX() != Cdim || C.getType().getY() != Cdim) {
2701            throw new RSRuntimeException("Invalid symmetric matrix in SYR2K");
2702        }
2703        // A dims == B dims
2704        if (A.getType().getX() != B.getType().getX() || A.getType().getY() != B.getType().getY()) {
2705            throw new RSRuntimeException("Invalid A and B in SYR2K");
2706        }
2707    }
2708
2709    /**
2710     * SSYR2K performs one of the symmetric rank 2k operations
2711     * C := alpha*A*B**T + alpha*B*A**T + beta*C   or   C := alpha*A**T*B + alpha*B**T*A + beta*C
2712     *
2713     * Details: http://www.netlib.org/lapack/explore-html/df/d3d/ssyr2k_8f.html
2714     *
2715     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
2716     * @param Trans The type of transpose applied to the operation.
2717     * @param alpha The scalar alpha.
2718     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
2719     * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}.
2720     * @param beta The scalar beta.
2721     * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}.
2722     */
2723    public void SSYR2K(@Uplo int Uplo, @Transpose int Trans, float alpha, Allocation A, Allocation B, float beta, Allocation C) {
2724        validateUplo(Uplo);
2725        validateSYR2K(Element.F32(mRS), Trans, A, B, C);
2726        int K = -1;
2727        if (Trans != NO_TRANSPOSE) {
2728            K = A.getType().getY();
2729        } else {
2730            K = A.getType().getX();
2731        }
2732        mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, A.getID(mRS), B.getID(mRS), beta, C.getID(mRS), 0, 0, 0, 0);
2733    }
2734
2735    /**
2736     * DSYR2K performs one of the symmetric rank 2k operations
2737     * C := alpha*A*B**T + alpha*B*A**T + beta*C   or   C := alpha*A**T*B + alpha*B**T*A + beta*C
2738     *
2739     * Details: http://www.netlib.org/lapack/explore-html/d1/dec/dsyr2k_8f.html
2740     *
2741     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
2742     * @param Trans The type of transpose applied to the operation.
2743     * @param alpha The scalar alpha.
2744     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
2745     * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}.
2746     * @param beta The scalar beta.
2747     * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}.
2748     */
2749    public void DSYR2K(@Uplo int Uplo, @Transpose int Trans, double alpha, Allocation A, Allocation B, double beta, Allocation C) {
2750        validateUplo(Uplo);
2751        validateSYR2K(Element.F64(mRS), Trans, A, B, C);
2752        int K = -1;
2753        if (Trans != NO_TRANSPOSE) {
2754            K = A.getType().getY();
2755        } else {
2756            K = A.getType().getX();
2757        }
2758        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, A.getID(mRS), B.getID(mRS), beta, C.getID(mRS), 0, 0, 0, 0);
2759    }
2760
2761    /**
2762     * CSYR2K performs one of the symmetric rank 2k operations
2763     * C := alpha*A*B**T + alpha*B*A**T + beta*C   or   C := alpha*A**T*B + alpha*B**T*A + beta*C
2764     *
2765     * Details: http://www.netlib.org/lapack/explore-html/de/d7e/csyr2k_8f.html
2766     *
2767     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
2768     * @param Trans The type of transpose applied to the operation.
2769     * @param alpha The scalar alpha.
2770     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
2771     * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.
2772     * @param beta The scalar beta.
2773     * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.
2774     */
2775    public void CSYR2K(@Uplo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C) {
2776        validateUplo(Uplo);
2777        validateSYR2K(Element.F32_2(mRS), Trans, A, B, C);
2778        int K = -1;
2779        if (Trans != NO_TRANSPOSE) {
2780            K = A.getType().getY();
2781        } else {
2782            K = A.getType().getX();
2783        }
2784        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_csyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0);
2785    }
2786
2787    /**
2788     * ZSYR2K performs one of the symmetric rank 2k operations
2789     * C := alpha*A*B**T + alpha*B*A**T + beta*C   or   C := alpha*A**T*B + alpha*B**T*A + beta*C
2790     *
2791     * Details: http://www.netlib.org/lapack/explore-html/df/d20/zsyr2k_8f.html
2792     *
2793     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
2794     * @param Trans The type of transpose applied to the operation.
2795     * @param alpha The scalar alpha.
2796     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2797     * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.
2798     * @param beta The scalar beta.
2799     * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.
2800     */
2801    public void ZSYR2K(@Uplo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C) {
2802        validateUplo(Uplo);
2803        validateSYR2K(Element.F64_2(mRS), Trans, A, B, C);
2804        int K = -1;
2805        if (Trans != NO_TRANSPOSE) {
2806            K = A.getType().getY();
2807        } else {
2808            K = A.getType().getX();
2809        }
2810        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zsyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0);
2811    }
2812
2813    static void validateTRMM(Element e, @Side int Side, @Transpose int TransA, Allocation A, Allocation B) {
2814        validateSide(Side);
2815        validateTranspose(TransA);
2816        int aM = -1, aN = -1, bM = -1, bN = -1;
2817        if (!A.getType().getElement().isCompatible(e) ||
2818            !B.getType().getElement().isCompatible(e)) {
2819            throw new RSRuntimeException("Called BLAS with wrong Element type");
2820        }
2821
2822        aM = A.getType().getY();
2823        aN = A.getType().getX();
2824        if (aM != aN) {
2825            throw new RSRuntimeException("Called TRMM with a non-symmetric matrix A");
2826        }
2827
2828        bM = B.getType().getY();
2829        bN = B.getType().getX();
2830        if (Side == LEFT) {
2831            if (aN != bM) {
2832                throw new RSRuntimeException("Called TRMM with invalid matrices");
2833            }
2834        } else {
2835            if (bN != aM) {
2836                throw new RSRuntimeException("Called TRMM with invalid matrices");
2837            }
2838        }
2839    }
2840
2841    /**
2842     * STRMM performs one of the matrix-matrix operations
2843     * B := alpha*op(A)*B   or   B := alpha*B*op(A)
2844     * op(A) is one of  op(A) = A  or  op(A) = A**T
2845     *
2846     * Details: http://www.netlib.org/lapack/explore-html/df/d01/strmm_8f.html
2847     *
2848     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
2849     * @param Uplo Specifies whether matrix A is upper or lower triangular.
2850     * @param TransA The type of transpose applied to matrix A.
2851     * @param Diag Specifies whether or not A is unit triangular.
2852     * @param alpha The scalar alpha.
2853     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
2854     * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}.
2855     */
2856    public void STRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, float alpha, Allocation A, Allocation B) {
2857        validateUplo(Uplo);
2858        validateDiag(Diag);
2859        validateTRMM(Element.F32(mRS), Side, TransA, A, B);
2860        mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
2861                                        alpha, A.getID(mRS), B.getID(mRS), 0.f, 0, 0, 0, 0, 0);
2862    }
2863
2864    /**
2865     * DTRMM performs one of the matrix-matrix operations
2866     * B := alpha*op(A)*B   or   B := alpha*B*op(A)
2867     * op(A) is one of  op(A) = A  or  op(A) = A**T
2868     *
2869     * Details: http://www.netlib.org/lapack/explore-html/dd/d19/dtrmm_8f.html
2870     *
2871     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
2872     * @param Uplo Specifies whether matrix A is upper or lower triangular.
2873     * @param TransA The type of transpose applied to matrix A.
2874     * @param Diag Specifies whether or not A is unit triangular.
2875     * @param alpha The scalar alpha.
2876     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
2877     * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}.
2878     */
2879    public void DTRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, double alpha, Allocation A, Allocation B) {
2880        validateUplo(Uplo);
2881        validateDiag(Diag);
2882        validateTRMM(Element.F64(mRS), Side, TransA, A, B);
2883        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
2884                                        alpha, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0);
2885    }
2886
2887    /**
2888     * CTRMM performs one of the matrix-matrix operations
2889     * B := alpha*op(A)*B   or   B := alpha*B*op(A)
2890     * op(A) is one of  op(A) = A  or  op(A) = A**T  or  op(A) = A**H
2891     *
2892     * Details: http://www.netlib.org/lapack/explore-html/d4/d9b/ctrmm_8f.html
2893     *
2894     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
2895     * @param Uplo Specifies whether matrix A is upper or lower triangular.
2896     * @param TransA The type of transpose applied to matrix A.
2897     * @param Diag Specifies whether or not A is unit triangular.
2898     * @param alpha The scalar alpha.
2899     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
2900     * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.
2901     */
2902    public void CTRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Float2 alpha, Allocation A, Allocation B) {
2903        validateUplo(Uplo);
2904        validateDiag(Diag);
2905        validateTRMM(Element.F32_2(mRS), Side, TransA, A, B);
2906        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
2907                                         alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0, 0);
2908    }
2909
2910    /**
2911     * ZTRMM performs one of the matrix-matrix operations
2912     * B := alpha*op(A)*B   or   B := alpha*B*op(A)
2913     * op(A) is one of  op(A) = A  or  op(A) = A**T  or  op(A) = A**H
2914     *
2915     * Details: http://www.netlib.org/lapack/explore-html/d8/de1/ztrmm_8f.html
2916     *
2917     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
2918     * @param Uplo Specifies whether matrix A is upper or lower triangular.
2919     * @param TransA The type of transpose applied to matrix A.
2920     * @param Diag Specifies whether or not A is unit triangular.
2921     * @param alpha The scalar alpha.
2922     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2923     * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.
2924     */
2925    public void ZTRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Double2 alpha, Allocation A, Allocation B) {
2926        validateUplo(Uplo);
2927        validateDiag(Diag);
2928        validateTRMM(Element.F64_2(mRS), Side, TransA, A, B);
2929        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
2930                                   alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0, 0);
2931    }
2932
2933    static void validateTRSM(Element e, @Side int Side, @Transpose int TransA, Allocation A, Allocation B) {
2934        int adim = -1, bM = -1, bN = -1;
2935        validateSide(Side);
2936        validateTranspose(TransA);
2937        if (!A.getType().getElement().isCompatible(e) ||
2938            !B.getType().getElement().isCompatible(e)) {
2939            throw new RSRuntimeException("Called BLAS with wrong Element type");
2940        }
2941        adim = A.getType().getX();
2942        if (adim != A.getType().getY()) {
2943            // this may be unnecessary, the restriction could potentially be relaxed
2944            // A needs to contain at least that symmetric matrix but could theoretically be larger
2945            // for now we assume adapters are sufficient, will reevaluate in the future
2946            throw new RSRuntimeException("Called TRSM with a non-symmetric matrix A");
2947        }
2948        bM = B.getType().getY();
2949        bN = B.getType().getX();
2950        if (Side == LEFT) {
2951            // A is M*M
2952            if (adim != bM) {
2953                throw new RSRuntimeException("Called TRSM with invalid matrix dimensions");
2954            }
2955        } else {
2956            // A is N*N
2957            if (adim != bN) {
2958                throw new RSRuntimeException("Called TRSM with invalid matrix dimensions");
2959            }
2960        }
2961    }
2962
2963    /**
2964     * STRSM solves one of the matrix equations
2965     * op(A)*X := alpha*B   or   X*op(A) := alpha*B
2966     * op(A) is one of  op(A) = A  or  op(A) = A**T
2967     *
2968     * Details: http://www.netlib.org/lapack/explore-html/d2/d8b/strsm_8f.html
2969     *
2970     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
2971     * @param Uplo Specifies whether matrix A is upper or lower triangular.
2972     * @param TransA The type of transpose applied to matrix A.
2973     * @param Diag Specifies whether or not A is unit triangular.
2974     * @param alpha The scalar alpha.
2975     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
2976     * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}.
2977     */
2978    public void STRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, float alpha, Allocation A, Allocation B) {
2979        validateUplo(Uplo);
2980        validateDiag(Diag);
2981        validateTRSM(Element.F32(mRS), Side, TransA, A, B);
2982        mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
2983                                        alpha, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0);
2984    }
2985
2986    /**
2987     * DTRSM solves one of the matrix equations
2988     * op(A)*X := alpha*B   or   X*op(A) := alpha*B
2989     * op(A) is one of  op(A) = A  or  op(A) = A**T
2990     *
2991     * Details: http://www.netlib.org/lapack/explore-html/de/da7/dtrsm_8f.html
2992     *
2993     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
2994     * @param Uplo Specifies whether matrix A is upper or lower triangular.
2995     * @param TransA The type of transpose applied to matrix A.
2996     * @param Diag Specifies whether or not A is unit triangular.
2997     * @param alpha The scalar alpha.
2998     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
2999     * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}.
3000     */
3001    public void DTRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, double alpha, Allocation A, Allocation B) {
3002        validateUplo(Uplo);
3003        validateDiag(Diag);
3004        validateTRSM(Element.F64(mRS), Side, TransA, A, B);
3005        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
3006                                        alpha, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0);
3007    }
3008
3009    /**
3010     * CTRSM solves one of the matrix equations
3011     * op(A)*X := alpha*B   or   X*op(A) := alpha*B
3012     * op(A) is one of  op(A) = A  or  op(A) = A**T  or  op(A) = A**H
3013     *
3014     * Details: http://www.netlib.org/lapack/explore-html/de/d30/ctrsm_8f.html
3015     *
3016     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3017     * @param Uplo Specifies whether matrix A is upper or lower triangular.
3018     * @param TransA The type of transpose applied to matrix A.
3019     * @param Diag Specifies whether or not A is unit triangular.
3020     * @param alpha The scalar alpha.
3021     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
3022     * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.
3023     */
3024    public void CTRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Float2 alpha, Allocation A, Allocation B) {
3025        validateUplo(Uplo);
3026        validateDiag(Diag);
3027        validateTRSM(Element.F32_2(mRS), Side, TransA, A, B);
3028        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
3029                                         alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0, 0);
3030    }
3031
3032    /**
3033     * ZTRSM solves one of the matrix equations
3034     * op(A)*X := alpha*B   or   X*op(A) := alpha*B
3035     * op(A) is one of  op(A) = A  or  op(A) = A**T  or  op(A) = A**H
3036     *
3037     * Details: http://www.netlib.org/lapack/explore-html/d1/d39/ztrsm_8f.html
3038     *
3039     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3040     * @param Uplo Specifies whether matrix A is upper or lower triangular.
3041     * @param TransA The type of transpose applied to matrix A.
3042     * @param Diag Specifies whether or not A is unit triangular.
3043     * @param alpha The scalar alpha.
3044     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
3045     * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.
3046     */
3047    public void ZTRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Double2 alpha, Allocation A, Allocation B) {
3048        validateUplo(Uplo);
3049        validateDiag(Diag);
3050        validateTRSM(Element.F64_2(mRS), Side, TransA, A, B);
3051        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
3052                                   alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0, 0);
3053    }
3054
3055    static void validateHEMM(Element e, @Side int Side, Allocation A, Allocation B, Allocation C) {
3056        validateSide(Side);
3057
3058        if (!A.getType().getElement().isCompatible(e) ||
3059            !B.getType().getElement().isCompatible(e) ||
3060            !C.getType().getElement().isCompatible(e)) {
3061            throw new RSRuntimeException("Called BLAS with wrong Element type");
3062        }
3063
3064        // A must be square; can potentially be relaxed similar to TRSM
3065        int adim = A.getType().getX();
3066        if (adim != A.getType().getY()) {
3067            throw new RSRuntimeException("Called HEMM with non-square A");
3068        }
3069        if ((Side == LEFT && adim != B.getType().getY()) ||
3070            (Side == RIGHT && adim != B.getType().getX())) {
3071            throw new RSRuntimeException("Called HEMM with invalid B");
3072        }
3073        if (B.getType().getX() != C.getType().getX() ||
3074            B.getType().getY() != C.getType().getY()) {
3075            throw new RSRuntimeException("Called HEMM with mismatched B and C");
3076        }
3077    }
3078
3079    /**
3080     * CHEMM performs one of the matrix-matrix operations
3081     * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
3082     *
3083     * Details: http://www.netlib.org/lapack/explore-html/d3/d66/chemm_8f.html
3084     *
3085     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3086     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
3087     * @param alpha The scalar alpha.
3088     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
3089     * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.
3090     * @param beta The scalar beta.
3091     * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.
3092     */
3093    public void CHEMM(@Side int Side, @Uplo int Uplo, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C) {
3094        validateUplo(Uplo);
3095        validateHEMM(Element.F32_2(mRS), Side, A, B, C);
3096        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chemm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0,
3097                                         alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0);
3098    }
3099
3100    /**
3101     * ZHEMM performs one of the matrix-matrix operations
3102     * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
3103     *
3104     * Details: http://www.netlib.org/lapack/explore-html/d6/d3e/zhemm_8f.html
3105     *
3106     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3107     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
3108     * @param alpha The scalar alpha.
3109     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
3110     * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.
3111     * @param beta The scalar beta.
3112     * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.
3113     */
3114    public void ZHEMM(@Side int Side, @Uplo int Uplo, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C) {
3115        validateUplo(Uplo);
3116        validateHEMM(Element.F64_2(mRS), Side, A, B, C);
3117        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhemm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0,
3118                                   alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0);
3119    }
3120
3121    static void validateHERK(Element e, @Transpose int Trans, Allocation A, Allocation C) {
3122        if (!A.getType().getElement().isCompatible(e) ||
3123            !C.getType().getElement().isCompatible(e)) {
3124            throw new RSRuntimeException("Called BLAS with wrong Element type");
3125        }
3126        validateConjTranspose(Trans);
3127        int cdim = C.getType().getX();
3128        if (cdim != C.getType().getY()) {
3129            throw new RSRuntimeException("Called HERK with non-square C");
3130        }
3131        if (Trans == NO_TRANSPOSE) {
3132            if (cdim != A.getType().getY()) {
3133                throw new RSRuntimeException("Called HERK with invalid A");
3134            }
3135        } else {
3136            if (cdim != A.getType().getX()) {
3137                throw new RSRuntimeException("Called HERK with invalid A");
3138            }
3139        }
3140    }
3141
3142    /**
3143     * CHERK performs one of the hermitian rank k operations
3144     * C := alpha*A*A**H + beta*C   or   C := alpha*A**H*A + beta*C
3145     *
3146     * Details: http://www.netlib.org/lapack/explore-html/d8/d52/cherk_8f.html
3147     *
3148     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3149     * @param Trans The type of transpose applied to the operation.
3150     * @param alpha The scalar alpha.
3151     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
3152     * @param beta The scalar beta.
3153     * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.
3154     */
3155    public void CHERK(@Uplo int Uplo, @Transpose int Trans, float alpha, Allocation A, float beta, Allocation C) {
3156        validateUplo(Uplo);
3157        validateHERK(Element.F32_2(mRS), Trans, A, C);
3158        int k = 0;
3159        if (Trans == CONJ_TRANSPOSE) {
3160            k = A.getType().getY();
3161        } else {
3162            k = A.getType().getX();
3163        }
3164        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cherk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k,
3165                                         alpha, 0, A.getID(mRS), 0, beta, 0, C.getID(mRS), 0, 0, 0, 0);
3166    }
3167
3168    /**
3169     * ZHERK performs one of the hermitian rank k operations
3170     * C := alpha*A*A**H + beta*C   or   C := alpha*A**H*A + beta*C
3171     *
3172     * Details: http://www.netlib.org/lapack/explore-html/d1/db1/zherk_8f.html
3173     *
3174     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3175     * @param Trans The type of transpose applied to the operation.
3176     * @param alpha The scalar alpha.
3177     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
3178     * @param beta The scalar beta.
3179     * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.
3180     */
3181    public void ZHERK(@Uplo int Uplo, @Transpose int Trans, double alpha, Allocation A, double beta, Allocation C) {
3182        validateUplo(Uplo);
3183        validateHERK(Element.F64_2(mRS), Trans, A, C);
3184        int k = 0;
3185        if (Trans == CONJ_TRANSPOSE) {
3186            k = A.getType().getY();
3187        } else {
3188            k = A.getType().getX();
3189        }
3190        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zherk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k,
3191                                   alpha, 0, A.getID(mRS), 0, beta, 0, C.getID(mRS), 0, 0, 0, 0);
3192    }
3193
3194    static void validateHER2K(Element e, @Transpose int Trans, Allocation A, Allocation B, Allocation C) {
3195        if (!A.getType().getElement().isCompatible(e) ||
3196            !B.getType().getElement().isCompatible(e) ||
3197            !C.getType().getElement().isCompatible(e)) {
3198            throw new RSRuntimeException("Called BLAS with wrong Element type");
3199        }
3200        validateConjTranspose(Trans);
3201        int cdim = C.getType().getX();
3202        if (cdim != C.getType().getY()) {
3203            throw new RSRuntimeException("Called HER2K with non-square C");
3204        }
3205        if (Trans == NO_TRANSPOSE) {
3206            if (A.getType().getY() != cdim) {
3207                throw new RSRuntimeException("Called HER2K with invalid matrices");
3208            }
3209        } else {
3210            if (A.getType().getX() != cdim) {
3211                throw new RSRuntimeException("Called HER2K with invalid matrices");
3212            }
3213        }
3214        if (A.getType().getX() != B.getType().getX() || A.getType().getY() != B.getType().getY()) {
3215            throw new RSRuntimeException("Called HER2K with invalid A and B matrices");
3216        }
3217    }
3218
3219    /**
3220     * CHER2K performs one of the hermitian rank 2k operations
3221     * C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C   or   C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C
3222     *
3223     * Details: http://www.netlib.org/lapack/explore-html/d1/d82/cher2k_8f.html
3224     *
3225     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3226     * @param Trans The type of transpose applied to the operation.
3227     * @param alpha The scalar alpha.
3228     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
3229     * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.
3230     * @param beta The scalar beta.
3231     * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.
3232     */
3233    public void CHER2K(@Uplo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Allocation B, float beta, Allocation C) {
3234        validateUplo(Uplo);
3235        validateHER2K(Element.F32_2(mRS), Trans, A, B, C);
3236        int k = 0;
3237        if (Trans == NO_TRANSPOSE) {
3238            k = A.getType().getX();
3239        } else {
3240            k = A.getType().getY();
3241        }
3242        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cher2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k, alpha.x, alpha.y,
3243                                         A.getID(mRS), B.getID(mRS), beta, 0, C.getID(mRS), 0, 0, 0, 0);
3244    }
3245
3246    /**
3247     * ZHER2K performs one of the hermitian rank 2k operations
3248     * C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C   or   C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C
3249     *
3250     * Details: http://www.netlib.org/lapack/explore-html/d7/dfa/zher2k_8f.html
3251     *
3252     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3253     * @param Trans The type of transpose applied to the operation.
3254     * @param alpha The scalar alpha.
3255     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
3256     * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.
3257     * @param beta The scalar beta.
3258     * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.
3259     */
3260    public void ZHER2K(@Uplo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Allocation B, double beta, Allocation C) {
3261        validateUplo(Uplo);
3262        validateHER2K(Element.F64_2(mRS), Trans, A, B, C);
3263        int k = 0;
3264        if (Trans == NO_TRANSPOSE) {
3265            k = A.getType().getX();
3266        } else {
3267            k = A.getType().getY();
3268        }
3269        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zher2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k, alpha.x, alpha.y,
3270                                   A.getID(mRS), B.getID(mRS), beta, 0, C.getID(mRS), 0, 0, 0, 0);
3271    }
3272
3273
3274    /**
3275     * 8-bit GEMM-like operation for neural networks: C = A * Transpose(B)
3276     * Calculations are done in 1.10.21 fixed-point format for the final output,
3277     * just before there's a shift down to drop the fractional parts. The output
3278     * values are gated to 0 to 255 to fit in a byte, but the 10-bit format
3279     * gives some headroom to avoid wrapping around on small overflows.
3280     *
3281     * @param A The input allocation contains matrix A, supported elements type {@link Element#U8}.
3282     * @param a_offset The offset for all values in matrix A, e.g A[i,j] = A[i,j] - a_offset. Value should be from 0 to 255.
3283     * @param B The input allocation contains matrix B, supported elements type {@link Element#U8}.
3284     * @param b_offset The offset for all values in matrix B, e.g B[i,j] = B[i,j] - b_offset. Value should be from 0 to 255.
3285     * @param C The input allocation contains matrix C, supported elements type {@link Element#U8}.
3286     * @param c_offset The offset for all values in matrix C.
3287     * @param c_mult The multiplier for all values in matrix C, e.g C[i,j] = (C[i,j] + c_offset) * c_mult.
3288     **/
3289    public void BNNM(Allocation A, int a_offset, Allocation B, int b_offset, Allocation C, int c_offset, int c_mult) {
3290        validateL3(Element.U8(mRS), NO_TRANSPOSE, TRANSPOSE, 0, A, B, C);
3291
3292        if (a_offset < 0 || a_offset > 255) {
3293            throw new RSRuntimeException("Invalid a_offset passed to BNNM");
3294        }
3295        if (b_offset < 0 || b_offset > 255) {
3296            throw new RSRuntimeException("Invalid b_offset passed to BNNM");
3297        }
3298        int M = -1, N = -1, K = -1;
3299        M = A.getType().getY();
3300        N = B.getType().getY();
3301        K = A.getType().getX();
3302
3303
3304        mRS.nScriptIntrinsicBLAS_BNNM(getID(mRS), M, N, K, A.getID(mRS), a_offset, B.getID(mRS), b_offset, C.getID(mRS), c_offset, c_mult);
3305
3306    }
3307
3308}
3309