1cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang/* 2cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang * Copyright (C) 2015 The Android Open Source Project 3cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang * 4cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang * Licensed under the Apache License, Version 2.0 (the "License"); 5cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang * you may not use this file except in compliance with the License. 6cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang * You may obtain a copy of the License at 7cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang * 8cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang * http://www.apache.org/licenses/LICENSE-2.0 9cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang * 10cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang * Unless required by applicable law or agreed to in writing, software 11cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang * distributed under the License is distributed on an "AS IS" BASIS, 12cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang * See the License for the specific language governing permissions and 14cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang * limitations under the License. 
15cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang */ 16cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 17cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wangpackage android.support.v8.renderscript; 18cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 19cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wangimport android.support.annotation.IntDef; 20cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wangimport java.lang.annotation.Retention; 21cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wangimport java.lang.annotation.RetentionPolicy; 22cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 23cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang/** 24ef940e14cb21fef4c62cb7bff5ec5d1069ad5f87Miao Wang * 25ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * ScriptIntrinsicBLAS class provides high performance RenderScript APIs to BLAS. 26cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang * 27ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * The BLAS (Basic Linear Algebra Subprograms) are routines that provide standard 28ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * building blocks for performing basic vector and matrix operations. 
29ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 30ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * For detailed description of BLAS, please refer to http://www.netlib.org/blas/ 31cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang * 32cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang **/ 33cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wangpublic final class ScriptIntrinsicBLAS extends ScriptIntrinsic { 34cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private Allocation mLUT; 35cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int INTRINSIC_API_LEVEL = 23; 36cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 37cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private ScriptIntrinsicBLAS(long id, RenderScript rs) { 38cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang super(id, rs); 39cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 40cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 41cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_sdsdot = 1; 42cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_dsdot = 2; 43cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_sdot = 3; 44cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_ddot = 4; 45cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_cdotu_sub = 5; 46cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_cdotc_sub = 6; 47cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_zdotu_sub = 7; 48cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_zdotc_sub = 8; 49cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_snrm2 = 9; 50cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_sasum = 10; 51cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_dnrm2 = 11; 
52cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_dasum = 12; 53cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_scnrm2 = 13; 54cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_scasum = 14; 55cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_dznrm2 = 15; 56cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_dzasum = 16; 57cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_isamax = 17; 58cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_idamax = 18; 59cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_icamax = 19; 60cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_izamax = 20; 61cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_sswap = 21; 62cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_scopy = 22; 63cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_saxpy = 23; 64cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_dswap = 24; 65cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_dcopy = 25; 66cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_daxpy = 26; 67cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_cswap = 27; 68cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_ccopy = 28; 69cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_caxpy = 29; 70cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_zswap = 30; 71cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_zcopy = 31; 72cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static 
final int RsBlas_zaxpy = 32; 73cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_srotg = 33; 74cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_srotmg = 34; 75cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_srot = 35; 76cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_srotm = 36; 77cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_drotg = 37; 78cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_drotmg = 38; 79cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_drot = 39; 80cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_drotm = 40; 81cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_sscal = 41; 82cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_dscal = 42; 83cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_cscal = 43; 84cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_zscal = 44; 85cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_csscal = 45; 86cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_zdscal = 46; 87cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_sgemv = 47; 88cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_sgbmv = 48; 89cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_strmv = 49; 90cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_stbmv = 50; 91cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_stpmv = 51; 92cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_strsv = 52; 93cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao 
Wang private static final int RsBlas_stbsv = 53; 94cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_stpsv = 54; 95cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_dgemv = 55; 96cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_dgbmv = 56; 97cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_dtrmv = 57; 98cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_dtbmv = 58; 99cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_dtpmv = 59; 100cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_dtrsv = 60; 101cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_dtbsv = 61; 102cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_dtpsv = 62; 103cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_cgemv = 63; 104cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_cgbmv = 64; 105cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_ctrmv = 65; 106cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_ctbmv = 66; 107cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_ctpmv = 67; 108cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_ctrsv = 68; 109cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_ctbsv = 69; 110cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_ctpsv = 70; 111cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_zgemv = 71; 112cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_zgbmv = 72; 113cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_ztrmv = 73; 
114cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_ztbmv = 74; 115cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_ztpmv = 75; 116cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_ztrsv = 76; 117cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_ztbsv = 77; 118cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_ztpsv = 78; 119cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_ssymv = 79; 120cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_ssbmv = 80; 121cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_sspmv = 81; 122cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_sger = 82; 123cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_ssyr = 83; 124cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_sspr = 84; 125cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_ssyr2 = 85; 126cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_sspr2 = 86; 127cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_dsymv = 87; 128cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_dsbmv = 88; 129cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_dspmv = 89; 130cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_dger = 90; 131cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_dsyr = 91; 132cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_dspr = 92; 133cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_dsyr2 = 93; 134cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private 
static final int RsBlas_dspr2 = 94; 135cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_chemv = 95; 136cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_chbmv = 96; 137cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_chpmv = 97; 138cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_cgeru = 98; 139cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_cgerc = 99; 140cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_cher = 100; 141cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_chpr = 101; 142cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_cher2 = 102; 143cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_chpr2 = 103; 144cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_zhemv = 104; 145cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_zhbmv = 105; 146cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_zhpmv = 106; 147cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_zgeru = 107; 148cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_zgerc = 108; 149cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_zher = 109; 150cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_zhpr = 110; 151cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_zher2 = 111; 152cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_zhpr2 = 112; 153cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_sgemm = 113; 154cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_ssymm = 114; 
155cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_ssyrk = 115; 156cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_ssyr2k = 116; 157cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_strmm = 117; 158cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_strsm = 118; 159cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_dgemm = 119; 160cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_dsymm = 120; 161cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_dsyrk = 121; 162cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_dsyr2k = 122; 163cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_dtrmm = 123; 164cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_dtrsm = 124; 165cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_cgemm = 125; 166cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_csymm = 126; 167cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_csyrk = 127; 168cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_csyr2k = 128; 169cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_ctrmm = 129; 170cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_ctrsm = 130; 171cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_zgemm = 131; 172cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_zsymm = 132; 173cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_zsyrk = 133; 174cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_zsyr2k = 134; 
175cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_ztrmm = 135; 176cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_ztrsm = 136; 177cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_chemm = 137; 178cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_cherk = 138; 179cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_cher2k = 139; 180cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_zhemm = 140; 181cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_zherk = 141; 182cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_zher2k = 142; 183cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 184cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang // BLAS extensions start here 185cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang private static final int RsBlas_bnnm = 1000; 186cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 187cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang /** 188ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Create an intrinsic to access BLAS subroutines. 
189ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 190ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param rs The RenderScript context 191ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @return ScriptIntrinsicBLAS 192cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang */ 193cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public static ScriptIntrinsicBLAS create(RenderScript rs) { 194cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long id; 195cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = rs.isUseNative() && 196cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang android.os.Build.VERSION.SDK_INT < INTRINSIC_API_LEVEL; 197cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 198cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang id = rs.nScriptIntrinsicCreate(13, Element.U32(rs).getID(rs), mUseIncSupp); 199cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang ScriptIntrinsicBLAS si = new ScriptIntrinsicBLAS(id, rs); 200cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang si.setIncSupp(mUseIncSupp); 201cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang return si; 202cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 203cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 204ef940e14cb21fef4c62cb7bff5ec5d1069ad5f87Miao Wang /** 205ef940e14cb21fef4c62cb7bff5ec5d1069ad5f87Miao Wang * @hide 206ef940e14cb21fef4c62cb7bff5ec5d1069ad5f87Miao Wang */ 207cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang @IntDef({NO_TRANSPOSE, TRANSPOSE, CONJ_TRANSPOSE}) 208cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang @Retention(RetentionPolicy.SOURCE) 209cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public @interface Transpose {} 210cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 211ef940e14cb21fef4c62cb7bff5ec5d1069ad5f87Miao Wang /** 212ef940e14cb21fef4c62cb7bff5ec5d1069ad5f87Miao Wang * @hide 213ef940e14cb21fef4c62cb7bff5ec5d1069ad5f87Miao Wang */ 214cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang @IntDef({UPPER, LOWER}) 
215cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang @Retention(RetentionPolicy.SOURCE) 216cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public @interface Uplo {} 217cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 218ef940e14cb21fef4c62cb7bff5ec5d1069ad5f87Miao Wang /** 219ef940e14cb21fef4c62cb7bff5ec5d1069ad5f87Miao Wang * @hide 220ef940e14cb21fef4c62cb7bff5ec5d1069ad5f87Miao Wang */ 221cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang @IntDef({NON_UNIT, UNIT}) 222cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang @Retention(RetentionPolicy.SOURCE) 223cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public @interface Diag {} 224cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 225ef940e14cb21fef4c62cb7bff5ec5d1069ad5f87Miao Wang /** 226ef940e14cb21fef4c62cb7bff5ec5d1069ad5f87Miao Wang * @hide 227ef940e14cb21fef4c62cb7bff5ec5d1069ad5f87Miao Wang */ 228cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang @IntDef({LEFT, RIGHT}) 229cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang @Retention(RetentionPolicy.SOURCE) 230cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public @interface Side {} 231cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 232cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public static final int NO_TRANSPOSE = 111; 233cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public static final int TRANSPOSE = 112; 234cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public static final int CONJ_TRANSPOSE = 113; 235cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 236cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public static final int UPPER = 121; 237cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public static final int LOWER = 122; 238cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 239cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public static final int NON_UNIT = 131; 240cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public static final int UNIT = 132; 241cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 
242cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public static final int LEFT = 141; 243cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public static final int RIGHT = 142; 244cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 245cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang static void validateSide(@Side int Side) { 246cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (Side != LEFT && Side != RIGHT) { 247cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Invalid side passed to BLAS"); 248cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 249cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 250cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 251cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang static void validateTranspose(@Transpose int Trans) { 252cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (Trans != NO_TRANSPOSE && Trans != TRANSPOSE && 253cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang Trans != CONJ_TRANSPOSE) { 254cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Invalid transpose passed to BLAS"); 255cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 256cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 257cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 258cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang static void validateConjTranspose(@Transpose int Trans) { 259cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (Trans != NO_TRANSPOSE && 260cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang Trans != CONJ_TRANSPOSE) { 261cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Invalid transpose passed to BLAS"); 262cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 263cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 264cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 265cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang static void validateDiag(@Diag int Diag) { 266cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (Diag != NON_UNIT && 
Diag != UNIT) { 267cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Invalid diag passed to BLAS"); 268cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 269cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 270cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 271cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang static void validateUplo(@Uplo int Uplo) { 272cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (Uplo != UPPER && Uplo != LOWER) { 273cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Invalid uplo passed to BLAS"); 274cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 275cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 276cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 277cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 278cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang /** 279cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang * Level 2 BLAS 280cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang */ 281cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 282cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang static void validateGEMV(Element e, int TransA, Allocation A, Allocation X, int incX, Allocation Y, int incY) { 283cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateTranspose(TransA); 284cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int M = A.getType().getY(); 285cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = A.getType().getX(); 286cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (!A.getType().getElement().isCompatible(e) || 287cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang !X.getType().getElement().isCompatible(e) || 288cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang !Y.getType().getElement().isCompatible(e)) { 289cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Called BLAS with wrong Element type"); 290cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 291cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if 
(X.getType().getY() > 1 || Y.getType().getY() > 1) { 292cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 293cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 294cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 295cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (incX <= 0 || incY <= 0) { 296cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Vector increments must be greater than 0"); 297cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 298cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int expectedXDim = -1, expectedYDim = -1; 299cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (TransA == NO_TRANSPOSE) { 300cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang expectedXDim = 1 + (N - 1) * incX; 301cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang expectedYDim = 1 + (M - 1) * incY; 302cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } else { 303cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang expectedXDim = 1 + (M - 1) * incX; 304cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang expectedYDim = 1 + (N - 1) * incY; 305cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 306cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (X.getType().getX() != expectedXDim || 307cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang Y.getType().getX() != expectedYDim) { 308cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Incorrect vector dimensions for GEMV"); 309cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 310cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 311ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 312ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 313ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * SGEMV performs one of the matrix-vector operations 314ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y 315ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao 
Wang * 316ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/db/d58/sgemv_8f.html 317ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 318ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param TransA The type of transpose applied to matrix A. 319ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param alpha The scalar alpha. 320ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 321ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 322ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incX The increment for the elements of vector x, must be larger than zero. 323ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param beta The scalar beta. 324ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}. 325ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incY The increment for the elements of vector y, must be larger than zero. 
326ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 327cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void SGEMV(@Transpose int TransA, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) { 328cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateGEMV(Element.F32(mRS), TransA, A, X, incX, Y, incY); 329cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int M = A.getType().getY(); 330cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = A.getType().getX(); 331cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 332cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 333cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 334cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long xID = X.getID(mRS); 335cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long yID = Y.getID(mRS); 336cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 337cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 338cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang xID = getDummyAlloc(X); 339cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang yID = getDummyAlloc(Y); 340cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 341cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha, aID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp); 342cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 343ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 344ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 345ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * DGEMV performs one of the matrix-vector operations 346ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y 347ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 348ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: 
http://www.netlib.org/lapack/explore-html/dc/da8/dgemv_8f.html 349ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 350ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param TransA The type of transpose applied to matrix A. 351ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param alpha The scalar alpha. 352ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 353ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 354ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incX The increment for the elements of vector x, must be larger than zero. 355ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param beta The scalar beta. 356ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. 357ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incY The increment for the elements of vector y, must be larger than zero. 
358ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 359cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void DGEMV(@Transpose int TransA, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) { 360cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateGEMV(Element.F64(mRS), TransA, A, X, incX, Y, incY); 361cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int M = A.getType().getY(); 362cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = A.getType().getX(); 363cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 364cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 365cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 366cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long xID = X.getID(mRS); 367cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long yID = Y.getID(mRS); 368cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 369cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 370cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang xID = getDummyAlloc(X); 371cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang yID = getDummyAlloc(Y); 372cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 373cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha, aID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp); 374cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 375ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 376ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 377ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * CGEMV performs one of the matrix-vector operations 378ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y or y := alpha*A**H*x + beta*y 379ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 380ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: 
 * http://www.netlib.org/lapack/explore-html/d4/d8a/cgemv_8f.html
 *
 * @param TransA The type of transpose applied to matrix A.
 * @param alpha The scalar alpha.
 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
 * @param incX The increment for the elements of vector x, must be larger than zero.
 * @param beta The scalar beta.
 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
 * @param incY The increment for the elements of vector y, must be larger than zero.
390ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 391cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void CGEMV(@Transpose int TransA, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) { 392cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateGEMV(Element.F32_2(mRS), TransA, A, X, incX, Y, incY); 393cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int M = A.getType().getY(); 394cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = A.getType().getX(); 395cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 396cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 397cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 398cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long xID = X.getID(mRS); 399cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long yID = Y.getID(mRS); 400cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 401cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 402cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang xID = getDummyAlloc(X); 403cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang yID = getDummyAlloc(Y); 404cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 405cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp); 406cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 407ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 408ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 409ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * ZGEMV performs one of the matrix-vector operations 410ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y or y := alpha*A**H*x + beta*y 411ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 412ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 
 * Details: http://www.netlib.org/lapack/explore-html/db/d40/zgemv_8f.html
 *
 * @param TransA The type of transpose applied to matrix A.
 * @param alpha The scalar alpha.
 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
 * @param incX The increment for the elements of vector x, must be larger than zero.
 * @param beta The scalar beta.
 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
 * @param incY The increment for the elements of vector y, must be larger than zero.
422ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 423cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void ZGEMV(@Transpose int TransA, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) { 424cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateGEMV(Element.F64_2(mRS), TransA, A, X, incX, Y, incY); 425cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int M = A.getType().getY(); 426cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = A.getType().getX(); 427cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 428cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 429cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 430cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long xID = X.getID(mRS); 431cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long yID = Y.getID(mRS); 432cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 433cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 434cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang xID = getDummyAlloc(X); 435cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang yID = getDummyAlloc(Y); 436cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 437cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp); 438cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 439cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 440ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 441ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * SGBMV performs one of the matrix-vector operations 442ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y 443ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 444ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: 
 * http://www.netlib.org/lapack/explore-html/d6/d46/sgbmv_8f.html
 *
 * Note: For an M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),
 * but only the region M*(KL+KU+1) will be referenced. The following subroutine is an
 * example showing how to convert the original matrix 'a' to row-based band matrix 'b'.
 *     for i in range(0, m):
 *         for j in range(max(0, i-kl), min(i+ku+1, n)):
 *             b[i, j-i+kl] = a[i, j]
 *
 * @param TransA The type of transpose applied to matrix A.
 * @param KL The number of sub-diagonals of the matrix A.
 * @param KU The number of super-diagonals of the matrix A.
 * @param alpha The scalar alpha.
 * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F32}.
 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
 * @param incX The increment for the elements of vector x, must be larger than zero.
 * @param beta The scalar beta.
 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
462ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incY The increment for the elements of vector y, must be larger than zero. 463ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 464cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void SGBMV(@Transpose int TransA, int KL, int KU, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) { 465cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang // GBMV has the same validation requirements as GEMV + KL and KU >= 0 466cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateGEMV(Element.F32(mRS), TransA, A, X, incX, Y, incY); 467cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (KL < 0 || KU < 0) { 468cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("KL and KU must be greater than or equal to 0"); 469cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 470cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int M = A.getType().getY(); 471cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = A.getType().getX(); 472cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 473cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 474cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 475cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long xID = X.getID(mRS); 476cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long yID = Y.getID(mRS); 477cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 478cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 479cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang xID = getDummyAlloc(X); 480cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang yID = getDummyAlloc(Y); 481cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 482cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha, aID, xID, beta, yID, incX, incY, KL, KU, mUseIncSupp); 
483cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 484ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 485ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 486ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * DGBMV performs one of the matrix-vector operations 487ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y 488ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 489ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/d2/d3f/dgbmv_8f.html 490ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 491ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N), 492ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * but only the region M*(KL+KU+1) will be referenced. The following subroutine can is an 493ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * example showing how to convert the original matrix 'a' to row-based band matrix 'b'. 494ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * for i in range(0, m): 495ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * for j in range(max(0, i-kl), min(i+ku+1, n)): 496ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * b[i, j-i+kl] = a[i, j] 497ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 498ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param TransA The type of transpose applied to matrix A. 499ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param KL The number of sub-diagonals of the matrix A. 500ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param KU The number of super-diagonals of the matrix A. 501ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param alpha The scalar alpha. 502ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F64}. 
503ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 504ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incX The increment for the elements of vector x, must be larger than zero. 505ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param beta The scalar beta. 506ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. 507ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incY The increment for the elements of vector y, must be larger than zero. 508ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 509cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void DGBMV(@Transpose int TransA, int KL, int KU, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) { 510cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang // GBMV has the same validation requirements as GEMV + KL and KU >= 0 511cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateGEMV(Element.F64(mRS), TransA, A, X, incX, Y, incY); 512cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (KL < 0 || KU < 0) { 513cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("KL and KU must be greater than or equal to 0"); 514cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 515cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int M = A.getType().getY(); 516cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = A.getType().getX(); 517cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 518cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 519cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 520cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long xID = X.getID(mRS); 521cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long yID = Y.getID(mRS); 522cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao 
Wang if (mUseIncSupp) { 523cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 524cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang xID = getDummyAlloc(X); 525cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang yID = getDummyAlloc(Y); 526cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 527cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha, aID, xID, beta, yID, incX, incY, KL, KU, mUseIncSupp); 528cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 529ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 530ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 531ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * CGBMV performs one of the matrix-vector operations 532ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y or y := alpha*A**H*x + beta*y 533ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 534ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/d0/d75/cgbmv_8f.html 535ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 536ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N), 537ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * but only the region M*(KL+KU+1) will be referenced. The following subroutine can is an 538ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * example showing how to convert the original matrix 'a' to row-based band matrix 'b'. 
 *     for i in range(0, m):
 *         for j in range(max(0, i-kl), min(i+ku+1, n)):
 *             b[i, j-i+kl] = a[i, j]
 *
 * @param TransA The type of transpose applied to matrix A.
 * @param KL The number of sub-diagonals of the matrix A.
 * @param KU The number of super-diagonals of the matrix A.
 * @param alpha The scalar alpha.
 * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F32_2}.
 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
 * @param incX The increment for the elements of vector x, must be larger than zero.
 * @param beta The scalar beta.
 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
 * @param incY The increment for the elements of vector y, must be larger than zero.
*/
public void CGBMV(@Transpose int TransA, int KL, int KU, Float2 alpha, Allocation A, Allocation X,
                  int incX, Float2 beta, Allocation Y, int incY) {
    // Band routines reuse the GEMV checks; the only extra rule is non-negative KL and KU.
    validateGEMV(Element.F32_2(mRS), TransA, A, X, incX, Y, incY);
    if (KL < 0 || KU < 0) {
        throw new RSRuntimeException("KL and KU must be greater than or equal to 0");
    }

    // A's Y dimension is forwarded as M and its X dimension as N.
    final int rowCount = A.getType().getY();
    final int colCount = A.getType().getX();

    final boolean incSupport = isIncSupp();
    long matrixId = A.getID(mRS);
    long vecXId = X.getID(mRS);
    long vecYId = Y.getID(mRS);
    if (incSupport) {
        // When isIncSupp() is true the IDs returned by getDummyAlloc() are used instead.
        matrixId = getDummyAlloc(A);
        vecXId = getDummyAlloc(X);
        vecYId = getDummyAlloc(Y);
    }

    // The complex scalars are passed as separate x/y components.
    mRS.nScriptIntrinsicBLAS_Complex(
            getID(mRS), RsBlas_cgbmv, TransA, 0, 0, 0, 0, rowCount, colCount, 0,
            alpha.x, alpha.y, matrixId, vecXId, beta.x, beta.y, vecYId,
            incX, incY, KL, KU, incSupport);
}
/**
 * ZGBMV performs one of the matrix-vector operations
 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y or y := alpha*A**H*x + beta*y
 *
 * Details: http://www.netlib.org/lapack/explore-html/d9/d46/zgbmv_8f.html
 *
 * Note: For an M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),
 * but only the region M*(KL+KU+1) will be referenced. The following subroutine is an
 * example showing how to convert the original matrix 'a' to row-based band matrix 'b'.
 *     for i in range(0, m):
 *         for j in range(max(0, i-kl), min(i+ku+1, n)):
 *             b[i, j-i+kl] = a[i, j]
 *
 * @param TransA The type of transpose applied to matrix A.
 * @param KL The number of sub-diagonals of the matrix A.
 * @param KU The number of super-diagonals of the matrix A.
 * @param alpha The scalar alpha.
 * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F64_2}.
593ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 594ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incX The increment for the elements of vector x, must be larger than zero. 595ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param beta The scalar beta. 596ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. 597ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incY The increment for the elements of vector y, must be larger than zero. 598ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 599cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void ZGBMV(@Transpose int TransA, int KL, int KU, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) { 600cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang // GBMV has the same validation requirements as GEMV + KL and KU >= 0 601cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateGEMV(Element.F64_2(mRS), TransA, A, X, incX, Y, incY); 602cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (KL < 0 || KU < 0) { 603cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("KL and KU must be greater than or equal to 0"); 604cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 605cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int M = A.getType().getY(); 606cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = A.getType().getX(); 607cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 608cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 609cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 610cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long xID = X.getID(mRS); 611cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long yID = Y.getID(mRS); 
612cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 613cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 614cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang xID = getDummyAlloc(X); 615cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang yID = getDummyAlloc(Y); 616cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 617cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, KL, KU, mUseIncSupp); 618cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 619cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 620cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang static void validateTRMV(Element e, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 621cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateTranspose(TransA); 622cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateUplo(Uplo); 623cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateDiag(Diag); 624cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = A.getType().getY(); 625cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (A.getType().getX() != N) { 626cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("A must be a square matrix for TRMV"); 627cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 628cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (!A.getType().getElement().isCompatible(e) || 629cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang !X.getType().getElement().isCompatible(e)) { 630cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Called BLAS with wrong Element type"); 631cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 632cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (X.getType().getY() > 1) { 633cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("BLAS vectors must 
have Y dimension of 0 or 1"); 634cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 635cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 636cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (incX <= 0) { 637cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Vector increments must be greater than 0"); 638cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 639cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int expectedXDim = 1 + (N - 1) * incX; 640cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (X.getType().getX() != expectedXDim) { 641cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Incorrect vector dimensions for TRMV"); 642cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 643cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 644cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 645cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang static int validateTPMV(Element e, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 646cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateTranspose(TransA); 647cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateUplo(Uplo); 648cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateDiag(Diag); 649cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (!Ap.getType().getElement().isCompatible(e) || 650cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang !X.getType().getElement().isCompatible(e)) { 651cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Called BLAS with wrong Element type"); 652cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 653cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (X.getType().getY() > 1) { 654cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 655cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 656cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 
657cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (Ap.getType().getY() > 1) { 658cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1"); 659cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 660cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 661cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = (int)Math.sqrt((double)Ap.getType().getX() * 2); 662cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang //is it really doing anything? 663cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (Ap.getType().getX() != ((N * (N+1)) / 2)) { 664cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Invalid dimension for Ap"); 665cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 666cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (incX <= 0) { 667cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Vector increments must be greater than 0"); 668cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 669cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int expectedXDim = 1 + (N - 1) * incX; 670cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (X.getType().getX() != expectedXDim) { 671cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Incorrect vector dimensions for TPMV"); 672cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 673cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 674cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang return N; 675cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 676cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 677ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 678ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * STRMV performs one of the matrix-vector operations 679ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * x := A*x or x := A**T*x 680ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 681ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: 
http://www.netlib.org/lapack/explore-html/de/d45/strmv_8f.html 682ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 683ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 684ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param TransA The type of transpose applied to matrix A. 685ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Diag Specifies whether or not A is unit triangular. 686ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 687ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 688ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incX The increment for the elements of vector x, must be larger than zero. 689ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 690cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void STRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 691cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX); 692cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = A.getType().getY(); 693cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 694cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 695cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 696cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long xID = X.getID(mRS); 697cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 698cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 699cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang xID = getDummyAlloc(X); 700cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 701cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 
mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp); 702cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 703ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 704ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 705ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * DTRMV performs one of the matrix-vector operations 706ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * x := A*x or x := A**T*x 707ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 708ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/dc/d7e/dtrmv_8f.html 709ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 710ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 711ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param TransA The type of transpose applied to matrix A. 712ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Diag Specifies whether or not A is unit triangular. 713ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 714ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 715ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incX The increment for the elements of vector x, must be larger than zero. 
716ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 717cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void DTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 718cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX); 719cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = A.getType().getY(); 720cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 721cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 722cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 723cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long xID = X.getID(mRS); 724cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 725cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 726cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang xID = getDummyAlloc(X); 727cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 728cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp); 729cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 730ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 731ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 732ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * CTRMV performs one of the matrix-vector operations 733ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * x := A*x or x := A**T*x or x := A**H*x 734ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 735ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/df/d78/ctrmv_8f.html 736ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 737ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 
738ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param TransA The type of transpose applied to matrix A. 739ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Diag Specifies whether or not A is unit triangular. 740ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 741ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 742ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incX The increment for the elements of vector x, must be larger than zero. 743ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 744cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void CTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 745cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX); 746cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = A.getType().getY(); 747cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 748cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 749cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 750cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long xID = X.getID(mRS); 751cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 752cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 753cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang xID = getDummyAlloc(X); 754cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 755cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp); 756cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 757ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 
758ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 759ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * ZTRMV performs one of the matrix-vector operations 760ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * x := A*x or x := A**T*x or x := A**H*x 761ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 762ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/d0/dd1/ztrmv_8f.html 763ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 764ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 765ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param TransA The type of transpose applied to matrix A. 766ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Diag Specifies whether or not A is unit triangular. 767ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 768ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 769ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incX The increment for the elements of vector x, must be larger than zero. 
770ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 771cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void ZTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 772cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX); 773cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = A.getType().getY(); 774cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 775cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 776cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 777cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long xID = X.getID(mRS); 778cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 779cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 780cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang xID = getDummyAlloc(X); 781cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 782cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp); 783cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 784cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 785ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 786ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * STBMV performs one of the matrix-vector operations 787ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * x := A*x or x := A**T*x 788ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 789ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/d6/d7d/stbmv_8f.html 790ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 791ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 792ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * but only the 
region N*(K+1) will be referenced. The following subroutine can is an 793ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 794ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * for i in range(0, n): 795ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * for j in range(i, min(i+k+1, n)): 796ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * b[i, j-i] = a[i, j] 797ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 798ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 799ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param TransA The type of transpose applied to matrix A. 800ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Diag Specifies whether or not A is unit triangular. 801ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param K The number of off-diagonals of the matrix A 802ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 803ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 804ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incX The increment for the elements of vector x, must be larger than zero. 
805ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 806cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void STBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { 807cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang // TBMV has the same requirements as TRMV + K >= 0 808cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (K < 0) { 809cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("K must be greater than or equal to 0"); 810cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 811cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX); 812cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = A.getType().getY(); 813cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 814cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 815cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 816cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long xID = X.getID(mRS); 817cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 818cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 819cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang xID = getDummyAlloc(X); 820cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 821cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp); 822cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 823ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 824ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 825ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * DTBMV performs one of the matrix-vector operations 826ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * x := A*x or x := A**T*x 827ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 828ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao 
Wang * Details: http://www.netlib.org/lapack/explore-html/df/d29/dtbmv_8f.html 829ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 830ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 831ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * but only the region N*(K+1) will be referenced. The following subroutine can is an 832ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 833ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * for i in range(0, n): 834ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * for j in range(i, min(i+k+1, n)): 835ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * b[i, j-i] = a[i, j] 836ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 837ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 838ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param TransA The type of transpose applied to matrix A. 839ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Diag Specifies whether or not A is unit triangular. 840ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param K The number of off-diagonals of the matrix A 841ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 842ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 843ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incX The increment for the elements of vector x, must be larger than zero. 
844ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 845cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void DTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { 846cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang // TBMV has the same requirements as TRMV + K >= 0 847cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (K < 0) { 848cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("K must be greater than or equal to 0"); 849cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 850cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX); 851cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = A.getType().getY(); 852cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 853cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 854cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 855cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long xID = X.getID(mRS); 856cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 857cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 858cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang xID = getDummyAlloc(X); 859cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 860cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp); 861cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 862ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 863ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 864ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * CTBMV performs one of the matrix-vector operations 865ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * x := A*x or x := A**T*x or x := A**H*x 866ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 
867ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/d3/dcd/ctbmv_8f.html 868ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 869ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 870ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * but only the region N*(K+1) will be referenced. The following subroutine can is an 871ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 872ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * for i in range(0, n): 873ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * for j in range(i, min(i+k+1, n)): 874ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * b[i, j-i] = a[i, j] 875ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 876ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 877ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param TransA The type of transpose applied to matrix A. 878ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Diag Specifies whether or not A is unit triangular. 879ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param K The number of off-diagonals of the matrix A 880ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 881ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 882ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incX The increment for the elements of vector x, must be larger than zero. 
883ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 884cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void CTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { 885cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang // TBMV has the same requirements as TRMV + K >= 0 886cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (K < 0) { 887cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("K must be greater than or equal to 0"); 888cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 889cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX); 890cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = A.getType().getY(); 891cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 892cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 893cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 894cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long xID = X.getID(mRS); 895cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 896cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 897cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang xID = getDummyAlloc(X); 898cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 899cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp); 900cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 901ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 902ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 903ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * ZTBMV performs one of the matrix-vector operations 904ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * x := A*x or x := A**T*x or x := A**H*x 905ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 
906ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/d3/d39/ztbmv_8f.html 907ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 908ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 909ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * but only the region N*(K+1) will be referenced. The following subroutine can is an 910ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 911ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * for i in range(0, n): 912ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * for j in range(i, min(i+k+1, n)): 913ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * b[i, j-i] = a[i, j] 914ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 915ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 916ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param TransA The type of transpose applied to matrix A. 917ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Diag Specifies whether or not A is unit triangular. 918ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param K The number of off-diagonals of the matrix A 919ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 920ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 921ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incX The increment for the elements of vector x, must be larger than zero. 
922ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 923cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void ZTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { 924cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang // TBMV has the same requirements as TRMV + K >= 0 925cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (K < 0) { 926cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("K must be greater than or equal to 0"); 927cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 928cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX); 929cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = A.getType().getY(); 930cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 931cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 932cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 933cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long xID = X.getID(mRS); 934cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 935cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 936cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang xID = getDummyAlloc(X); 937cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 938cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp); 939cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 940ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 941ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 942ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * STPMV performs one of the matrix-vector operations 943ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * x := A*x or x := A**T*x 944ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 
945ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/db/db1/stpmv_8f.html 946ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 947ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 948ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 949ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 'a' to packed matrix 'b'. 950ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * k = 0 951ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * for i in range(0, n): 952ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * for j in range(i, n): 953ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * b[k++] = a[i, j] 954ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 955ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 956ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param TransA The type of transpose applied to matrix A. 957ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Diag Specifies whether or not A is unit triangular. 958ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32}. 959ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 960ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incX The increment for the elements of vector x, must be larger than zero. 
961ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 962cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void STPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 963cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = validateTPMV(Element.F32(mRS), Uplo, TransA, Diag, Ap, X, incX); 964cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 965cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 966cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long apID = Ap.getID(mRS); 967cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long xID = X.getID(mRS); 968cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 969cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang apID = getDummyAlloc(Ap); 970cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang xID = getDummyAlloc(X); 971cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 972cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, apID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp); 973cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 974ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 975ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 976ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * DTPMV performs one of the matrix-vector operations 977ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * x := A*x or x := A**T*x 978ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 979ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/dc/dcd/dtpmv_8f.html 980ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 981ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 982ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * The following subroutine can is an example showing how to convert a UPPER 
trianglar matrix 983ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 'a' to packed matrix 'b'. 984ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * k = 0 985ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * for i in range(0, n): 986ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * for j in range(i, n): 987ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * b[k++] = a[i, j] 988ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 989ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 990ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param TransA The type of transpose applied to matrix A. 991ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Diag Specifies whether or not A is unit triangular. 992ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64}. 993ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 994ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incX The increment for the elements of vector x, must be larger than zero. 
995ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 996cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void DTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 997cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = validateTPMV(Element.F64(mRS), Uplo, TransA, Diag, Ap, X, incX); 998cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 999cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 1000cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long apID = Ap.getID(mRS); 1001cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long xID = X.getID(mRS); 1002cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 1003cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang apID = getDummyAlloc(Ap); 1004cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang xID = getDummyAlloc(X); 1005cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1006cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, apID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1007cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1008ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 1009ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 1010ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * CTPMV performs one of the matrix-vector operations 1011ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * x := A*x or x := A**T*x or x := A**H*x 1012ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 1013ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/d4/dbb/ctpmv_8f.html 1014ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 1015ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1016ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * The following subroutine can is an example 
showing how to convert a UPPER trianglar matrix 1017ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 'a' to packed matrix 'b'. 1018ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * k = 0 1019ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * for i in range(0, n): 1020ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * for j in range(i, n): 1021ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * b[k++] = a[i, j] 1022ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 1023ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1024ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param TransA The type of transpose applied to matrix A. 1025ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Diag Specifies whether or not A is unit triangular. 1026ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32_2}. 1027ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 1028ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incX The increment for the elements of vector x, must be larger than zero. 
1029ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 1030cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void CTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 1031cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = validateTPMV(Element.F32_2(mRS), Uplo, TransA, Diag, Ap, X, incX); 1032cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 1033cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 1034cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long apID = Ap.getID(mRS); 1035cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long xID = X.getID(mRS); 1036cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 1037cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang apID = getDummyAlloc(Ap); 1038cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang xID = getDummyAlloc(X); 1039cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1040cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, apID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1041cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1042ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 1043ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 1044ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * ZTPMV performs one of the matrix-vector operations 1045ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * x := A*x or x := A**T*x or x := A**H*x 1046ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 1047ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/d2/d9e/ztpmv_8f.html 1048ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 1049ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1050ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * The following subroutine can is 
     * an example showing how to convert an UPPER triangular matrix
     * 'a' to packed matrix 'b'.
     *     k = 0
     *     for i in range(0, n):
     *         for j in range(i, n):
     *             b[k++] = a[i, j]
     *
     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
     * @param TransA The type of transpose applied to matrix A.
     * @param Diag Specifies whether or not A is unit triangular.
     * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64_2}.
     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
     * @param incX The increment for the elements of vector x, must be larger than zero.
1063ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 1064cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void ZTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 1065cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = validateTPMV(Element.F64_2(mRS), Uplo, TransA, Diag, Ap, X, incX); 1066cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 1067cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 1068cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long apID = Ap.getID(mRS); 1069cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long xID = X.getID(mRS); 1070cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 1071cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang apID = getDummyAlloc(Ap); 1072cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang xID = getDummyAlloc(X); 1073cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1074cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, apID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1075cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1076ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 1077ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 1078ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * STRSV solves one of the systems of equations 1079ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * A*x = b or A**T*x = b 1080ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 1081ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/d0/d2a/strsv_8f.html 1082ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 1083ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1084ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param TransA The type of transpose applied to matrix A. 
1085ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Diag Specifies whether or not A is unit triangular. 1086ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 1087ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1088ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incX The increment for the elements of vector x, must be larger than zero. 1089ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 1090cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void STRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 1091cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang // TRSV is the same as TRMV 1092cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX); 1093cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = A.getType().getY(); 1094cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 1095cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 1096cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 1097cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long xID = X.getID(mRS); 1098cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 1099cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 1100cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang xID = getDummyAlloc(X); 1101cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1102cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1103cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 1104cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1105ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao 
    /**
     * DTRSV solves one of the systems of equations
     * A*x = b or A**T*x = b
     *
     * Details: http://www.netlib.org/lapack/explore-html/d6/d96/dtrsv_8f.html
     *
     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
     * @param TransA The type of transpose applied to matrix A.
     * @param Diag Specifies whether or not A is unit triangular.
     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
     * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
     * @param incX The increment for the elements of vector x, must be larger than zero.
1118ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 1119cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void DTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 1120cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang // TRSV is the same as TRMV 1121cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX); 1122cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = A.getType().getY(); 1123cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 1124cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 1125cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 1126cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long xID = X.getID(mRS); 1127cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 1128cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 1129cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang xID = getDummyAlloc(X); 1130cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1131cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1132cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 1133cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1134ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 1135ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 1136ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * CTRSV solves one of the systems of equations 1137ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * A*x = b or A**T*x = b or A**H*x = b 1138ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 1139ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/d4/dc8/ctrsv_8f.html 1140ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 1141ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 
@param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1142ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param TransA The type of transpose applied to matrix A. 1143ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Diag Specifies whether or not A is unit triangular. 1144ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 1145ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 1146ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incX The increment for the elements of vector x, must be larger than zero. 1147ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 1148cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void CTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 1149cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang // TRSV is the same as TRMV 1150cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX); 1151cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = A.getType().getY(); 1152cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 1153cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 1154cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 1155cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long xID = X.getID(mRS); 1156cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 1157cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 1158cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang xID = getDummyAlloc(X); 1159cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1160cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, 
aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1161cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 1162cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1163ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 1164ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 1165ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * ZTRSV solves one of the systems of equations 1166ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * A*x = b or A**T*x = b or A**H*x = b 1167ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 1168ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/d1/d2f/ztrsv_8f.html 1169ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 1170ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1171ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param TransA The type of transpose applied to matrix A. 1172ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Diag Specifies whether or not A is unit triangular. 1173ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 1174ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 1175ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incX The increment for the elements of vector x, must be larger than zero. 
1176ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 1177cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void ZTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 1178cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang // TRSV is the same as TRMV 1179cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX); 1180cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = A.getType().getY(); 1181cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 1182cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 1183cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 1184cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long xID = X.getID(mRS); 1185cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 1186cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 1187cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang xID = getDummyAlloc(X); 1188cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1189cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1190cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 1191cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1192ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 1193ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 1194ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * STBSV solves one of the systems of equations 1195ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * A*x = b or A**T*x = b 1196ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 1197ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/d0/d1f/stbsv_8f.html 1198ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 1199ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Note: For a 
     * N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
     * but only the region N*(K+1) will be referenced. The following subroutine is an
     * example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
     *     for i in range(0, n):
     *         for j in range(i, min(i+k+1, n)):
     *             b[i, j-i] = a[i, j]
     *
     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
     * @param TransA The type of transpose applied to matrix A.
     * @param Diag Specifies whether or not A is unit triangular.
     * @param K The number of off-diagonals of the matrix A
     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
     * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
     * @param incX The increment for the elements of vector x, must be larger than zero.
1213ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 1214cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void STBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { 1215cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang // TBSV is the same as TRMV + K >= 0 1216cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX); 1217cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = A.getType().getY(); 1218cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (K < 0) { 1219cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Number of diagonals must be positive"); 1220cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1221cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 1222cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 1223cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 1224cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long xID = X.getID(mRS); 1225cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 1226cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 1227cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang xID = getDummyAlloc(X); 1228cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1229cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1230cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1231ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 1232ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 1233ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * DTBSV solves one of the systems of equations 1234ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * A*x = b or A**T*x = b 1235ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 
     * Details: http://www.netlib.org/lapack/explore-html/d4/dcf/dtbsv_8f.html
     *
     * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
     * but only the region N*(K+1) will be referenced. The following subroutine is an
     * example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
     *     for i in range(0, n):
     *         for j in range(i, min(i+k+1, n)):
     *             b[i, j-i] = a[i, j]
     *
     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
     * @param TransA The type of transpose applied to matrix A.
     * @param Diag Specifies whether or not A is unit triangular.
     * @param K The number of off-diagonals of the matrix A
     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
     * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
     * @param incX The increment for the elements of vector x, must be larger than zero.
1252ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 1253cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void DTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { 1254cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang // TBSV is the same as TRMV + K >= 0 1255cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX); 1256cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = A.getType().getY(); 1257cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (K < 0) { 1258cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Number of diagonals must be positive"); 1259cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1260cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 1261cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 1262cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 1263cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long xID = X.getID(mRS); 1264cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 1265cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 1266cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang xID = getDummyAlloc(X); 1267cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1268cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1269cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1270ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 1271ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 1272ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * CTBSV solves one of the systems of equations 1273ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * A*x = b or A**T*x = b or A**H*x = b 1274ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 
     * Details: http://www.netlib.org/lapack/explore-html/d9/d5f/ctbsv_8f.html
     *
     * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
     * but only the region N*(K+1) will be referenced. The following subroutine is an
     * example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
     *     for i in range(0, n):
     *         for j in range(i, min(i+k+1, n)):
     *             b[i, j-i] = a[i, j]
     *
     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
     * @param TransA The type of transpose applied to matrix A.
     * @param Diag Specifies whether or not A is unit triangular.
     * @param K The number of off-diagonals of the matrix A
     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
     * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
     * @param incX The increment for the elements of vector x, must be larger than zero.
1291ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 1292cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void CTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { 1293cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang // TBSV is the same as TRMV + K >= 0 1294cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX); 1295cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = A.getType().getY(); 1296cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (K < 0) { 1297cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Number of diagonals must be positive"); 1298cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1299cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 1300cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 1301cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 1302cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long xID = X.getID(mRS); 1303cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 1304cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 1305cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang xID = getDummyAlloc(X); 1306cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1307cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1308cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1309ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 1310ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 1311ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * ZTBSV solves one of the systems of equations 1312ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * A*x = b or A**T*x = b or A**H*x = b 1313ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 
     * Details: http://www.netlib.org/lapack/explore-html/d4/d5a/ztbsv_8f.html
     *
     * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
     * but only the region N*(K+1) will be referenced. The following subroutine is an
     * example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
     *     for i in range(0, n):
     *         for j in range(i, min(i+k+1, n)):
     *             b[i, j-i] = a[i, j]
     *
     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
     * @param TransA The type of transpose applied to matrix A.
     * @param Diag Specifies whether or not A is unit triangular.
     * @param K The number of off-diagonals of the matrix A
     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
     * @param incX The increment for the elements of vector x, must be larger than zero.
1330ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 1331cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void ZTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { 1332cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang // TBSV is the same as TRMV + K >= 0 1333cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX); 1334cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = A.getType().getY(); 1335cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (K < 0) { 1336cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Number of diagonals must be positive"); 1337cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1338cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 1339cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 1340cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 1341cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long xID = X.getID(mRS); 1342cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 1343cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 1344cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang xID = getDummyAlloc(X); 1345cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1346cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1347cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1348ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 1349ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 1350ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * STPSV solves one of the systems of equations 1351ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * A*x = b or A**T*x = b 1352ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 
     * Details: http://www.netlib.org/lapack/explore-html/d0/d7c/stpsv_8f.html
     *
     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
     * The following subroutine is an example showing how to convert an UPPER triangular matrix
     * 'a' to packed matrix 'b'.
     *     k = 0
     *     for i in range(0, n):
     *         for j in range(i, n):
     *             b[k++] = a[i, j]
     *
     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
     * @param TransA The type of transpose applied to matrix A.
     * @param Diag Specifies whether or not A is unit triangular.
     * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32}.
     * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
     * @param incX The increment for the elements of vector x, must be larger than zero.
1369ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 1370cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void STPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 1371cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang // TPSV is same as TPMV 1372cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = validateTPMV(Element.F32(mRS), Uplo, TransA, Diag, Ap, X, incX); 1373cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 1374cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 1375cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long apID = Ap.getID(mRS); 1376cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long xID = X.getID(mRS); 1377cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 1378cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang apID = getDummyAlloc(Ap); 1379cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang xID = getDummyAlloc(X); 1380cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1381cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, apID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1382cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1383ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 1384ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 1385ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * DTPSV solves one of the systems of equations 1386ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * A*x = b or A**T*x = b 1387ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 1388ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/d9/d84/dtpsv_8f.html 1389ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 1390ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 
1391ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1392ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 'a' to packed matrix 'b'. 1393ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * k = 0 1394ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * for i in range(0, n): 1395ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * for j in range(i, n): 1396ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * b[k++] = a[i, j] 1397ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 1398ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1399ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param TransA The type of transpose applied to matrix A. 1400ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Diag Specifies whether or not A is unit triangular. 1401ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64}. 1402ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 1403ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incX The increment for the elements of vector x, must be larger than zero. 
1404ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 1405cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void DTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 1406cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang // TPSV is same as TPMV 1407cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = validateTPMV(Element.F64(mRS), Uplo, TransA, Diag, Ap, X, incX); 1408cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 1409cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 1410cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long apID = Ap.getID(mRS); 1411cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long xID = X.getID(mRS); 1412cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 1413cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang apID = getDummyAlloc(Ap); 1414cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang xID = getDummyAlloc(X); 1415cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1416cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, apID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1417cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1418ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 1419ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 1420ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * CTPSV solves one of the systems of equations 1421ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * A*x = b or A**T*x = b or A**H*x = b 1422ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 1423ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/d8/d56/ctpsv_8f.html 1424ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 1425ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 
1426ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1427ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 'a' to packed matrix 'b'. 1428ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * k = 0 1429ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * for i in range(0, n): 1430ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * for j in range(i, n): 1431ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * b[k++] = a[i, j] 1432ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 1433ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1434ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param TransA The type of transpose applied to matrix A. 1435ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Diag Specifies whether or not A is unit triangular. 1436ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32_2}. 1437ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 1438ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incX The increment for the elements of vector x, must be larger than zero. 
1439ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 1440cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void CTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 1441cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang // TPSV is same as TPMV 1442cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = validateTPMV(Element.F32_2(mRS), Uplo, TransA, Diag, Ap, X, incX); 1443cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 1444cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 1445cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long apID = Ap.getID(mRS); 1446cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long xID = X.getID(mRS); 1447cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 1448cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang apID = getDummyAlloc(Ap); 1449cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang xID = getDummyAlloc(X); 1450cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1451cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, apID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1452cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1453ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 1454ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 1455ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * ZTPSV solves one of the systems of equations 1456ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * A*x = b or A**T*x = b or A**H*x = b 1457ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 1458ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/da/d57/ztpsv_8f.html 1459ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 1460ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 
1461ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1462ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 'a' to packed matrix 'b'. 1463ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * k = 0 1464ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * for i in range(0, n): 1465ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * for j in range(i, n): 1466ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * b[k++] = a[i, j] 1467ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 1468ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1469ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param TransA The type of transpose applied to matrix A. 1470ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Diag Specifies whether or not A is unit triangular. 1471ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64_2}. 1472ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 1473ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incX The increment for the elements of vector x, must be larger than zero. 
1474ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 1475cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void ZTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 1476cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang // TPSV is same as TPMV 1477cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = validateTPMV(Element.F64_2(mRS), Uplo, TransA, Diag, Ap, X, incX); 1478cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 1479cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 1480cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long apID = Ap.getID(mRS); 1481cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long xID = X.getID(mRS); 1482cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 1483cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang apID = getDummyAlloc(Ap); 1484cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang xID = getDummyAlloc(X); 1485cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1486cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, apID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1487cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1488cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 1489cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang /** 1490cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang * Level 2, S and D only 1491cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang */ 1492cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang static int validateSYMV(Element e, @Uplo int Uplo, Allocation A, Allocation X, Allocation Y, int incX, int incY) { 1493cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateUplo(Uplo); 1494cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = A.getType().getY(); 1495cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (A.getType().getX() != N) { 1496cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new 
RSRuntimeException("A must be a square matrix for SYMV"); 1497cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1498cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (!A.getType().getElement().isCompatible(e) || 1499cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang !X.getType().getElement().isCompatible(e) || 1500cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang !Y.getType().getElement().isCompatible(e) ) { 1501cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Called BLAS with wrong Element type"); 1502cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1503cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (X.getType().getY() > 1 || Y.getType().getY() > 1) { 1504cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 1505cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1506cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 1507cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (incX <= 0 || incY <= 0) { 1508cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Vector increments must be greater than 0"); 1509cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1510cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int expectedXDim = 1 + (N - 1) * incX; 1511cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (X.getType().getX() != expectedXDim) { 1512cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Incorrect vector dimensions for SYMV"); 1513cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1514cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int expectedYDim = 1 + (N - 1) * incY; 1515cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (Y.getType().getX() != expectedYDim) { 1516cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Incorrect vector dimensions for SYMV"); 1517cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 
1518cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang return N; 1519cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1520cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang static int validateSPMV(Element e, @Uplo int Uplo, Allocation Ap, Allocation X, int incX, Allocation Y, int incY) { 1521cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateUplo(Uplo); 1522cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (!Ap.getType().getElement().isCompatible(e) || 1523cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang !X.getType().getElement().isCompatible(e) || 1524cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang !Y.getType().getElement().isCompatible(e)) { 1525cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Called BLAS with wrong Element type"); 1526cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1527cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (X.getType().getY() > 1 || Y.getType().getY() > 1) { 1528cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 1529cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1530cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 1531cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (Ap.getType().getY() > 1) { 1532cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1"); 1533cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1534cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 1535cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = (int)Math.sqrt((double)Ap.getType().getX() * 2); 1536cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (Ap.getType().getX() != ((N * (N+1)) / 2)) { 1537cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Invalid dimension for Ap"); 1538cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1539cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (incX <= 0 || incY <= 0) { 
1540cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Vector increments must be greater than 0"); 1541cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1542cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int expectedXDim = 1 + (N - 1) * incX; 1543cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (X.getType().getX() != expectedXDim) { 1544cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Incorrect vector dimensions for SPMV"); 1545cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1546cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int expectedYDim = 1 + (N - 1) * incY; 1547cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (Y.getType().getX() != expectedYDim) { 1548cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Incorrect vector dimensions for SPMV"); 1549cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1550cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 1551cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang return N; 1552cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1553cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang static void validateGER(Element e, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 1554cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (!A.getType().getElement().isCompatible(e) || 1555cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang !X.getType().getElement().isCompatible(e) || 1556cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang !Y.getType().getElement().isCompatible(e) ) { 1557cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Called BLAS with wrong Element type"); 1558cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1559cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 1560cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (X.getType().getY() > 1 || Y.getType().getY() > 1) { 1561cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("BLAS vectors must 
have Y dimension of 0 or 1"); 1562cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1563cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 1564cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int M = A.getType().getY(); 1565cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = A.getType().getX(); 1566cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 1567cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (N < 1 || M < 1) { 1568cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("M and N must be 1 or greater for GER"); 1569cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1570cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (incX <= 0 || incY <= 0) { 1571cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Vector increments must be greater than 0"); 1572cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1573cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int expectedXDim = 1 + (M - 1) * incX; 1574cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (X.getType().getX() != expectedXDim) { 1575cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Incorrect vector dimensions for GER"); 1576cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1577cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int expectedYDim = 1 + (N - 1) * incY; 1578cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (Y.getType().getX() != expectedYDim) { 1579cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Incorrect vector dimensions for GER"); 1580cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1581cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 1582cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 1583cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1584cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang static int validateSYR(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation A) { 1585cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateUplo(Uplo); 
1586cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (!A.getType().getElement().isCompatible(e) || 1587cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang !X.getType().getElement().isCompatible(e)) { 1588cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Called BLAS with wrong Element type"); 1589cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1590cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 1591cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = A.getType().getX(); 1592cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 1593cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (X.getType().getY() > 1) { 1594cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 1595cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1596cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (N != A.getType().getY()) { 1597cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("A must be a symmetric matrix"); 1598cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1599cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (incX <= 0) { 1600cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Vector increments must be greater than 0"); 1601cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1602cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int expectedXDim = 1 + (N - 1) * incX; 1603cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (X.getType().getX() != expectedXDim) { 1604cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Incorrect vector dimensions for SYR"); 1605cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1606cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang return N; 1607cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1608cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang static int validateSPR(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Ap) { 
1609cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateUplo(Uplo); 1610cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (!Ap.getType().getElement().isCompatible(e) || 1611cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang !X.getType().getElement().isCompatible(e)) { 1612cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Called BLAS with wrong Element type"); 1613cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1614cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (X.getType().getY() > 1) { 1615cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 1616cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1617cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 1618cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (Ap.getType().getY() > 1) { 1619cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1"); 1620cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1621cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 1622cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = (int)Math.sqrt((double)Ap.getType().getX() * 2); 1623cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (Ap.getType().getX() != ((N * (N+1)) / 2)) { 1624cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Invalid dimension for Ap"); 1625cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1626cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (incX <= 0) { 1627cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Vector increments must be greater than 0"); 1628cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1629cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int expectedXDim = 1 + (N - 1) * incX; 1630cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (X.getType().getX() != expectedXDim) { 1631cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new 
RSRuntimeException("Incorrect vector dimensions for SPR"); 1632cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1633cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 1634cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang return N; 1635cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1636cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 1637cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang static int validateSYR2(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 1638cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateUplo(Uplo); 1639cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (!A.getType().getElement().isCompatible(e) || 1640cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang !X.getType().getElement().isCompatible(e) || 1641cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang !Y.getType().getElement().isCompatible(e)) { 1642cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Called BLAS with wrong Element type"); 1643cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1644cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 1645cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (X.getType().getY() > 1 || Y.getType().getY() > 1) { 1646cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 1647cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1648cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 1649cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = A.getType().getX(); 1650cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 1651cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (N != A.getType().getY()) { 1652cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("A must be a symmetric matrix"); 1653cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1654cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (incX <= 0 || incY <= 0) { 1655cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao 
Wang throw new RSRuntimeException("Vector increments must be greater than 0"); 1656cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1657cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int expectedXDim = 1 + (N - 1) * incX; 1658cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int expectedYDim = 1 + (N - 1) * incY; 1659cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (X.getType().getX() != expectedXDim || Y.getType().getX() != expectedYDim) { 1660cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Incorrect vector dimensions for SYR"); 1661cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1662cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang return N; 1663cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 1664cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1665cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang static int validateSPR2(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) { 1666cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateUplo(Uplo); 1667cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (!Ap.getType().getElement().isCompatible(e) || 1668cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang !X.getType().getElement().isCompatible(e) || 1669cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang !Y.getType().getElement().isCompatible(e)) { 1670cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Called BLAS with wrong Element type"); 1671cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1672cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (X.getType().getY() > 1 || Y.getType().getY() > 1) { 1673cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 1674cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1675cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 1676cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (Ap.getType().getY() > 1) { 
1677cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1"); 1678cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1679cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 1680cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = (int)Math.sqrt((double)Ap.getType().getX() * 2); 1681cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (Ap.getType().getX() != ((N * (N+1)) / 2)) { 1682cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Invalid dimension for Ap"); 1683cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1684cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (incX <= 0 || incY <= 0) { 1685cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Vector increments must be greater than 0"); 1686cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1687cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int expectedXDim = 1 + (N - 1) * incX; 1688cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int expectedYDim = 1 + (N - 1) * incY; 1689cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (X.getType().getX() != expectedXDim || Y.getType().getX() != expectedYDim) { 1690cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Incorrect vector dimensions for SPR2"); 1691cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1692cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 1693cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang return N; 1694cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1695cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 1696ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 1697ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * SSYMV performs the matrix-vector operation 1698ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * y := alpha*A*x + beta*y 1699ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 1700ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: 
http://www.netlib.org/lapack/explore-html/d2/d94/ssymv_8f.html 1701ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 1702ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 1703ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param alpha The scalar alpha. 1704ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 1705ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1706ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incX The increment for the elements of vector x, must be larger than zero. 1707ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param beta The scalar beta. 1708ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}. 1709ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incY The increment for the elements of vector y, must be larger than zero. 
1710ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 1711cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void SSYMV(@Uplo int Uplo, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) { 1712cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = validateSYMV(Element.F32(mRS), Uplo, A, X, Y, incX, incY); 1713cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 1714cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 1715cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 1716cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long xID = X.getID(mRS); 1717cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long yID = Y.getID(mRS); 1718cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 1719cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 1720cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang xID = getDummyAlloc(X); 1721cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang yID = getDummyAlloc(Y); 1722cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1723cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssymv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, aID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp); 1724cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1725ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 1726ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 1727ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * SSBMV performs the matrix-vector operation 1728ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * y := alpha*A*x + beta*y 1729ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 1730ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/d3/da1/ssbmv_8f.html 1731ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 1732ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Note: For a N*N matrix, the input Allocation should also be 
of size N*N (dimY = N, dimX = N), 1733ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * but only the region N*(K+1) will be referenced. The following subroutine can is an 1734ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 1735ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * for i in range(0, n): 1736ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * for j in range(i, min(i+k+1, n)): 1737ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * b[i, j-i] = a[i, j] 1738ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 1739ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied. 1740ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param K The number of off-diagonals of the matrix A 1741ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param alpha The scalar alpha. 1742ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 1743ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1744ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incX The increment for the elements of vector x, must be larger than zero. 1745ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param beta The scalar beta. 1746ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}. 1747ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incY The increment for the elements of vector y, must be larger than zero. 
1748ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 1749cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void SSBMV(@Uplo int Uplo, int K, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) { 1750cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang // SBMV is the same as SYMV + K >= 0 1751cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (K < 0) { 1752cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("K must be greater than or equal to 0"); 1753cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1754cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = validateSYMV(Element.F32(mRS), Uplo, A, X, Y, incX, incY); 1755cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 1756cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 1757cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 1758cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long xID = X.getID(mRS); 1759cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long yID = Y.getID(mRS); 1760cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 1761cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 1762cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang xID = getDummyAlloc(X); 1763cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang yID = getDummyAlloc(Y); 1764cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1765cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha, aID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp); 1766cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1767ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 1768ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 1769ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * SSPMV performs the matrix-vector operation 1770ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * y := alpha*A*x + beta*y 
1771ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 1772ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/d8/d68/sspmv_8f.html 1773ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 1774ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1775ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1776ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 'a' to packed matrix 'b'. 1777ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * k = 0 1778ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * for i in range(0, n): 1779ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * for j in range(i, n): 1780ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * b[k++] = a[i, j] 1781ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 1782ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form. 1783ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param alpha The scalar alpha. 1784ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32}. 1785ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1786ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incX The increment for the elements of vector x, must be larger than zero. 1787ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param beta The scalar beta. 1788ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}. 
1789ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incY The increment for the elements of vector y, must be larger than zero. 1790ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 1791cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void SSPMV(@Uplo int Uplo, float alpha, Allocation Ap, Allocation X, int incX, float beta, Allocation Y, int incY) { 1792cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = validateSPMV(Element.F32(mRS), Uplo, Ap, X, incX, Y, incY); 1793cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 1794cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 1795cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long apID = Ap.getID(mRS); 1796cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long xID = X.getID(mRS); 1797cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long yID = Y.getID(mRS); 1798cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 1799cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang apID = getDummyAlloc(Ap); 1800cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang xID = getDummyAlloc(X); 1801cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang yID = getDummyAlloc(Y); 1802cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1803cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sspmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, apID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp); 1804cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1805ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 1806ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 1807ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * SGER performs the rank 1 operation 1808ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * A := alpha*x*y**T + A 1809ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 1810ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/db/d5c/sger_8f.html 
1811ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 1812ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param alpha The scalar alpha. 1813ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1814ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incX The increment for the elements of vector x, must be larger than zero. 1815ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}. 1816ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incY The increment for the elements of vector y, must be larger than zero. 1817ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 1818ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 1819cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void SGER(float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 1820cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int M = A.getType().getY(); 1821cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = A.getType().getX(); 1822cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateGER(Element.F32(mRS), X, incX, Y, incY, A); 1823cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 1824cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 1825cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 1826cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long xID = X.getID(mRS); 1827cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long yID = Y.getID(mRS); 1828cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 1829cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 1830cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang xID = getDummyAlloc(X); 
1831cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang yID = getDummyAlloc(Y); 1832cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1833cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sger, 0, 0, 0, 0, 0, M, N, 0, alpha, xID, yID, 0.f, aID, incX, incY, 0, 0, mUseIncSupp); 1834cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1835ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 1836ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 1837ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * SSYR performs the rank 1 operation 1838ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * A := alpha*x*x**T + A 1839ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 1840ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/d6/dac/ssyr_8f.html 1841ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 1842ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 1843ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param alpha The scalar alpha. 1844ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1845ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incX The increment for the elements of vector x, must be larger than zero. 1846ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 
1847ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 1848cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void SSYR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation A) { 1849cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = validateSYR(Element.F32(mRS), Uplo, X, incX, A); 1850cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 1851cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 1852cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 1853cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long xID = X.getID(mRS); 1854cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 1855cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 1856cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang xID = getDummyAlloc(X); 1857cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1858cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, aID, 0.f, 0, incX, 0, 0, 0, mUseIncSupp); 1859cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1860ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 1861ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 1862ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * SSPR performs the rank 1 operation 1863ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * A := alpha*x*x**T + A 1864ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 1865ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/d2/d9b/sspr_8f.html 1866ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 1867ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1868ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 
1869ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 'a' to packed matrix 'b'. 1870ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * k = 0 1871ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * for i in range(0, n): 1872ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * for j in range(i, n): 1873ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * b[k++] = a[i, j] 1874ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 1875ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. 1876ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param alpha The scalar alpha. 1877ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1878ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incX The increment for the elements of vector x, must be larger than zero. 1879ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32}. 
1880ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 1881cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void SSPR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Ap) { 1882cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = validateSPR(Element.F32(mRS), Uplo, X, incX, Ap); 1883cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 1884cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 1885cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long apID = Ap.getID(mRS); 1886cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long xID = X.getID(mRS); 1887cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 1888cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang apID = getDummyAlloc(Ap); 1889cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang xID = getDummyAlloc(X); 1890cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1891cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sspr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, apID, 0.f, 0, incX, 0, 0, 0, mUseIncSupp); 1892cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1893ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 1894ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 1895ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * SSYR2 performs the symmetric rank 2 operation 1896ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * A := alpha*x*y**T + alpha*y*x**T + A 1897ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 1898ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/db/d99/ssyr2_8f.html 1899ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 1900ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 1901ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param alpha The scalar alpha. 
1902ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1903ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incX The increment for the elements of vector x, must be larger than zero. 1904ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}. 1905ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incY The increment for the elements of vector y, must be larger than zero. 1906ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 1907ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 1908cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void SSYR2(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 1909cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = validateSYR2(Element.F32(mRS), Uplo, X, incX, Y, incY, A); 1910cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 1911cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 1912cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 1913cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long xID = X.getID(mRS); 1914cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long yID = Y.getID(mRS); 1915cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 1916cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 1917cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang xID = getDummyAlloc(X); 1918cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang yID = getDummyAlloc(Y); 1919cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1920cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, yID, 0, aID, incX, incY, 0, 0, 
mUseIncSupp); 1921cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1922ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 1923ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 1924ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * SSPR2 performs the symmetric rank 2 operation 1925ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * A := alpha*x*y**T + alpha*y*x**T + A 1926ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 1927ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/db/d3e/sspr2_8f.html 1928ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 1929ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1930ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1931ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 'a' to packed matrix 'b'. 1932ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * k = 0 1933ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * for i in range(0, n): 1934ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * for j in range(i, n): 1935ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * b[k++] = a[i, j] 1936ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 1937ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. 1938ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param alpha The scalar alpha. 1939ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1940ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incX The increment for the elements of vector x, must be larger than zero. 
1941ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}. 1942ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incY The increment for the elements of vector y, must be larger than zero. 1943ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32}. 1944ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 1945cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void SSPR2(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) { 1946cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = validateSPR2(Element.F32(mRS), Uplo, X, incX, Y, incY, Ap); 1947cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 1948cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 1949cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long apID = Ap.getID(mRS); 1950cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long xID = X.getID(mRS); 1951cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long yID = Y.getID(mRS); 1952cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 1953cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang apID = getDummyAlloc(Ap); 1954cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang xID = getDummyAlloc(X); 1955cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang yID = getDummyAlloc(Y); 1956cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1957cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sspr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, yID, 0, apID, incX, incY, 0, 0, mUseIncSupp); 1958cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1959ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 1960ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 1961ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * DSYMV performs the matrix-vector operation 
1962ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * y := alpha*A*x + beta*y 1963ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 1964ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/d8/dbe/dsymv_8f.html 1965ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 1966ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 1967ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param alpha The scalar alpha. 1968ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 1969ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 1970ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incX The increment for the elements of vector x, must be larger than zero. 1971ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param beta The scalar beta. 1972ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. 1973ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incY The increment for the elements of vector y, must be larger than zero. 
1974ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 1975cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void DSYMV(@Uplo int Uplo, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) { 1976cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = validateSYMV(Element.F64(mRS), Uplo, A, X, Y, incX, incY); 1977cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 1978cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 1979cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 1980cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long xID = X.getID(mRS); 1981cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long yID = Y.getID(mRS); 1982cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 1983cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 1984cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang xID = getDummyAlloc(X); 1985cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang yID = getDummyAlloc(Y); 1986cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1987cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsymv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, aID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp); 1988cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 1989ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 1990ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 1991ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * DSBMV performs the matrix-vector operation 1992ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * y := alpha*A*x + beta*y 1993ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 1994ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/d8/d1e/dsbmv_8f.html 1995ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 1996ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Note: For a N*N matrix, the input Allocation should also 
be of size N*N (dimY = N, dimX = N), 1997ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * but only the region N*(K+1) will be referenced. The following subroutine can is an 1998ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 1999ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * for i in range(0, n): 2000ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * for j in range(i, min(i+k+1, n)): 2001ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * b[i, j-i] = a[i, j] 2002ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 2003ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied. 2004ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param K The number of off-diagonals of the matrix A 2005ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param alpha The scalar alpha. 2006ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 2007ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 2008ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incX The increment for the elements of vector x, must be larger than zero. 2009ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param beta The scalar beta. 2010ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. 2011ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incY The increment for the elements of vector y, must be larger than zero. 
2012ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 2013cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void DSBMV(@Uplo int Uplo, int K, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) { 2014cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang // SBMV is the same as SYMV + K >= 0 2015cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (K < 0) { 2016cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("K must be greater than or equal to 0"); 2017cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2018cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = validateSYMV(Element.F64(mRS), Uplo, A, X, Y, incX, incY); 2019cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 2020cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 2021cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 2022cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long xID = X.getID(mRS); 2023cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long yID = Y.getID(mRS); 2024cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 2025cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 2026cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang xID = getDummyAlloc(X); 2027cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang yID = getDummyAlloc(Y); 2028cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2029cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha, aID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp); 2030cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2031ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 2032ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 2033ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * DSPMV performs the matrix-vector operation 2034ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * y := alpha*A*x + beta*y 
2035ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 2036ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/d4/d85/dspmv_8f.html 2037ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 2038ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 2039ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 2040ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 'a' to packed matrix 'b'. 2041ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * k = 0 2042ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * for i in range(0, n): 2043ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * for j in range(i, n): 2044ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * b[k++] = a[i, j] 2045ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 2046ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form. 2047ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param alpha The scalar alpha. 2048ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64}. 2049ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 2050ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incX The increment for the elements of vector x, must be larger than zero. 2051ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param beta The scalar beta. 2052ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. 
2053ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incY The increment for the elements of vector y, must be larger than zero. 2054ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 2055cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void DSPMV(@Uplo int Uplo, double alpha, Allocation Ap, Allocation X, int incX, double beta, Allocation Y, int incY) { 2056cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = validateSPMV(Element.F64(mRS), Uplo, Ap, X, incX, Y, incY); 2057cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 2058cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 2059cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long apID = Ap.getID(mRS); 2060cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long xID = X.getID(mRS); 2061cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long yID = Y.getID(mRS); 2062cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 2063cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang apID = getDummyAlloc(Ap); 2064cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang xID = getDummyAlloc(X); 2065cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang yID = getDummyAlloc(Y); 2066cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2067cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dspmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, apID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp); 2068cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2069ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 2070ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 2071ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * DGER performs the rank 1 operation 2072ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * A := alpha*x*y**T + A 2073ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 2074ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/dc/da8/dger_8f.html 
2075ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 2076ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param alpha The scalar alpha. 2077ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 2078ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incX The increment for the elements of vector x, must be larger than zero. 2079ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. 2080ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incY The increment for the elements of vector y, must be larger than zero. 2081ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 2082ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 2083cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void DGER(double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 2084cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int M = A.getType().getY(); 2085cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = A.getType().getX(); 2086cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateGER(Element.F64(mRS), X, incX, Y, incY, A); 2087cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 2088cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 2089cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 2090cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long xID = X.getID(mRS); 2091cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long yID = Y.getID(mRS); 2092cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 2093cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 2094cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang xID = getDummyAlloc(X); 
2095cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang yID = getDummyAlloc(Y); 2096cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2097cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dger, 0, 0, 0, 0, 0, M, N, 0, alpha, xID, yID, 0.f, aID, incX, incY, 0, 0, mUseIncSupp); 2098cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2099ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 2100ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 2101ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * DSYR performs the rank 1 operation 2102ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * A := alpha*x*x**T + A 2103ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 2104ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/d3/d60/dsyr_8f.html 2105ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 2106ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 2107ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param alpha The scalar alpha. 2108ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 2109ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incX The increment for the elements of vector x, must be larger than zero. 2110ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 
2111ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 2112cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void DSYR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation A) { 2113cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = validateSYR(Element.F64(mRS), Uplo, X, incX, A); 2114cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 2115cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 2116cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 2117cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long xID = X.getID(mRS); 2118cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 2119cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 2120cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang xID = getDummyAlloc(X); 2121cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2122cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, aID, 0.f, 0, incX, 0, 0, 0, mUseIncSupp); 2123cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2124ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 2125ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 2126ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * DSPR performs the rank 1 operation 2127ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * A := alpha*x*x**T + A 2128ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 2129ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/dd/dba/dspr_8f.html 2130ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 2131ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 2132ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 
2133ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 'a' to packed matrix 'b'. 2134ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * k = 0 2135ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * for i in range(0, n): 2136ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * for j in range(i, n): 2137ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * b[k++] = a[i, j] 2138ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 2139ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. 2140ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param alpha The scalar alpha. 2141ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 2142ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incX The increment for the elements of vector x, must be larger than zero. 2143ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64}. 
2144ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 2145cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void DSPR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Ap) { 2146cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = validateSPR(Element.F64(mRS), Uplo, X, incX, Ap); 2147cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 2148cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 2149cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long apID = Ap.getID(mRS); 2150cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long xID = X.getID(mRS); 2151cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 2152cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang apID = getDummyAlloc(Ap); 2153cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang xID = getDummyAlloc(X); 2154cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2155cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dspr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, apID, 0.f, 0, incX, 0, 0, 0, mUseIncSupp); 2156cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2157ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 2158ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 2159ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * DSYR2 performs the symmetric rank 2 operation 2160ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * A := alpha*x*y**T + alpha*y*x**T + A 2161ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 2162ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/de/d41/dsyr2_8f.html 2163ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 2164ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 2165ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param alpha The scalar alpha. 
2166ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 2167ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incX The increment for the elements of vector x, must be larger than zero. 2168ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. 2169ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incY The increment for the elements of vector y, must be larger than zero. 2170ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 2171ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 2172cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void DSYR2(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 2173cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = validateSYR2(Element.F64(mRS), Uplo, X, incX, Y, incY, A); 2174cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 2175cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 2176cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 2177cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long xID = X.getID(mRS); 2178cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long yID = Y.getID(mRS); 2179cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 2180cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 2181cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang xID = getDummyAlloc(X); 2182cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang yID = getDummyAlloc(Y); 2183cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2184cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, yID, 0, aID, incX, incY, 0, 0, 
mUseIncSupp); 2185cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2186ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 2187ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 2188ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * DSPR2 performs the symmetric rank 2 operation 2189ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * A := alpha*x*y**T + alpha*y*x**T + A 2190ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 2191ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/dd/d9e/dspr2_8f.html 2192ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 2193ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 2194ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 2195ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 'a' to packed matrix 'b'. 2196ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * k = 0 2197ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * for i in range(0, n): 2198ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * for j in range(i, n): 2199ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * b[k++] = a[i, j] 2200ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 2201ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. 2202ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param alpha The scalar alpha. 2203ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 2204ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incX The increment for the elements of vector x, must be larger than zero. 
2205ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. 2206ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incY The increment for the elements of vector y, must be larger than zero. 2207ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64}. 2208ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 2209cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void DSPR2(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) { 2210cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = validateSPR2(Element.F64(mRS), Uplo, X, incX, Y, incY, Ap); 2211cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 2212cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 2213cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long apID = Ap.getID(mRS); 2214cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long xID = X.getID(mRS); 2215cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long yID = Y.getID(mRS); 2216cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 2217cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang apID = getDummyAlloc(Ap); 2218cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang xID = getDummyAlloc(X); 2219cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang yID = getDummyAlloc(Y); 2220cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2221cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dspr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, yID, 0, apID, incX, incY, 0, 0, mUseIncSupp); 2222cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2223cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 2224cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 2225cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang /** 2226cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao 
Wang * Level 2, C and Z only 2227cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang */ 2228cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 2229cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang static void validateGERU(Element e, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 2230cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (!A.getType().getElement().isCompatible(e) || 2231cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang !X.getType().getElement().isCompatible(e) || 2232cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang !Y.getType().getElement().isCompatible(e)) { 2233cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Called BLAS with wrong Element type"); 2234cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2235cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (X.getType().getY() > 1 || Y.getType().getY() > 1) { 2236cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 2237cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2238cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 2239cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int M = A.getType().getY(); 2240cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = A.getType().getX(); 2241cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (incX <= 0 || incY <= 0) { 2242cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Vector increments must be greater than 0"); 2243cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2244cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int expectedXDim = 1 + (M - 1) * incX; 2245cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (X.getType().getX() != expectedXDim) { 2246cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Incorrect vector dimensions for GERU"); 2247cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2248cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int expectedYDim = 
1 + (N - 1) * incY; 2249cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (Y.getType().getX() != expectedYDim) { 2250cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Incorrect vector dimensions for GERU"); 2251cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2252cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 2253cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2254cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 2255ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 2256ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * CHEMV performs the matrix-vector operation 2257ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * y := alpha*A*x + beta*y 2258ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 2259ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/d7/d51/chemv_8f.html 2260ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 2261ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 2262ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param alpha The scalar alpha. 2263ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 2264ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 2265ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incX The increment for the elements of vector x, must be larger than zero. 2266ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param beta The scalar beta. 2267ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. 2268ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incY The increment for the elements of vector y, must be larger than zero. 
2269ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 2270cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void CHEMV(@Uplo int Uplo, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) { 2271cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang // HEMV is the same as SYR2 validation-wise 2272cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = validateSYR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, A); 2273cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 2274cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 2275cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 2276cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long xID = X.getID(mRS); 2277cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long yID = Y.getID(mRS); 2278cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 2279cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 2280cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang xID = getDummyAlloc(X); 2281cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang yID = getDummyAlloc(Y); 2282cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2283cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chemv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp); 2284cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2285ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 2286ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 2287ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * CHBMV performs the matrix-vector operation 2288ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * y := alpha*A*x + beta*y 2289ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 2290ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/db/dc2/chbmv_8f.html 
2291ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 2292ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 2293ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * but only the region N*(K+1) will be referenced. The following subroutine can is an 2294ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 2295ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * for i in range(0, n): 2296ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * for j in range(i, min(i+k+1, n)): 2297ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * b[i, j-i] = a[i, j] 2298ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 2299ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied. 2300ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param K The number of off-diagonals of the matrix A 2301ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param alpha The scalar alpha. 2302ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 2303ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 2304ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incX The increment for the elements of vector x, must be larger than zero. 2305ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param beta The scalar beta. 2306ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. 2307ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incY The increment for the elements of vector y, must be larger than zero. 
2308ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 2309cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void CHBMV(@Uplo int Uplo, int K, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) { 2310cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang // HBMV is the same as SYR2 validation-wise 2311cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = validateSYR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, A); 2312cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (K < 0) { 2313cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("K must be 0 or greater for HBMV"); 2314cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2315cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 2316cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 2317cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 2318cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long xID = X.getID(mRS); 2319cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long yID = Y.getID(mRS); 2320cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 2321cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 2322cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang xID = getDummyAlloc(X); 2323cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang yID = getDummyAlloc(Y); 2324cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2325cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp); 2326cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2327ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 2328ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 2329ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * CHPMV performs the matrix-vector operation 2330ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * y := 
alpha*A*x + beta*y 2331ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 2332ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/d2/d06/chpmv_8f.html 2333ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 2334ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 2335ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 2336ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 'a' to packed matrix 'b'. 2337ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * k = 0 2338ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * for i in range(0, n): 2339ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * for j in range(i, n): 2340ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * b[k++] = a[i, j] 2341ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 2342ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form. 2343ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param alpha The scalar alpha. 2344ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 2345ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 2346ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incX The increment for the elements of vector x, must be larger than zero. 2347ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param beta The scalar beta. 2348ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. 
2349ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incY The increment for the elements of vector y, must be larger than zero. 2350ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 2351cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void CHPMV(@Uplo int Uplo, Float2 alpha, Allocation Ap, Allocation X, int incX, Float2 beta, Allocation Y, int incY) { 2352cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang // HPMV is the same as SPR2 2353cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = validateSPR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, Ap); 2354cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 2355cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 2356cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long apID = Ap.getID(mRS); 2357cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long xID = X.getID(mRS); 2358cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long yID = Y.getID(mRS); 2359cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 2360cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang apID = getDummyAlloc(Ap); 2361cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang xID = getDummyAlloc(X); 2362cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang yID = getDummyAlloc(Y); 2363cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2364cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chpmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, apID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp); 2365cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2366ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 2367ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 2368ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * CGERU performs the rank 1 operation 2369ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * A := alpha*x*y**T + A 2370ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 2371ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 
Details: http://www.netlib.org/lapack/explore-html/db/d5f/cgeru_8f.html 2372ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 2373ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param alpha The scalar alpha. 2374ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 2375ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incX The increment for the elements of vector x, must be larger than zero. 2376ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. 2377ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incY The increment for the elements of vector y, must be larger than zero. 2378ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 2379ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 2380cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void CGERU(Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 2381cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateGERU(Element.F32_2(mRS), X, incX, Y, incY, A); 2382cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int M = A.getType().getY(); 2383cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = A.getType().getX(); 2384cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 2385cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 2386cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 2387cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long xID = X.getID(mRS); 2388cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long yID = Y.getID(mRS); 2389cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 2390cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 
2391cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang xID = getDummyAlloc(X); 2392cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang yID = getDummyAlloc(Y); 2393cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2394cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgeru, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, aID, incX, incY, 0, 0, mUseIncSupp); 2395cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2396ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 2397ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 2398ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * CGERC performs the rank 1 operation 2399ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * A := alpha*x*y**H + A 2400ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 2401ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/dd/d84/cgerc_8f.html 2402ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 2403ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param alpha The scalar alpha. 2404ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 2405ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incX The increment for the elements of vector x, must be larger than zero. 2406ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. 2407ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incY The increment for the elements of vector y, must be larger than zero. 2408ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 
2409ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 2410cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void CGERC(Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 2411cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang // same as GERU 2412cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateGERU(Element.F32_2(mRS), X, incX, Y, incY, A); 2413cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int M = A.getType().getY(); 2414cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = A.getType().getX(); 2415cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 2416cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 2417cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 2418cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long xID = X.getID(mRS); 2419cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long yID = Y.getID(mRS); 2420cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 2421cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 2422cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang xID = getDummyAlloc(X); 2423cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang yID = getDummyAlloc(Y); 2424cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2425cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgerc, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, aID, incX, incY, 0, 0, mUseIncSupp); 2426cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2427ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 2428ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 2429ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * CHER performs the rank 1 operation 2430ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * A := alpha*x*x**H + A 2431ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 2432ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: 
http://www.netlib.org/lapack/explore-html/d3/d6d/cher_8f.html 2433ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 2434ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 2435ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param alpha The scalar alpha. 2436ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 2437ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incX The increment for the elements of vector x, must be larger than zero. 2438ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 2439ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 2440cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void CHER(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation A) { 2441cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang // same as SYR 2442cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = validateSYR(Element.F32_2(mRS), Uplo, X, incX, A); 2443cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 2444cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 2445cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 2446cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long xID = X.getID(mRS); 2447cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 2448cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 2449cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang xID = getDummyAlloc(X); 2450cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2451cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cher, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, xID, 0, 0, 0, aID, incX, 0, 0, 0, mUseIncSupp); 2452cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao 
Wang } 2453ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 2454ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 2455ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * CHPR performs the rank 1 operation 2456ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * A := alpha*x*x**H + A 2457ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 2458ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/db/dcd/chpr_8f.html 2459ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 2460ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2. 2461ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * The following subroutine is an example showing how to convert an UPPER triangular matrix 2462ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 'a' to packed matrix 'b'. 2463ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * k = 0 2464ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * for i in range(0, n): 2465ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * for j in range(i, n): 2466ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * b[k++] = a[i, j] 2467ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 2468ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. 2469ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param alpha The scalar alpha. 2470ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 2471ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incX The increment for the elements of vector x, must be larger than zero. 2472ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 
2473ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 2474cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void CHPR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Ap) { 2475cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang // equivalent to SPR for validation 2476cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = validateSPR(Element.F32_2(mRS), Uplo, X, incX, Ap); 2477cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 2478cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 2479cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long apID = Ap.getID(mRS); 2480cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long xID = X.getID(mRS); 2481cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 2482cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang apID = getDummyAlloc(Ap); 2483cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang xID = getDummyAlloc(X); 2484cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2485cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chpr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, xID, 0, 0, 0, apID, incX, 0, 0, 0, mUseIncSupp); 2486cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2487ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 2488ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 2489ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * CHER2 performs the Hermitian rank 2 operation 2490ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * A := alpha*x*y**H + conjg(alpha)*y*x**H + A 2491ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 2492ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/db/d87/cher2_8f.html 2493ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 2494ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 
2495ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param alpha The scalar alpha. 2496ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 2497ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incX The increment for the elements of vector x, must be larger than zero. 2498ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. 2499ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incY The increment for the elements of vector y, must be larger than zero. 2500ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 2501ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 2502cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void CHER2(@Uplo int Uplo, Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 2503cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang // same as SYR2 2504cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = validateSYR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, A); 2505cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 2506cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 2507cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 2508cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long xID = X.getID(mRS); 2509cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long yID = Y.getID(mRS); 2510cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 2511cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 2512cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang xID = getDummyAlloc(X); 2513cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang yID = getDummyAlloc(Y); 2514cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 
2515cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cher2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, aID, incX, incY, 0, 0, mUseIncSupp); 2516cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2517ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 2518ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 2519ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * CHPR2 performs the Hermitian rank 2 operation 2520ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * A := alpha*x*y**H + conjg(alpha)*y*x**H + A 2521ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 2522ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/d6/d44/chpr2_8f.html 2523ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 2524ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2. 2525ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * The following subroutine is an example showing how to convert an UPPER triangular matrix 2526ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 'a' to packed matrix 'b'. 2527ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * k = 0 2528ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * for i in range(0, n): 2529ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * for j in range(i, n): 2530ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * b[k++] = a[i, j] 2531ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 2532ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. 2533ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param alpha The scalar alpha. 2534ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 
2535ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incX The increment for the elements of vector x, must be larger than zero. 2536ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. 2537ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incY The increment for the elements of vector y, must be larger than zero. 2538ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 2539ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 2540cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void CHPR2(@Uplo int Uplo, Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) { 2541cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang // same as SPR2 2542cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = validateSPR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, Ap); 2543cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 2544cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 2545cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long apID = Ap.getID(mRS); 2546cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long xID = X.getID(mRS); 2547cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long yID = Y.getID(mRS); 2548cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 2549cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang apID = getDummyAlloc(Ap); 2550cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang xID = getDummyAlloc(X); 2551cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang yID = getDummyAlloc(Y); 2552cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2553cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chpr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, apID, incX, incY, 0, 0, mUseIncSupp); 
2554cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2555ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 2556ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 2557ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * ZHEMV performs the matrix-vector operation 2558ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * y := alpha*A*x + beta*y 2559ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 2560ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/d0/ddd/zhemv_8f.html 2561ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 2562ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 2563ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param alpha The scalar alpha. 2564ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2565ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2566ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incX The increment for the elements of vector x, must be larger than zero. 2567ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param beta The scalar beta. 2568ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. 2569ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incY The increment for the elements of vector y, must be larger than zero. 
2570ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 2571cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void ZHEMV(@Uplo int Uplo, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) { 2572cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang // HEMV is the same as SYR2 validation-wise 2573cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = validateSYR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, A); 2574cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 2575cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 2576cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 2577cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long xID = X.getID(mRS); 2578cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long yID = Y.getID(mRS); 2579cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 2580cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 2581cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang xID = getDummyAlloc(X); 2582cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang yID = getDummyAlloc(Y); 2583cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2584cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhemv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp); 2585cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2586ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 2587ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 2588ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * ZHBMV performs the matrix-vector operation 2589ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * y := alpha*A*x + beta*y 2590ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 2591ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/d3/d1a/zhbmv_8f.html 2592ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao 
Wang * 2593ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 2594ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * but only the region N*(K+1) will be referenced. The following subroutine is an 2595ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'. 2596ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * for i in range(0, n): 2597ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * for j in range(i, min(i+k+1, n)): 2598ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * b[i, j-i] = a[i, j] 2599ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 2600ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied. 2601ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param K The number of off-diagonals of the matrix A. 2602ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param alpha The scalar alpha. 2603ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2604ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2605ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incX The increment for the elements of vector x, must be larger than zero. 2606ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param beta The scalar beta. 2607ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. 2608ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incY The increment for the elements of vector y, must be larger than zero. 
2609ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 2610cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void ZHBMV(@Uplo int Uplo, int K, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) { 2611cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang // HBMV is the same as SYR2 validation-wise 2612cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = validateSYR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, A); 2613cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (K < 0) { 2614cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("K must be 0 or greater for HBMV"); 2615cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2616cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 2617cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 2618cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 2619cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long xID = X.getID(mRS); 2620cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long yID = Y.getID(mRS); 2621cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 2622cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 2623cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang xID = getDummyAlloc(X); 2624cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang yID = getDummyAlloc(Y); 2625cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2626cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp); 2627cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2628ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 2629ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 2630ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * ZHPMV performs the matrix-vector operation 2631ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * y := 
alpha*A*x + beta*y 2632ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 2633ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/d0/d60/zhpmv_8f.html 2634ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 2635ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2. 2636ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * The following subroutine is an example showing how to convert an UPPER triangular matrix 2637ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 'a' to packed matrix 'b'. 2638ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * k = 0 2639ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * for i in range(0, n): 2640ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * for j in range(i, n): 2641ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * b[k++] = a[i, j] 2642ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 2643ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form. 2644ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param alpha The scalar alpha. 2645ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2646ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2647ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incX The increment for the elements of vector x, must be larger than zero. 2648ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param beta The scalar beta. 2649ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. 
2650ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incY The increment for the elements of vector y, must be larger than zero. 2651ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 2652cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void ZHPMV(@Uplo int Uplo, Double2 alpha, Allocation Ap, Allocation X, int incX, Double2 beta, Allocation Y, int incY) { 2653cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang // HPMV is the same as SPR2 2654cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = validateSPR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, Ap); 2655cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 2656cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 2657cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long apID = Ap.getID(mRS); 2658cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long xID = X.getID(mRS); 2659cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long yID = Y.getID(mRS); 2660cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 2661cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang apID = getDummyAlloc(Ap); 2662cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang xID = getDummyAlloc(X); 2663cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang yID = getDummyAlloc(Y); 2664cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2665cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhpmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, apID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp); 2666cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2667ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 2668ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 2669ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * ZGERU performs the rank 1 operation 2670ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * A := alpha*x*y**T + A 2671ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 2672ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 
Details: http://www.netlib.org/lapack/explore-html/d7/d12/zgeru_8f.html 2673ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 2674ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param alpha The scalar alpha. 2675ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2676ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incX The increment for the elements of vector x, must be larger than zero. 2677ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. 2678ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incY The increment for the elements of vector y, must be larger than zero. 2679ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2680ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 2681cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void ZGERU(Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 2682cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateGERU(Element.F64_2(mRS), X, incX, Y, incY, A); 2683cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int M = A.getType().getY(); 2684cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = A.getType().getX(); 2685cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 2686cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 2687cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 2688cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long xID = X.getID(mRS); 2689cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long yID = Y.getID(mRS); 2690cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 2691cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 
2692cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang xID = getDummyAlloc(X); 2693cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang yID = getDummyAlloc(Y); 2694cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2695cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgeru, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, aID, incX, incY, 0, 0, mUseIncSupp); 2696cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2697ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 2698ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 2699ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * ZGERC performs the rank 1 operation 2700ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * A := alpha*x*y**H + A 2701ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 2702ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/d3/dad/zgerc_8f.html 2703ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 2704ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param alpha The scalar alpha. 2705ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2706ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incX The increment for the elements of vector x, must be larger than zero. 2707ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. 2708ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incY The increment for the elements of vector y, must be larger than zero. 2709ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 
2710ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 2711cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void ZGERC(Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 2712cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang // same as GERU 2713cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateGERU(Element.F64_2(mRS), X, incX, Y, incY, A); 2714cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int M = A.getType().getY(); 2715cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = A.getType().getX(); 2716cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 2717cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 2718cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 2719cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long xID = X.getID(mRS); 2720cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long yID = Y.getID(mRS); 2721cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 2722cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 2723cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang xID = getDummyAlloc(X); 2724cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang yID = getDummyAlloc(Y); 2725cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2726cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgerc, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, aID, incX, incY, 0, 0, mUseIncSupp); 2727cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2728ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 2729ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 2730ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * ZHER performs the rank 1 operation 2731ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * A := alpha*x*x**H + A 2732ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 2733ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: 
http://www.netlib.org/lapack/explore-html/de/d0e/zher_8f.html 2734ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 2735ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 2736ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param alpha The scalar alpha. 2737ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2738ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incX The increment for the elements of vector x, must be larger than zero. 2739ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2740ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 2741cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void ZHER(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation A) { 2742cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang // same as SYR 2743cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = validateSYR(Element.F64_2(mRS), Uplo, X, incX, A); 2744cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 2745cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 2746cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 2747cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long xID = X.getID(mRS); 2748cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 2749cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 2750cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang xID = getDummyAlloc(X); 2751cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2752cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zher, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, xID, 0, 0, 0, aID, incX, 0, 0, 0, mUseIncSupp); 2753cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 
} 2754ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 2755ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 2756ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * ZHPR performs the rank 1 operation 2757ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * A := alpha*x*x**H + A 2758ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 2759ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/de/de1/zhpr_8f.html 2760ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 2761ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2. 2762ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * The following subroutine is an example showing how to convert an UPPER triangular matrix 2763ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 'a' to packed matrix 'b'. 2764ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * k = 0 2765ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * for i in range(0, n): 2766ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * for j in range(i, n): 2767ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * b[k++] = a[i, j] 2768ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 2769ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. 2770ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param alpha The scalar alpha. 2771ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2772ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incX The increment for the elements of vector x, must be larger than zero. 2773ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 
2774ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 2775cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void ZHPR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Ap) { 2776cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang // equivalent to SPR for validation 2777cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = validateSPR(Element.F64_2(mRS), Uplo, X, incX, Ap); 2778cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 2779cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 2780cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long apID = Ap.getID(mRS); 2781cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long xID = X.getID(mRS); 2782cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 2783cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang apID = getDummyAlloc(Ap); 2784cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang xID = getDummyAlloc(X); 2785cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2786cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhpr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, xID, 0, 0, 0, apID, incX, 0, 0, 0, mUseIncSupp); 2787cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2788ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 2789ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 2790ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * ZHER2 performs the symmetric rank 2 operation 2791ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * A := alpha*x*y**H + alpha*y*x**H + A 2792ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 2793ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/da/d8a/zher2_8f.html 2794ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 2795ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 
2796ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param alpha The scalar alpha. 2797ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2798ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incX The increment for the elements of vector x, must be larger than zero. 2799ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. 2800ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incY The increment for the elements of vector y, must be larger than zero. 2801ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2802ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 2803cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void ZHER2(@Uplo int Uplo, Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 2804cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang // same as SYR2 2805cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = validateSYR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, A); 2806cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 2807cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 2808cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 2809cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long xID = X.getID(mRS); 2810cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long yID = Y.getID(mRS); 2811cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 2812cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 2813cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang xID = getDummyAlloc(X); 2814cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang yID = getDummyAlloc(Y); 2815cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 
2816cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zher2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, aID, incX, incY, 0, 0, mUseIncSupp); 2817cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2818ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 2819ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 2820ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * ZHPR2 performs the symmetric rank 2 operation 2821ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * A := alpha*x*y**H + alpha*y*x**H + A 2822ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 2823ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/d5/d52/zhpr2_8f.html 2824ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 2825ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 2826ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 2827ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 'a' to packed matrix 'b'. 2828ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * k = 0 2829ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * for i in range(0, n): 2830ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * for j in range(i, n): 2831ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * b[k++] = a[i, j] 2832ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 2833ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. 2834ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param alpha The scalar alpha. 2835ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 
2836ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incX The increment for the elements of vector x, must be larger than zero. 2837ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. 2838ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param incY The increment for the elements of vector y, must be larger than zero. 2839ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2840ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 2841cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void ZHPR2(@Uplo int Uplo, Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) { 2842cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang // same as SPR2 2843cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int N = validateSPR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, Ap); 2844cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 2845cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 2846cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long apID = Ap.getID(mRS); 2847cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long xID = X.getID(mRS); 2848cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long yID = Y.getID(mRS); 2849cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 2850cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang apID = getDummyAlloc(Ap); 2851cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang xID = getDummyAlloc(X); 2852cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang yID = getDummyAlloc(Y); 2853cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2854cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhpr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, apID, incX, incY, 0, 0, mUseIncSupp); 
2855cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2856cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 2857cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 2858cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang /** 2859cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang * Level 3 BLAS 2860cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang */ 2861cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 2862cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang static void validateL3(Element e, int TransA, int TransB, int Side, Allocation A, Allocation B, Allocation C) { 2863cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int aM = -1, aN = -1, bM = -1, bN = -1, cM = -1, cN = -1; 2864cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if ((A != null && !A.getType().getElement().isCompatible(e)) || 2865cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang (B != null && !B.getType().getElement().isCompatible(e)) || 2866cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang (C != null && !C.getType().getElement().isCompatible(e))) { 2867cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Called BLAS with wrong Element type"); 2868cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2869cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (C == null) { 2870cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang //since matrix C is used to store the result, it cannot be null. 
2871cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Allocation C cannot be null"); 2872cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2873cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang cM = C.getType().getY(); 2874cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang cN = C.getType().getX(); 2875cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 2876cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (Side == RIGHT) { 2877cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if ((A == null && B != null) || (A != null && B == null)) { 2878cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Provided Matrix A without Matrix B, or vice versa"); 2879cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2880cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (B != null) { 2881cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang bM = A.getType().getY(); 2882cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang bN = A.getType().getX(); 2883cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2884cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (A != null) { 2885cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aM = B.getType().getY(); 2886cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aN = B.getType().getX(); 2887cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2888cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } else { 2889cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (A != null) { 2890cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (TransA == TRANSPOSE || TransA == CONJ_TRANSPOSE) { 2891cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aN = A.getType().getY(); 2892cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aM = A.getType().getX(); 2893cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } else { 2894cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aM = A.getType().getY(); 2895cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aN = A.getType().getX(); 
2896cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2897cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2898cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (B != null) { 2899cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (TransB == TRANSPOSE || TransB == CONJ_TRANSPOSE) { 2900cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang bN = B.getType().getY(); 2901cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang bM = B.getType().getX(); 2902cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } else { 2903cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang bM = B.getType().getY(); 2904cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang bN = B.getType().getX(); 2905cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2906cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2907cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2908cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (A != null && B != null && C != null) { 2909cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (aN != bM || aM != cM || bN != cN) { 2910cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Called BLAS with invalid dimensions"); 2911cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2912cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } else if (A != null && C != null) { 2913cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang // A and C only, for SYRK 2914cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (cM != cN) { 2915cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Matrix C is not symmetric"); 2916cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2917cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (aM != cM) { 2918cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Called BLAS with invalid dimensions"); 2919cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2920cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } else if (A != null && B != null) { 
2921cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang // A and B only 2922cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (aN != bM) { 2923cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Called BLAS with invalid dimensions"); 2924cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2925cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2926cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 2927cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2928cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 2929ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 2930ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * SGEMM performs one of the matrix-matrix operations 2931ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * C := alpha*op(A)*op(B) + beta*C where op(X) is one of op(X) = X or op(X) = X**T 2932ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 2933ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/d4/de2/sgemm_8f.html 2934ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 2935ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param TransA The type of transpose applied to matrix A. 2936ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param TransB The type of transpose applied to matrix B. 2937ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param alpha The scalar alpha. 2938ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 2939ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}. 2940ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param beta The scalar beta. 2941ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}. 
2942ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 2943cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void SGEMM(@Transpose int TransA, @Transpose int TransB, float alpha, Allocation A, 2944cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang Allocation B, float beta, Allocation C) { 2945cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateTranspose(TransA); 2946cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateTranspose(TransB); 2947cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateL3(Element.F32(mRS), TransA, TransB, 0, A, B, C); 2948cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 2949cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int M = -1, N = -1, K = -1; 2950cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (TransA != NO_TRANSPOSE) { 2951cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang M = A.getType().getX(); 2952cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang K = A.getType().getY(); 2953cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } else { 2954cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang M = A.getType().getY(); 2955cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang K = A.getType().getX(); 2956cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2957cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (TransB != NO_TRANSPOSE) { 2958cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang N = B.getType().getY(); 2959cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } else { 2960cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang N = B.getType().getX(); 2961cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2962cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 2963cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 2964cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 2965cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long bID = B.getID(mRS); 2966cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long cID = C.getID(mRS); 
2967cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 2968cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 2969cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang bID = getDummyAlloc(B); 2970cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang cID = getDummyAlloc(C); 2971cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2972cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sgemm, TransA, TransB, 0, 0, 0, M, N, K, alpha, aID, bID, 2973cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang beta, cID, 0, 0, 0, 0, mUseIncSupp); 2974cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 2975ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 2976ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 2977ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * DGEMM performs one of the matrix-matrix operations 2978ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * C := alpha*op(A)*op(B) + beta*C where op(X) is one of op(X) = X or op(X) = X**T 2979ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 2980ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/d7/d2b/dgemm_8f.html 2981ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 2982ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param TransA The type of transpose applied to matrix A. 2983ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param TransB The type of transpose applied to matrix B. 2984ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param alpha The scalar alpha. 2985ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 2986ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}. 2987ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param beta The scalar beta. 
2988ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}. 2989ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 2990cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void DGEMM(@Transpose int TransA, @Transpose int TransB, double alpha, Allocation A, 2991cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang Allocation B, double beta, Allocation C) { 2992cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateTranspose(TransA); 2993cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateTranspose(TransB); 2994cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateL3(Element.F64(mRS), TransA, TransB, 0, A, B, C); 2995cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int M = -1, N = -1, K = -1; 2996cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (TransA != NO_TRANSPOSE) { 2997cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang M = A.getType().getX(); 2998cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang K = A.getType().getY(); 2999cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } else { 3000cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang M = A.getType().getY(); 3001cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang K = A.getType().getX(); 3002cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3003cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (TransB != NO_TRANSPOSE) { 3004cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang N = B.getType().getY(); 3005cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } else { 3006cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang N = B.getType().getX(); 3007cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3008cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 3009cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 3010cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 3011cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long bID = B.getID(mRS); 
3012cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long cID = C.getID(mRS); 3013cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 3014cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 3015cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang bID = getDummyAlloc(B); 3016cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang cID = getDummyAlloc(C); 3017cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3018cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dgemm, TransA, TransB, 0, 0, 0, M, N, K, alpha, aID, bID, 3019cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang beta, cID, 0, 0, 0, 0, mUseIncSupp); 3020cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3021ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 3022ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 3023ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * CGEMM performs one of the matrix-matrix operations 3024ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * C := alpha*op(A)*op(B) + beta*C where op(X) is one of op(X) = X or op(X) = X**T or op(X) = X**H 3025ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 3026ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/d6/d5b/cgemm_8f.html 3027ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 3028ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param TransA The type of transpose applied to matrix A. 3029ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param TransB The type of transpose applied to matrix B. 3030ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param alpha The scalar alpha. 3031ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 3032ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}. 
3033ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param beta The scalar beta. 3034ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}. 3035ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 3036cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void CGEMM(@Transpose int TransA, @Transpose int TransB, Float2 alpha, Allocation A, 3037cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang Allocation B, Float2 beta, Allocation C) { 3038cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateTranspose(TransA); 3039cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateTranspose(TransB); 3040cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateL3(Element.F32_2(mRS), TransA, TransB, 0, A, B, C); 3041cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int M = -1, N = -1, K = -1; 3042cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (TransA != NO_TRANSPOSE) { 3043cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang M = A.getType().getX(); 3044cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang K = A.getType().getY(); 3045cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } else { 3046cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang M = A.getType().getY(); 3047cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang K = A.getType().getX(); 3048cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3049cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (TransB != NO_TRANSPOSE) { 3050cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang N = B.getType().getY(); 3051cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } else { 3052cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang N = B.getType().getX(); 3053cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3054cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 3055cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 3056cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 
3057cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long bID = B.getID(mRS); 3058cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long cID = C.getID(mRS); 3059cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 3060cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 3061cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang bID = getDummyAlloc(B); 3062cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang cID = getDummyAlloc(C); 3063cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3064cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgemm, TransA, TransB, 0, 0, 0, M, N, K, alpha.x, alpha.y, aID, bID, 3065cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp); 3066cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3067cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 3068ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 3069ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * ZGEMM performs one of the matrix-matrix operations 3070ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * C := alpha*op(A)*op(B) + beta*C where op(X) is one of op(X) = X or op(X) = X**T or op(X) = X**H 3071ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 3072ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/d7/d76/zgemm_8f.html 3073ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 3074ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param TransA The type of transpose applied to matrix A. 3075ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param TransB The type of transpose applied to matrix B. 3076ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param alpha The scalar alpha. 
3077ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2 3078ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2 3079ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param beta The scalar beta. 3080ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2 3081ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 3082cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void ZGEMM(@Transpose int TransA, @Transpose int TransB, Double2 alpha, Allocation A, 3083cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang Allocation B, Double2 beta, Allocation C) { 3084cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateTranspose(TransA); 3085cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateTranspose(TransB); 3086cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateL3(Element.F64_2(mRS), TransA, TransB, 0, A, B, C); 3087cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int M = -1, N = -1, K = -1; 3088cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (TransA != NO_TRANSPOSE) { 3089cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang M = A.getType().getX(); 3090cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang K = A.getType().getY(); 3091cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } else { 3092cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang M = A.getType().getY(); 3093cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang K = A.getType().getX(); 3094cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3095cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (TransB != NO_TRANSPOSE) { 3096cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang N = B.getType().getY(); 3097cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } else { 3098cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang N = 
B.getType().getX(); 3099cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3100cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 3101cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 3102cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 3103cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long bID = B.getID(mRS); 3104cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long cID = C.getID(mRS); 3105cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 3106cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 3107cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang bID = getDummyAlloc(B); 3108cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang cID = getDummyAlloc(C); 3109cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3110cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgemm, TransA, TransB, 0, 0, 0, M, N, K, alpha.x, alpha.y, aID, bID, 3111cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp); 3112cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3113cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 3114ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 3115ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * SSYMM performs one of the matrix-matrix operations 3116ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C 3117ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 3118ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/d7/d42/ssymm_8f.html 3119ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 3120ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3121ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 
3122ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param alpha The scalar alpha. 3123ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 3124ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}. 3125ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param beta The scalar beta. 3126ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}. 3127ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 3128cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void SSYMM(@Side int Side, @Uplo int Uplo, float alpha, Allocation A, 3129cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang Allocation B, float beta, Allocation C) { 3130cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateSide(Side); 3131cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateUplo(Uplo); 3132cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang //For SYMM, Matrix A should be symmetric 3133cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (A.getType().getX() != A.getType().getY()) { 3134cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Matrix A is not symmetric"); 3135cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3136cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateL3(Element.F32(mRS), 0, 0, Side, A, B, C); 3137cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 3138cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 3139cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 3140cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long bID = B.getID(mRS); 3141cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long cID = C.getID(mRS); 3142cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 
3143cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 3144cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang bID = getDummyAlloc(B); 3145cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang cID = getDummyAlloc(C); 3146cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3147cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha, aID, bID, 3148cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang beta, cID, 0, 0, 0, 0, mUseIncSupp); 3149cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3150ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 3151ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 3152ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * DSYMM performs one of the matrix-matrix operations 3153ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C 3154ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 3155ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/d8/db0/dsymm_8f.html 3156ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 3157ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3158ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 3159ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param alpha The scalar alpha. 3160ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 3161ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}. 3162ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param beta The scalar beta. 
3163ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}. 3164ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 3165cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void DSYMM(@Side int Side, @Uplo int Uplo, double alpha, Allocation A, 3166cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang Allocation B, double beta, Allocation C) { 3167cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateSide(Side); 3168cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateUplo(Uplo); 3169cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (A.getType().getX() != A.getType().getY()) { 3170cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Matrix A is not symmetric"); 3171cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3172cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateL3(Element.F64(mRS), 0, 0, Side, A, B, C); 3173cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 3174cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 3175cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 3176cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long bID = B.getID(mRS); 3177cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long cID = C.getID(mRS); 3178cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 3179cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 3180cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang bID = getDummyAlloc(B); 3181cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang cID = getDummyAlloc(C); 3182cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3183cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha, aID, bID, 3184cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang beta, cID, 0, 0, 0, 0, mUseIncSupp); 
3185cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3186ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 3187ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 3188ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * CSYMM performs one of the matrix-matrix operations 3189ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C 3190ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 3191ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/db/d59/csymm_8f.html 3192ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 3193ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3194ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 3195ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param alpha The scalar alpha. 3196ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 3197ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}. 3198ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param beta The scalar beta. 3199ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}. 
3200ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 3201cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void CSYMM(@Side int Side, @Uplo int Uplo, Float2 alpha, Allocation A, 3202cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang Allocation B, Float2 beta, Allocation C) { 3203cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateSide(Side); 3204cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateUplo(Uplo); 3205cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (A.getType().getX() != A.getType().getY()) { 3206cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Matrix A is not symmetric"); 3207cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3208cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateL3(Element.F32_2(mRS), 0, 0, Side, A, B, C); 3209cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 3210cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 3211cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 3212cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long bID = B.getID(mRS); 3213cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long cID = C.getID(mRS); 3214cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 3215cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 3216cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang bID = getDummyAlloc(B); 3217cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang cID = getDummyAlloc(C); 3218cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3219cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_csymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha.x, alpha.y, aID, bID, 3220cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp); 3221cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3222ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 
3223ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 3224ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * ZSYMM performs one of the matrix-matrix operations 3225ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C 3226ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 3227ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/df/d51/zsymm_8f.html 3228ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 3229ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3230ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 3231ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param alpha The scalar alpha. 3232ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 3233ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}. 3234ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param beta The scalar beta. 3235ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}. 
3236ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 3237cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void ZSYMM(@Side int Side, @Uplo int Uplo, Double2 alpha, Allocation A, 3238cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang Allocation B, Double2 beta, Allocation C) { 3239cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateSide(Side); 3240cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateUplo(Uplo); 3241cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (A.getType().getX() != A.getType().getY()) { 3242cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Matrix A is not symmetric"); 3243cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3244cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateL3(Element.F64_2(mRS), 0, 0, Side, A, B, C); 3245cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 3246cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 3247cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 3248cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long bID = B.getID(mRS); 3249cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long cID = C.getID(mRS); 3250cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 3251cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 3252cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang bID = getDummyAlloc(B); 3253cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang cID = getDummyAlloc(C); 3254cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3255cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zsymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha.x, alpha.y, aID, bID, 3256cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp); 3257cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3258cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 
3259ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 3260ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * SSYRK performs one of the symmetric rank k operations 3261ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * C := alpha*A*A**T + beta*C or C := alpha*A**T*A + beta*C 3262ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 3263ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/d0/d40/ssyrk_8f.html 3264ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 3265ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 3266ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Trans The type of transpose applied to the operation. 3267ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param alpha The scalar alpha. 3268ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 3269ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param beta The scalar beta. 3270ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}. 
3271ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 3272cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void SSYRK(@Uplo int Uplo, @Transpose int Trans, float alpha, Allocation A, float beta, Allocation C) { 3273cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateTranspose(Trans); 3274cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateUplo(Uplo); 3275cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateL3(Element.F32(mRS), Trans, 0, 0, A, null, C); 3276cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int K = -1; 3277cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (Trans != NO_TRANSPOSE) { 3278cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang K = A.getType().getY(); 3279cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } else { 3280cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang K = A.getType().getX(); 3281cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3282cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 3283cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 3284cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 3285cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long cID = C.getID(mRS); 3286cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 3287cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 3288cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang cID = getDummyAlloc(C); 3289cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3290cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, aID, 0, beta, cID, 0, 0, 0, 0, mUseIncSupp); 3291cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3292cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 3293ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 3294ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * DSYRK performs one of the symmetric rank k operations 
3295ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * C := alpha*A*A**T + beta*C or C := alpha*A**T*A + beta*C 3296ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 3297ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/dc/d05/dsyrk_8f.html 3298ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 3299ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 3300ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Trans The type of transpose applied to the operation. 3301ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param alpha The scalar alpha. 3302ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 3303ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param beta The scalar beta. 3304ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}. 
3305ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 3306cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void DSYRK(@Uplo int Uplo, @Transpose int Trans, double alpha, Allocation A, double beta, Allocation C) { 3307cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateTranspose(Trans); 3308cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateUplo(Uplo); 3309cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateL3(Element.F64(mRS), Trans, 0, 0, A, null, C); 3310cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int K = -1; 3311cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (Trans != NO_TRANSPOSE) { 3312cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang K = A.getType().getY(); 3313cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } else { 3314cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang K = A.getType().getX(); 3315cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3316cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 3317cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 3318cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 3319cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long cID = C.getID(mRS); 3320cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 3321cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 3322cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang cID = getDummyAlloc(C); 3323cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3324cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, aID, 0, beta, cID, 0, 0, 0, 0, mUseIncSupp); 3325cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3326ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 3327ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 3328ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * CSYRK performs one of the symmetric rank k operations 
3329ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * C := alpha*A*A**T + beta*C or C := alpha*A**T*A + beta*C 3330ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 3331ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/d3/d6a/csyrk_8f.html 3332ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 3333ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 3334ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Trans The type of transpose applied to the operation. 3335ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param alpha The scalar alpha. 3336ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 3337ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param beta The scalar beta. 3338ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}. 
3339ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 3340cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void CSYRK(@Uplo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Float2 beta, Allocation C) { 3341cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateTranspose(Trans); 3342cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateUplo(Uplo); 3343cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateL3(Element.F32_2(mRS), Trans, 0, 0, A, null, C); 3344cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int K = -1; 3345cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (Trans != NO_TRANSPOSE) { 3346cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang K = A.getType().getY(); 3347cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } else { 3348cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang K = A.getType().getX(); 3349cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3350cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 3351cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 3352cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 3353cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long cID = C.getID(mRS); 3354cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 3355cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 3356cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang cID = getDummyAlloc(C); 3357cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3358cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_csyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, aID, 0, beta.x, beta.y, 3359cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang C.getID(mRS), 0, 0, 0, 0, mUseIncSupp); 3360cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3361ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 3362ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 
3363ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * ZSYRK performs one of the symmetric rank k operations 3364ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * C := alpha*A*A**T + beta*C or C := alpha*A**T*A + beta*C 3365ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 3366ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/de/d54/zsyrk_8f.html 3367ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 3368ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 3369ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Trans The type of transpose applied to the operation. 3370ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param alpha The scalar alpha. 3371ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 3372ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param beta The scalar beta. 3373ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}. 
3374ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 3375cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void ZSYRK(@Uplo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Double2 beta, Allocation C) { 3376cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateTranspose(Trans); 3377cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateUplo(Uplo); 3378cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateL3(Element.F64_2(mRS), Trans, 0, 0, A, null, C); 3379cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int K = -1; 3380cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (Trans != NO_TRANSPOSE) { 3381cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang K = A.getType().getY(); 3382cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } else { 3383cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang K = A.getType().getX(); 3384cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3385cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 3386cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 3387cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 3388cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long cID = C.getID(mRS); 3389cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 3390cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 3391cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang cID = getDummyAlloc(C); 3392cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3393cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zsyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, aID, 0, beta.x, beta.y, 3394cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang C.getID(mRS), 0, 0, 0, 0, mUseIncSupp); 3395cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3396cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 3397cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang static void validateSYR2K(Element e, 
@Transpose int Trans, Allocation A, Allocation B, Allocation C) { 3398cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateTranspose(Trans); 3399cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (!A.getType().getElement().isCompatible(e) || 3400cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang !B.getType().getElement().isCompatible(e) || 3401cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang !C.getType().getElement().isCompatible(e)) { 3402cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Called BLAS with wrong Element type"); 3403cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3404cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int Cdim = -1; 3405cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang // A is n x k if no transpose, k x n if transpose 3406cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang // C is n x n 3407cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (Trans == TRANSPOSE) { 3408cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang // check columns versus C 3409cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang Cdim = A.getType().getX(); 3410cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } else { 3411cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang // check rows versus C 3412cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang Cdim = A.getType().getY(); 3413cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3414cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (C.getType().getX() != Cdim || C.getType().getY() != Cdim) { 3415cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Invalid symmetric matrix in SYR2K"); 3416cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3417cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang // A dims == B dims 3418cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (A.getType().getX() != B.getType().getX() || A.getType().getY() != B.getType().getY()) { 3419cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Invalid A and B 
in SYR2K"); 3420cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3421cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3422ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 3423ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 3424ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * SSYR2K performs one of the symmetric rank 2k operations 3425ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * C := alpha*A*B**T + alpha*B*A**T + beta*C or C := alpha*A**T*B + alpha*B**T*A + beta*C 3426ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 3427ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/df/d3d/ssyr2k_8f.html 3428ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 3429ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 3430ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Trans The type of transpose applied to the operation. 3431ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param alpha The scalar alpha. 3432ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 3433ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}. 3434ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param beta The scalar beta. 3435ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}. 
3436ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 3437cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void SSYR2K(@Uplo int Uplo, @Transpose int Trans, float alpha, Allocation A, Allocation B, float beta, Allocation C) { 3438cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateUplo(Uplo); 3439cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateSYR2K(Element.F32(mRS), Trans, A, B, C); 3440cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int K = -1; 3441cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (Trans != NO_TRANSPOSE) { 3442cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang K = A.getType().getY(); 3443cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } else { 3444cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang K = A.getType().getX(); 3445cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3446cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 3447cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 3448cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 3449cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long bID = B.getID(mRS); 3450cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long cID = C.getID(mRS); 3451cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 3452cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 3453cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang bID = getDummyAlloc(B); 3454cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang cID = getDummyAlloc(C); 3455cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3456cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, aID, bID, beta, cID, 0, 0, 0, 0, mUseIncSupp); 3457cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3458ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 3459ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 
3460ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * DSYR2K performs one of the symmetric rank 2k operations 3461ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * C := alpha*A*B**T + alpha*B*A**T + beta*C or C := alpha*A**T*B + alpha*B**T*A + beta*C 3462ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 3463ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/d1/dec/dsyr2k_8f.html 3464ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 3465ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 3466ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Trans The type of transpose applied to the operation. 3467ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param alpha The scalar alpha. 3468ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 3469ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}. 3470ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param beta The scalar beta. 3471ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}. 
3472ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 3473cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void DSYR2K(@Uplo int Uplo, @Transpose int Trans, double alpha, Allocation A, Allocation B, double beta, Allocation C) { 3474cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateUplo(Uplo); 3475cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateSYR2K(Element.F64(mRS), Trans, A, B, C); 3476cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int K = -1; 3477cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (Trans != NO_TRANSPOSE) { 3478cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang K = A.getType().getY(); 3479cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } else { 3480cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang K = A.getType().getX(); 3481cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3482cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 3483cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 3484cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 3485cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long bID = B.getID(mRS); 3486cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long cID = C.getID(mRS); 3487cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 3488cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 3489cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang bID = getDummyAlloc(B); 3490cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang cID = getDummyAlloc(C); 3491cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3492cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, aID, bID, beta, cID, 0, 0, 0, 0, mUseIncSupp); 3493cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3494ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 3495ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 
3496ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * CSYR2K performs one of the symmetric rank 2k operations 3497ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * C := alpha*A*B**T + alpha*B*A**T + beta*C or C := alpha*A**T*B + alpha*B**T*A + beta*C 3498ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 3499ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/de/d7e/csyr2k_8f.html 3500ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 3501ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 3502ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Trans The type of transpose applied to the operation. 3503ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param alpha The scalar alpha. 3504ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 3505ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}. 3506ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param beta The scalar beta. 3507ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}. 
3508ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 3509cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void CSYR2K(@Uplo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C) { 3510cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateUplo(Uplo); 3511cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateSYR2K(Element.F32_2(mRS), Trans, A, B, C); 3512cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int K = -1; 3513cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (Trans != NO_TRANSPOSE) { 3514cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang K = A.getType().getY(); 3515cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } else { 3516cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang K = A.getType().getX(); 3517cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3518cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 3519cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 3520cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 3521cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long bID = B.getID(mRS); 3522cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long cID = C.getID(mRS); 3523cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 3524cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 3525cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang bID = getDummyAlloc(B); 3526cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang cID = getDummyAlloc(C); 3527cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3528cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_csyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, aID, bID, beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp); 3529cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3530ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 3531ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 
3532ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * ZSYR2K performs one of the symmetric rank 2k operations 3533ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * C := alpha*A*B**T + alpha*B*A**T + beta*C or C := alpha*A**T*B + alpha*B**T*A + beta*C 3534ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 3535ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/df/d20/zsyr2k_8f.html 3536ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 3537ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 3538ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Trans The type of transpose applied to the operation. 3539ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param alpha The scalar alpha. 3540ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 3541ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}. 3542ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param beta The scalar beta. 3543ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}. 
3544ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 3545cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void ZSYR2K(@Uplo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C) { 3546cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateUplo(Uplo); 3547cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateSYR2K(Element.F64_2(mRS), Trans, A, B, C); 3548cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int K = -1; 3549cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (Trans != NO_TRANSPOSE) { 3550cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang K = A.getType().getY(); 3551cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } else { 3552cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang K = A.getType().getX(); 3553cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3554cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 3555cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 3556cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 3557cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long bID = B.getID(mRS); 3558cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long cID = C.getID(mRS); 3559cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 3560cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 3561cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang bID = getDummyAlloc(B); 3562cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang cID = getDummyAlloc(C); 3563cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3564cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zsyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, aID, bID, beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp); 3565cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3566cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 3567cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang static void 
validateTRMM(Element e, @Side int Side, @Transpose int TransA, Allocation A, Allocation B) { 3568cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateSide(Side); 3569cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateTranspose(TransA); 3570cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int aM = -1, aN = -1, bM = -1, bN = -1; 3571cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (!A.getType().getElement().isCompatible(e) || 3572cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang !B.getType().getElement().isCompatible(e)) { 3573cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Called BLAS with wrong Element type"); 3574cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3575cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 3576cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aM = A.getType().getY(); 3577cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aN = A.getType().getX(); 3578cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (aM != aN) { 3579cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Called TRMM with a non-symmetric matrix A"); 3580cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3581cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 3582cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang bM = B.getType().getY(); 3583cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang bN = B.getType().getX(); 3584cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (Side == LEFT) { 3585cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (aN != bM) { 3586cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Called TRMM with invalid matrices"); 3587cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3588cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } else { 3589cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (bN != aM) { 3590cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Called TRMM with invalid matrices"); 
3591cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3592cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3593cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3594ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 3595ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 3596ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * STRMM performs one of the matrix-matrix operations 3597ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * B := alpha*op(A)*B or B := alpha*B*op(A) 3598ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * op(A) is one of op(A) = A or op(A) = A**T 3599ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 3600ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/df/d01/strmm_8f.html 3601ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 3602ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3603ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Uplo Specifies whether matrix A is upper or lower triangular. 3604ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param TransA The type of transpose applied to matrix A. 3605ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Diag Specifies whether or not A is unit triangular. 3606ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param alpha The scalar alpha. 3607ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 3608ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}. 
3609ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 3610cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void STRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, float alpha, Allocation A, Allocation B) { 3611cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateUplo(Uplo); 3612cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateDiag(Diag); 3613cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateTRMM(Element.F32(mRS), Side, TransA, A, B); 3614cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 3615cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 3616cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 3617cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long bID = B.getID(mRS); 3618cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 3619cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 3620cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang bID = getDummyAlloc(B); 3621cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3622cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, 3623cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang alpha, aID, bID, 0.f, 0, 0, 0, 0, 0, mUseIncSupp); 3624cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3625ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 3626ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 3627ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * DTRMM performs one of the matrix-matrix operations 3628ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * B := alpha*op(A)*B or B := alpha*B*op(A) 3629ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * op(A) is one of op(A) = A or op(A) = A**T 3630ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 3631ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: 
http://www.netlib.org/lapack/explore-html/dd/d19/dtrmm_8f.html 3632ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 3633ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3634ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Uplo Specifies whether matrix A is upper or lower triangular. 3635ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param TransA The type of transpose applied to matrix A. 3636ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Diag Specifies whether or not A is unit triangular. 3637ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param alpha The scalar alpha. 3638ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 3639ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}. 3640ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 3641cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void DTRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, double alpha, Allocation A, Allocation B) { 3642cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateUplo(Uplo); 3643cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateDiag(Diag); 3644cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateTRMM(Element.F64(mRS), Side, TransA, A, B); 3645cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 3646cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 3647cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 3648cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long bID = B.getID(mRS); 3649cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 3650cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 3651cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao 
Wang bID = getDummyAlloc(B); 3652cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3653cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, 3654cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang alpha, aID, bID, 0, 0, 0, 0, 0, 0, mUseIncSupp); 3655cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3656ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 3657ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 3658ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * CTRMM performs one of the matrix-matrix operations 3659ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * B := alpha*op(A)*B or B := alpha*B*op(A) 3660ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * op(A) is one of op(A) = A or op(A) = A**T or op(A) = A**H 3661ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 3662ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/d4/d9b/ctrmm_8f.html 3663ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 3664ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3665ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Uplo Specifies whether matrix A is upper or lower triangular. 3666ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param TransA The type of transpose applied to matrix A. 3667ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Diag Specifies whether or not A is unit triangular. 3668ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param alpha The scalar alpha. 3669ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 3670ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}. 
3671ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 3672cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void CTRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Float2 alpha, Allocation A, Allocation B) { 3673cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateUplo(Uplo); 3674cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateDiag(Diag); 3675cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateTRMM(Element.F32_2(mRS), Side, TransA, A, B); 3676cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 3677cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 3678cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 3679cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long bID = B.getID(mRS); 3680cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 3681cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 3682cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang bID = getDummyAlloc(B); 3683cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3684cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, 3685cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang alpha.x, alpha.y, aID, bID, 0, 0, 0, 0, 0, 0, 0, mUseIncSupp); 3686cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3687ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 3688ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 3689ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * ZTRMM performs one of the matrix-matrix operations 3690ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * B := alpha*op(A)*B or B := alpha*B*op(A) 3691ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * op(A) is one of op(A) = A or op(A) = A**T or op(A) = A**H 3692ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 3693ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: 
http://www.netlib.org/lapack/explore-html/d8/de1/ztrmm_8f.html 3694ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 3695ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3696ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Uplo Specifies whether matrix A is upper or lower triangular. 3697ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param TransA The type of transpose applied to matrix A. 3698ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Diag Specifies whether or not A is unit triangular. 3699ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param alpha The scalar alpha. 3700ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 3701ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}. 
3702ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 3703cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void ZTRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Double2 alpha, Allocation A, Allocation B) { 3704cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateUplo(Uplo); 3705cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateDiag(Diag); 3706cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateTRMM(Element.F64_2(mRS), Side, TransA, A, B); 3707cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 3708cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 3709cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 3710cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long bID = B.getID(mRS); 3711cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 3712cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 3713cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang bID = getDummyAlloc(B); 3714cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3715cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, 3716cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang alpha.x, alpha.y, aID, bID, 0, 0, 0, 0, 0, 0, 0, mUseIncSupp); 3717cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3718cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 3719cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang static void validateTRSM(Element e, @Side int Side, @Transpose int TransA, Allocation A, Allocation B) { 3720cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int adim = -1, bM = -1, bN = -1; 3721cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateSide(Side); 3722cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateTranspose(TransA); 3723cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (!A.getType().getElement().isCompatible(e) || 
3724cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang !B.getType().getElement().isCompatible(e)) { 3725cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Called BLAS with wrong Element type"); 3726cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3727cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang adim = A.getType().getX(); 3728cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (adim != A.getType().getY()) { 3729cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang // this may be unnecessary, the restriction could potentially be relaxed 3730cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang // A needs to contain at least that symmetric matrix but could theoretically be larger 3731cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang // for now we assume adapters are sufficient, will reevaluate in the future 3732cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Called TRSM with a non-symmetric matrix A"); 3733cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3734cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang bM = B.getType().getY(); 3735cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang bN = B.getType().getX(); 3736cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (Side == LEFT) { 3737cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang // A is M*M 3738cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (adim != bM) { 3739cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Called TRSM with invalid matrix dimensions"); 3740cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3741cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } else { 3742cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang // A is N*N 3743cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (adim != bN) { 3744cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Called TRSM with invalid matrix dimensions"); 3745cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 
3746cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3747cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3748ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 3749ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 3750ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * STRSM solves one of the matrix equations 3751ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * op(A)*X := alpha*B or X*op(A) := alpha*B 3752ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * op(A) is one of op(A) = A or op(A) = A**T 3753ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 3754ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/d2/d8b/strsm_8f.html 3755ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 3756ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3757ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Uplo Specifies whether matrix A is upper or lower triangular. 3758ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param TransA The type of transpose applied to matrix A. 3759ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Diag Specifies whether or not A is unit triangular. 3760ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param alpha The scalar alpha. 3761ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 3762ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}. 
3763ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 3764cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void STRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, float alpha, Allocation A, Allocation B) { 3765cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateUplo(Uplo); 3766cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateDiag(Diag); 3767cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateTRSM(Element.F32(mRS), Side, TransA, A, B); 3768cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 3769cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 3770cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 3771cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long bID = B.getID(mRS); 3772cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 3773cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 3774cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang bID = getDummyAlloc(B); 3775cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3776cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, 3777cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang alpha, aID, bID, 0, 0, 0, 0, 0, 0, mUseIncSupp); 3778cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3779ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 3780ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 3781ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * DTRSM solves one of the matrix equations 3782ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * op(A)*X := alpha*B or X*op(A) := alpha*B 3783ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * op(A) is one of op(A) = A or op(A) = A**T 3784ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 3785ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: 
http://www.netlib.org/lapack/explore-html/de/da7/dtrsm_8f.html 3786ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 3787ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3788ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Uplo Specifies whether matrix A is upper or lower triangular. 3789ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param TransA The type of transpose applied to matrix A. 3790ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Diag Specifies whether or not A is unit triangular. 3791ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param alpha The scalar alpha. 3792ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 3793ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}. 3794ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 3795cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void DTRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, double alpha, Allocation A, Allocation B) { 3796cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateUplo(Uplo); 3797cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateDiag(Diag); 3798cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateTRSM(Element.F64(mRS), Side, TransA, A, B); 3799cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 3800cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 3801cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 3802cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long bID = B.getID(mRS); 3803cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 3804cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 3805cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao 
Wang bID = getDummyAlloc(B); 3806cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3807cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, 3808cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang alpha, aID, bID, 0, 0, 0, 0, 0, 0, mUseIncSupp); 3809cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3810ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 3811ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 3812ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * CTRSM solves one of the matrix equations 3813ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * op(A)*X := alpha*B or X*op(A) := alpha*B 3814ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * op(A) is one of op(A) = A or op(A) = A**T or op(A) = A**H 3815ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 3816ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/de/d30/ctrsm_8f.html 3817ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 3818ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3819ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Uplo Specifies whether matrix A is upper or lower triangular. 3820ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param TransA The type of transpose applied to matrix A. 3821ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Diag Specifies whether or not A is unit triangular. 3822ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param alpha The scalar alpha. 3823ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 3824ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}. 
3825ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 3826cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void CTRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Float2 alpha, Allocation A, Allocation B) { 3827cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateUplo(Uplo); 3828cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateDiag(Diag); 3829cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateTRSM(Element.F32_2(mRS), Side, TransA, A, B); 3830cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 3831cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 3832cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 3833cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long bID = B.getID(mRS); 3834cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 3835cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 3836cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang bID = getDummyAlloc(B); 3837cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3838cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, 3839cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang alpha.x, alpha.y, aID, bID, 0, 0, 0, 0, 0, 0, 0, mUseIncSupp); 3840cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3841ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 3842ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 3843ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * ZTRSM solves one of the matrix equations 3844ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * op(A)*X := alpha*B or X*op(A) := alpha*B 3845ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * op(A) is one of op(A) = A or op(A) = A**T or op(A) = A**H 3846ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 3847ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: 
http://www.netlib.org/lapack/explore-html/d1/d39/ztrsm_8f.html 3848ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 3849ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3850ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Uplo Specifies whether matrix A is upper or lower triangular. 3851ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param TransA The type of transpose applied to matrix A. 3852ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Diag Specifies whether or not A is unit triangular. 3853ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param alpha The scalar alpha. 3854ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 3855ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}. 
3856ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 3857cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void ZTRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Double2 alpha, Allocation A, Allocation B) { 3858cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateUplo(Uplo); 3859cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateDiag(Diag); 3860cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateTRSM(Element.F64_2(mRS), Side, TransA, A, B); 3861cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 3862cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 3863cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 3864cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long bID = B.getID(mRS); 3865cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 3866cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 3867cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang bID = getDummyAlloc(B); 3868cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3869cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, 3870cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang alpha.x, alpha.y, aID, bID, 0, 0, 0, 0, 0, 0, 0, mUseIncSupp); 3871cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3872cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 3873cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang static void validateHEMM(Element e, @Side int Side, Allocation A, Allocation B, Allocation C) { 3874cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateSide(Side); 3875cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 3876cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (!A.getType().getElement().isCompatible(e) || 3877cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang !B.getType().getElement().isCompatible(e) || 
3878cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang !C.getType().getElement().isCompatible(e)) { 3879cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Called BLAS with wrong Element type"); 3880cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3881cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 3882cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang // A must be square; can potentially be relaxed similar to TRSM 3883cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int adim = A.getType().getX(); 3884cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (adim != A.getType().getY()) { 3885cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Called HEMM with non-square A"); 3886cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3887cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if ((Side == LEFT && adim != B.getType().getY()) || 3888cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang (Side == RIGHT && adim != B.getType().getX())) { 3889cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Called HEMM with invalid B"); 3890cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3891cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (B.getType().getX() != C.getType().getX() || 3892cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang B.getType().getY() != C.getType().getY()) { 3893cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Called HEMM with mismatched B and C"); 3894cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3895cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3896ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 3897ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 3898ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * CHEMM performs one of the matrix-matrix operations 3899ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C 3900ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 
3901ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/d3/d66/chemm_8f.html 3902ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 3903ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3904ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 3905ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param alpha The scalar alpha. 3906ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 3907ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}. 3908ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param beta The scalar beta. 3909ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}. 
3910ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 3911cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void CHEMM(@Side int Side, @Uplo int Uplo, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C) { 3912cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateUplo(Uplo); 3913cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateHEMM(Element.F32_2(mRS), Side, A, B, C); 3914cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 3915cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 3916cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 3917cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long bID = B.getID(mRS); 3918cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long cID = C.getID(mRS); 3919cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 3920cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 3921cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang bID = getDummyAlloc(B); 3922cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang cID = getDummyAlloc(C); 3923cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3924cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chemm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, 3925cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang alpha.x, alpha.y, aID, bID, beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp); 3926cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3927ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 3928ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 3929ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * ZHEMM performs one of the matrix-matrix operations 3930ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C 3931ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 3932ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: 
http://www.netlib.org/lapack/explore-html/d6/d3e/zhemm_8f.html 3933ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 3934ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3935ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 3936ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param alpha The scalar alpha. 3937ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 3938ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}. 3939ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param beta The scalar beta. 3940ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}. 
3941ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 3942cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void ZHEMM(@Side int Side, @Uplo int Uplo, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C) { 3943cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateUplo(Uplo); 3944cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateHEMM(Element.F64_2(mRS), Side, A, B, C); 3945cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 3946cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 3947cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 3948cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long bID = B.getID(mRS); 3949cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long cID = C.getID(mRS); 3950cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 3951cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 3952cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang bID = getDummyAlloc(B); 3953cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang cID = getDummyAlloc(C); 3954cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3955cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhemm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, 3956cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang alpha.x, alpha.y, aID, bID, beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp); 3957cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3958cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 3959cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang static void validateHERK(Element e, @Transpose int Trans, Allocation A, Allocation C) { 3960cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (!A.getType().getElement().isCompatible(e) || 3961cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang !C.getType().getElement().isCompatible(e)) { 3962cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Called 
BLAS with wrong Element type"); 3963cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3964cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateConjTranspose(Trans); 3965cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int cdim = C.getType().getX(); 3966cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (cdim != C.getType().getY()) { 3967cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Called HERK with non-square C"); 3968cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3969cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (Trans == NO_TRANSPOSE) { 3970cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (cdim != A.getType().getY()) { 3971cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Called HERK with invalid A"); 3972cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3973cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } else { 3974cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (cdim != A.getType().getX()) { 3975cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Called HERK with invalid A"); 3976cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3977cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3978cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 3979ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 3980ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 3981ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * CHERK performs one of the hermitian rank k operations 3982ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * C := alpha*A*A**H + beta*C or C := alpha*A**H*A + beta*C 3983ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 3984ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/d8/d52/cherk_8f.html 3985ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 3986ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Uplo Specifies whether the upper or lower triangular part of C is to 
be referenced. 3987ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Trans The type of transpose applied to the operation. 3988ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param alpha The scalar alpha. 3989ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 3990ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param beta The scalar beta. 3991ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}. 3992ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 3993cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void CHERK(@Uplo int Uplo, @Transpose int Trans, float alpha, Allocation A, float beta, Allocation C) { 3994cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateUplo(Uplo); 3995cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateHERK(Element.F32_2(mRS), Trans, A, C); 3996cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int k = 0; 3997cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (Trans == CONJ_TRANSPOSE) { 3998cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang k = A.getType().getY(); 3999cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } else { 4000cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang k = A.getType().getX(); 4001cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 4002cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 4003cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 4004cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 4005cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long cID = C.getID(mRS); 4006cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 4007cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 4008cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang cID = getDummyAlloc(C); 
4009cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 4010cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cherk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k, 4011cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang alpha, 0, aID, 0, beta, 0, cID, 0, 0, 0, 0, mUseIncSupp); 4012cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 4013ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 4014ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 4015ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * ZHERK performs one of the hermitian rank k operations 4016ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * C := alpha*A*A**H + beta*C or C := alpha*A**H*A + beta*C 4017ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 4018ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/d1/db1/zherk_8f.html 4019ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 4020ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 4021ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Trans The type of transpose applied to the operation. 4022ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param alpha The scalar alpha. 4023ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 4024ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param beta The scalar beta. 4025ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}. 
4026ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 4027cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void ZHERK(@Uplo int Uplo, @Transpose int Trans, double alpha, Allocation A, double beta, Allocation C) { 4028cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateUplo(Uplo); 4029cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateHERK(Element.F64_2(mRS), Trans, A, C); 4030cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int k = 0; 4031cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (Trans == CONJ_TRANSPOSE) { 4032cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang k = A.getType().getY(); 4033cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } else { 4034cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang k = A.getType().getX(); 4035cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 4036cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 4037cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 4038cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 4039cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long cID = C.getID(mRS); 4040cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 4041cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 4042cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang cID = getDummyAlloc(C); 4043cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 4044cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zherk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k, 4045cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang alpha, 0, aID, 0, beta, 0, cID, 0, 0, 0, 0, mUseIncSupp); 4046cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 4047cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 4048cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang static void validateHER2K(Element e, @Transpose int Trans, Allocation A, Allocation B, Allocation C) { 4049cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 
if (!A.getType().getElement().isCompatible(e) || 4050cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang !B.getType().getElement().isCompatible(e) || 4051cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang !C.getType().getElement().isCompatible(e)) { 4052cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Called BLAS with wrong Element type"); 4053cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 4054cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateConjTranspose(Trans); 4055cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int cdim = C.getType().getX(); 4056cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (cdim != C.getType().getY()) { 4057cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Called HER2K with non-square C"); 4058cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 4059cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (Trans == NO_TRANSPOSE) { 4060cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (A.getType().getY() != cdim) { 4061cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Called HER2K with invalid matrices"); 4062cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 4063cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } else { 4064cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (A.getType().getX() != cdim) { 4065cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Called HER2K with invalid matrices"); 4066cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 4067cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 4068cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (A.getType().getX() != B.getType().getX() || A.getType().getY() != B.getType().getY()) { 4069cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang throw new RSRuntimeException("Called HER2K with invalid A and B matrices"); 4070cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 4071cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 
4072ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 4073ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 4074ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * CHER2K performs one of the hermitian rank 2k operations 4075ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C or C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C 4076ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 4077ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/d1/d82/cher2k_8f.html 4078ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 4079ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 4080ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Trans The type of transpose applied to the operation. 4081ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param alpha The scalar alpha. 4082ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 4083ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}. 4084ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param beta The scalar beta. 4085ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}. 
4086ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 4087cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void CHER2K(@Uplo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Allocation B, float beta, Allocation C) { 4088cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateUplo(Uplo); 4089cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateHER2K(Element.F32_2(mRS), Trans, A, B, C); 4090cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int k = 0; 4091cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (Trans == NO_TRANSPOSE) { 4092cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang k = A.getType().getX(); 4093cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } else { 4094cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang k = A.getType().getY(); 4095cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 4096cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 4097cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 4098cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 4099cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long bID = B.getID(mRS); 4100cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long cID = C.getID(mRS); 4101cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 4102cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 4103cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang bID = getDummyAlloc(B); 4104cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang cID = getDummyAlloc(C); 4105cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 4106cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cher2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k, alpha.x, alpha.y, 4107cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang A.getID(mRS), bID, beta, 0, cID, 0, 0, 0, 0, mUseIncSupp); 4108cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 4109ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang 
4110ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang /** 4111ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * ZHER2K performs one of the hermitian rank 2k operations 4112ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C or C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C 4113ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 4114ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Details: http://www.netlib.org/lapack/explore-html/d7/dfa/zher2k_8f.html 4115ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * 4116ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 4117ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param Trans The type of transpose applied to the operation. 4118ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param alpha The scalar alpha. 4119ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 4120ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}. 4121ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param beta The scalar beta. 4122ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}. 
4123ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang */ 4124cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void ZHER2K(@Uplo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Allocation B, double beta, Allocation C) { 4125cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateUplo(Uplo); 4126cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateHER2K(Element.F64_2(mRS), Trans, A, B, C); 4127cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int k = 0; 4128cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (Trans == NO_TRANSPOSE) { 4129cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang k = A.getType().getX(); 4130cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } else { 4131cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang k = A.getType().getY(); 4132cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 4133cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 4134cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 4135cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 4136cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long bID = B.getID(mRS); 4137cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long cID = C.getID(mRS); 4138cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 4139cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 4140cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang bID = getDummyAlloc(B); 4141cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang cID = getDummyAlloc(C); 4142cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 4143cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zher2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k, alpha.x, alpha.y, 4144cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang A.getID(mRS), bID, beta, 0, cID, 0, 0, 0, 0, mUseIncSupp); 4145cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 4146cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 
4147cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 4148cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang /** 4149482fe702f3a2f5850778b1b83f312a86965f5212Miao Wang * 8-bit GEMM-like operation for neural networks: C = A * Transpose(B) 4150ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * Calculations are done in 1.10.21 fixed-point format for the final output, 4151ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * just before there's a shift down to drop the fractional parts. The output 4152ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * values are gated to 0 to 255 to fit in a byte, but the 10-bit format 4153ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * gives some headroom to avoid wrapping around on small overflows. 4154db485add053a4fca86f1ebd92301bfb2836a0e7dMiao Wang * 4155ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param A The input allocation contains matrix A, supported elements type {@link Element#U8}. 4156db485add053a4fca86f1ebd92301bfb2836a0e7dMiao Wang * @param a_offset The offset for all values in matrix A, e.g A[i,j] = A[i,j] - a_offset. Value should be from 0 to 255. 4157ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param B The input allocation contains matrix B, supported elements type {@link Element#U8}. 4158db485add053a4fca86f1ebd92301bfb2836a0e7dMiao Wang * @param b_offset The offset for all values in matrix B, e.g B[i,j] = B[i,j] - b_offset. Value should be from 0 to 255. 4159ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param C The input allocation contains matrix C, supported elements type {@link Element#U8}. 4160ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param c_offset The offset for all values in matrix C. 4161ccd6dfe31a12881a5ec1c61844d1ed28f33f2e5fMiao Wang * @param c_mult The multiplier for all values in matrix C, e.g C[i,j] = (C[i,j] + c_offset) * c_mult. 
4162cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang **/ 4163cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang public void BNNM(Allocation A, int a_offset, Allocation B, int b_offset, Allocation C, int c_offset, int c_mult) { 4164cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang validateL3(Element.U8(mRS), NO_TRANSPOSE, TRANSPOSE, 0, A, B, C); 4165cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 4166db485add053a4fca86f1ebd92301bfb2836a0e7dMiao Wang if (a_offset < 0 || a_offset > 255) { 4167db485add053a4fca86f1ebd92301bfb2836a0e7dMiao Wang throw new RSRuntimeException("Invalid a_offset passed to BNNM"); 4168db485add053a4fca86f1ebd92301bfb2836a0e7dMiao Wang } 4169db485add053a4fca86f1ebd92301bfb2836a0e7dMiao Wang if (b_offset < 0 || b_offset > 255) { 4170db485add053a4fca86f1ebd92301bfb2836a0e7dMiao Wang throw new RSRuntimeException("Invalid b_offset passed to BNNM"); 4171db485add053a4fca86f1ebd92301bfb2836a0e7dMiao Wang } 4172cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang int M = -1, N = -1, K = -1; 4173cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang M = A.getType().getY(); 4174cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang N = B.getType().getY(); 4175cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang K = A.getType().getX(); 4176cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 4177cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang boolean mUseIncSupp = isIncSupp(); 4178cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long aID = A.getID(mRS); 4179cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long bID = B.getID(mRS); 4180cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang long cID = C.getID(mRS); 4181cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang if (mUseIncSupp) { 4182cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang aID = getDummyAlloc(A); 4183cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang bID = getDummyAlloc(B); 4184cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang cID = getDummyAlloc(C); 4185cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 
4186cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang mRS.nScriptIntrinsicBLAS_BNNM(getID(mRS), M, N, K, aID, a_offset, bID, b_offset, cID, c_offset, c_mult, mUseIncSupp); 4187cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 4188cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang } 4189cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang 4190cf76b25724b256ed3adb0fdd6d450ae64040f2acMiao Wang} 4191