12c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang/* 22c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang * Copyright (C) 2015 The Android Open Source Project 32c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang * 42c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang * Licensed under the Apache License, Version 2.0 (the "License"); 52c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang * you may not use this file except in compliance with the License. 62c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang * You may obtain a copy of the License at 72c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang * 82c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang * http://www.apache.org/licenses/LICENSE-2.0 92c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang * 102c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang * Unless required by applicable law or agreed to in writing, software 112c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang * distributed under the License is distributed on an "AS IS" BASIS, 122c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 132c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang * See the License for the specific language governing permissions and 142c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang * limitations under the License. 152c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang */ 162c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang 172c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wangpackage com.example.android.rs.blasbenchmark; 182c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang 192c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wangimport android.renderscript.*; 202c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wangimport android.util.Log; 212c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wangimport java.util.Random; 222c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wangimport java.lang.Math; 232c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang 242c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wangpublic class SGEMMTest extends TestBase { 252c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang 262c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang static { 272c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang System.loadLibrary("gemmdata"); 282c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang } 292c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang 302c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang native void getData(byte[] a, byte[] b, byte[] c); 312c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang 322c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang ScriptIntrinsicBLAS mBLAS; 332c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang private Allocation matA; 342c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang private Allocation matB; 352c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang private Allocation matC; 362c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang 372c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang private int m; 382c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang private int n; 392c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang private int k; 402c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang 412c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang private int a_offset; 422c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang private int b_offset; 432c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang private int mTestSize; 442c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang private final float allowedError = 0.000001f; 452c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang 462c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang SGEMMTest(int testSize) { 472c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang mTestSize = testSize; 482c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang } 492c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang 502c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang public void createTest() { 512c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang mBLAS = ScriptIntrinsicBLAS.create(mRS); 522c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang setTest(); 532c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang } 542c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang 552c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang private void setTest() { 562c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang switch (mTestSize) { 572c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang case 1: 582c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang setTestSmall(); 592c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang break; 602c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang case 2: 612c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang setTestMedium(); 622c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang break; 632c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang case 3: 642c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang setTestLarge(); 652c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang break; 662c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang default: 672c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang break; 682c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang } 692c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang } 702c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang 712c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang // Calculate the square of the L2 norm of a matrix. 722c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang private float calcL2Norm(float[] input) { 732c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang float l2Norm = 0.f; 742c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang for (int i = 0; i < input.length; ++i) { 752c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang l2Norm += input[i] * input[i]; 762c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang } 772c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang return l2Norm; 782c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang } 792c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang 802c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang // Test whether the error of each element is samller the allowed error range. 812c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang private boolean testWithTolerance(float[] out, float[] ref) { 822c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang float l2NormOut = calcL2Norm(out); 832c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang float l2NormRef = calcL2Norm(ref); 842c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang float tolerance = allowedError * (l2NormOut < l2NormRef ? l2NormOut : l2NormRef); 852c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang tolerance /= m * n; 862c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang for (int i = 0; i < out.length; ++i) { 872c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang float err = out[i] - ref[i]; 882c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang float absErr = err * err; 892c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang if (absErr > tolerance) { 902c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang return false; 912c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang } 922c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang } 932c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang return true; 942c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang } 952c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang 962c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang // Transform byte data into float, given a offset. 972c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang private float[] byteToFloat(byte[] input, int offset) { 982c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang float[] output = new float[input.length]; 992c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang for (int i = 0; i < input.length; ++i) { 1002c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang output[i] = (float)(input[i] - offset); 1012c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang } 1022c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang return output; 1032c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang } 1042c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang 1052c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang // Calculate the reference result for C = A*B 1062c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang private float[] getGEMMResult(int m, int n, int k, float[] a_float, float[] b_float) { 1072c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang float[] c_float = new float[m * n]; 1082c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang for (int j = 0; j < n; j++) { 1092c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang for (int i = 0; i < m; i++) { 1102c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang float total = 0.f; 1112c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang for (int l = 0; l < k; l++) { 1122c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang int a_index = ((i * k) + l); 1132c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang int b_index = ((l * n) + j); 1142c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang float mult = a_float[a_index] * b_float[b_index]; 1152c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang total += mult; 1162c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang } 1172c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang int c_index = ((i * n) + j); 1182c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang c_float[c_index] = total; 1192c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang } 1202c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang } 1212c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang return c_float; 1222c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang } 1232c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang 1242c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang // This test multiplies a couple of small float matrices, and compares the 1252c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang // results with java-calculated expectations. The data here is arbitrary. 1262c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang public void setTestSmall() { 1272c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang m = 2; 1282c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang n = 4; 1292c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang k = 3; 1302c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang a_offset = 0; 1312c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang b_offset = 12; 1322c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang 1332c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang float[] a_float = byteToFloat(new byte[] { 1342c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang 1, 2, 3, 1352c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang 4, 5, 6, 1362c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang }, a_offset); 1372c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang 1382c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang float[] b_float = byteToFloat(new byte[] { 1392c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang 11, 7, 3, 1402c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang 10, 6, 2, 1412c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang 9, 5, 1, 1422c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang 8, 4, 0, 1432c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang }, b_offset); 1442c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang 1452c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang Type.Builder builder = new Type.Builder(mRS, Element.F32(mRS)); 1462c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang Type a_type = builder.setX(k).setY(m).create(); 1472c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang Type b_type = builder.setX(n).setY(k).create(); 1482c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang Type c_type = builder.setX(n).setY(m).create(); 1492c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang 1502c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang matA = Allocation.createTyped(mRS, a_type); 1512c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang matB = Allocation.createTyped(mRS, b_type); 1522c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang matC = Allocation.createTyped(mRS, c_type); 1532c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang 1542c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang matA.copyFrom(a_float); 1552c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang matB.copyFrom(b_float); 1562c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang 1572c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang //During setup, do a sample run to see if the result is correct. 1582c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang mBLAS.SGEMM(ScriptIntrinsicBLAS.NO_TRANSPOSE, ScriptIntrinsicBLAS.NO_TRANSPOSE, 1592c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang 1.0f, matA, matB, 0.f, matC); 1602c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang float[] c_float_ref = getGEMMResult(m, n, k, a_float, b_float); 1612c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang float[] c_float_out = new float[m * n]; 1622c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang matC.copyTo(c_float_out); 1632c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang if (!testWithTolerance(c_float_ref, c_float_out)) { 1642c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang Log.e(TAG, "Result is not correct!"); 1652c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang throw new AssertionError("Result is not correct."); 1662c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang } 1672c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang } 1682c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang 1692c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang // This test multiplies another two medium matrices, and compares the 1702c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang // results with the expected values. The data here is arbitrary. 1712c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang public void setTestMedium() { 1722c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang m = 7; 1732c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang n = 9; 1742c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang k = 23; 1752c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang a_offset = 13; 1762c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang b_offset = 23; 1772c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang 1782c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang float[] a_float = byteToFloat(new byte[] { 1792c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 1802c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 1812c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang 1, 23, 2, 22, 3, 21, 4, 20, 5, 19, 6, 18, 7, 17, 8, 16, 9, 15, 10, 14, 11, 13, 12, 1822c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang 23, 1, 22, 2, 21, 3, 20, 4, 19, 5, 18, 6, 17, 7, 16, 8, 15, 9, 14, 10, 13, 11, 12, 1832c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang 1, 1, 1, 1, 1, 1, 1, 1, 1, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 1842c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang 3, 1, 4, 1, 5, 8, 2, 3, 1, 14, 11, 15, 18, 12, 13, 11, 14, 11, 15, 18, 12, 13, 11, 1852c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang 8, 0, 5, 8, 1, 3, 7, 5, 7, 13, 10, 23, 13, 11, 17, 23, 12, 19, 17, 13, 14, 10, 19, 1862c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang }, a_offset); 1872c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang 1882c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang float[] b_float = byteToFloat(new byte[] { 1892c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang 0, 2, 4, 6, 8, 10, 1, 3, 5, 7, 9, 11, 0, 2, 4, 6, 8, 10, 1, 3, 5, 7, 9, 1902c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang 0, 20, 40, 60, 80, 10, 11, 13, 15, 17, 19, 21, 10, 12, 14, 6, 8, 10, 1, 3, 5, 7, 9, 1912c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang 1, 21, 41, 61, 81, 11, 12, 14, 16, 18, 20, 22, 11, 13, 15, 7, 9, 11, 2, 4, 6, 8, 9, 1922c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang 0, 19, 39, 59, 79, 9, 10, 12, 14, 16, 18, 20, 9, 11, 13, 5, 7, 9, 0, 2, 4, 6, 8, 1932c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang 2, 22, 42, 62, 82, 12, 13, 15, 17, 19, 21, 23, 12, 14, 16, 8, 9, 12, 3, 5, 7, 9, 9, 1942c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang 0, 18, 38, 58, 78, 8, 9, 11, 13, 15, 17, 19, 8, 10, 12, 4, 6, 8, 0, 1, 3, 5, 7, 1952c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang 3, 23, 43, 63, 83, 13, 14, 16, 18, 20, 22, 24, 13, 15, 17, 9, 9, 13, 4, 6, 8, 9, 9, 1962c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang 0, 17, 37, 57, 77, 7, 8, 10, 12, 14, 16, 18, 7, 9, 11, 3, 5, 7, 0, 0, 2, 4, 6, 1972c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang 10, 20, 30, 40, 50, 1, 2, 3, 4, 5, 11, 12, 13, 14, 15, 21, 22, 23, 24, 25, 1, 2, 3, 1982c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang }, b_offset); 1992c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang 2002c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang Type.Builder builder = new Type.Builder(mRS, Element.F32(mRS)); 2012c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang Type a_type = builder.setX(k).setY(m).create(); 2022c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang Type b_type = builder.setX(n).setY(k).create(); 2032c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang Type c_type = builder.setX(n).setY(m).create(); 2042c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang 2052c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang matA = Allocation.createTyped(mRS, a_type); 2062c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang matB = Allocation.createTyped(mRS, b_type); 2072c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang matC = Allocation.createTyped(mRS, c_type); 2082c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang 2092c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang matA.copyFrom(a_float); 2102c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang matB.copyFrom(b_float); 2112c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang 2122c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang //During setup, do a sample run to see if the result is correct. 2132c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang mBLAS.SGEMM(ScriptIntrinsicBLAS.NO_TRANSPOSE, ScriptIntrinsicBLAS.NO_TRANSPOSE, 2142c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang 1.0f, matA, matB, 0.f, matC); 2152c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang float[] c_float_ref = getGEMMResult(m, n, k, a_float, b_float); 2162c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang float[] c_float_out = new float[m * n]; 2172c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang matC.copyTo(c_float_out); 2182c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang if (!testWithTolerance(c_float_ref, c_float_out)) { 2192c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang Log.e(TAG, "Result is not correct!"); 2202c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang throw new AssertionError("Result is not correct."); 2212c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang } 2222c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang } 2232c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang 2242c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang 2252c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang // This test takes a large set of real data captured from a convolutional 2262c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang // neural network solving a computer vision problem, and runs it through SGEMM. 2272c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang public void setTestLarge() { 2282c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang 2292c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang m = 256; 2302c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang n = 192; 2312c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang k = 1152; 2322c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang a_offset = 0; 2332c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang b_offset = 84; 2342c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang 2352c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang int a_count = (m * k); 2362c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang int b_count = (n * k); 2372c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang int c_count = (m * n); 2382c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang 2392c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang byte[] a_byte = new byte[a_count]; 2402c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang byte[] b_byte = new byte[b_count]; 2412c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang byte[] c_byte = new byte[c_count]; 2422c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang 2432c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang getData(a_byte, b_byte, c_byte); 2442c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang 2452c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang float[] a_float = byteToFloat(a_byte, a_offset); 2462c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang float[] b_float = byteToFloat(b_byte, b_offset); 2472c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang 2482c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang Type.Builder builder = new Type.Builder(mRS, Element.F32(mRS)); 2492c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang Type a_type = builder.setX(k).setY(m).create(); 2502c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang Type b_type = builder.setX(n).setY(k).create(); 2512c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang Type c_type = builder.setX(n).setY(m).create(); 2522c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang 2532c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang matA = Allocation.createTyped(mRS, a_type); 2542c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang matB = Allocation.createTyped(mRS, b_type); 2552c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang matC = Allocation.createTyped(mRS, c_type); 2562c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang 2572c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang matA.copyFrom(a_float); 2582c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang matB.copyFrom(b_float); 2592c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang 2602c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang //During setup, do a sample run to see if the result is correct. 2612c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang mBLAS.SGEMM(ScriptIntrinsicBLAS.NO_TRANSPOSE, ScriptIntrinsicBLAS.NO_TRANSPOSE, 2622c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang 1.0f, matA, matB, 0.f, matC); 2632c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang float[] c_float_ref = getGEMMResult(m, n, k, a_float, b_float); 2642c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang float[] c_float_out = new float[c_count]; 2652c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang matC.copyTo(c_float_out); 2662c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang if (!testWithTolerance(c_float_ref, c_float_out)) { 2672c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang Log.e(TAG, "Result is not correct!"); 2682c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang throw new AssertionError("Result is not correct."); 2692c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang } 2702c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang } 2712c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang 2722c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang public void runTest() { 2732c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang mBLAS.SGEMM(ScriptIntrinsicBLAS.NO_TRANSPOSE, ScriptIntrinsicBLAS.NO_TRANSPOSE, 2742c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang 1.0f, matA, matB, 0.f, matC); 2752c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang } 2762c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang 2772c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang public String getTestInfo() { 2782c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang return "SGEMM Test: m=" + m + ", n=" + n + ", k=" + k; 2792c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang } 2802c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang} 281