/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
162c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang
172c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wangpackage com.example.android.rs.blasbenchmark;
182c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang
192c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wangimport android.renderscript.*;
202c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wangimport android.util.Log;
212c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wangimport java.util.Random;
222c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wangimport java.lang.Math;
232c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang
242c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wangpublic class SGEMMTest extends TestBase {
252c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang
262c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang    static {
272c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        System.loadLibrary("gemmdata");
282c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang    }
292c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang
302c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang    native void getData(byte[] a, byte[] b, byte[] c);
312c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang
322c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang    ScriptIntrinsicBLAS mBLAS;
332c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang    private Allocation matA;
342c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang    private Allocation matB;
352c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang    private Allocation matC;
362c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang
372c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang    private int m;
382c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang    private int n;
392c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang    private int k;
402c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang
412c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang    private int a_offset;
422c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang    private int b_offset;
432c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang    private int mTestSize;
442c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang    private final float allowedError = 0.000001f;
452c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang
462c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang    SGEMMTest(int testSize) {
472c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        mTestSize = testSize;
482c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang    }
492c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang
502c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang    public void createTest() {
512c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        mBLAS = ScriptIntrinsicBLAS.create(mRS);
522c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        setTest();
532c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang    }
542c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang
552c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang    private void setTest() {
562c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        switch (mTestSize) {
572c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang            case 1:
582c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang                setTestSmall();
592c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang                break;
602c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang            case 2:
612c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang                setTestMedium();
622c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang                break;
632c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang            case 3:
642c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang                setTestLarge();
652c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang                break;
662c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang            default:
672c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang                break;
682c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        }
692c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang    }
702c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang
712c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang    // Calculate the square of the L2 norm of a matrix.
722c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang    private float calcL2Norm(float[] input) {
732c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        float l2Norm = 0.f;
742c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        for (int i = 0; i < input.length; ++i) {
752c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang            l2Norm += input[i] * input[i];
762c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        }
772c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        return l2Norm;
782c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang    }
792c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang
802c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang    // Test whether the error of each element is samller the allowed error range.
812c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang    private boolean testWithTolerance(float[] out, float[] ref) {
822c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        float l2NormOut = calcL2Norm(out);
832c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        float l2NormRef = calcL2Norm(ref);
842c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        float tolerance = allowedError * (l2NormOut < l2NormRef ? l2NormOut : l2NormRef);
852c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        tolerance /= m * n;
862c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        for (int i = 0; i < out.length; ++i) {
872c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang            float err = out[i] - ref[i];
882c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang            float absErr = err * err;
892c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang            if (absErr > tolerance) {
902c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang                return false;
912c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang            }
922c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        }
932c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        return true;
942c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang    }
952c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang
962c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang    // Transform byte data into float, given a offset.
972c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang    private float[] byteToFloat(byte[] input, int offset) {
982c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        float[] output = new float[input.length];
992c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        for (int i = 0; i < input.length; ++i) {
1002c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang            output[i] = (float)(input[i] - offset);
1012c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        }
1022c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        return output;
1032c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang    }
1042c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang
1052c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang    // Calculate the reference result for C = A*B
1062c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang    private float[] getGEMMResult(int m, int n, int k, float[] a_float, float[] b_float) {
1072c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        float[] c_float = new float[m * n];
1082c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        for (int j = 0; j < n; j++) {
1092c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang            for (int i = 0; i < m; i++) {
1102c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang                float total = 0.f;
1112c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang                for (int l = 0; l < k; l++) {
1122c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang                    int a_index = ((i * k) + l);
1132c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang                    int b_index = ((l * n) + j);
1142c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang                    float mult = a_float[a_index] * b_float[b_index];
1152c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang                    total += mult;
1162c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang                }
1172c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang                int c_index = ((i * n) + j);
1182c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang                c_float[c_index] = total;
1192c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang            }
1202c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        }
1212c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        return c_float;
1222c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang    }
1232c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang
1242c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang    // This test multiplies a couple of small float matrices, and compares the
1252c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang    // results with java-calculated expectations. The data here is arbitrary.
1262c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang    public void setTestSmall() {
1272c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        m = 2;
1282c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        n = 4;
1292c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        k = 3;
1302c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        a_offset = 0;
1312c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        b_offset = 12;
1322c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang
1332c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        float[] a_float = byteToFloat(new byte[] {
1342c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang                1, 2, 3,
1352c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang                4, 5, 6,
1362c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang            }, a_offset);
1372c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang
1382c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        float[] b_float = byteToFloat(new byte[] {
1392c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang                11, 7, 3,
1402c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang                10, 6, 2,
1412c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang                9, 5, 1,
1422c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang                8, 4, 0,
1432c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang            }, b_offset);
1442c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang
1452c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        Type.Builder builder = new Type.Builder(mRS, Element.F32(mRS));
1462c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        Type a_type = builder.setX(k).setY(m).create();
1472c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        Type b_type = builder.setX(n).setY(k).create();
1482c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        Type c_type = builder.setX(n).setY(m).create();
1492c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang
1502c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        matA = Allocation.createTyped(mRS, a_type);
1512c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        matB = Allocation.createTyped(mRS, b_type);
1522c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        matC = Allocation.createTyped(mRS, c_type);
1532c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang
1542c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        matA.copyFrom(a_float);
1552c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        matB.copyFrom(b_float);
1562c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang
1572c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        //During setup, do a sample run to see if the result is correct.
1582c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        mBLAS.SGEMM(ScriptIntrinsicBLAS.NO_TRANSPOSE, ScriptIntrinsicBLAS.NO_TRANSPOSE,
1592c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang                    1.0f, matA, matB, 0.f, matC);
1602c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        float[] c_float_ref = getGEMMResult(m, n, k, a_float, b_float);
1612c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        float[] c_float_out = new float[m * n];
1622c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        matC.copyTo(c_float_out);
1632c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        if (!testWithTolerance(c_float_ref, c_float_out)) {
1642c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang            Log.e(TAG, "Result is not correct!");
1652c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang            throw new AssertionError("Result is not correct.");
1662c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        }
1672c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang    }
1682c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang
1692c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang    // This test multiplies another two medium matrices, and compares the
1702c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang    // results with the expected values. The data here is arbitrary.
1712c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang    public void setTestMedium() {
1722c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        m = 7;
1732c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        n = 9;
1742c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        k = 23;
1752c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        a_offset = 13;
1762c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        b_offset = 23;
1772c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang
1782c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        float[] a_float = byteToFloat(new byte[] {
1792c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang                1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
1802c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang                23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
1812c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang                1, 23, 2, 22, 3, 21, 4, 20, 5, 19, 6, 18, 7, 17, 8, 16, 9, 15, 10, 14, 11, 13, 12,
1822c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang                23, 1, 22, 2, 21, 3, 20, 4, 19, 5, 18, 6, 17, 7, 16, 8, 15, 9, 14, 10, 13, 11, 12,
1832c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang                1, 1, 1, 1, 1, 1, 1, 1, 1, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
1842c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang                3, 1, 4, 1, 5, 8, 2, 3, 1, 14, 11, 15, 18, 12, 13, 11, 14, 11, 15, 18, 12, 13, 11,
1852c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang                8, 0, 5, 8, 1, 3, 7, 5, 7, 13, 10, 23, 13, 11, 17, 23, 12, 19, 17, 13, 14, 10, 19,
1862c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang            }, a_offset);
1872c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang
1882c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        float[] b_float = byteToFloat(new byte[] {
1892c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang                0, 2, 4, 6, 8, 10, 1, 3, 5, 7, 9, 11, 0, 2, 4, 6, 8, 10, 1, 3, 5, 7, 9,
1902c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang                0, 20, 40, 60, 80, 10, 11, 13, 15, 17, 19, 21, 10, 12, 14, 6, 8, 10, 1, 3, 5, 7, 9,
1912c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang                1, 21, 41, 61, 81, 11, 12, 14, 16, 18, 20, 22, 11, 13, 15, 7, 9, 11, 2, 4, 6, 8, 9,
1922c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang                0, 19, 39, 59, 79, 9, 10, 12, 14, 16, 18, 20, 9, 11, 13, 5, 7, 9, 0, 2, 4, 6, 8,
1932c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang                2, 22, 42, 62, 82, 12, 13, 15, 17, 19, 21, 23, 12, 14, 16, 8, 9, 12, 3, 5, 7, 9, 9,
1942c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang                0, 18, 38, 58, 78, 8, 9, 11, 13, 15, 17, 19, 8, 10, 12, 4, 6, 8, 0, 1, 3, 5, 7,
1952c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang                3, 23, 43, 63, 83, 13, 14, 16, 18, 20, 22, 24, 13, 15, 17, 9, 9, 13, 4, 6, 8, 9, 9,
1962c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang                0, 17, 37, 57, 77, 7, 8, 10, 12, 14, 16, 18, 7, 9, 11, 3, 5, 7, 0, 0, 2, 4, 6,
1972c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang                10, 20, 30, 40, 50, 1, 2, 3, 4, 5, 11, 12, 13, 14, 15, 21, 22, 23, 24, 25, 1, 2, 3,
1982c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang            }, b_offset);
1992c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang
2002c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        Type.Builder builder = new Type.Builder(mRS, Element.F32(mRS));
2012c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        Type a_type = builder.setX(k).setY(m).create();
2022c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        Type b_type = builder.setX(n).setY(k).create();
2032c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        Type c_type = builder.setX(n).setY(m).create();
2042c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang
2052c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        matA = Allocation.createTyped(mRS, a_type);
2062c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        matB = Allocation.createTyped(mRS, b_type);
2072c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        matC = Allocation.createTyped(mRS, c_type);
2082c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang
2092c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        matA.copyFrom(a_float);
2102c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        matB.copyFrom(b_float);
2112c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang
2122c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        //During setup, do a sample run to see if the result is correct.
2132c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        mBLAS.SGEMM(ScriptIntrinsicBLAS.NO_TRANSPOSE, ScriptIntrinsicBLAS.NO_TRANSPOSE,
2142c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang                    1.0f, matA, matB, 0.f, matC);
2152c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        float[] c_float_ref = getGEMMResult(m, n, k, a_float, b_float);
2162c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        float[] c_float_out = new float[m * n];
2172c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        matC.copyTo(c_float_out);
2182c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        if (!testWithTolerance(c_float_ref, c_float_out)) {
2192c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang            Log.e(TAG, "Result is not correct!");
2202c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang            throw new AssertionError("Result is not correct.");
2212c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        }
2222c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang    }
2232c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang
2242c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang
2252c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang    // This test takes a large set of real data captured from a convolutional
2262c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang    // neural network solving a computer vision problem, and runs it through SGEMM.
2272c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang    public void setTestLarge() {
2282c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang
2292c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        m = 256;
2302c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        n = 192;
2312c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        k = 1152;
2322c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        a_offset = 0;
2332c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        b_offset = 84;
2342c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang
2352c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        int a_count = (m * k);
2362c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        int b_count = (n * k);
2372c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        int c_count = (m * n);
2382c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang
2392c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        byte[] a_byte = new byte[a_count];
2402c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        byte[] b_byte = new byte[b_count];
2412c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        byte[] c_byte = new byte[c_count];
2422c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang
2432c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        getData(a_byte, b_byte, c_byte);
2442c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang
2452c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        float[] a_float = byteToFloat(a_byte, a_offset);
2462c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        float[] b_float = byteToFloat(b_byte, b_offset);
2472c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang
2482c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        Type.Builder builder = new Type.Builder(mRS, Element.F32(mRS));
2492c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        Type a_type = builder.setX(k).setY(m).create();
2502c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        Type b_type = builder.setX(n).setY(k).create();
2512c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        Type c_type = builder.setX(n).setY(m).create();
2522c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang
2532c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        matA = Allocation.createTyped(mRS, a_type);
2542c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        matB = Allocation.createTyped(mRS, b_type);
2552c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        matC = Allocation.createTyped(mRS, c_type);
2562c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang
2572c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        matA.copyFrom(a_float);
2582c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        matB.copyFrom(b_float);
2592c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang
2602c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        //During setup, do a sample run to see if the result is correct.
2612c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        mBLAS.SGEMM(ScriptIntrinsicBLAS.NO_TRANSPOSE, ScriptIntrinsicBLAS.NO_TRANSPOSE,
2622c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang                    1.0f, matA, matB, 0.f, matC);
2632c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        float[] c_float_ref = getGEMMResult(m, n, k, a_float, b_float);
2642c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        float[] c_float_out = new float[c_count];
2652c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        matC.copyTo(c_float_out);
2662c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        if (!testWithTolerance(c_float_ref, c_float_out)) {
2672c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang            Log.e(TAG, "Result is not correct!");
2682c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang            throw new AssertionError("Result is not correct.");
2692c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        }
2702c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang    }
2712c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang
2722c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang    public void runTest() {
2732c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        mBLAS.SGEMM(ScriptIntrinsicBLAS.NO_TRANSPOSE, ScriptIntrinsicBLAS.NO_TRANSPOSE,
2742c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang                    1.0f, matA, matB, 0.f, matC);
2752c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang    }
2762c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang
2772c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang    public String getTestInfo() {
2782c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang        return "SGEMM Test: m=" + m + ", n=" + n + ", k=" + k;
2792c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang    }
2802c9b10f9b72ebec0b1861532e4ccb6e68ac769a4Miao Wang}
281