1/*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "shared.rsh"
18
// Not referenced anywhere in this file.
// NOTE(review): presumably read by the host-side test harness - confirm.
const int TEST_COUNT = 1;

// Scratch buffers shared by every benchmark below. fp_mad_test() fills all
// 1025 elements before running the kernels. The kernels iterate over the
// first 1000 elements; the mad kernels additionally read up to 9 elements
// ahead, so the highest index touched is 1008.
static float data_f1[1025];
static float4 data_f4[1025];
23
24static void test_mad4(uint32_t index) {
25    start();
26
27    // Do ~1 billion ops
28    for (int ct=0; ct < 1000 * (1000 / 80); ct++) {
29        for (int i=0; i < (1000); i++) {
30            data_f4[i] = (data_f4[i] * 0.02f +
31                          data_f4[i+1] * 0.04f +
32                          data_f4[i+2] * 0.05f +
33                          data_f4[i+3] * 0.1f +
34                          data_f4[i+4] * 0.2f +
35                          data_f4[i+5] * 0.2f +
36                          data_f4[i+6] * 0.1f +
37                          data_f4[i+7] * 0.05f +
38                          data_f4[i+8] * 0.04f +
39                          data_f4[i+9] * 0.02f + 1.f);
40        }
41    }
42
43    float time = end(index);
44    rsDebug("fp_mad4 M ops", 1000.f / time);
45}
46
47static void test_mad(uint32_t index) {
48    start();
49
50    // Do ~1 billion ops
51    for (int ct=0; ct < 1000 * (1000 / 20); ct++) {
52        for (int i=0; i < (1000); i++) {
53            data_f1[i] = (data_f1[i] * 0.02f +
54                          data_f1[i+1] * 0.04f +
55                          data_f1[i+2] * 0.05f +
56                          data_f1[i+3] * 0.1f +
57                          data_f1[i+4] * 0.2f +
58                          data_f1[i+5] * 0.2f +
59                          data_f1[i+6] * 0.1f +
60                          data_f1[i+7] * 0.05f +
61                          data_f1[i+8] * 0.04f +
62                          data_f1[i+9] * 0.02f + 1.f);
63        }
64    }
65
66    float time = end(index);
67    rsDebug("fp_mad M ops", 1000.f / time);
68}
69
70static void test_norm(uint32_t index) {
71    start();
72
73    // Do ~10 M ops
74    for (int ct=0; ct < 1000 * 10; ct++) {
75        for (int i=0; i < (1000); i++) {
76            data_f4[i] = normalize(data_f4[i]);
77        }
78    }
79
80    float time = end(index);
81    rsDebug("fp_norm M ops", 10.f / time);
82}
83
84static void test_sincos4(uint32_t index) {
85    start();
86
87    // Do ~10 M ops
88    for (int ct=0; ct < 1000 * 10 / 4; ct++) {
89        for (int i=0; i < (1000); i++) {
90            data_f4[i] = sin(data_f4[i]) * cos(data_f4[i]);
91        }
92    }
93
94    float time = end(index);
95    rsDebug("fp_sincos4 M ops", 10.f / time);
96}
97
98static void test_sincos(uint32_t index) {
99    start();
100
101    // Do ~10 M ops
102    for (int ct=0; ct < 1000 * 10; ct++) {
103        for (int i=0; i < (1000); i++) {
104            data_f1[i] = sin(data_f1[i]) * cos(data_f1[i]);
105        }
106    }
107
108    float time = end(index);
109    rsDebug("fp_sincos M ops", 10.f / time);
110}
111
112static void test_clamp(uint32_t index) {
113    start();
114
115    // Do ~100 M ops
116    for (int ct=0; ct < 1000 * 100; ct++) {
117        for (int i=0; i < (1000); i++) {
118            data_f1[i] = clamp(data_f1[i], -1.f, 1.f);
119        }
120    }
121
122    float time = end(index);
123    rsDebug("fp_clamp M ops", 100.f / time);
124
125    start();
126    // Do ~100 M ops
127    for (int ct=0; ct < 1000 * 100; ct++) {
128        for (int i=0; i < (1000); i++) {
129            if (data_f1[i] < -1.f) data_f1[i] = -1.f;
130            if (data_f1[i] > -1.f) data_f1[i] = 1.f;
131        }
132    }
133
134    time = end(index);
135    rsDebug("fp_clamp ref M ops", 100.f / time);
136}
137
138static void test_clamp4(uint32_t index) {
139    start();
140
141    // Do ~100 M ops
142    for (int ct=0; ct < 1000 * 100 /4; ct++) {
143        for (int i=0; i < (1000); i++) {
144            data_f4[i] = clamp(data_f4[i], -1.f, 1.f);
145        }
146    }
147
148    float time = end(index);
149    rsDebug("fp_clamp4 M ops", 100.f / time);
150}
151
152void fp_mad_test(uint32_t index, int test_num) {
153    int x;
154    for (x=0; x < 1025; x++) {
155        data_f1[x] = (x & 0xf) * 0.1f;
156        data_f4[x].x = (x & 0xf) * 0.1f;
157        data_f4[x].y = (x & 0xf0) * 0.1f;
158        data_f4[x].z = (x & 0x33) * 0.1f;
159        data_f4[x].w = (x & 0x77) * 0.1f;
160    }
161
162    test_mad4(index);
163    test_mad(index);
164
165    for (x=0; x < 1025; x++) {
166        data_f1[x] = (x & 0xf) * 0.1f + 1.f;
167        data_f4[x].x = (x & 0xf) * 0.1f + 1.f;
168        data_f4[x].y = (x & 0xf0) * 0.1f + 1.f;
169        data_f4[x].z = (x & 0x33) * 0.1f + 1.f;
170        data_f4[x].w = (x & 0x77) * 0.1f + 1.f;
171    }
172
173    test_norm(index);
174    test_sincos4(index);
175    test_sincos(index);
176    test_clamp4(index);
177    test_clamp(index);
178
179    // TODO Actually verify test result accuracy
180    rsDebug("fp_mad_test PASSED", 0);
181    rsSendToClientBlocking(RS_MSG_TEST_PASSED);
182}
183