1#include "shared.rsh"
2
3const int TEST_COUNT = 1;
4
5static float data_f1[1025];
6static float4 data_f4[1025];
7
8static void test_mad4(uint32_t index) {
9    start();
10
11    float total = 0;
12    // Do ~1 billion ops
13    for (int ct=0; ct < 1000 * (1000 / 80); ct++) {
14        for (int i=0; i < (1000); i++) {
15            data_f4[i] = (data_f4[i] * 0.02f +
16                          data_f4[i+1] * 0.04f +
17                          data_f4[i+2] * 0.05f +
18                          data_f4[i+3] * 0.1f +
19                          data_f4[i+4] * 0.2f +
20                          data_f4[i+5] * 0.2f +
21                          data_f4[i+6] * 0.1f +
22                          data_f4[i+7] * 0.05f +
23                          data_f4[i+8] * 0.04f +
24                          data_f4[i+9] * 0.02f + 1.f);
25        }
26    }
27
28    float time = end(index);
29    rsDebug("fp_mad4 M ops", 1000.f / time);
30}
31
32static void test_mad(uint32_t index) {
33    start();
34
35    float total = 0;
36    // Do ~1 billion ops
37    for (int ct=0; ct < 1000 * (1000 / 20); ct++) {
38        for (int i=0; i < (1000); i++) {
39            data_f1[i] = (data_f1[i] * 0.02f +
40                          data_f1[i+1] * 0.04f +
41                          data_f1[i+2] * 0.05f +
42                          data_f1[i+3] * 0.1f +
43                          data_f1[i+4] * 0.2f +
44                          data_f1[i+5] * 0.2f +
45                          data_f1[i+6] * 0.1f +
46                          data_f1[i+7] * 0.05f +
47                          data_f1[i+8] * 0.04f +
48                          data_f1[i+9] * 0.02f + 1.f);
49        }
50    }
51
52    float time = end(index);
53    rsDebug("fp_mad M ops", 1000.f / time);
54}
55
56static void test_norm(uint32_t index) {
57    start();
58
59    float total = 0;
60    // Do ~10 M ops
61    for (int ct=0; ct < 1000 * 10; ct++) {
62        for (int i=0; i < (1000); i++) {
63            data_f4[i] = normalize(data_f4[i]);
64        }
65    }
66
67    float time = end(index);
68    rsDebug("fp_norm M ops", 10.f / time);
69}
70
71static void test_sincos4(uint32_t index) {
72    start();
73
74    float total = 0;
75    // Do ~10 M ops
76    for (int ct=0; ct < 1000 * 10 / 4; ct++) {
77        for (int i=0; i < (1000); i++) {
78            data_f4[i] = sin(data_f4[i]) * cos(data_f4[i]);
79        }
80    }
81
82    float time = end(index);
83    rsDebug("fp_sincos4 M ops", 10.f / time);
84}
85
86static void test_sincos(uint32_t index) {
87    start();
88
89    float total = 0;
90    // Do ~10 M ops
91    for (int ct=0; ct < 1000 * 10; ct++) {
92        for (int i=0; i < (1000); i++) {
93            data_f1[i] = sin(data_f1[i]) * cos(data_f1[i]);
94        }
95    }
96
97    float time = end(index);
98    rsDebug("fp_sincos M ops", 10.f / time);
99}
100
101static void test_clamp(uint32_t index) {
102    start();
103
104    // Do ~100 M ops
105    for (int ct=0; ct < 1000 * 100; ct++) {
106        for (int i=0; i < (1000); i++) {
107            data_f1[i] = clamp(data_f1[i], -1.f, 1.f);
108        }
109    }
110
111    float time = end(index);
112    rsDebug("fp_clamp M ops", 100.f / time);
113
114    start();
115    // Do ~100 M ops
116    for (int ct=0; ct < 1000 * 100; ct++) {
117        for (int i=0; i < (1000); i++) {
118            if (data_f1[i] < -1.f) data_f1[i] = -1.f;
119            if (data_f1[i] > -1.f) data_f1[i] = 1.f;
120        }
121    }
122
123    time = end(index);
124    rsDebug("fp_clamp ref M ops", 100.f / time);
125}
126
127static void test_clamp4(uint32_t index) {
128    start();
129
130    float total = 0;
131    // Do ~100 M ops
132    for (int ct=0; ct < 1000 * 100 /4; ct++) {
133        for (int i=0; i < (1000); i++) {
134            data_f4[i] = clamp(data_f4[i], -1.f, 1.f);
135        }
136    }
137
138    float time = end(index);
139    rsDebug("fp_clamp4 M ops", 100.f / time);
140}
141
142void fp_mad_test(uint32_t index, int test_num) {
143    int x;
144    for (x=0; x < 1025; x++) {
145        data_f1[x] = (x & 0xf) * 0.1f;
146        data_f4[x].x = (x & 0xf) * 0.1f;
147        data_f4[x].y = (x & 0xf0) * 0.1f;
148        data_f4[x].z = (x & 0x33) * 0.1f;
149        data_f4[x].w = (x & 0x77) * 0.1f;
150    }
151
152    test_mad4(index);
153    test_mad(index);
154
155    for (x=0; x < 1025; x++) {
156        data_f1[x] = (x & 0xf) * 0.1f + 1.f;
157        data_f4[x].x = (x & 0xf) * 0.1f + 1.f;
158        data_f4[x].y = (x & 0xf0) * 0.1f + 1.f;
159        data_f4[x].z = (x & 0x33) * 0.1f + 1.f;
160        data_f4[x].w = (x & 0x77) * 0.1f + 1.f;
161    }
162
163    test_norm(index);
164    test_sincos4(index);
165    test_sincos(index);
166    test_clamp4(index);
167    test_clamp(index);
168
169    // TODO Actually verify test result accuracy
170    rsDebug("fp_mad_test PASSED", 0);
171    rsSendToClientBlocking(RS_MSG_TEST_PASSED);
172}
173
174
175