fp_mad.rs revision 648a1c137663ef7207684d0d7009dd5518942111
1#include "shared.rsh"
2
3const int TEST_COUNT = 1;
4
5static float data_f1[1025];
6static float4 data_f4[1025];
7
8static void test_mad4(uint32_t index) {
9    start();
10
11    // Do ~1 billion ops
12    for (int ct=0; ct < 1000 * (1000 / 80); ct++) {
13        for (int i=0; i < (1000); i++) {
14            data_f4[i] = (data_f4[i] * 0.02f +
15                          data_f4[i+1] * 0.04f +
16                          data_f4[i+2] * 0.05f +
17                          data_f4[i+3] * 0.1f +
18                          data_f4[i+4] * 0.2f +
19                          data_f4[i+5] * 0.2f +
20                          data_f4[i+6] * 0.1f +
21                          data_f4[i+7] * 0.05f +
22                          data_f4[i+8] * 0.04f +
23                          data_f4[i+9] * 0.02f + 1.f);
24        }
25    }
26
27    float time = end(index);
28    rsDebug("fp_mad4 M ops", 1000.f / time);
29}
30
31static void test_mad(uint32_t index) {
32    start();
33
34    // Do ~1 billion ops
35    for (int ct=0; ct < 1000 * (1000 / 20); ct++) {
36        for (int i=0; i < (1000); i++) {
37            data_f1[i] = (data_f1[i] * 0.02f +
38                          data_f1[i+1] * 0.04f +
39                          data_f1[i+2] * 0.05f +
40                          data_f1[i+3] * 0.1f +
41                          data_f1[i+4] * 0.2f +
42                          data_f1[i+5] * 0.2f +
43                          data_f1[i+6] * 0.1f +
44                          data_f1[i+7] * 0.05f +
45                          data_f1[i+8] * 0.04f +
46                          data_f1[i+9] * 0.02f + 1.f);
47        }
48    }
49
50    float time = end(index);
51    rsDebug("fp_mad M ops", 1000.f / time);
52}
53
54static void test_norm(uint32_t index) {
55    start();
56
57    // Do ~10 M ops
58    for (int ct=0; ct < 1000 * 10; ct++) {
59        for (int i=0; i < (1000); i++) {
60            data_f4[i] = normalize(data_f4[i]);
61        }
62    }
63
64    float time = end(index);
65    rsDebug("fp_norm M ops", 10.f / time);
66}
67
68static void test_sincos4(uint32_t index) {
69    start();
70
71    // Do ~10 M ops
72    for (int ct=0; ct < 1000 * 10 / 4; ct++) {
73        for (int i=0; i < (1000); i++) {
74            data_f4[i] = sin(data_f4[i]) * cos(data_f4[i]);
75        }
76    }
77
78    float time = end(index);
79    rsDebug("fp_sincos4 M ops", 10.f / time);
80}
81
82static void test_sincos(uint32_t index) {
83    start();
84
85    // Do ~10 M ops
86    for (int ct=0; ct < 1000 * 10; ct++) {
87        for (int i=0; i < (1000); i++) {
88            data_f1[i] = sin(data_f1[i]) * cos(data_f1[i]);
89        }
90    }
91
92    float time = end(index);
93    rsDebug("fp_sincos M ops", 10.f / time);
94}
95
96static void test_clamp(uint32_t index) {
97    start();
98
99    // Do ~100 M ops
100    for (int ct=0; ct < 1000 * 100; ct++) {
101        for (int i=0; i < (1000); i++) {
102            data_f1[i] = clamp(data_f1[i], -1.f, 1.f);
103        }
104    }
105
106    float time = end(index);
107    rsDebug("fp_clamp M ops", 100.f / time);
108
109    start();
110    // Do ~100 M ops
111    for (int ct=0; ct < 1000 * 100; ct++) {
112        for (int i=0; i < (1000); i++) {
113            if (data_f1[i] < -1.f) data_f1[i] = -1.f;
114            if (data_f1[i] > -1.f) data_f1[i] = 1.f;
115        }
116    }
117
118    time = end(index);
119    rsDebug("fp_clamp ref M ops", 100.f / time);
120}
121
122static void test_clamp4(uint32_t index) {
123    start();
124
125    // Do ~100 M ops
126    for (int ct=0; ct < 1000 * 100 /4; ct++) {
127        for (int i=0; i < (1000); i++) {
128            data_f4[i] = clamp(data_f4[i], -1.f, 1.f);
129        }
130    }
131
132    float time = end(index);
133    rsDebug("fp_clamp4 M ops", 100.f / time);
134}
135
136void fp_mad_test(uint32_t index, int test_num) {
137    int x;
138    for (x=0; x < 1025; x++) {
139        data_f1[x] = (x & 0xf) * 0.1f;
140        data_f4[x].x = (x & 0xf) * 0.1f;
141        data_f4[x].y = (x & 0xf0) * 0.1f;
142        data_f4[x].z = (x & 0x33) * 0.1f;
143        data_f4[x].w = (x & 0x77) * 0.1f;
144    }
145
146    test_mad4(index);
147    test_mad(index);
148
149    for (x=0; x < 1025; x++) {
150        data_f1[x] = (x & 0xf) * 0.1f + 1.f;
151        data_f4[x].x = (x & 0xf) * 0.1f + 1.f;
152        data_f4[x].y = (x & 0xf0) * 0.1f + 1.f;
153        data_f4[x].z = (x & 0x33) * 0.1f + 1.f;
154        data_f4[x].w = (x & 0x77) * 0.1f + 1.f;
155    }
156
157    test_norm(index);
158    test_sincos4(index);
159    test_sincos(index);
160    test_clamp4(index);
161    test_clamp(index);
162
163    // TODO Actually verify test result accuracy
164    rsDebug("fp_mad_test PASSED", 0);
165    rsSendToClientBlocking(RS_MSG_TEST_PASSED);
166}
167
168
169