1/*
2 * Copyright (C) 2007 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include <stdio.h>
18#include <stdlib.h>
19#include <string.h>
20#include <sys/time.h>
21#include <time.h>
22#include <unistd.h>
23#include <sched.h>
24#include <sys/resource.h>
25#include <sys/syscall.h>
26#include <sys/types.h>
27#include <sys/mman.h>
28
29#ifdef __ARM_NEON__
30#include <arm_neon.h>
31#endif
32
33
/* Time value expressed in nanoseconds. */
typedef long long nsecs_t;

/* Timestamp recorded by startTime() and read back by endTime(). */
static nsecs_t gTime;

/* Shared scratch buffer that the benchmark kernels read and overwrite. */
float data_f[128 * 1024];
37
38static nsecs_t system_time()
39{
40    struct timespec t;
41    t.tv_sec = t.tv_nsec = 0;
42    clock_gettime(CLOCK_MONOTONIC, &t);
43    return nsecs_t(t.tv_sec)*1000000000LL + t.tv_nsec;
44}
45
46static void startTime()
47{
48    gTime = system_time();
49}
50
51static void endTime(const char *str, double ops)
52{
53    nsecs_t t = system_time() - gTime;
54    double ds = ((double)t) / 1e9;
55    printf("Test: %s, %f Mops\n", str, ops / ds / 1e6);
56}
57
58
59static void test_mad() {
60    for(int i=0; i<1020; i++) {
61        data_f[i] = i;
62    }
63
64    startTime();
65
66    float total = 0;
67    // Do ~1 billion ops
68    for (int ct=0; ct < (1000 * (1000 / 20)); ct++) {
69        for (int i=0; i < 1000; i++) {
70            data_f[i] = (data_f[i] * 0.02f +
71                         data_f[i+1] * 0.04f +
72                         data_f[i+2] * 0.05f +
73                         data_f[i+3] * 0.1f +
74                         data_f[i+4] * 0.2f +
75                         data_f[i+5] * 0.2f +
76                         data_f[i+6] * 0.1f +
77                         data_f[i+7] * 0.05f +
78                         data_f[i+8] * 0.04f +
79                         data_f[i+9] * 0.02f + 1.f);
80        }
81    }
82
83    endTime("scalar mad", 1e9);
84}
85
86
87#ifdef __ARM_NEON__
88
89static void test_fma() {
90    for(int i=0; i<1020 * 4; i++) {
91        data_f[i] = i;
92    }
93    float32x4_t c0_02 = vdupq_n_f32(0.02f);
94    float32x4_t c0_04 = vdupq_n_f32(0.04f);
95    float32x4_t c0_05 = vdupq_n_f32(0.05f);
96    float32x4_t c0_10 = vdupq_n_f32(0.1f);
97    float32x4_t c0_20 = vdupq_n_f32(0.2f);
98    float32x4_t c1_00 = vdupq_n_f32(1.0f);
99
100    startTime();
101
102    float total = 0;
103    // Do ~1 billion ops
104    for (int ct=0; ct < (1000 * (1000 / 80)); ct++) {
105        for (int i=0; i < 1000; i++) {
106            float32x4_t t;
107            t = vmulq_f32(vld1q_f32((float32_t *)&data_f[i]), c0_02);
108            t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+4]), c0_04);
109            t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+8]), c0_05);
110            t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+12]), c0_10);
111            t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+16]), c0_20);
112            t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+20]), c0_20);
113            t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+24]), c0_10);
114            t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+28]), c0_05);
115            t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+32]), c0_04);
116            t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+36]), c0_02);
117            t = vaddq_f32(t, c1_00);
118            vst1q_f32((float32_t *)&data_f[i], t);
119        }
120    }
121
122    endTime("neon fma", 1e9);
123}
124#endif
125
126int fp_test(int argc, char** argv) {
127    test_mad();
128
129#ifdef __ARM_NEON__
130    test_fma();
131#endif
132
133    return 0;
134}
135
136
137
138
139