/*
 * Copyright (C) 2007 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
163cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams
173cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams#include <stdio.h>
183cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams#include <stdlib.h>
193cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams#include <string.h>
203cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams#include <sys/time.h>
213cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams#include <time.h>
223cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams#include <unistd.h>
233cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams#include <sched.h>
243cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams#include <sys/resource.h>
253cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams#include <sys/syscall.h>
263cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams#include <sys/types.h>
273cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams#include <sys/mman.h>
283cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams
29d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams#ifdef __ARM_NEON__
30d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams#include <arm_neon.h>
31d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams#endif
323cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams
333cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams
/* Time value expressed in nanoseconds. */
typedef long long nsecs_t;

/* Timestamp written by startTime() and read back by endTime(). */
static nsecs_t gTime;

/*
 * Working buffer that the benchmark kernels read from and write back to.
 * It is not static, so it has external linkage.
 */
float data_f[1024 * 128];
373cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams
383cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Samsstatic nsecs_t system_time()
393cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams{
403cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams    struct timespec t;
413cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams    t.tv_sec = t.tv_nsec = 0;
423cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams    clock_gettime(CLOCK_MONOTONIC, &t);
433cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams    return nsecs_t(t.tv_sec)*1000000000LL + t.tv_nsec;
443cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams}
453cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams
46d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Samsstatic void startTime()
47d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams{
48d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams    gTime = system_time();
49d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams}
50d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams
51d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Samsstatic void endTime(const char *str, double ops)
52d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams{
53d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams    nsecs_t t = system_time() - gTime;
54d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams    double ds = ((double)t) / 1e9;
55d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams    printf("Test: %s, %f Mops\n", str, ops / ds / 1e6);
56d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams}
573cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams
583cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams
59d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Samsstatic void test_mad() {
60d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams    for(int i=0; i<1020; i++) {
61d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams        data_f[i] = i;
62d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams    }
63d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams
64d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams    startTime();
65d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams
66d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams    float total = 0;
67d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams    // Do ~1 billion ops
68d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams    for (int ct=0; ct < (1000 * (1000 / 20)); ct++) {
69d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams        for (int i=0; i < 1000; i++) {
70d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams            data_f[i] = (data_f[i] * 0.02f +
71d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams                         data_f[i+1] * 0.04f +
72d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams                         data_f[i+2] * 0.05f +
73d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams                         data_f[i+3] * 0.1f +
74d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams                         data_f[i+4] * 0.2f +
75d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams                         data_f[i+5] * 0.2f +
76d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams                         data_f[i+6] * 0.1f +
77d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams                         data_f[i+7] * 0.05f +
78d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams                         data_f[i+8] * 0.04f +
79d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams                         data_f[i+9] * 0.02f + 1.f);
803cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams        }
813cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams    }
823cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams
83d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams    endTime("scalar mad", 1e9);
84d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams}
85d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams
86d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams
#ifdef __ARM_NEON__

/*
 * NEON benchmark: the 4-lane counterpart of the scalar test. Each inner
 * iteration loads ten 4-float vectors starting at data_f[i], combines them
 * with one vmulq_f32, nine vmlaq_f32 and one vaddq_f32, and stores the result
 * back at data_f[i]. The rate is reported through endTime().
 */
static void test_fma(void) {
    /* Seed 1020 * 4 entries; the kernel reads no further than index 999 + 39. */
    for (int i = 0; i < 1020 * 4; i++) {
        data_f[i] = i;
    }
    float32x4_t c0_02 = vdupq_n_f32(0.02f);
    float32x4_t c0_04 = vdupq_n_f32(0.04f);
    float32x4_t c0_05 = vdupq_n_f32(0.05f);
    float32x4_t c0_10 = vdupq_n_f32(0.1f);
    float32x4_t c0_20 = vdupq_n_f32(0.2f);
    float32x4_t c1_00 = vdupq_n_f32(1.0f);

    startTime();

    /*
     * Do ~1 billion ops: every inner iteration is 80 lane operations
     * (4 multiplies, 9 * 8 multiply-accumulates, 4 adds). The pass count is
     * computed as (1000 * 1000) / 80 = 12500 so that 12500 * 1000 * 80 is
     * exactly the 1e9 reported below; dividing first (1000 / 80) would
     * truncate to 12 and run only 0.96e9 ops.
     */
    for (int ct = 0; ct < ((1000 * 1000) / 80); ct++) {
        for (int i = 0; i < 1000; i++) {
            float32_t *p = (float32_t *)&data_f[i];
            float32x4_t t;

            t = vmulq_f32(vld1q_f32(p), c0_02);
            t = vmlaq_f32(t, vld1q_f32(p + 4), c0_04);
            t = vmlaq_f32(t, vld1q_f32(p + 8), c0_05);
            t = vmlaq_f32(t, vld1q_f32(p + 12), c0_10);
            t = vmlaq_f32(t, vld1q_f32(p + 16), c0_20);
            t = vmlaq_f32(t, vld1q_f32(p + 20), c0_20);
            t = vmlaq_f32(t, vld1q_f32(p + 24), c0_10);
            t = vmlaq_f32(t, vld1q_f32(p + 28), c0_05);
            t = vmlaq_f32(t, vld1q_f32(p + 32), c0_04);
            t = vmlaq_f32(t, vld1q_f32(p + 36), c0_02);
            t = vaddq_f32(t, c1_00);
            vst1q_f32(p, t);
        }
    }

    endTime("neon fma", 1e9);
}
#endif
125d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams
/*
 * Runs the floating-point benchmarks: the scalar test always, and the NEON
 * test only when the file is built with __ARM_NEON__ defined.
 *
 * argc, argv: accepted but not used.
 * Returns 0 unconditionally.
 */
int fp_test(int argc, char** argv) {
    (void)argc;
    (void)argv;

    test_mad();

#ifdef __ARM_NEON__
    test_fma();
#endif

    return 0;
}
1353cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams
136d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams
137d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams
138d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams
139