/*
 * Copyright (C) 2007 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
163cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams
173cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams#include <stdio.h>
183cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams#include <stdlib.h>
193cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams#include <string.h>
203cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams#include <sys/time.h>
213cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams#include <time.h>
223cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams#include <unistd.h>
233cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams#include <sched.h>
243cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams#include <sys/resource.h>
253cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams#include <sys/syscall.h>
263cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams#include <sys/types.h>
273cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams#include <sys/mman.h>
283cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams
29d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams#ifdef __ARM_NEON__
30d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams#include <arm_neon.h>
31d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams#endif
323cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams
333cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams
// Nanosecond count; the return type of system_time().
typedef long long nsecs_t;

// Timestamp written by startTime() and read back by endTime().
static nsecs_t gTime;

// Working buffer shared by the benchmark kernels (128 * 1024 floats).
float data_f[128 * 1024];
373cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams
383cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Samsstatic nsecs_t system_time()
393cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams{
403cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams    struct timespec t;
413cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams    t.tv_sec = t.tv_nsec = 0;
423cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams    clock_gettime(CLOCK_MONOTONIC, &t);
433cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams    return nsecs_t(t.tv_sec)*1000000000LL + t.tv_nsec;
443cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams}
453cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams
46d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Samsstatic void startTime()
47d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams{
48d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams    gTime = system_time();
49d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams}
50d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams
51d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Samsstatic void endTime(const char *str, double ops)
52d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams{
53d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams    nsecs_t t = system_time() - gTime;
54d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams    double ds = ((double)t) / 1e9;
55d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams    printf("Test: %s, %f Mops\n", str, ops / ds / 1e6);
56d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams}
573cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams
583cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams
59d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Samsstatic void test_mad() {
60d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams    for(int i=0; i<1020; i++) {
61d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams        data_f[i] = i;
62d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams    }
63d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams
64d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams    startTime();
65d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams
66d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams    // Do ~1 billion ops
67d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams    for (int ct=0; ct < (1000 * (1000 / 20)); ct++) {
68d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams        for (int i=0; i < 1000; i++) {
69d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams            data_f[i] = (data_f[i] * 0.02f +
70d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams                         data_f[i+1] * 0.04f +
71d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams                         data_f[i+2] * 0.05f +
72d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams                         data_f[i+3] * 0.1f +
73d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams                         data_f[i+4] * 0.2f +
74d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams                         data_f[i+5] * 0.2f +
75d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams                         data_f[i+6] * 0.1f +
76d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams                         data_f[i+7] * 0.05f +
77d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams                         data_f[i+8] * 0.04f +
78d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams                         data_f[i+9] * 0.02f + 1.f);
793cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams        }
803cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams    }
813cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams
82d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams    endTime("scalar mad", 1e9);
83d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams}
84d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams
85d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams
86d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams#ifdef __ARM_NEON__
87d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams
88d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Samsstatic void test_fma() {
89d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams    for(int i=0; i<1020 * 4; i++) {
90d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams        data_f[i] = i;
91d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams    }
92d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams    float32x4_t c0_02 = vdupq_n_f32(0.02f);
93d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams    float32x4_t c0_04 = vdupq_n_f32(0.04f);
94d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams    float32x4_t c0_05 = vdupq_n_f32(0.05f);
95d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams    float32x4_t c0_10 = vdupq_n_f32(0.1f);
96d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams    float32x4_t c0_20 = vdupq_n_f32(0.2f);
97d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams    float32x4_t c1_00 = vdupq_n_f32(1.0f);
98d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams
99d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams    startTime();
100d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams
101d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams    // Do ~1 billion ops
102d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams    for (int ct=0; ct < (1000 * (1000 / 80)); ct++) {
103d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams        for (int i=0; i < 1000; i++) {
104d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams            float32x4_t t;
105d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams            t = vmulq_f32(vld1q_f32((float32_t *)&data_f[i]), c0_02);
106d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams            t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+4]), c0_04);
107d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams            t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+8]), c0_05);
108d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams            t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+12]), c0_10);
109d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams            t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+16]), c0_20);
110d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams            t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+20]), c0_20);
111d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams            t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+24]), c0_10);
112d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams            t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+28]), c0_05);
113d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams            t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+32]), c0_04);
114d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams            t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+36]), c0_02);
115d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams            t = vaddq_f32(t, c1_00);
116d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams            vst1q_f32((float32_t *)&data_f[i], t);
1173cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams        }
1183cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams    }
1193cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams
120d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams    endTime("neon fma", 1e9);
121d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams}
122d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams#endif
123d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams
1248f1da8fe2db97aff22320776b46adcd2333cc5a9Christopher Ferrisint fp_test(int, char**) {
125d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams    test_mad();
1263cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams
127d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams#ifdef __ARM_NEON__
128d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams    test_fma();
129d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams#endif
1303cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams
1313cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams    return 0;
1323cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams}
133