13cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams/* 23cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams * Copyright (C) 2007 The Android Open Source Project 33cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams * 43cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams * Licensed under the Apache License, Version 2.0 (the "License"); 53cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams * you may not use this file except in compliance with the License. 63cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams * You may obtain a copy of the License at 73cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams * 83cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams * http://www.apache.org/licenses/LICENSE-2.0 93cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams * 103cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams * Unless required by applicable law or agreed to in writing, software 113cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams * distributed under the License is distributed on an "AS IS" BASIS, 123cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 133cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams * See the License for the specific language governing permissions and 143cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams * limitations under the License. 153cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams */ 163cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams 173cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams#include <stdio.h> 183cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams#include <stdlib.h> 193cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams#include <string.h> 203cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams#include <sys/time.h> 213cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams#include <time.h> 223cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams#include <unistd.h> 233cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams#include <sched.h> 243cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams#include <sys/resource.h> 253cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams#include <sys/syscall.h> 263cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams#include <sys/types.h> 273cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams#include <sys/mman.h> 283cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams 29d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams#ifdef __ARM_NEON__ 30d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams#include <arm_neon.h> 31d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams#endif 323cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams 333cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams 343cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Samstypedef long long nsecs_t; 35d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Samsstatic nsecs_t gTime; 36d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Samsfloat data_f[1024 * 128]; 373cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams 383cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Samsstatic nsecs_t system_time() 393cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams{ 403cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams struct timespec t; 413cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams t.tv_sec = t.tv_nsec = 0; 423cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams clock_gettime(CLOCK_MONOTONIC, &t); 433cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams return nsecs_t(t.tv_sec)*1000000000LL + t.tv_nsec; 443cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams} 453cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams 46d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Samsstatic void startTime() 47d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams{ 48d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams gTime = system_time(); 49d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams} 50d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams 51d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Samsstatic void endTime(const char *str, double ops) 52d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams{ 53d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams nsecs_t t = system_time() - gTime; 54d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams double ds = ((double)t) / 1e9; 55d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams printf("Test: %s, %f Mops\n", str, ops / ds / 1e6); 56d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams} 573cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams 583cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams 59d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Samsstatic void test_mad() { 60d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams for(int i=0; i<1020; i++) { 61d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams data_f[i] = i; 62d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams } 63d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams 64d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams startTime(); 65d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams 66d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams float total = 0; 67d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams // Do ~1 billion ops 68d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams for (int ct=0; ct < (1000 * (1000 / 20)); ct++) { 69d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams for (int i=0; i < 1000; i++) { 70d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams data_f[i] = (data_f[i] * 0.02f + 71d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams data_f[i+1] * 0.04f + 72d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams data_f[i+2] * 0.05f + 73d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams data_f[i+3] * 0.1f + 74d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams data_f[i+4] * 0.2f + 75d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams data_f[i+5] * 0.2f + 76d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams data_f[i+6] * 0.1f + 77d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams data_f[i+7] * 0.05f + 78d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams data_f[i+8] * 0.04f + 79d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams data_f[i+9] * 0.02f + 1.f); 803cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams } 813cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams } 823cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams 83d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams endTime("scalar mad", 1e9); 84d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams} 85d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams 86d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams 87d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams#ifdef __ARM_NEON__ 88d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams 89d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Samsstatic void test_fma() { 90d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams for(int i=0; i<1020 * 4; i++) { 91d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams data_f[i] = i; 92d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams } 93d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams float32x4_t c0_02 = vdupq_n_f32(0.02f); 94d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams float32x4_t c0_04 = vdupq_n_f32(0.04f); 95d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams float32x4_t c0_05 = vdupq_n_f32(0.05f); 96d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams float32x4_t c0_10 = vdupq_n_f32(0.1f); 97d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams float32x4_t c0_20 = vdupq_n_f32(0.2f); 98d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams float32x4_t c1_00 = vdupq_n_f32(1.0f); 99d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams 100d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams startTime(); 101d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams 102d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams float total = 0; 103d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams // Do ~1 billion ops 104d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams for (int ct=0; ct < (1000 * (1000 / 80)); ct++) { 105d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams for (int i=0; i < 1000; i++) { 106d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams float32x4_t t; 107d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams t = vmulq_f32(vld1q_f32((float32_t *)&data_f[i]), c0_02); 108d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+4]), c0_04); 109d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+8]), c0_05); 110d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+12]), c0_10); 111d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+16]), c0_20); 112d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+20]), c0_20); 113d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+24]), c0_10); 114d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+28]), c0_05); 115d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+32]), c0_04); 116d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+36]), c0_02); 117d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams t = vaddq_f32(t, c1_00); 118d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams vst1q_f32((float32_t *)&data_f[i], t); 1193cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams } 1203cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams } 1213cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams 122d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams endTime("neon fma", 1e9); 123d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams} 124d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams#endif 125d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams 126d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Samsint fp_test(int argc, char** argv) { 127d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams test_mad(); 1283cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams 129d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams#ifdef __ARM_NEON__ 130d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams test_fma(); 131d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams#endif 1323cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams 1333cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams return 0; 1343cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams} 1353cadc3a6df0b13deabb1b3423aafa5ff8bbfdf23Jason Sams 136d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams 137d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams 138d3ecd8f4ebc16dd71393857dec64f913506fd37cJason Sams 139