// Bench.cpp revision b5fbd41b23bf309e6b420a3df4641603d55dcb68
1/* 2 * Copyright (C) 2015 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#include <android/log.h> 18#include <math.h> 19#include <stdlib.h> 20#include <unistd.h> 21 22#include "Bench.h" 23 24 25Bench::Bench() 26{ 27 mTimeBucket = NULL; 28 mTimeBuckets = 0; 29 mTimeBucketDivisor = 1; 30 31 mMemLatencyLastSize = 0; 32 mMemDst = NULL; 33 mMemSrc = NULL; 34 mMemLoopCount = 0; 35} 36 37 38Bench::~Bench() 39{ 40} 41 42uint64_t Bench::getTimeNanos() const 43{ 44 struct timespec t; 45 clock_gettime(CLOCK_MONOTONIC, &t); 46 return t.tv_nsec + ((uint64_t)t.tv_sec * 1000 * 1000 * 1000); 47} 48 49uint64_t Bench::getTimeMillis() const 50{ 51 return getTimeNanos() / 1000000; 52} 53 54 55void Bench::testWork(void *usr, uint32_t idx) 56{ 57 Bench *b = (Bench *)usr; 58 //__android_log_print(ANDROID_LOG_INFO, "bench", "test %i %p", idx, b); 59 60 float f1 = 0.f; 61 float f2 = 0.f; 62 float f3 = 0.f; 63 float f4 = 0.f; 64 65 float *ipk = b->mIpKernel[idx]; 66 volatile float *src = b->mSrcBuf[idx]; 67 volatile float *out = b->mOutBuf[idx]; 68 69 //__android_log_print(ANDROID_LOG_INFO, "bench", "test %p %p %p", ipk, src, out); 70 71 do { 72 73 for (int i = 0; i < 1024; i++) { 74 f1 += src[i * 4] * ipk[i]; 75 f2 += src[i * 4 + 1] * ipk[i]; 76 f3 += src[i * 4 + 2] * ipk[i]; 77 f4 += sqrtf(f1 + f2 + f3); 78 } 79 out[0] = f1; 80 out[1] = f2; 81 out[2] = f3; 82 out[3] = f4; 83 84 } while (b->incTimeBucket()); 85} 86 87bool 
Bench::initIP() { 88 int workers = mWorkers.getWorkerCount(); 89 90 mIpKernel = new float *[workers]; 91 mSrcBuf = new float *[workers]; 92 mOutBuf = new float *[workers]; 93 94 for (int i = 0; i < workers; i++) { 95 mIpKernel[i] = new float[1024]; 96 mSrcBuf[i] = new float[4096]; 97 mOutBuf[i] = new float[4]; 98 } 99 100 return true; 101} 102 103bool Bench::runPowerManagementTest(uint64_t options) { 104 //__android_log_print(ANDROID_LOG_INFO, "bench", "rpmt x %i", options); 105 106 mTimeBucketDivisor = 1000 * 1000; // use ms 107 allocateBuckets(2 * 1000); 108 109 usleep(2 * 1000 * 1000); 110 111 //__android_log_print(ANDROID_LOG_INFO, "bench", "rpmt 2 b %i", mTimeBuckets); 112 113 mTimeStartNanos = getTimeNanos(); 114 mTimeEndNanos = mTimeStartNanos + mTimeBuckets * mTimeBucketDivisor; 115 memset(mTimeBucket, 0, sizeof(uint32_t) * mTimeBuckets); 116 117 bool useMT = false; 118 119 //__android_log_print(ANDROID_LOG_INFO, "bench", "rpmt 2.1 b %i", mTimeBuckets); 120 mTimeEndGroupNanos = mTimeStartNanos; 121 do { 122 // Advance 8ms 123 mTimeEndGroupNanos += 8 * 1000 * 1000; 124 125 int threads = useMT ? 
1 : 0; 126 useMT = !useMT; 127 if ((options & 0x1f) != 0) { 128 threads = options & 0x1f; 129 } 130 131 //__android_log_print(ANDROID_LOG_INFO, "bench", "threads %i", threads); 132 133 mWorkers.launchWork(testWork, this, threads); 134 } while (mTimeEndGroupNanos <= mTimeEndNanos); 135 136 return true; 137} 138 139bool Bench::allocateBuckets(size_t bucketCount) { 140 if (bucketCount == mTimeBuckets) { 141 return true; 142 } 143 144 if (mTimeBucket != NULL) { 145 delete[] mTimeBucket; 146 mTimeBucket = NULL; 147 } 148 149 mTimeBuckets = bucketCount; 150 if (mTimeBuckets > 0) { 151 mTimeBucket = new uint32_t[mTimeBuckets]; 152 } 153 154 return true; 155} 156 157bool Bench::init() { 158 mWorkers.init(); 159 160 initIP(); 161 //ALOGV("%p Launching thread(s), CPUs %i", mRSC, mWorkers.mCount + 1); 162 163 return true; 164} 165 166bool Bench::incTimeBucket() const { 167 uint64_t time = getTimeNanos(); 168 uint64_t bucket = (time - mTimeStartNanos) / mTimeBucketDivisor; 169 170 if (bucket >= mTimeBuckets) { 171 return false; 172 } 173 174 __sync_fetch_and_add(&mTimeBucket[bucket], 1); 175 176 return time < mTimeEndGroupNanos; 177} 178 179void Bench::getData(float *data, size_t count) const { 180 if (count > mTimeBuckets) { 181 count = mTimeBuckets; 182 } 183 for (size_t ct = 0; ct < count; ct++) { 184 data[ct] = (float)mTimeBucket[ct]; 185 } 186} 187 188bool Bench::runCPUHeatSoak(uint64_t /* options */) 189{ 190 mTimeBucketDivisor = 1000 * 1000; // use ms 191 allocateBuckets(1000); 192 193 mTimeStartNanos = getTimeNanos(); 194 mTimeEndNanos = mTimeStartNanos + mTimeBuckets * mTimeBucketDivisor; 195 memset(mTimeBucket, 0, sizeof(uint32_t) * mTimeBuckets); 196 197 mTimeEndGroupNanos = mTimeEndNanos; 198 mWorkers.launchWork(testWork, this, 0); 199 return true; 200} 201 202float Bench::runMemoryBandwidthTest(uint64_t size) 203{ 204 uint64_t t1 = getTimeMillis(); 205 for (size_t ct = mMemLoopCount; ct > 0; ct--) { 206 memcpy(mMemDst, mMemSrc, size); 207 } 208 double dt = 
getTimeMillis() - t1; 209 dt /= 1000; 210 211 double bw = ((double)size) * mMemLoopCount / dt; 212 bw /= 1024 * 1024 * 1024; 213 214 float targetTime = 0.2f; 215 if (dt > targetTime) { 216 mMemLoopCount = (size_t)((double)mMemLoopCount / (dt / targetTime)); 217 } 218 219 return (float)bw; 220} 221 222float Bench::runMemoryLatencyTest(uint64_t size) 223{ 224 //__android_log_print(ANDROID_LOG_INFO, "bench", "latency %i", (int)size); 225 void ** sp = (void **)mMemSrc; 226 size_t maxIndex = size / sizeof(void *); 227 size_t loops = ((maxIndex / 2) & (~3)); 228 //loops = 10; 229 230 if (size != mMemLatencyLastSize) { 231 __android_log_print(ANDROID_LOG_INFO, "bench", "latency build %i %i", (int)maxIndex, loops); 232 mMemLatencyLastSize = size; 233 memset((void *)mMemSrc, 0, mMemLatencyLastSize); 234 235 size_t lastIdx = 0; 236 for (size_t ct = 0; ct < loops; ct++) { 237 size_t ni = rand() * rand(); 238 ni = ni % maxIndex; 239 while ((sp[ni] != NULL) || (ni == lastIdx)) { 240 ni++; 241 if (ni >= maxIndex) { 242 ni = 1; 243 } 244 // __android_log_print(ANDROID_LOG_INFO, "bench", "gen ni loop %i %i", lastIdx, ni); 245 } 246 // __android_log_print(ANDROID_LOG_INFO, "bench", "gen ct = %i %i %i %p %p", (int)ct, lastIdx, ni, &sp[lastIdx], &sp[ni]); 247 sp[lastIdx] = &sp[ni]; 248 lastIdx = ni; 249 } 250 sp[lastIdx] = 0; 251 } 252 //__android_log_print(ANDROID_LOG_INFO, "bench", "latency testing"); 253 254 uint64_t t1 = getTimeNanos(); 255 for (size_t ct = mMemLoopCount; ct > 0; ct--) { 256 size_t lc = 1; 257 volatile void *p = sp[0]; 258 while (p != NULL) { 259 // Unroll once to minimize branching overhead. 
260 void **pn = (void **)p; 261 p = pn[0]; 262 pn = (void **)p; 263 p = pn[0]; 264 } 265 } 266 //__android_log_print(ANDROID_LOG_INFO, "bench", "v %i %i", loops * mMemLoopCount, v); 267 268 double dt = getTimeNanos() - t1; 269 double dts = dt / 1000000000; 270 double lat = dt / (loops * mMemLoopCount); 271 __android_log_print(ANDROID_LOG_INFO, "bench", "latency ret %f", lat); 272 273 float targetTime = 0.2f; 274 if (dts > targetTime) { 275 mMemLoopCount = (size_t)((double)mMemLoopCount / (dts / targetTime)); 276 if (mMemLoopCount < 1) { 277 mMemLoopCount = 1; 278 } 279 } 280 281 return (float)lat; 282} 283 284bool Bench::startMemTests() 285{ 286 mMemSrc = (uint8_t *)malloc(1024*1024*64); 287 mMemDst = (uint8_t *)malloc(1024*1024*64); 288 289 memset(mMemSrc, 0, 1024*1024*16); 290 memset(mMemDst, 0, 1024*1024*16); 291 292 mMemLoopCount = 1; 293 uint64_t start = getTimeMillis(); 294 while((getTimeMillis() - start) < 500) { 295 memcpy(mMemDst, mMemSrc, 1024); 296 mMemLoopCount++; 297 } 298 mMemLatencyLastSize = 0; 299 return true; 300} 301 302void Bench::endMemTests() 303{ 304 free(mMemSrc); 305 free(mMemDst); 306 mMemSrc = NULL; 307 mMemDst = NULL; 308 mMemLatencyLastSize = 0; 309} 310 311void Bench::GflopKernelC() { 312 int halfKX = (mGFlop.kernelXSize / 2); 313 for (int x = halfKX; x < (mGFlop.imageXSize - halfKX - 1); x++) { 314 const float * krnPtr = mGFlop.kernelBuffer; 315 float sum = 0.f; 316 317 int srcInc = mGFlop.imageXSize - mGFlop.kernelXSize; 318 const float * srcPtr = &mGFlop.srcBuffer[x - halfKX]; 319 320 for (int ix = 0; ix < mGFlop.kernelXSize; ix++) { 321 sum += srcPtr[0] * krnPtr[0]; 322 krnPtr++; 323 srcPtr++; 324 } 325 326 float * dstPtr = &mGFlop.dstBuffer[x]; 327 dstPtr[0] = sum; 328 329 } 330 331} 332 333void Bench::GflopKernelC_y3() { 334} 335 336float Bench::runGFlopsTest(uint64_t /* options */) 337{ 338 mTimeBucketDivisor = 1000 * 1000; // use ms 339 allocateBuckets(1000); 340 341 mTimeStartNanos = getTimeNanos(); 342 mTimeEndNanos = 
mTimeStartNanos + mTimeBuckets * mTimeBucketDivisor; 343 memset(mTimeBucket, 0, sizeof(uint32_t) * mTimeBuckets); 344 345 mTimeEndGroupNanos = mTimeEndNanos; 346 mWorkers.launchWork(testWork, this, 0); 347 348 // Simulate image convolve 349 mGFlop.kernelXSize = 27; 350 mGFlop.imageXSize = 1024 * 1024; 351 352 mGFlop.srcBuffer = (float *)malloc(mGFlop.imageXSize * sizeof(float)); 353 mGFlop.dstBuffer = (float *)malloc(mGFlop.imageXSize * sizeof(float)); 354 mGFlop.kernelBuffer = (float *)malloc(mGFlop.kernelXSize * sizeof(float)); 355 356 double ops = mGFlop.kernelXSize; 357 ops = ops * 2.f - 1.f; 358 ops *= mGFlop.imageXSize; 359 360 uint64_t t1 = getTimeNanos(); 361 GflopKernelC(); 362 double dt = getTimeNanos() - t1; 363 364 dt /= 1000.f * 1000.f * 1000.f; 365 366 double gflops = ops / dt / 1000000000.f; 367 368 __android_log_print(ANDROID_LOG_INFO, "bench", "v %f %f %f", dt, ops, gflops); 369 370 return (float)gflops; 371} 372 373 374