Bench.cpp revision b5fbd41b23bf309e6b420a3df4641603d55dcb68
1/*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include <android/log.h>
18#include <math.h>
19#include <stdlib.h>
20#include <unistd.h>
21
22#include "Bench.h"
23
24
25Bench::Bench()
26{
27    mTimeBucket = NULL;
28    mTimeBuckets = 0;
29    mTimeBucketDivisor = 1;
30
31    mMemLatencyLastSize = 0;
32    mMemDst = NULL;
33    mMemSrc = NULL;
34    mMemLoopCount = 0;
35}
36
37
38Bench::~Bench()
39{
40}
41
42uint64_t Bench::getTimeNanos() const
43{
44    struct timespec t;
45    clock_gettime(CLOCK_MONOTONIC, &t);
46    return t.tv_nsec + ((uint64_t)t.tv_sec * 1000 * 1000 * 1000);
47}
48
49uint64_t Bench::getTimeMillis() const
50{
51    return getTimeNanos() / 1000000;
52}
53
54
55void Bench::testWork(void *usr, uint32_t idx)
56{
57    Bench *b = (Bench *)usr;
58    //__android_log_print(ANDROID_LOG_INFO, "bench", "test %i   %p", idx, b);
59
60    float f1 = 0.f;
61    float f2 = 0.f;
62    float f3 = 0.f;
63    float f4 = 0.f;
64
65    float *ipk = b->mIpKernel[idx];
66    volatile float *src = b->mSrcBuf[idx];
67    volatile float *out = b->mOutBuf[idx];
68
69    //__android_log_print(ANDROID_LOG_INFO, "bench", "test %p %p %p", ipk, src, out);
70
71    do {
72
73        for (int i = 0; i < 1024; i++) {
74            f1 += src[i * 4] * ipk[i];
75            f2 += src[i * 4 + 1] * ipk[i];
76            f3 += src[i * 4 + 2] * ipk[i];
77            f4 += sqrtf(f1 + f2 + f3);
78        }
79        out[0] = f1;
80        out[1] = f2;
81        out[2] = f3;
82        out[3] = f4;
83
84    } while (b->incTimeBucket());
85}
86
87bool Bench::initIP() {
88    int workers = mWorkers.getWorkerCount();
89
90    mIpKernel = new float *[workers];
91    mSrcBuf = new float *[workers];
92    mOutBuf = new float *[workers];
93
94    for (int i = 0; i < workers; i++) {
95        mIpKernel[i] = new float[1024];
96        mSrcBuf[i] = new float[4096];
97        mOutBuf[i] = new float[4];
98    }
99
100    return true;
101}
102
103bool Bench::runPowerManagementTest(uint64_t options) {
104    //__android_log_print(ANDROID_LOG_INFO, "bench", "rpmt x %i", options);
105
106    mTimeBucketDivisor = 1000 * 1000;  // use ms
107    allocateBuckets(2 * 1000);
108
109    usleep(2 * 1000 * 1000);
110
111    //__android_log_print(ANDROID_LOG_INFO, "bench", "rpmt 2  b %i", mTimeBuckets);
112
113    mTimeStartNanos = getTimeNanos();
114    mTimeEndNanos = mTimeStartNanos + mTimeBuckets * mTimeBucketDivisor;
115    memset(mTimeBucket, 0, sizeof(uint32_t) * mTimeBuckets);
116
117    bool useMT = false;
118
119    //__android_log_print(ANDROID_LOG_INFO, "bench", "rpmt 2.1  b %i", mTimeBuckets);
120    mTimeEndGroupNanos = mTimeStartNanos;
121    do  {
122        // Advance 8ms
123        mTimeEndGroupNanos += 8 * 1000 * 1000;
124
125        int threads = useMT ? 1 : 0;
126        useMT = !useMT;
127        if ((options & 0x1f) != 0) {
128            threads = options & 0x1f;
129        }
130
131        //__android_log_print(ANDROID_LOG_INFO, "bench", "threads %i", threads);
132
133        mWorkers.launchWork(testWork, this, threads);
134    } while (mTimeEndGroupNanos <= mTimeEndNanos);
135
136    return true;
137}
138
139bool Bench::allocateBuckets(size_t bucketCount) {
140    if (bucketCount == mTimeBuckets) {
141        return true;
142    }
143
144    if (mTimeBucket != NULL) {
145        delete[] mTimeBucket;
146        mTimeBucket = NULL;
147    }
148
149    mTimeBuckets = bucketCount;
150    if (mTimeBuckets > 0) {
151        mTimeBucket = new uint32_t[mTimeBuckets];
152    }
153
154    return true;
155}
156
157bool Bench::init() {
158    mWorkers.init();
159
160    initIP();
161    //ALOGV("%p Launching thread(s), CPUs %i", mRSC, mWorkers.mCount + 1);
162
163    return true;
164}
165
166bool Bench::incTimeBucket() const {
167    uint64_t time = getTimeNanos();
168    uint64_t bucket = (time - mTimeStartNanos) / mTimeBucketDivisor;
169
170    if (bucket >= mTimeBuckets) {
171        return false;
172    }
173
174    __sync_fetch_and_add(&mTimeBucket[bucket], 1);
175
176    return time < mTimeEndGroupNanos;
177}
178
179void Bench::getData(float *data, size_t count) const {
180    if (count > mTimeBuckets) {
181        count = mTimeBuckets;
182    }
183    for (size_t ct = 0; ct < count; ct++) {
184        data[ct] = (float)mTimeBucket[ct];
185    }
186}
187
188bool Bench::runCPUHeatSoak(uint64_t /* options */)
189{
190    mTimeBucketDivisor = 1000 * 1000;  // use ms
191    allocateBuckets(1000);
192
193    mTimeStartNanos = getTimeNanos();
194    mTimeEndNanos = mTimeStartNanos + mTimeBuckets * mTimeBucketDivisor;
195    memset(mTimeBucket, 0, sizeof(uint32_t) * mTimeBuckets);
196
197    mTimeEndGroupNanos = mTimeEndNanos;
198    mWorkers.launchWork(testWork, this, 0);
199    return true;
200}
201
202float Bench::runMemoryBandwidthTest(uint64_t size)
203{
204    uint64_t t1 = getTimeMillis();
205    for (size_t ct = mMemLoopCount; ct > 0; ct--) {
206        memcpy(mMemDst, mMemSrc, size);
207    }
208    double dt = getTimeMillis() - t1;
209    dt /= 1000;
210
211    double bw = ((double)size) * mMemLoopCount / dt;
212    bw /= 1024 * 1024 * 1024;
213
214    float targetTime = 0.2f;
215    if (dt > targetTime) {
216        mMemLoopCount = (size_t)((double)mMemLoopCount / (dt / targetTime));
217    }
218
219    return (float)bw;
220}
221
222float Bench::runMemoryLatencyTest(uint64_t size)
223{
224    //__android_log_print(ANDROID_LOG_INFO, "bench", "latency %i", (int)size);
225    void ** sp = (void **)mMemSrc;
226    size_t maxIndex = size / sizeof(void *);
227    size_t loops = ((maxIndex / 2) & (~3));
228    //loops = 10;
229
230    if (size != mMemLatencyLastSize) {
231        __android_log_print(ANDROID_LOG_INFO, "bench", "latency build %i %i", (int)maxIndex, loops);
232        mMemLatencyLastSize = size;
233        memset((void *)mMemSrc, 0, mMemLatencyLastSize);
234
235        size_t lastIdx = 0;
236        for (size_t ct = 0; ct < loops; ct++) {
237            size_t ni = rand() * rand();
238            ni = ni % maxIndex;
239            while ((sp[ni] != NULL) || (ni == lastIdx)) {
240                ni++;
241                if (ni >= maxIndex) {
242                    ni = 1;
243                }
244    //            __android_log_print(ANDROID_LOG_INFO, "bench", "gen ni loop %i %i", lastIdx, ni);
245            }
246      //      __android_log_print(ANDROID_LOG_INFO, "bench", "gen ct = %i  %i  %i  %p  %p", (int)ct, lastIdx, ni, &sp[lastIdx], &sp[ni]);
247            sp[lastIdx] = &sp[ni];
248            lastIdx = ni;
249        }
250        sp[lastIdx] = 0;
251    }
252    //__android_log_print(ANDROID_LOG_INFO, "bench", "latency testing");
253
254    uint64_t t1 = getTimeNanos();
255    for (size_t ct = mMemLoopCount; ct > 0; ct--) {
256        size_t lc = 1;
257        volatile void *p = sp[0];
258        while (p != NULL) {
259            // Unroll once to minimize branching overhead.
260            void **pn = (void **)p;
261            p = pn[0];
262            pn = (void **)p;
263            p = pn[0];
264        }
265    }
266    //__android_log_print(ANDROID_LOG_INFO, "bench", "v %i %i", loops * mMemLoopCount, v);
267
268    double dt = getTimeNanos() - t1;
269    double dts = dt / 1000000000;
270    double lat = dt / (loops * mMemLoopCount);
271    __android_log_print(ANDROID_LOG_INFO, "bench", "latency ret %f", lat);
272
273    float targetTime = 0.2f;
274    if (dts > targetTime) {
275        mMemLoopCount = (size_t)((double)mMemLoopCount / (dts / targetTime));
276        if (mMemLoopCount < 1) {
277            mMemLoopCount = 1;
278        }
279    }
280
281    return (float)lat;
282}
283
284bool Bench::startMemTests()
285{
286    mMemSrc = (uint8_t *)malloc(1024*1024*64);
287    mMemDst = (uint8_t *)malloc(1024*1024*64);
288
289    memset(mMemSrc, 0, 1024*1024*16);
290    memset(mMemDst, 0, 1024*1024*16);
291
292    mMemLoopCount = 1;
293    uint64_t start = getTimeMillis();
294    while((getTimeMillis() - start) < 500) {
295        memcpy(mMemDst, mMemSrc, 1024);
296        mMemLoopCount++;
297    }
298    mMemLatencyLastSize = 0;
299    return true;
300}
301
302void Bench::endMemTests()
303{
304    free(mMemSrc);
305    free(mMemDst);
306    mMemSrc = NULL;
307    mMemDst = NULL;
308    mMemLatencyLastSize = 0;
309}
310
311void Bench::GflopKernelC() {
312    int halfKX = (mGFlop.kernelXSize / 2);
313    for (int x = halfKX; x < (mGFlop.imageXSize - halfKX - 1); x++) {
314        const float * krnPtr = mGFlop.kernelBuffer;
315        float sum = 0.f;
316
317        int srcInc = mGFlop.imageXSize - mGFlop.kernelXSize;
318        const float * srcPtr = &mGFlop.srcBuffer[x - halfKX];
319
320        for (int ix = 0; ix < mGFlop.kernelXSize; ix++) {
321            sum += srcPtr[0] * krnPtr[0];
322            krnPtr++;
323            srcPtr++;
324        }
325
326        float * dstPtr = &mGFlop.dstBuffer[x];
327        dstPtr[0] = sum;
328
329    }
330
331}
332
333void Bench::GflopKernelC_y3() {
334}
335
336float Bench::runGFlopsTest(uint64_t /* options */)
337{
338    mTimeBucketDivisor = 1000 * 1000;  // use ms
339    allocateBuckets(1000);
340
341    mTimeStartNanos = getTimeNanos();
342    mTimeEndNanos = mTimeStartNanos + mTimeBuckets * mTimeBucketDivisor;
343    memset(mTimeBucket, 0, sizeof(uint32_t) * mTimeBuckets);
344
345    mTimeEndGroupNanos = mTimeEndNanos;
346    mWorkers.launchWork(testWork, this, 0);
347
348    // Simulate image convolve
349    mGFlop.kernelXSize = 27;
350    mGFlop.imageXSize = 1024 * 1024;
351
352    mGFlop.srcBuffer = (float *)malloc(mGFlop.imageXSize * sizeof(float));
353    mGFlop.dstBuffer = (float *)malloc(mGFlop.imageXSize * sizeof(float));
354    mGFlop.kernelBuffer = (float *)malloc(mGFlop.kernelXSize * sizeof(float));
355
356    double ops = mGFlop.kernelXSize;
357    ops = ops * 2.f - 1.f;
358    ops *= mGFlop.imageXSize;
359
360    uint64_t t1 = getTimeNanos();
361    GflopKernelC();
362    double dt = getTimeNanos() - t1;
363
364    dt /= 1000.f * 1000.f * 1000.f;
365
366    double gflops = ops / dt / 1000000000.f;
367
368    __android_log_print(ANDROID_LOG_INFO, "bench", "v %f %f %f", dt, ops, gflops);
369
370    return (float)gflops;
371}
372
373
374