1/*
2 * Copyright (C) 2013 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
#include "bandwidth.h"

#include <ctype.h>
#include <pthread.h>
#include <sched.h>
#include <string.h>
#include <sys/resource.h>
#include <sys/time.h>
#include <unistd.h>

#include <map>
#include <vector>
28
29
// Describes one "--name value" command-line option understood by a test.
struct option_t {
    // Option name as typed on the command line, without the leading "--".
    // A NULL name marks the end of an option table.
    const char *name;

    // True when the option's argument is converted to an integer; false when
    // the argument string itself is stored.
    bool int_type;
};
34
// Options accepted by run_bandwidth_benchmark(). The table is terminated by
// an entry whose name is NULL. "type" is accepted by the parser, but
// run_bandwidth_benchmark() does not read it.
option_t bandwidth_opts[] = {
    { "size", true },
    { "num_warm_loops", true },
    { "num_loops", true },
    { "type", false },
    { NULL, false },
};
42
// Options accepted by per_core_bandwidth(). The table is terminated by an
// entry whose name is NULL. "type" selects the benchmark that is created for
// each core.
option_t per_core_opts[] = {
    { "size", true },
    { "num_warm_loops", true},
    { "num_loops", true },
    { "type", false },
    { NULL, false },
};
50
// Options accepted by multithread_bandwidth(). Same as the per-core options
// plus "num_threads", the number of benchmark threads to launch. The table is
// terminated by an entry whose name is NULL.
option_t multithread_opts[] = {
    { "size", true },
    { "num_warm_loops", true},
    { "num_loops", true },
    { "type", false },
    { "num_threads", true },
    { NULL, false },
};
59
// Orders option names by their text instead of by pointer value. Without
// this, a lookup such as values["size"] only finds the entry stored under the
// option table's "size" pointer if the compiler happens to merge identical
// string literals. Keys must not be NULL.
struct arg_name_less_t {
    bool operator()(const char *lhs, const char *rhs) const {
        return strcmp(lhs, rhs) < 0;
    }
};

// Parsed value of a single option: an integer for int_type options,
// otherwise a pointer to the argument string.
typedef union {
    int int_value;
    const char *char_value;
} arg_value_t;

// Maps an option name to its parsed value. The map stores the name pointers
// it is given; it does not copy the strings.
typedef std::map<const char*, arg_value_t, arg_name_less_t> arg_t;
65
66bool processBandwidthOptions(int argc, char** argv, option_t options[],
67                             arg_t *values) {
68    for (int i = 1; i < argc; i++) {
69        if (argv[i][0] == '-' && argv[i][1] == '-' && !isdigit(argv[i][2])) {
70            char *arg = &argv[i][2];
71
72            for (int j = 0; options[j].name != NULL; j++) {
73                if (strcmp(arg, options[j].name) == 0) {
74                    const char *name = options[j].name;
75                    if (i == argc - 1) {
76                        printf("The option --%s requires an argument.\n", name);
77                        return false;
78                    }
79                    if (options[j].int_type) {
80                        (*values)[name].int_value = strtol(argv[++i], NULL, 0);
81                    } else {
82                        (*values)[name].char_value = argv[++i];
83                    }
84                }
85            }
86        }
87    }
88
89    return true;
90}
91
92BandwidthBenchmark *createBandwidthBenchmarkObject(arg_t values) {
93    BandwidthBenchmark *bench = NULL;
94
95    const char *name = values["type"].char_value;
96    size_t size = 0;
97    if (values.count("size") > 0) {
98        size = values["size"].int_value;
99    }
100    if (strcmp(name, "copy_ldrd_strd") == 0) {
101        bench = new CopyLdrdStrdBenchmark();
102    } else if (strcmp(name, "copy_ldmia_stmia") == 0) {
103        bench = new CopyLdmiaStmiaBenchmark();
104    } else if (strcmp(name, "copy_vld1_vst1") == 0) {
105        bench = new CopyVld1Vst1Benchmark();
106    } else if (strcmp(name, "copy_vldr_vstr") == 0) {
107        bench = new CopyVldrVstrBenchmark();
108    } else if (strcmp(name, "copy_vldmia_vstmia") == 0) {
109        bench = new CopyVldmiaVstmiaBenchmark();
110    } else if (strcmp(name, "memcpy") == 0) {
111        bench = new MemcpyBenchmark();
112    } else if (strcmp(name, "write_strd") == 0) {
113        bench = new WriteStrdBenchmark();
114    } else if (strcmp(name, "write_stmia") == 0) {
115        bench = new WriteStmiaBenchmark();
116    } else if (strcmp(name, "write_vst1") == 0) {
117        bench = new WriteVst1Benchmark();
118    } else if (strcmp(name, "write_vstr") == 0) {
119        bench = new WriteVstrBenchmark();
120    } else if (strcmp(name, "write_vstmia") == 0) {
121        bench = new WriteVstmiaBenchmark();
122    } else if (strcmp(name, "memset") == 0) {
123        bench = new MemsetBenchmark();
124    } else if (strcmp(name, "read_ldrd") == 0) {
125        bench = new ReadLdrdBenchmark();
126    } else if (strcmp(name, "read_ldmia") == 0) {
127        bench = new ReadLdmiaBenchmark();
128    } else if (strcmp(name, "read_vld1") == 0) {
129        bench = new ReadVld1Benchmark();
130    } else if (strcmp(name, "read_vldr") == 0) {
131        bench = new ReadVldrBenchmark();
132    } else if (strcmp(name, "read_vldmia") == 0) {
133        bench = new ReadVldmiaBenchmark();
134    } else {
135        printf("Unknown type name %s\n", name);
136        return NULL;
137    }
138
139    if (!bench->setSize(size)) {
140        printf("Failed to allocate buffers for benchmark.\n");
141        delete bench;
142        return NULL;
143    }
144
145    if (values.count("num_warm_loops") > 0) {
146        bench->set_num_loops(values["num_warm_loops"].int_value);
147    }
148    if (values.count("num_loops") > 0) {
149        bench->set_num_loops(values["num_loops"].int_value);
150    }
151
152    return bench;
153}
154
// Appends to *cpu_list, in increasing order, the index of every CPU present
// in the affinity mask reported by sched_getaffinity() for pid 0.
//
// Returns false (after reporting the error with perror) if the affinity mask
// cannot be read; *cpu_list is left untouched in that case.
bool getAvailCpus(std::vector<int> *cpu_list) {
    cpu_set_t allowed;
    CPU_ZERO(&allowed);

    const int rc = sched_getaffinity(0, sizeof(allowed), &allowed);
    if (rc != 0) {
        perror("sched_getaffinity failed.");
        return false;
    }

    int cpu = 0;
    while (cpu < CPU_SETSIZE) {
        if (CPU_ISSET(cpu, &allowed)) {
            cpu_list->push_back(cpu);
        }
        ++cpu;
    }

    return true;
}
172
173typedef struct {
174    int core;
175    BandwidthBenchmark *bench;
176    double  avg_mb;
177    volatile bool *run;
178} thread_arg_t;
179
180void *runBandwidthThread(void *data) {
181    thread_arg_t *arg = reinterpret_cast<thread_arg_t *>(data);
182
183    if (arg->core >= 0) {
184        cpu_set_t cpuset;
185        CPU_ZERO(&cpuset);
186        CPU_SET(arg->core, &cpuset);
187        if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) {
188            perror("sched_setaffinity failed");
189            return NULL;
190        }
191    }
192
193    // Spinloop waiting for the run variable to get set to true.
194    while (!*arg->run) {
195    }
196
197    double avg_mb = 0;
198    for (int run = 1; ; run++) {
199        arg->bench->run();
200        if (!*arg->run) {
201            // Throw away the last data point since it's possible not
202            // all of the threads are running at this point.
203            break;
204        }
205        avg_mb = (avg_mb/run) * (run-1) + arg->bench->mb_per_sec()/run;
206    }
207    arg->avg_mb = avg_mb;
208
209    return NULL;
210}
211
212bool processThreadArgs(int argc, char** argv, option_t options[],
213                       arg_t *values) {
214    // Use some smaller values for the number of loops.
215    (*values)["num_warm_loops"].int_value = 1000000;
216    (*values)["num_loops"].int_value = 10000000;
217
218    if (!processBandwidthOptions(argc, argv, options, values)) {
219        return false;
220    }
221    if (values->count("size") > 0 && ((*values)["size"].int_value % 64) != 0) {
222        printf("The size values must be a multiple of 64.\n");
223        return false;
224    }
225    if (values->count("type") == 0) {
226        printf("Must specify the type value.\n");
227        return false;
228    }
229
230    BandwidthBenchmark *bench = createBandwidthBenchmarkObject(*values);
231    if (!bench) {
232        return false;
233    }
234
235    if (setpriority(PRIO_PROCESS, 0, -20)) {
236        perror("Unable to raise priority of process.");
237        return false;
238    }
239
240    printf("Calculating optimum run time...\n");
241    nsecs_t t = system_time();
242    bench->run();
243    t = system_time() - t;
244    // Since this is only going to be running single threaded, assume that
245    // if the number is set to ten times this value, we should get at least
246    // a couple of samples per thread.
247    int run_time = int((t/1000000000.0)*10 + 0.5) + 5;
248
249    (*values)["run_time"].int_value = run_time;
250    (*values)["size"].int_value = bench->size();
251    (*values)["num_warm_loops"].int_value = bench->num_warm_loops();
252    (*values)["num_loops"].int_value = bench->num_loops();
253    delete bench;
254
255    return true;
256}
257
258bool runThreadedTest(thread_arg_t args[], int num_threads, int run_time) {
259    pthread_t threads[num_threads];
260    volatile bool run = false;
261
262    int rc;
263    for (int i = 0; i < num_threads; i++) {
264        args[i].run = &run;
265        rc = pthread_create(&threads[i], NULL, runBandwidthThread,
266                            (void*)&args[i]);
267        if (rc != 0) {
268            printf("Failed to launch thread %d\n", i);
269            return false;
270        }
271    }
272
273    // Kick start the threads.
274    run = true;
275
276    // Let the threads run.
277    sleep(run_time);
278
279    // Stop the threads.
280    run = false;
281
282    // Wait for the threads to complete.
283    for (int i = 0; i < num_threads; i++) {
284        rc = pthread_join(threads[i], NULL);
285        if (rc != 0) {
286            printf("Thread %d failed to join.\n", i);
287            return false;
288        }
289        printf("Thread %d: bandwidth using %s %0.2f MB/s\n", i,
290               args[i].bench->getName(), args[i].avg_mb);
291    }
292
293    return true;
294}
295
296int per_core_bandwidth(int argc, char** argv) {
297    arg_t values;
298    if (!processThreadArgs(argc, argv, per_core_opts, &values)) {
299        return -1;
300    }
301
302    std::vector<int> cpu_list;
303    if (!getAvailCpus(&cpu_list)) {
304        printf("Failed to get available cpu list.\n");
305        return -1;
306    }
307
308    thread_arg_t args[cpu_list.size()];
309
310    int i = 0;
311    for (std::vector<int>::iterator it = cpu_list.begin();
312         it != cpu_list.end(); ++it, ++i) {
313        args[i].core = *it;
314        args[i].bench = createBandwidthBenchmarkObject(values);
315        if (!args[i].bench) {
316            for (int j = 0; j < i; j++)
317                delete args[j].bench;
318            return -1;
319        }
320    }
321
322    printf("Running on %d cores\n", cpu_list.size());
323    printf("  run_time = %ds\n", values["run_time"].int_value);
324    printf("  size = %d\n", values["size"].int_value);
325    printf("  num_warm_loops = %d\n", values["num_warm_loops"].int_value);
326    printf("  num_loops = %d\n", values["num_loops"].int_value);
327    printf("\n");
328
329    if (!runThreadedTest(args, cpu_list.size(), values["run_time"].int_value)) {
330        return -1;
331    }
332
333    return 0;
334}
335
336int multithread_bandwidth(int argc, char** argv) {
337    arg_t values;
338    if (!processThreadArgs(argc, argv, multithread_opts, &values)) {
339        return -1;
340    }
341    if (values.count("num_threads") == 0) {
342        printf("Must specify the num_threads value.\n");
343        return -1;
344    }
345    int num_threads = values["num_threads"].int_value;
346
347    thread_arg_t args[num_threads];
348
349    for (int i = 0; i < num_threads; i++) {
350        args[i].core = -1;
351        args[i].bench = createBandwidthBenchmarkObject(values);
352        if (!args[i].bench) {
353            for (int j = 0; j < i; j++)
354                delete args[j].bench;
355            return -1;
356        }
357    }
358
359    printf("Running %d threads\n", num_threads);
360    printf("  run_time = %ds\n", values["run_time"].int_value);
361    printf("  size = %d\n", values["size"].int_value);
362    printf("  num_warm_loops = %d\n", values["num_warm_loops"].int_value);
363    printf("  num_loops = %d\n", values["num_loops"].int_value);
364    printf("\n");
365
366    if (!runThreadedTest(args, num_threads, values["run_time"].int_value)) {
367        return -1;
368    }
369
370    return 0;
371}
372
373bool run_bandwidth_benchmark(int argc, char** argv, const char *name,
374                             std::vector<BandwidthBenchmark*> bench_objs) {
375    arg_t values;
376    values["size"].int_value = 0;
377    values["num_warm_loops"].int_value = 0;
378    values["num_loops"].int_value = 0;
379    if (!processBandwidthOptions(argc, argv, bandwidth_opts, &values)) {
380        return false;
381    }
382
383    size_t size = values["size"].int_value;
384    if ((size % 64) != 0) {
385        printf("The size value must be a multiple of 64.\n");
386        return false;
387    }
388
389    if (setpriority(PRIO_PROCESS, 0, -20)) {
390        perror("Unable to raise priority of process.");
391        return false;
392    }
393
394    bool preamble_printed = false;
395    size_t num_warm_loops = values["num_warm_loops"].int_value;
396    size_t num_loops = values["num_loops"].int_value;
397    for (std::vector<BandwidthBenchmark*>::iterator it = bench_objs.begin();
398         it != bench_objs.end(); ++it) {
399        if (!(*it)->canRun()) {
400            continue;
401        }
402        if (!(*it)->setSize(values["size"].int_value)) {
403            printf("Failed creating buffer for bandwidth test.\n");
404            return false;
405        }
406        if (num_warm_loops) {
407            (*it)->set_num_warm_loops(num_warm_loops);
408        }
409        if (num_loops) {
410            (*it)->set_num_loops(num_loops);
411        }
412        if (!preamble_printed) {
413            preamble_printed = true;
414            printf("Benchmarking %s bandwidth\n", name);
415            printf("  size = %d\n", (*it)->size());
416            printf("  num_warm_loops = %d\n", (*it)->num_warm_loops());
417            printf("  num_loops = %d\n\n", (*it)->num_loops());
418        }
419        (*it)->run();
420        printf("  %s bandwidth with %s: %0.2f MB/s\n", name, (*it)->getName(),
421               (*it)->mb_per_sec());
422    }
423
424    return true;
425}
426
427int copy_bandwidth(int argc, char** argv) {
428    std::vector<BandwidthBenchmark*> bench_objs;
429    bench_objs.push_back(new CopyLdrdStrdBenchmark());
430    bench_objs.push_back(new CopyLdmiaStmiaBenchmark());
431    bench_objs.push_back(new CopyVld1Vst1Benchmark());
432    bench_objs.push_back(new CopyVldrVstrBenchmark());
433    bench_objs.push_back(new CopyVldmiaVstmiaBenchmark());
434    bench_objs.push_back(new MemcpyBenchmark());
435
436    if (!run_bandwidth_benchmark(argc, argv, "copy", bench_objs)) {
437        return -1;
438    }
439    return 0;
440}
441
442int write_bandwidth(int argc, char** argv) {
443    std::vector<BandwidthBenchmark*> bench_objs;
444    bench_objs.push_back(new WriteStrdBenchmark());
445    bench_objs.push_back(new WriteStmiaBenchmark());
446    bench_objs.push_back(new WriteVst1Benchmark());
447    bench_objs.push_back(new WriteVstrBenchmark());
448    bench_objs.push_back(new WriteVstmiaBenchmark());
449    bench_objs.push_back(new MemsetBenchmark());
450
451    if (!run_bandwidth_benchmark(argc, argv, "write", bench_objs)) {
452        return -1;
453    }
454
455    return 0;
456}
457
458int read_bandwidth(int argc, char** argv) {
459    std::vector<BandwidthBenchmark*> bench_objs;
460    bench_objs.push_back(new ReadLdrdBenchmark());
461    bench_objs.push_back(new ReadLdmiaBenchmark());
462    bench_objs.push_back(new ReadVld1Benchmark());
463    bench_objs.push_back(new ReadVldrBenchmark());
464    bench_objs.push_back(new ReadVldmiaBenchmark());
465
466    if (!run_bandwidth_benchmark(argc, argv, "read", bench_objs)) {
467        return -1;
468    }
469    return 0;
470}
471