bandwidth.cpp revision 8f1da8fe2db97aff22320776b46adcd2333cc5a9
1/*
2 * Copyright (C) 2013 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "bandwidth.h"
18
19#include <ctype.h>
20#include <pthread.h>
21#include <sched.h>
22#include <sys/resource.h>
23#include <sys/time.h>
24#include <unistd.h>
25
26#include <map>
27#include <vector>
28
29
// Describes one "--name value" command line option.
struct option_t {
    // Option name without the leading "--". A NULL name marks the end of an
    // option table.
    const char *name;
    // True when the option's value is parsed as an integer, false when it is
    // kept as a string.
    bool int_type;
};
34
35option_t bandwidth_opts[] = {
36    { "size", true },
37    { "num_warm_loops", true },
38    { "num_loops", true },
39    { "type", false },
40    { NULL, false },
41};
42
43option_t per_core_opts[] = {
44    { "size", true },
45    { "num_warm_loops", true},
46    { "num_loops", true },
47    { "type", false },
48    { NULL, false },
49};
50
51option_t multithread_opts[] = {
52    { "size", true },
53    { "num_warm_loops", true},
54    { "num_loops", true },
55    { "type", false },
56    { "num_threads", true },
57    { NULL, false },
58};
59
// Value of a parsed option. Which member is valid depends on the option's
// int_type flag: int_value for integer options, char_value for strings.
typedef union {
    int int_value;
    const char *char_value;
} arg_value_t;

// Orders option names by their text. Without this the map would order (and
// match) keys by pointer address, so a lookup such as values["size"] would
// only find the entry stored under the option table's "size" pointer if the
// compiler happened to merge the two string literals.
struct arg_name_less {
    bool operator()(const char *lhs, const char *rhs) const {
        return strcmp(lhs, rhs) < 0;
    }
};

// Parsed options, keyed by option name (keys must not be NULL).
typedef std::map<const char*, arg_value_t, arg_name_less> arg_t;
65
66bool processBandwidthOptions(int argc, char** argv, option_t options[],
67                             arg_t *values) {
68    for (int i = 1; i < argc; i++) {
69        if (argv[i][0] == '-' && argv[i][1] == '-' && !isdigit(argv[i][2])) {
70            char *arg = &argv[i][2];
71
72            for (int j = 0; options[j].name != NULL; j++) {
73                if (strcmp(arg, options[j].name) == 0) {
74                    const char *name = options[j].name;
75                    if (i == argc - 1) {
76                        printf("The option --%s requires an argument.\n", name);
77                        return false;
78                    }
79                    if (options[j].int_type) {
80                        (*values)[name].int_value = strtol(argv[++i], NULL, 0);
81                    } else {
82                        (*values)[name].char_value = argv[++i];
83                    }
84                }
85            }
86        }
87    }
88
89    return true;
90}
91
92BandwidthBenchmark *createBandwidthBenchmarkObject(arg_t values) {
93    BandwidthBenchmark *bench = NULL;
94
95    const char *name = values["type"].char_value;
96    size_t size = 0;
97    if (values.count("size") > 0) {
98        size = values["size"].int_value;
99    }
100    if (strcmp(name, "copy_ldrd_strd") == 0) {
101        bench = new CopyLdrdStrdBenchmark();
102    } else if (strcmp(name, "copy_ldmia_stmia") == 0) {
103        bench = new CopyLdmiaStmiaBenchmark();
104    } else if (strcmp(name, "copy_vld1_vst1") == 0) {
105        bench = new CopyVld1Vst1Benchmark();
106    } else if (strcmp(name, "copy_vldr_vstr") == 0) {
107        bench = new CopyVldrVstrBenchmark();
108    } else if (strcmp(name, "copy_vldmia_vstmia") == 0) {
109        bench = new CopyVldmiaVstmiaBenchmark();
110    } else if (strcmp(name, "memcpy") == 0) {
111        bench = new MemcpyBenchmark();
112    } else if (strcmp(name, "write_strd") == 0) {
113        bench = new WriteStrdBenchmark();
114    } else if (strcmp(name, "write_stmia") == 0) {
115        bench = new WriteStmiaBenchmark();
116    } else if (strcmp(name, "write_vst1") == 0) {
117        bench = new WriteVst1Benchmark();
118    } else if (strcmp(name, "write_vstr") == 0) {
119        bench = new WriteVstrBenchmark();
120    } else if (strcmp(name, "write_vstmia") == 0) {
121        bench = new WriteVstmiaBenchmark();
122    } else if (strcmp(name, "memset") == 0) {
123        bench = new MemsetBenchmark();
124    } else if (strcmp(name, "read_ldrd") == 0) {
125        bench = new ReadLdrdBenchmark();
126    } else if (strcmp(name, "read_ldmia") == 0) {
127        bench = new ReadLdmiaBenchmark();
128    } else if (strcmp(name, "read_vld1") == 0) {
129        bench = new ReadVld1Benchmark();
130    } else if (strcmp(name, "read_vldr") == 0) {
131        bench = new ReadVldrBenchmark();
132    } else if (strcmp(name, "read_vldmia") == 0) {
133        bench = new ReadVldmiaBenchmark();
134    } else {
135        printf("Unknown type name %s\n", name);
136        return NULL;
137    }
138
139    if (!bench->setSize(values["size"].int_value)) {
140        printf("Failed to allocate buffers for benchmark.\n");
141        return NULL;
142    }
143
144    if (values.count("num_warm_loops") > 0) {
145        bench->set_num_loops(values["num_warm_loops"].int_value);
146    }
147    if (values.count("num_loops") > 0) {
148        bench->set_num_loops(values["num_loops"].int_value);
149    }
150
151    return bench;
152}
153
// Appends to *cpu_list the index of every cpu present in the affinity mask
// returned by sched_getaffinity() for pid 0, in ascending order.
//
// Returns false (after reporting the error with perror) if the affinity mask
// cannot be read, in which case *cpu_list is left untouched.
bool getAvailCpus(std::vector<int> *cpu_list) {
    cpu_set_t allowed;
    CPU_ZERO(&allowed);

    const int rc = sched_getaffinity(0, sizeof(allowed), &allowed);
    if (rc != 0) {
        perror("sched_getaffinity failed.");
        return false;
    }

    int cpu = 0;
    while (cpu < CPU_SETSIZE) {
        if (CPU_ISSET(cpu, &allowed)) {
            cpu_list->push_back(cpu);
        }
        ++cpu;
    }

    return true;
}
171
172typedef struct {
173    int core;
174    BandwidthBenchmark *bench;
175    double  avg_mb;
176    volatile bool *run;
177} thread_arg_t;
178
179void *runBandwidthThread(void *data) {
180    thread_arg_t *arg = reinterpret_cast<thread_arg_t *>(data);
181
182    if (arg->core >= 0) {
183        cpu_set_t cpuset;
184        CPU_ZERO(&cpuset);
185        CPU_SET(arg->core, &cpuset);
186        if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) {
187            perror("sched_setaffinity failed");
188            return NULL;
189        }
190    }
191
192    // Spinloop waiting for the run variable to get set to true.
193    while (!*arg->run) {
194    }
195
196    double avg_mb = 0;
197    for (int run = 1; ; run++) {
198        arg->bench->run();
199        if (!*arg->run) {
200            // Throw away the last data point since it's possible not
201            // all of the threads are running at this point.
202            break;
203        }
204        avg_mb = (avg_mb/run) * (run-1) + arg->bench->mb_per_sec()/run;
205    }
206    arg->avg_mb = avg_mb;
207
208    return NULL;
209}
210
211bool processThreadArgs(int argc, char** argv, option_t options[],
212                       arg_t *values) {
213    // Use some smaller values for the number of loops.
214    (*values)["num_warm_loops"].int_value = 1000000;
215    (*values)["num_loops"].int_value = 10000000;
216
217    if (!processBandwidthOptions(argc, argv, options, values)) {
218        return false;
219    }
220    if (values->count("size") > 0 && ((*values)["size"].int_value % 64) != 0) {
221        printf("The size values must be a multiple of 64.\n");
222        return false;
223    }
224    if (values->count("type") == 0) {
225        printf("Must specify the type value.\n");
226        return false;
227    }
228
229    BandwidthBenchmark *bench = createBandwidthBenchmarkObject(*values);
230    if (!bench) {
231        return false;
232    }
233
234    if (setpriority(PRIO_PROCESS, 0, -20)) {
235        perror("Unable to raise priority of process.");
236        return false;
237    }
238
239    printf("Calculating optimum run time...\n");
240    nsecs_t t = system_time();
241    bench->run();
242    t = system_time() - t;
243    // Since this is only going to be running single threaded, assume that
244    // if the number is set to ten times this value, we should get at least
245    // a couple of samples per thread.
246    int run_time = int((t/1000000000.0)*10 + 0.5) + 5;
247
248    (*values)["run_time"].int_value = run_time;
249    (*values)["size"].int_value = bench->size();
250    (*values)["num_warm_loops"].int_value = bench->num_warm_loops();
251    (*values)["num_loops"].int_value = bench->num_loops();
252    delete bench;
253
254    return true;
255}
256
257bool runThreadedTest(thread_arg_t args[], int num_threads, int run_time) {
258    pthread_t threads[num_threads];
259    volatile bool run = false;
260
261    int rc;
262    for (int i = 0; i < num_threads; i++) {
263        args[i].run = &run;
264        rc = pthread_create(&threads[i], NULL, runBandwidthThread,
265                            (void*)&args[i]);
266        if (rc != 0) {
267            printf("Failed to launch thread %d\n", i);
268            return false;
269        }
270    }
271
272    // Kick start the threads.
273    run = true;
274
275    // Let the threads run.
276    sleep(run_time);
277
278    // Stop the threads.
279    run = false;
280
281    // Wait for the threads to complete.
282    for (int i = 0; i < num_threads; i++) {
283        rc = pthread_join(threads[i], NULL);
284        if (rc != 0) {
285            printf("Thread %d failed to join.\n", i);
286            return false;
287        }
288        printf("Thread %d: bandwidth using %s %0.2f MB/s\n", i,
289               args[i].bench->getName(), args[i].avg_mb);
290    }
291
292    return true;
293}
294
295int per_core_bandwidth(int argc, char** argv) {
296    arg_t values;
297    if (!processThreadArgs(argc, argv, per_core_opts, &values)) {
298        return -1;
299    }
300
301    std::vector<int> cpu_list;
302    if (!getAvailCpus(&cpu_list)) {
303        printf("Failed to get available cpu list.\n");
304        return -1;
305    }
306
307    thread_arg_t args[cpu_list.size()];
308
309    int i = 0;
310    for (std::vector<int>::iterator it = cpu_list.begin();
311         it != cpu_list.end(); ++it, ++i) {
312        args[i].core = *it;
313        args[i].bench = createBandwidthBenchmarkObject(values);
314        if (!args[i].bench) {
315            return -1;
316        }
317    }
318
319    printf("Running on %d cores\n", cpu_list.size());
320    printf("  run_time = %ds\n", values["run_time"].int_value);
321    printf("  size = %d\n", values["size"].int_value);
322    printf("  num_warm_loops = %d\n", values["num_warm_loops"].int_value);
323    printf("  num_loops = %d\n", values["num_loops"].int_value);
324    printf("\n");
325
326    if (!runThreadedTest(args, cpu_list.size(), values["run_time"].int_value)) {
327        return -1;
328    }
329
330    return 0;
331}
332
333int multithread_bandwidth(int argc, char** argv) {
334    arg_t values;
335    if (!processThreadArgs(argc, argv, multithread_opts, &values)) {
336        return -1;
337    }
338    if (values.count("num_threads") == 0) {
339        printf("Must specify the num_threads value.\n");
340        return -1;
341    }
342    int num_threads = values["num_threads"].int_value;
343
344    thread_arg_t args[num_threads];
345
346    for (int i = 0; i < num_threads; i++) {
347        args[i].core = -1;
348        args[i].bench = createBandwidthBenchmarkObject(values);
349        if (!args[i].bench) {
350            return -1;
351        }
352    }
353
354    printf("Running %d threads\n", num_threads);
355    printf("  run_time = %ds\n", values["run_time"].int_value);
356    printf("  size = %d\n", values["size"].int_value);
357    printf("  num_warm_loops = %d\n", values["num_warm_loops"].int_value);
358    printf("  num_loops = %d\n", values["num_loops"].int_value);
359    printf("\n");
360
361    if (!runThreadedTest(args, num_threads, values["run_time"].int_value)) {
362        return -1;
363    }
364
365    return 0;
366}
367
368bool run_bandwidth_benchmark(int argc, char** argv, const char *name,
369                             std::vector<BandwidthBenchmark*> bench_objs) {
370    arg_t values;
371    values["size"].int_value = 0;
372    values["num_warm_loops"].int_value = 0;
373    values["num_loops"].int_value = 0;
374    if (!processBandwidthOptions(argc, argv, bandwidth_opts, &values)) {
375        return false;
376    }
377
378    size_t size = values["size"].int_value;
379    if ((size % 64) != 0) {
380        printf("The size value must be a multiple of 64.\n");
381        return false;
382    }
383
384    if (setpriority(PRIO_PROCESS, 0, -20)) {
385        perror("Unable to raise priority of process.");
386        return false;
387    }
388
389    bool preamble_printed = false;
390    size_t num_warm_loops = values["num_warm_loops"].int_value;
391    size_t num_loops = values["num_loops"].int_value;
392    for (std::vector<BandwidthBenchmark*>::iterator it = bench_objs.begin();
393         it != bench_objs.end(); ++it) {
394        if (!(*it)->canRun()) {
395            continue;
396        }
397        if (!(*it)->setSize(values["num_warm_loops"].int_value)) {
398            printf("Failed creating buffer for bandwidth test.\n");
399            return false;
400        }
401        if (num_warm_loops) {
402            (*it)->set_num_warm_loops(num_warm_loops);
403        }
404        if (num_loops) {
405            (*it)->set_num_loops(num_loops);
406        }
407        if (!preamble_printed) {
408            preamble_printed = true;
409            printf("Benchmarking %s bandwidth\n", name);
410            printf("  size = %d\n", (*it)->size());
411            printf("  num_warm_loops = %d\n", (*it)->num_warm_loops());
412            printf("  num_loops = %d\n\n", (*it)->num_loops());
413        }
414        (*it)->run();
415        printf("  %s bandwidth with %s: %0.2f MB/s\n", name, (*it)->getName(),
416               (*it)->mb_per_sec());
417    }
418
419    return true;
420}
421
422int copy_bandwidth(int argc, char** argv) {
423    std::vector<BandwidthBenchmark*> bench_objs;
424    bench_objs.push_back(new CopyLdrdStrdBenchmark());
425    bench_objs.push_back(new CopyLdmiaStmiaBenchmark());
426    bench_objs.push_back(new CopyVld1Vst1Benchmark());
427    bench_objs.push_back(new CopyVldrVstrBenchmark());
428    bench_objs.push_back(new CopyVldmiaVstmiaBenchmark());
429    bench_objs.push_back(new MemcpyBenchmark());
430
431    if (!run_bandwidth_benchmark(argc, argv, "copy", bench_objs)) {
432        return -1;
433    }
434    return 0;
435}
436
437int write_bandwidth(int argc, char** argv) {
438    std::vector<BandwidthBenchmark*> bench_objs;
439    bench_objs.push_back(new WriteStrdBenchmark());
440    bench_objs.push_back(new WriteStmiaBenchmark());
441    bench_objs.push_back(new WriteVst1Benchmark());
442    bench_objs.push_back(new WriteVstrBenchmark());
443    bench_objs.push_back(new WriteVstmiaBenchmark());
444    bench_objs.push_back(new MemsetBenchmark());
445
446    if (!run_bandwidth_benchmark(argc, argv, "write", bench_objs)) {
447        return -1;
448    }
449
450    return 0;
451}
452
453int read_bandwidth(int argc, char** argv) {
454    std::vector<BandwidthBenchmark*> bench_objs;
455    bench_objs.push_back(new ReadLdrdBenchmark());
456    bench_objs.push_back(new ReadLdmiaBenchmark());
457    bench_objs.push_back(new ReadVld1Benchmark());
458    bench_objs.push_back(new ReadVldrBenchmark());
459    bench_objs.push_back(new ReadVldmiaBenchmark());
460
461    if (!run_bandwidth_benchmark(argc, argv, "read", bench_objs)) {
462        return -1;
463    }
464    return 0;
465}
466