// bandwidth.cpp revision f90ab5f4ab00ddfbcf313be9001837aaefd64ba2
1/*
2 * Copyright (C) 2013 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
#include <ctype.h>
#include <pthread.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/resource.h>
#include <sys/time.h>
#include <unistd.h>

#include <map>
#include <vector>

#include "bandwidth.h"
28
29
// Describes one "--name value" command-line option a benchmark command
// accepts.
struct option_t {
    // Option name as typed on the command line, without the leading "--".
    // A NULL name marks the end of an option table.
    const char *name;
    // True when the option's argument is parsed as an integer; false when
    // the argument is kept as a string.
    bool int_type;
};
34
35option_t bandwidth_opts[] = {
36    { "size", true },
37    { "num_warm_loops", true },
38    { "num_loops", true },
39    { "type", false },
40    { NULL, false },
41};
42
43option_t per_core_opts[] = {
44    { "size", true },
45    { "num_warm_loops", true},
46    { "num_loops", true },
47    { "type", false },
48    { NULL, false },
49};
50
51option_t multithread_opts[] = {
52    { "size", true },
53    { "num_warm_loops", true},
54    { "num_loops", true },
55    { "type", false },
56    { "num_threads", true },
57    { NULL, false },
58};
59
// Value of one parsed command-line option. Integer options use int_value;
// string options use char_value.
typedef union {
    int int_value;
    const char *char_value;
} arg_value_t;

// Orders option names by their text rather than by pointer value, so a
// lookup succeeds regardless of which copy of the name string is used as
// the key (the default comparator only worked when identical string
// literals happened to share one address).
struct arg_name_less {
    bool operator()(const char *lhs, const char *rhs) const {
        return strcmp(lhs, rhs) < 0;
    }
};

// Maps an option name to its parsed value. Keys must be non-NULL.
typedef std::map<const char*, arg_value_t, arg_name_less> arg_t;
65
66bool processBandwidthOptions(int argc, char** argv, option_t options[],
67                             arg_t *values) {
68    for (int i = 1; i < argc; i++) {
69        if (argv[i][0] == '-' && argv[i][1] == '-' && !isdigit(argv[i][2])) {
70            char *arg = &argv[i][2];
71
72            for (int j = 0; options[j].name != NULL; j++) {
73                if (strcmp(arg, options[j].name) == 0) {
74                    const char *name = options[j].name;
75                    if (i == argc - 1) {
76                        printf("The option --%s requires an argument.\n", name);
77                        return false;
78                    }
79                    if (options[j].int_type) {
80                        (*values)[name].int_value = strtol(argv[++i], NULL, 0);
81                    } else {
82                        (*values)[name].char_value = argv[++i];
83                    }
84                }
85            }
86        }
87    }
88
89    return true;
90}
91
92BandwidthBenchmark *createBandwidthBenchmarkObject(arg_t values) {
93    BandwidthBenchmark *bench = NULL;
94
95    const char *name = values["type"].char_value;
96    size_t size = 0;
97    if (values.count("size") > 0) {
98        size = values["size"].int_value;
99    }
100    if (strcmp(name, "copy_ldrd_strd") == 0) {
101        bench = new CopyLdrdStrdBenchmark();
102    } else if (strcmp(name, "copy_ldmia_stmia") == 0) {
103        bench = new CopyLdmiaStmiaBenchmark();
104    } else if (strcmp(name, "copy_vld_vst") == 0) {
105        bench = new CopyVldVstBenchmark();
106    } else if (strcmp(name, "copy_vldmia_vstmia") == 0) {
107        bench = new CopyVldmiaVstmiaBenchmark();
108    } else if (strcmp(name, "memcpy") == 0) {
109        bench = new MemcpyBenchmark();
110    } else if (strcmp(name, "write_strd") == 0) {
111        bench = new WriteStrdBenchmark();
112    } else if (strcmp(name, "write_stmia") == 0) {
113        bench = new WriteStmiaBenchmark();
114    } else if (strcmp(name, "write_vst") == 0) {
115        bench = new WriteVstBenchmark();
116    } else if (strcmp(name, "write_vstmia") == 0) {
117        bench = new WriteVstmiaBenchmark();
118    } else if (strcmp(name, "memset") == 0) {
119        bench = new MemsetBenchmark();
120    } else if (strcmp(name, "read_ldrd") == 0) {
121        bench = new ReadLdrdBenchmark();
122    } else if (strcmp(name, "read_ldmia") == 0) {
123        bench = new ReadLdmiaBenchmark();
124    } else if (strcmp(name, "read_vld") == 0) {
125        bench = new ReadVldBenchmark();
126    } else if (strcmp(name, "read_vldmia") == 0) {
127        bench = new ReadVldmiaBenchmark();
128    } else {
129        printf("Unknown type name %s\n", name);
130        return NULL;
131    }
132
133    if (!bench->setSize(values["size"].int_value)) {
134        printf("Failed to allocate buffers for benchmark.\n");
135        return NULL;
136    }
137
138    if (values.count("num_warm_loops") > 0) {
139        bench->set_num_loops(values["num_warm_loops"].int_value);
140    }
141    if (values.count("num_loops") > 0) {
142        bench->set_num_loops(values["num_loops"].int_value);
143    }
144
145    return bench;
146}
147
// Appends to *cpu_list the index of every CPU in the calling process's
// affinity mask, in ascending order.
//
// Returns false (after reporting the error with perror) when the affinity
// mask cannot be read; *cpu_list is left untouched in that case.
bool getAvailCpus(std::vector<int> *cpu_list) {
    cpu_set_t affinity;
    CPU_ZERO(&affinity);

    const int status = sched_getaffinity(0, sizeof(affinity), &affinity);
    if (status != 0) {
        perror("sched_getaffinity failed.");
        return false;
    }

    int cpu = 0;
    while (cpu < CPU_SETSIZE) {
        if (CPU_ISSET(cpu, &affinity)) {
            cpu_list->push_back(cpu);
        }
        ++cpu;
    }

    return true;
}
165
166typedef struct {
167    int core;
168    BandwidthBenchmark *bench;
169    double  avg_mb;
170    volatile bool *run;
171} thread_arg_t;
172
173void *runBandwidthThread(void *data) {
174    thread_arg_t *arg = reinterpret_cast<thread_arg_t *>(data);
175
176    if (arg->core >= 0) {
177        cpu_set_t cpuset;
178        CPU_ZERO(&cpuset);
179        CPU_SET(arg->core, &cpuset);
180        if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) {
181            perror("sched_setaffinity failed");
182            return NULL;
183        }
184    }
185
186    // Spinloop waiting for the run variable to get set to true.
187    while (!*arg->run) {
188    }
189
190    double avg_mb = 0;
191    for (int run = 1; ; run++) {
192        arg->bench->run();
193        if (!*arg->run) {
194            // Throw away the last data point since it's possible not
195            // all of the threads are running at this point.
196            break;
197        }
198        avg_mb = (avg_mb/run) * (run-1) + arg->bench->mb_per_sec()/run;
199    }
200    arg->avg_mb = avg_mb;
201
202    return NULL;
203}
204
205bool processThreadArgs(int argc, char** argv, option_t options[],
206                       arg_t *values) {
207    // Use some smaller values for the number of loops.
208    (*values)["num_warm_loops"].int_value = 1000000;
209    (*values)["num_loops"].int_value = 10000000;
210
211    if (!processBandwidthOptions(argc, argv, options, values)) {
212        return false;
213    }
214    if (values->count("size") > 0 && ((*values)["size"].int_value % 64) != 0) {
215        printf("The size values must be a multiple of 64.\n");
216        return false;
217    }
218    if (values->count("type") == 0) {
219        printf("Must specify the type value.\n");
220        return false;
221    }
222
223    BandwidthBenchmark *bench = createBandwidthBenchmarkObject(*values);
224    if (!bench) {
225        return false;
226    }
227
228    if (setpriority(PRIO_PROCESS, 0, -20)) {
229        perror("Unable to raise priority of process.");
230        return false;
231    }
232
233    printf("Calculating optimum run time...\n");
234    nsecs_t t = system_time();
235    bench->run();
236    t = system_time() - t;
237    // Since this is only going to be running single threaded, assume that
238    // if the number is set to ten times this value, we should get at least
239    // a couple of samples per thread.
240    int run_time = int((t/1000000000.0)*10 + 0.5) + 5;
241
242    (*values)["run_time"].int_value = run_time;
243    (*values)["size"].int_value = bench->size();
244    (*values)["num_warm_loops"].int_value = bench->num_warm_loops();
245    (*values)["num_loops"].int_value = bench->num_loops();
246    delete bench;
247
248    return true;
249}
250
251bool runThreadedTest(thread_arg_t args[], int num_threads, int run_time) {
252    pthread_t threads[num_threads];
253    volatile bool run = false;
254
255    int rc;
256    for (int i = 0; i < num_threads; i++) {
257        args[i].run = &run;
258        rc = pthread_create(&threads[i], NULL, runBandwidthThread,
259                            (void*)&args[i]);
260        if (rc != 0) {
261            printf("Failed to launch thread %d\n", i);
262            return false;
263        }
264    }
265
266    // Kick start the threads.
267    run = true;
268
269    // Let the threads run.
270    sleep(run_time);
271
272    // Stop the threads.
273    run = false;
274
275    // Wait for the threads to complete.
276    for (int i = 0; i < num_threads; i++) {
277        rc = pthread_join(threads[i], NULL);
278        if (rc != 0) {
279            printf("Thread %d failed to join.\n", i);
280            return false;
281        }
282        printf("Thread %d: bandwidth using %s %0.2f MB/s\n", i,
283               args[i].bench->getName(), args[i].avg_mb);
284    }
285
286    return true;
287}
288
289int per_core_bandwidth(int argc, char** argv) {
290    arg_t values;
291    if (!processThreadArgs(argc, argv, per_core_opts, &values)) {
292        return -1;
293    }
294
295    std::vector<int> cpu_list;
296    if (!getAvailCpus(&cpu_list)) {
297        printf("Failed to get available cpu list.\n");
298        return -1;
299    }
300
301    thread_arg_t args[cpu_list.size()];
302
303    int i = 0;
304    for (std::vector<int>::iterator it = cpu_list.begin();
305         it != cpu_list.end(); ++it, ++i) {
306        args[i].core = *it;
307        args[i].bench = createBandwidthBenchmarkObject(values);
308        if (!args[i].bench) {
309            return -1;
310        }
311    }
312
313    printf("Running on %d cores\n", cpu_list.size());
314    printf("  run_time = %ds\n", values["run_time"].int_value);
315    printf("  size = %d\n", values["size"].int_value);
316    printf("  num_warm_loops = %d\n", values["num_warm_loops"].int_value);
317    printf("  num_loops = %d\n", values["num_loops"].int_value);
318    printf("\n");
319
320    if (!runThreadedTest(args, cpu_list.size(), values["run_time"].int_value)) {
321        return -1;
322    }
323
324    return 0;
325}
326
327int multithread_bandwidth(int argc, char** argv) {
328    arg_t values;
329    if (!processThreadArgs(argc, argv, multithread_opts, &values)) {
330        return -1;
331    }
332    if (values.count("num_threads") == 0) {
333        printf("Must specify the num_threads value.\n");
334        return -1;
335    }
336    int num_threads = values["num_threads"].int_value;
337
338    thread_arg_t args[num_threads];
339
340    int i = 0;
341    for (int i = 0; i < num_threads; i++) {
342        args[i].core = -1;
343        args[i].bench = createBandwidthBenchmarkObject(values);
344        if (!args[i].bench) {
345            return -1;
346        }
347    }
348
349    printf("Running %d threads\n", num_threads);
350    printf("  run_time = %ds\n", values["run_time"].int_value);
351    printf("  size = %d\n", values["size"].int_value);
352    printf("  num_warm_loops = %d\n", values["num_warm_loops"].int_value);
353    printf("  num_loops = %d\n", values["num_loops"].int_value);
354    printf("\n");
355
356    if (!runThreadedTest(args, num_threads, values["run_time"].int_value)) {
357        return -1;
358    }
359
360    return 0;
361}
362
363bool run_bandwidth_benchmark(int argc, char** argv, const char *name,
364                             std::vector<BandwidthBenchmark*> bench_objs) {
365    arg_t values;
366    values["size"].int_value = 0;
367    values["num_warm_loops"].int_value = 0;
368    values["num_loops"].int_value = 0;
369    if (!processBandwidthOptions(argc, argv, bandwidth_opts, &values)) {
370        return false;
371    }
372
373    size_t size = values["size"].int_value;
374    if ((size % 64) != 0) {
375        printf("The size value must be a multiple of 64.\n");
376        return false;
377    }
378
379    if (setpriority(PRIO_PROCESS, 0, -20)) {
380        perror("Unable to raise priority of process.");
381        return false;
382    }
383
384    bool preamble_printed = false;
385    size_t num_warm_loops = values["num_warm_loops"].int_value;
386    size_t num_loops = values["num_loops"].int_value;
387    for (std::vector<BandwidthBenchmark*>::iterator it = bench_objs.begin();
388         it != bench_objs.end(); ++it) {
389        if (!(*it)->canRun()) {
390            continue;
391        }
392        if (!(*it)->setSize(values["num_warm_loops"].int_value)) {
393            printf("Failed creating buffer for bandwidth test.\n");
394            return false;
395        }
396        if (num_warm_loops) {
397            (*it)->set_num_warm_loops(num_warm_loops);
398        }
399        if (num_loops) {
400            (*it)->set_num_loops(num_loops);
401        }
402        if (!preamble_printed) {
403            preamble_printed = true;
404            printf("Benchmarking %s bandwidth\n", name);
405            printf("  size = %d\n", (*it)->size());
406            printf("  num_warm_loops = %d\n", (*it)->num_warm_loops());
407            printf("  num_loops = %d\n\n", (*it)->num_loops());
408        }
409        (*it)->run();
410        printf("  %s bandwidth with %s: %0.2f MB/s\n", name, (*it)->getName(),
411               (*it)->mb_per_sec());
412    }
413
414    return true;
415}
416
417int copy_bandwidth(int argc, char** argv) {
418    std::vector<BandwidthBenchmark*> bench_objs;
419    bench_objs.push_back(new CopyLdrdStrdBenchmark());
420    bench_objs.push_back(new CopyLdmiaStmiaBenchmark());
421    bench_objs.push_back(new CopyVldVstBenchmark());
422    bench_objs.push_back(new CopyVldmiaVstmiaBenchmark());
423    bench_objs.push_back(new MemcpyBenchmark());
424
425    if (!run_bandwidth_benchmark(argc, argv, "copy", bench_objs)) {
426        return -1;
427    }
428    return 0;
429}
430
431int write_bandwidth(int argc, char** argv) {
432    std::vector<BandwidthBenchmark*> bench_objs;
433    bench_objs.push_back(new WriteStrdBenchmark());
434    bench_objs.push_back(new WriteStmiaBenchmark());
435    bench_objs.push_back(new WriteVstBenchmark());
436    bench_objs.push_back(new WriteVstmiaBenchmark());
437    bench_objs.push_back(new MemsetBenchmark());
438
439    if (!run_bandwidth_benchmark(argc, argv, "write", bench_objs)) {
440        return -1;
441    }
442
443    return 0;
444}
445
446int read_bandwidth(int argc, char** argv) {
447    std::vector<BandwidthBenchmark*> bench_objs;
448    bench_objs.push_back(new ReadLdrdBenchmark());
449    bench_objs.push_back(new ReadLdmiaBenchmark());
450    bench_objs.push_back(new ReadVldBenchmark());
451    bench_objs.push_back(new ReadVldmiaBenchmark());
452
453    if (!run_bandwidth_benchmark(argc, argv, "read", bench_objs)) {
454        return -1;
455    }
456    return 0;
457}
458