bandwidth.cpp revision 1348ce27ee9bb8e50a3294879c1523fa4b4d8f8b
1/*
2 * Copyright (C) 2013 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include <pthread.h>
18#include <sched.h>
19#include <sys/time.h>
20#include <sys/resource.h>
21#include <unistd.h>
22#include <ctype.h>
23
24#include <map>
25#include <vector>
26
27#include "bandwidth.h"
28
29
// Describes one command-line option understood by processBandwidthOptions().
struct option_t {
    // Option name as it appears after the leading "--". A NULL name marks
    // the end of an option table.
    const char *name;
    // True when the option's argument is converted with strtol(); false
    // when the argument string itself is stored.
    bool int_type;
};
34
35option_t bandwidth_opts[] = {
36    { "size", true },
37    { "num_warm_loops", true },
38    { "num_loops", true },
39    { "type", false },
40    { NULL, false },
41};
42
43option_t per_core_opts[] = {
44    { "size", true },
45    { "num_warm_loops", true},
46    { "num_loops", true },
47    { "type", false },
48    { NULL, false },
49};
50
51option_t multithread_opts[] = {
52    { "size", true },
53    { "num_warm_loops", true},
54    { "num_loops", true },
55    { "type", false },
56    { "num_threads", true },
57    { NULL, false },
58};
59
// Value of a parsed option: either the integer produced by strtol() or the
// raw argument string, depending on the option's int_type flag.
typedef union {
    int int_value;
    const char *char_value;
} arg_value_t;

// Orders option names by their characters rather than by pointer value.
// Without this, a lookup such as values["type"] only finds the entry stored
// under an option table's "type" if the compiler happens to give both string
// literals the same address. Keys must be non-NULL, NUL-terminated strings.
struct arg_name_less {
    bool operator()(const char *lhs, const char *rhs) const {
        return strcmp(lhs, rhs) < 0;
    }
};

// Maps an option name to its parsed value. The map stores the key pointer,
// not a copy, so the string it points to must outlive the map entry.
typedef std::map<const char*, arg_value_t, arg_name_less> arg_t;
65
66bool processBandwidthOptions(int argc, char** argv, option_t options[],
67                             arg_t *values) {
68    for (int i = 1; i < argc; i++) {
69        if (argv[i][0] == '-' && argv[i][1] == '-' && !isdigit(argv[i][2])) {
70            char *arg = &argv[i][2];
71
72            for (int j = 0; options[j].name != NULL; j++) {
73                if (strcmp(arg, options[j].name) == 0) {
74                    const char *name = options[j].name;
75                    if (i == argc - 1) {
76                        printf("The option --%s requires an argument.\n", name);
77                        return false;
78                    }
79                    if (options[j].int_type) {
80                        (*values)[name].int_value = strtol(argv[++i], NULL, 0);
81                    } else {
82                        (*values)[name].char_value = argv[++i];
83                    }
84                }
85            }
86        }
87    }
88
89    return true;
90}
91
92BandwidthBenchmark *createBandwidthBenchmarkObject(arg_t values) {
93    BandwidthBenchmark *bench = NULL;
94
95    const char *name = values["type"].char_value;
96    size_t size = 0;
97    if (values.count("size") > 0) {
98        size = values["size"].int_value;
99    }
100    if (strcmp(name, "copy_ldrd_strd") == 0) {
101        bench = new CopyLdrdStrdBenchmark(size);
102    } else if (strcmp(name, "copy_ldmia_stmia") == 0) {
103        bench = new CopyLdmiaStmiaBenchmark(size);
104    } else if (strcmp(name, "copy_vld_vst") == 0) {
105        bench = new CopyVldVstBenchmark(size);
106    } else if (strcmp(name, "copy_vldmia_vstmia") == 0) {
107        bench = new CopyVldmiaVstmiaBenchmark(size);
108    } else if (strcmp(name, "memcpy") == 0) {
109        bench = new MemcpyBenchmark(size);
110    } else if (strcmp(name, "write_strd") == 0) {
111        bench = new WriteStrdBenchmark(size);
112    } else if (strcmp(name, "write_stmia") == 0) {
113        bench = new WriteStmiaBenchmark(size);
114    } else if (strcmp(name, "write_vst") == 0) {
115        bench = new WriteVstBenchmark(size);
116    } else if (strcmp(name, "write_vstmia") == 0) {
117        bench = new WriteVstmiaBenchmark(size);
118    } else if (strcmp(name, "memset") == 0) {
119        bench = new MemsetBenchmark(size);
120    }
121
122    if (bench) {
123        if (values.count("num_warm_loops") > 0) {
124            bench->set_num_loops(values["num_warm_loops"].int_value);
125        }
126        if (values.count("num_loops") > 0) {
127            bench->set_num_loops(values["num_loops"].int_value);
128        }
129    }
130
131    return bench;
132}
133
// Appends to *cpu_list the index of every CPU in the calling thread's
// affinity mask, in ascending order.
//
// Returns false (after reporting the error with perror) when the affinity
// mask cannot be read; *cpu_list is left untouched in that case.
bool getAvailCpus(std::vector<int> *cpu_list) {
    cpu_set_t affinity_mask;
    CPU_ZERO(&affinity_mask);

    const int status =
            sched_getaffinity(0, sizeof(affinity_mask), &affinity_mask);
    if (status != 0) {
        perror("sched_getaffinity failed.");
        return false;
    }

    int cpu = 0;
    while (cpu < CPU_SETSIZE) {
        if (CPU_ISSET(cpu, &affinity_mask)) {
            cpu_list->push_back(cpu);
        }
        ++cpu;
    }

    return true;
}
151
152typedef struct {
153    int core;
154    BandwidthBenchmark *bench;
155    double  avg_mb;
156    volatile bool *run;
157} thread_arg_t;
158
159void *runBandwidthThread(void *data) {
160    thread_arg_t *arg = reinterpret_cast<thread_arg_t *>(data);
161
162    if (arg->core >= 0) {
163        cpu_set_t cpuset;
164        CPU_ZERO(&cpuset);
165        CPU_SET(arg->core, &cpuset);
166        if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) {
167            perror("sched_setaffinity failed");
168            return NULL;
169        }
170    }
171
172    // Spinloop waiting for the run variable to get set to true.
173    while (!*arg->run) {
174    }
175
176    double avg_mb = 0;
177    for (int run = 1; ; run++) {
178        arg->bench->run();
179        if (!*arg->run) {
180            // Throw away the last data point since it's possible not
181            // all of the threads are running at this point.
182            break;
183        }
184        avg_mb = (avg_mb/run) * (run-1) + arg->bench->mb_per_sec()/run;
185    }
186    arg->avg_mb = avg_mb;
187
188    return NULL;
189}
190
191bool processThreadArgs(int argc, char** argv, option_t options[],
192                       arg_t *values) {
193    // Use some smaller values for the number of loops.
194    (*values)["num_warm_loops"].int_value = 1000000;
195    (*values)["num_loops"].int_value = 10000000;
196
197    if (!processBandwidthOptions(argc, argv, options, values)) {
198        return false;
199    }
200    if (values->count("size") > 0 && ((*values)["size"].int_value % 64) != 0) {
201        printf("The size values must be a multiple of 64.\n");
202        return false;
203    }
204    if (values->count("type") == 0) {
205        printf("Must specify the type value.\n");
206        return false;
207    }
208
209    BandwidthBenchmark *bench = createBandwidthBenchmarkObject(*values);
210    if (!bench) {
211        printf("Unknown type %s\n", (*values)["type"].char_value);
212        return false;
213    }
214
215    if (setpriority(PRIO_PROCESS, 0, -20)) {
216        perror("Unable to raise priority of process.");
217        return false;
218    }
219
220    printf("Calculating optimum run time...\n");
221    nsecs_t t = system_time();
222    bench->run();
223    t = system_time() - t;
224    // Since this is only going to be running single threaded, assume that
225    // if the number is set to ten times this value, we should get at least
226    // a couple of samples per thread.
227    int run_time = int((t/1000000000.0)*10 + 0.5) + 5;
228
229    (*values)["run_time"].int_value = run_time;
230    (*values)["size"].int_value = bench->size();
231    (*values)["num_warm_loops"].int_value = bench->num_warm_loops();
232    (*values)["num_loops"].int_value = bench->num_loops();
233    delete bench;
234
235    return true;
236}
237
238bool runThreadedTest(thread_arg_t args[], int num_threads, int run_time) {
239    pthread_t threads[num_threads];
240    volatile bool run = false;
241
242    int rc;
243    for (int i = 0; i < num_threads; i++) {
244        args[i].run = &run;
245        rc = pthread_create(&threads[i], NULL, runBandwidthThread,
246                            (void*)&args[i]);
247        if (rc != 0) {
248            printf("Failed to launch thread %d\n", i);
249            return false;
250        }
251    }
252
253    // Kick start the threads.
254    run = true;
255
256    // Let the threads run.
257    sleep(run_time);
258
259    // Stop the threads.
260    run = false;
261
262    // Wait for the threads to complete.
263    for (int i = 0; i < num_threads; i++) {
264        rc = pthread_join(threads[i], NULL);
265        if (rc != 0) {
266            printf("Thread %d failed to join.\n", i);
267            return false;
268        }
269        printf("Thread %d: bandwidth using %s %0.2f MB/s\n", i,
270               args[i].bench->getName(), args[i].avg_mb);
271    }
272
273    return true;
274}
275
276int per_core_bandwidth(int argc, char** argv) {
277    arg_t values;
278    if (!processThreadArgs(argc, argv, per_core_opts, &values)) {
279        return -1;
280    }
281
282    std::vector<int> cpu_list;
283    if (!getAvailCpus(&cpu_list)) {
284        printf("Failed to get available cpu list.\n");
285        return -1;
286    }
287
288    thread_arg_t args[cpu_list.size()];
289
290    int i = 0;
291    for (std::vector<int>::iterator it = cpu_list.begin();
292         it != cpu_list.end(); ++it, ++i) {
293        args[i].core = *it;
294        args[i].bench = createBandwidthBenchmarkObject(values);
295    }
296
297    printf("Running on %d cores\n", cpu_list.size());
298    printf("  run_time = %ds\n", values["run_time"].int_value);
299    printf("  size = %d\n", values["size"].int_value);
300    printf("  num_warm_loops = %d\n", values["num_warm_loops"].int_value);
301    printf("  num_loops = %d\n", values["num_loops"].int_value);
302    printf("\n");
303
304    if (!runThreadedTest(args, cpu_list.size(), values["run_time"].int_value)) {
305        return -1;
306    }
307
308    return 0;
309}
310
311int multithread_bandwidth(int argc, char** argv) {
312    arg_t values;
313    if (!processThreadArgs(argc, argv, multithread_opts, &values)) {
314        return -1;
315    }
316    if (values.count("num_threads") == 0) {
317        printf("Must specify the num_threads value.\n");
318        return -1;
319    }
320    int num_threads = values["num_threads"].int_value;
321
322    thread_arg_t args[num_threads];
323
324    int i = 0;
325    for (int i = 0; i < num_threads; i++) {
326        args[i].core = -1;
327        args[i].bench = createBandwidthBenchmarkObject(values);
328    }
329
330    printf("Running %d threads\n", num_threads);
331    printf("  run_time = %ds\n", values["run_time"].int_value);
332    printf("  size = %d\n", values["size"].int_value);
333    printf("  num_warm_loops = %d\n", values["num_warm_loops"].int_value);
334    printf("  num_loops = %d\n", values["num_loops"].int_value);
335    printf("\n");
336
337    if (!runThreadedTest(args, num_threads, values["run_time"].int_value)) {
338        return -1;
339    }
340
341    return 0;
342}
343
344int copy_bandwidth(int argc, char** argv) {
345    arg_t values;
346    values["size"].int_value = 0;
347    values["num_loops"].int_value = BandwidthBenchmark::DEFAULT_NUM_LOOPS;
348    values["num_warm_loops"].int_value = BandwidthBenchmark::DEFAULT_NUM_WARM_LOOPS;
349    if (!processBandwidthOptions(argc, argv, bandwidth_opts, &values)) {
350        return -1;
351    }
352    size_t size = values["size"].int_value;
353    if ((size % 64) != 0) {
354        printf("The size value must be a multiple of 64.\n");
355        return -1;
356    }
357
358    if (setpriority(PRIO_PROCESS, 0, -20)) {
359        perror("Unable to raise priority of process.");
360        return -1;
361    }
362
363    std::vector<BandwidthBenchmark*> bench_objs;
364    bench_objs.push_back(new CopyLdrdStrdBenchmark(size));
365    bench_objs.push_back(new CopyLdmiaStmiaBenchmark(size));
366    bench_objs.push_back(new CopyVldVstBenchmark(size));
367    bench_objs.push_back(new CopyVldmiaVstmiaBenchmark(size));
368    bench_objs.push_back(new MemcpyBenchmark(size));
369
370    printf("Benchmarking copy bandwidth\n");
371    printf("  size = %d\n", bench_objs[0]->size());
372    printf("  num_warm_loops = %d\n", values["num_warm_loops"].int_value);
373    printf("  num_loops = %d\n\n", values["num_loops"].int_value);
374    for (std::vector<BandwidthBenchmark*>::iterator it = bench_objs.begin();
375         it != bench_objs.end(); ++it) {
376        (*it)->set_num_warm_loops(values["num_warm_loops"].int_value);
377        (*it)->set_num_loops(values["num_loops"].int_value);
378        (*it)->run();
379        printf("  Copy bandwidth with %s: %0.2f MB/s\n", (*it)->getName(),
380               (*it)->mb_per_sec());
381    }
382
383    return 0;
384}
385
386int write_bandwidth(int argc, char** argv) {
387    arg_t values;
388    values["size"].int_value = 0;
389    values["num_loops"].int_value = BandwidthBenchmark::DEFAULT_NUM_LOOPS;
390    values["num_warm_loops"].int_value = BandwidthBenchmark::DEFAULT_NUM_WARM_LOOPS;
391    if (!processBandwidthOptions(argc, argv, bandwidth_opts, &values)) {
392        return -1;
393    }
394
395    size_t size = values["size"].int_value;
396    if ((size % 64) != 0) {
397        printf("The size value must be a multiple of 64.\n");
398        return 1;
399    }
400
401    if (setpriority(PRIO_PROCESS, 0, -20)) {
402        perror("Unable to raise priority of process.");
403        return -1;
404    }
405
406    std::vector<BandwidthBenchmark*> bench_objs;
407    bench_objs.push_back(new WriteStrdBenchmark(size));
408    bench_objs.push_back(new WriteStmiaBenchmark(size));
409    bench_objs.push_back(new WriteVstBenchmark(size));
410    bench_objs.push_back(new WriteVstmiaBenchmark(size));
411    bench_objs.push_back(new MemsetBenchmark(size));
412
413    printf("Benchmarking write bandwidth\n");
414    printf("  size = %d\n", bench_objs[0]->size());
415    printf("  num_warm_loops = %d\n", values["num_warm_loops"].int_value);
416    printf("  num_loops = %d\n\n", values["num_loops"].int_value);
417    for (std::vector<BandwidthBenchmark*>::iterator it = bench_objs.begin();
418         it != bench_objs.end(); ++it) {
419        (*it)->set_num_warm_loops(values["num_warm_loops"].int_value);
420        (*it)->set_num_loops(values["num_loops"].int_value);
421        (*it)->run();
422        printf("  Write bandwidth with %s: %0.2f MB/s\n", (*it)->getName(),
423               (*it)->mb_per_sec());
424    }
425
426    return 0;
427}
428