// bandwidth.cpp revision 1a3794a84074d7f22b8ddaba840aedd758a14cdd
1/* 2 * Copyright (C) 2013 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#include <pthread.h> 18#include <sched.h> 19#include <sys/time.h> 20#include <sys/resource.h> 21#include <unistd.h> 22#include <ctype.h> 23 24#include <map> 25#include <vector> 26 27#include "bandwidth.h" 28 29 30typedef struct { 31 const char *name; 32 bool int_type; 33} option_t; 34 35option_t bandwidth_opts[] = { 36 { "size", true }, 37 { "num_warm_loops", true }, 38 { "num_loops", true }, 39 { "type", false }, 40 { NULL, false }, 41}; 42 43option_t per_core_opts[] = { 44 { "size", true }, 45 { "num_warm_loops", true}, 46 { "num_loops", true }, 47 { "type", false }, 48 { NULL, false }, 49}; 50 51option_t multithread_opts[] = { 52 { "size", true }, 53 { "num_warm_loops", true}, 54 { "num_loops", true }, 55 { "type", false }, 56 { "num_threads", true }, 57 { NULL, false }, 58}; 59 60typedef union { 61 int int_value; 62 const char *char_value; 63} arg_value_t; 64typedef std::map<const char*, arg_value_t> arg_t; 65 66bool processBandwidthOptions(int argc, char** argv, option_t options[], 67 arg_t *values) { 68 for (int i = 1; i < argc; i++) { 69 if (argv[i][0] == '-' && argv[i][1] == '-' && !isdigit(argv[i][2])) { 70 char *arg = &argv[i][2]; 71 72 for (int j = 0; options[j].name != NULL; j++) { 73 if (strcmp(arg, options[j].name) == 0) { 74 const char *name = options[j].name; 75 if (i == argc - 1) { 76 printf("The option --%s requires an 
argument.\n", name); 77 return false; 78 } 79 if (options[j].int_type) { 80 (*values)[name].int_value = strtol(argv[++i], NULL, 0); 81 } else { 82 (*values)[name].char_value = argv[++i]; 83 } 84 } 85 } 86 } 87 } 88 89 return true; 90} 91 92BandwidthBenchmark *createBandwidthBenchmarkObject(arg_t values) { 93 BandwidthBenchmark *bench = NULL; 94 95 const char *name = values["type"].char_value; 96 size_t size = 0; 97 if (values.count("size") > 0) { 98 size = values["size"].int_value; 99 } 100 if (strcmp(name, "copy_ldrd_strd") == 0) { 101 bench = new CopyLdrdStrdBenchmark(); 102 } else if (strcmp(name, "copy_ldmia_stmia") == 0) { 103 bench = new CopyLdmiaStmiaBenchmark(); 104 } else if (strcmp(name, "copy_vld_vst") == 0) { 105 bench = new CopyVldVstBenchmark(); 106 } else if (strcmp(name, "copy_vldmia_vstmia") == 0) { 107 bench = new CopyVldmiaVstmiaBenchmark(); 108 } else if (strcmp(name, "memcpy") == 0) { 109 bench = new MemcpyBenchmark(); 110 } else if (strcmp(name, "write_strd") == 0) { 111 bench = new WriteStrdBenchmark(); 112 } else if (strcmp(name, "write_stmia") == 0) { 113 bench = new WriteStmiaBenchmark(); 114 } else if (strcmp(name, "write_vst") == 0) { 115 bench = new WriteVstBenchmark(); 116 } else if (strcmp(name, "write_vstmia") == 0) { 117 bench = new WriteVstmiaBenchmark(); 118 } else if (strcmp(name, "memset") == 0) { 119 bench = new MemsetBenchmark(); 120 } else if (strcmp(name, "read_ldrd") == 0) { 121 bench = new ReadLdrdBenchmark(); 122 } else if (strcmp(name, "read_ldmia") == 0) { 123 bench = new ReadLdmiaBenchmark(); 124 } else if (strcmp(name, "read_vld") == 0) { 125 bench = new ReadVldBenchmark(); 126 } else if (strcmp(name, "read_vldmia") == 0) { 127 bench = new ReadVldmiaBenchmark(); 128 } else { 129 printf("Unknown type name %s\n", name); 130 return NULL; 131 } 132 133 if (!bench->setSize(values["size"].int_value)) { 134 printf("Failed to allocate buffers for benchmark.\n"); 135 return NULL; 136 } 137 138 if 
(values.count("num_warm_loops") > 0) { 139 bench->set_num_loops(values["num_warm_loops"].int_value); 140 } 141 if (values.count("num_loops") > 0) { 142 bench->set_num_loops(values["num_loops"].int_value); 143 } 144 145 return bench; 146} 147 148bool getAvailCpus(std::vector<int> *cpu_list) { 149 cpu_set_t cpuset; 150 151 CPU_ZERO(&cpuset); 152 if (sched_getaffinity(0, sizeof(cpuset), &cpuset) != 0) { 153 perror("sched_getaffinity failed."); 154 return false; 155 } 156 157 for (int i = 0; i < CPU_SETSIZE; i++) { 158 if (CPU_ISSET(i, &cpuset)) { 159 cpu_list->push_back(i); 160 } 161 } 162 163 return true; 164} 165 166typedef struct { 167 int core; 168 BandwidthBenchmark *bench; 169 double avg_mb; 170 volatile bool *run; 171} thread_arg_t; 172 173void *runBandwidthThread(void *data) { 174 thread_arg_t *arg = reinterpret_cast<thread_arg_t *>(data); 175 176 if (arg->core >= 0) { 177 cpu_set_t cpuset; 178 CPU_ZERO(&cpuset); 179 CPU_SET(arg->core, &cpuset); 180 if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) { 181 perror("sched_setaffinity failed"); 182 return NULL; 183 } 184 } 185 186 // Spinloop waiting for the run variable to get set to true. 187 while (!*arg->run) { 188 } 189 190 double avg_mb = 0; 191 for (int run = 1; ; run++) { 192 arg->bench->run(); 193 if (!*arg->run) { 194 // Throw away the last data point since it's possible not 195 // all of the threads are running at this point. 196 break; 197 } 198 avg_mb = (avg_mb/run) * (run-1) + arg->bench->mb_per_sec()/run; 199 } 200 arg->avg_mb = avg_mb; 201 202 return NULL; 203} 204 205bool processThreadArgs(int argc, char** argv, option_t options[], 206 arg_t *values) { 207 // Use some smaller values for the number of loops. 
208 (*values)["num_warm_loops"].int_value = 1000000; 209 (*values)["num_loops"].int_value = 10000000; 210 211 if (!processBandwidthOptions(argc, argv, options, values)) { 212 return false; 213 } 214 if (values->count("size") > 0 && ((*values)["size"].int_value % 64) != 0) { 215 printf("The size values must be a multiple of 64.\n"); 216 return false; 217 } 218 if (values->count("type") == 0) { 219 printf("Must specify the type value.\n"); 220 return false; 221 } 222 223 BandwidthBenchmark *bench = createBandwidthBenchmarkObject(*values); 224 if (!bench) { 225 return false; 226 } 227 228 if (setpriority(PRIO_PROCESS, 0, -20)) { 229 perror("Unable to raise priority of process."); 230 return false; 231 } 232 233 printf("Calculating optimum run time...\n"); 234 nsecs_t t = system_time(); 235 bench->run(); 236 t = system_time() - t; 237 // Since this is only going to be running single threaded, assume that 238 // if the number is set to ten times this value, we should get at least 239 // a couple of samples per thread. 240 int run_time = int((t/1000000000.0)*10 + 0.5) + 5; 241 242 (*values)["run_time"].int_value = run_time; 243 (*values)["size"].int_value = bench->size(); 244 (*values)["num_warm_loops"].int_value = bench->num_warm_loops(); 245 (*values)["num_loops"].int_value = bench->num_loops(); 246 delete bench; 247 248 return true; 249} 250 251bool runThreadedTest(thread_arg_t args[], int num_threads, int run_time) { 252 pthread_t threads[num_threads]; 253 volatile bool run = false; 254 255 int rc; 256 for (int i = 0; i < num_threads; i++) { 257 args[i].run = &run; 258 rc = pthread_create(&threads[i], NULL, runBandwidthThread, 259 (void*)&args[i]); 260 if (rc != 0) { 261 printf("Failed to launch thread %d\n", i); 262 return false; 263 } 264 } 265 266 // Kick start the threads. 267 run = true; 268 269 // Let the threads run. 270 sleep(run_time); 271 272 // Stop the threads. 273 run = false; 274 275 // Wait for the threads to complete. 
276 for (int i = 0; i < num_threads; i++) { 277 rc = pthread_join(threads[i], NULL); 278 if (rc != 0) { 279 printf("Thread %d failed to join.\n", i); 280 return false; 281 } 282 printf("Thread %d: bandwidth using %s %0.2f MB/s\n", i, 283 args[i].bench->getName(), args[i].avg_mb); 284 } 285 286 return true; 287} 288 289int per_core_bandwidth(int argc, char** argv) { 290 arg_t values; 291 if (!processThreadArgs(argc, argv, per_core_opts, &values)) { 292 return -1; 293 } 294 295 std::vector<int> cpu_list; 296 if (!getAvailCpus(&cpu_list)) { 297 printf("Failed to get available cpu list.\n"); 298 return -1; 299 } 300 301 thread_arg_t args[cpu_list.size()]; 302 303 int i = 0; 304 for (std::vector<int>::iterator it = cpu_list.begin(); 305 it != cpu_list.end(); ++it, ++i) { 306 args[i].core = *it; 307 args[i].bench = createBandwidthBenchmarkObject(values); 308 if (!args[i].bench) { 309 return 0; 310 } 311 } 312 313 printf("Running on %d cores\n", cpu_list.size()); 314 printf(" run_time = %ds\n", values["run_time"].int_value); 315 printf(" size = %d\n", values["size"].int_value); 316 printf(" num_warm_loops = %d\n", values["num_warm_loops"].int_value); 317 printf(" num_loops = %d\n", values["num_loops"].int_value); 318 printf("\n"); 319 320 if (!runThreadedTest(args, cpu_list.size(), values["run_time"].int_value)) { 321 return -1; 322 } 323 324 return 0; 325} 326 327int multithread_bandwidth(int argc, char** argv) { 328 arg_t values; 329 if (!processThreadArgs(argc, argv, multithread_opts, &values)) { 330 return -1; 331 } 332 if (values.count("num_threads") == 0) { 333 printf("Must specify the num_threads value.\n"); 334 return -1; 335 } 336 int num_threads = values["num_threads"].int_value; 337 338 thread_arg_t args[num_threads]; 339 340 int i = 0; 341 for (int i = 0; i < num_threads; i++) { 342 args[i].core = -1; 343 args[i].bench = createBandwidthBenchmarkObject(values); 344 if (!args[i].bench) { 345 return 0; 346 } 347 } 348 349 printf("Running %d threads\n", 
num_threads); 350 printf(" run_time = %ds\n", values["run_time"].int_value); 351 printf(" size = %d\n", values["size"].int_value); 352 printf(" num_warm_loops = %d\n", values["num_warm_loops"].int_value); 353 printf(" num_loops = %d\n", values["num_loops"].int_value); 354 printf("\n"); 355 356 if (!runThreadedTest(args, num_threads, values["run_time"].int_value)) { 357 return -1; 358 } 359 360 return 0; 361} 362 363bool run_bandwidth_benchmark(int argc, char** argv, const char *name, 364 std::vector<BandwidthBenchmark*> bench_objs) { 365 arg_t values; 366 values["size"].int_value = 0; 367 values["num_warm_loops"].int_value = 0; 368 values["num_loops"].int_value = 0; 369 if (!processBandwidthOptions(argc, argv, bandwidth_opts, &values)) { 370 return -1; 371 } 372 373 size_t size = values["size"].int_value; 374 if ((size % 64) != 0) { 375 printf("The size value must be a multiple of 64.\n"); 376 return 1; 377 } 378 379 if (setpriority(PRIO_PROCESS, 0, -20)) { 380 perror("Unable to raise priority of process."); 381 return -1; 382 } 383 384 bool preamble_printed = false; 385 size_t num_warm_loops = values["num_warm_loops"].int_value; 386 size_t num_loops = values["num_loops"].int_value; 387 for (std::vector<BandwidthBenchmark*>::iterator it = bench_objs.begin(); 388 it != bench_objs.end(); ++it) { 389 if (!(*it)->canRun()) { 390 continue; 391 } 392 if (!(*it)->setSize(values["num_warm_loops"].int_value)) { 393 printf("Failed creating buffer for bandwidth test.\n"); 394 return false; 395 } 396 if (num_warm_loops) { 397 (*it)->set_num_warm_loops(num_warm_loops); 398 } 399 if (num_loops) { 400 (*it)->set_num_loops(num_loops); 401 } 402 if (!preamble_printed) { 403 preamble_printed = true; 404 printf("Benchmarking %s bandwidth\n", name); 405 printf(" size = %d\n", (*it)->size()); 406 printf(" num_warm_loops = %d\n", (*it)->num_warm_loops()); 407 printf(" num_loops = %d\n\n", (*it)->num_loops()); 408 } 409 (*it)->run(); 410 printf(" %s bandwidth with %s: %0.2f MB/s\n", 
name, (*it)->getName(), 411 (*it)->mb_per_sec()); 412 } 413 414 return true; 415} 416 417int copy_bandwidth(int argc, char** argv) { 418 std::vector<BandwidthBenchmark*> bench_objs; 419 bench_objs.push_back(new CopyLdrdStrdBenchmark()); 420 bench_objs.push_back(new CopyLdmiaStmiaBenchmark()); 421 bench_objs.push_back(new CopyVldVstBenchmark()); 422 bench_objs.push_back(new CopyVldmiaVstmiaBenchmark()); 423 bench_objs.push_back(new MemcpyBenchmark()); 424 425 if (!run_bandwidth_benchmark(argc, argv, "copy", bench_objs)) { 426 return -1; 427 } 428 return 0; 429} 430 431int write_bandwidth(int argc, char** argv) { 432 std::vector<BandwidthBenchmark*> bench_objs; 433 bench_objs.push_back(new WriteStrdBenchmark()); 434 bench_objs.push_back(new WriteStmiaBenchmark()); 435 bench_objs.push_back(new WriteVstBenchmark()); 436 bench_objs.push_back(new WriteVstmiaBenchmark()); 437 bench_objs.push_back(new MemsetBenchmark()); 438 439 if (!run_bandwidth_benchmark(argc, argv, "write", bench_objs)) { 440 return -1; 441 } 442 443 return 0; 444} 445 446int read_bandwidth(int argc, char** argv) { 447 std::vector<BandwidthBenchmark*> bench_objs; 448 bench_objs.push_back(new ReadLdrdBenchmark()); 449 bench_objs.push_back(new ReadLdmiaBenchmark()); 450 bench_objs.push_back(new ReadVldBenchmark()); 451 bench_objs.push_back(new ReadVldmiaBenchmark()); 452 453 if (!run_bandwidth_benchmark(argc, argv, "read", bench_objs)) { 454 return -1; 455 } 456 return 0; 457} 458