// bandwidth.cpp revision 1348ce27ee9bb8e50a3294879c1523fa4b4d8f8b
1/* 2 * Copyright (C) 2013 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#include <pthread.h> 18#include <sched.h> 19#include <sys/time.h> 20#include <sys/resource.h> 21#include <unistd.h> 22#include <ctype.h> 23 24#include <map> 25#include <vector> 26 27#include "bandwidth.h" 28 29 30typedef struct { 31 const char *name; 32 bool int_type; 33} option_t; 34 35option_t bandwidth_opts[] = { 36 { "size", true }, 37 { "num_warm_loops", true }, 38 { "num_loops", true }, 39 { "type", false }, 40 { NULL, false }, 41}; 42 43option_t per_core_opts[] = { 44 { "size", true }, 45 { "num_warm_loops", true}, 46 { "num_loops", true }, 47 { "type", false }, 48 { NULL, false }, 49}; 50 51option_t multithread_opts[] = { 52 { "size", true }, 53 { "num_warm_loops", true}, 54 { "num_loops", true }, 55 { "type", false }, 56 { "num_threads", true }, 57 { NULL, false }, 58}; 59 60typedef union { 61 int int_value; 62 const char *char_value; 63} arg_value_t; 64typedef std::map<const char*, arg_value_t> arg_t; 65 66bool processBandwidthOptions(int argc, char** argv, option_t options[], 67 arg_t *values) { 68 for (int i = 1; i < argc; i++) { 69 if (argv[i][0] == '-' && argv[i][1] == '-' && !isdigit(argv[i][2])) { 70 char *arg = &argv[i][2]; 71 72 for (int j = 0; options[j].name != NULL; j++) { 73 if (strcmp(arg, options[j].name) == 0) { 74 const char *name = options[j].name; 75 if (i == argc - 1) { 76 printf("The option --%s requires an 
argument.\n", name); 77 return false; 78 } 79 if (options[j].int_type) { 80 (*values)[name].int_value = strtol(argv[++i], NULL, 0); 81 } else { 82 (*values)[name].char_value = argv[++i]; 83 } 84 } 85 } 86 } 87 } 88 89 return true; 90} 91 92BandwidthBenchmark *createBandwidthBenchmarkObject(arg_t values) { 93 BandwidthBenchmark *bench = NULL; 94 95 const char *name = values["type"].char_value; 96 size_t size = 0; 97 if (values.count("size") > 0) { 98 size = values["size"].int_value; 99 } 100 if (strcmp(name, "copy_ldrd_strd") == 0) { 101 bench = new CopyLdrdStrdBenchmark(size); 102 } else if (strcmp(name, "copy_ldmia_stmia") == 0) { 103 bench = new CopyLdmiaStmiaBenchmark(size); 104 } else if (strcmp(name, "copy_vld_vst") == 0) { 105 bench = new CopyVldVstBenchmark(size); 106 } else if (strcmp(name, "copy_vldmia_vstmia") == 0) { 107 bench = new CopyVldmiaVstmiaBenchmark(size); 108 } else if (strcmp(name, "memcpy") == 0) { 109 bench = new MemcpyBenchmark(size); 110 } else if (strcmp(name, "write_strd") == 0) { 111 bench = new WriteStrdBenchmark(size); 112 } else if (strcmp(name, "write_stmia") == 0) { 113 bench = new WriteStmiaBenchmark(size); 114 } else if (strcmp(name, "write_vst") == 0) { 115 bench = new WriteVstBenchmark(size); 116 } else if (strcmp(name, "write_vstmia") == 0) { 117 bench = new WriteVstmiaBenchmark(size); 118 } else if (strcmp(name, "memset") == 0) { 119 bench = new MemsetBenchmark(size); 120 } 121 122 if (bench) { 123 if (values.count("num_warm_loops") > 0) { 124 bench->set_num_loops(values["num_warm_loops"].int_value); 125 } 126 if (values.count("num_loops") > 0) { 127 bench->set_num_loops(values["num_loops"].int_value); 128 } 129 } 130 131 return bench; 132} 133 134bool getAvailCpus(std::vector<int> *cpu_list) { 135 cpu_set_t cpuset; 136 137 CPU_ZERO(&cpuset); 138 if (sched_getaffinity(0, sizeof(cpuset), &cpuset) != 0) { 139 perror("sched_getaffinity failed."); 140 return false; 141 } 142 143 for (int i = 0; i < CPU_SETSIZE; i++) { 144 if 
(CPU_ISSET(i, &cpuset)) { 145 cpu_list->push_back(i); 146 } 147 } 148 149 return true; 150} 151 152typedef struct { 153 int core; 154 BandwidthBenchmark *bench; 155 double avg_mb; 156 volatile bool *run; 157} thread_arg_t; 158 159void *runBandwidthThread(void *data) { 160 thread_arg_t *arg = reinterpret_cast<thread_arg_t *>(data); 161 162 if (arg->core >= 0) { 163 cpu_set_t cpuset; 164 CPU_ZERO(&cpuset); 165 CPU_SET(arg->core, &cpuset); 166 if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) { 167 perror("sched_setaffinity failed"); 168 return NULL; 169 } 170 } 171 172 // Spinloop waiting for the run variable to get set to true. 173 while (!*arg->run) { 174 } 175 176 double avg_mb = 0; 177 for (int run = 1; ; run++) { 178 arg->bench->run(); 179 if (!*arg->run) { 180 // Throw away the last data point since it's possible not 181 // all of the threads are running at this point. 182 break; 183 } 184 avg_mb = (avg_mb/run) * (run-1) + arg->bench->mb_per_sec()/run; 185 } 186 arg->avg_mb = avg_mb; 187 188 return NULL; 189} 190 191bool processThreadArgs(int argc, char** argv, option_t options[], 192 arg_t *values) { 193 // Use some smaller values for the number of loops. 
194 (*values)["num_warm_loops"].int_value = 1000000; 195 (*values)["num_loops"].int_value = 10000000; 196 197 if (!processBandwidthOptions(argc, argv, options, values)) { 198 return false; 199 } 200 if (values->count("size") > 0 && ((*values)["size"].int_value % 64) != 0) { 201 printf("The size values must be a multiple of 64.\n"); 202 return false; 203 } 204 if (values->count("type") == 0) { 205 printf("Must specify the type value.\n"); 206 return false; 207 } 208 209 BandwidthBenchmark *bench = createBandwidthBenchmarkObject(*values); 210 if (!bench) { 211 printf("Unknown type %s\n", (*values)["type"].char_value); 212 return false; 213 } 214 215 if (setpriority(PRIO_PROCESS, 0, -20)) { 216 perror("Unable to raise priority of process."); 217 return false; 218 } 219 220 printf("Calculating optimum run time...\n"); 221 nsecs_t t = system_time(); 222 bench->run(); 223 t = system_time() - t; 224 // Since this is only going to be running single threaded, assume that 225 // if the number is set to ten times this value, we should get at least 226 // a couple of samples per thread. 227 int run_time = int((t/1000000000.0)*10 + 0.5) + 5; 228 229 (*values)["run_time"].int_value = run_time; 230 (*values)["size"].int_value = bench->size(); 231 (*values)["num_warm_loops"].int_value = bench->num_warm_loops(); 232 (*values)["num_loops"].int_value = bench->num_loops(); 233 delete bench; 234 235 return true; 236} 237 238bool runThreadedTest(thread_arg_t args[], int num_threads, int run_time) { 239 pthread_t threads[num_threads]; 240 volatile bool run = false; 241 242 int rc; 243 for (int i = 0; i < num_threads; i++) { 244 args[i].run = &run; 245 rc = pthread_create(&threads[i], NULL, runBandwidthThread, 246 (void*)&args[i]); 247 if (rc != 0) { 248 printf("Failed to launch thread %d\n", i); 249 return false; 250 } 251 } 252 253 // Kick start the threads. 254 run = true; 255 256 // Let the threads run. 257 sleep(run_time); 258 259 // Stop the threads. 
260 run = false; 261 262 // Wait for the threads to complete. 263 for (int i = 0; i < num_threads; i++) { 264 rc = pthread_join(threads[i], NULL); 265 if (rc != 0) { 266 printf("Thread %d failed to join.\n", i); 267 return false; 268 } 269 printf("Thread %d: bandwidth using %s %0.2f MB/s\n", i, 270 args[i].bench->getName(), args[i].avg_mb); 271 } 272 273 return true; 274} 275 276int per_core_bandwidth(int argc, char** argv) { 277 arg_t values; 278 if (!processThreadArgs(argc, argv, per_core_opts, &values)) { 279 return -1; 280 } 281 282 std::vector<int> cpu_list; 283 if (!getAvailCpus(&cpu_list)) { 284 printf("Failed to get available cpu list.\n"); 285 return -1; 286 } 287 288 thread_arg_t args[cpu_list.size()]; 289 290 int i = 0; 291 for (std::vector<int>::iterator it = cpu_list.begin(); 292 it != cpu_list.end(); ++it, ++i) { 293 args[i].core = *it; 294 args[i].bench = createBandwidthBenchmarkObject(values); 295 } 296 297 printf("Running on %d cores\n", cpu_list.size()); 298 printf(" run_time = %ds\n", values["run_time"].int_value); 299 printf(" size = %d\n", values["size"].int_value); 300 printf(" num_warm_loops = %d\n", values["num_warm_loops"].int_value); 301 printf(" num_loops = %d\n", values["num_loops"].int_value); 302 printf("\n"); 303 304 if (!runThreadedTest(args, cpu_list.size(), values["run_time"].int_value)) { 305 return -1; 306 } 307 308 return 0; 309} 310 311int multithread_bandwidth(int argc, char** argv) { 312 arg_t values; 313 if (!processThreadArgs(argc, argv, multithread_opts, &values)) { 314 return -1; 315 } 316 if (values.count("num_threads") == 0) { 317 printf("Must specify the num_threads value.\n"); 318 return -1; 319 } 320 int num_threads = values["num_threads"].int_value; 321 322 thread_arg_t args[num_threads]; 323 324 int i = 0; 325 for (int i = 0; i < num_threads; i++) { 326 args[i].core = -1; 327 args[i].bench = createBandwidthBenchmarkObject(values); 328 } 329 330 printf("Running %d threads\n", num_threads); 331 printf(" run_time = 
%ds\n", values["run_time"].int_value); 332 printf(" size = %d\n", values["size"].int_value); 333 printf(" num_warm_loops = %d\n", values["num_warm_loops"].int_value); 334 printf(" num_loops = %d\n", values["num_loops"].int_value); 335 printf("\n"); 336 337 if (!runThreadedTest(args, num_threads, values["run_time"].int_value)) { 338 return -1; 339 } 340 341 return 0; 342} 343 344int copy_bandwidth(int argc, char** argv) { 345 arg_t values; 346 values["size"].int_value = 0; 347 values["num_loops"].int_value = BandwidthBenchmark::DEFAULT_NUM_LOOPS; 348 values["num_warm_loops"].int_value = BandwidthBenchmark::DEFAULT_NUM_WARM_LOOPS; 349 if (!processBandwidthOptions(argc, argv, bandwidth_opts, &values)) { 350 return -1; 351 } 352 size_t size = values["size"].int_value; 353 if ((size % 64) != 0) { 354 printf("The size value must be a multiple of 64.\n"); 355 return -1; 356 } 357 358 if (setpriority(PRIO_PROCESS, 0, -20)) { 359 perror("Unable to raise priority of process."); 360 return -1; 361 } 362 363 std::vector<BandwidthBenchmark*> bench_objs; 364 bench_objs.push_back(new CopyLdrdStrdBenchmark(size)); 365 bench_objs.push_back(new CopyLdmiaStmiaBenchmark(size)); 366 bench_objs.push_back(new CopyVldVstBenchmark(size)); 367 bench_objs.push_back(new CopyVldmiaVstmiaBenchmark(size)); 368 bench_objs.push_back(new MemcpyBenchmark(size)); 369 370 printf("Benchmarking copy bandwidth\n"); 371 printf(" size = %d\n", bench_objs[0]->size()); 372 printf(" num_warm_loops = %d\n", values["num_warm_loops"].int_value); 373 printf(" num_loops = %d\n\n", values["num_loops"].int_value); 374 for (std::vector<BandwidthBenchmark*>::iterator it = bench_objs.begin(); 375 it != bench_objs.end(); ++it) { 376 (*it)->set_num_warm_loops(values["num_warm_loops"].int_value); 377 (*it)->set_num_loops(values["num_loops"].int_value); 378 (*it)->run(); 379 printf(" Copy bandwidth with %s: %0.2f MB/s\n", (*it)->getName(), 380 (*it)->mb_per_sec()); 381 } 382 383 return 0; 384} 385 386int 
write_bandwidth(int argc, char** argv) { 387 arg_t values; 388 values["size"].int_value = 0; 389 values["num_loops"].int_value = BandwidthBenchmark::DEFAULT_NUM_LOOPS; 390 values["num_warm_loops"].int_value = BandwidthBenchmark::DEFAULT_NUM_WARM_LOOPS; 391 if (!processBandwidthOptions(argc, argv, bandwidth_opts, &values)) { 392 return -1; 393 } 394 395 size_t size = values["size"].int_value; 396 if ((size % 64) != 0) { 397 printf("The size value must be a multiple of 64.\n"); 398 return 1; 399 } 400 401 if (setpriority(PRIO_PROCESS, 0, -20)) { 402 perror("Unable to raise priority of process."); 403 return -1; 404 } 405 406 std::vector<BandwidthBenchmark*> bench_objs; 407 bench_objs.push_back(new WriteStrdBenchmark(size)); 408 bench_objs.push_back(new WriteStmiaBenchmark(size)); 409 bench_objs.push_back(new WriteVstBenchmark(size)); 410 bench_objs.push_back(new WriteVstmiaBenchmark(size)); 411 bench_objs.push_back(new MemsetBenchmark(size)); 412 413 printf("Benchmarking write bandwidth\n"); 414 printf(" size = %d\n", bench_objs[0]->size()); 415 printf(" num_warm_loops = %d\n", values["num_warm_loops"].int_value); 416 printf(" num_loops = %d\n\n", values["num_loops"].int_value); 417 for (std::vector<BandwidthBenchmark*>::iterator it = bench_objs.begin(); 418 it != bench_objs.end(); ++it) { 419 (*it)->set_num_warm_loops(values["num_warm_loops"].int_value); 420 (*it)->set_num_loops(values["num_loops"].int_value); 421 (*it)->run(); 422 printf(" Write bandwidth with %s: %0.2f MB/s\n", (*it)->getName(), 423 (*it)->mb_per_sec()); 424 } 425 426 return 0; 427} 428