/* diskutil.c revision fba5c5ff89163062922c3e560e871c087f2177c3 */
1#include <stdio.h> 2#include <string.h> 3#include <sys/time.h> 4#include <sys/types.h> 5#include <sys/stat.h> 6#include <dirent.h> 7#include <libgen.h> 8#include <math.h> 9 10#include "fio.h" 11#include "smalloc.h" 12#include "diskutil.h" 13 14static int last_majdev, last_mindev; 15static struct disk_util *last_du; 16 17static struct fio_mutex *disk_util_mutex; 18 19FLIST_HEAD(disk_list); 20 21static struct disk_util *__init_per_file_disk_util(struct thread_data *td, 22 int majdev, int mindev, char *path); 23 24static void disk_util_free(struct disk_util *du) 25{ 26 if (du == last_du) 27 last_du = NULL; 28 29 while (!flist_empty(&du->slaves)) { 30 struct disk_util *slave; 31 32 slave = flist_entry(du->slaves.next, struct disk_util, slavelist); 33 flist_del(&slave->slavelist); 34 slave->users--; 35 } 36 37 fio_mutex_remove(du->lock); 38 sfree(du); 39} 40 41static int get_io_ticks(struct disk_util *du, struct disk_util_stat *dus) 42{ 43 unsigned in_flight; 44 unsigned long long sectors[2]; 45 char line[256]; 46 FILE *f; 47 char *p; 48 int ret; 49 50 dprint(FD_DISKUTIL, "open stat file: %s\n", du->path); 51 52 f = fopen(du->path, "r"); 53 if (!f) 54 return 1; 55 56 p = fgets(line, sizeof(line), f); 57 if (!p) { 58 fclose(f); 59 return 1; 60 } 61 62 dprint(FD_DISKUTIL, "%s: %s", du->path, p); 63 64 ret = sscanf(p, "%u %u %llu %u %u %u %llu %u %u %u %u\n", &dus->ios[0], 65 &dus->merges[0], §ors[0], 66 &dus->ticks[0], &dus->ios[1], 67 &dus->merges[1], §ors[1], 68 &dus->ticks[1], &in_flight, 69 &dus->io_ticks, &dus->time_in_queue); 70 fclose(f); 71 dprint(FD_DISKUTIL, "%s: stat read ok? 
%d\n", du->path, ret == 1); 72 dus->sectors[0] = sectors[0]; 73 dus->sectors[1] = sectors[1]; 74 return ret != 11; 75} 76 77static void update_io_tick_disk(struct disk_util *du) 78{ 79 struct disk_util_stat __dus, *dus, *ldus; 80 struct timeval t; 81 82 if (!du->users) 83 return; 84 if (get_io_ticks(du, &__dus)) 85 return; 86 87 dus = &du->dus; 88 ldus = &du->last_dus; 89 90 dus->sectors[0] += (__dus.sectors[0] - ldus->sectors[0]); 91 dus->sectors[1] += (__dus.sectors[1] - ldus->sectors[1]); 92 dus->ios[0] += (__dus.ios[0] - ldus->ios[0]); 93 dus->ios[1] += (__dus.ios[1] - ldus->ios[1]); 94 dus->merges[0] += (__dus.merges[0] - ldus->merges[0]); 95 dus->merges[1] += (__dus.merges[1] - ldus->merges[1]); 96 dus->ticks[0] += (__dus.ticks[0] - ldus->ticks[0]); 97 dus->ticks[1] += (__dus.ticks[1] - ldus->ticks[1]); 98 dus->io_ticks += (__dus.io_ticks - ldus->io_ticks); 99 dus->time_in_queue += (__dus.time_in_queue - ldus->time_in_queue); 100 101 fio_gettime(&t, NULL); 102 dus->msec += mtime_since(&du->time, &t); 103 memcpy(&du->time, &t, sizeof(t)); 104 memcpy(ldus, &__dus, sizeof(__dus)); 105} 106 107int update_io_ticks(void) 108{ 109 struct flist_head *entry; 110 struct disk_util *du; 111 int ret = 0; 112 113 dprint(FD_DISKUTIL, "update io ticks\n"); 114 115 fio_mutex_down(disk_util_mutex); 116 117 if (!disk_util_exit) { 118 flist_for_each(entry, &disk_list) { 119 du = flist_entry(entry, struct disk_util, list); 120 update_io_tick_disk(du); 121 } 122 } else 123 ret = 1; 124 125 fio_mutex_up(disk_util_mutex); 126 return ret; 127} 128 129static struct disk_util *disk_util_exists(int major, int minor) 130{ 131 struct flist_head *entry; 132 struct disk_util *du; 133 134 fio_mutex_down(disk_util_mutex); 135 136 flist_for_each(entry, &disk_list) { 137 du = flist_entry(entry, struct disk_util, list); 138 139 if (major == du->major && minor == du->minor) { 140 fio_mutex_up(disk_util_mutex); 141 return du; 142 } 143 } 144 145 fio_mutex_up(disk_util_mutex); 146 return NULL; 147} 
148 149static int get_device_numbers(char *file_name, int *maj, int *min) 150{ 151 struct stat st; 152 int majdev, mindev; 153 char tempname[PATH_MAX], *p; 154 155 if (!lstat(file_name, &st)) { 156 if (S_ISBLK(st.st_mode)) { 157 majdev = major(st.st_rdev); 158 mindev = minor(st.st_rdev); 159 } else if (S_ISCHR(st.st_mode)) { 160 majdev = major(st.st_rdev); 161 mindev = minor(st.st_rdev); 162 if (fio_lookup_raw(st.st_rdev, &majdev, &mindev)) 163 return -1; 164 } else if (S_ISFIFO(st.st_mode)) 165 return -1; 166 else { 167 majdev = major(st.st_dev); 168 mindev = minor(st.st_dev); 169 } 170 } else { 171 /* 172 * must be a file, open "." in that path 173 */ 174 strncpy(tempname, file_name, PATH_MAX - 1); 175 p = dirname(tempname); 176 if (stat(p, &st)) { 177 perror("disk util stat"); 178 return -1; 179 } 180 181 majdev = major(st.st_dev); 182 mindev = minor(st.st_dev); 183 } 184 185 *min = mindev; 186 *maj = majdev; 187 188 return 0; 189} 190 191static int read_block_dev_entry(char *path, int *maj, int *min) 192{ 193 char line[256], *p; 194 FILE *f; 195 196 f = fopen(path, "r"); 197 if (!f) { 198 perror("open path"); 199 return 1; 200 } 201 202 p = fgets(line, sizeof(line), f); 203 fclose(f); 204 205 if (!p) 206 return 1; 207 208 if (sscanf(p, "%u:%u", maj, min) != 2) 209 return 1; 210 211 return 0; 212} 213 214static void find_add_disk_slaves(struct thread_data *td, char *path, 215 struct disk_util *masterdu) 216{ 217 DIR *dirhandle = NULL; 218 struct dirent *dirent = NULL; 219 char slavesdir[PATH_MAX], temppath[PATH_MAX], slavepath[PATH_MAX]; 220 struct disk_util *slavedu = NULL; 221 int majdev, mindev; 222 ssize_t linklen; 223 224 sprintf(slavesdir, "%s/%s", path, "slaves"); 225 dirhandle = opendir(slavesdir); 226 if (!dirhandle) 227 return; 228 229 while ((dirent = readdir(dirhandle)) != NULL) { 230 if (!strcmp(dirent->d_name, ".") || 231 !strcmp(dirent->d_name, "..")) 232 continue; 233 234 sprintf(temppath, "%s%s%s", slavesdir, FIO_OS_PATH_SEPARATOR, 
dirent->d_name); 235 /* Can we always assume that the slaves device entries 236 * are links to the real directories for the slave 237 * devices? 238 */ 239 linklen = readlink(temppath, slavepath, PATH_MAX - 0); 240 if (linklen < 0) { 241 perror("readlink() for slave device."); 242 return; 243 } 244 slavepath[linklen] = '\0'; 245 246 sprintf(temppath, "%s/%s/dev", slavesdir, slavepath); 247 if (read_block_dev_entry(temppath, &majdev, &mindev)) { 248 perror("Error getting slave device numbers."); 249 return; 250 } 251 252 /* 253 * See if this maj,min already exists 254 */ 255 slavedu = disk_util_exists(majdev, mindev); 256 if (slavedu) 257 continue; 258 259 sprintf(temppath, "%s%s%s", slavesdir, FIO_OS_PATH_SEPARATOR, slavepath); 260 __init_per_file_disk_util(td, majdev, mindev, temppath); 261 slavedu = disk_util_exists(majdev, mindev); 262 263 /* Should probably use an assert here. slavedu should 264 * always be present at this point. */ 265 if (slavedu) { 266 slavedu->users++; 267 flist_add_tail(&slavedu->slavelist, &masterdu->slaves); 268 } 269 } 270 271 closedir(dirhandle); 272} 273 274static struct disk_util *disk_util_add(struct thread_data *td, int majdev, 275 int mindev, char *path) 276{ 277 struct disk_util *du, *__du; 278 struct flist_head *entry; 279 int l; 280 281 dprint(FD_DISKUTIL, "add maj/min %d/%d: %s\n", majdev, mindev, path); 282 283 du = smalloc(sizeof(*du)); 284 if (!du) { 285 log_err("fio: smalloc() pool exhausted\n"); 286 return NULL; 287 } 288 289 memset(du, 0, sizeof(*du)); 290 INIT_FLIST_HEAD(&du->list); 291 l = snprintf(du->path, sizeof(du->path), "%s/stat", path); 292 if (l < 0 || l >= sizeof(du->path)) { 293 log_err("constructed path \"%.100s[...]/stat\" larger than buffer (%zu bytes)\n", 294 path, sizeof(du->path) - 1); 295 sfree(du); 296 return NULL; 297 } 298 strncpy((char *) du->dus.name, basename(path), FIO_DU_NAME_SZ); 299 du->sysfs_root = path; 300 du->major = majdev; 301 du->minor = mindev; 302 INIT_FLIST_HEAD(&du->slavelist); 303 
INIT_FLIST_HEAD(&du->slaves); 304 du->lock = fio_mutex_init(FIO_MUTEX_UNLOCKED); 305 du->users = 0; 306 307 fio_mutex_down(disk_util_mutex); 308 309 flist_for_each(entry, &disk_list) { 310 __du = flist_entry(entry, struct disk_util, list); 311 312 dprint(FD_DISKUTIL, "found %s in list\n", __du->dus.name); 313 314 if (!strcmp((char *) du->dus.name, (char *) __du->dus.name)) { 315 disk_util_free(du); 316 fio_mutex_up(disk_util_mutex); 317 return __du; 318 } 319 } 320 321 dprint(FD_DISKUTIL, "add %s to list\n", du->dus.name); 322 323 fio_gettime(&du->time, NULL); 324 get_io_ticks(du, &du->last_dus); 325 326 flist_add_tail(&du->list, &disk_list); 327 fio_mutex_up(disk_util_mutex); 328 329 find_add_disk_slaves(td, path, du); 330 return du; 331} 332 333static int check_dev_match(int majdev, int mindev, char *path) 334{ 335 int major, minor; 336 337 if (read_block_dev_entry(path, &major, &minor)) 338 return 1; 339 340 if (majdev == major && mindev == minor) 341 return 0; 342 343 return 1; 344} 345 346static int find_block_dir(int majdev, int mindev, char *path, int link_ok) 347{ 348 struct dirent *dir; 349 struct stat st; 350 int found = 0; 351 DIR *D; 352 353 D = opendir(path); 354 if (!D) 355 return 0; 356 357 while ((dir = readdir(D)) != NULL) { 358 char full_path[256]; 359 360 if (!strcmp(dir->d_name, ".") || !strcmp(dir->d_name, "..")) 361 continue; 362 363 sprintf(full_path, "%s%s%s", path, FIO_OS_PATH_SEPARATOR, dir->d_name); 364 365 if (!strcmp(dir->d_name, "dev")) { 366 if (!check_dev_match(majdev, mindev, full_path)) { 367 found = 1; 368 break; 369 } 370 } 371 372 if (link_ok) { 373 if (stat(full_path, &st) == -1) { 374 perror("stat"); 375 break; 376 } 377 } else { 378 if (lstat(full_path, &st) == -1) { 379 perror("stat"); 380 break; 381 } 382 } 383 384 if (!S_ISDIR(st.st_mode) || S_ISLNK(st.st_mode)) 385 continue; 386 387 found = find_block_dir(majdev, mindev, full_path, 0); 388 if (found) { 389 strcpy(path, full_path); 390 break; 391 } 392 } 393 394 
closedir(D); 395 return found; 396} 397 398static struct disk_util *__init_per_file_disk_util(struct thread_data *td, 399 int majdev, int mindev, 400 char *path) 401{ 402 struct stat st; 403 char tmp[PATH_MAX]; 404 char *p; 405 406 /* 407 * If there's a ../queue/ directory there, we are inside a partition. 408 * Check if that is the case and jump back. For loop/md/dm etc we 409 * are already in the right spot. 410 */ 411 sprintf(tmp, "%s/../queue", path); 412 if (!stat(tmp, &st)) { 413 p = dirname(path); 414 sprintf(tmp, "%s/queue", p); 415 if (stat(tmp, &st)) { 416 log_err("unknown sysfs layout\n"); 417 return NULL; 418 } 419 strncpy(tmp, p, PATH_MAX - 1); 420 sprintf(path, "%s", tmp); 421 } 422 423 if (td->o.ioscheduler && !td->sysfs_root) 424 td->sysfs_root = strdup(path); 425 426 return disk_util_add(td, majdev, mindev, path); 427} 428 429static struct disk_util *init_per_file_disk_util(struct thread_data *td, 430 char *filename) 431{ 432 433 char foo[PATH_MAX]; 434 struct disk_util *du; 435 int mindev, majdev; 436 437 if (get_device_numbers(filename, &majdev, &mindev)) 438 return NULL; 439 440 dprint(FD_DISKUTIL, "%s belongs to maj/min %d/%d\n", filename, majdev, 441 mindev); 442 443 du = disk_util_exists(majdev, mindev); 444 if (du) { 445 if (td->o.ioscheduler && !td->sysfs_root) 446 td->sysfs_root = strdup(du->sysfs_root); 447 448 return du; 449 } 450 451 /* 452 * for an fs without a device, we will repeatedly stat through 453 * sysfs which can take oodles of time for thousands of files. so 454 * cache the last lookup and compare with that before going through 455 * everything again. 
456 */ 457 if (mindev == last_mindev && majdev == last_majdev) 458 return last_du; 459 460 last_mindev = mindev; 461 last_majdev = majdev; 462 463 sprintf(foo, "/sys/block"); 464 if (!find_block_dir(majdev, mindev, foo, 1)) 465 return NULL; 466 467 return __init_per_file_disk_util(td, majdev, mindev, foo); 468} 469 470static struct disk_util *__init_disk_util(struct thread_data *td, 471 struct fio_file *f) 472{ 473 return init_per_file_disk_util(td, f->file_name); 474} 475 476void init_disk_util(struct thread_data *td) 477{ 478 struct fio_file *f; 479 unsigned int i; 480 481 if (!td->o.do_disk_util || 482 (td->io_ops->flags & (FIO_DISKLESSIO | FIO_NODISKUTIL))) 483 return; 484 485 for_each_file(td, f, i) 486 f->du = __init_disk_util(td, f); 487} 488 489static void show_agg_stats(struct disk_util_agg *agg, int terse) 490{ 491 if (!agg->slavecount) 492 return; 493 494 if (!terse) { 495 log_info(", aggrios=%u/%u, aggrmerge=%u/%u, aggrticks=%u/%u," 496 " aggrin_queue=%u, aggrutil=%3.2f%%", 497 agg->ios[0] / agg->slavecount, 498 agg->ios[1] / agg->slavecount, 499 agg->merges[0] / agg->slavecount, 500 agg->merges[1] / agg->slavecount, 501 agg->ticks[0] / agg->slavecount, 502 agg->ticks[1] / agg->slavecount, 503 agg->time_in_queue / agg->slavecount, 504 agg->max_util.u.f); 505 } else { 506 log_info(";slaves;%u;%u;%u;%u;%u;%u;%u;%3.2f%%", 507 agg->ios[0] / agg->slavecount, 508 agg->ios[1] / agg->slavecount, 509 agg->merges[0] / agg->slavecount, 510 agg->merges[1] / agg->slavecount, 511 agg->ticks[0] / agg->slavecount, 512 agg->ticks[1] / agg->slavecount, 513 agg->time_in_queue / agg->slavecount, 514 agg->max_util.u.f); 515 } 516} 517 518static void aggregate_slaves_stats(struct disk_util *masterdu) 519{ 520 struct disk_util_agg *agg = &masterdu->agg; 521 struct disk_util_stat *dus; 522 struct flist_head *entry; 523 struct disk_util *slavedu; 524 double util; 525 526 flist_for_each(entry, &masterdu->slaves) { 527 slavedu = flist_entry(entry, struct disk_util, slavelist); 
528 dus = &slavedu->dus; 529 agg->ios[0] += dus->ios[0]; 530 agg->ios[1] += dus->ios[1]; 531 agg->merges[0] += dus->merges[0]; 532 agg->merges[1] += dus->merges[1]; 533 agg->sectors[0] += dus->sectors[0]; 534 agg->sectors[1] += dus->sectors[1]; 535 agg->ticks[0] += dus->ticks[0]; 536 agg->ticks[1] += dus->ticks[1]; 537 agg->time_in_queue += dus->time_in_queue; 538 agg->slavecount++; 539 540 util = (double) (100 * dus->io_ticks / (double) slavedu->dus.msec); 541 /* System utilization is the utilization of the 542 * component with the highest utilization. 543 */ 544 if (util > agg->max_util.u.f) 545 agg->max_util.u.f = util; 546 547 } 548 549 if (agg->max_util.u.f > 100.0) 550 agg->max_util.u.f = 100.0; 551} 552 553void disk_util_prune_entries(void) 554{ 555 fio_mutex_down(disk_util_mutex); 556 557 while (!flist_empty(&disk_list)) { 558 struct disk_util *du; 559 560 du = flist_entry(disk_list.next, struct disk_util, list); 561 flist_del(&du->list); 562 disk_util_free(du); 563 } 564 565 last_majdev = last_mindev = -1; 566 fio_mutex_up(disk_util_mutex); 567 fio_mutex_remove(disk_util_mutex); 568} 569 570void print_disk_util(struct disk_util_stat *dus, struct disk_util_agg *agg, 571 int terse) 572{ 573 double util = 0; 574 575 if (dus->msec) 576 util = (double) 100 * dus->io_ticks / (double) dus->msec; 577 if (util > 100.0) 578 util = 100.0; 579 580 if (!terse) { 581 if (agg->slavecount) 582 log_info(" "); 583 584 log_info(" %s: ios=%u/%u, merge=%u/%u, ticks=%u/%u, " 585 "in_queue=%u, util=%3.2f%%", dus->name, 586 dus->ios[0], dus->ios[1], 587 dus->merges[0], dus->merges[1], 588 dus->ticks[0], dus->ticks[1], 589 dus->time_in_queue, util); 590 } else { 591 log_info(";%s;%u;%u;%u;%u;%u;%u;%u;%3.2f%%", 592 dus->name, dus->ios[0], dus->ios[1], 593 dus->merges[0], dus->merges[1], 594 dus->ticks[0], dus->ticks[1], 595 dus->time_in_queue, util); 596 } 597 598 /* 599 * If the device has slaves, aggregate the stats for 600 * those slave devices also. 
601 */ 602 show_agg_stats(agg, terse); 603 604 if (!terse) 605 log_info("\n"); 606} 607 608static void print_disk_util_json(struct disk_util *du, struct json_array *array) 609{ 610 double util = 0; 611 struct disk_util_stat *dus = &du->dus; 612 struct disk_util_agg *agg = &du->agg; 613 struct json_object *obj; 614 615 obj = json_create_object(); 616 json_array_add_value_object(array, obj); 617 618 if (dus->msec) 619 util = (double) 100 * dus->io_ticks / (double) dus->msec; 620 if (util > 100.0) 621 util = 100.0; 622 623 624 json_object_add_value_string(obj, "name", dus->name); 625 json_object_add_value_int(obj, "read_ios", dus->ios[0]); 626 json_object_add_value_int(obj, "write_ios", dus->ios[1]); 627 json_object_add_value_int(obj, "read_merges", dus->merges[0]); 628 json_object_add_value_int(obj, "write_merges", dus->merges[1]); 629 json_object_add_value_int(obj, "read_ticks", dus->ticks[0]); 630 json_object_add_value_int(obj, "write_ticks", dus->ticks[1]); 631 json_object_add_value_int(obj, "in_queue", dus->time_in_queue); 632 json_object_add_value_float(obj, "util", util); 633 634 /* 635 * If the device has slaves, aggregate the stats for 636 * those slave devices also. 
637 */ 638 if (!agg->slavecount) 639 return; 640 json_object_add_value_int(obj, "aggr_read_ios", 641 agg->ios[0] / agg->slavecount); 642 json_object_add_value_int(obj, "aggr_write_ios", 643 agg->ios[1] / agg->slavecount); 644 json_object_add_value_int(obj, "aggr_read_merges", 645 agg->merges[0] / agg->slavecount); 646 json_object_add_value_int(obj, "aggr_write_merge", 647 agg->merges[1] / agg->slavecount); 648 json_object_add_value_int(obj, "aggr_read_ticks", 649 agg->ticks[0] / agg->slavecount); 650 json_object_add_value_int(obj, "aggr_write_ticks", 651 agg->ticks[1] / agg->slavecount); 652 json_object_add_value_int(obj, "aggr_in_queue", 653 agg->time_in_queue / agg->slavecount); 654 json_object_add_value_float(obj, "aggr_util", agg->max_util.u.f); 655} 656 657void show_disk_util(int terse, struct json_object *parent) 658{ 659 struct flist_head *entry; 660 struct disk_util *du; 661 struct json_array *array = NULL; 662 663 fio_mutex_down(disk_util_mutex); 664 665 if (flist_empty(&disk_list)) { 666 fio_mutex_up(disk_util_mutex); 667 return; 668 } 669 670 if (!terse) 671 log_info("\nDisk stats (read/write):\n"); 672 673 if (output_format == FIO_OUTPUT_JSON) { 674 array = json_create_array(); 675 json_object_add_value_array(parent, "disk_util", array); 676 } 677 678 flist_for_each(entry, &disk_list) { 679 du = flist_entry(entry, struct disk_util, list); 680 681 aggregate_slaves_stats(du); 682 if (output_format == FIO_OUTPUT_JSON) 683 print_disk_util_json(du, array); 684 else 685 print_disk_util(&du->dus, &du->agg, terse); 686 } 687 688 fio_mutex_up(disk_util_mutex); 689} 690 691void setup_disk_util(void) 692{ 693 disk_util_mutex = fio_mutex_init(FIO_MUTEX_UNLOCKED); 694} 695