diskutil.c revision 4b919f7740f4875d73c0619a08d269d5c679e19f
1#include <stdio.h> 2#include <string.h> 3#include <sys/time.h> 4#include <sys/types.h> 5#include <sys/stat.h> 6#include <dirent.h> 7#include <libgen.h> 8#include <math.h> 9 10#include "fio.h" 11#include "smalloc.h" 12#include "diskutil.h" 13 14static int last_majdev, last_mindev; 15static struct disk_util *last_du; 16 17static struct fio_mutex *disk_util_mutex; 18 19FLIST_HEAD(disk_list); 20 21static struct disk_util *__init_per_file_disk_util(struct thread_data *td, 22 int majdev, int mindev, char *path); 23 24static void disk_util_free(struct disk_util *du) 25{ 26 if (du == last_du) 27 last_du = NULL; 28 29 while (!flist_empty(&du->slaves)) { 30 struct disk_util *slave; 31 32 slave = flist_entry(du->slaves.next, struct disk_util, slavelist); 33 flist_del(&slave->slavelist); 34 slave->users--; 35 } 36 37 fio_mutex_remove(du->lock); 38 sfree(du); 39} 40 41static int get_io_ticks(struct disk_util *du, struct disk_util_stat *dus) 42{ 43 unsigned in_flight; 44 unsigned long long sectors[2]; 45 char line[256]; 46 FILE *f; 47 char *p; 48 int ret; 49 50 dprint(FD_DISKUTIL, "open stat file: %s\n", du->path); 51 52 f = fopen(du->path, "r"); 53 if (!f) 54 return 1; 55 56 p = fgets(line, sizeof(line), f); 57 if (!p) { 58 fclose(f); 59 return 1; 60 } 61 62 dprint(FD_DISKUTIL, "%s: %s", du->path, p); 63 64 ret = sscanf(p, "%u %u %llu %u %u %u %llu %u %u %u %u\n", &dus->ios[0], 65 &dus->merges[0], §ors[0], 66 &dus->ticks[0], &dus->ios[1], 67 &dus->merges[1], §ors[1], 68 &dus->ticks[1], &in_flight, 69 &dus->io_ticks, &dus->time_in_queue); 70 fclose(f); 71 dprint(FD_DISKUTIL, "%s: stat read ok? %d\n", du->path, ret == 1); 72 dus->sectors[0] = sectors[0]; 73 dus->sectors[1] = sectors[1]; 74 return ret != 11; 75} 76 77static void update_io_tick_disk(struct disk_util *du) 78{ 79 struct disk_util_stat __dus, *dus, *ldus; 80 struct timeval t; 81 82 if (!du->users) 83 return; 84 if (get_io_ticks(du, &__dus)) 85 return; 86 87 dus = &du->dus; 88 ldus = &du->last_dus; 89 90 dus->sectors[0] += (__dus.sectors[0] - ldus->sectors[0]); 91 dus->sectors[1] += (__dus.sectors[1] - ldus->sectors[1]); 92 dus->ios[0] += (__dus.ios[0] - ldus->ios[0]); 93 dus->ios[1] += (__dus.ios[1] - ldus->ios[1]); 94 dus->merges[0] += (__dus.merges[0] - ldus->merges[0]); 95 dus->merges[1] += (__dus.merges[1] - ldus->merges[1]); 96 dus->ticks[0] += (__dus.ticks[0] - ldus->ticks[0]); 97 dus->ticks[1] += (__dus.ticks[1] - ldus->ticks[1]); 98 dus->io_ticks += (__dus.io_ticks - ldus->io_ticks); 99 dus->time_in_queue += (__dus.time_in_queue - ldus->time_in_queue); 100 101 fio_gettime(&t, NULL); 102 dus->msec += mtime_since(&du->time, &t); 103 memcpy(&du->time, &t, sizeof(t)); 104 memcpy(ldus, &__dus, sizeof(__dus)); 105} 106 107int update_io_ticks(void) 108{ 109 struct flist_head *entry; 110 struct disk_util *du; 111 int ret = 0; 112 113 dprint(FD_DISKUTIL, "update io ticks\n"); 114 115 fio_mutex_down(disk_util_mutex); 116 117 if (!disk_util_exit) { 118 flist_for_each(entry, &disk_list) { 119 du = flist_entry(entry, struct disk_util, list); 120 update_io_tick_disk(du); 121 } 122 } else 123 ret = 1; 124 125 fio_mutex_up(disk_util_mutex); 126 return ret; 127} 128 129static struct disk_util *disk_util_exists(int major, int minor) 130{ 131 struct flist_head *entry; 132 struct disk_util *du; 133 134 fio_mutex_down(disk_util_mutex); 135 136 flist_for_each(entry, &disk_list) { 137 du = flist_entry(entry, struct disk_util, list); 138 139 if (major == du->major && minor == du->minor) { 140 fio_mutex_up(disk_util_mutex); 141 return du; 142 } 143 } 144 145 fio_mutex_up(disk_util_mutex); 146 return NULL; 147} 148 149static int get_device_numbers(char *file_name, int *maj, int *min) 150{ 151 struct stat st; 152 int majdev, mindev; 153 char tempname[PATH_MAX], *p; 154 155 if (!lstat(file_name, &st)) { 156 if (S_ISBLK(st.st_mode)) { 157 majdev = major(st.st_rdev); 158 mindev = minor(st.st_rdev); 159 } else if (S_ISCHR(st.st_mode)) { 160 majdev = major(st.st_rdev); 161 mindev = minor(st.st_rdev); 162 if (fio_lookup_raw(st.st_rdev, &majdev, &mindev)) 163 return -1; 164 } else if (S_ISFIFO(st.st_mode)) 165 return -1; 166 else { 167 majdev = major(st.st_dev); 168 mindev = minor(st.st_dev); 169 } 170 } else { 171 /* 172 * must be a file, open "." in that path 173 */ 174 strncpy(tempname, file_name, PATH_MAX - 1); 175 p = dirname(tempname); 176 if (stat(p, &st)) { 177 perror("disk util stat"); 178 return -1; 179 } 180 181 majdev = major(st.st_dev); 182 mindev = minor(st.st_dev); 183 } 184 185 *min = mindev; 186 *maj = majdev; 187 188 return 0; 189} 190 191static int read_block_dev_entry(char *path, int *maj, int *min) 192{ 193 char line[256], *p; 194 FILE *f; 195 196 f = fopen(path, "r"); 197 if (!f) { 198 perror("open path"); 199 return 1; 200 } 201 202 p = fgets(line, sizeof(line), f); 203 fclose(f); 204 205 if (!p) 206 return 1; 207 208 if (sscanf(p, "%u:%u", maj, min) != 2) 209 return 1; 210 211 return 0; 212} 213 214static void find_add_disk_slaves(struct thread_data *td, char *path, 215 struct disk_util *masterdu) 216{ 217 DIR *dirhandle = NULL; 218 struct dirent *dirent = NULL; 219 char slavesdir[PATH_MAX], temppath[PATH_MAX], slavepath[PATH_MAX]; 220 struct disk_util *slavedu = NULL; 221 int majdev, mindev; 222 ssize_t linklen; 223 224 sprintf(slavesdir, "%s/%s", path, "slaves"); 225 dirhandle = opendir(slavesdir); 226 if (!dirhandle) 227 return; 228 229 while ((dirent = readdir(dirhandle)) != NULL) { 230 if (!strcmp(dirent->d_name, ".") || 231 !strcmp(dirent->d_name, "..")) 232 continue; 233 234 sprintf(temppath, "%s%s%s", slavesdir, FIO_OS_PATH_SEPARATOR, dirent->d_name); 235 /* Can we always assume that the slaves device entries 236 * are links to the real directories for the slave 237 * devices? 238 */ 239 linklen = readlink(temppath, slavepath, PATH_MAX - 0); 240 if (linklen < 0) { 241 perror("readlink() for slave device."); 242 return; 243 } 244 slavepath[linklen] = '\0'; 245 246 sprintf(temppath, "%s/%s/dev", slavesdir, slavepath); 247 if (read_block_dev_entry(temppath, &majdev, &mindev)) { 248 perror("Error getting slave device numbers."); 249 return; 250 } 251 252 /* 253 * See if this maj,min already exists 254 */ 255 slavedu = disk_util_exists(majdev, mindev); 256 if (slavedu) 257 continue; 258 259 sprintf(temppath, "%s%s%s", slavesdir, FIO_OS_PATH_SEPARATOR, slavepath); 260 __init_per_file_disk_util(td, majdev, mindev, temppath); 261 slavedu = disk_util_exists(majdev, mindev); 262 263 /* Should probably use an assert here. slavedu should 264 * always be present at this point. */ 265 if (slavedu) { 266 slavedu->users++; 267 flist_add_tail(&slavedu->slavelist, &masterdu->slaves); 268 } 269 } 270 271 closedir(dirhandle); 272} 273 274static struct disk_util *disk_util_add(struct thread_data *td, int majdev, 275 int mindev, char *path) 276{ 277 struct disk_util *du, *__du; 278 struct flist_head *entry; 279 int l; 280 281 dprint(FD_DISKUTIL, "add maj/min %d/%d: %s\n", majdev, mindev, path); 282 283 du = smalloc(sizeof(*du)); 284 memset(du, 0, sizeof(*du)); 285 INIT_FLIST_HEAD(&du->list); 286 l = snprintf(du->path, sizeof(du->path), "%s/stat", path); 287 if (l < 0 || l >= sizeof(du->path)) { 288 log_err("constructed path \"%.100s[...]/stat\" larger than buffer (%zu bytes)\n", 289 path, sizeof(du->path) - 1); 290 sfree(du); 291 return NULL; 292 } 293 strncpy((char *) du->dus.name, basename(path), FIO_DU_NAME_SZ); 294 du->sysfs_root = path; 295 du->major = majdev; 296 du->minor = mindev; 297 INIT_FLIST_HEAD(&du->slavelist); 298 INIT_FLIST_HEAD(&du->slaves); 299 du->lock = fio_mutex_init(FIO_MUTEX_UNLOCKED); 300 du->users = 0; 301 302 fio_mutex_down(disk_util_mutex); 303 304 flist_for_each(entry, &disk_list) { 305 __du = flist_entry(entry, struct disk_util, list); 306 307 dprint(FD_DISKUTIL, "found %s in list\n", __du->dus.name); 308 309 if (!strcmp((char *) du->dus.name, (char *) __du->dus.name)) { 310 disk_util_free(du); 311 fio_mutex_up(disk_util_mutex); 312 return __du; 313 } 314 } 315 316 dprint(FD_DISKUTIL, "add %s to list\n", du->dus.name); 317 318 fio_gettime(&du->time, NULL); 319 get_io_ticks(du, &du->last_dus); 320 321 flist_add_tail(&du->list, &disk_list); 322 fio_mutex_up(disk_util_mutex); 323 324 find_add_disk_slaves(td, path, du); 325 return du; 326} 327 328static int check_dev_match(int majdev, int mindev, char *path) 329{ 330 int major, minor; 331 332 if (read_block_dev_entry(path, &major, &minor)) 333 return 1; 334 335 if (majdev == major && mindev == minor) 336 return 0; 337 338 return 1; 339} 340 341static int find_block_dir(int majdev, int mindev, char *path, int link_ok) 342{ 343 struct dirent *dir; 344 struct stat st; 345 int found = 0; 346 DIR *D; 347 348 D = opendir(path); 349 if (!D) 350 return 0; 351 352 while ((dir = readdir(D)) != NULL) { 353 char full_path[256]; 354 355 if (!strcmp(dir->d_name, ".") || !strcmp(dir->d_name, "..")) 356 continue; 357 358 sprintf(full_path, "%s%s%s", path, FIO_OS_PATH_SEPARATOR, dir->d_name); 359 360 if (!strcmp(dir->d_name, "dev")) { 361 if (!check_dev_match(majdev, mindev, full_path)) { 362 found = 1; 363 break; 364 } 365 } 366 367 if (link_ok) { 368 if (stat(full_path, &st) == -1) { 369 perror("stat"); 370 break; 371 } 372 } else { 373 if (lstat(full_path, &st) == -1) { 374 perror("stat"); 375 break; 376 } 377 } 378 379 if (!S_ISDIR(st.st_mode) || S_ISLNK(st.st_mode)) 380 continue; 381 382 found = find_block_dir(majdev, mindev, full_path, 0); 383 if (found) { 384 strcpy(path, full_path); 385 break; 386 } 387 } 388 389 closedir(D); 390 return found; 391} 392 393static struct disk_util *__init_per_file_disk_util(struct thread_data *td, 394 int majdev, int mindev, 395 char *path) 396{ 397 struct stat st; 398 char tmp[PATH_MAX]; 399 char *p; 400 401 /* 402 * If there's a ../queue/ directory there, we are inside a partition. 403 * Check if that is the case and jump back. For loop/md/dm etc we 404 * are already in the right spot. 405 */ 406 sprintf(tmp, "%s/../queue", path); 407 if (!stat(tmp, &st)) { 408 p = dirname(path); 409 sprintf(tmp, "%s/queue", p); 410 if (stat(tmp, &st)) { 411 log_err("unknown sysfs layout\n"); 412 return NULL; 413 } 414 strncpy(tmp, p, PATH_MAX - 1); 415 sprintf(path, "%s", tmp); 416 } 417 418 if (td->o.ioscheduler && !td->sysfs_root) 419 td->sysfs_root = strdup(path); 420 421 return disk_util_add(td, majdev, mindev, path); 422} 423 424static struct disk_util *init_per_file_disk_util(struct thread_data *td, 425 char *filename) 426{ 427 428 char foo[PATH_MAX]; 429 struct disk_util *du; 430 int mindev, majdev; 431 432 if (get_device_numbers(filename, &majdev, &mindev)) 433 return NULL; 434 435 dprint(FD_DISKUTIL, "%s belongs to maj/min %d/%d\n", filename, majdev, 436 mindev); 437 438 du = disk_util_exists(majdev, mindev); 439 if (du) { 440 if (td->o.ioscheduler && !td->sysfs_root) 441 td->sysfs_root = strdup(du->sysfs_root); 442 443 return du; 444 } 445 446 /* 447 * for an fs without a device, we will repeatedly stat through 448 * sysfs which can take oodles of time for thousands of files. so 449 * cache the last lookup and compare with that before going through 450 * everything again. 451 */ 452 if (mindev == last_mindev && majdev == last_majdev) 453 return last_du; 454 455 last_mindev = mindev; 456 last_majdev = majdev; 457 458 sprintf(foo, "/sys/block"); 459 if (!find_block_dir(majdev, mindev, foo, 1)) 460 return NULL; 461 462 return __init_per_file_disk_util(td, majdev, mindev, foo); 463} 464 465static struct disk_util *__init_disk_util(struct thread_data *td, 466 struct fio_file *f) 467{ 468 return init_per_file_disk_util(td, f->file_name); 469} 470 471void init_disk_util(struct thread_data *td) 472{ 473 struct fio_file *f; 474 unsigned int i; 475 476 if (!td->o.do_disk_util || 477 (td->io_ops->flags & (FIO_DISKLESSIO | FIO_NODISKUTIL))) 478 return; 479 480 for_each_file(td, f, i) 481 f->du = __init_disk_util(td, f); 482} 483 484static void show_agg_stats(struct disk_util_agg *agg, int terse) 485{ 486 if (!agg->slavecount) 487 return; 488 489 if (!terse) { 490 log_info(", aggrios=%u/%u, aggrmerge=%u/%u, aggrticks=%u/%u," 491 " aggrin_queue=%u, aggrutil=%3.2f%%", 492 agg->ios[0] / agg->slavecount, 493 agg->ios[1] / agg->slavecount, 494 agg->merges[0] / agg->slavecount, 495 agg->merges[1] / agg->slavecount, 496 agg->ticks[0] / agg->slavecount, 497 agg->ticks[1] / agg->slavecount, 498 agg->time_in_queue / agg->slavecount, 499 agg->max_util.u.f); 500 } else { 501 log_info(";slaves;%u;%u;%u;%u;%u;%u;%u;%3.2f%%", 502 agg->ios[0] / agg->slavecount, 503 agg->ios[1] / agg->slavecount, 504 agg->merges[0] / agg->slavecount, 505 agg->merges[1] / agg->slavecount, 506 agg->ticks[0] / agg->slavecount, 507 agg->ticks[1] / agg->slavecount, 508 agg->time_in_queue / agg->slavecount, 509 agg->max_util.u.f); 510 } 511} 512 513static void aggregate_slaves_stats(struct disk_util *masterdu) 514{ 515 struct disk_util_agg *agg = &masterdu->agg; 516 struct disk_util_stat *dus; 517 struct flist_head *entry; 518 struct disk_util *slavedu; 519 double util; 520 521 flist_for_each(entry, &masterdu->slaves) { 522 slavedu = flist_entry(entry, struct disk_util, slavelist); 523 dus = &slavedu->dus; 524 agg->ios[0] += dus->ios[0]; 525 agg->ios[1] += dus->ios[1]; 526 agg->merges[0] += dus->merges[0]; 527 agg->merges[1] += dus->merges[1]; 528 agg->sectors[0] += dus->sectors[0]; 529 agg->sectors[1] += dus->sectors[1]; 530 agg->ticks[0] += dus->ticks[0]; 531 agg->ticks[1] += dus->ticks[1]; 532 agg->time_in_queue += dus->time_in_queue; 533 agg->slavecount++; 534 535 util = (double) (100 * dus->io_ticks / (double) slavedu->dus.msec); 536 /* System utilization is the utilization of the 537 * component with the highest utilization. 538 */ 539 if (util > agg->max_util.u.f) 540 agg->max_util.u.f = util; 541 542 } 543 544 if (agg->max_util.u.f > 100.0) 545 agg->max_util.u.f = 100.0; 546} 547 548void disk_util_prune_entries(void) 549{ 550 fio_mutex_down(disk_util_mutex); 551 552 while (!flist_empty(&disk_list)) { 553 struct disk_util *du; 554 555 du = flist_entry(disk_list.next, struct disk_util, list); 556 flist_del(&du->list); 557 disk_util_free(du); 558 } 559 560 last_majdev = last_mindev = -1; 561 fio_mutex_up(disk_util_mutex); 562 fio_mutex_remove(disk_util_mutex); 563} 564 565void print_disk_util(struct disk_util_stat *dus, struct disk_util_agg *agg, 566 int terse) 567{ 568 double util = 0; 569 570 if (dus->msec) 571 util = (double) 100 * dus->io_ticks / (double) dus->msec; 572 if (util > 100.0) 573 util = 100.0; 574 575 if (!terse) { 576 if (agg->slavecount) 577 log_info(" "); 578 579 log_info(" %s: ios=%u/%u, merge=%u/%u, ticks=%u/%u, " 580 "in_queue=%u, util=%3.2f%%", dus->name, 581 dus->ios[0], dus->ios[1], 582 dus->merges[0], dus->merges[1], 583 dus->ticks[0], dus->ticks[1], 584 dus->time_in_queue, util); 585 } else { 586 log_info(";%s;%u;%u;%u;%u;%u;%u;%u;%3.2f%%", 587 dus->name, dus->ios[0], dus->ios[1], 588 dus->merges[0], dus->merges[1], 589 dus->ticks[0], dus->ticks[1], 590 dus->time_in_queue, util); 591 } 592 593 /* 594 * If the device has slaves, aggregate the stats for 595 * those slave devices also. 596 */ 597 show_agg_stats(agg, terse); 598 599 if (!terse) 600 log_info("\n"); 601} 602 603static void print_disk_util_json(struct disk_util *du, struct json_array *array) 604{ 605 double util = 0; 606 struct disk_util_stat *dus = &du->dus; 607 struct disk_util_agg *agg = &du->agg; 608 struct json_object *obj; 609 610 obj = json_create_object(); 611 json_array_add_value_object(array, obj); 612 613 if (dus->msec) 614 util = (double) 100 * dus->io_ticks / (double) dus->msec; 615 if (util > 100.0) 616 util = 100.0; 617 618 619 json_object_add_value_string(obj, "name", dus->name); 620 json_object_add_value_int(obj, "read_ios", dus->ios[0]); 621 json_object_add_value_int(obj, "write_ios", dus->ios[1]); 622 json_object_add_value_int(obj, "read_merges", dus->merges[0]); 623 json_object_add_value_int(obj, "write_merges", dus->merges[1]); 624 json_object_add_value_int(obj, "read_ticks", dus->ticks[0]); 625 json_object_add_value_int(obj, "write_ticks", dus->ticks[1]); 626 json_object_add_value_int(obj, "in_queue", dus->time_in_queue); 627 json_object_add_value_float(obj, "util", util); 628 629 /* 630 * If the device has slaves, aggregate the stats for 631 * those slave devices also. 632 */ 633 if (!agg->slavecount) 634 return; 635 json_object_add_value_int(obj, "aggr_read_ios", 636 agg->ios[0] / agg->slavecount); 637 json_object_add_value_int(obj, "aggr_write_ios", 638 agg->ios[1] / agg->slavecount); 639 json_object_add_value_int(obj, "aggr_read_merges", 640 agg->merges[0] / agg->slavecount); 641 json_object_add_value_int(obj, "aggr_write_merge", 642 agg->merges[1] / agg->slavecount); 643 json_object_add_value_int(obj, "aggr_read_ticks", 644 agg->ticks[0] / agg->slavecount); 645 json_object_add_value_int(obj, "aggr_write_ticks", 646 agg->ticks[1] / agg->slavecount); 647 json_object_add_value_int(obj, "aggr_in_queue", 648 agg->time_in_queue / agg->slavecount); 649 json_object_add_value_float(obj, "aggr_util", agg->max_util.u.f); 650} 651 652void show_disk_util(int terse, struct json_object *parent) 653{ 654 struct flist_head *entry; 655 struct disk_util *du; 656 struct json_array *array = NULL; 657 658 fio_mutex_down(disk_util_mutex); 659 660 if (flist_empty(&disk_list)) { 661 fio_mutex_up(disk_util_mutex); 662 return; 663 } 664 665 if (!terse) 666 log_info("\nDisk stats (read/write):\n"); 667 668 if (output_format == FIO_OUTPUT_JSON) { 669 array = json_create_array(); 670 json_object_add_value_array(parent, "disk_util", array); 671 } 672 673 flist_for_each(entry, &disk_list) { 674 du = flist_entry(entry, struct disk_util, list); 675 676 aggregate_slaves_stats(du); 677 if (output_format == FIO_OUTPUT_JSON) 678 print_disk_util_json(du, array); 679 else 680 print_disk_util(&du->dus, &du->agg, terse); 681 } 682 683 fio_mutex_up(disk_util_mutex); 684} 685 686void setup_disk_util(void) 687{ 688 disk_util_mutex = fio_mutex_init(FIO_MUTEX_UNLOCKED); 689} 690