/* diskutil.c revision aeb32dfccbd052965b0e50714c12265551f61a00 */
1#include <stdio.h> 2#include <string.h> 3#include <sys/time.h> 4#include <sys/types.h> 5#include <sys/stat.h> 6#include <dirent.h> 7#include <libgen.h> 8#include <math.h> 9 10#include "fio.h" 11#include "smalloc.h" 12#include "diskutil.h" 13 14static int last_majdev, last_mindev; 15static struct disk_util *last_du; 16 17static struct fio_mutex *disk_util_mutex; 18 19FLIST_HEAD(disk_list); 20 21static struct disk_util *__init_per_file_disk_util(struct thread_data *td, 22 int majdev, int mindev, char *path); 23 24static void disk_util_free(struct disk_util *du) 25{ 26 if (du == last_du) 27 last_du = NULL; 28 29 while (!flist_empty(&du->slaves)) { 30 struct disk_util *slave; 31 32 slave = flist_entry(du->slaves.next, struct disk_util, slavelist); 33 flist_del(&slave->slavelist); 34 slave->users--; 35 } 36 37 fio_mutex_remove(du->lock); 38 sfree(du); 39} 40 41static int get_io_ticks(struct disk_util *du, struct disk_util_stat *dus) 42{ 43 unsigned in_flight; 44 unsigned long long sectors[2]; 45 char line[256]; 46 FILE *f; 47 char *p; 48 int ret; 49 50 dprint(FD_DISKUTIL, "open stat file: %s\n", du->path); 51 52 f = fopen(du->path, "r"); 53 if (!f) 54 return 1; 55 56 p = fgets(line, sizeof(line), f); 57 if (!p) { 58 fclose(f); 59 return 1; 60 } 61 62 dprint(FD_DISKUTIL, "%s: %s", du->path, p); 63 64 ret = sscanf(p, "%u %u %llu %u %u %u %llu %u %u %u %u\n", &dus->ios[0], 65 &dus->merges[0], §ors[0], 66 &dus->ticks[0], &dus->ios[1], 67 &dus->merges[1], §ors[1], 68 &dus->ticks[1], &in_flight, 69 &dus->io_ticks, &dus->time_in_queue); 70 fclose(f); 71 dprint(FD_DISKUTIL, "%s: stat read ok? 
%d\n", du->path, ret == 1); 72 dus->sectors[0] = sectors[0]; 73 dus->sectors[1] = sectors[1]; 74 return ret != 11; 75} 76 77static void update_io_tick_disk(struct disk_util *du) 78{ 79 struct disk_util_stat __dus, *dus, *ldus; 80 struct timeval t; 81 82 if (!du->users) 83 return; 84 if (get_io_ticks(du, &__dus)) 85 return; 86 87 dus = &du->dus; 88 ldus = &du->last_dus; 89 90 dus->sectors[0] += (__dus.sectors[0] - ldus->sectors[0]); 91 dus->sectors[1] += (__dus.sectors[1] - ldus->sectors[1]); 92 dus->ios[0] += (__dus.ios[0] - ldus->ios[0]); 93 dus->ios[1] += (__dus.ios[1] - ldus->ios[1]); 94 dus->merges[0] += (__dus.merges[0] - ldus->merges[0]); 95 dus->merges[1] += (__dus.merges[1] - ldus->merges[1]); 96 dus->ticks[0] += (__dus.ticks[0] - ldus->ticks[0]); 97 dus->ticks[1] += (__dus.ticks[1] - ldus->ticks[1]); 98 dus->io_ticks += (__dus.io_ticks - ldus->io_ticks); 99 dus->time_in_queue += (__dus.time_in_queue - ldus->time_in_queue); 100 101 fio_gettime(&t, NULL); 102 dus->msec += mtime_since(&du->time, &t); 103 memcpy(&du->time, &t, sizeof(t)); 104 memcpy(ldus, &__dus, sizeof(__dus)); 105} 106 107int update_io_ticks(void) 108{ 109 struct flist_head *entry; 110 struct disk_util *du; 111 int ret = 0; 112 113 dprint(FD_DISKUTIL, "update io ticks\n"); 114 115 fio_mutex_down(disk_util_mutex); 116 117 if (!disk_util_exit) { 118 flist_for_each(entry, &disk_list) { 119 du = flist_entry(entry, struct disk_util, list); 120 update_io_tick_disk(du); 121 } 122 } else 123 ret = 1; 124 125 fio_mutex_up(disk_util_mutex); 126 return ret; 127} 128 129static struct disk_util *disk_util_exists(int major, int minor) 130{ 131 struct flist_head *entry; 132 struct disk_util *du; 133 134 fio_mutex_down(disk_util_mutex); 135 136 flist_for_each(entry, &disk_list) { 137 du = flist_entry(entry, struct disk_util, list); 138 139 if (major == du->major && minor == du->minor) { 140 fio_mutex_up(disk_util_mutex); 141 return du; 142 } 143 } 144 145 fio_mutex_up(disk_util_mutex); 146 return NULL; 147} 
148 149static int get_device_numbers(char *file_name, int *maj, int *min) 150{ 151 struct stat st; 152 int majdev, mindev; 153 char tempname[PATH_MAX], *p; 154 155 if (!lstat(file_name, &st)) { 156 if (S_ISBLK(st.st_mode)) { 157 majdev = major(st.st_rdev); 158 mindev = minor(st.st_rdev); 159 } else if (S_ISCHR(st.st_mode)) { 160 majdev = major(st.st_rdev); 161 mindev = minor(st.st_rdev); 162 if (fio_lookup_raw(st.st_rdev, &majdev, &mindev)) 163 return -1; 164 } else if (S_ISFIFO(st.st_mode)) 165 return -1; 166 else { 167 majdev = major(st.st_dev); 168 mindev = minor(st.st_dev); 169 } 170 } else { 171 /* 172 * must be a file, open "." in that path 173 */ 174 strncpy(tempname, file_name, PATH_MAX - 1); 175 p = dirname(tempname); 176 if (stat(p, &st)) { 177 perror("disk util stat"); 178 return -1; 179 } 180 181 majdev = major(st.st_dev); 182 mindev = minor(st.st_dev); 183 } 184 185 *min = mindev; 186 *maj = majdev; 187 188 return 0; 189} 190 191static int read_block_dev_entry(char *path, int *maj, int *min) 192{ 193 char line[256], *p; 194 FILE *f; 195 196 f = fopen(path, "r"); 197 if (!f) { 198 perror("open path"); 199 return 1; 200 } 201 202 p = fgets(line, sizeof(line), f); 203 fclose(f); 204 205 if (!p) 206 return 1; 207 208 if (sscanf(p, "%u:%u", maj, min) != 2) 209 return 1; 210 211 return 0; 212} 213 214static void find_add_disk_slaves(struct thread_data *td, char *path, 215 struct disk_util *masterdu) 216{ 217 DIR *dirhandle = NULL; 218 struct dirent *dirent = NULL; 219 char slavesdir[PATH_MAX], temppath[PATH_MAX], slavepath[PATH_MAX]; 220 struct disk_util *slavedu = NULL; 221 int majdev, mindev; 222 ssize_t linklen; 223 224 sprintf(slavesdir, "%s/%s", path, "slaves"); 225 dirhandle = opendir(slavesdir); 226 if (!dirhandle) 227 return; 228 229 while ((dirent = readdir(dirhandle)) != NULL) { 230 if (!strcmp(dirent->d_name, ".") || 231 !strcmp(dirent->d_name, "..")) 232 continue; 233 234 sprintf(temppath, "%s%s%s", slavesdir, FIO_OS_PATH_SEPARATOR, 
dirent->d_name); 235 /* Can we always assume that the slaves device entries 236 * are links to the real directories for the slave 237 * devices? 238 */ 239 linklen = readlink(temppath, slavepath, PATH_MAX - 0); 240 if (linklen < 0) { 241 perror("readlink() for slave device."); 242 return; 243 } 244 slavepath[linklen] = '\0'; 245 246 sprintf(temppath, "%s/%s/dev", slavesdir, slavepath); 247 if (read_block_dev_entry(temppath, &majdev, &mindev)) { 248 perror("Error getting slave device numbers."); 249 return; 250 } 251 252 /* 253 * See if this maj,min already exists 254 */ 255 slavedu = disk_util_exists(majdev, mindev); 256 if (slavedu) 257 continue; 258 259 sprintf(temppath, "%s%s%s", slavesdir, FIO_OS_PATH_SEPARATOR, slavepath); 260 __init_per_file_disk_util(td, majdev, mindev, temppath); 261 slavedu = disk_util_exists(majdev, mindev); 262 263 /* Should probably use an assert here. slavedu should 264 * always be present at this point. */ 265 if (slavedu) { 266 slavedu->users++; 267 flist_add_tail(&slavedu->slavelist, &masterdu->slaves); 268 } 269 } 270 271 closedir(dirhandle); 272} 273 274static struct disk_util *disk_util_add(struct thread_data *td, int majdev, 275 int mindev, char *path) 276{ 277 struct disk_util *du, *__du; 278 struct flist_head *entry; 279 280 dprint(FD_DISKUTIL, "add maj/min %d/%d: %s\n", majdev, mindev, path); 281 282 du = smalloc(sizeof(*du)); 283 memset(du, 0, sizeof(*du)); 284 INIT_FLIST_HEAD(&du->list); 285 sprintf(du->path, "%s/stat", path); 286 strncpy((char *) du->dus.name, basename(path), FIO_DU_NAME_SZ); 287 du->sysfs_root = path; 288 du->major = majdev; 289 du->minor = mindev; 290 INIT_FLIST_HEAD(&du->slavelist); 291 INIT_FLIST_HEAD(&du->slaves); 292 du->lock = fio_mutex_init(FIO_MUTEX_UNLOCKED); 293 du->users = 0; 294 295 fio_mutex_down(disk_util_mutex); 296 297 flist_for_each(entry, &disk_list) { 298 __du = flist_entry(entry, struct disk_util, list); 299 300 dprint(FD_DISKUTIL, "found %s in list\n", __du->dus.name); 301 302 if 
(!strcmp((char *) du->dus.name, (char *) __du->dus.name)) { 303 disk_util_free(du); 304 fio_mutex_up(disk_util_mutex); 305 return __du; 306 } 307 } 308 309 dprint(FD_DISKUTIL, "add %s to list\n", du->dus.name); 310 311 fio_gettime(&du->time, NULL); 312 get_io_ticks(du, &du->last_dus); 313 314 flist_add_tail(&du->list, &disk_list); 315 fio_mutex_up(disk_util_mutex); 316 317 find_add_disk_slaves(td, path, du); 318 return du; 319} 320 321static int check_dev_match(int majdev, int mindev, char *path) 322{ 323 int major, minor; 324 325 if (read_block_dev_entry(path, &major, &minor)) 326 return 1; 327 328 if (majdev == major && mindev == minor) 329 return 0; 330 331 return 1; 332} 333 334static int find_block_dir(int majdev, int mindev, char *path, int link_ok) 335{ 336 struct dirent *dir; 337 struct stat st; 338 int found = 0; 339 DIR *D; 340 341 D = opendir(path); 342 if (!D) 343 return 0; 344 345 while ((dir = readdir(D)) != NULL) { 346 char full_path[256]; 347 348 if (!strcmp(dir->d_name, ".") || !strcmp(dir->d_name, "..")) 349 continue; 350 351 sprintf(full_path, "%s%s%s", path, FIO_OS_PATH_SEPARATOR, dir->d_name); 352 353 if (!strcmp(dir->d_name, "dev")) { 354 if (!check_dev_match(majdev, mindev, full_path)) { 355 found = 1; 356 break; 357 } 358 } 359 360 if (link_ok) { 361 if (stat(full_path, &st) == -1) { 362 perror("stat"); 363 break; 364 } 365 } else { 366 if (lstat(full_path, &st) == -1) { 367 perror("stat"); 368 break; 369 } 370 } 371 372 if (!S_ISDIR(st.st_mode) || S_ISLNK(st.st_mode)) 373 continue; 374 375 found = find_block_dir(majdev, mindev, full_path, 0); 376 if (found) { 377 strcpy(path, full_path); 378 break; 379 } 380 } 381 382 closedir(D); 383 return found; 384} 385 386static struct disk_util *__init_per_file_disk_util(struct thread_data *td, 387 int majdev, int mindev, 388 char *path) 389{ 390 struct stat st; 391 char tmp[PATH_MAX]; 392 char *p; 393 394 /* 395 * If there's a ../queue/ directory there, we are inside a partition. 
396 * Check if that is the case and jump back. For loop/md/dm etc we 397 * are already in the right spot. 398 */ 399 sprintf(tmp, "%s/../queue", path); 400 if (!stat(tmp, &st)) { 401 p = dirname(path); 402 sprintf(tmp, "%s/queue", p); 403 if (stat(tmp, &st)) { 404 log_err("unknown sysfs layout\n"); 405 return NULL; 406 } 407 strncpy(tmp, p, PATH_MAX - 1); 408 sprintf(path, "%s", tmp); 409 } 410 411 if (td->o.ioscheduler && !td->sysfs_root) 412 td->sysfs_root = strdup(path); 413 414 return disk_util_add(td, majdev, mindev, path); 415} 416 417static struct disk_util *init_per_file_disk_util(struct thread_data *td, 418 char *filename) 419{ 420 421 char foo[PATH_MAX]; 422 struct disk_util *du; 423 int mindev, majdev; 424 425 if (get_device_numbers(filename, &majdev, &mindev)) 426 return NULL; 427 428 dprint(FD_DISKUTIL, "%s belongs to maj/min %d/%d\n", filename, majdev, 429 mindev); 430 431 du = disk_util_exists(majdev, mindev); 432 if (du) { 433 if (td->o.ioscheduler && !td->sysfs_root) 434 td->sysfs_root = strdup(du->sysfs_root); 435 436 return du; 437 } 438 439 /* 440 * for an fs without a device, we will repeatedly stat through 441 * sysfs which can take oodles of time for thousands of files. so 442 * cache the last lookup and compare with that before going through 443 * everything again. 
444 */ 445 if (mindev == last_mindev && majdev == last_majdev) 446 return last_du; 447 448 last_mindev = mindev; 449 last_majdev = majdev; 450 451 sprintf(foo, "/sys/block"); 452 if (!find_block_dir(majdev, mindev, foo, 1)) 453 return NULL; 454 455 return __init_per_file_disk_util(td, majdev, mindev, foo); 456} 457 458static struct disk_util *__init_disk_util(struct thread_data *td, 459 struct fio_file *f) 460{ 461 return init_per_file_disk_util(td, f->file_name); 462} 463 464void init_disk_util(struct thread_data *td) 465{ 466 struct fio_file *f; 467 unsigned int i; 468 469 if (!td->o.do_disk_util || 470 (td->io_ops->flags & (FIO_DISKLESSIO | FIO_NODISKUTIL))) 471 return; 472 473 for_each_file(td, f, i) 474 f->du = __init_disk_util(td, f); 475} 476 477static void show_agg_stats(struct disk_util_agg *agg, int terse) 478{ 479 if (!agg->slavecount) 480 return; 481 482 if (!terse) { 483 log_info(", aggrios=%u/%u, aggrmerge=%u/%u, aggrticks=%u/%u," 484 " aggrin_queue=%u, aggrutil=%3.2f%%", 485 agg->ios[0] / agg->slavecount, 486 agg->ios[1] / agg->slavecount, 487 agg->merges[0] / agg->slavecount, 488 agg->merges[1] / agg->slavecount, 489 agg->ticks[0] / agg->slavecount, 490 agg->ticks[1] / agg->slavecount, 491 agg->time_in_queue / agg->slavecount, 492 agg->max_util.u.f); 493 } else { 494 log_info(";slaves;%u;%u;%u;%u;%u;%u;%u;%3.2f%%", 495 agg->ios[0] / agg->slavecount, 496 agg->ios[1] / agg->slavecount, 497 agg->merges[0] / agg->slavecount, 498 agg->merges[1] / agg->slavecount, 499 agg->ticks[0] / agg->slavecount, 500 agg->ticks[1] / agg->slavecount, 501 agg->time_in_queue / agg->slavecount, 502 agg->max_util.u.f); 503 } 504} 505 506static void aggregate_slaves_stats(struct disk_util *masterdu) 507{ 508 struct disk_util_agg *agg = &masterdu->agg; 509 struct disk_util_stat *dus; 510 struct flist_head *entry; 511 struct disk_util *slavedu; 512 double util; 513 514 flist_for_each(entry, &masterdu->slaves) { 515 slavedu = flist_entry(entry, struct disk_util, slavelist); 
516 dus = &slavedu->dus; 517 agg->ios[0] += dus->ios[0]; 518 agg->ios[1] += dus->ios[1]; 519 agg->merges[0] += dus->merges[0]; 520 agg->merges[1] += dus->merges[1]; 521 agg->sectors[0] += dus->sectors[0]; 522 agg->sectors[1] += dus->sectors[1]; 523 agg->ticks[0] += dus->ticks[0]; 524 agg->ticks[1] += dus->ticks[1]; 525 agg->time_in_queue += dus->time_in_queue; 526 agg->slavecount++; 527 528 util = (double) (100 * dus->io_ticks / (double) slavedu->dus.msec); 529 /* System utilization is the utilization of the 530 * component with the highest utilization. 531 */ 532 if (util > agg->max_util.u.f) 533 agg->max_util.u.f = util; 534 535 } 536 537 if (agg->max_util.u.f > 100.0) 538 agg->max_util.u.f = 100.0; 539} 540 541void disk_util_prune_entries(void) 542{ 543 fio_mutex_down(disk_util_mutex); 544 545 while (!flist_empty(&disk_list)) { 546 struct disk_util *du; 547 548 du = flist_entry(disk_list.next, struct disk_util, list); 549 flist_del(&du->list); 550 disk_util_free(du); 551 } 552 553 last_majdev = last_mindev = -1; 554 fio_mutex_up(disk_util_mutex); 555 fio_mutex_remove(disk_util_mutex); 556} 557 558void print_disk_util(struct disk_util_stat *dus, struct disk_util_agg *agg, 559 int terse) 560{ 561 double util = 0; 562 563 if (dus->msec) 564 util = (double) 100 * dus->io_ticks / (double) dus->msec; 565 if (util > 100.0) 566 util = 100.0; 567 568 if (!terse) { 569 if (agg->slavecount) 570 log_info(" "); 571 572 log_info(" %s: ios=%u/%u, merge=%u/%u, ticks=%u/%u, " 573 "in_queue=%u, util=%3.2f%%", dus->name, 574 dus->ios[0], dus->ios[1], 575 dus->merges[0], dus->merges[1], 576 dus->ticks[0], dus->ticks[1], 577 dus->time_in_queue, util); 578 } else { 579 log_info(";%s;%u;%u;%u;%u;%u;%u;%u;%3.2f%%", 580 dus->name, dus->ios[0], dus->ios[1], 581 dus->merges[0], dus->merges[1], 582 dus->ticks[0], dus->ticks[1], 583 dus->time_in_queue, util); 584 } 585 586 /* 587 * If the device has slaves, aggregate the stats for 588 * those slave devices also. 
589 */ 590 show_agg_stats(agg, terse); 591 592 if (!terse) 593 log_info("\n"); 594} 595 596static void print_disk_util_json(struct disk_util *du, struct json_array *array) 597{ 598 double util = 0; 599 struct disk_util_stat *dus = &du->dus; 600 struct disk_util_agg *agg = &du->agg; 601 struct json_object *obj; 602 603 obj = json_create_object(); 604 json_array_add_value_object(array, obj); 605 606 if (dus->msec) 607 util = (double) 100 * dus->io_ticks / (double) dus->msec; 608 if (util > 100.0) 609 util = 100.0; 610 611 612 json_object_add_value_string(obj, "name", dus->name); 613 json_object_add_value_int(obj, "read_ios", dus->ios[0]); 614 json_object_add_value_int(obj, "write_ios", dus->ios[1]); 615 json_object_add_value_int(obj, "read_merges", dus->merges[0]); 616 json_object_add_value_int(obj, "write_merges", dus->merges[1]); 617 json_object_add_value_int(obj, "read_ticks", dus->ticks[0]); 618 json_object_add_value_int(obj, "write_ticks", dus->ticks[1]); 619 json_object_add_value_int(obj, "in_queue", dus->time_in_queue); 620 json_object_add_value_float(obj, "util", util); 621 622 /* 623 * If the device has slaves, aggregate the stats for 624 * those slave devices also. 
625 */ 626 if (!agg->slavecount) 627 return; 628 json_object_add_value_int(obj, "aggr_read_ios", 629 agg->ios[0] / agg->slavecount); 630 json_object_add_value_int(obj, "aggr_write_ios", 631 agg->ios[1] / agg->slavecount); 632 json_object_add_value_int(obj, "aggr_read_merges", 633 agg->merges[0] / agg->slavecount); 634 json_object_add_value_int(obj, "aggr_write_merge", 635 agg->merges[1] / agg->slavecount); 636 json_object_add_value_int(obj, "aggr_read_ticks", 637 agg->ticks[0] / agg->slavecount); 638 json_object_add_value_int(obj, "aggr_write_ticks", 639 agg->ticks[1] / agg->slavecount); 640 json_object_add_value_int(obj, "aggr_in_queue", 641 agg->time_in_queue / agg->slavecount); 642 json_object_add_value_float(obj, "aggr_util", agg->max_util.u.f); 643} 644 645void show_disk_util(int terse, struct json_object *parent) 646{ 647 struct flist_head *entry; 648 struct disk_util *du; 649 struct json_array *array = NULL; 650 651 fio_mutex_down(disk_util_mutex); 652 653 if (flist_empty(&disk_list)) { 654 fio_mutex_up(disk_util_mutex); 655 return; 656 } 657 658 if (!terse) 659 log_info("\nDisk stats (read/write):\n"); 660 661 if (output_format == FIO_OUTPUT_JSON) { 662 array = json_create_array(); 663 json_object_add_value_array(parent, "disk_util", array); 664 } 665 666 flist_for_each(entry, &disk_list) { 667 du = flist_entry(entry, struct disk_util, list); 668 669 aggregate_slaves_stats(du); 670 if (output_format == FIO_OUTPUT_JSON) 671 print_disk_util_json(du, array); 672 else 673 print_disk_util(&du->dus, &du->agg, terse); 674 } 675 676 fio_mutex_up(disk_util_mutex); 677} 678 679void setup_disk_util(void) 680{ 681 disk_util_mutex = fio_mutex_init(FIO_MUTEX_UNLOCKED); 682} 683