/* diskutil.c revision 631c95e0ef133f5a1b014deea4f9cebcb1cd4446 */
1#include <stdio.h> 2#include <string.h> 3#include <sys/time.h> 4#include <sys/types.h> 5#include <sys/stat.h> 6#include <dirent.h> 7#include <libgen.h> 8#include <math.h> 9 10#include "fio.h" 11#include "smalloc.h" 12#include "diskutil.h" 13 14static int last_majdev, last_mindev; 15static struct disk_util *last_du; 16 17static struct flist_head disk_list = FLIST_HEAD_INIT(disk_list); 18 19static struct disk_util *__init_per_file_disk_util(struct thread_data *td, 20 int majdev, int mindev, char *path); 21 22static void disk_util_free(struct disk_util *du) 23{ 24 if (du == last_du) 25 last_du = NULL; 26 27 fio_mutex_remove(du->lock); 28 sfree(du->name); 29 sfree(du); 30} 31 32static int get_io_ticks(struct disk_util *du, struct disk_util_stat *dus) 33{ 34 unsigned in_flight; 35 char line[256]; 36 FILE *f; 37 char *p; 38 int ret; 39 40 dprint(FD_DISKUTIL, "open stat file: %s\n", du->path); 41 42 f = fopen(du->path, "r"); 43 if (!f) 44 return 1; 45 46 p = fgets(line, sizeof(line), f); 47 if (!p) { 48 fclose(f); 49 return 1; 50 } 51 52 dprint(FD_DISKUTIL, "%s: %s", du->path, p); 53 54 ret = sscanf(p, "%u %u %llu %u %u %u %llu %u %u %u %u\n", &dus->ios[0], 55 &dus->merges[0], &dus->sectors[0], 56 &dus->ticks[0], &dus->ios[1], 57 &dus->merges[1], &dus->sectors[1], 58 &dus->ticks[1], &in_flight, 59 &dus->io_ticks, &dus->time_in_queue); 60 fclose(f); 61 dprint(FD_DISKUTIL, "%s: stat read ok? 
%d\n", du->path, ret == 1); 62 return ret != 11; 63} 64 65static void update_io_tick_disk(struct disk_util *du) 66{ 67 struct disk_util_stat __dus, *dus, *ldus; 68 struct timeval t; 69 70 if (get_io_ticks(du, &__dus)) 71 return; 72 if (!du->users) 73 return; 74 75 dus = &du->dus; 76 ldus = &du->last_dus; 77 78 dus->sectors[0] += (__dus.sectors[0] - ldus->sectors[0]); 79 dus->sectors[1] += (__dus.sectors[1] - ldus->sectors[1]); 80 dus->ios[0] += (__dus.ios[0] - ldus->ios[0]); 81 dus->ios[1] += (__dus.ios[1] - ldus->ios[1]); 82 dus->merges[0] += (__dus.merges[0] - ldus->merges[0]); 83 dus->merges[1] += (__dus.merges[1] - ldus->merges[1]); 84 dus->ticks[0] += (__dus.ticks[0] - ldus->ticks[0]); 85 dus->ticks[1] += (__dus.ticks[1] - ldus->ticks[1]); 86 dus->io_ticks += (__dus.io_ticks - ldus->io_ticks); 87 dus->time_in_queue += (__dus.time_in_queue - ldus->time_in_queue); 88 89 fio_gettime(&t, NULL); 90 du->msec += mtime_since(&du->time, &t); 91 memcpy(&du->time, &t, sizeof(t)); 92 memcpy(ldus, &__dus, sizeof(__dus)); 93} 94 95void update_io_ticks(void) 96{ 97 struct flist_head *entry; 98 struct disk_util *du; 99 100 dprint(FD_DISKUTIL, "update io ticks\n"); 101 102 flist_for_each(entry, &disk_list) { 103 du = flist_entry(entry, struct disk_util, list); 104 update_io_tick_disk(du); 105 } 106} 107 108static struct disk_util *disk_util_exists(int major, int minor) 109{ 110 struct flist_head *entry; 111 struct disk_util *du; 112 113 flist_for_each(entry, &disk_list) { 114 du = flist_entry(entry, struct disk_util, list); 115 116 if (major == du->major && minor == du->minor) 117 return du; 118 } 119 120 return NULL; 121} 122 123static int get_device_numbers(char *file_name, int *maj, int *min) 124{ 125 struct stat st; 126 int majdev, mindev; 127 char tempname[PATH_MAX], *p; 128 129 if (!lstat(file_name, &st)) { 130 if (S_ISBLK(st.st_mode)) { 131 majdev = major(st.st_rdev); 132 mindev = minor(st.st_rdev); 133 } else if (S_ISCHR(st.st_mode)) { 134 majdev = major(st.st_rdev); 
135 mindev = minor(st.st_rdev); 136 if (fio_lookup_raw(st.st_rdev, &majdev, &mindev)) 137 return -1; 138 } else if (S_ISFIFO(st.st_mode)) 139 return -1; 140 else { 141 majdev = major(st.st_dev); 142 mindev = minor(st.st_dev); 143 } 144 } else { 145 /* 146 * must be a file, open "." in that path 147 */ 148 strncpy(tempname, file_name, PATH_MAX - 1); 149 p = dirname(tempname); 150 if (stat(p, &st)) { 151 perror("disk util stat"); 152 return -1; 153 } 154 155 majdev = major(st.st_dev); 156 mindev = minor(st.st_dev); 157 } 158 159 *min = mindev; 160 *maj = majdev; 161 162 return 0; 163} 164 165static int read_block_dev_entry(char *path, int *maj, int *min) 166{ 167 char line[256], *p; 168 FILE *f; 169 170 f = fopen(path, "r"); 171 if (!f) { 172 perror("open path"); 173 return 1; 174 } 175 176 p = fgets(line, sizeof(line), f); 177 fclose(f); 178 179 if (!p) 180 return 1; 181 182 if (sscanf(p, "%u:%u", maj, min) != 2) 183 return 1; 184 185 return 0; 186} 187 188static void find_add_disk_slaves(struct thread_data *td, char *path, 189 struct disk_util *masterdu) 190{ 191 DIR *dirhandle = NULL; 192 struct dirent *dirent = NULL; 193 char slavesdir[PATH_MAX], temppath[PATH_MAX], slavepath[PATH_MAX]; 194 struct disk_util *slavedu = NULL; 195 int majdev, mindev; 196 ssize_t linklen; 197 198 sprintf(slavesdir, "%s/%s", path, "slaves"); 199 dirhandle = opendir(slavesdir); 200 if (!dirhandle) 201 return; 202 203 while ((dirent = readdir(dirhandle)) != NULL) { 204 if (!strcmp(dirent->d_name, ".") || 205 !strcmp(dirent->d_name, "..")) 206 continue; 207 208 sprintf(temppath, "%s/%s", slavesdir, dirent->d_name); 209 /* Can we always assume that the slaves device entries 210 * are links to the real directories for the slave 211 * devices? 
212 */ 213 linklen = readlink(temppath, slavepath, PATH_MAX - 0); 214 if (linklen < 0) { 215 perror("readlink() for slave device."); 216 return; 217 } 218 slavepath[linklen] = '\0'; 219 220 sprintf(temppath, "%s/%s/dev", slavesdir, slavepath); 221 if (read_block_dev_entry(temppath, &majdev, &mindev)) { 222 perror("Error getting slave device numbers."); 223 return; 224 } 225 226 /* 227 * See if this maj,min already exists 228 */ 229 slavedu = disk_util_exists(majdev, mindev); 230 if (slavedu) 231 continue; 232 233 sprintf(temppath, "%s/%s", slavesdir, slavepath); 234 __init_per_file_disk_util(td, majdev, mindev, temppath); 235 slavedu = disk_util_exists(majdev, mindev); 236 237 /* Should probably use an assert here. slavedu should 238 * always be present at this point. */ 239 if (slavedu) 240 flist_add_tail(&slavedu->slavelist, &masterdu->slaves); 241 } 242 243 closedir(dirhandle); 244} 245 246static struct disk_util *disk_util_add(struct thread_data * td, int majdev, 247 int mindev, char *path) 248{ 249 struct disk_util *du, *__du; 250 struct flist_head *entry; 251 252 dprint(FD_DISKUTIL, "add maj/min %d/%d: %s\n", majdev, mindev, path); 253 254 du = smalloc(sizeof(*du)); 255 memset(du, 0, sizeof(*du)); 256 INIT_FLIST_HEAD(&du->list); 257 sprintf(du->path, "%s/stat", path); 258 du->name = smalloc_strdup(basename(path)); 259 du->sysfs_root = path; 260 du->major = majdev; 261 du->minor = mindev; 262 INIT_FLIST_HEAD(&du->slavelist); 263 INIT_FLIST_HEAD(&du->slaves); 264 du->lock = fio_mutex_init(1); 265 du->users = 0; 266 267 flist_for_each(entry, &disk_list) { 268 __du = flist_entry(entry, struct disk_util, list); 269 270 dprint(FD_DISKUTIL, "found %s in list\n", __du->name); 271 272 if (!strcmp(du->name, __du->name)) { 273 disk_util_free(du); 274 return __du; 275 } 276 } 277 278 dprint(FD_DISKUTIL, "add %s to list\n", du->name); 279 280 fio_gettime(&du->time, NULL); 281 get_io_ticks(du, &du->last_dus); 282 283 flist_add_tail(&du->list, &disk_list); 284 
find_add_disk_slaves(td, path, du); 285 return du; 286} 287 288static int check_dev_match(int majdev, int mindev, char *path) 289{ 290 int major, minor; 291 292 if (read_block_dev_entry(path, &major, &minor)) 293 return 1; 294 295 if (majdev == major && mindev == minor) 296 return 0; 297 298 return 1; 299} 300 301static int find_block_dir(int majdev, int mindev, char *path, int link_ok) 302{ 303 struct dirent *dir; 304 struct stat st; 305 int found = 0; 306 DIR *D; 307 308 D = opendir(path); 309 if (!D) 310 return 0; 311 312 while ((dir = readdir(D)) != NULL) { 313 char full_path[256]; 314 315 if (!strcmp(dir->d_name, ".") || !strcmp(dir->d_name, "..")) 316 continue; 317 318 sprintf(full_path, "%s/%s", path, dir->d_name); 319 320 if (!strcmp(dir->d_name, "dev")) { 321 if (!check_dev_match(majdev, mindev, full_path)) { 322 found = 1; 323 break; 324 } 325 } 326 327 if (link_ok) { 328 if (stat(full_path, &st) == -1) { 329 perror("stat"); 330 break; 331 } 332 } else { 333 if (lstat(full_path, &st) == -1) { 334 perror("stat"); 335 break; 336 } 337 } 338 339 if (!S_ISDIR(st.st_mode) || S_ISLNK(st.st_mode)) 340 continue; 341 342 found = find_block_dir(majdev, mindev, full_path, 0); 343 if (found) { 344 strcpy(path, full_path); 345 break; 346 } 347 } 348 349 closedir(D); 350 return found; 351} 352 353static struct disk_util *__init_per_file_disk_util(struct thread_data *td, 354 int majdev, int mindev, 355 char *path) 356{ 357 struct stat st; 358 char tmp[PATH_MAX]; 359 char *p; 360 361 /* 362 * If there's a ../queue/ directory there, we are inside a partition. 363 * Check if that is the case and jump back. For loop/md/dm etc we 364 * are already in the right spot. 
365 */ 366 sprintf(tmp, "%s/../queue", path); 367 if (!stat(tmp, &st)) { 368 p = dirname(path); 369 sprintf(tmp, "%s/queue", p); 370 if (stat(tmp, &st)) { 371 log_err("unknown sysfs layout\n"); 372 return NULL; 373 } 374 strncpy(tmp, p, PATH_MAX - 1); 375 sprintf(path, "%s", tmp); 376 } 377 378 if (td->o.ioscheduler && !td->sysfs_root) 379 td->sysfs_root = strdup(path); 380 381 return disk_util_add(td, majdev, mindev, path); 382} 383 384static struct disk_util *init_per_file_disk_util(struct thread_data *td, 385 char *filename) 386{ 387 388 char foo[PATH_MAX]; 389 struct disk_util *du; 390 int mindev, majdev; 391 392 if (get_device_numbers(filename, &majdev, &mindev)) 393 return NULL; 394 395 dprint(FD_DISKUTIL, "%s belongs to maj/min %d/%d\n", filename, majdev, 396 mindev); 397 398 du = disk_util_exists(majdev, mindev); 399 if (du) { 400 if (td->o.ioscheduler && !td->sysfs_root) 401 td->sysfs_root = strdup(du->sysfs_root); 402 403 return du; 404 } 405 406 /* 407 * for an fs without a device, we will repeatedly stat through 408 * sysfs which can take oodles of time for thousands of files. so 409 * cache the last lookup and compare with that before going through 410 * everything again. 
411 */ 412 if (mindev == last_mindev && majdev == last_majdev) 413 return last_du; 414 415 last_mindev = mindev; 416 last_majdev = majdev; 417 418 sprintf(foo, "/sys/block"); 419 if (!find_block_dir(majdev, mindev, foo, 1)) 420 return NULL; 421 422 return __init_per_file_disk_util(td, majdev, mindev, foo); 423} 424 425static struct disk_util *__init_disk_util(struct thread_data *td, 426 struct fio_file *f) 427{ 428 return init_per_file_disk_util(td, f->file_name); 429} 430 431void init_disk_util(struct thread_data *td) 432{ 433 struct fio_file *f; 434 unsigned int i; 435 436 if (!td->o.do_disk_util || 437 (td->io_ops->flags & (FIO_DISKLESSIO | FIO_NODISKUTIL))) 438 return; 439 440 for_each_file(td, f, i) 441 f->du = __init_disk_util(td, f); 442} 443 444static void aggregate_slaves_stats(struct disk_util *masterdu) 445{ 446 struct disk_util_stat *dus; 447 struct flist_head *entry; 448 struct disk_util *slavedu; 449 double util, max_util = 0; 450 int slavecount = 0; 451 452 unsigned merges[2] = { 0, }; 453 unsigned ticks[2] = { 0, }; 454 unsigned time_in_queue = { 0, }; 455 unsigned long long sectors[2] = { 0, }; 456 unsigned ios[2] = { 0, }; 457 458 flist_for_each(entry, &masterdu->slaves) { 459 slavedu = flist_entry(entry, struct disk_util, slavelist); 460 dus = &slavedu->dus; 461 ios[0] += dus->ios[0]; 462 ios[1] += dus->ios[1]; 463 merges[0] += dus->merges[0]; 464 merges[1] += dus->merges[1]; 465 sectors[0] += dus->sectors[0]; 466 sectors[1] += dus->sectors[1]; 467 ticks[0] += dus->ticks[0]; 468 ticks[1] += dus->ticks[1]; 469 time_in_queue += dus->time_in_queue; 470 ++slavecount; 471 472 util = (double) (100 * dus->io_ticks / (double) slavedu->msec); 473 /* System utilization is the utilization of the 474 * component with the highest utilization. 
475 */ 476 if (util > max_util) 477 max_util = util; 478 479 } 480 481 if (max_util > 100.0) 482 max_util = 100.0; 483 484 log_info(", aggrios=%u/%u, aggrmerge=%u/%u, aggrticks=%u/%u," 485 " aggrin_queue=%u, aggrutil=%3.2f%%", 486 ios[0]/slavecount, ios[1]/slavecount, 487 merges[0]/slavecount, merges[1]/slavecount, 488 ticks[0]/slavecount, ticks[1]/slavecount, 489 time_in_queue/slavecount, max_util); 490 491} 492 493void show_disk_util(void) 494{ 495 struct disk_util_stat *dus; 496 struct flist_head *entry, *next; 497 struct disk_util *du; 498 double util; 499 500 if (flist_empty(&disk_list)) 501 return; 502 503 log_info("\nDisk stats (read/write):\n"); 504 505 flist_for_each(entry, &disk_list) { 506 du = flist_entry(entry, struct disk_util, list); 507 dus = &du->dus; 508 509 util = (double) 100 * du->dus.io_ticks / (double) du->msec; 510 if (util > 100.0) 511 util = 100.0; 512 513 /* If this node is the slave of a master device, as 514 * happens in case of software RAIDs, inward-indent 515 * this stats line to reflect a master-slave 516 * relationship. Because the master device gets added 517 * before the slave devices, we can safely assume that 518 * the master's stats line has been displayed in a 519 * previous iteration of this loop. 520 */ 521 if (!flist_empty(&du->slavelist)) 522 log_info(" "); 523 524 log_info(" %s: ios=%u/%u, merge=%u/%u, ticks=%u/%u, " 525 "in_queue=%u, util=%3.2f%%", du->name, 526 dus->ios[0], dus->ios[1], 527 dus->merges[0], dus->merges[1], 528 dus->ticks[0], dus->ticks[1], 529 dus->time_in_queue, util); 530 531 /* If the device has slaves, aggregate the stats for 532 * those slave devices also. 533 */ 534 if (!flist_empty(&du->slaves)) 535 aggregate_slaves_stats(du); 536 537 log_info("\n"); 538 } 539 540 /* 541 * now free the list 542 */ 543 flist_for_each_safe(entry, next, &disk_list) { 544 flist_del(entry); 545 du = flist_entry(entry, struct disk_util, list); 546 disk_util_free(du); 547 } 548} 549