getdelays.c revision 7514c35d7f6922710431372a837771aeea1b258f
1/* getdelays.c 2 * 3 * Utility to get per-pid and per-tgid delay accounting statistics 4 * Also illustrates usage of the taskstats interface 5 * 6 * Copyright (C) Shailabh Nagar, IBM Corp. 2005 7 * Copyright (C) Balbir Singh, IBM Corp. 2006 8 * Copyright (c) Jay Lan, SGI. 2006 9 * 10 * Compile with 11 * gcc -I/usr/src/linux/include getdelays.c -o getdelays 12 */ 13 14#include <stdio.h> 15#include <stdlib.h> 16#include <errno.h> 17#include <unistd.h> 18#include <poll.h> 19#include <string.h> 20#include <fcntl.h> 21#include <sys/types.h> 22#include <sys/stat.h> 23#include <sys/socket.h> 24#include <signal.h> 25 26#include "config.h" 27 28#include <linux/genetlink.h> 29#include <linux/taskstats.h> 30 31#ifdef HAVE_LINUX_CGROUPSTATS_H 32#include <linux/cgroupstats.h> 33#endif 34 35/* 36 * Generic macros for dealing with netlink sockets. Might be duplicated 37 * elsewhere. It is recommended that commercial grade applications use 38 * libnl or libnetlink and use the interfaces provided by the library 39 */ 40#define GENLMSG_DATA(glh) ((void *)(NLMSG_DATA(glh) + GENL_HDRLEN)) 41#define GENLMSG_PAYLOAD(glh) (NLMSG_PAYLOAD(glh, 0) - GENL_HDRLEN) 42#define NLA_DATA(na) ((void *)((char*)(na) + NLA_HDRLEN)) 43#define NLA_PAYLOAD(len) (len - NLA_HDRLEN) 44 45#define err(code, fmt, arg...) \ 46 do { \ 47 fprintf(stderr, fmt, ##arg); \ 48 exit(code); \ 49 } while (0) 50 51int done; 52int rcvbufsz; 53char name[100]; 54int dbg; 55int print_delays; 56int print_io_accounting; 57int print_task_context_switch_counts; 58__u64 stime, utime; 59 60#define PRINTF(fmt, arg...) { \ 61 if (dbg) { \ 62 printf(fmt, ##arg); \ 63 } \ 64 } 65 66/* Maximum size of response requested or message sent */ 67#define MAX_MSG_SIZE 1024 68/* Maximum number of cpus expected to be specified in a cpumask */ 69#define MAX_CPUS 32 70 71struct msgtemplate { 72 struct nlmsghdr n; 73 struct genlmsghdr g; 74 char buf[MAX_MSG_SIZE]; 75}; 76 77char cpumask[100+6*MAX_CPUS]; 78 79static void usage(void) 80{ 81 fprintf(stderr, "getdelays [-dilv] [-w logfile] [-r bufsize] " 82 "[-m cpumask] [-t tgid] [-p pid]\n"); 83 fprintf(stderr, " -d: print delayacct stats\n"); 84 fprintf(stderr, " -i: print IO accounting (works only with -p)\n"); 85 fprintf(stderr, " -l: listen forever\n"); 86 fprintf(stderr, " -v: debug on\n"); 87 fprintf(stderr, " -C: container path\n"); 88} 89 90/* 91 * Create a raw netlink socket and bind 92 */ 93static int create_nl_socket(int protocol) 94{ 95 int fd; 96 struct sockaddr_nl local; 97 98 fd = socket(AF_NETLINK, SOCK_RAW, protocol); 99 if (fd < 0) 100 return -1; 101 102 if (rcvbufsz) 103 if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, 104 &rcvbufsz, sizeof(rcvbufsz)) < 0) { 105 fprintf(stderr, "Unable to set socket rcv buf size " 106 "to %d\n", 107 rcvbufsz); 108 return -1; 109 } 110 111 memset(&local, 0, sizeof(local)); 112 local.nl_family = AF_NETLINK; 113 114 if (bind(fd, (struct sockaddr *) &local, sizeof(local)) < 0) 115 goto error; 116 117 return fd; 118error: 119 close(fd); 120 return -1; 121} 122 123 124int send_cmd(int sd, __u16 nlmsg_type, __u32 nlmsg_pid, 125 __u8 genl_cmd, __u16 nla_type, 126 void *nla_data, int nla_len) 127{ 128 struct nlattr *na; 129 struct sockaddr_nl nladdr; 130 int r, buflen; 131 char *buf; 132 133 struct msgtemplate msg; 134 135 msg.n.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN); 136 msg.n.nlmsg_type = nlmsg_type; 137 msg.n.nlmsg_flags = NLM_F_REQUEST; 138 msg.n.nlmsg_seq = 0; 139 msg.n.nlmsg_pid = nlmsg_pid; 140 msg.g.cmd = genl_cmd; 141 msg.g.version = 0x1; 142 na = (struct nlattr *) GENLMSG_DATA(&msg); 143 na->nla_type = nla_type; 144 na->nla_len = nla_len + 1 + NLA_HDRLEN; 145 memcpy(NLA_DATA(na), nla_data, nla_len); 146 msg.n.nlmsg_len += NLMSG_ALIGN(na->nla_len); 147 148 buf = (char *) &msg; 149 buflen = msg.n.nlmsg_len ; 150 memset(&nladdr, 0, sizeof(nladdr)); 151 nladdr.nl_family = AF_NETLINK; 152 while ((r = sendto(sd, buf, buflen, 0, (struct sockaddr *) &nladdr, 153 sizeof(nladdr))) < buflen) { 154 if (r > 0) { 155 buf += r; 156 buflen -= r; 157 } else if (errno != EAGAIN) 158 return -1; 159 } 160 return 0; 161} 162 163 164/* 165 * Probe the controller in genetlink to find the family id 166 * for the TASKSTATS family 167 */ 168int get_family_id(int sd) 169{ 170 struct { 171 struct nlmsghdr n; 172 struct genlmsghdr g; 173 char buf[256]; 174 } ans; 175 176 int id = 0, rc; 177 struct nlattr *na; 178 int rep_len; 179 180 strcpy(name, TASKSTATS_GENL_NAME); 181 rc = send_cmd(sd, GENL_ID_CTRL, getpid(), CTRL_CMD_GETFAMILY, 182 CTRL_ATTR_FAMILY_NAME, (void *)name, 183 strlen(TASKSTATS_GENL_NAME)+1); 184 185 rep_len = recv(sd, &ans, sizeof(ans), 0); 186 if (ans.n.nlmsg_type == NLMSG_ERROR || 187 (rep_len < 0) || !NLMSG_OK((&ans.n), rep_len)) 188 return 0; 189 190 na = (struct nlattr *) GENLMSG_DATA(&ans); 191 na = (struct nlattr *) ((char *) na + NLA_ALIGN(na->nla_len)); 192 if (na->nla_type == CTRL_ATTR_FAMILY_ID) { 193 id = *(__u16 *) NLA_DATA(na); 194 } 195 return id; 196} 197 198void print_delayacct(struct taskstats *t) 199{ 200 printf("\n\nCPU %15s%15s%15s%15s\n" 201 " %15llu%15llu%15llu%15llu\n" 202 "IO %15s%15s\n" 203 " %15llu%15llu\n" 204 "SWAP %15s%15s\n" 205 " %15llu%15llu\n" 206 "RECLAIM %12s%15s\n" 207#ifdef HAVE_STRUCT_TASKSTATS_FREEPAGES_COUNT 208 " %15llu%15llu\n" 209#endif 210 , "count", "real total", "virtual total", "delay total", 211 (unsigned long long)t->cpu_count, 212 (unsigned long long)t->cpu_run_real_total, 213 (unsigned long long)t->cpu_run_virtual_total, 214 (unsigned long long)t->cpu_delay_total, 215 "count", "delay total", 216 (unsigned long long)t->blkio_count, 217 (unsigned long long)t->blkio_delay_total, 218 "count", "delay total", 219 (unsigned long long)t->swapin_count, 220 (unsigned long long)t->swapin_delay_total, 221 "count", "delay total" 222#ifdef HAVE_STRUCT_TASKSTATS_FREEPAGES_COUNT 223 , (unsigned long long)t->freepages_count, 224 (unsigned long long)t->freepages_delay_total 225#endif 226 ); 227} 228 229void task_context_switch_counts(struct taskstats *t) 230{ 231#ifdef HAVE_STRUCT_TASKSTATS_NVCSW 232 printf("\n\nTask %15s%15s\n" 233 " %15llu%15llu\n", 234 "voluntary", "nonvoluntary", 235 (unsigned long long)t->nvcsw, (unsigned long long)t->nivcsw); 236#endif 237} 238 239#ifdef HAVE_LINUX_CGROUPSTATS_H 240 241void print_cgroupstats(struct cgroupstats *c) 242{ 243 printf("sleeping %llu, blocked %llu, running %llu, stopped %llu, " 244 "uninterruptible %llu\n", (unsigned long long)c->nr_sleeping, 245 (unsigned long long)c->nr_io_wait, 246 (unsigned long long)c->nr_running, 247 (unsigned long long)c->nr_stopped, 248 (unsigned long long)c->nr_uninterruptible); 249} 250 251#endif 252 253void print_ioacct(struct taskstats *t) 254{ 255#ifdef HAVE_STRUCT_TASKSTATS_READ_BYTES 256 printf("%s: read=%llu, write=%llu, cancelled_write=%llu\n", 257 t->ac_comm, 258 (unsigned long long)t->read_bytes, 259 (unsigned long long)t->write_bytes, 260 (unsigned long long)t->cancelled_write_bytes); 261#endif 262} 263 264int main(int argc, char *argv[]) 265{ 266 int c, rc, rep_len, aggr_len, len2, cmd_type; 267 __u16 id; 268 __u32 mypid; 269 270 struct nlattr *na; 271 int nl_sd = -1; 272 int len = 0; 273 pid_t tid = 0; 274 pid_t rtid = 0; 275 276 int fd = 0; 277 int count = 0; 278 int write_file = 0; 279 int maskset = 0; 280 char *logfile = NULL; 281 int loop = 0; 282 int containerset = 0; 283 char containerpath[1024]; 284 int cfd = 0; 285 286 struct msgtemplate msg; 287 288 while (1) { 289 c = getopt(argc, argv, "qdiw:r:m:t:p:vlC:"); 290 if (c < 0) 291 break; 292 293 switch (c) { 294 case 'd': 295 printf("print delayacct stats ON\n"); 296 print_delays = 1; 297 break; 298 case 'i': 299 printf("printing IO accounting\n"); 300 print_io_accounting = 1; 301 break; 302 case 'q': 303 printf("printing task/process context switch rates\n"); 304 print_task_context_switch_counts = 1; 305 break; 306 case 'C': 307 containerset = 1; 308 strncpy(containerpath, optarg, strlen(optarg) + 1); 309 break; 310 case 'w': 311 logfile = strdup(optarg); 312 printf("write to file %s\n", logfile); 313 write_file = 1; 314 break; 315 case 'r': 316 rcvbufsz = atoi(optarg); 317 printf("receive buf size %d\n", rcvbufsz); 318 if (rcvbufsz < 0) 319 err(1, "Invalid rcv buf size\n"); 320 break; 321 case 'm': 322 strncpy(cpumask, optarg, sizeof(cpumask)); 323 maskset = 1; 324 printf("cpumask %s maskset %d\n", cpumask, maskset); 325 break; 326 case 't': 327 tid = atoi(optarg); 328 if (!tid) 329 err(1, "Invalid tgid\n"); 330 cmd_type = TASKSTATS_CMD_ATTR_TGID; 331 break; 332 case 'p': 333 tid = atoi(optarg); 334 if (!tid) 335 err(1, "Invalid pid\n"); 336 cmd_type = TASKSTATS_CMD_ATTR_PID; 337 break; 338 case 'v': 339 printf("debug on\n"); 340 dbg = 1; 341 break; 342 case 'l': 343 printf("listen forever\n"); 344 loop = 1; 345 break; 346 default: 347 usage(); 348 exit(1); 349 } 350 } 351 352 if (write_file) { 353 fd = open(logfile, O_WRONLY | O_CREAT | O_TRUNC, 354 S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); 355 if (fd == -1) { 356 perror("Cannot open output file\n"); 357 exit(1); 358 } 359 } 360 361 if ((nl_sd = create_nl_socket(NETLINK_GENERIC)) < 0) 362 err(1, "error creating Netlink socket\n"); 363 364 365 mypid = getpid(); 366 id = get_family_id(nl_sd); 367 if (!id) { 368 fprintf(stderr, "Error getting family id, errno %d\n", errno); 369 exit(1); 370 } 371 PRINTF("family id %d\n", id); 372 373 if (maskset) { 374 rc = send_cmd(nl_sd, id, mypid, TASKSTATS_CMD_GET, 375 TASKSTATS_CMD_ATTR_REGISTER_CPUMASK, 376 &cpumask, strlen(cpumask) + 1); 377 PRINTF("Sent register cpumask, retval %d\n", rc); 378 if (rc < 0) { 379 fprintf(stderr, "error sending register cpumask\n"); 380 exit(1); 381 } 382 } 383 384 if (tid && containerset) { 385 fprintf(stderr, "Select either -t or -C, not both\n"); 386 exit(1); 387 } 388 389 if (tid) { 390 rc = send_cmd(nl_sd, id, mypid, TASKSTATS_CMD_GET, 391 cmd_type, &tid, sizeof(__u32)); 392 PRINTF("Sent pid/tgid, retval %d\n", rc); 393 if (rc < 0) { 394 fprintf(stderr, "error sending tid/tgid cmd\n"); 395 exit(1); 396 } 397 } 398 399 if (containerset) { 400#ifdef HAVE_LINUX_CGROUPSTAT_H 401 cfd = open(containerpath, O_RDONLY); 402 if (cfd < 0) { 403 perror("error opening container file"); 404 exit(1); 405 } 406 rc = send_cmd(nl_sd, id, mypid, CGROUPSTATS_CMD_GET, 407 CGROUPSTATS_CMD_ATTR_FD, &cfd, sizeof(__u32)); 408 if (rc < 0) { 409 perror("error sending cgroupstats command"); 410 exit(1); 411 } 412#else 413 printf("Header linux/cgroupstat.h was missing during compilation," 414 "you may have old or incomplete kernel-headers.\n"); 415#endif 416 } 417 if (!maskset && !tid && !containerset) { 418 usage(); 419 exit(1); 420 } 421 422 do { 423 int i; 424 425 rep_len = recv(nl_sd, &msg, sizeof(msg), 0); 426 PRINTF("received %d bytes\n", rep_len); 427 428 if (rep_len < 0) { 429 fprintf(stderr, "nonfatal reply error: errno %d\n", 430 errno); 431 exit(1); 432 } 433 if (msg.n.nlmsg_type == NLMSG_ERROR || 434 !NLMSG_OK((&msg.n), rep_len)) { 435 struct nlmsgerr *err = NLMSG_DATA(&msg); 436 fprintf(stderr, "fatal reply error, errno %d\n", 437 err->error); 438 exit(1); 439 } 440 441 PRINTF("nlmsghdr size=%zu, nlmsg_len=%d, rep_len=%d\n", 442 sizeof(struct nlmsghdr), msg.n.nlmsg_len, rep_len); 443 444 445 rep_len = GENLMSG_PAYLOAD(&msg.n); 446 447 na = (struct nlattr *) GENLMSG_DATA(&msg); 448 len = 0; 449 i = 0; 450 while (len < rep_len) { 451 len += NLA_ALIGN(na->nla_len); 452 switch (na->nla_type) { 453 case TASKSTATS_TYPE_AGGR_TGID: 454 /* Fall through */ 455 case TASKSTATS_TYPE_AGGR_PID: 456 aggr_len = NLA_PAYLOAD(na->nla_len); 457 len2 = 0; 458 /* For nested attributes, na follows */ 459 na = (struct nlattr *) NLA_DATA(na); 460 done = 0; 461 while (len2 < aggr_len) { 462 switch (na->nla_type) { 463 case TASKSTATS_TYPE_PID: 464 rtid = *(int *) NLA_DATA(na); 465 if (print_delays) 466 printf("PID\t%d\n", rtid); 467 break; 468 case TASKSTATS_TYPE_TGID: 469 rtid = *(int *) NLA_DATA(na); 470 if (print_delays) 471 printf("TGID\t%d\n", rtid); 472 break; 473 case TASKSTATS_TYPE_STATS: 474 count++; 475 if (print_delays) 476 print_delayacct((struct taskstats *) NLA_DATA(na)); 477 if (print_io_accounting) 478 print_ioacct((struct taskstats *) NLA_DATA(na)); 479 if (print_task_context_switch_counts) 480 task_context_switch_counts((struct taskstats *) NLA_DATA(na)); 481 if (fd) { 482 if (write(fd, NLA_DATA(na), na->nla_len) < 0) { 483 err(1,"write error\n"); 484 } 485 } 486 if (!loop) 487 goto done; 488 break; 489 default: 490 fprintf(stderr, "Unknown nested" 491 " nla_type %d\n", 492 na->nla_type); 493 break; 494 } 495 len2 += NLA_ALIGN(na->nla_len); 496 na = (struct nlattr *) ((char *) na + len2); 497 } 498 break; 499#ifdef HAVE_LINUX_CGROUPSTATS_H 500 case CGROUPSTATS_TYPE_CGROUP_STATS: 501 print_cgroupstats(NLA_DATA(na)); 502 break; 503#endif 504 default: 505 fprintf(stderr, "Unknown nla_type %d\n", 506 na->nla_type); 507 exit(1); 508 } 509 na = (struct nlattr *) (GENLMSG_DATA(&msg) + len); 510 } 511 } while (loop); 512done: 513 if (maskset) { 514 rc = send_cmd(nl_sd, id, mypid, TASKSTATS_CMD_GET, 515 TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK, 516 &cpumask, strlen(cpumask) + 1); 517 printf("Sent deregister mask, retval %d\n", rc); 518 if (rc < 0) 519 err(rc, "error sending deregister cpumask\n"); 520 } 521 522 close(nl_sd); 523 if (fd) 524 close(fd); 525 if (cfd) 526 close(cfd); 527 return 0; 528} 529 530