blk-cgroup.c revision 70087dc38cc77ca8f46059564c00338777734762
/*
 * Common Block IO controller cgroup interface
 *
 * Based on ideas and code from CFQ, CFS and BFQ:
 * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
 *
 * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
 *		      Paolo Valente <paolo.valente@unimore.it>
 *
 * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com>
 *		      Nauman Rafique <nauman@google.com>
 */
#include <linux/ioprio.h>
#include <linux/seq_file.h>
#include <linux/kdev_t.h>
#include <linux/module.h>
#include <linux/err.h>
#include <linux/blkdev.h>
#include <linux/slab.h>
#include "blk-cgroup.h"
#include <linux/genhd.h>

#define MAX_KEY_LEN 100

static DEFINE_SPINLOCK(blkio_list_lock);
static LIST_HEAD(blkio_list);

struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT };
EXPORT_SYMBOL_GPL(blkio_root_cgroup);

static struct cgroup_subsys_state *blkiocg_create(struct cgroup_subsys *,
						  struct cgroup *);
static int blkiocg_can_attach(struct cgroup_subsys *, struct cgroup *,
			      struct task_struct *, bool);
static void blkiocg_attach(struct cgroup_subsys *, struct cgroup *,
			   struct cgroup *, struct task_struct *, bool);
static void blkiocg_destroy(struct cgroup_subsys *, struct cgroup *);
static int blkiocg_populate(struct cgroup_subsys *, struct cgroup *);

/* for encoding cft->private value on file */
#define BLKIOFILE_PRIVATE(x, val)	(((x) << 16) | (val))
/* What policy owns the file, proportional or throttle */
#define BLKIOFILE_POLICY(val)		(((val) >> 16) & 0xffff)
#define BLKIOFILE_ATTR(val)		((val) & 0xffff)

struct cgroup_subsys blkio_subsys = {
	.name = "blkio",
	.create = blkiocg_create,
	.can_attach = blkiocg_can_attach,
	.attach = blkiocg_attach,
	.destroy = blkiocg_destroy,
	.populate = blkiocg_populate,
#ifdef CONFIG_BLK_CGROUP
	/* note: blkio_subsys_id is otherwise defined in blk-cgroup.h */
	.subsys_id = blkio_subsys_id,
#endif
	.use_id = 1,
	.module = THIS_MODULE,
};
EXPORT_SYMBOL_GPL(blkio_subsys);

static inline void blkio_policy_insert_node(struct blkio_cgroup *blkcg,
					    struct blkio_policy_node *pn)
{
	list_add(&pn->node, &blkcg->policy_list);
}

static inline bool cftype_blkg_same_policy(struct cftype *cft,
					   struct blkio_group *blkg)
{
	enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);

	if (blkg->plid == plid)
		return 1;

	return 0;
}

/* Determines if policy node matches cgroup file being accessed */
static inline bool pn_matches_cftype(struct cftype *cft,
				     struct blkio_policy_node *pn)
{
	enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
	int fileid = BLKIOFILE_ATTR(cft->private);

	return (plid == pn->plid && fileid == pn->fileid);
}

/* Must be called with blkcg->lock held */
static inline void blkio_policy_delete_node(struct blkio_policy_node *pn)
{
	list_del(&pn->node);
}

/* Must be called with blkcg->lock held */
static struct blkio_policy_node *
blkio_policy_search_node(const struct blkio_cgroup *blkcg, dev_t dev,
			 enum blkio_policy_id plid, int fileid)
{
	struct blkio_policy_node *pn;

	list_for_each_entry(pn, &blkcg->policy_list, node) {
		if (pn->dev == dev && pn->plid == plid && pn->fileid == fileid)
			return pn;
	}

	return NULL;
}

struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup)
{
	return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id),
			    struct blkio_cgroup, css);
}
EXPORT_SYMBOL_GPL(cgroup_to_blkio_cgroup);

struct blkio_cgroup *task_blkio_cgroup(struct task_struct *tsk)
{
	return container_of(task_subsys_state(tsk, blkio_subsys_id),
			    struct blkio_cgroup, css);
}
EXPORT_SYMBOL_GPL(task_blkio_cgroup);

static inline void
blkio_update_group_weight(struct blkio_group *blkg, unsigned int weight)
{
	struct blkio_policy_type *blkiop;

	list_for_each_entry(blkiop, &blkio_list, list) {
		/* If this policy does not own the blkg, do not send updates */
		if (blkiop->plid != blkg->plid)
			continue;
		if (blkiop->ops.blkio_update_group_weight_fn)
			blkiop->ops.blkio_update_group_weight_fn(blkg->key,
								 blkg, weight);
	}
}

static inline void blkio_update_group_bps(struct blkio_group *blkg, u64 bps,
					  int fileid)
{
	struct blkio_policy_type *blkiop;

	list_for_each_entry(blkiop, &blkio_list, list) {

		/* If this policy does not own the blkg, do not send updates */
		if (blkiop->plid != blkg->plid)
			continue;

		if (fileid == BLKIO_THROTL_read_bps_device
		    && blkiop->ops.blkio_update_group_read_bps_fn)
			blkiop->ops.blkio_update_group_read_bps_fn(blkg->key,
								   blkg, bps);

		if (fileid == BLKIO_THROTL_write_bps_device
		    && blkiop->ops.blkio_update_group_write_bps_fn)
			blkiop->ops.blkio_update_group_write_bps_fn(blkg->key,
								    blkg, bps);
	}
}

static inline void blkio_update_group_iops(struct blkio_group *blkg,
					   unsigned int iops, int fileid)
{
	struct blkio_policy_type *blkiop;

	list_for_each_entry(blkiop, &blkio_list, list) {

		/* If this policy does not own the blkg, do not send updates */
		if (blkiop->plid != blkg->plid)
			continue;

		if (fileid == BLKIO_THROTL_read_iops_device
		    && blkiop->ops.blkio_update_group_read_iops_fn)
			blkiop->ops.blkio_update_group_read_iops_fn(blkg->key,
								    blkg, iops);

		if (fileid == BLKIO_THROTL_write_iops_device
		    && blkiop->ops.blkio_update_group_write_iops_fn)
			blkiop->ops.blkio_update_group_write_iops_fn(blkg->key,
								     blkg, iops);
	}
}

/*
 * Add to the appropriate stat variable depending on the request type.
 * This should be called with the blkg->stats_lock held.
 */
static void blkio_add_stat(uint64_t *stat, uint64_t add, bool direction,
			   bool sync)
{
	if (direction)
		stat[BLKIO_STAT_WRITE] += add;
	else
		stat[BLKIO_STAT_READ] += add;
	if (sync)
		stat[BLKIO_STAT_SYNC] += add;
	else
		stat[BLKIO_STAT_ASYNC] += add;
}

/*
 * Decrements the appropriate stat variable if non-zero depending on the
 * request type. Panics on value being zero.
 * This should be called with the blkg->stats_lock held.
 */
static void blkio_check_and_dec_stat(uint64_t *stat, bool direction, bool sync)
{
	if (direction) {
		BUG_ON(stat[BLKIO_STAT_WRITE] == 0);
		stat[BLKIO_STAT_WRITE]--;
	} else {
		BUG_ON(stat[BLKIO_STAT_READ] == 0);
		stat[BLKIO_STAT_READ]--;
	}
	if (sync) {
		BUG_ON(stat[BLKIO_STAT_SYNC] == 0);
		stat[BLKIO_STAT_SYNC]--;
	} else {
		BUG_ON(stat[BLKIO_STAT_ASYNC] == 0);
		stat[BLKIO_STAT_ASYNC]--;
	}
}

#ifdef CONFIG_DEBUG_BLK_CGROUP
/* This should be called with the blkg->stats_lock held. */
static void blkio_set_start_group_wait_time(struct blkio_group *blkg,
					    struct blkio_group *curr_blkg)
{
	if (blkio_blkg_waiting(&blkg->stats))
		return;
	if (blkg == curr_blkg)
		return;
	blkg->stats.start_group_wait_time = sched_clock();
	blkio_mark_blkg_waiting(&blkg->stats);
}

/* This should be called with the blkg->stats_lock held. */
static void blkio_update_group_wait_time(struct blkio_group_stats *stats)
{
	unsigned long long now;

	if (!blkio_blkg_waiting(stats))
		return;

	now = sched_clock();
	if (time_after64(now, stats->start_group_wait_time))
		stats->group_wait_time += now - stats->start_group_wait_time;
	blkio_clear_blkg_waiting(stats);
}

/* This should be called with the blkg->stats_lock held. */
static void blkio_end_empty_time(struct blkio_group_stats *stats)
{
	unsigned long long now;

	if (!blkio_blkg_empty(stats))
		return;

	now = sched_clock();
	if (time_after64(now, stats->start_empty_time))
		stats->empty_time += now - stats->start_empty_time;
	blkio_clear_blkg_empty(stats);
}

void blkiocg_update_set_idle_time_stats(struct blkio_group *blkg)
{
	unsigned long flags;

	spin_lock_irqsave(&blkg->stats_lock, flags);
	BUG_ON(blkio_blkg_idling(&blkg->stats));
	blkg->stats.start_idle_time = sched_clock();
	blkio_mark_blkg_idling(&blkg->stats);
	spin_unlock_irqrestore(&blkg->stats_lock, flags);
}
EXPORT_SYMBOL_GPL(blkiocg_update_set_idle_time_stats);

void blkiocg_update_idle_time_stats(struct blkio_group *blkg)
{
	unsigned long flags;
	unsigned long long now;
	struct blkio_group_stats *stats;

	spin_lock_irqsave(&blkg->stats_lock, flags);
	stats = &blkg->stats;
	if (blkio_blkg_idling(stats)) {
		now = sched_clock();
		if (time_after64(now, stats->start_idle_time))
			stats->idle_time += now - stats->start_idle_time;
		blkio_clear_blkg_idling(stats);
	}
	spin_unlock_irqrestore(&blkg->stats_lock, flags);
}
EXPORT_SYMBOL_GPL(blkiocg_update_idle_time_stats);

void blkiocg_update_avg_queue_size_stats(struct blkio_group *blkg)
{
	unsigned long flags;
	struct blkio_group_stats *stats;

	spin_lock_irqsave(&blkg->stats_lock, flags);
	stats = &blkg->stats;
	stats->avg_queue_size_sum +=
		stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_READ] +
		stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_WRITE];
	stats->avg_queue_size_samples++;
	blkio_update_group_wait_time(stats);
	spin_unlock_irqrestore(&blkg->stats_lock, flags);
}
EXPORT_SYMBOL_GPL(blkiocg_update_avg_queue_size_stats);

void blkiocg_set_start_empty_time(struct blkio_group *blkg)
{
	unsigned long flags;
	struct blkio_group_stats *stats;

	spin_lock_irqsave(&blkg->stats_lock, flags);
	stats = &blkg->stats;

	if (stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_READ] ||
	    stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_WRITE]) {
		spin_unlock_irqrestore(&blkg->stats_lock, flags);
		return;
	}

	/*
	 * group is already marked empty. This can happen if cfqq got new
	 * request in parent group and moved to this group while being added
	 * to service tree. Just ignore the event and move on.
	 */
	if (blkio_blkg_empty(stats)) {
		spin_unlock_irqrestore(&blkg->stats_lock, flags);
		return;
	}

	stats->start_empty_time = sched_clock();
	blkio_mark_blkg_empty(stats);
	spin_unlock_irqrestore(&blkg->stats_lock, flags);
}
EXPORT_SYMBOL_GPL(blkiocg_set_start_empty_time);

void blkiocg_update_dequeue_stats(struct blkio_group *blkg,
				  unsigned long dequeue)
{
	blkg->stats.dequeue += dequeue;
}
EXPORT_SYMBOL_GPL(blkiocg_update_dequeue_stats);
#else
static inline void blkio_set_start_group_wait_time(struct blkio_group *blkg,
					struct blkio_group *curr_blkg) {}
static inline void blkio_end_empty_time(struct blkio_group_stats *stats) {}
#endif

void blkiocg_update_io_add_stats(struct blkio_group *blkg,
				 struct blkio_group *curr_blkg, bool direction,
				 bool sync)
{
	unsigned long flags;

	spin_lock_irqsave(&blkg->stats_lock, flags);
	blkio_add_stat(blkg->stats.stat_arr[BLKIO_STAT_QUEUED], 1, direction,
		       sync);
	blkio_end_empty_time(&blkg->stats);
	blkio_set_start_group_wait_time(blkg, curr_blkg);
	spin_unlock_irqrestore(&blkg->stats_lock, flags);
}
EXPORT_SYMBOL_GPL(blkiocg_update_io_add_stats);

void blkiocg_update_io_remove_stats(struct blkio_group *blkg,
				    bool direction, bool sync)
{
	unsigned long flags;

	spin_lock_irqsave(&blkg->stats_lock, flags);
	blkio_check_and_dec_stat(blkg->stats.stat_arr[BLKIO_STAT_QUEUED],
				 direction, sync);
	spin_unlock_irqrestore(&blkg->stats_lock, flags);
}
EXPORT_SYMBOL_GPL(blkiocg_update_io_remove_stats);

void blkiocg_update_timeslice_used(struct blkio_group *blkg, unsigned long time,
				   unsigned long unaccounted_time)
{
	unsigned long flags;

	spin_lock_irqsave(&blkg->stats_lock, flags);
	blkg->stats.time += time;
	blkg->stats.unaccounted_time += unaccounted_time;
	spin_unlock_irqrestore(&blkg->stats_lock, flags);
}
EXPORT_SYMBOL_GPL(blkiocg_update_timeslice_used);

void blkiocg_update_dispatch_stats(struct blkio_group *blkg,
				   uint64_t bytes, bool direction, bool sync)
{
	struct blkio_group_stats *stats;
	unsigned long flags;

	spin_lock_irqsave(&blkg->stats_lock, flags);
	stats = &blkg->stats;
	stats->sectors += bytes >> 9;
	blkio_add_stat(stats->stat_arr[BLKIO_STAT_SERVICED], 1, direction,
		       sync);
	blkio_add_stat(stats->stat_arr[BLKIO_STAT_SERVICE_BYTES], bytes,
		       direction, sync);
	spin_unlock_irqrestore(&blkg->stats_lock, flags);
}
EXPORT_SYMBOL_GPL(blkiocg_update_dispatch_stats);

void blkiocg_update_completion_stats(struct blkio_group *blkg,
	uint64_t start_time, uint64_t io_start_time, bool direction, bool sync)
{
	struct blkio_group_stats *stats;
	unsigned long flags;
	unsigned long long now = sched_clock();

	spin_lock_irqsave(&blkg->stats_lock, flags);
	stats = &blkg->stats;
	if (time_after64(now, io_start_time))
		blkio_add_stat(stats->stat_arr[BLKIO_STAT_SERVICE_TIME],
			       now - io_start_time, direction, sync);
	if (time_after64(io_start_time, start_time))
		blkio_add_stat(stats->stat_arr[BLKIO_STAT_WAIT_TIME],
			       io_start_time - start_time, direction, sync);
	spin_unlock_irqrestore(&blkg->stats_lock, flags);
}
EXPORT_SYMBOL_GPL(blkiocg_update_completion_stats);

void blkiocg_update_io_merged_stats(struct blkio_group *blkg, bool direction,
				    bool sync)
{
	unsigned long flags;

	spin_lock_irqsave(&blkg->stats_lock, flags);
	blkio_add_stat(blkg->stats.stat_arr[BLKIO_STAT_MERGED], 1, direction,
		       sync);
	spin_unlock_irqrestore(&blkg->stats_lock, flags);
}
EXPORT_SYMBOL_GPL(blkiocg_update_io_merged_stats);

void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
			     struct blkio_group *blkg, void *key, dev_t dev,
			     enum blkio_policy_id plid)
{
	unsigned long flags;

	spin_lock_irqsave(&blkcg->lock, flags);
	spin_lock_init(&blkg->stats_lock);
	rcu_assign_pointer(blkg->key, key);
	blkg->blkcg_id = css_id(&blkcg->css);
	hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list);
	blkg->plid = plid;
	spin_unlock_irqrestore(&blkcg->lock, flags);
	/* Need to take css reference ? */
	cgroup_path(blkcg->css.cgroup, blkg->path, sizeof(blkg->path));
	blkg->dev = dev;
}
EXPORT_SYMBOL_GPL(blkiocg_add_blkio_group);

static void __blkiocg_del_blkio_group(struct blkio_group *blkg)
{
	hlist_del_init_rcu(&blkg->blkcg_node);
	blkg->blkcg_id = 0;
}

/*
 * returns 0 if blkio_group was still on cgroup list. Otherwise returns 1
 * indicating that blk_group was unhashed by the time we got to it.
 */
int blkiocg_del_blkio_group(struct blkio_group *blkg)
{
	struct blkio_cgroup *blkcg;
	unsigned long flags;
	struct cgroup_subsys_state *css;
	int ret = 1;

	rcu_read_lock();
	css = css_lookup(&blkio_subsys, blkg->blkcg_id);
	if (css) {
		blkcg = container_of(css, struct blkio_cgroup, css);
		spin_lock_irqsave(&blkcg->lock, flags);
		if (!hlist_unhashed(&blkg->blkcg_node)) {
			__blkiocg_del_blkio_group(blkg);
			ret = 0;
		}
		spin_unlock_irqrestore(&blkcg->lock, flags);
	}

	rcu_read_unlock();
	return ret;
}
EXPORT_SYMBOL_GPL(blkiocg_del_blkio_group);

/* called under rcu_read_lock(). */
struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key)
{
	struct blkio_group *blkg;
	struct hlist_node *n;
	void *__key;

	hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node) {
		__key = blkg->key;
		if (__key == key)
			return blkg;
	}

	return NULL;
}
EXPORT_SYMBOL_GPL(blkiocg_lookup_group);

static int
blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val)
{
	struct blkio_cgroup *blkcg;
	struct blkio_group *blkg;
	struct blkio_group_stats *stats;
	struct hlist_node *n;
	uint64_t queued[BLKIO_STAT_TOTAL];
	int i;
#ifdef CONFIG_DEBUG_BLK_CGROUP
	bool idling, waiting, empty;
	unsigned long long now = sched_clock();
#endif

	blkcg = cgroup_to_blkio_cgroup(cgroup);
	spin_lock_irq(&blkcg->lock);
	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
		spin_lock(&blkg->stats_lock);
		stats = &blkg->stats;
#ifdef CONFIG_DEBUG_BLK_CGROUP
		idling = blkio_blkg_idling(stats);
		waiting = blkio_blkg_waiting(stats);
		empty = blkio_blkg_empty(stats);
#endif
		for (i = 0; i < BLKIO_STAT_TOTAL; i++)
			queued[i] = stats->stat_arr[BLKIO_STAT_QUEUED][i];
		memset(stats, 0, sizeof(struct blkio_group_stats));
		for (i = 0; i < BLKIO_STAT_TOTAL; i++)
			stats->stat_arr[BLKIO_STAT_QUEUED][i] = queued[i];
#ifdef CONFIG_DEBUG_BLK_CGROUP
		if (idling) {
			blkio_mark_blkg_idling(stats);
			stats->start_idle_time = now;
		}
		if (waiting) {
			blkio_mark_blkg_waiting(stats);
			stats->start_group_wait_time = now;
		}
		if (empty) {
			blkio_mark_blkg_empty(stats);
			stats->start_empty_time = now;
		}
#endif
		spin_unlock(&blkg->stats_lock);
	}
	spin_unlock_irq(&blkcg->lock);
	return 0;
}

static void blkio_get_key_name(enum stat_sub_type type, dev_t dev, char *str,
			       int chars_left, bool diskname_only)
{
	snprintf(str, chars_left, "%d:%d", MAJOR(dev), MINOR(dev));
	chars_left -= strlen(str);
	if (chars_left <= 0) {
		printk(KERN_WARNING
		       "Possibly incorrect cgroup stat display format");
		return;
	}
	if (diskname_only)
		return;
	switch (type) {
	case BLKIO_STAT_READ:
		strlcat(str, " Read", chars_left);
		break;
	case BLKIO_STAT_WRITE:
		strlcat(str, " Write", chars_left);
		break;
	case BLKIO_STAT_SYNC:
		strlcat(str, " Sync", chars_left);
		break;
	case BLKIO_STAT_ASYNC:
		strlcat(str, " Async", chars_left);
		break;
	case BLKIO_STAT_TOTAL:
		strlcat(str, " Total", chars_left);
		break;
	default:
		strlcat(str, " Invalid", chars_left);
	}
}

static uint64_t blkio_fill_stat(char *str, int chars_left, uint64_t val,
				struct cgroup_map_cb *cb, dev_t dev)
{
	blkio_get_key_name(0, dev, str, chars_left, true);
	cb->fill(cb, str, val);
	return val;
}

/* This should be called with blkg->stats_lock held */
static uint64_t blkio_get_stat(struct blkio_group *blkg,
		struct cgroup_map_cb *cb, dev_t dev, enum stat_type type)
{
	uint64_t disk_total;
	char key_str[MAX_KEY_LEN];
	enum stat_sub_type sub_type;

	if (type == BLKIO_STAT_TIME)
		return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
				       blkg->stats.time, cb, dev);
	if (type == BLKIO_STAT_SECTORS)
		return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
				       blkg->stats.sectors, cb, dev);
#ifdef CONFIG_DEBUG_BLK_CGROUP
	if (type == BLKIO_STAT_UNACCOUNTED_TIME)
		return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
				       blkg->stats.unaccounted_time, cb, dev);
	if (type == BLKIO_STAT_AVG_QUEUE_SIZE) {
		uint64_t sum = blkg->stats.avg_queue_size_sum;
		uint64_t samples = blkg->stats.avg_queue_size_samples;
		if (samples)
			do_div(sum, samples);
		else
			sum = 0;
		return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, sum, cb, dev);
	}
	if (type == BLKIO_STAT_GROUP_WAIT_TIME)
		return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
				       blkg->stats.group_wait_time, cb, dev);
	if (type == BLKIO_STAT_IDLE_TIME)
		return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
				       blkg->stats.idle_time, cb, dev);
	if (type == BLKIO_STAT_EMPTY_TIME)
		return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
				       blkg->stats.empty_time, cb, dev);
	if (type == BLKIO_STAT_DEQUEUE)
		return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
				       blkg->stats.dequeue, cb, dev);
#endif

	for (sub_type = BLKIO_STAT_READ; sub_type < BLKIO_STAT_TOTAL;
	     sub_type++) {
		blkio_get_key_name(sub_type, dev, key_str, MAX_KEY_LEN, false);
		cb->fill(cb, key_str, blkg->stats.stat_arr[type][sub_type]);
	}
	disk_total = blkg->stats.stat_arr[type][BLKIO_STAT_READ] +
			blkg->stats.stat_arr[type][BLKIO_STAT_WRITE];
	blkio_get_key_name(BLKIO_STAT_TOTAL, dev, key_str, MAX_KEY_LEN, false);
	cb->fill(cb, key_str, disk_total);
	return disk_total;
}

static int blkio_check_dev_num(dev_t dev)
{
	int part = 0;
	struct gendisk *disk;

	disk = get_gendisk(dev, &part);
	if (!disk || part)
		return -ENODEV;

	return 0;
}

static int blkio_policy_parse_and_set(char *buf,
	struct blkio_policy_node *newpn, enum blkio_policy_id plid, int fileid)
{
	char *s[4], *p, *major_s = NULL, *minor_s = NULL;
	int ret;
	unsigned long major, minor, temp;
	int i = 0;
	dev_t dev;
	u64 bps, iops;

	memset(s, 0, sizeof(s));

	while ((p = strsep(&buf, " ")) != NULL) {
		if (!*p)
			continue;

		s[i++] = p;

		/* Prevent from inputting too many things */
		if (i == 3)
			break;
	}

	if (i != 2)
		return -EINVAL;

	p = strsep(&s[0], ":");
	if (p != NULL)
		major_s = p;
	else
		return -EINVAL;

	minor_s = s[0];
	if (!minor_s)
		return -EINVAL;

	ret = strict_strtoul(major_s, 10, &major);
	if (ret)
		return -EINVAL;

	ret = strict_strtoul(minor_s, 10, &minor);
	if (ret)
		return -EINVAL;

	dev = MKDEV(major, minor);

	ret = blkio_check_dev_num(dev);
	if (ret)
		return ret;

	newpn->dev = dev;

	if (s[1] == NULL)
		return -EINVAL;

	switch (plid) {
	case BLKIO_POLICY_PROP:
		ret = strict_strtoul(s[1], 10, &temp);
		if (ret || (temp < BLKIO_WEIGHT_MIN && temp > 0) ||
		    temp > BLKIO_WEIGHT_MAX)
			return -EINVAL;

		newpn->plid = plid;
		newpn->fileid = fileid;
		newpn->val.weight = temp;
		break;
	case BLKIO_POLICY_THROTL:
		switch (fileid) {
		case BLKIO_THROTL_read_bps_device:
		case BLKIO_THROTL_write_bps_device:
			ret = strict_strtoull(s[1], 10, &bps);
			if (ret)
				return -EINVAL;

			newpn->plid = plid;
			newpn->fileid = fileid;
			newpn->val.bps = bps;
			break;
		case BLKIO_THROTL_read_iops_device:
		case BLKIO_THROTL_write_iops_device:
			ret = strict_strtoull(s[1], 10, &iops);
			if (ret)
				return -EINVAL;

			if (iops > THROTL_IOPS_MAX)
				return -EINVAL;

			newpn->plid = plid;
			newpn->fileid = fileid;
			newpn->val.iops = (unsigned int)iops;
			break;
		}
		break;
	default:
		BUG();
	}

	return 0;
}

unsigned int blkcg_get_weight(struct blkio_cgroup *blkcg,
			      dev_t dev)
{
	struct blkio_policy_node *pn;

	pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_PROP,
				      BLKIO_PROP_weight_device);
	if (pn)
		return pn->val.weight;
	else
		return blkcg->weight;
}
EXPORT_SYMBOL_GPL(blkcg_get_weight);

uint64_t blkcg_get_read_bps(struct blkio_cgroup *blkcg, dev_t dev)
{
	struct blkio_policy_node *pn;

	pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL,
				      BLKIO_THROTL_read_bps_device);
	if (pn)
		return pn->val.bps;
	else
		return -1;
}

uint64_t blkcg_get_write_bps(struct blkio_cgroup *blkcg, dev_t dev)
{
	struct blkio_policy_node *pn;
	pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL,
				      BLKIO_THROTL_write_bps_device);
	if (pn)
		return pn->val.bps;
	else
		return -1;
}

unsigned int blkcg_get_read_iops(struct blkio_cgroup *blkcg, dev_t dev)
{
	struct blkio_policy_node *pn;

	pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL,
				      BLKIO_THROTL_read_iops_device);
	if (pn)
		return pn->val.iops;
	else
		return -1;
}

unsigned int blkcg_get_write_iops(struct blkio_cgroup *blkcg, dev_t dev)
{
	struct blkio_policy_node *pn;
	pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL,
				      BLKIO_THROTL_write_iops_device);
	if (pn)
		return pn->val.iops;
	else
		return -1;
}

/* Checks whether user asked for deleting a policy rule */
static bool blkio_delete_rule_command(struct blkio_policy_node *pn)
{
	switch (pn->plid) {
	case BLKIO_POLICY_PROP:
		if (pn->val.weight == 0)
			return 1;
		break;
	case BLKIO_POLICY_THROTL:
		switch (pn->fileid) {
		case BLKIO_THROTL_read_bps_device:
		case BLKIO_THROTL_write_bps_device:
			if (pn->val.bps == 0)
				return 1;
			break;
		case BLKIO_THROTL_read_iops_device:
		case BLKIO_THROTL_write_iops_device:
			if (pn->val.iops == 0)
				return 1;
		}
		break;
	default:
		BUG();
	}

	return 0;
}

static void blkio_update_policy_rule(struct blkio_policy_node *oldpn,
				     struct blkio_policy_node *newpn)
{
	switch (oldpn->plid) {
	case BLKIO_POLICY_PROP:
		oldpn->val.weight = newpn->val.weight;
		break;
	case BLKIO_POLICY_THROTL:
		switch (newpn->fileid) {
		case BLKIO_THROTL_read_bps_device:
		case BLKIO_THROTL_write_bps_device:
			oldpn->val.bps = newpn->val.bps;
			break;
		case BLKIO_THROTL_read_iops_device:
		case BLKIO_THROTL_write_iops_device:
			oldpn->val.iops = newpn->val.iops;
		}
		break;
	default:
		BUG();
	}
}

/*
 * Some rules/values in blkg have changed. Propagate those to respective
 * policies.
 */
static void blkio_update_blkg_policy(struct blkio_cgroup *blkcg,
		struct blkio_group *blkg, struct blkio_policy_node *pn)
{
	unsigned int weight, iops;
	u64 bps;

	switch (pn->plid) {
	case BLKIO_POLICY_PROP:
		weight = pn->val.weight ? pn->val.weight :
				blkcg->weight;
		blkio_update_group_weight(blkg, weight);
		break;
	case BLKIO_POLICY_THROTL:
		switch (pn->fileid) {
		case BLKIO_THROTL_read_bps_device:
		case BLKIO_THROTL_write_bps_device:
			bps = pn->val.bps ? pn->val.bps : (-1);
			blkio_update_group_bps(blkg, bps, pn->fileid);
			break;
		case BLKIO_THROTL_read_iops_device:
		case BLKIO_THROTL_write_iops_device:
			iops = pn->val.iops ? pn->val.iops : (-1);
			blkio_update_group_iops(blkg, iops, pn->fileid);
			break;
		}
		break;
	default:
		BUG();
	}
}

/*
 * A policy node rule has been updated. Propagate this update to all the
 * block groups which might be affected by this update.
 */
static void blkio_update_policy_node_blkg(struct blkio_cgroup *blkcg,
					  struct blkio_policy_node *pn)
{
	struct blkio_group *blkg;
	struct hlist_node *n;

	spin_lock(&blkio_list_lock);
	spin_lock_irq(&blkcg->lock);

	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
		if (pn->dev != blkg->dev || pn->plid != blkg->plid)
			continue;
		blkio_update_blkg_policy(blkcg, blkg, pn);
	}

	spin_unlock_irq(&blkcg->lock);
	spin_unlock(&blkio_list_lock);
}

static int blkiocg_file_write(struct cgroup *cgrp, struct cftype *cft,
			      const char *buffer)
{
	int ret = 0;
	char *buf;
	struct blkio_policy_node *newpn, *pn;
	struct blkio_cgroup *blkcg;
	int keep_newpn = 0;
	enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
	int fileid = BLKIOFILE_ATTR(cft->private);

	buf = kstrdup(buffer, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	newpn = kzalloc(sizeof(*newpn), GFP_KERNEL);
	if (!newpn) {
		ret = -ENOMEM;
		goto free_buf;
	}

	ret = blkio_policy_parse_and_set(buf, newpn, plid, fileid);
	if (ret)
		goto free_newpn;

	blkcg = cgroup_to_blkio_cgroup(cgrp);

	spin_lock_irq(&blkcg->lock);

	pn = blkio_policy_search_node(blkcg, newpn->dev, plid, fileid);
	if (!pn) {
		if (!blkio_delete_rule_command(newpn)) {
			blkio_policy_insert_node(blkcg, newpn);
			keep_newpn = 1;
		}
		spin_unlock_irq(&blkcg->lock);
		goto update_io_group;
	}

	if (blkio_delete_rule_command(newpn)) {
		blkio_policy_delete_node(pn);
		spin_unlock_irq(&blkcg->lock);
		goto update_io_group;
	}
	spin_unlock_irq(&blkcg->lock);

	blkio_update_policy_rule(pn, newpn);

update_io_group:
	blkio_update_policy_node_blkg(blkcg, newpn);

free_newpn:
	if (!keep_newpn)
		kfree(newpn);
free_buf:
	kfree(buf);
	return ret;
}

static void
blkio_print_policy_node(struct seq_file *m, struct blkio_policy_node *pn)
{
	switch (pn->plid) {
	case BLKIO_POLICY_PROP:
		if (pn->fileid == BLKIO_PROP_weight_device)
			seq_printf(m, "%u:%u\t%u\n", MAJOR(pn->dev),
				   MINOR(pn->dev), pn->val.weight);
		break;
	case BLKIO_POLICY_THROTL:
		switch (pn->fileid) {
		case BLKIO_THROTL_read_bps_device:
		case BLKIO_THROTL_write_bps_device:
			seq_printf(m, "%u:%u\t%llu\n", MAJOR(pn->dev),
				   MINOR(pn->dev), pn->val.bps);
			break;
		case BLKIO_THROTL_read_iops_device:
		case BLKIO_THROTL_write_iops_device:
			seq_printf(m, "%u:%u\t%u\n", MAJOR(pn->dev),
				   MINOR(pn->dev), pn->val.iops);
			break;
		}
		break;
	default:
		BUG();
	}
}

/* cgroup files which read their data from policy nodes end up here */
static void blkio_read_policy_node_files(struct cftype *cft,
		struct blkio_cgroup *blkcg, struct seq_file *m)
{
	struct blkio_policy_node *pn;

	if (!list_empty(&blkcg->policy_list)) {
		spin_lock_irq(&blkcg->lock);
		list_for_each_entry(pn, &blkcg->policy_list, node) {
			if (!pn_matches_cftype(cft, pn))
				continue;
			blkio_print_policy_node(m, pn);
		}
		spin_unlock_irq(&blkcg->lock);
	}
}

static int blkiocg_file_read(struct cgroup *cgrp, struct cftype *cft,
			     struct seq_file *m)
{
	struct blkio_cgroup *blkcg;
	enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
	int name = BLKIOFILE_ATTR(cft->private);

	blkcg = cgroup_to_blkio_cgroup(cgrp);

	switch (plid) {
	case BLKIO_POLICY_PROP:
		switch (name) {
		case BLKIO_PROP_weight_device:
			blkio_read_policy_node_files(cft, blkcg, m);
			return 0;
		default:
			BUG();
		}
		break;
	case BLKIO_POLICY_THROTL:
		switch (name) {
		case BLKIO_THROTL_read_bps_device:
		case BLKIO_THROTL_write_bps_device:
		case BLKIO_THROTL_read_iops_device:
		case BLKIO_THROTL_write_iops_device:
			blkio_read_policy_node_files(cft, blkcg, m);
			return 0;
		default:
			BUG();
		}
		break;
	default:
		BUG();
	}

	return 0;
}

static int blkio_read_blkg_stats(struct blkio_cgroup *blkcg,
		struct cftype *cft, struct cgroup_map_cb *cb, enum stat_type type,
		bool show_total)
{
	struct blkio_group *blkg;
	struct hlist_node *n;
	uint64_t cgroup_total = 0;

	rcu_read_lock();
	hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node) {
		if (blkg->dev) {
			if (!cftype_blkg_same_policy(cft, blkg))
				continue;
			spin_lock_irq(&blkg->stats_lock);
			cgroup_total += blkio_get_stat(blkg, cb, blkg->dev,
						       type);
			spin_unlock_irq(&blkg->stats_lock);
		}
	}
	if (show_total)
		cb->fill(cb, "Total", cgroup_total);
	rcu_read_unlock();
	return 0;
}

/* All map kind of cgroup file get serviced by this function */
static int blkiocg_file_read_map(struct cgroup *cgrp, struct cftype *cft,
				 struct cgroup_map_cb *cb)
{
	struct blkio_cgroup *blkcg;
	enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
	int name = BLKIOFILE_ATTR(cft->private);

	blkcg = cgroup_to_blkio_cgroup(cgrp);

	switch (plid) {
	case BLKIO_POLICY_PROP:
		switch (name) {
		case BLKIO_PROP_time:
			return blkio_read_blkg_stats(blkcg, cft, cb,
						BLKIO_STAT_TIME, 0);
		case BLKIO_PROP_sectors:
			return blkio_read_blkg_stats(blkcg, cft, cb,
						BLKIO_STAT_SECTORS, 0);
		case BLKIO_PROP_io_service_bytes:
			return blkio_read_blkg_stats(blkcg, cft, cb,
						BLKIO_STAT_SERVICE_BYTES, 1);
		case BLKIO_PROP_io_serviced:
			return blkio_read_blkg_stats(blkcg, cft, cb,
						BLKIO_STAT_SERVICED, 1);
		case BLKIO_PROP_io_service_time:
			return blkio_read_blkg_stats(blkcg, cft, cb,
						BLKIO_STAT_SERVICE_TIME, 1);
		case BLKIO_PROP_io_wait_time:
			return blkio_read_blkg_stats(blkcg, cft, cb,
						BLKIO_STAT_WAIT_TIME, 1);
		case BLKIO_PROP_io_merged:
			return blkio_read_blkg_stats(blkcg, cft, cb,
						BLKIO_STAT_MERGED, 1);
		case BLKIO_PROP_io_queued:
			return blkio_read_blkg_stats(blkcg, cft, cb,
						BLKIO_STAT_QUEUED, 1);
#ifdef CONFIG_DEBUG_BLK_CGROUP
		case BLKIO_PROP_unaccounted_time:
			return blkio_read_blkg_stats(blkcg, cft, cb,
						BLKIO_STAT_UNACCOUNTED_TIME, 0);
		case BLKIO_PROP_dequeue:
			return blkio_read_blkg_stats(blkcg, cft, cb,
						BLKIO_STAT_DEQUEUE, 0);
		case BLKIO_PROP_avg_queue_size:
			return blkio_read_blkg_stats(blkcg, cft, cb,
						BLKIO_STAT_AVG_QUEUE_SIZE, 0);
		case BLKIO_PROP_group_wait_time:
			return blkio_read_blkg_stats(blkcg, cft, cb,
						BLKIO_STAT_GROUP_WAIT_TIME, 0);
		case BLKIO_PROP_idle_time:
			return blkio_read_blkg_stats(blkcg, cft, cb,
						BLKIO_STAT_IDLE_TIME, 0);
		case BLKIO_PROP_empty_time:
			return blkio_read_blkg_stats(blkcg, cft, cb,
						BLKIO_STAT_EMPTY_TIME, 0);
#endif
		default:
			BUG();
		}
		break;
	case BLKIO_POLICY_THROTL:
		switch (name) {
		case BLKIO_THROTL_io_service_bytes:
			return blkio_read_blkg_stats(blkcg, cft, cb,
						BLKIO_STAT_SERVICE_BYTES, 1);
		case BLKIO_THROTL_io_serviced:
			return blkio_read_blkg_stats(blkcg, cft, cb,
						BLKIO_STAT_SERVICED, 1);
		default:
			BUG();
		}
		break;
	default:
		BUG();
	}

	return 0;
}

static int blkio_weight_write(struct blkio_cgroup *blkcg, u64 val)
{
	struct blkio_group *blkg;
	struct hlist_node *n;
	struct blkio_policy_node *pn;

	if (val < BLKIO_WEIGHT_MIN || val > BLKIO_WEIGHT_MAX)
		return -EINVAL;

	spin_lock(&blkio_list_lock);
	spin_lock_irq(&blkcg->lock);
	blkcg->weight = (unsigned int)val;

	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
		pn = blkio_policy_search_node(blkcg, blkg->dev,
				BLKIO_POLICY_PROP, BLKIO_PROP_weight_device);
		if (pn)
			continue;

		blkio_update_group_weight(blkg, blkcg->weight);
	}
	spin_unlock_irq(&blkcg->lock);
	spin_unlock(&blkio_list_lock);
	return 0;
}

static u64 blkiocg_file_read_u64(struct cgroup *cgrp, struct cftype *cft)
{
	struct blkio_cgroup *blkcg;
	enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
	int name = BLKIOFILE_ATTR(cft->private);

	blkcg = cgroup_to_blkio_cgroup(cgrp);

	switch (plid) {
	case BLKIO_POLICY_PROP:
		switch (name) {
		case BLKIO_PROP_weight:
			return (u64)blkcg->weight;
		}
		break;
	default:
		BUG();
	}
	return 0;
}

static int
blkiocg_file_write_u64(struct cgroup *cgrp, struct cftype *cft, u64 val)
{
	struct blkio_cgroup *blkcg;
	enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
	int name = BLKIOFILE_ATTR(cft->private);

	blkcg = cgroup_to_blkio_cgroup(cgrp);

	switch (plid) {
	case BLKIO_POLICY_PROP:
		switch (name) {
		case BLKIO_PROP_weight:
			return blkio_weight_write(blkcg, val);
		}
		break;
	default:
		BUG();
	}

	return 0;
}

struct cftype blkio_files[] = {
	{
		.name = "weight_device",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
				BLKIO_PROP_weight_device),
		.read_seq_string = blkiocg_file_read,
		.write_string = blkiocg_file_write,
		.max_write_len = 256,
	},
	{
		.name = "weight",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
				BLKIO_PROP_weight),
		.read_u64 = blkiocg_file_read_u64,
		.write_u64 = blkiocg_file_write_u64,
	},
	{
		.name = "time",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
				BLKIO_PROP_time),
		.read_map = blkiocg_file_read_map,
	},
	{
		.name = "sectors",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
				BLKIO_PROP_sectors),
		.read_map = blkiocg_file_read_map,
	},
	{
		.name = "io_service_bytes",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
				BLKIO_PROP_io_service_bytes),
		.read_map = blkiocg_file_read_map,
	},
	{
		.name = "io_serviced",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
				BLKIO_PROP_io_serviced),
		.read_map = blkiocg_file_read_map,
	},
	{
		.name = "io_service_time",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
				BLKIO_PROP_io_service_time),
		.read_map = blkiocg_file_read_map,
	},
	{
		.name = "io_wait_time",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
				BLKIO_PROP_io_wait_time),
		.read_map = blkiocg_file_read_map,
	},
	{
		.name = "io_merged",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
				BLKIO_PROP_io_merged),
		.read_map = blkiocg_file_read_map,
	},
	{
		.name = "io_queued",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
				BLKIO_PROP_io_queued),
		.read_map = blkiocg_file_read_map,
	},
	{
		.name = "reset_stats",
		.write_u64 = blkiocg_reset_stats,
	},
#ifdef CONFIG_BLK_DEV_THROTTLING
	{
		.name = "throttle.read_bps_device",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
				BLKIO_THROTL_read_bps_device),
		.read_seq_string = blkiocg_file_read,
		.write_string = blkiocg_file_write,
		.max_write_len = 256,
	},

	{
		.name = "throttle.write_bps_device",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
				BLKIO_THROTL_write_bps_device),
		.read_seq_string = blkiocg_file_read,
		.write_string = blkiocg_file_write,
		.max_write_len = 256,
	},

	{
		.name = "throttle.read_iops_device",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
				BLKIO_THROTL_read_iops_device),
		.read_seq_string = blkiocg_file_read,
		.write_string = blkiocg_file_write,
		.max_write_len = 256,
	},

	{
		.name = "throttle.write_iops_device",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
				BLKIO_THROTL_write_iops_device),
		.read_seq_string = blkiocg_file_read,
		.write_string = blkiocg_file_write,
		.max_write_len = 256,
	},
	{
		.name = "throttle.io_service_bytes",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
				BLKIO_THROTL_io_service_bytes),
		.read_map = blkiocg_file_read_map,
	},
	{
		.name = "throttle.io_serviced",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
				BLKIO_THROTL_io_serviced),
		.read_map = blkiocg_file_read_map,
	},
#endif /* CONFIG_BLK_DEV_THROTTLING */

#ifdef CONFIG_DEBUG_BLK_CGROUP
	{
		.name = "avg_queue_size",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
				BLKIO_PROP_avg_queue_size),
		.read_map = blkiocg_file_read_map,
	},
	{
		.name = "group_wait_time",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
				BLKIO_PROP_group_wait_time),
		.read_map = blkiocg_file_read_map,
	},
	{
		.name = "idle_time",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
				BLKIO_PROP_idle_time),
		.read_map = blkiocg_file_read_map,
	},
	{
		.name = "empty_time",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
				BLKIO_PROP_empty_time),
		.read_map = blkiocg_file_read_map,
	},
	{
		.name = "dequeue",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
				BLKIO_PROP_dequeue),
		.read_map = blkiocg_file_read_map,
	},
	{
		.name = "unaccounted_time",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
				BLKIO_PROP_unaccounted_time),
		.read_map = blkiocg_file_read_map,
	},
#endif
};

static int blkiocg_populate(struct cgroup_subsys *subsys, struct cgroup *cgroup)
{
	return cgroup_add_files(cgroup, subsys, blkio_files,
				ARRAY_SIZE(blkio_files));
}
static void blkiocg_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup)
{
	struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);
	unsigned long flags;
	struct blkio_group *blkg;
	void *key;
	struct blkio_policy_type *blkiop;
	struct blkio_policy_node *pn, *pntmp;

	rcu_read_lock();
	do {
		spin_lock_irqsave(&blkcg->lock, flags);

		if (hlist_empty(&blkcg->blkg_list)) {
			spin_unlock_irqrestore(&blkcg->lock, flags);
			break;
		}

		blkg = hlist_entry(blkcg->blkg_list.first, struct blkio_group,
				   blkcg_node);
		key = rcu_dereference(blkg->key);
		__blkiocg_del_blkio_group(blkg);

		spin_unlock_irqrestore(&blkcg->lock, flags);

		/*
		 * This blkio_group is being unlinked as associated cgroup is
		 * going away. Let all the IO controlling policies know about
		 * this event.
		 */
		spin_lock(&blkio_list_lock);
		list_for_each_entry(blkiop, &blkio_list, list) {
			if (blkiop->plid != blkg->plid)
				continue;
			blkiop->ops.blkio_unlink_group_fn(key, blkg);
		}
		spin_unlock(&blkio_list_lock);
	} while (1);

	list_for_each_entry_safe(pn, pntmp, &blkcg->policy_list, node) {
		blkio_policy_delete_node(pn);
		kfree(pn);
	}

	free_css_id(&blkio_subsys, &blkcg->css);
	rcu_read_unlock();
	if (blkcg != &blkio_root_cgroup)
		kfree(blkcg);
}

static struct cgroup_subsys_state *
blkiocg_create(struct cgroup_subsys *subsys, struct cgroup *cgroup)
{
	struct blkio_cgroup *blkcg;
	struct cgroup *parent = cgroup->parent;

	if (!parent) {
		blkcg = &blkio_root_cgroup;
		goto done;
	}

	blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL);
	if (!blkcg)
		return ERR_PTR(-ENOMEM);

	blkcg->weight = BLKIO_WEIGHT_DEFAULT;
done:
	spin_lock_init(&blkcg->lock);
	INIT_HLIST_HEAD(&blkcg->blkg_list);

	INIT_LIST_HEAD(&blkcg->policy_list);
	return &blkcg->css;
}

/*
 * We cannot support shared io contexts, as we have no means to support
 * two tasks with the same ioc in two different groups without major rework
 * of the main cic data structures. For now we allow a task to change
 * its cgroup only if it's the only owner of its ioc.
 */
static int blkiocg_can_attach(struct cgroup_subsys *subsys,
			      struct cgroup *cgroup, struct task_struct *tsk,
			      bool threadgroup)
{
	struct io_context *ioc;
	int ret = 0;

	/* task_lock() is needed to avoid races with exit_io_context() */
	task_lock(tsk);
	ioc = tsk->io_context;
	if (ioc && atomic_read(&ioc->nr_tasks) > 1)
		ret = -EINVAL;
	task_unlock(tsk);

	return ret;
}

static void blkiocg_attach(struct cgroup_subsys *subsys, struct cgroup *cgroup,
			   struct cgroup *prev, struct task_struct *tsk,
			   bool threadgroup)
{
	struct io_context *ioc;

	task_lock(tsk);
	ioc = tsk->io_context;
	if (ioc)
		ioc->cgroup_changed = 1;
	task_unlock(tsk);
}

void blkio_policy_register(struct blkio_policy_type *blkiop)
{
	spin_lock(&blkio_list_lock);
	list_add_tail(&blkiop->list, &blkio_list);
	spin_unlock(&blkio_list_lock);
}
EXPORT_SYMBOL_GPL(blkio_policy_register);

void blkio_policy_unregister(struct blkio_policy_type *blkiop)
{
	spin_lock(&blkio_list_lock);
	list_del_init(&blkiop->list);
	spin_unlock(&blkio_list_lock);
}
EXPORT_SYMBOL_GPL(blkio_policy_unregister);

static int __init init_cgroup_blkio(void)
{
	return cgroup_load_subsys(&blkio_subsys);
}

static void __exit exit_cgroup_blkio(void)
{
	cgroup_unload_subsys(&blkio_subsys);
}

module_init(init_cgroup_blkio);
module_exit(exit_cgroup_blkio);
MODULE_LICENSE("GPL");
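/*
 * Usage sketch (editor's addition, not part of the kernel file): the
 * blkio_policy_register()/blkio_policy_unregister() pair above is how an
 * I/O control policy plugs into this cgroup interface, and the blkiop->ops
 * callbacks are invoked exactly as seen at the call sites in this file,
 * e.g. ops.blkio_update_group_weight_fn(blkg->key, blkg, weight) and
 * ops.blkio_unlink_group_fn(key, blkg). The sketch assumes the
 * struct blkio_policy_type layout and callback typedefs in blk-cgroup.h
 * match those call sites; all example_* names are hypothetical.
 */
#if 0	/* illustrative only */
static void example_unlink_group(void *key, struct blkio_group *blkg)
{
	/* tear down the policy's per-device state for this group */
}

static void example_update_weight(void *key, struct blkio_group *blkg,
				  unsigned int weight)
{
	/* apply the new proportional weight inside the policy */
}

static struct blkio_policy_type example_policy = {
	.ops = {
		.blkio_unlink_group_fn		= example_unlink_group,
		.blkio_update_group_weight_fn	= example_update_weight,
	},
	/* only blkgs added with a matching plid receive these callbacks */
	.plid = BLKIO_POLICY_PROP,
};

static int __init example_policy_init(void)
{
	/* after this, writes to blkio.weight* reach example_update_weight() */
	blkio_policy_register(&example_policy);
	return 0;
}

static void __exit example_policy_exit(void)
{
	blkio_policy_unregister(&example_policy);
}
#endif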