blk-cgroup.c revision 8a3d26151f24e2a2ffa550890144c3d54d2edb15
/*
 * Common Block IO controller cgroup interface
 *
 * Based on ideas and code from CFQ, CFS and BFQ:
 * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
 *
 * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
 *		      Paolo Valente <paolo.valente@unimore.it>
 *
 * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com>
 *		      Nauman Rafique <nauman@google.com>
 */
#include <linux/ioprio.h>
#include <linux/kdev_t.h>
#include <linux/module.h>
#include <linux/err.h>
#include <linux/blkdev.h>
#include <linux/slab.h>
#include <linux/genhd.h>
#include <linux/delay.h>
#include <linux/atomic.h>
#include "blk-cgroup.h"
#include "blk.h"

#define MAX_KEY_LEN 100

static DEFINE_SPINLOCK(blkio_list_lock);
static LIST_HEAD(blkio_list);

static DEFINE_MUTEX(all_q_mutex);
static LIST_HEAD(all_q_list);

struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT };
EXPORT_SYMBOL_GPL(blkio_root_cgroup);

static struct blkio_policy_type *blkio_policy[BLKIO_NR_POLICIES];

struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup)
{
	return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id),
			    struct blkio_cgroup, css);
}
EXPORT_SYMBOL_GPL(cgroup_to_blkio_cgroup);

static struct blkio_cgroup *task_blkio_cgroup(struct task_struct *tsk)
{
	return container_of(task_subsys_state(tsk, blkio_subsys_id),
			    struct blkio_cgroup, css);
}

struct blkio_cgroup *bio_blkio_cgroup(struct bio *bio)
{
	if (bio && bio->bi_css)
		return container_of(bio->bi_css, struct blkio_cgroup, css);
	return task_blkio_cgroup(current);
}
EXPORT_SYMBOL_GPL(bio_blkio_cgroup);

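/*
 * Illustrative sketch (not part of the original file): bio_blkio_cgroup()
 * falls back to the current task's blkcg via task_subsys_state(), which is
 * only safe to dereference under rcu_read_lock().  A hypothetical caller
 * would therefore look roughly like:
 *
 *	rcu_read_lock();
 *	blkcg = bio_blkio_cgroup(bio);
 *	... use blkcg, e.g. look up the blkg for (blkcg, q) ...
 *	rcu_read_unlock();
 */
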
/**
 * blkg_free - free a blkg
 * @blkg: blkg to free
 *
 * Free @blkg which may be partially allocated.
 */
static void blkg_free(struct blkio_group *blkg)
{
	int i;

	if (!blkg)
		return;

	for (i = 0; i < BLKIO_NR_POLICIES; i++) {
		struct blkio_policy_type *pol = blkio_policy[i];
		struct blkg_policy_data *pd = blkg->pd[i];

		if (!pd)
			continue;

		if (pol && pol->ops.blkio_exit_group_fn)
			pol->ops.blkio_exit_group_fn(blkg);

		kfree(pd);
	}

	kfree(blkg);
}

/**
 * blkg_alloc - allocate a blkg
 * @blkcg: block cgroup the new blkg is associated with
 * @q: request_queue the new blkg is associated with
 *
 * Allocate a new blkg associating @blkcg and @q.
 */
static struct blkio_group *blkg_alloc(struct blkio_cgroup *blkcg,
				      struct request_queue *q)
{
	struct blkio_group *blkg;
	int i;

	/* alloc and init base part */
	blkg = kzalloc_node(sizeof(*blkg), GFP_ATOMIC, q->node);
	if (!blkg)
		return NULL;

	blkg->q = q;
	INIT_LIST_HEAD(&blkg->q_node);
	blkg->blkcg = blkcg;
	blkg->refcnt = 1;
	cgroup_path(blkcg->css.cgroup, blkg->path, sizeof(blkg->path));

	for (i = 0; i < BLKIO_NR_POLICIES; i++) {
		struct blkio_policy_type *pol = blkio_policy[i];
		struct blkg_policy_data *pd;

		if (!pol)
			continue;

		/* alloc per-policy data and attach it to blkg */
		pd = kzalloc_node(sizeof(*pd) + pol->pdata_size, GFP_ATOMIC,
				  q->node);
		if (!pd) {
			blkg_free(blkg);
			return NULL;
		}

		blkg->pd[i] = pd;
		pd->blkg = blkg;
	}

	/* invoke per-policy init */
	for (i = 0; i < BLKIO_NR_POLICIES; i++) {
		struct blkio_policy_type *pol = blkio_policy[i];

		if (pol)
			pol->ops.blkio_init_group_fn(blkg);
	}

	return blkg;
}

struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg,
				       struct request_queue *q,
				       bool for_root)
	__releases(q->queue_lock) __acquires(q->queue_lock)
{
	struct blkio_group *blkg;

	WARN_ON_ONCE(!rcu_read_lock_held());
	lockdep_assert_held(q->queue_lock);

	/*
	 * This could be the first entry point of the blkcg implementation
	 * and we shouldn't allow anything to go through for a bypassing
	 * queue.  The following can be removed if blkg lookup is guaranteed
	 * to fail on a bypassing queue.
	 */
	if (unlikely(blk_queue_bypass(q)) && !for_root)
		return ERR_PTR(blk_queue_dead(q) ? -EINVAL : -EBUSY);

	blkg = blkg_lookup(blkcg, q);
	if (blkg)
		return blkg;

	/* blkg holds a reference to blkcg */
	if (!css_tryget(&blkcg->css))
		return ERR_PTR(-EINVAL);

	/*
	 * Allocate and initialize.
	 */
	blkg = blkg_alloc(blkcg, q);

	/* did alloc fail? */
	if (unlikely(!blkg)) {
		blkg = ERR_PTR(-ENOMEM);
		goto out;
	}

	/* insert */
	spin_lock(&blkcg->lock);
	hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list);
	list_add(&blkg->q_node, &q->blkg_list);
	spin_unlock(&blkcg->lock);
out:
	return blkg;
}
EXPORT_SYMBOL_GPL(blkg_lookup_create);

/* called under rcu_read_lock(). */
struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg,
				struct request_queue *q)
{
	struct blkio_group *blkg;
	struct hlist_node *n;

	hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node)
		if (blkg->q == q)
			return blkg;
	return NULL;
}
EXPORT_SYMBOL_GPL(blkg_lookup);

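/*
 * Illustrative sketch (not part of the original file): the calling
 * convention blkg_lookup_create() asserts above.  A hypothetical caller
 * resolving the blkg for a bio would look roughly like:
 *
 *	rcu_read_lock();
 *	blkcg = bio_blkio_cgroup(bio);
 *	spin_lock_irq(q->queue_lock);
 *	blkg = blkg_lookup_create(blkcg, q, false);
 *	spin_unlock_irq(q->queue_lock);
 *	rcu_read_unlock();
 *	if (IS_ERR(blkg))
 *		... fall back or retry depending on PTR_ERR(blkg) ...
 */
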
static void blkg_destroy(struct blkio_group *blkg)
{
	struct request_queue *q = blkg->q;
	struct blkio_cgroup *blkcg = blkg->blkcg;

	lockdep_assert_held(q->queue_lock);
	lockdep_assert_held(&blkcg->lock);

	/* Something is wrong if we are trying to remove the same group twice */
	WARN_ON_ONCE(list_empty(&blkg->q_node));
	WARN_ON_ONCE(hlist_unhashed(&blkg->blkcg_node));
	list_del_init(&blkg->q_node);
	hlist_del_init_rcu(&blkg->blkcg_node);

	/*
	 * Put the reference taken at the time of creation so that when all
	 * queues are gone, the group can be destroyed.
	 */
	blkg_put(blkg);
}

/*
 * XXX: This updates blkg policy data in-place for the root blkg, which is
 * necessary across elevator switch and policy registration as root blkgs
 * aren't shot down.  This broken and racy implementation is temporary.
 * Eventually, blkg shoot down will be replaced by proper in-place update.
 */
void update_root_blkg_pd(struct request_queue *q, enum blkio_policy_id plid)
{
	struct blkio_policy_type *pol = blkio_policy[plid];
	struct blkio_group *blkg = blkg_lookup(&blkio_root_cgroup, q);
	struct blkg_policy_data *pd;

	if (!blkg)
		return;

	kfree(blkg->pd[plid]);
	blkg->pd[plid] = NULL;

	if (!pol)
		return;

	pd = kzalloc(sizeof(*pd) + pol->pdata_size, GFP_KERNEL);
	WARN_ON_ONCE(!pd);

	blkg->pd[plid] = pd;
	pd->blkg = blkg;
	pol->ops.blkio_init_group_fn(blkg);
}
EXPORT_SYMBOL_GPL(update_root_blkg_pd);

/**
 * blkg_destroy_all - destroy all blkgs associated with a request_queue
 * @q: request_queue of interest
 * @destroy_root: whether to destroy root blkg or not
 *
 * Destroy blkgs associated with @q.  If @destroy_root is %true, all are
 * destroyed; otherwise, the root blkg is left alone.
 */
void blkg_destroy_all(struct request_queue *q, bool destroy_root)
{
	struct blkio_group *blkg, *n;

	spin_lock_irq(q->queue_lock);

	list_for_each_entry_safe(blkg, n, &q->blkg_list, q_node) {
		struct blkio_cgroup *blkcg = blkg->blkcg;

		/* skip root? */
		if (!destroy_root && blkg->blkcg == &blkio_root_cgroup)
			continue;

		spin_lock(&blkcg->lock);
		blkg_destroy(blkg);
		spin_unlock(&blkcg->lock);
	}

	spin_unlock_irq(q->queue_lock);
}
EXPORT_SYMBOL_GPL(blkg_destroy_all);

static void blkg_rcu_free(struct rcu_head *rcu_head)
{
	blkg_free(container_of(rcu_head, struct blkio_group, rcu_head));
}

void __blkg_release(struct blkio_group *blkg)
{
	/* release the extra blkcg reference this blkg has been holding */
	css_put(&blkg->blkcg->css);

	/*
	 * A group is freed in an RCU manner.  But having an rcu lock does
	 * not mean that one can access all the fields of blkg and assume
	 * these are valid.  For example, don't try to follow throtl_data
	 * and request queue links.
	 *
	 * Having a reference to blkg under an rcu allows access only to
	 * values local to groups, like group stats and group rate limits.
	 */
	call_rcu(&blkg->rcu_head, blkg_rcu_free);
}
EXPORT_SYMBOL_GPL(__blkg_release);

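/*
 * Illustrative sketch (not part of the original file): what "values local
 * to groups" means in practice for an RCU-only reader.  With just
 * rcu_read_lock() held it is OK to look at a blkg's own policy data, but
 * not to chase blkg->q back into the request_queue:
 *
 *	rcu_read_lock();
 *	blkg = blkg_lookup(blkcg, q);
 *	if (blkg && blkg->pd[plid])
 *		... read per-group stats from blkg->pd[plid]->pdata ...
 *	rcu_read_unlock();
 */
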
static int
blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val)
{
	struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);
	struct blkio_group *blkg;
	struct hlist_node *n;

	spin_lock(&blkio_list_lock);
	spin_lock_irq(&blkcg->lock);

	/*
	 * Note that stat reset is racy - it doesn't synchronize against
	 * stat updates.  This is a debug feature which shouldn't exist
	 * anyway.  If you get hit by a race, retry.
	 */
	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
		struct blkio_policy_type *pol;

		list_for_each_entry(pol, &blkio_list, list)
			if (pol->ops.blkio_reset_group_stats_fn)
				pol->ops.blkio_reset_group_stats_fn(blkg);
	}

	spin_unlock_irq(&blkcg->lock);
	spin_unlock(&blkio_list_lock);
	return 0;
}

static const char *blkg_dev_name(struct blkio_group *blkg)
{
	/* some drivers (floppy) instantiate a queue w/o disk registered */
	if (blkg->q->backing_dev_info.dev)
		return dev_name(blkg->q->backing_dev_info.dev);
	return NULL;
}

/**
 * blkcg_print_blkgs - helper for printing per-blkg data
 * @sf: seq_file to print to
 * @blkcg: blkcg of interest
 * @prfill: fill function to print out a blkg
 * @pol: policy in question
 * @data: data to be passed to @prfill
 * @show_total: whether to print out the sum of the @prfill return values
 *
 * This function invokes @prfill on each blkg of @blkcg if pd for the
 * policy specified by @pol exists.  @prfill is invoked with @sf, the
 * policy data and @data.  If @show_total is %true, the sum of the return
 * values from @prfill is printed with a "Total" label at the end.
 *
 * This is to be used to construct print functions for the
 * cftype->read_seq_string method.
 */
void blkcg_print_blkgs(struct seq_file *sf, struct blkio_cgroup *blkcg,
		       u64 (*prfill)(struct seq_file *, struct blkg_policy_data *, int),
		       int pol, int data, bool show_total)
{
	struct blkio_group *blkg;
	struct hlist_node *n;
	u64 total = 0;

	spin_lock_irq(&blkcg->lock);
	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node)
		if (blkg->pd[pol])
			total += prfill(sf, blkg->pd[pol], data);
	spin_unlock_irq(&blkcg->lock);

	if (show_total)
		seq_printf(sf, "Total %llu\n", (unsigned long long)total);
}
EXPORT_SYMBOL_GPL(blkcg_print_blkgs);

/**
 * __blkg_prfill_u64 - prfill helper for a single u64 value
 * @sf: seq_file to print to
 * @pd: policy data of interest
 * @v: value to print
 *
 * Print @v to @sf for the device associated with @pd.
 */
u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v)
{
	const char *dname = blkg_dev_name(pd->blkg);

	if (!dname)
		return 0;

	seq_printf(sf, "%s %llu\n", dname, (unsigned long long)v);
	return v;
}
EXPORT_SYMBOL_GPL(__blkg_prfill_u64);

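/*
 * Illustrative sketch (not part of the original file): how a policy might
 * build a read_seq_string handler from blkcg_print_blkgs() and a custom
 * prfill callback.  foo_pdata, foo_prfill_weight and foo_print_weight are
 * hypothetical names.
 *
 *	static u64 foo_prfill_weight(struct seq_file *sf,
 *				     struct blkg_policy_data *pd, int off)
 *	{
 *		struct foo_pdata *fp = (void *)pd->pdata;
 *
 *		return __blkg_prfill_u64(sf, pd, fp->weight);
 *	}
 *
 *	static int foo_print_weight(struct cgroup *cgrp, struct cftype *cft,
 *				    struct seq_file *sf)
 *	{
 *		blkcg_print_blkgs(sf, cgroup_to_blkio_cgroup(cgrp),
 *				  foo_prfill_weight, BLKIO_POLICY_PROP, 0,
 *				  false);
 *		return 0;
 *	}
 */
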
/**
 * __blkg_prfill_rwstat - prfill helper for a blkg_rwstat
 * @sf: seq_file to print to
 * @pd: policy data of interest
 * @rwstat: rwstat to print
 *
 * Print @rwstat to @sf for the device associated with @pd.
 */
u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
			 const struct blkg_rwstat *rwstat)
{
	static const char *rwstr[] = {
		[BLKG_RWSTAT_READ]	= "Read",
		[BLKG_RWSTAT_WRITE]	= "Write",
		[BLKG_RWSTAT_SYNC]	= "Sync",
		[BLKG_RWSTAT_ASYNC]	= "Async",
	};
	const char *dname = blkg_dev_name(pd->blkg);
	u64 v;
	int i;

	if (!dname)
		return 0;

	for (i = 0; i < BLKG_RWSTAT_NR; i++)
		seq_printf(sf, "%s %s %llu\n", dname, rwstr[i],
			   (unsigned long long)rwstat->cnt[i]);

	v = rwstat->cnt[BLKG_RWSTAT_READ] + rwstat->cnt[BLKG_RWSTAT_WRITE];
	seq_printf(sf, "%s Total %llu\n", dname, (unsigned long long)v);
	return v;
}

static u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd,
			    int off)
{
	return __blkg_prfill_u64(sf, pd,
				 blkg_stat_read((void *)pd->pdata + off));
}

static u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
			      int off)
{
	struct blkg_rwstat rwstat = blkg_rwstat_read((void *)pd->pdata + off);

	return __blkg_prfill_rwstat(sf, pd, &rwstat);
}

/* print blkg_stat specified by BLKCG_STAT_PRIV() */
int blkcg_print_stat(struct cgroup *cgrp, struct cftype *cft,
		     struct seq_file *sf)
{
	struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp);

	blkcg_print_blkgs(sf, blkcg, blkg_prfill_stat,
			  BLKCG_STAT_POL(cft->private),
			  BLKCG_STAT_OFF(cft->private), false);
	return 0;
}
EXPORT_SYMBOL_GPL(blkcg_print_stat);

/* print blkg_rwstat specified by BLKCG_STAT_PRIV() */
int blkcg_print_rwstat(struct cgroup *cgrp, struct cftype *cft,
		       struct seq_file *sf)
{
	struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp);

	blkcg_print_blkgs(sf, blkcg, blkg_prfill_rwstat,
			  BLKCG_STAT_POL(cft->private),
			  BLKCG_STAT_OFF(cft->private), true);
	return 0;
}
EXPORT_SYMBOL_GPL(blkcg_print_rwstat);

/**
 * blkg_conf_prep - parse and prepare for per-blkg config update
 * @blkcg: target block cgroup
 * @input: input string
 * @ctx: blkg_conf_ctx to be filled
 *
 * Parse per-blkg config update from @input and initialize @ctx with the
 * result.  @ctx->blkg points to the blkg to be updated and @ctx->v the new
 * value.  This function returns with RCU read locked and must be paired
 * with blkg_conf_finish().
 */
int blkg_conf_prep(struct blkio_cgroup *blkcg, const char *input,
		   struct blkg_conf_ctx *ctx)
	__acquires(rcu)
{
	struct gendisk *disk;
	struct blkio_group *blkg;
	unsigned int major, minor;
	unsigned long long v;
	int part, ret;

	if (sscanf(input, "%u:%u %llu", &major, &minor, &v) != 3)
		return -EINVAL;

	disk = get_gendisk(MKDEV(major, minor), &part);
	if (!disk || part)
		return -EINVAL;

	rcu_read_lock();

	spin_lock_irq(disk->queue->queue_lock);
	blkg = blkg_lookup_create(blkcg, disk->queue, false);
	spin_unlock_irq(disk->queue->queue_lock);

	if (IS_ERR(blkg)) {
		ret = PTR_ERR(blkg);
		rcu_read_unlock();
		put_disk(disk);
		/*
		 * If queue was bypassing, we should retry.  Do so after a
		 * short msleep().  It isn't strictly necessary but the
		 * queue can be bypassing for some time and it's always
		 * nice to avoid busy looping.
		 */
		if (ret == -EBUSY) {
			msleep(10);
			ret = restart_syscall();
		}
		return ret;
	}

	ctx->disk = disk;
	ctx->blkg = blkg;
	ctx->v = v;
	return 0;
}
EXPORT_SYMBOL_GPL(blkg_conf_prep);

/**
 * blkg_conf_finish - finish up per-blkg config update
 * @ctx: blkg_conf_ctx initialized by blkg_conf_prep()
 *
 * Finish up after per-blkg config update.  This function must be paired
 * with blkg_conf_prep().
 */
void blkg_conf_finish(struct blkg_conf_ctx *ctx)
	__releases(rcu)
{
	rcu_read_unlock();
	put_disk(ctx->disk);
}
EXPORT_SYMBOL_GPL(blkg_conf_finish);

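/*
 * Illustrative sketch (not part of the original file): the prep/finish
 * pairing a policy's config-write handler is expected to follow.
 * foo_set_weight and the per-policy data layout are hypothetical.
 *
 *	static int foo_set_weight(struct cgroup *cgrp, struct cftype *cft,
 *				  const char *buf)
 *	{
 *		struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp);
 *		struct blkg_conf_ctx ctx;
 *		int ret;
 *
 *		ret = blkg_conf_prep(blkcg, buf, &ctx);
 *		if (ret)
 *			return ret;
 *
 *		... apply ctx.v to ctx.blkg's per-policy data ...
 *
 *		blkg_conf_finish(&ctx);
 *		return 0;
 *	}
 */
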
struct cftype blkio_files[] = {
	{
		.name = "reset_stats",
		.write_u64 = blkiocg_reset_stats,
	},
	{ }	/* terminate */
};

/**
 * blkiocg_pre_destroy - cgroup pre_destroy callback
 * @cgroup: cgroup of interest
 *
 * This function is called when @cgroup is about to go away and is
 * responsible for shooting down all blkgs associated with @cgroup.
 * blkgs should be removed while holding both q and blkcg locks.  As
 * blkcg lock is nested inside q lock, this function performs reverse
 * double lock dancing.
 *
 * This is the blkcg counterpart of ioc_release_fn().
 */
static int blkiocg_pre_destroy(struct cgroup *cgroup)
{
	struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);

	spin_lock_irq(&blkcg->lock);

	while (!hlist_empty(&blkcg->blkg_list)) {
		struct blkio_group *blkg = hlist_entry(blkcg->blkg_list.first,
						struct blkio_group, blkcg_node);
		struct request_queue *q = blkg->q;

		if (spin_trylock(q->queue_lock)) {
			blkg_destroy(blkg);
			spin_unlock(q->queue_lock);
		} else {
			spin_unlock_irq(&blkcg->lock);
			cpu_relax();
			spin_lock_irq(&blkcg->lock);
		}
	}

	spin_unlock_irq(&blkcg->lock);
	return 0;
}

static void blkiocg_destroy(struct cgroup *cgroup)
{
	struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);

	if (blkcg != &blkio_root_cgroup)
		kfree(blkcg);
}

static struct cgroup_subsys_state *blkiocg_create(struct cgroup *cgroup)
{
	static atomic64_t id_seq = ATOMIC64_INIT(0);
	struct blkio_cgroup *blkcg;
	struct cgroup *parent = cgroup->parent;

	if (!parent) {
		blkcg = &blkio_root_cgroup;
		goto done;
	}

	blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL);
	if (!blkcg)
		return ERR_PTR(-ENOMEM);

	blkcg->weight = BLKIO_WEIGHT_DEFAULT;
	blkcg->id = atomic64_inc_return(&id_seq); /* root is 0, start from 1 */
done:
	spin_lock_init(&blkcg->lock);
	INIT_HLIST_HEAD(&blkcg->blkg_list);

	return &blkcg->css;
}

/**
 * blkcg_init_queue - initialize blkcg part of request queue
 * @q: request_queue to initialize
 *
 * Called from blk_alloc_queue_node().  Responsible for initializing blkcg
 * part of new request_queue @q.
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */
int blkcg_init_queue(struct request_queue *q)
{
	int ret;

	might_sleep();

	ret = blk_throtl_init(q);
	if (ret)
		return ret;

	mutex_lock(&all_q_mutex);
	INIT_LIST_HEAD(&q->all_q_node);
	list_add_tail(&q->all_q_node, &all_q_list);
	mutex_unlock(&all_q_mutex);

	return 0;
}

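/*
 * Note (not from the original file): blkg_destroy() asserts that both
 * q->queue_lock and blkcg->lock are held.  Queue-side teardown
 * (blkg_destroy_all()) takes queue_lock first and then blkcg->lock,
 * matching the nesting order, while cgroup-side teardown
 * (blkiocg_pre_destroy() above) already holds blkcg->lock and can
 * therefore only trylock queue_lock, backing off and retrying when the
 * trylock fails.
 */
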
/**
 * blkcg_drain_queue - drain blkcg part of request_queue
 * @q: request_queue to drain
 *
 * Called from blk_drain_queue().  Responsible for draining blkcg part.
 */
void blkcg_drain_queue(struct request_queue *q)
{
	lockdep_assert_held(q->queue_lock);

	blk_throtl_drain(q);
}

/**
 * blkcg_exit_queue - exit and release blkcg part of request_queue
 * @q: request_queue being released
 *
 * Called from blk_release_queue().  Responsible for exiting blkcg part.
 */
void blkcg_exit_queue(struct request_queue *q)
{
	mutex_lock(&all_q_mutex);
	list_del_init(&q->all_q_node);
	mutex_unlock(&all_q_mutex);

	blkg_destroy_all(q, true);

	blk_throtl_exit(q);
}

/*
 * We cannot support shared io contexts, as we have no means to support
 * two tasks with the same ioc in two different groups without major rework
 * of the main cic data structures.  For now we allow a task to change
 * its cgroup only if it's the only owner of its ioc.
 */
static int blkiocg_can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
{
	struct task_struct *task;
	struct io_context *ioc;
	int ret = 0;

	/* task_lock() is needed to avoid races with exit_io_context() */
	cgroup_taskset_for_each(task, cgrp, tset) {
		task_lock(task);
		ioc = task->io_context;
		if (ioc && atomic_read(&ioc->nr_tasks) > 1)
			ret = -EINVAL;
		task_unlock(task);
		if (ret)
			break;
	}
	return ret;
}

static void blkcg_bypass_start(void)
	__acquires(&all_q_mutex)
{
	struct request_queue *q;

	mutex_lock(&all_q_mutex);

	list_for_each_entry(q, &all_q_list, all_q_node) {
		blk_queue_bypass_start(q);
		blkg_destroy_all(q, false);
	}
}

static void blkcg_bypass_end(void)
	__releases(&all_q_mutex)
{
	struct request_queue *q;

	list_for_each_entry(q, &all_q_list, all_q_node)
		blk_queue_bypass_end(q);

	mutex_unlock(&all_q_mutex);
}

struct cgroup_subsys blkio_subsys = {
	.name = "blkio",
	.create = blkiocg_create,
	.can_attach = blkiocg_can_attach,
	.pre_destroy = blkiocg_pre_destroy,
	.destroy = blkiocg_destroy,
	.subsys_id = blkio_subsys_id,
	.base_cftypes = blkio_files,
	.module = THIS_MODULE,
};
EXPORT_SYMBOL_GPL(blkio_subsys);

void blkio_policy_register(struct blkio_policy_type *blkiop)
{
	struct request_queue *q;

	blkcg_bypass_start();
	spin_lock(&blkio_list_lock);

	BUG_ON(blkio_policy[blkiop->plid]);
	blkio_policy[blkiop->plid] = blkiop;
	list_add_tail(&blkiop->list, &blkio_list);

	spin_unlock(&blkio_list_lock);
	list_for_each_entry(q, &all_q_list, all_q_node)
		update_root_blkg_pd(q, blkiop->plid);
	blkcg_bypass_end();

	if (blkiop->cftypes)
		WARN_ON(cgroup_add_cftypes(&blkio_subsys, blkiop->cftypes));
}
EXPORT_SYMBOL_GPL(blkio_policy_register);

void blkio_policy_unregister(struct blkio_policy_type *blkiop)
{
	struct request_queue *q;

	if (blkiop->cftypes)
		cgroup_rm_cftypes(&blkio_subsys, blkiop->cftypes);

	blkcg_bypass_start();
	spin_lock(&blkio_list_lock);

	BUG_ON(blkio_policy[blkiop->plid] != blkiop);
	blkio_policy[blkiop->plid] = NULL;
	list_del_init(&blkiop->list);

	spin_unlock(&blkio_list_lock);
	list_for_each_entry(q, &all_q_list, all_q_node)
		update_root_blkg_pd(q, blkiop->plid);
	blkcg_bypass_end();
}
EXPORT_SYMBOL_GPL(blkio_policy_unregister);
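
/*
 * Illustrative sketch (not part of the original file): how a blkcg policy
 * module might describe itself and register/unregister.  The foo_* names
 * are hypothetical; the fields mirror the ones this file dereferences
 * (ops.blkio_init_group_fn, ops.blkio_exit_group_fn,
 * ops.blkio_reset_group_stats_fn, plid, pdata_size and cftypes).
 *
 *	static struct blkio_policy_type blkio_policy_foo = {
 *		.ops = {
 *			.blkio_init_group_fn		= foo_init_blkio_group,
 *			.blkio_exit_group_fn		= foo_exit_blkio_group,
 *			.blkio_reset_group_stats_fn	= foo_reset_group_stats,
 *		},
 *		.plid		= BLKIO_POLICY_PROP,
 *		.pdata_size	= sizeof(struct foo_group_data),
 *		.cftypes	= foo_blkcg_files,
 *	};
 *
 *	static int __init foo_init(void)
 *	{
 *		blkio_policy_register(&blkio_policy_foo);
 *		return 0;
 *	}
 *
 *	static void __exit foo_exit(void)
 *	{
 *		blkio_policy_unregister(&blkio_policy_foo);
 *	}
 */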