blk-cgroup.c revision 8a3d26151f24e2a2ffa550890144c3d54d2edb15
/*
 * Common Block IO controller cgroup interface
 *
 * Based on ideas and code from CFQ, CFS and BFQ:
 * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
 *
 * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
 *		      Paolo Valente <paolo.valente@unimore.it>
 *
 * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com>
 *		      Nauman Rafique <nauman@google.com>
 */
#include <linux/ioprio.h>
#include <linux/kdev_t.h>
#include <linux/module.h>
#include <linux/err.h>
#include <linux/blkdev.h>
#include <linux/slab.h>
#include <linux/genhd.h>
#include <linux/delay.h>
#include <linux/atomic.h>
#include "blk-cgroup.h"
#include "blk.h"

#define MAX_KEY_LEN 100

static DEFINE_SPINLOCK(blkio_list_lock);
static LIST_HEAD(blkio_list);

static DEFINE_MUTEX(all_q_mutex);
static LIST_HEAD(all_q_list);

struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT };
EXPORT_SYMBOL_GPL(blkio_root_cgroup);

static struct blkio_policy_type *blkio_policy[BLKIO_NR_POLICIES];

struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup)
{
	return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id),
			    struct blkio_cgroup, css);
}
EXPORT_SYMBOL_GPL(cgroup_to_blkio_cgroup);

static struct blkio_cgroup *task_blkio_cgroup(struct task_struct *tsk)
{
	return container_of(task_subsys_state(tsk, blkio_subsys_id),
			    struct blkio_cgroup, css);
}

struct blkio_cgroup *bio_blkio_cgroup(struct bio *bio)
{
	if (bio && bio->bi_css)
		return container_of(bio->bi_css, struct blkio_cgroup, css);
	return task_blkio_cgroup(current);
}
EXPORT_SYMBOL_GPL(bio_blkio_cgroup);

/**
 * blkg_free - free a blkg
 * @blkg: blkg to free
 *
 * Free @blkg which may be partially allocated.
 */
static void blkg_free(struct blkio_group *blkg)
{
	int i;

	if (!blkg)
		return;

	for (i = 0; i < BLKIO_NR_POLICIES; i++) {
		struct blkio_policy_type *pol = blkio_policy[i];
		struct blkg_policy_data *pd = blkg->pd[i];

		if (!pd)
			continue;

		if (pol && pol->ops.blkio_exit_group_fn)
			pol->ops.blkio_exit_group_fn(blkg);

		kfree(pd);
	}

	kfree(blkg);
}

/**
 * blkg_alloc - allocate a blkg
 * @blkcg: block cgroup the new blkg is associated with
 * @q: request_queue the new blkg is associated with
 *
 * Allocate a new blkg associating @blkcg and @q.
 */
static struct blkio_group *blkg_alloc(struct blkio_cgroup *blkcg,
				      struct request_queue *q)
{
	struct blkio_group *blkg;
	int i;

	/* alloc and init base part */
	blkg = kzalloc_node(sizeof(*blkg), GFP_ATOMIC, q->node);
	if (!blkg)
		return NULL;

	blkg->q = q;
	INIT_LIST_HEAD(&blkg->q_node);
	blkg->blkcg = blkcg;
	blkg->refcnt = 1;
	cgroup_path(blkcg->css.cgroup, blkg->path, sizeof(blkg->path));

	for (i = 0; i < BLKIO_NR_POLICIES; i++) {
		struct blkio_policy_type *pol = blkio_policy[i];
		struct blkg_policy_data *pd;

		if (!pol)
			continue;

		/* alloc per-policy data and attach it to blkg */
		pd = kzalloc_node(sizeof(*pd) + pol->pdata_size, GFP_ATOMIC,
				  q->node);
		if (!pd) {
			blkg_free(blkg);
			return NULL;
		}

		blkg->pd[i] = pd;
		pd->blkg = blkg;
	}

	/* invoke per-policy init */
	for (i = 0; i < BLKIO_NR_POLICIES; i++) {
		struct blkio_policy_type *pol = blkio_policy[i];

		if (pol)
			pol->ops.blkio_init_group_fn(blkg);
	}

	return blkg;
}

struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg,
				       struct request_queue *q,
				       bool for_root)
	__releases(q->queue_lock) __acquires(q->queue_lock)
{
	struct blkio_group *blkg;

	WARN_ON_ONCE(!rcu_read_lock_held());
	lockdep_assert_held(q->queue_lock);

	/*
	 * This could be the first entry point of blkcg implementation and
	 * we shouldn't allow anything to go through for a bypassing queue.
	 * The following can be removed if blkg lookup is guaranteed to
	 * fail on a bypassing queue.
	 */
	if (unlikely(blk_queue_bypass(q)) && !for_root)
		return ERR_PTR(blk_queue_dead(q) ? -EINVAL : -EBUSY);

	blkg = blkg_lookup(blkcg, q);
	if (blkg)
		return blkg;

	/* blkg holds a reference to blkcg */
	if (!css_tryget(&blkcg->css))
		return ERR_PTR(-EINVAL);

	/*
	 * Allocate and initialize.
	 */
	blkg = blkg_alloc(blkcg, q);

	/* did alloc fail? */
	if (unlikely(!blkg)) {
		blkg = ERR_PTR(-ENOMEM);
		goto out;
	}

	/* insert */
	spin_lock(&blkcg->lock);
	hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list);
	list_add(&blkg->q_node, &q->blkg_list);
	spin_unlock(&blkcg->lock);
out:
	return blkg;
}
EXPORT_SYMBOL_GPL(blkg_lookup_create);
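
/*
 * Illustrative calling pattern for blkg_lookup_create() (a sketch only;
 * blkg_conf_prep() below does this for real).  The caller is expected to
 * hold both the RCU read lock and the queue lock, matching the asserts at
 * the top of the function, and should keep the RCU read lock held for as
 * long as it dereferences the returned blkg:
 *
 *	rcu_read_lock();
 *	spin_lock_irq(q->queue_lock);
 *	blkg = blkg_lookup_create(blkcg, q, false);
 *	spin_unlock_irq(q->queue_lock);
 *	... use blkg under RCU, checking IS_ERR(blkg) first ...
 *	rcu_read_unlock();
 */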

/* called under rcu_read_lock(). */
struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg,
				struct request_queue *q)
{
	struct blkio_group *blkg;
	struct hlist_node *n;

	hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node)
		if (blkg->q == q)
			return blkg;
	return NULL;
}
EXPORT_SYMBOL_GPL(blkg_lookup);

static void blkg_destroy(struct blkio_group *blkg)
{
	struct request_queue *q = blkg->q;
	struct blkio_cgroup *blkcg = blkg->blkcg;

	lockdep_assert_held(q->queue_lock);
	lockdep_assert_held(&blkcg->lock);

	/* Something is wrong if we are trying to remove the same group twice */
	WARN_ON_ONCE(list_empty(&blkg->q_node));
	WARN_ON_ONCE(hlist_unhashed(&blkg->blkcg_node));
	list_del_init(&blkg->q_node);
	hlist_del_init_rcu(&blkg->blkcg_node);

	/*
	 * Put the reference taken at the time of creation so that when all
	 * queues are gone, the group can be destroyed.
	 */
	blkg_put(blkg);
}

/*
 * XXX: This updates blkg policy data in-place for the root blkg, which is
 * necessary across elevator switches and policy registration as root blkgs
 * aren't shot down.  This broken and racy implementation is temporary.
 * Eventually, blkg shoot-down will be replaced by proper in-place update.
 */
void update_root_blkg_pd(struct request_queue *q, enum blkio_policy_id plid)
{
	struct blkio_policy_type *pol = blkio_policy[plid];
	struct blkio_group *blkg = blkg_lookup(&blkio_root_cgroup, q);
	struct blkg_policy_data *pd;

	if (!blkg)
		return;

	kfree(blkg->pd[plid]);
	blkg->pd[plid] = NULL;

	if (!pol)
		return;

	pd = kzalloc(sizeof(*pd) + pol->pdata_size, GFP_KERNEL);
	WARN_ON_ONCE(!pd);

	blkg->pd[plid] = pd;
	pd->blkg = blkg;
	pol->ops.blkio_init_group_fn(blkg);
}
EXPORT_SYMBOL_GPL(update_root_blkg_pd);

/**
 * blkg_destroy_all - destroy all blkgs associated with a request_queue
 * @q: request_queue of interest
 * @destroy_root: whether to destroy root blkg or not
 *
 * Destroy blkgs associated with @q.  If @destroy_root is %true, all are
 * destroyed; otherwise, the root blkg is left alone.
 */
void blkg_destroy_all(struct request_queue *q, bool destroy_root)
{
	struct blkio_group *blkg, *n;

	spin_lock_irq(q->queue_lock);

	list_for_each_entry_safe(blkg, n, &q->blkg_list, q_node) {
		struct blkio_cgroup *blkcg = blkg->blkcg;

		/* skip root? */
		if (!destroy_root && blkg->blkcg == &blkio_root_cgroup)
			continue;

		spin_lock(&blkcg->lock);
		blkg_destroy(blkg);
		spin_unlock(&blkcg->lock);
	}

	spin_unlock_irq(q->queue_lock);
}
EXPORT_SYMBOL_GPL(blkg_destroy_all);

static void blkg_rcu_free(struct rcu_head *rcu_head)
{
	blkg_free(container_of(rcu_head, struct blkio_group, rcu_head));
}

void __blkg_release(struct blkio_group *blkg)
{
	/* release the extra blkcg reference this blkg has been holding */
	css_put(&blkg->blkcg->css);

	/*
	 * A group is freed in an RCU manner.  But holding the RCU read lock
	 * does not mean that one can access all the fields of blkg and assume
	 * they are valid.  For example, don't try to follow throtl_data and
	 * request queue links.
	 *
	 * Having a reference to blkg under RCU allows access only to values
	 * local to the group, such as group stats and group rate limits.
	 */
	call_rcu(&blkg->rcu_head, blkg_rcu_free);
}
EXPORT_SYMBOL_GPL(__blkg_release);

static int
blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val)
{
	struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);
	struct blkio_group *blkg;
	struct hlist_node *n;

	spin_lock(&blkio_list_lock);
	spin_lock_irq(&blkcg->lock);

	/*
	 * Note that stat reset is racy - it doesn't synchronize against
	 * stat updates.  This is a debug feature which shouldn't exist
	 * anyway.  If you get hit by a race, retry.
	 */
	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
		struct blkio_policy_type *pol;

		list_for_each_entry(pol, &blkio_list, list)
			if (pol->ops.blkio_reset_group_stats_fn)
				pol->ops.blkio_reset_group_stats_fn(blkg);
	}

	spin_unlock_irq(&blkcg->lock);
	spin_unlock(&blkio_list_lock);
	return 0;
}

static const char *blkg_dev_name(struct blkio_group *blkg)
{
	/* some drivers (floppy) instantiate a queue w/o disk registered */
	if (blkg->q->backing_dev_info.dev)
		return dev_name(blkg->q->backing_dev_info.dev);
	return NULL;
}

/**
 * blkcg_print_blkgs - helper for printing per-blkg data
 * @sf: seq_file to print to
 * @blkcg: blkcg of interest
 * @prfill: fill function to print out a blkg
 * @pol: policy in question
 * @data: data to be passed to @prfill
 * @show_total: whether to print out the sum of prfill return values or not
 *
 * This function invokes @prfill on each blkg of @blkcg if pd for the
 * policy specified by @pol exists.  @prfill is invoked with @sf, the
 * policy data and @data.  If @show_total is %true, the sum of the return
 * values from @prfill is printed with a "Total" label at the end.
 *
 * This is to be used to construct print functions for the
 * cftype->read_seq_string method.
 */
void blkcg_print_blkgs(struct seq_file *sf, struct blkio_cgroup *blkcg,
		       u64 (*prfill)(struct seq_file *, struct blkg_policy_data *, int),
		       int pol, int data, bool show_total)
{
	struct blkio_group *blkg;
	struct hlist_node *n;
	u64 total = 0;

	spin_lock_irq(&blkcg->lock);
	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node)
		if (blkg->pd[pol])
			total += prfill(sf, blkg->pd[pol], data);
	spin_unlock_irq(&blkcg->lock);

	if (show_total)
		seq_printf(sf, "Total %llu\n", (unsigned long long)total);
}
EXPORT_SYMBOL_GPL(blkcg_print_blkgs);

/**
 * __blkg_prfill_u64 - prfill helper for a single u64 value
 * @sf: seq_file to print to
 * @pd: policy data of interest
 * @v: value to print
 *
 * Print @v to @sf for the device associated with @pd.
 */
u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v)
{
	const char *dname = blkg_dev_name(pd->blkg);

	if (!dname)
		return 0;

	seq_printf(sf, "%s %llu\n", dname, (unsigned long long)v);
	return v;
}
EXPORT_SYMBOL_GPL(__blkg_prfill_u64);

/**
 * __blkg_prfill_rwstat - prfill helper for a blkg_rwstat
 * @sf: seq_file to print to
 * @pd: policy data of interest
 * @rwstat: rwstat to print
 *
 * Print @rwstat to @sf for the device associated with @pd.
 */
u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
			 const struct blkg_rwstat *rwstat)
{
	static const char *rwstr[] = {
		[BLKG_RWSTAT_READ]	= "Read",
		[BLKG_RWSTAT_WRITE]	= "Write",
		[BLKG_RWSTAT_SYNC]	= "Sync",
		[BLKG_RWSTAT_ASYNC]	= "Async",
	};
	const char *dname = blkg_dev_name(pd->blkg);
	u64 v;
	int i;

	if (!dname)
		return 0;

	for (i = 0; i < BLKG_RWSTAT_NR; i++)
		seq_printf(sf, "%s %s %llu\n", dname, rwstr[i],
			   (unsigned long long)rwstat->cnt[i]);

	v = rwstat->cnt[BLKG_RWSTAT_READ] + rwstat->cnt[BLKG_RWSTAT_WRITE];
	seq_printf(sf, "%s Total %llu\n", dname, (unsigned long long)v);
	return v;
}

static u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd,
			    int off)
{
	return __blkg_prfill_u64(sf, pd,
				 blkg_stat_read((void *)pd->pdata + off));
}

static u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
			      int off)
{
	struct blkg_rwstat rwstat = blkg_rwstat_read((void *)pd->pdata + off);

	return __blkg_prfill_rwstat(sf, pd, &rwstat);
}

/* print blkg_stat specified by BLKCG_STAT_PRIV() */
int blkcg_print_stat(struct cgroup *cgrp, struct cftype *cft,
		     struct seq_file *sf)
{
	struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp);

	blkcg_print_blkgs(sf, blkcg, blkg_prfill_stat,
			  BLKCG_STAT_POL(cft->private),
			  BLKCG_STAT_OFF(cft->private), false);
	return 0;
}
EXPORT_SYMBOL_GPL(blkcg_print_stat);

/* print blkg_rwstat specified by BLKCG_STAT_PRIV() */
int blkcg_print_rwstat(struct cgroup *cgrp, struct cftype *cft,
		       struct seq_file *sf)
{
	struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp);

	blkcg_print_blkgs(sf, blkcg, blkg_prfill_rwstat,
			  BLKCG_STAT_POL(cft->private),
			  BLKCG_STAT_OFF(cft->private), true);
	return 0;
}
EXPORT_SYMBOL_GPL(blkcg_print_rwstat);
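
/*
 * Illustrative sketch (not part of this file): a policy wires these print
 * helpers into its cftypes by packing the policy id and the stat's offset
 * within its per-blkg policy data into cft->private.  The struct and file
 * names below are hypothetical, and BLKCG_STAT_PRIV() is assumed to be the
 * packing counterpart of the BLKCG_STAT_POL()/BLKCG_STAT_OFF() decoding
 * used above:
 *
 *	static struct cftype foo_files[] = {
 *		{
 *			.name = "foo.io_service_bytes",
 *			.private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP,
 *				offsetof(struct foo_pdata, service_bytes)),
 *			.read_seq_string = blkcg_print_rwstat,
 *		},
 *		{ }
 *	};
 */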

/**
 * blkg_conf_prep - parse and prepare for per-blkg config update
 * @blkcg: target block cgroup
 * @input: input string
 * @ctx: blkg_conf_ctx to be filled
 *
 * Parse per-blkg config update from @input and initialize @ctx with the
 * result.  @ctx->blkg points to the blkg to be updated and @ctx->v the new
 * value.  This function returns with the RCU read lock held and must be
 * paired with blkg_conf_finish().
 */
int blkg_conf_prep(struct blkio_cgroup *blkcg, const char *input,
		   struct blkg_conf_ctx *ctx)
	__acquires(rcu)
{
	struct gendisk *disk;
	struct blkio_group *blkg;
	unsigned int major, minor;
	unsigned long long v;
	int part, ret;

	if (sscanf(input, "%u:%u %llu", &major, &minor, &v) != 3)
		return -EINVAL;

	disk = get_gendisk(MKDEV(major, minor), &part);
	if (!disk || part)
		return -EINVAL;

	rcu_read_lock();

	spin_lock_irq(disk->queue->queue_lock);
	blkg = blkg_lookup_create(blkcg, disk->queue, false);
	spin_unlock_irq(disk->queue->queue_lock);

	if (IS_ERR(blkg)) {
		ret = PTR_ERR(blkg);
		rcu_read_unlock();
		put_disk(disk);
		/*
		 * If the queue was bypassing, we should retry.  Do so after a
		 * short msleep().  It isn't strictly necessary but the queue
		 * can be bypassing for some time and it's always nice to
		 * avoid busy looping.
		 */
		if (ret == -EBUSY) {
			msleep(10);
			ret = restart_syscall();
		}
		return ret;
	}

	ctx->disk = disk;
	ctx->blkg = blkg;
	ctx->v = v;
	return 0;
}
EXPORT_SYMBOL_GPL(blkg_conf_prep);

/**
 * blkg_conf_finish - finish up per-blkg config update
 * @ctx: blkg_conf_ctx initialized by blkg_conf_prep()
 *
 * Finish up after a per-blkg config update.  This function must be paired
 * with blkg_conf_prep().
 */
void blkg_conf_finish(struct blkg_conf_ctx *ctx)
	__releases(rcu)
{
	rcu_read_unlock();
	put_disk(ctx->disk);
}
EXPORT_SYMBOL_GPL(blkg_conf_finish);
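
/*
 * Illustrative sketch of the intended prep/finish pairing in a per-device
 * config write handler.  The handler name and the way ctx.v is consumed
 * are hypothetical:
 *
 *	static int foo_set_conf_u64(struct blkio_cgroup *blkcg,
 *				    const char *buf)
 *	{
 *		struct blkg_conf_ctx ctx;
 *		int ret;
 *
 *		ret = blkg_conf_prep(blkcg, buf, &ctx);
 *		if (ret)
 *			return ret;
 *
 *		... apply ctx.v to ctx.blkg's per-policy data here ...
 *
 *		blkg_conf_finish(&ctx);
 *		return 0;
 *	}
 */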

struct cftype blkio_files[] = {
	{
		.name = "reset_stats",
		.write_u64 = blkiocg_reset_stats,
	},
	{ }	/* terminate */
};

/**
 * blkiocg_pre_destroy - cgroup pre_destroy callback
 * @cgroup: cgroup of interest
 *
 * This function is called when @cgroup is about to go away and is
 * responsible for shooting down all blkgs associated with @cgroup.  blkgs
 * should be removed while holding both q and blkcg locks.  As the blkcg
 * lock is nested inside the q lock, this function performs reverse double
 * lock dancing.
 *
 * This is the blkcg counterpart of ioc_release_fn().
 */
static int blkiocg_pre_destroy(struct cgroup *cgroup)
{
	struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);

	spin_lock_irq(&blkcg->lock);

	while (!hlist_empty(&blkcg->blkg_list)) {
		struct blkio_group *blkg = hlist_entry(blkcg->blkg_list.first,
						struct blkio_group, blkcg_node);
		struct request_queue *q = blkg->q;

		if (spin_trylock(q->queue_lock)) {
			blkg_destroy(blkg);
			spin_unlock(q->queue_lock);
		} else {
			spin_unlock_irq(&blkcg->lock);
			cpu_relax();
			spin_lock_irq(&blkcg->lock);
		}
	}

	spin_unlock_irq(&blkcg->lock);
	return 0;
}

static void blkiocg_destroy(struct cgroup *cgroup)
{
	struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);

	if (blkcg != &blkio_root_cgroup)
		kfree(blkcg);
}

static struct cgroup_subsys_state *blkiocg_create(struct cgroup *cgroup)
{
	static atomic64_t id_seq = ATOMIC64_INIT(0);
	struct blkio_cgroup *blkcg;
	struct cgroup *parent = cgroup->parent;

	if (!parent) {
		blkcg = &blkio_root_cgroup;
		goto done;
	}

	blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL);
	if (!blkcg)
		return ERR_PTR(-ENOMEM);

	blkcg->weight = BLKIO_WEIGHT_DEFAULT;
	blkcg->id = atomic64_inc_return(&id_seq); /* root is 0, start from 1 */
done:
	spin_lock_init(&blkcg->lock);
	INIT_HLIST_HEAD(&blkcg->blkg_list);

	return &blkcg->css;
}

/**
 * blkcg_init_queue - initialize blkcg part of request queue
 * @q: request_queue to initialize
 *
 * Called from blk_alloc_queue_node().  Responsible for initializing the
 * blkcg part of new request_queue @q.
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */
int blkcg_init_queue(struct request_queue *q)
{
	int ret;

	might_sleep();

	ret = blk_throtl_init(q);
	if (ret)
		return ret;

	mutex_lock(&all_q_mutex);
	INIT_LIST_HEAD(&q->all_q_node);
	list_add_tail(&q->all_q_node, &all_q_list);
	mutex_unlock(&all_q_mutex);

	return 0;
}

/**
 * blkcg_drain_queue - drain blkcg part of request_queue
 * @q: request_queue to drain
 *
 * Called from blk_drain_queue().  Responsible for draining the blkcg part.
 */
void blkcg_drain_queue(struct request_queue *q)
{
	lockdep_assert_held(q->queue_lock);

	blk_throtl_drain(q);
}

/**
 * blkcg_exit_queue - exit and release blkcg part of request_queue
 * @q: request_queue being released
 *
 * Called from blk_release_queue().  Responsible for exiting the blkcg part.
 */
void blkcg_exit_queue(struct request_queue *q)
{
	mutex_lock(&all_q_mutex);
	list_del_init(&q->all_q_node);
	mutex_unlock(&all_q_mutex);

	blkg_destroy_all(q, true);

	blk_throtl_exit(q);
}

/*
 * We cannot support shared io contexts, as we have no means to support
 * two tasks with the same ioc in two different groups without major rework
 * of the main cic data structures.  For now we allow a task to change
 * its cgroup only if it's the only owner of its ioc.
 */
static int blkiocg_can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
{
	struct task_struct *task;
	struct io_context *ioc;
	int ret = 0;

	/* task_lock() is needed to avoid races with exit_io_context() */
	cgroup_taskset_for_each(task, cgrp, tset) {
		task_lock(task);
		ioc = task->io_context;
		if (ioc && atomic_read(&ioc->nr_tasks) > 1)
			ret = -EINVAL;
		task_unlock(task);
		if (ret)
			break;
	}
	return ret;
}

static void blkcg_bypass_start(void)
	__acquires(&all_q_mutex)
{
	struct request_queue *q;

	mutex_lock(&all_q_mutex);

	list_for_each_entry(q, &all_q_list, all_q_node) {
		blk_queue_bypass_start(q);
		blkg_destroy_all(q, false);
	}
}

static void blkcg_bypass_end(void)
	__releases(&all_q_mutex)
{
	struct request_queue *q;

	list_for_each_entry(q, &all_q_list, all_q_node)
		blk_queue_bypass_end(q);

	mutex_unlock(&all_q_mutex);
}

struct cgroup_subsys blkio_subsys = {
	.name = "blkio",
	.create = blkiocg_create,
	.can_attach = blkiocg_can_attach,
	.pre_destroy = blkiocg_pre_destroy,
	.destroy = blkiocg_destroy,
	.subsys_id = blkio_subsys_id,
	.base_cftypes = blkio_files,
	.module = THIS_MODULE,
};
EXPORT_SYMBOL_GPL(blkio_subsys);

void blkio_policy_register(struct blkio_policy_type *blkiop)
{
	struct request_queue *q;

	blkcg_bypass_start();
	spin_lock(&blkio_list_lock);

	BUG_ON(blkio_policy[blkiop->plid]);
	blkio_policy[blkiop->plid] = blkiop;
	list_add_tail(&blkiop->list, &blkio_list);

	spin_unlock(&blkio_list_lock);
	list_for_each_entry(q, &all_q_list, all_q_node)
		update_root_blkg_pd(q, blkiop->plid);
	blkcg_bypass_end();

	if (blkiop->cftypes)
		WARN_ON(cgroup_add_cftypes(&blkio_subsys, blkiop->cftypes));
}
EXPORT_SYMBOL_GPL(blkio_policy_register);

void blkio_policy_unregister(struct blkio_policy_type *blkiop)
{
	struct request_queue *q;

	if (blkiop->cftypes)
		cgroup_rm_cftypes(&blkio_subsys, blkiop->cftypes);

	blkcg_bypass_start();
	spin_lock(&blkio_list_lock);

	BUG_ON(blkio_policy[blkiop->plid] != blkiop);
	blkio_policy[blkiop->plid] = NULL;
	list_del_init(&blkiop->list);

	spin_unlock(&blkio_list_lock);
	list_for_each_entry(q, &all_q_list, all_q_node)
		update_root_blkg_pd(q, blkiop->plid);
	blkcg_bypass_end();
}
EXPORT_SYMBOL_GPL(blkio_policy_unregister);
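
/*
 * Illustrative sketch of how a policy plugs into the two registration
 * helpers above.  The foo_* names are hypothetical and the policy id value
 * is assumed; the fields shown correspond to what this file actually uses
 * (the ops callbacks, plid, pdata_size and cftypes):
 *
 *	static struct blkio_policy_type blkio_policy_foo = {
 *		.ops = {
 *			.blkio_init_group_fn		= foo_init_blkio_group,
 *			.blkio_exit_group_fn		= foo_exit_blkio_group,
 *			.blkio_reset_group_stats_fn	= foo_reset_group_stats,
 *		},
 *		.plid		= BLKIO_POLICY_PROP,
 *		.pdata_size	= sizeof(struct foo_group_data),
 *		.cftypes	= foo_files,
 *	};
 *
 *	blkio_policy_register(&blkio_policy_foo);	(from module init)
 *	blkio_policy_unregister(&blkio_policy_foo);	(from module exit)
 */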