blk-cgroup.c revision ec399347d39fb2337ebace928cf4a2855bd0ec37
/*
 * Common Block IO controller cgroup interface
 *
 * Based on ideas and code from CFQ, CFS and BFQ:
 * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
 *
 * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
 *		      Paolo Valente <paolo.valente@unimore.it>
 *
 * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com>
 * 	              Nauman Rafique <nauman@google.com>
 */
#include <linux/ioprio.h>
#include <linux/kdev_t.h>
#include <linux/module.h>
#include <linux/err.h>
#include <linux/blkdev.h>
#include <linux/slab.h>
#include <linux/genhd.h>
#include <linux/delay.h>
#include <linux/atomic.h>
#include "blk-cgroup.h"
#include "blk.h"

#define MAX_KEY_LEN 100

static DEFINE_MUTEX(blkcg_pol_mutex);
static DEFINE_MUTEX(all_q_mutex);
static LIST_HEAD(all_q_list);

struct blkio_cgroup blkio_root_cgroup = { .cfq_weight = 2 * CFQ_WEIGHT_DEFAULT };
EXPORT_SYMBOL_GPL(blkio_root_cgroup);

static struct blkio_policy_type *blkio_policy[BLKIO_NR_POLICIES];

struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup)
{
	return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id),
			    struct blkio_cgroup, css);
}
EXPORT_SYMBOL_GPL(cgroup_to_blkio_cgroup);

static struct blkio_cgroup *task_blkio_cgroup(struct task_struct *tsk)
{
	return container_of(task_subsys_state(tsk, blkio_subsys_id),
			    struct blkio_cgroup, css);
}

struct blkio_cgroup *bio_blkio_cgroup(struct bio *bio)
{
	if (bio && bio->bi_css)
		return container_of(bio->bi_css, struct blkio_cgroup, css);
	return task_blkio_cgroup(current);
}
EXPORT_SYMBOL_GPL(bio_blkio_cgroup);

/**
 * blkg_free - free a blkg
 * @blkg: blkg to free
 *
 * Free @blkg which may be partially allocated.
 */
static void blkg_free(struct blkio_group *blkg)
{
	int i;

	if (!blkg)
		return;

	for (i = 0; i < BLKIO_NR_POLICIES; i++) {
		struct blkio_policy_type *pol = blkio_policy[i];
		struct blkg_policy_data *pd = blkg->pd[i];

		if (!pd)
			continue;

		if (pol && pol->ops.blkio_exit_group_fn)
			pol->ops.blkio_exit_group_fn(blkg);

		kfree(pd);
	}

	kfree(blkg);
}

/**
 * blkg_alloc - allocate a blkg
 * @blkcg: block cgroup the new blkg is associated with
 * @q: request_queue the new blkg is associated with
 *
 * Allocate a new blkg associating @blkcg and @q.
 */
static struct blkio_group *blkg_alloc(struct blkio_cgroup *blkcg,
				      struct request_queue *q)
{
	struct blkio_group *blkg;
	int i;

	/* alloc and init base part */
	blkg = kzalloc_node(sizeof(*blkg), GFP_ATOMIC, q->node);
	if (!blkg)
		return NULL;

	blkg->q = q;
	INIT_LIST_HEAD(&blkg->q_node);
	blkg->blkcg = blkcg;
	blkg->refcnt = 1;
	cgroup_path(blkcg->css.cgroup, blkg->path, sizeof(blkg->path));

	for (i = 0; i < BLKIO_NR_POLICIES; i++) {
		struct blkio_policy_type *pol = blkio_policy[i];
		struct blkg_policy_data *pd;

		if (!pol)
			continue;

		/* alloc per-policy data and attach it to blkg */
		pd = kzalloc_node(sizeof(*pd) + pol->pdata_size, GFP_ATOMIC,
				  q->node);
		if (!pd) {
			blkg_free(blkg);
			return NULL;
		}

		blkg->pd[i] = pd;
		pd->blkg = blkg;
	}

	/* invoke per-policy init */
	for (i = 0; i < BLKIO_NR_POLICIES; i++) {
		struct blkio_policy_type *pol = blkio_policy[i];

		if (pol)
			pol->ops.blkio_init_group_fn(blkg);
	}

	return blkg;
}

struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg,
				       struct request_queue *q,
				       bool for_root)
	__releases(q->queue_lock) __acquires(q->queue_lock)
{
	struct blkio_group *blkg;

	WARN_ON_ONCE(!rcu_read_lock_held());
	lockdep_assert_held(q->queue_lock);

	/*
	 * This could be the first entry point of blkcg implementation and
	 * we shouldn't allow anything to go through for a bypassing queue.
	 * The following can be removed if blkg lookup is guaranteed to
	 * fail on a bypassing queue.
	 */
	if (unlikely(blk_queue_bypass(q)) && !for_root)
		return ERR_PTR(blk_queue_dead(q) ? -EINVAL : -EBUSY);

	blkg = blkg_lookup(blkcg, q);
	if (blkg)
		return blkg;

	/* blkg holds a reference to blkcg */
	if (!css_tryget(&blkcg->css))
		return ERR_PTR(-EINVAL);

	/*
	 * Allocate and initialize.
	 */
	blkg = blkg_alloc(blkcg, q);

	/* did alloc fail? */
	if (unlikely(!blkg)) {
		blkg = ERR_PTR(-ENOMEM);
		goto out;
	}

	/* insert */
	spin_lock(&blkcg->lock);
	hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list);
	list_add(&blkg->q_node, &q->blkg_list);
	spin_unlock(&blkcg->lock);
out:
	return blkg;
}
EXPORT_SYMBOL_GPL(blkg_lookup_create);

/* called under rcu_read_lock(). */
struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg,
				struct request_queue *q)
{
	struct blkio_group *blkg;
	struct hlist_node *n;

	hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node)
		if (blkg->q == q)
			return blkg;
	return NULL;
}
EXPORT_SYMBOL_GPL(blkg_lookup);

static void blkg_destroy(struct blkio_group *blkg)
{
	struct request_queue *q = blkg->q;
	struct blkio_cgroup *blkcg = blkg->blkcg;

	lockdep_assert_held(q->queue_lock);
	lockdep_assert_held(&blkcg->lock);

	/* Something wrong if we are trying to remove same group twice */
	WARN_ON_ONCE(list_empty(&blkg->q_node));
	WARN_ON_ONCE(hlist_unhashed(&blkg->blkcg_node));
	list_del_init(&blkg->q_node);
	hlist_del_init_rcu(&blkg->blkcg_node);

	/*
	 * Put the reference taken at the time of creation so that when all
	 * queues are gone, group can be destroyed.
	 */
	blkg_put(blkg);
}

/*
 * XXX: This updates blkg policy data in-place for root blkg, which is
 * necessary across elevator switch and policy registration as root blkgs
 * aren't shot down.  This broken and racy implementation is temporary.
 * Eventually, blkg shoot down will be replaced by proper in-place update.
 */
void update_root_blkg_pd(struct request_queue *q,
			 const struct blkio_policy_type *pol)
{
	struct blkio_group *blkg = blkg_lookup(&blkio_root_cgroup, q);
	struct blkg_policy_data *pd;

	if (!blkg)
		return;

	kfree(blkg->pd[pol->plid]);
	blkg->pd[pol->plid] = NULL;

	/* if the policy isn't (or is no longer) registered, don't realloc */
	if (!blkio_policy[pol->plid])
		return;

	pd = kzalloc(sizeof(*pd) + pol->pdata_size, GFP_KERNEL);
	WARN_ON_ONCE(!pd);

	blkg->pd[pol->plid] = pd;
	pd->blkg = blkg;
	pol->ops.blkio_init_group_fn(blkg);
}
EXPORT_SYMBOL_GPL(update_root_blkg_pd);

/**
 * blkg_destroy_all - destroy all blkgs associated with a request_queue
 * @q: request_queue of interest
 * @destroy_root: whether to destroy root blkg or not
 *
 * Destroy blkgs associated with @q.  If @destroy_root is %true, all are
 * destroyed; otherwise, root blkg is left alone.
 */
void blkg_destroy_all(struct request_queue *q, bool destroy_root)
{
	struct blkio_group *blkg, *n;

	spin_lock_irq(q->queue_lock);

	list_for_each_entry_safe(blkg, n, &q->blkg_list, q_node) {
		struct blkio_cgroup *blkcg = blkg->blkcg;

		/* skip root? */
		if (!destroy_root && blkg->blkcg == &blkio_root_cgroup)
			continue;

		spin_lock(&blkcg->lock);
		blkg_destroy(blkg);
		spin_unlock(&blkcg->lock);
	}

	spin_unlock_irq(q->queue_lock);
}
EXPORT_SYMBOL_GPL(blkg_destroy_all);

static void blkg_rcu_free(struct rcu_head *rcu_head)
{
	blkg_free(container_of(rcu_head, struct blkio_group, rcu_head));
}

void __blkg_release(struct blkio_group *blkg)
{
	/* release the extra blkcg reference this blkg has been holding */
	css_put(&blkg->blkcg->css);

	/*
	 * A group is freed in RCU manner.  But having an RCU lock does not
	 * mean that one can access all the fields of blkg and assume these
	 * are valid.  For example, don't try to follow throtl_data and
	 * request queue links.
	 *
	 * Having a reference to blkg under an RCU read lock allows access
	 * only to values local to groups like group stats and group rate
	 * limits.
	 */
	call_rcu(&blkg->rcu_head, blkg_rcu_free);
}
EXPORT_SYMBOL_GPL(__blkg_release);

static int
blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val)
{
	struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);
	struct blkio_group *blkg;
	struct hlist_node *n;
	int i;

	mutex_lock(&blkcg_pol_mutex);
	spin_lock_irq(&blkcg->lock);

	/*
	 * Note that stat reset is racy - it doesn't synchronize against
	 * stat updates.  This is a debug feature which shouldn't exist
	 * anyway.  If you get hit by a race, retry.
	 */
	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
		for (i = 0; i < BLKIO_NR_POLICIES; i++) {
			struct blkio_policy_type *pol = blkio_policy[i];

			if (pol && pol->ops.blkio_reset_group_stats_fn)
				pol->ops.blkio_reset_group_stats_fn(blkg);
		}
	}

	spin_unlock_irq(&blkcg->lock);
	mutex_unlock(&blkcg_pol_mutex);
	return 0;
}

static const char *blkg_dev_name(struct blkio_group *blkg)
{
	/* some drivers (floppy) instantiate a queue w/o disk registered */
	if (blkg->q->backing_dev_info.dev)
		return dev_name(blkg->q->backing_dev_info.dev);
	return NULL;
}

/**
 * blkcg_print_blkgs - helper for printing per-blkg data
 * @sf: seq_file to print to
 * @blkcg: blkcg of interest
 * @prfill: fill function to print out a blkg
 * @pol: policy in question
 * @data: data to be passed to @prfill
 * @show_total: whether to print the sum of the @prfill return values
 *
 * This function invokes @prfill on each blkg of @blkcg if pd for the
 * policy specified by @pol exists.  @prfill is invoked with @sf, the
 * policy data and @data.  If @show_total is %true, the sum of the return
 * values from @prfill is printed with "Total" label at the end.
 *
 * This is to be used to construct print functions for
 * cftype->read_seq_string method.
 */
void blkcg_print_blkgs(struct seq_file *sf, struct blkio_cgroup *blkcg,
		       u64 (*prfill)(struct seq_file *, void *, int),
		       const struct blkio_policy_type *pol, int data,
		       bool show_total)
{
	struct blkio_group *blkg;
	struct hlist_node *n;
	u64 total = 0;

	spin_lock_irq(&blkcg->lock);
	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node)
		if (blkg->pd[pol->plid])
			total += prfill(sf, blkg->pd[pol->plid]->pdata, data);
	spin_unlock_irq(&blkcg->lock);

	if (show_total)
		seq_printf(sf, "Total %llu\n", (unsigned long long)total);
}
EXPORT_SYMBOL_GPL(blkcg_print_blkgs);

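/*
 * Usage sketch (not in the original file): a policy could build its
 * cftype->read_seq_string callback on top of blkcg_print_blkgs() and the
 * prfill helpers below.  All example_* names are hypothetical and only
 * illustrate the calling convention; the block is kept under #if 0 so it
 * is never compiled.
 */
#if 0
static int example_print_stat(struct cgroup *cgrp, struct cftype *cft,
			      struct seq_file *sf)
{
	struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp);

	/* one "dev value" line per blkg, followed by a "Total" line */
	blkcg_print_blkgs(sf, blkcg, blkg_prfill_stat, &example_policy,
			  offsetof(struct example_pdata, example_stat), true);
	return 0;
}
#endif
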
/**
 * __blkg_prfill_u64 - prfill helper for a single u64 value
 * @sf: seq_file to print to
 * @pdata: policy private data of interest
 * @v: value to print
 *
 * Print @v to @sf for the device associated with @pdata.
 */
u64 __blkg_prfill_u64(struct seq_file *sf, void *pdata, u64 v)
{
	const char *dname = blkg_dev_name(pdata_to_blkg(pdata));

	if (!dname)
		return 0;

	seq_printf(sf, "%s %llu\n", dname, (unsigned long long)v);
	return v;
}
EXPORT_SYMBOL_GPL(__blkg_prfill_u64);

/**
 * __blkg_prfill_rwstat - prfill helper for a blkg_rwstat
 * @sf: seq_file to print to
 * @pdata: policy private data of interest
 * @rwstat: rwstat to print
 *
 * Print @rwstat to @sf for the device associated with @pdata.
 */
u64 __blkg_prfill_rwstat(struct seq_file *sf, void *pdata,
			 const struct blkg_rwstat *rwstat)
{
	static const char *rwstr[] = {
		[BLKG_RWSTAT_READ]	= "Read",
		[BLKG_RWSTAT_WRITE]	= "Write",
		[BLKG_RWSTAT_SYNC]	= "Sync",
		[BLKG_RWSTAT_ASYNC]	= "Async",
	};
	const char *dname = blkg_dev_name(pdata_to_blkg(pdata));
	u64 v;
	int i;

	if (!dname)
		return 0;

	for (i = 0; i < BLKG_RWSTAT_NR; i++)
		seq_printf(sf, "%s %s %llu\n", dname, rwstr[i],
			   (unsigned long long)rwstat->cnt[i]);

	v = rwstat->cnt[BLKG_RWSTAT_READ] + rwstat->cnt[BLKG_RWSTAT_WRITE];
	seq_printf(sf, "%s Total %llu\n", dname, (unsigned long long)v);
	return v;
}

/**
 * blkg_prfill_stat - prfill callback for blkg_stat
 * @sf: seq_file to print to
 * @pdata: policy private data of interest
 * @off: offset to the blkg_stat in @pdata
 *
 * prfill callback for printing a blkg_stat.
 */
u64 blkg_prfill_stat(struct seq_file *sf, void *pdata, int off)
{
	return __blkg_prfill_u64(sf, pdata, blkg_stat_read(pdata + off));
}
EXPORT_SYMBOL_GPL(blkg_prfill_stat);

/**
 * blkg_prfill_rwstat - prfill callback for blkg_rwstat
 * @sf: seq_file to print to
 * @pdata: policy private data of interest
 * @off: offset to the blkg_rwstat in @pdata
 *
 * prfill callback for printing a blkg_rwstat.
 */
u64 blkg_prfill_rwstat(struct seq_file *sf, void *pdata, int off)
{
	struct blkg_rwstat rwstat = blkg_rwstat_read(pdata + off);

	return __blkg_prfill_rwstat(sf, pdata, &rwstat);
}
EXPORT_SYMBOL_GPL(blkg_prfill_rwstat);

/**
 * blkg_conf_prep - parse and prepare for per-blkg config update
 * @blkcg: target block cgroup
 * @input: input string
 * @ctx: blkg_conf_ctx to be filled
 *
 * Parse per-blkg config update from @input and initialize @ctx with the
 * result.  @ctx->blkg points to the blkg to be updated and @ctx->v the new
 * value.  This function returns with RCU read locked and must be paired
 * with blkg_conf_finish().
 */
int blkg_conf_prep(struct blkio_cgroup *blkcg, const char *input,
		   struct blkg_conf_ctx *ctx)
	__acquires(rcu)
{
	struct gendisk *disk;
	struct blkio_group *blkg;
	unsigned int major, minor;
	unsigned long long v;
	int part, ret;

	if (sscanf(input, "%u:%u %llu", &major, &minor, &v) != 3)
		return -EINVAL;

	disk = get_gendisk(MKDEV(major, minor), &part);
	if (!disk || part)
		return -EINVAL;

	rcu_read_lock();

	spin_lock_irq(disk->queue->queue_lock);
	blkg = blkg_lookup_create(blkcg, disk->queue, false);
	spin_unlock_irq(disk->queue->queue_lock);

	if (IS_ERR(blkg)) {
		ret = PTR_ERR(blkg);
		rcu_read_unlock();
		put_disk(disk);
		/*
		 * If queue was bypassing, we should retry.  Do so after a
		 * short msleep().  It isn't strictly necessary but queue
		 * can be bypassing for some time and it's always nice to
		 * avoid busy looping.
		 */
		if (ret == -EBUSY) {
			msleep(10);
			ret = restart_syscall();
		}
		return ret;
	}

	ctx->disk = disk;
	ctx->blkg = blkg;
	ctx->v = v;
	return 0;
}
EXPORT_SYMBOL_GPL(blkg_conf_prep);

/**
 * blkg_conf_finish - finish up per-blkg config update
 * @ctx: blkg_conf_ctx initialized by blkg_conf_prep()
 *
 * Finish up after per-blkg config update.  This function must be paired
 * with blkg_conf_prep().
 */
void blkg_conf_finish(struct blkg_conf_ctx *ctx)
	__releases(rcu)
{
	rcu_read_unlock();
	put_disk(ctx->disk);
}
EXPORT_SYMBOL_GPL(blkg_conf_finish);

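/*
 * Usage sketch (not in the original file): a per-device configuration
 * write handler (e.g. a cftype->write_string callback) would pair
 * blkg_conf_prep() with blkg_conf_finish() roughly as below.  The
 * example_* names and how the value is applied are hypothetical; the
 * block is kept under #if 0 so it is never compiled.
 */
#if 0
static int example_set_limit(struct cgroup *cgrp, struct cftype *cft,
			     const char *buf)
{
	struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp);
	struct blkg_conf_ctx ctx;
	int ret;

	ret = blkg_conf_prep(blkcg, buf, &ctx);	/* parses "MAJ:MIN VAL" */
	if (ret)
		return ret;

	/* ctx.blkg is the blkg for that device, ctx.v the parsed value */
	example_apply_limit(ctx.blkg, ctx.v);

	blkg_conf_finish(&ctx);		/* drops RCU and the disk reference */
	return 0;
}
#endif
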
struct cftype blkio_files[] = {
	{
		.name = "reset_stats",
		.write_u64 = blkiocg_reset_stats,
	},
	{ }	/* terminate */
};

/**
 * blkiocg_pre_destroy - cgroup pre_destroy callback
 * @cgroup: cgroup of interest
 *
 * This function is called when @cgroup is about to go away and is
 * responsible for shooting down all blkgs associated with @cgroup.
 * blkgs should be removed while holding both q and blkcg locks.  As
 * blkcg lock is nested inside q lock, this function performs reverse
 * double lock dancing.
 *
 * This is the blkcg counterpart of ioc_release_fn().
 */
static int blkiocg_pre_destroy(struct cgroup *cgroup)
{
	struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);

	spin_lock_irq(&blkcg->lock);

	while (!hlist_empty(&blkcg->blkg_list)) {
		struct blkio_group *blkg = hlist_entry(blkcg->blkg_list.first,
						struct blkio_group, blkcg_node);
		struct request_queue *q = blkg->q;

		if (spin_trylock(q->queue_lock)) {
			blkg_destroy(blkg);
			spin_unlock(q->queue_lock);
		} else {
			spin_unlock_irq(&blkcg->lock);
			cpu_relax();
			spin_lock_irq(&blkcg->lock);
		}
	}

	spin_unlock_irq(&blkcg->lock);
	return 0;
}

static void blkiocg_destroy(struct cgroup *cgroup)
{
	struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);

	if (blkcg != &blkio_root_cgroup)
		kfree(blkcg);
}

static struct cgroup_subsys_state *blkiocg_create(struct cgroup *cgroup)
{
	static atomic64_t id_seq = ATOMIC64_INIT(0);
	struct blkio_cgroup *blkcg;
	struct cgroup *parent = cgroup->parent;

	if (!parent) {
		blkcg = &blkio_root_cgroup;
		goto done;
	}

	blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL);
	if (!blkcg)
		return ERR_PTR(-ENOMEM);

	blkcg->cfq_weight = CFQ_WEIGHT_DEFAULT;
	blkcg->id = atomic64_inc_return(&id_seq); /* root is 0, start from 1 */
done:
	spin_lock_init(&blkcg->lock);
	INIT_HLIST_HEAD(&blkcg->blkg_list);

	return &blkcg->css;
}

/**
 * blkcg_init_queue - initialize blkcg part of request queue
 * @q: request_queue to initialize
 *
 * Called from blk_alloc_queue_node(). Responsible for initializing blkcg
 * part of new request_queue @q.
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */
int blkcg_init_queue(struct request_queue *q)
{
	int ret;

	might_sleep();

	ret = blk_throtl_init(q);
	if (ret)
		return ret;

	mutex_lock(&all_q_mutex);
	INIT_LIST_HEAD(&q->all_q_node);
	list_add_tail(&q->all_q_node, &all_q_list);
	mutex_unlock(&all_q_mutex);

	return 0;
}

/**
 * blkcg_drain_queue - drain blkcg part of request_queue
 * @q: request_queue to drain
 *
 * Called from blk_drain_queue().  Responsible for draining blkcg part.
 */
void blkcg_drain_queue(struct request_queue *q)
{
	lockdep_assert_held(q->queue_lock);

	blk_throtl_drain(q);
}

/**
 * blkcg_exit_queue - exit and release blkcg part of request_queue
 * @q: request_queue being released
 *
 * Called from blk_release_queue().  Responsible for exiting blkcg part.
 */
void blkcg_exit_queue(struct request_queue *q)
{
	mutex_lock(&all_q_mutex);
	list_del_init(&q->all_q_node);
	mutex_unlock(&all_q_mutex);

	blkg_destroy_all(q, true);

	blk_throtl_exit(q);
}

/*
 * We cannot support shared io contexts, as we have no means to support
 * two tasks with the same ioc in two different groups without major rework
 * of the main cic data structures.  For now we allow a task to change
 * its cgroup only if it's the only owner of its ioc.
 */
static int blkiocg_can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
{
	struct task_struct *task;
	struct io_context *ioc;
	int ret = 0;

	/* task_lock() is needed to avoid races with exit_io_context() */
	cgroup_taskset_for_each(task, cgrp, tset) {
		task_lock(task);
		ioc = task->io_context;
		if (ioc && atomic_read(&ioc->nr_tasks) > 1)
			ret = -EINVAL;
		task_unlock(task);
		if (ret)
			break;
	}
	return ret;
}

static void blkcg_bypass_start(void)
	__acquires(&all_q_mutex)
{
	struct request_queue *q;

	mutex_lock(&all_q_mutex);

	list_for_each_entry(q, &all_q_list, all_q_node) {
		blk_queue_bypass_start(q);
		blkg_destroy_all(q, false);
	}
}

static void blkcg_bypass_end(void)
	__releases(&all_q_mutex)
{
	struct request_queue *q;

	list_for_each_entry(q, &all_q_list, all_q_node)
		blk_queue_bypass_end(q);

	mutex_unlock(&all_q_mutex);
}

struct cgroup_subsys blkio_subsys = {
	.name = "blkio",
	.create = blkiocg_create,
	.can_attach = blkiocg_can_attach,
	.pre_destroy = blkiocg_pre_destroy,
	.destroy = blkiocg_destroy,
	.subsys_id = blkio_subsys_id,
	.base_cftypes = blkio_files,
	.module = THIS_MODULE,
};
EXPORT_SYMBOL_GPL(blkio_subsys);

void blkio_policy_register(struct blkio_policy_type *blkiop)
{
	struct request_queue *q;

	mutex_lock(&blkcg_pol_mutex);

	blkcg_bypass_start();

	BUG_ON(blkio_policy[blkiop->plid]);
	blkio_policy[blkiop->plid] = blkiop;
	list_for_each_entry(q, &all_q_list, all_q_node)
		update_root_blkg_pd(q, blkiop);

	blkcg_bypass_end();

	if (blkiop->cftypes)
		WARN_ON(cgroup_add_cftypes(&blkio_subsys, blkiop->cftypes));

	mutex_unlock(&blkcg_pol_mutex);
}
EXPORT_SYMBOL_GPL(blkio_policy_register);

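/*
 * Usage sketch (not in the original file): a blkcg policy describes itself
 * with a struct blkio_policy_type (ops, a policy ID below BLKIO_NR_POLICIES,
 * per-blkg pdata_size, optional cftypes) and registers it from module init,
 * unregistering on exit.  All example_* names and the BLKIO_POLICY_EXAMPLE
 * id are hypothetical; only fields this file dereferences are shown, and
 * the block is kept under #if 0 so it is never compiled.
 */
#if 0
static struct blkio_policy_type example_policy = {
	.ops = {
		.blkio_init_group_fn		= example_init_group,
		.blkio_exit_group_fn		= example_exit_group,
		.blkio_reset_group_stats_fn	= example_reset_stats,
	},
	.plid		= BLKIO_POLICY_EXAMPLE,
	.pdata_size	= sizeof(struct example_pdata),
	.cftypes	= example_files,
};

static int __init example_init(void)
{
	blkio_policy_register(&example_policy);
	return 0;
}
module_init(example_init);

static void __exit example_exit(void)
{
	blkio_policy_unregister(&example_policy);
}
module_exit(example_exit);
#endif
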
void blkio_policy_unregister(struct blkio_policy_type *blkiop)
{
	struct request_queue *q;

	mutex_lock(&blkcg_pol_mutex);

	if (blkiop->cftypes)
		cgroup_rm_cftypes(&blkio_subsys, blkiop->cftypes);

	blkcg_bypass_start();

	BUG_ON(blkio_policy[blkiop->plid] != blkiop);
	blkio_policy[blkiop->plid] = NULL;

	list_for_each_entry(q, &all_q_list, all_q_node)
		update_root_blkg_pd(q, blkiop);
	blkcg_bypass_end();

	mutex_unlock(&blkcg_pol_mutex);
}
EXPORT_SYMBOL_GPL(blkio_policy_unregister);