dm-space-map-metadata.c revision 7e664b3dec431eebf0c5df5ff704d6197634cf35
1/*
2 * Copyright (C) 2011 Red Hat, Inc.
3 *
4 * This file is released under the GPL.
5 */
6
7#include "dm-space-map.h"
8#include "dm-space-map-common.h"
9#include "dm-space-map-metadata.h"
10
11#include <linux/list.h>
12#include <linux/slab.h>
13#include <linux/device-mapper.h>
14
15#define DM_MSG_PREFIX "space map metadata"
16
17/*----------------------------------------------------------------*/
18
19/*
20 * An edge triggered threshold.
21 */
22struct threshold {
23	bool threshold_set;
24	bool value_set;
25	dm_block_t threshold;
26	dm_block_t current_value;
27	dm_sm_threshold_fn fn;
28	void *context;
29};
30
31static void threshold_init(struct threshold *t)
32{
33	t->threshold_set = false;
34	t->value_set = false;
35}
36
37static void set_threshold(struct threshold *t, dm_block_t value,
38			  dm_sm_threshold_fn fn, void *context)
39{
40	t->threshold_set = true;
41	t->threshold = value;
42	t->fn = fn;
43	t->context = context;
44}
45
46static bool below_threshold(struct threshold *t, dm_block_t value)
47{
48	return t->threshold_set && value <= t->threshold;
49}
50
51static bool threshold_already_triggered(struct threshold *t)
52{
53	return t->value_set && below_threshold(t, t->current_value);
54}
55
56static void check_threshold(struct threshold *t, dm_block_t value)
57{
58	if (below_threshold(t, value) &&
59	    !threshold_already_triggered(t))
60		t->fn(t->context);
61
62	t->value_set = true;
63	t->current_value = value;
64}
65
66/*----------------------------------------------------------------*/
67
68/*
69 * Space map interface.
70 *
71 * The low level disk format is written using the standard btree and
72 * transaction manager.  This means that performing disk operations may
73 * cause us to recurse into the space map in order to allocate new blocks.
74 * For this reason we have a pool of pre-allocated blocks large enough to
75 * service any metadata_ll_disk operation.
76 */
77
78/*
79 * FIXME: we should calculate this based on the size of the device.
80 * Only the metadata space map needs this functionality.
81 */
82#define MAX_RECURSIVE_ALLOCATIONS 1024
83
84enum block_op_type {
85	BOP_INC,
86	BOP_DEC
87};
88
89struct block_op {
90	enum block_op_type type;
91	dm_block_t block;
92};
93
94struct sm_metadata {
95	struct dm_space_map sm;
96
97	struct ll_disk ll;
98	struct ll_disk old_ll;
99
100	dm_block_t begin;
101
102	unsigned recursion_count;
103	unsigned allocated_this_transaction;
104	unsigned nr_uncommitted;
105	struct block_op uncommitted[MAX_RECURSIVE_ALLOCATIONS];
106
107	struct threshold threshold;
108};
109
110static int add_bop(struct sm_metadata *smm, enum block_op_type type, dm_block_t b)
111{
112	struct block_op *op;
113
114	if (smm->nr_uncommitted == MAX_RECURSIVE_ALLOCATIONS) {
115		DMERR("too many recursive allocations");
116		return -ENOMEM;
117	}
118
119	op = smm->uncommitted + smm->nr_uncommitted++;
120	op->type = type;
121	op->block = b;
122
123	return 0;
124}
125
126static int commit_bop(struct sm_metadata *smm, struct block_op *op)
127{
128	int r = 0;
129	enum allocation_event ev;
130
131	switch (op->type) {
132	case BOP_INC:
133		r = sm_ll_inc(&smm->ll, op->block, &ev);
134		break;
135
136	case BOP_DEC:
137		r = sm_ll_dec(&smm->ll, op->block, &ev);
138		break;
139	}
140
141	return r;
142}
143
144static void in(struct sm_metadata *smm)
145{
146	smm->recursion_count++;
147}
148
149static int out(struct sm_metadata *smm)
150{
151	int r = 0;
152
153	/*
154	 * If we're not recursing then very bad things are happening.
155	 */
156	if (!smm->recursion_count) {
157		DMERR("lost track of recursion depth");
158		return -ENOMEM;
159	}
160
161	if (smm->recursion_count == 1 && smm->nr_uncommitted) {
162		while (smm->nr_uncommitted && !r) {
163			smm->nr_uncommitted--;
164			r = commit_bop(smm, smm->uncommitted +
165				       smm->nr_uncommitted);
166			if (r)
167				break;
168		}
169	}
170
171	smm->recursion_count--;
172
173	return r;
174}
175
176/*
177 * When using the out() function above, we often want to combine an error
178 * code for the operation run in the recursive context with that from
179 * out().
180 */
181static int combine_errors(int r1, int r2)
182{
183	return r1 ? r1 : r2;
184}
185
186static int recursing(struct sm_metadata *smm)
187{
188	return smm->recursion_count;
189}
190
191static void sm_metadata_destroy(struct dm_space_map *sm)
192{
193	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
194
195	kfree(smm);
196}
197
198static int sm_metadata_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count)
199{
200	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
201
202	*count = smm->ll.nr_blocks;
203
204	return 0;
205}
206
207static int sm_metadata_get_nr_free(struct dm_space_map *sm, dm_block_t *count)
208{
209	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
210
211	*count = smm->old_ll.nr_blocks - smm->old_ll.nr_allocated -
212		 smm->allocated_this_transaction;
213
214	return 0;
215}
216
217static int sm_metadata_get_count(struct dm_space_map *sm, dm_block_t b,
218				 uint32_t *result)
219{
220	int r, i;
221	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
222	unsigned adjustment = 0;
223
224	/*
225	 * We may have some uncommitted adjustments to add.  This list
226	 * should always be really short.
227	 */
228	for (i = 0; i < smm->nr_uncommitted; i++) {
229		struct block_op *op = smm->uncommitted + i;
230
231		if (op->block != b)
232			continue;
233
234		switch (op->type) {
235		case BOP_INC:
236			adjustment++;
237			break;
238
239		case BOP_DEC:
240			adjustment--;
241			break;
242		}
243	}
244
245	r = sm_ll_lookup(&smm->ll, b, result);
246	if (r)
247		return r;
248
249	*result += adjustment;
250
251	return 0;
252}
253
254static int sm_metadata_count_is_more_than_one(struct dm_space_map *sm,
255					      dm_block_t b, int *result)
256{
257	int r, i, adjustment = 0;
258	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
259	uint32_t rc;
260
261	/*
262	 * We may have some uncommitted adjustments to add.  This list
263	 * should always be really short.
264	 */
265	for (i = 0; i < smm->nr_uncommitted; i++) {
266		struct block_op *op = smm->uncommitted + i;
267
268		if (op->block != b)
269			continue;
270
271		switch (op->type) {
272		case BOP_INC:
273			adjustment++;
274			break;
275
276		case BOP_DEC:
277			adjustment--;
278			break;
279		}
280	}
281
282	if (adjustment > 1) {
283		*result = 1;
284		return 0;
285	}
286
287	r = sm_ll_lookup_bitmap(&smm->ll, b, &rc);
288	if (r)
289		return r;
290
291	if (rc == 3)
292		/*
293		 * We err on the side of caution, and always return true.
294		 */
295		*result = 1;
296	else
297		*result = rc + adjustment > 1;
298
299	return 0;
300}
301
302static int sm_metadata_set_count(struct dm_space_map *sm, dm_block_t b,
303				 uint32_t count)
304{
305	int r, r2;
306	enum allocation_event ev;
307	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
308
309	if (smm->recursion_count) {
310		DMERR("cannot recurse set_count()");
311		return -EINVAL;
312	}
313
314	in(smm);
315	r = sm_ll_insert(&smm->ll, b, count, &ev);
316	r2 = out(smm);
317
318	return combine_errors(r, r2);
319}
320
321static int sm_metadata_inc_block(struct dm_space_map *sm, dm_block_t b)
322{
323	int r, r2 = 0;
324	enum allocation_event ev;
325	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
326
327	if (recursing(smm))
328		r = add_bop(smm, BOP_INC, b);
329	else {
330		in(smm);
331		r = sm_ll_inc(&smm->ll, b, &ev);
332		r2 = out(smm);
333	}
334
335	return combine_errors(r, r2);
336}
337
338static int sm_metadata_dec_block(struct dm_space_map *sm, dm_block_t b)
339{
340	int r, r2 = 0;
341	enum allocation_event ev;
342	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
343
344	if (recursing(smm))
345		r = add_bop(smm, BOP_DEC, b);
346	else {
347		in(smm);
348		r = sm_ll_dec(&smm->ll, b, &ev);
349		r2 = out(smm);
350	}
351
352	return combine_errors(r, r2);
353}
354
355static int sm_metadata_new_block_(struct dm_space_map *sm, dm_block_t *b)
356{
357	int r, r2 = 0;
358	enum allocation_event ev;
359	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
360
361	r = sm_ll_find_free_block(&smm->old_ll, smm->begin, smm->old_ll.nr_blocks, b);
362	if (r)
363		return r;
364
365	smm->begin = *b + 1;
366
367	if (recursing(smm))
368		r = add_bop(smm, BOP_INC, *b);
369	else {
370		in(smm);
371		r = sm_ll_inc(&smm->ll, *b, &ev);
372		r2 = out(smm);
373	}
374
375	if (!r)
376		smm->allocated_this_transaction++;
377
378	return combine_errors(r, r2);
379}
380
381static int sm_metadata_new_block(struct dm_space_map *sm, dm_block_t *b)
382{
383	dm_block_t count;
384	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
385
386	int r = sm_metadata_new_block_(sm, b);
387	if (r) {
388		DMERR_LIMIT("unable to allocate new metadata block");
389		return r;
390	}
391
392	r = sm_metadata_get_nr_free(sm, &count);
393	if (r) {
394		DMERR_LIMIT("couldn't get free block count");
395		return r;
396	}
397
398	check_threshold(&smm->threshold, count);
399
400	return r;
401}
402
403static int sm_metadata_commit(struct dm_space_map *sm)
404{
405	int r;
406	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
407
408	r = sm_ll_commit(&smm->ll);
409	if (r)
410		return r;
411
412	memcpy(&smm->old_ll, &smm->ll, sizeof(smm->old_ll));
413	smm->begin = 0;
414	smm->allocated_this_transaction = 0;
415
416	return 0;
417}
418
419static int sm_metadata_register_threshold_callback(struct dm_space_map *sm,
420						   dm_block_t threshold,
421						   dm_sm_threshold_fn fn,
422						   void *context)
423{
424	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
425
426	set_threshold(&smm->threshold, threshold, fn, context);
427
428	return 0;
429}
430
431static int sm_metadata_root_size(struct dm_space_map *sm, size_t *result)
432{
433	*result = sizeof(struct disk_sm_root);
434
435	return 0;
436}
437
438static int sm_metadata_copy_root(struct dm_space_map *sm, void *where_le, size_t max)
439{
440	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
441	struct disk_sm_root root_le;
442
443	root_le.nr_blocks = cpu_to_le64(smm->ll.nr_blocks);
444	root_le.nr_allocated = cpu_to_le64(smm->ll.nr_allocated);
445	root_le.bitmap_root = cpu_to_le64(smm->ll.bitmap_root);
446	root_le.ref_count_root = cpu_to_le64(smm->ll.ref_count_root);
447
448	if (max < sizeof(root_le))
449		return -ENOSPC;
450
451	memcpy(where_le, &root_le, sizeof(root_le));
452
453	return 0;
454}
455
456static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks);
457
458static struct dm_space_map ops = {
459	.destroy = sm_metadata_destroy,
460	.extend = sm_metadata_extend,
461	.get_nr_blocks = sm_metadata_get_nr_blocks,
462	.get_nr_free = sm_metadata_get_nr_free,
463	.get_count = sm_metadata_get_count,
464	.count_is_more_than_one = sm_metadata_count_is_more_than_one,
465	.set_count = sm_metadata_set_count,
466	.inc_block = sm_metadata_inc_block,
467	.dec_block = sm_metadata_dec_block,
468	.new_block = sm_metadata_new_block,
469	.commit = sm_metadata_commit,
470	.root_size = sm_metadata_root_size,
471	.copy_root = sm_metadata_copy_root,
472	.register_threshold_callback = sm_metadata_register_threshold_callback
473};
474
475/*----------------------------------------------------------------*/
476
477/*
478 * When a new space map is created that manages its own space.  We use
479 * this tiny bootstrap allocator.
480 */
481static void sm_bootstrap_destroy(struct dm_space_map *sm)
482{
483}
484
485static int sm_bootstrap_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
486{
487	DMERR("bootstrap doesn't support extend");
488
489	return -EINVAL;
490}
491
492static int sm_bootstrap_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count)
493{
494	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
495
496	return smm->ll.nr_blocks;
497}
498
499static int sm_bootstrap_get_nr_free(struct dm_space_map *sm, dm_block_t *count)
500{
501	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
502
503	*count = smm->ll.nr_blocks - smm->begin;
504
505	return 0;
506}
507
508static int sm_bootstrap_get_count(struct dm_space_map *sm, dm_block_t b,
509				  uint32_t *result)
510{
511	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
512
513	return b < smm->begin ? 1 : 0;
514}
515
516static int sm_bootstrap_count_is_more_than_one(struct dm_space_map *sm,
517					       dm_block_t b, int *result)
518{
519	*result = 0;
520
521	return 0;
522}
523
524static int sm_bootstrap_set_count(struct dm_space_map *sm, dm_block_t b,
525				  uint32_t count)
526{
527	DMERR("bootstrap doesn't support set_count");
528
529	return -EINVAL;
530}
531
532static int sm_bootstrap_new_block(struct dm_space_map *sm, dm_block_t *b)
533{
534	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
535
536	/*
537	 * We know the entire device is unused.
538	 */
539	if (smm->begin == smm->ll.nr_blocks)
540		return -ENOSPC;
541
542	*b = smm->begin++;
543
544	return 0;
545}
546
547static int sm_bootstrap_inc_block(struct dm_space_map *sm, dm_block_t b)
548{
549	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
550
551	return add_bop(smm, BOP_INC, b);
552}
553
554static int sm_bootstrap_dec_block(struct dm_space_map *sm, dm_block_t b)
555{
556	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
557
558	return add_bop(smm, BOP_DEC, b);
559}
560
561static int sm_bootstrap_commit(struct dm_space_map *sm)
562{
563	return 0;
564}
565
566static int sm_bootstrap_root_size(struct dm_space_map *sm, size_t *result)
567{
568	DMERR("bootstrap doesn't support root_size");
569
570	return -EINVAL;
571}
572
573static int sm_bootstrap_copy_root(struct dm_space_map *sm, void *where,
574				  size_t max)
575{
576	DMERR("bootstrap doesn't support copy_root");
577
578	return -EINVAL;
579}
580
581static struct dm_space_map bootstrap_ops = {
582	.destroy = sm_bootstrap_destroy,
583	.extend = sm_bootstrap_extend,
584	.get_nr_blocks = sm_bootstrap_get_nr_blocks,
585	.get_nr_free = sm_bootstrap_get_nr_free,
586	.get_count = sm_bootstrap_get_count,
587	.count_is_more_than_one = sm_bootstrap_count_is_more_than_one,
588	.set_count = sm_bootstrap_set_count,
589	.inc_block = sm_bootstrap_inc_block,
590	.dec_block = sm_bootstrap_dec_block,
591	.new_block = sm_bootstrap_new_block,
592	.commit = sm_bootstrap_commit,
593	.root_size = sm_bootstrap_root_size,
594	.copy_root = sm_bootstrap_copy_root,
595	.register_threshold_callback = NULL
596};
597
598/*----------------------------------------------------------------*/
599
600static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
601{
602	int r, i;
603	enum allocation_event ev;
604	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
605	dm_block_t old_len = smm->ll.nr_blocks;
606
607	/*
608	 * Flick into a mode where all blocks get allocated in the new area.
609	 */
610	smm->begin = old_len;
611	memcpy(sm, &bootstrap_ops, sizeof(*sm));
612
613	/*
614	 * Extend.
615	 */
616	r = sm_ll_extend(&smm->ll, extra_blocks);
617	if (r)
618		goto out;
619
620	for (i = old_len; !r && i < smm->begin; i++) {
621		r = sm_ll_inc(&smm->ll, i, &ev);
622		if (r)
623			goto out;
624	}
625
626	r = sm_metadata_commit(sm);
627
628out:
629	/*
630	 * Switch back to normal behaviour.
631	 */
632	memcpy(sm, &ops, sizeof(*sm));
633	return r;
634}
635
636/*----------------------------------------------------------------*/
637
638struct dm_space_map *dm_sm_metadata_init(void)
639{
640	struct sm_metadata *smm;
641
642	smm = kmalloc(sizeof(*smm), GFP_KERNEL);
643	if (!smm)
644		return ERR_PTR(-ENOMEM);
645
646	memcpy(&smm->sm, &ops, sizeof(smm->sm));
647
648	return &smm->sm;
649}
650
651int dm_sm_metadata_create(struct dm_space_map *sm,
652			  struct dm_transaction_manager *tm,
653			  dm_block_t nr_blocks,
654			  dm_block_t superblock)
655{
656	int r;
657	dm_block_t i;
658	enum allocation_event ev;
659	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
660
661	smm->begin = superblock + 1;
662	smm->recursion_count = 0;
663	smm->allocated_this_transaction = 0;
664	smm->nr_uncommitted = 0;
665	threshold_init(&smm->threshold);
666
667	memcpy(&smm->sm, &bootstrap_ops, sizeof(smm->sm));
668
669	r = sm_ll_new_metadata(&smm->ll, tm);
670	if (r)
671		return r;
672
673	r = sm_ll_extend(&smm->ll, nr_blocks);
674	if (r)
675		return r;
676
677	memcpy(&smm->sm, &ops, sizeof(smm->sm));
678
679	/*
680	 * Now we need to update the newly created data structures with the
681	 * allocated blocks that they were built from.
682	 */
683	for (i = superblock; !r && i < smm->begin; i++)
684		r = sm_ll_inc(&smm->ll, i, &ev);
685
686	if (r)
687		return r;
688
689	return sm_metadata_commit(sm);
690}
691
692int dm_sm_metadata_open(struct dm_space_map *sm,
693			struct dm_transaction_manager *tm,
694			void *root_le, size_t len)
695{
696	int r;
697	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
698
699	r = sm_ll_open_metadata(&smm->ll, tm, root_le, len);
700	if (r)
701		return r;
702
703	smm->begin = 0;
704	smm->recursion_count = 0;
705	smm->allocated_this_transaction = 0;
706	smm->nr_uncommitted = 0;
707	threshold_init(&smm->threshold);
708
709	memcpy(&smm->old_ll, &smm->ll, sizeof(smm->old_ll));
710	return 0;
711}
712