/*
 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
 * Copyright (C) 2004-2008 Red Hat, Inc.  All rights reserved.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
 * of the GNU General Public License version 2.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/fs.h>
#include <linux/gfs2_ondisk.h>
#include <linux/prefetch.h>
#include <linux/blkdev.h>
#include <linux/rbtree.h>
#include <linux/random.h>

#include "gfs2.h"
#include "incore.h"
#include "glock.h"
#include "glops.h"
#include "lops.h"
#include "meta_io.h"
#include "quota.h"
#include "rgrp.h"
#include "super.h"
#include "trans.h"
#include "util.h"
#include "log.h"
#include "inode.h"
#include "trace_gfs2.h"

#define BFITNOENT ((u32)~0)
#define NO_BLOCK ((u64)~0)

#if BITS_PER_LONG == 32
#define LBITMASK   (0x55555555UL)
#define LBITSKIP55 (0x55555555UL)
#define LBITSKIP00 (0x00000000UL)
#else
#define LBITMASK   (0x5555555555555555UL)
#define LBITSKIP55 (0x5555555555555555UL)
#define LBITSKIP00 (0x0000000000000000UL)
#endif

/*
 * These routines are used by the resource group routines (rgrp.c)
 * to keep track of block allocation.  Each block is represented by two
 * bits.  So, each byte represents GFS2_NBBY (i.e. 4) blocks.
 *
 * 0 = Free
 * 1 = Used (not metadata)
 * 2 = Unlinked (still in use) inode
 * 3 = Used (metadata)
 */
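
/*
 * Worked example (illustrative only): the byte 0x44 (binary 01 00 01 00)
 * packs four blocks, lowest-order bit pair first:
 *
 *   bits 0-1 = 00 -> block 0 free
 *   bits 2-3 = 01 -> block 1 used (data)
 *   bits 4-5 = 00 -> block 2 free
 *   bits 6-7 = 01 -> block 3 used (data)
 */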

struct gfs2_extent {
	struct gfs2_rbm rbm;
	u32 len;
};

static const char valid_change[16] = {
	        /* current */
	/* n */ 0, 1, 1, 1,
	/* e */ 1, 0, 0, 0,
	/* w */ 0, 0, 0, 1,
	        1, 0, 0, 0
};
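
/*
 * The table above is indexed as valid_change[new_state * 4 + cur_state]
 * (see gfs2_setbit() below); the letters n/e/w merely label the new_state
 * rows. For example, valid_change[GFS2_BLKST_DINODE * 4 + GFS2_BLKST_FREE]
 * is 1, since allocating a dinode from a free block is legal, whereas
 * entry 0 (free -> free) is 0, since "freeing" an already free block
 * indicates corruption.
 */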

static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext,
			 const struct gfs2_inode *ip, bool nowrap,
			 const struct gfs2_alloc_parms *ap);


/**
 * gfs2_setbit - Set a bit in the bitmaps
 * @rbm: The position of the bit to set
 * @do_clone: Also set the clone bitmap, if it exists
 * @new_state: the new state of the block
 *
 */

static inline void gfs2_setbit(const struct gfs2_rbm *rbm, bool do_clone,
			       unsigned char new_state)
{
	unsigned char *byte1, *byte2, *end, cur_state;
	struct gfs2_bitmap *bi = rbm_bi(rbm);
	unsigned int buflen = bi->bi_len;
	const unsigned int bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE;

	byte1 = bi->bi_bh->b_data + bi->bi_offset + (rbm->offset / GFS2_NBBY);
	end = bi->bi_bh->b_data + bi->bi_offset + buflen;

	BUG_ON(byte1 >= end);

	cur_state = (*byte1 >> bit) & GFS2_BIT_MASK;

	if (unlikely(!valid_change[new_state * 4 + cur_state])) {
		pr_warn("buf_blk = 0x%x old_state=%d, new_state=%d\n",
			rbm->offset, cur_state, new_state);
		pr_warn("rgrp=0x%llx bi_start=0x%x\n",
			(unsigned long long)rbm->rgd->rd_addr, bi->bi_start);
		pr_warn("bi_offset=0x%x bi_len=0x%x\n",
			bi->bi_offset, bi->bi_len);
		dump_stack();
		gfs2_consist_rgrpd(rbm->rgd);
		return;
	}
	*byte1 ^= (cur_state ^ new_state) << bit;

	if (do_clone && bi->bi_clone) {
		byte2 = bi->bi_clone + bi->bi_offset + (rbm->offset / GFS2_NBBY);
		cur_state = (*byte2 >> bit) & GFS2_BIT_MASK;
		*byte2 ^= (cur_state ^ new_state) << bit;
	}
}

/**
 * gfs2_testbit - test a bit in the bitmaps
 * @rbm: The bit to test
 *
 * Returns: The two bit block state of the requested bit
 */

static inline u8 gfs2_testbit(const struct gfs2_rbm *rbm)
{
	struct gfs2_bitmap *bi = rbm_bi(rbm);
	const u8 *buffer = bi->bi_bh->b_data + bi->bi_offset;
	const u8 *byte;
	unsigned int bit;

	byte = buffer + (rbm->offset / GFS2_NBBY);
	bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE;

	return (*byte >> bit) & GFS2_BIT_MASK;
}

/**
 * gfs2_bit_search
 * @ptr: Pointer to bitmap data
 * @mask: Mask to use (normally 0x55555.... but adjusted for search start)
 * @state: The state we are searching for
 *
 * We xor the bitmap data with a pattern which is the bitwise opposite
 * of what we are looking for; this gives rise to a pattern of ones
 * wherever there is a match. Since we have two bits per entry, we
 * take this pattern, shift it down by one place and then AND it with
 * the original. All the even bit positions (0, 2, 4, etc.) then represent
 * successful matches, so we mask with 0x55555..... to remove the unwanted
 * odd bit positions.
 *
 * This allows searching of a whole u64 at once (32 blocks) with a
 * single test (on 64 bit arches).
 */

static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state)
{
	u64 tmp;
	static const u64 search[] = {
		[0] = 0xffffffffffffffffULL,
		[1] = 0xaaaaaaaaaaaaaaaaULL,
		[2] = 0x5555555555555555ULL,
		[3] = 0x0000000000000000ULL,
	};
	tmp = le64_to_cpu(*ptr) ^ search[state];
	tmp &= (tmp >> 1);
	tmp &= mask;
	return tmp;
}
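
/*
 * Worked example for gfs2_bit_search() (one byte shown for brevity):
 * searching for state 1 in the data byte 0x44, using the matching byte
 * of search[1] (0xaa):
 *
 *   0x44 ^ 0xaa           = 11101110
 *   tmp & (tmp >> 1)      = 11101110 & 01110111 = 01100110
 *   ... & mask (01010101) = 01000100
 *
 * Bits 2 and 6 survive, i.e. entries 1 and 3, which are exactly the two
 * bit pairs holding state 01; __ffs64()/2 in the caller recovers the
 * entry index.
 */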

/**
 * rs_cmp - multi-block reservation range compare
 * @blk: absolute file system block number of the new reservation
 * @len: number of blocks in the new reservation
 * @rs: existing reservation to compare against
 *
 * returns: 1 if the block range is beyond the reach of the reservation
 *         -1 if the block range is before the start of the reservation
 *          0 if the block range overlaps with the reservation
 */
static inline int rs_cmp(u64 blk, u32 len, struct gfs2_blkreserv *rs)
{
	u64 startblk = gfs2_rbm_to_block(&rs->rs_rbm);

	if (blk >= startblk + rs->rs_free)
		return 1;
	if (blk + len - 1 < startblk)
		return -1;
	return 0;
}
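
/*
 * For example, a reservation starting at block 1000 with rs_free == 10
 * covers blocks 1000..1009, so rs_cmp(1010, 5, rs) == 1 (entirely after),
 * rs_cmp(990, 5, rs) == -1 (entirely before, as 990 + 5 - 1 < 1000) and
 * rs_cmp(1005, 20, rs) == 0 (overlap).
 */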

/**
 * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing
 *       a block in a given allocation state.
 * @buf: the buffer that holds the bitmaps
 * @len: the length (in bytes) of the buffer
 * @goal: start search at this block's bit-pair (within @buf)
 * @state: GFS2_BLKST_XXX the state of the block we're looking for.
 *
 * Scope of @goal and returned block number is only within this bitmap buffer,
 * not entire rgrp or filesystem.  @buf will be offset from the actual
 * beginning of a bitmap block buffer, skipping any header structures, but
 * headers are always a multiple of 64 bits long so that the buffer is
 * always aligned to a 64 bit boundary.
 *
 * The size of the buffer is in bytes, but it is assumed that it is
 * always ok to read a complete multiple of 64 bits at the end
 * of the block in case the end is not aligned to a natural boundary.
 *
 * Returns: the block number (bitmap buffer scope) that was found, or
 *          BFITNOENT if no block of the requested state was found
 */

static u32 gfs2_bitfit(const u8 *buf, const unsigned int len,
		       u32 goal, u8 state)
{
	u32 spoint = (goal << 1) & ((8*sizeof(u64)) - 1);
	const __le64 *ptr = ((__le64 *)buf) + (goal >> 5);
	const __le64 *end = (__le64 *)(buf + ALIGN(len, sizeof(u64)));
	u64 tmp;
	u64 mask = 0x5555555555555555ULL;
	u32 bit;

	/* Mask off bits we don't care about at the start of the search */
	mask <<= spoint;
	tmp = gfs2_bit_search(ptr, mask, state);
	ptr++;
	while(tmp == 0 && ptr < end) {
		tmp = gfs2_bit_search(ptr, 0x5555555555555555ULL, state);
		ptr++;
	}
	/* Mask off any bits which are more than len bytes from the start */
	if (ptr == end && (len & (sizeof(u64) - 1)))
		tmp &= (((u64)~0) >> (64 - 8*(len & (sizeof(u64) - 1))));
	/* Didn't find anything, so return */
	if (tmp == 0)
		return BFITNOENT;
	ptr--;
	bit = __ffs64(tmp);
	bit /= 2;	/* two bits per entry in the bitmap */
	return (((const unsigned char *)ptr - buf) * GFS2_NBBY) + bit;
}
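
/*
 * Example of the search arithmetic above: for goal == 75, spoint is
 * (75 << 1) & 63 == 22, and ptr starts at the third u64 of the buffer
 * (goal >> 5 == 2), which covers blocks 64..95. The initial mask is thus
 * 0x5555555555555555 << 22, so that blocks 64..74 of that word are
 * ignored and the search effectively begins at block 75.
 */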

/**
 * gfs2_rbm_from_block - Set the rbm based upon rgd and block number
 * @rbm: The rbm with rgd already set correctly
 * @block: The block number (filesystem relative)
 *
 * This sets the bi and offset members of an rbm based on a
 * resource group and a filesystem relative block number. The
 * resource group must be set in the rbm on entry, the bi and
 * offset members will be set by this function.
 *
 * Returns: 0 on success, or an error code
 */

static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block)
{
	u64 rblock = block - rbm->rgd->rd_data0;

	if (WARN_ON_ONCE(rblock > UINT_MAX))
		return -EINVAL;
	if (block >= rbm->rgd->rd_data0 + rbm->rgd->rd_data)
		return -E2BIG;

	rbm->bii = 0;
	rbm->offset = (u32)(rblock);
	/* Check if the block is within the first bitmap block */
	if (rbm->offset < rbm_bi(rbm)->bi_blocks)
		return 0;

	/* Adjust for the size diff between gfs2_meta_header and gfs2_rgrp */
	rbm->offset += (sizeof(struct gfs2_rgrp) -
			sizeof(struct gfs2_meta_header)) * GFS2_NBBY;
	rbm->bii = rbm->offset / rbm->rgd->rd_sbd->sd_blocks_per_bitmap;
	rbm->offset -= rbm->bii * rbm->rgd->rd_sbd->sd_blocks_per_bitmap;
	return 0;
}
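
/*
 * Illustration, with assumed sizes (a 4KiB block size, 24-byte
 * gfs2_meta_header and 128-byte gfs2_rgrp): sd_blocks_per_bitmap is
 * (4096 - 24) * GFS2_NBBY == 16288, and the header block's bitmap is
 * (128 - 24) * GFS2_NBBY == 416 entries short of that. A relative block
 * of 16000 fails the first-bitmap test (16000 >= 15872), is adjusted to
 * 16416, and lands at bii == 1, offset == 128 -- which indeed matches
 * block 16000 - 15872 == 128 of the second bitmap.
 */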

/**
 * gfs2_rbm_incr - increment an rbm structure
 * @rbm: The rbm with rgd already set correctly
 *
 * This function takes an existing rbm structure and increments it to the next
 * viable block offset.
 *
 * Returns: If incrementing the offset would cause the rbm to go past the
 *          end of the rgrp, true is returned, otherwise false.
 *
 */

static bool gfs2_rbm_incr(struct gfs2_rbm *rbm)
{
	if (rbm->offset + 1 < rbm_bi(rbm)->bi_blocks) { /* in the same bitmap */
		rbm->offset++;
		return false;
	}
	if (rbm->bii == rbm->rgd->rd_length - 1) /* at the last bitmap */
		return true;

	rbm->offset = 0;
	rbm->bii++;
	return false;
}

/**
 * gfs2_unaligned_extlen - Look for free blocks which are not byte aligned
 * @rbm: Position to search (value/result)
 * @n_unaligned: Number of unaligned blocks to check
 * @len: Decremented for each block found (terminate on zero)
 *
 * Returns: true if a non-free block is encountered
 */

static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *len)
{
	u32 n;
	u8 res;

	for (n = 0; n < n_unaligned; n++) {
		res = gfs2_testbit(rbm);
		if (res != GFS2_BLKST_FREE)
			return true;
		(*len)--;
		if (*len == 0)
			return true;
		if (gfs2_rbm_incr(rbm))
			return true;
	}

	return false;
}

/**
 * gfs2_free_extlen - Return extent length of free blocks
 * @rrbm: Starting position
 * @len: Max length to check
 *
 * Starting at the block specified by the rbm, see how many free blocks
 * there are, not reading more than len blocks ahead. This can be done
 * using memchr_inv when the blocks are byte aligned, but has to be done
 * on a block by block basis in case of unaligned blocks. Also this
 * function can cope with bitmap boundaries (although it must stop on
 * a resource group boundary)
 *
 * Returns: Number of free blocks in the extent
 */

static u32 gfs2_free_extlen(const struct gfs2_rbm *rrbm, u32 len)
{
	struct gfs2_rbm rbm = *rrbm;
	u32 n_unaligned = rbm.offset & 3;
	u32 size = len;
	u32 bytes;
	u32 chunk_size;
	u8 *ptr, *start, *end;
	u64 block;
	struct gfs2_bitmap *bi;

	if (n_unaligned &&
	    gfs2_unaligned_extlen(&rbm, 4 - n_unaligned, &len))
		goto out;

	n_unaligned = len & 3;
	/* Start is now byte aligned */
	while (len > 3) {
		bi = rbm_bi(&rbm);
		start = bi->bi_bh->b_data;
		if (bi->bi_clone)
			start = bi->bi_clone;
		end = start + bi->bi_bh->b_size;
		start += bi->bi_offset;
		BUG_ON(rbm.offset & 3);
		start += (rbm.offset / GFS2_NBBY);
		bytes = min_t(u32, len / GFS2_NBBY, (end - start));
		ptr = memchr_inv(start, 0, bytes);
		chunk_size = ((ptr == NULL) ? bytes : (ptr - start));
		chunk_size *= GFS2_NBBY;
		BUG_ON(len < chunk_size);
		len -= chunk_size;
		block = gfs2_rbm_to_block(&rbm);
		if (gfs2_rbm_from_block(&rbm, block + chunk_size)) {
			n_unaligned = 0;
			break;
		}
		if (ptr) {
			n_unaligned = 3;
			break;
		}
		n_unaligned = len & 3;
	}

	/* Deal with any bits left over at the end */
	if (n_unaligned)
		gfs2_unaligned_extlen(&rbm, n_unaligned, &len);
out:
	return size - len;
}
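
/*
 * Example of the strategy above: starting at rbm.offset == 6 gives
 * n_unaligned == 2, so two blocks are tested individually to reach the
 * byte boundary at offset 8. From there memchr_inv() scans whole bytes:
 * each zero byte accounts for four free blocks at once, the first
 * non-zero byte ends the aligned scan, and at most three trailing
 * blocks are then re-checked block by block.
 */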

/**
 * gfs2_bitcount - count the number of bits in a certain state
 * @rgd: the resource group descriptor
 * @buffer: the buffer that holds the bitmaps
 * @buflen: the length (in bytes) of the buffer
 * @state: the state of the block we're looking for
 *
 * Returns: The number of bits
 */

static u32 gfs2_bitcount(struct gfs2_rgrpd *rgd, const u8 *buffer,
			 unsigned int buflen, u8 state)
{
	const u8 *byte = buffer;
	const u8 *end = buffer + buflen;
	const u8 state1 = state << 2;
	const u8 state2 = state << 4;
	const u8 state3 = state << 6;
	u32 count = 0;

	for (; byte < end; byte++) {
		if (((*byte) & 0x03) == state)
			count++;
		if (((*byte) & 0x0C) == state1)
			count++;
		if (((*byte) & 0x30) == state2)
			count++;
		if (((*byte) & 0xC0) == state3)
			count++;
	}

	return count;
}
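
/*
 * Example: when counting state GFS2_BLKST_USED (1), the derived masks
 * are state1 == 0x04, state2 == 0x10 and state3 == 0x40. For the byte
 * 0x44 only (*byte & 0x0C) == 0x04 and (*byte & 0xC0) == 0x40 hold, so
 * that byte contributes a count of two used blocks.
 */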

/**
 * gfs2_rgrp_verify - Verify that a resource group is consistent
 * @rgd: the rgrp
 *
 */

void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd)
{
	struct gfs2_sbd *sdp = rgd->rd_sbd;
	struct gfs2_bitmap *bi = NULL;
	u32 length = rgd->rd_length;
	u32 count[4], tmp;
	int buf, x;

	memset(count, 0, 4 * sizeof(u32));

	/* Count # blocks in each of 4 possible allocation states */
	for (buf = 0; buf < length; buf++) {
		bi = rgd->rd_bits + buf;
		for (x = 0; x < 4; x++)
			count[x] += gfs2_bitcount(rgd,
						  bi->bi_bh->b_data +
						  bi->bi_offset,
						  bi->bi_len, x);
	}

	if (count[0] != rgd->rd_free) {
		if (gfs2_consist_rgrpd(rgd))
			fs_err(sdp, "free data mismatch:  %u != %u\n",
			       count[0], rgd->rd_free);
		return;
	}

	tmp = rgd->rd_data - rgd->rd_free - rgd->rd_dinodes;
	if (count[1] != tmp) {
		if (gfs2_consist_rgrpd(rgd))
			fs_err(sdp, "used data mismatch:  %u != %u\n",
			       count[1], tmp);
		return;
	}

	if (count[2] + count[3] != rgd->rd_dinodes) {
		if (gfs2_consist_rgrpd(rgd))
			fs_err(sdp, "used metadata mismatch:  %u != %u\n",
			       count[2] + count[3], rgd->rd_dinodes);
		return;
	}
}

static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block)
{
	u64 first = rgd->rd_data0;
	u64 last = first + rgd->rd_data;
	return first <= block && block < last;
}

/**
 * gfs2_blk2rgrpd - Find resource group for a given data/meta block number
 * @sdp: The GFS2 superblock
 * @blk: The data block number
 * @exact: True if this needs to be an exact match
 *
 * Returns: The resource group, or NULL if not found
 */

struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk, bool exact)
{
	struct rb_node *n, *next;
	struct gfs2_rgrpd *cur;

	spin_lock(&sdp->sd_rindex_spin);
	n = sdp->sd_rindex_tree.rb_node;
	while (n) {
		cur = rb_entry(n, struct gfs2_rgrpd, rd_node);
		next = NULL;
		if (blk < cur->rd_addr)
			next = n->rb_left;
		else if (blk >= cur->rd_data0 + cur->rd_data)
			next = n->rb_right;
		if (next == NULL) {
			spin_unlock(&sdp->sd_rindex_spin);
			if (exact) {
				if (blk < cur->rd_addr)
					return NULL;
				if (blk >= cur->rd_data0 + cur->rd_data)
					return NULL;
			}
			return cur;
		}
		n = next;
	}
	spin_unlock(&sdp->sd_rindex_spin);

	return NULL;
}

/**
 * gfs2_rgrpd_get_first - get the first Resource Group in the filesystem
 * @sdp: The GFS2 superblock
 *
 * Returns: The first rgrp in the filesystem
 */

struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp)
{
	const struct rb_node *n;
	struct gfs2_rgrpd *rgd;

	spin_lock(&sdp->sd_rindex_spin);
	n = rb_first(&sdp->sd_rindex_tree);
	rgd = rb_entry(n, struct gfs2_rgrpd, rd_node);
	spin_unlock(&sdp->sd_rindex_spin);

	return rgd;
}

/**
 * gfs2_rgrpd_get_next - get the next RG
 * @rgd: the resource group descriptor
 *
 * Returns: The next rgrp
 */

struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd)
{
	struct gfs2_sbd *sdp = rgd->rd_sbd;
	const struct rb_node *n;

	spin_lock(&sdp->sd_rindex_spin);
	n = rb_next(&rgd->rd_node);
	if (n == NULL)
		n = rb_first(&sdp->sd_rindex_tree);

	if (unlikely(&rgd->rd_node == n)) {
		spin_unlock(&sdp->sd_rindex_spin);
		return NULL;
	}
	rgd = rb_entry(n, struct gfs2_rgrpd, rd_node);
	spin_unlock(&sdp->sd_rindex_spin);
	return rgd;
}

void check_and_update_goal(struct gfs2_inode *ip)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	if (!ip->i_goal || gfs2_blk2rgrpd(sdp, ip->i_goal, 1) == NULL)
		ip->i_goal = ip->i_no_addr;
}

void gfs2_free_clones(struct gfs2_rgrpd *rgd)
{
	int x;

	for (x = 0; x < rgd->rd_length; x++) {
		struct gfs2_bitmap *bi = rgd->rd_bits + x;
		kfree(bi->bi_clone);
		bi->bi_clone = NULL;
	}
}

/**
 * gfs2_rs_alloc - make sure we have a reservation assigned to the inode
 * @ip: the inode for this reservation
 */
int gfs2_rs_alloc(struct gfs2_inode *ip)
{
	int error = 0;

	down_write(&ip->i_rw_mutex);
	if (ip->i_res)
		goto out;

	ip->i_res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS);
	if (!ip->i_res) {
		error = -ENOMEM;
		goto out;
	}

	RB_CLEAR_NODE(&ip->i_res->rs_node);
out:
	up_write(&ip->i_rw_mutex);
	return error;
}

static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs)
{
	gfs2_print_dbg(seq, "  B: n:%llu s:%llu b:%u f:%u\n",
		       (unsigned long long)rs->rs_inum,
		       (unsigned long long)gfs2_rbm_to_block(&rs->rs_rbm),
		       rs->rs_rbm.offset, rs->rs_free);
}

/**
 * __rs_deltree - remove a multi-block reservation from the rgd tree
 * @rs: The reservation to remove
 *
 */
static void __rs_deltree(struct gfs2_blkreserv *rs)
{
	struct gfs2_rgrpd *rgd;

	if (!gfs2_rs_active(rs))
		return;

	rgd = rs->rs_rbm.rgd;
	trace_gfs2_rs(rs, TRACE_RS_TREEDEL);
	rb_erase(&rs->rs_node, &rgd->rd_rstree);
	RB_CLEAR_NODE(&rs->rs_node);

	if (rs->rs_free) {
		struct gfs2_bitmap *bi = rbm_bi(&rs->rs_rbm);

		/* return reserved blocks to the rgrp */
		BUG_ON(rs->rs_rbm.rgd->rd_reserved < rs->rs_free);
		rs->rs_rbm.rgd->rd_reserved -= rs->rs_free;
		/* The rgrp extent failure point is likely not to increase;
		   it will only do so if the freed blocks are somehow
		   contiguous with a span of free blocks that follows. Still,
		   it will force the number to be recalculated later. */
		rgd->rd_extfail_pt += rs->rs_free;
		rs->rs_free = 0;
		clear_bit(GBF_FULL, &bi->bi_flags);
	}
}

/**
 * gfs2_rs_deltree - remove a multi-block reservation from the rgd tree
 * @rs: The reservation to remove
 *
 */
void gfs2_rs_deltree(struct gfs2_blkreserv *rs)
{
	struct gfs2_rgrpd *rgd;

	rgd = rs->rs_rbm.rgd;
	if (rgd) {
		spin_lock(&rgd->rd_rsspin);
		__rs_deltree(rs);
		spin_unlock(&rgd->rd_rsspin);
	}
}

/**
 * gfs2_rs_delete - delete a multi-block reservation
 * @ip: The inode for this reservation
 * @wcount: The inode's write count, or NULL
 *
 */
void gfs2_rs_delete(struct gfs2_inode *ip, atomic_t *wcount)
{
	down_write(&ip->i_rw_mutex);
	if (ip->i_res && ((wcount == NULL) || (atomic_read(wcount) <= 1))) {
		gfs2_rs_deltree(ip->i_res);
		BUG_ON(ip->i_res->rs_free);
		kmem_cache_free(gfs2_rsrv_cachep, ip->i_res);
		ip->i_res = NULL;
	}
	up_write(&ip->i_rw_mutex);
}

/**
 * return_all_reservations - return all reserved blocks back to the rgrp.
 * @rgd: the rgrp that needs its space back
 *
 * We previously reserved a bunch of blocks for allocation. Now we need to
 * give them back. This leaves the reservation structures intact, but removes
 * all of their corresponding "no-fly zones".
 */
static void return_all_reservations(struct gfs2_rgrpd *rgd)
{
	struct rb_node *n;
	struct gfs2_blkreserv *rs;

	spin_lock(&rgd->rd_rsspin);
	while ((n = rb_first(&rgd->rd_rstree))) {
		rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
		__rs_deltree(rs);
	}
	spin_unlock(&rgd->rd_rsspin);
}

void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
{
	struct rb_node *n;
	struct gfs2_rgrpd *rgd;
	struct gfs2_glock *gl;

	while ((n = rb_first(&sdp->sd_rindex_tree))) {
		rgd = rb_entry(n, struct gfs2_rgrpd, rd_node);
		gl = rgd->rd_gl;

		rb_erase(n, &sdp->sd_rindex_tree);

		if (gl) {
			spin_lock(&gl->gl_spin);
			gl->gl_object = NULL;
			spin_unlock(&gl->gl_spin);
			gfs2_glock_add_to_lru(gl);
			gfs2_glock_put(gl);
		}

		gfs2_free_clones(rgd);
		kfree(rgd->rd_bits);
		return_all_reservations(rgd);
		kmem_cache_free(gfs2_rgrpd_cachep, rgd);
	}
}

static void gfs2_rindex_print(const struct gfs2_rgrpd *rgd)
{
	pr_info("ri_addr = %llu\n", (unsigned long long)rgd->rd_addr);
	pr_info("ri_length = %u\n", rgd->rd_length);
	pr_info("ri_data0 = %llu\n", (unsigned long long)rgd->rd_data0);
	pr_info("ri_data = %u\n", rgd->rd_data);
	pr_info("ri_bitbytes = %u\n", rgd->rd_bitbytes);
}

/**
 * compute_bitstructs - Compute the bitmap sizes
 * @rgd: The resource group descriptor
 *
 * Calculates bitmap descriptors, one for each block that contains bitmap data
 *
 * Returns: errno
 */

static int compute_bitstructs(struct gfs2_rgrpd *rgd)
{
	struct gfs2_sbd *sdp = rgd->rd_sbd;
	struct gfs2_bitmap *bi;
	u32 length = rgd->rd_length; /* # blocks in hdr & bitmap */
	u32 bytes_left, bytes;
	int x;

	if (!length)
		return -EINVAL;

	rgd->rd_bits = kcalloc(length, sizeof(struct gfs2_bitmap), GFP_NOFS);
	if (!rgd->rd_bits)
		return -ENOMEM;

	bytes_left = rgd->rd_bitbytes;

	for (x = 0; x < length; x++) {
		bi = rgd->rd_bits + x;

		bi->bi_flags = 0;
		/* small rgrp; bitmap stored completely in header block */
		if (length == 1) {
			bytes = bytes_left;
			bi->bi_offset = sizeof(struct gfs2_rgrp);
			bi->bi_start = 0;
			bi->bi_len = bytes;
			bi->bi_blocks = bytes * GFS2_NBBY;
		/* header block */
		} else if (x == 0) {
			bytes = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_rgrp);
			bi->bi_offset = sizeof(struct gfs2_rgrp);
			bi->bi_start = 0;
			bi->bi_len = bytes;
			bi->bi_blocks = bytes * GFS2_NBBY;
		/* last block */
		} else if (x + 1 == length) {
			bytes = bytes_left;
			bi->bi_offset = sizeof(struct gfs2_meta_header);
			bi->bi_start = rgd->rd_bitbytes - bytes_left;
			bi->bi_len = bytes;
			bi->bi_blocks = bytes * GFS2_NBBY;
		/* other blocks */
		} else {
			bytes = sdp->sd_sb.sb_bsize -
				sizeof(struct gfs2_meta_header);
			bi->bi_offset = sizeof(struct gfs2_meta_header);
			bi->bi_start = rgd->rd_bitbytes - bytes_left;
			bi->bi_len = bytes;
			bi->bi_blocks = bytes * GFS2_NBBY;
		}

		bytes_left -= bytes;
	}

	if (bytes_left) {
		gfs2_consist_rgrpd(rgd);
		return -EIO;
	}
	bi = rgd->rd_bits + (length - 1);
	if ((bi->bi_start + bi->bi_len) * GFS2_NBBY != rgd->rd_data) {
		if (gfs2_consist_rgrpd(rgd)) {
			gfs2_rindex_print(rgd);
			fs_err(sdp, "start=%u len=%u offset=%u\n",
			       bi->bi_start, bi->bi_len, bi->bi_offset);
		}
		return -EIO;
	}

	return 0;
}
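
/*
 * Illustrative layout, assuming 4KiB blocks, a 24-byte gfs2_meta_header
 * and a 128-byte gfs2_rgrp: an rgrp with rd_length == 3 would get
 *
 *   bi[0]: bi_offset 128, bi_start 0,    bi_len 3968  (header block)
 *   bi[1]: bi_offset 24,  bi_start 3968, bi_len 4072
 *   bi[2]: bi_offset 24,  bi_start 8040, bi_len rd_bitbytes - 8040
 *
 * with bi_blocks == bi_len * GFS2_NBBY in each case.
 */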

/**
 * gfs2_ri_total - Total up the file system space, according to the rindex.
 * @sdp: the filesystem
 *
 */
u64 gfs2_ri_total(struct gfs2_sbd *sdp)
{
	u64 total_data = 0;
	struct inode *inode = sdp->sd_rindex;
	struct gfs2_inode *ip = GFS2_I(inode);
	char buf[sizeof(struct gfs2_rindex)];
	int error, rgrps;

	for (rgrps = 0;; rgrps++) {
		loff_t pos = rgrps * sizeof(struct gfs2_rindex);

		if (pos + sizeof(struct gfs2_rindex) > i_size_read(inode))
			break;
		error = gfs2_internal_read(ip, buf, &pos,
					   sizeof(struct gfs2_rindex));
		if (error != sizeof(struct gfs2_rindex))
			break;
		total_data += be32_to_cpu(((struct gfs2_rindex *)buf)->ri_data);
	}
	return total_data;
}

static int rgd_insert(struct gfs2_rgrpd *rgd)
{
	struct gfs2_sbd *sdp = rgd->rd_sbd;
	struct rb_node **newn = &sdp->sd_rindex_tree.rb_node, *parent = NULL;

	/* Figure out where to put new node */
	while (*newn) {
		struct gfs2_rgrpd *cur = rb_entry(*newn, struct gfs2_rgrpd,
						  rd_node);

		parent = *newn;
		if (rgd->rd_addr < cur->rd_addr)
			newn = &((*newn)->rb_left);
		else if (rgd->rd_addr > cur->rd_addr)
			newn = &((*newn)->rb_right);
		else
			return -EEXIST;
	}

	rb_link_node(&rgd->rd_node, parent, newn);
	rb_insert_color(&rgd->rd_node, &sdp->sd_rindex_tree);
	sdp->sd_rgrps++;
	return 0;
}

/**
 * read_rindex_entry - Pull in a new resource index entry from the disk
 * @ip: Pointer to the rindex inode
 *
 * Returns: 0 on success, > 0 on EOF, error code otherwise
 */

static int read_rindex_entry(struct gfs2_inode *ip)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	const unsigned bsize = sdp->sd_sb.sb_bsize;
	loff_t pos = sdp->sd_rgrps * sizeof(struct gfs2_rindex);
	struct gfs2_rindex buf;
	int error;
	struct gfs2_rgrpd *rgd;

	if (pos >= i_size_read(&ip->i_inode))
		return 1;

	error = gfs2_internal_read(ip, (char *)&buf, &pos,
				   sizeof(struct gfs2_rindex));

	if (error != sizeof(struct gfs2_rindex))
		return (error == 0) ? 1 : error;

	rgd = kmem_cache_zalloc(gfs2_rgrpd_cachep, GFP_NOFS);
	error = -ENOMEM;
	if (!rgd)
		return error;

	rgd->rd_sbd = sdp;
	rgd->rd_addr = be64_to_cpu(buf.ri_addr);
	rgd->rd_length = be32_to_cpu(buf.ri_length);
	rgd->rd_data0 = be64_to_cpu(buf.ri_data0);
	rgd->rd_data = be32_to_cpu(buf.ri_data);
	rgd->rd_bitbytes = be32_to_cpu(buf.ri_bitbytes);
	spin_lock_init(&rgd->rd_rsspin);

	error = compute_bitstructs(rgd);
	if (error)
		goto fail;

	error = gfs2_glock_get(sdp, rgd->rd_addr,
			       &gfs2_rgrp_glops, CREATE, &rgd->rd_gl);
	if (error)
		goto fail;

	rgd->rd_gl->gl_object = rgd;
	rgd->rd_gl->gl_vm.start = rgd->rd_addr * bsize;
	rgd->rd_gl->gl_vm.end = rgd->rd_gl->gl_vm.start + (rgd->rd_length * bsize) - 1;
	rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lksb.sb_lvbptr;
	rgd->rd_flags &= ~GFS2_RDF_UPTODATE;
	if (rgd->rd_data > sdp->sd_max_rg_data)
		sdp->sd_max_rg_data = rgd->rd_data;
	spin_lock(&sdp->sd_rindex_spin);
	error = rgd_insert(rgd);
	spin_unlock(&sdp->sd_rindex_spin);
	if (!error)
		return 0;

	error = 0; /* someone else read in the rgrp; free it and ignore it */
	gfs2_glock_put(rgd->rd_gl);

fail:
	kfree(rgd->rd_bits);
	kmem_cache_free(gfs2_rgrpd_cachep, rgd);
	return error;
}

/**
 * gfs2_ri_update - Pull in a new resource index from the disk
 * @ip: pointer to the rindex inode
 *
 * Returns: 0 on successful update, error code otherwise
 */

static int gfs2_ri_update(struct gfs2_inode *ip)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	int error;

	do {
		error = read_rindex_entry(ip);
	} while (error == 0);

	if (error < 0)
		return error;

	sdp->sd_rindex_uptodate = 1;
	return 0;
}

/**
 * gfs2_rindex_update - Update the rindex if required
 * @sdp: The GFS2 superblock
 *
 * We grab a lock on the rindex inode to make sure that it doesn't
 * change whilst we are performing an operation. We keep this lock
 * for quite long periods of time compared to other locks. This
 * doesn't matter, since it is shared and it is very, very rarely
 * accessed in the exclusive mode (i.e. only when expanding the filesystem).
 *
 * This makes sure that we're using the latest copy of the resource index
 * special file, which might have been updated if someone expanded the
 * filesystem (via gfs2_grow utility), which adds new resource groups.
 *
 * Returns: 0 on success, error code otherwise
 */

int gfs2_rindex_update(struct gfs2_sbd *sdp)
{
	struct gfs2_inode *ip = GFS2_I(sdp->sd_rindex);
	struct gfs2_glock *gl = ip->i_gl;
	struct gfs2_holder ri_gh;
	int error = 0;
	int unlock_required = 0;

	/* Read new copy from disk if we don't have the latest */
	if (!sdp->sd_rindex_uptodate) {
		if (!gfs2_glock_is_locked_by_me(gl)) {
			error = gfs2_glock_nq_init(gl, LM_ST_SHARED, 0, &ri_gh);
			if (error)
				return error;
			unlock_required = 1;
		}
		if (!sdp->sd_rindex_uptodate)
			error = gfs2_ri_update(ip);
		if (unlock_required)
			gfs2_glock_dq_uninit(&ri_gh);
	}

	return error;
}

static void gfs2_rgrp_in(struct gfs2_rgrpd *rgd, const void *buf)
{
	const struct gfs2_rgrp *str = buf;
	u32 rg_flags;

	rg_flags = be32_to_cpu(str->rg_flags);
	rg_flags &= ~GFS2_RDF_MASK;
	rgd->rd_flags &= GFS2_RDF_MASK;
	rgd->rd_flags |= rg_flags;
	rgd->rd_free = be32_to_cpu(str->rg_free);
	rgd->rd_dinodes = be32_to_cpu(str->rg_dinodes);
	rgd->rd_igeneration = be64_to_cpu(str->rg_igeneration);
}

static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf)
{
	struct gfs2_rgrp *str = buf;

	str->rg_flags = cpu_to_be32(rgd->rd_flags & ~GFS2_RDF_MASK);
	str->rg_free = cpu_to_be32(rgd->rd_free);
	str->rg_dinodes = cpu_to_be32(rgd->rd_dinodes);
	str->__pad = cpu_to_be32(0);
	str->rg_igeneration = cpu_to_be64(rgd->rd_igeneration);
	memset(&str->rg_reserved, 0, sizeof(str->rg_reserved));
}

static int gfs2_rgrp_lvb_valid(struct gfs2_rgrpd *rgd)
{
	struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl;
	struct gfs2_rgrp *str = (struct gfs2_rgrp *)rgd->rd_bits[0].bi_bh->b_data;

	if (rgl->rl_flags != str->rg_flags || rgl->rl_free != str->rg_free ||
	    rgl->rl_dinodes != str->rg_dinodes ||
	    rgl->rl_igeneration != str->rg_igeneration)
		return 0;
	return 1;
}

static void gfs2_rgrp_ondisk2lvb(struct gfs2_rgrp_lvb *rgl, const void *buf)
{
	const struct gfs2_rgrp *str = buf;

	rgl->rl_magic = cpu_to_be32(GFS2_MAGIC);
	rgl->rl_flags = str->rg_flags;
	rgl->rl_free = str->rg_free;
	rgl->rl_dinodes = str->rg_dinodes;
	rgl->rl_igeneration = str->rg_igeneration;
	rgl->__pad = 0UL;
}

static void update_rgrp_lvb_unlinked(struct gfs2_rgrpd *rgd, u32 change)
{
	struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl;
	u32 unlinked = be32_to_cpu(rgl->rl_unlinked) + change;
	rgl->rl_unlinked = cpu_to_be32(unlinked);
}

static u32 count_unlinked(struct gfs2_rgrpd *rgd)
{
	struct gfs2_bitmap *bi;
	const u32 length = rgd->rd_length;
	const u8 *buffer = NULL;
	u32 i, goal, count = 0;

	for (i = 0, bi = rgd->rd_bits; i < length; i++, bi++) {
		goal = 0;
		buffer = bi->bi_bh->b_data + bi->bi_offset;
		WARN_ON(!buffer_uptodate(bi->bi_bh));
		while (goal < bi->bi_len * GFS2_NBBY) {
			goal = gfs2_bitfit(buffer, bi->bi_len, goal,
					   GFS2_BLKST_UNLINKED);
			if (goal == BFITNOENT)
				break;
			count++;
			goal++;
		}
	}

	return count;
}

/**
 * gfs2_rgrp_bh_get - Read in a RG's header and bitmaps
 * @rgd: the struct gfs2_rgrpd describing the RG to read in
 *
 * Read in all of a Resource Group's header and bitmap blocks.
 * Caller must eventually call gfs2_rgrp_go_unlock() to release the bitmaps.
 *
 * Returns: errno
 */

static int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
{
	struct gfs2_sbd *sdp = rgd->rd_sbd;
	struct gfs2_glock *gl = rgd->rd_gl;
	unsigned int length = rgd->rd_length;
	struct gfs2_bitmap *bi;
	unsigned int x, y;
	int error;

	if (rgd->rd_bits[0].bi_bh != NULL)
		return 0;

	for (x = 0; x < length; x++) {
		bi = rgd->rd_bits + x;
		error = gfs2_meta_read(gl, rgd->rd_addr + x, 0, &bi->bi_bh);
		if (error)
			goto fail;
	}

	for (y = length; y--;) {
		bi = rgd->rd_bits + y;
		error = gfs2_meta_wait(sdp, bi->bi_bh);
		if (error)
			goto fail;
		if (gfs2_metatype_check(sdp, bi->bi_bh, y ? GFS2_METATYPE_RB :
					      GFS2_METATYPE_RG)) {
			error = -EIO;
			goto fail;
		}
	}

	if (!(rgd->rd_flags & GFS2_RDF_UPTODATE)) {
		for (x = 0; x < length; x++)
			clear_bit(GBF_FULL, &rgd->rd_bits[x].bi_flags);
		gfs2_rgrp_in(rgd, (rgd->rd_bits[0].bi_bh)->b_data);
		rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK);
		rgd->rd_free_clone = rgd->rd_free;
		/* max out the rgrp allocation failure point */
		rgd->rd_extfail_pt = rgd->rd_free;
	}
	if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) {
		rgd->rd_rgl->rl_unlinked = cpu_to_be32(count_unlinked(rgd));
		gfs2_rgrp_ondisk2lvb(rgd->rd_rgl,
				     rgd->rd_bits[0].bi_bh->b_data);
	} else if (sdp->sd_args.ar_rgrplvb) {
		if (!gfs2_rgrp_lvb_valid(rgd)) {
			gfs2_consist_rgrpd(rgd);
			error = -EIO;
			goto fail;
		}
		if (rgd->rd_rgl->rl_unlinked == 0)
			rgd->rd_flags &= ~GFS2_RDF_CHECK;
	}
	return 0;

fail:
	while (x--) {
		bi = rgd->rd_bits + x;
		brelse(bi->bi_bh);
		bi->bi_bh = NULL;
		gfs2_assert_warn(sdp, !bi->bi_clone);
	}

	return error;
}

static int update_rgrp_lvb(struct gfs2_rgrpd *rgd)
{
	u32 rl_flags;

	if (rgd->rd_flags & GFS2_RDF_UPTODATE)
		return 0;

	if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic)
		return gfs2_rgrp_bh_get(rgd);

	rl_flags = be32_to_cpu(rgd->rd_rgl->rl_flags);
	rl_flags &= ~GFS2_RDF_MASK;
	rgd->rd_flags &= GFS2_RDF_MASK;
	rgd->rd_flags |= (rl_flags | GFS2_RDF_UPTODATE | GFS2_RDF_CHECK);
	if (rgd->rd_rgl->rl_unlinked == 0)
		rgd->rd_flags &= ~GFS2_RDF_CHECK;
	rgd->rd_free = be32_to_cpu(rgd->rd_rgl->rl_free);
	rgd->rd_free_clone = rgd->rd_free;
	rgd->rd_dinodes = be32_to_cpu(rgd->rd_rgl->rl_dinodes);
	rgd->rd_igeneration = be64_to_cpu(rgd->rd_rgl->rl_igeneration);
	return 0;
}

int gfs2_rgrp_go_lock(struct gfs2_holder *gh)
{
	struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object;
	struct gfs2_sbd *sdp = rgd->rd_sbd;

	if (gh->gh_flags & GL_SKIP && sdp->sd_args.ar_rgrplvb)
		return 0;
	return gfs2_rgrp_bh_get(rgd);
}

/**
 * gfs2_rgrp_go_unlock - Release RG bitmaps read in with gfs2_rgrp_bh_get()
 * @gh: The glock holder for the resource group
 *
 */

void gfs2_rgrp_go_unlock(struct gfs2_holder *gh)
{
	struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object;
	int x, length = rgd->rd_length;

	for (x = 0; x < length; x++) {
		struct gfs2_bitmap *bi = rgd->rd_bits + x;
		if (bi->bi_bh) {
			brelse(bi->bi_bh);
			bi->bi_bh = NULL;
		}
	}
}

int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
			     struct buffer_head *bh,
			     const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed)
{
	struct super_block *sb = sdp->sd_vfs;
	u64 blk;
	sector_t start = 0;
	sector_t nr_blks = 0;
	int rv;
	unsigned int x;
	u32 trimmed = 0;
	u8 diff;

	for (x = 0; x < bi->bi_len; x++) {
		const u8 *clone = bi->bi_clone ? bi->bi_clone : bi->bi_bh->b_data;
		clone += bi->bi_offset;
		clone += x;
		if (bh) {
			const u8 *orig = bh->b_data + bi->bi_offset + x;
			diff = ~(*orig | (*orig >> 1)) & (*clone | (*clone >> 1));
		} else {
			diff = ~(*clone | (*clone >> 1));
		}
		diff &= 0x55;
		if (diff == 0)
			continue;
		blk = offset + ((bi->bi_start + x) * GFS2_NBBY);
		while(diff) {
			if (diff & 1) {
				if (nr_blks == 0)
					goto start_new_extent;
				if ((start + nr_blks) != blk) {
					if (nr_blks >= minlen) {
						rv = sb_issue_discard(sb,
							start, nr_blks,
							GFP_NOFS, 0);
						if (rv)
							goto fail;
						trimmed += nr_blks;
					}
					nr_blks = 0;
start_new_extent:
					start = blk;
				}
				nr_blks++;
			}
			diff >>= 2;
			blk++;
		}
	}
	if (nr_blks >= minlen) {
		rv = sb_issue_discard(sb, start, nr_blks, GFP_NOFS, 0);
		if (rv)
			goto fail;
		trimmed += nr_blks;
	}
	if (ptrimmed)
		*ptrimmed = trimmed;
	return 0;

fail:
	if (sdp->sd_args.ar_discard)
		fs_warn(sdp, "error %d on discard request, turning discards off for this filesystem", rv);
	sdp->sd_args.ar_discard = 0;
	return -EIO;
}
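
/*
 * The diff computation above relies on (x | (x >> 1)) & 0x55 collapsing
 * each two-bit entry to a single "not free" bit. For a clone byte of
 * 0x44, for instance, ~(0x44 | 0x22) & 0x55 == 0x11, flagging entries 0
 * and 2 as free and therefore as candidates for discard.
 */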

/**
 * gfs2_fitrim - Generate discard requests for unused bits of the filesystem
 * @filp: Any file on the filesystem
 * @argp: Pointer to the arguments (also used to pass result)
 *
 * Returns: 0 on success, otherwise error code
 */

int gfs2_fitrim(struct file *filp, void __user *argp)
{
	struct inode *inode = file_inode(filp);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct request_queue *q = bdev_get_queue(sdp->sd_vfs->s_bdev);
	struct buffer_head *bh;
	struct gfs2_rgrpd *rgd;
	struct gfs2_rgrpd *rgd_end;
	struct gfs2_holder gh;
	struct fstrim_range r;
	int ret = 0;
	u64 amt;
	u64 trimmed = 0;
	u64 start, end, minlen;
	unsigned int x;
	unsigned bs_shift = sdp->sd_sb.sb_bsize_shift;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (!blk_queue_discard(q))
		return -EOPNOTSUPP;

	if (copy_from_user(&r, argp, sizeof(r)))
		return -EFAULT;

	ret = gfs2_rindex_update(sdp);
	if (ret)
		return ret;

	start = r.start >> bs_shift;
	end = start + (r.len >> bs_shift);
	minlen = max_t(u64, r.minlen,
		       q->limits.discard_granularity) >> bs_shift;

	if (end <= start || minlen > sdp->sd_max_rg_data)
		return -EINVAL;

	rgd = gfs2_blk2rgrpd(sdp, start, 0);
	rgd_end = gfs2_blk2rgrpd(sdp, end, 0);

	if ((gfs2_rgrpd_get_first(sdp) == gfs2_rgrpd_get_next(rgd_end))
	    && (start > rgd_end->rd_data0 + rgd_end->rd_data))
		return -EINVAL; /* start is beyond the end of the fs */

	while (1) {

		ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &gh);
		if (ret)
			goto out;

		if (!(rgd->rd_flags & GFS2_RGF_TRIMMED)) {
			/* Trim each bitmap in the rgrp */
			for (x = 0; x < rgd->rd_length; x++) {
				struct gfs2_bitmap *bi = rgd->rd_bits + x;
				ret = gfs2_rgrp_send_discards(sdp,
						rgd->rd_data0, NULL, bi, minlen,
						&amt);
				if (ret) {
					gfs2_glock_dq_uninit(&gh);
					goto out;
				}
				trimmed += amt;
			}

			/* Mark rgrp as having been trimmed */
			ret = gfs2_trans_begin(sdp, RES_RG_HDR, 0);
			if (ret == 0) {
				bh = rgd->rd_bits[0].bi_bh;
				rgd->rd_flags |= GFS2_RGF_TRIMMED;
				gfs2_trans_add_meta(rgd->rd_gl, bh);
				gfs2_rgrp_out(rgd, bh->b_data);
				gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, bh->b_data);
				gfs2_trans_end(sdp);
			}
		}
		gfs2_glock_dq_uninit(&gh);

		if (rgd == rgd_end)
			break;

		rgd = gfs2_rgrpd_get_next(rgd);
	}

out:
	r.len = trimmed << bs_shift;
	if (copy_to_user(argp, &r, sizeof(r)))
		return -EFAULT;

	return ret;
}

/**
 * rs_insert - insert a new multi-block reservation into the rgrp's rb_tree
 * @ip: the inode structure
 *
 */
static void rs_insert(struct gfs2_inode *ip)
{
	struct rb_node **newn, *parent = NULL;
	int rc;
	struct gfs2_blkreserv *rs = ip->i_res;
	struct gfs2_rgrpd *rgd = rs->rs_rbm.rgd;
	u64 fsblock = gfs2_rbm_to_block(&rs->rs_rbm);

	BUG_ON(gfs2_rs_active(rs));

	spin_lock(&rgd->rd_rsspin);
	newn = &rgd->rd_rstree.rb_node;
	while (*newn) {
		struct gfs2_blkreserv *cur =
			rb_entry(*newn, struct gfs2_blkreserv, rs_node);

		parent = *newn;
		rc = rs_cmp(fsblock, rs->rs_free, cur);
		if (rc > 0)
			newn = &((*newn)->rb_right);
		else if (rc < 0)
			newn = &((*newn)->rb_left);
		else {
			spin_unlock(&rgd->rd_rsspin);
			WARN_ON(1);
			return;
		}
	}

	rb_link_node(&rs->rs_node, parent, newn);
	rb_insert_color(&rs->rs_node, &rgd->rd_rstree);

	/* Do our rgrp accounting for the reservation */
	rgd->rd_reserved += rs->rs_free; /* blocks reserved */
	spin_unlock(&rgd->rd_rsspin);
	trace_gfs2_rs(rs, TRACE_RS_INSERT);
}

/**
 * rg_mblk_search - find a group of multiple free blocks to form a reservation
 * @rgd: the resource group descriptor
 * @ip: pointer to the inode for which we're reserving blocks
 * @ap: the allocation parameters
 *
 */

static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip,
			   const struct gfs2_alloc_parms *ap)
{
	struct gfs2_rbm rbm = { .rgd = rgd, };
	u64 goal;
	struct gfs2_blkreserv *rs = ip->i_res;
	u32 extlen;
	u32 free_blocks = rgd->rd_free_clone - rgd->rd_reserved;
	int ret;
	struct inode *inode = &ip->i_inode;

	if (S_ISDIR(inode->i_mode))
		extlen = 1;
	else {
		extlen = max_t(u32, atomic_read(&rs->rs_sizehint), ap->target);
		extlen = clamp(extlen, RGRP_RSRV_MINBLKS, free_blocks);
	}
	if ((rgd->rd_free_clone < rgd->rd_reserved) || (free_blocks < extlen))
		return;

	/* Find bitmap block that contains bits for goal block */
	if (rgrp_contains_block(rgd, ip->i_goal))
		goal = ip->i_goal;
	else
		goal = rgd->rd_last_alloc + rgd->rd_data0;

	if (WARN_ON(gfs2_rbm_from_block(&rbm, goal)))
		return;

	ret = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &extlen, ip, true, ap);
	if (ret == 0) {
		rs->rs_rbm = rbm;
		rs->rs_free = extlen;
		rs->rs_inum = ip->i_no_addr;
		rs_insert(ip);
	} else {
		if (goal == rgd->rd_last_alloc + rgd->rd_data0)
			rgd->rd_last_alloc = 0;
	}
}

/**
 * gfs2_next_unreserved_block - Return next block that is not reserved
 * @rgd: The resource group
 * @block: The starting block
 * @length: The required length
 * @ip: Ignore any reservations for this inode
 *
 * If the block does not appear in any reservation, then return the
 * block number unchanged. If it does appear in the reservation, then
 * keep looking through the tree of reservations in order to find the
 * first block number which is not reserved.
 */

static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block,
				      u32 length,
				      const struct gfs2_inode *ip)
{
	struct gfs2_blkreserv *rs;
	struct rb_node *n;
	int rc;

	spin_lock(&rgd->rd_rsspin);
	n = rgd->rd_rstree.rb_node;
	while (n) {
		rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
		rc = rs_cmp(block, length, rs);
		if (rc < 0)
			n = n->rb_left;
		else if (rc > 0)
			n = n->rb_right;
		else
			break;
	}

	if (n) {
		while ((rs_cmp(block, length, rs) == 0) && (ip->i_res != rs)) {
			block = gfs2_rbm_to_block(&rs->rs_rbm) + rs->rs_free;
			n = n->rb_right;
			if (n == NULL)
				break;
			rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
		}
	}

	spin_unlock(&rgd->rd_rsspin);
	return block;
}

/**
 * gfs2_reservation_check_and_update - Check for reservations during block alloc
 * @rbm: The current position in the resource group
 * @ip: The inode for which we are searching for blocks
 * @minext: The minimum extent length
 * @maxext: A pointer to the maximum extent structure
 *
 * This checks the current position in the rgrp to see whether there is
 * a reservation covering this block. If not then this function is a
 * no-op. If there is, then the position is moved to the end of the
 * contiguous reservation(s) so that we are pointing at the first
 * non-reserved block.
 *
 * Returns: 0 if no reservation, 1 if @rbm has changed, otherwise an error
 */

static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm,
					     const struct gfs2_inode *ip,
					     u32 minext,
					     struct gfs2_extent *maxext)
{
	u64 block = gfs2_rbm_to_block(rbm);
	u32 extlen = 1;
	u64 nblock;
	int ret;

	/*
	 * If we have a minimum extent length, then skip over any extent
	 * which is less than the min extent length in size.
	 */
	if (minext) {
		extlen = gfs2_free_extlen(rbm, minext);
		if (extlen <= maxext->len)
			goto fail;
	}

	/*
	 * Check the extent which has been found against the reservations
	 * and skip if parts of it are already reserved
	 */
	nblock = gfs2_next_unreserved_block(rbm->rgd, block, extlen, ip);
	if (nblock == block) {
		if (!minext || extlen >= minext)
			return 0;

		if (extlen > maxext->len) {
			maxext->len = extlen;
			maxext->rbm = *rbm;
		}
fail:
		nblock = block + extlen;
	}
	ret = gfs2_rbm_from_block(rbm, nblock);
	if (ret < 0)
		return ret;
	return 1;
}

/**
 * gfs2_rbm_find - Look for blocks of a particular state
 * @rbm: Value/result starting position and final position
 * @state: The state which we want to find
 * @minext: Pointer to the requested extent length (NULL for a single block)
 *          This is updated to be the actual reservation size.
 * @ip: If set, check for reservations
 * @nowrap: Stop looking at the end of the rgrp, rather than wrapping
 *          around until we've reached the starting point.
 * @ap: the allocation parameters
 *
 * Side effects:
 * - If looking for free blocks, we set GBF_FULL on each bitmap which
 *   has no free blocks in it.
 * - If looking for free blocks, we set rd_extfail_pt on each rgrp which
 *   has come up short on a free block search.
 *
 * Returns: 0 on success, -ENOSPC if there is no block of the requested state
 */

static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext,
			 const struct gfs2_inode *ip, bool nowrap,
			 const struct gfs2_alloc_parms *ap)
{
	struct buffer_head *bh;
	int initial_bii;
	u32 initial_offset;
	int first_bii = rbm->bii;
	u32 first_offset = rbm->offset;
	u32 offset;
	u8 *buffer;
	int n = 0;
	int iters = rbm->rgd->rd_length;
	int ret;
	struct gfs2_bitmap *bi;
	struct gfs2_extent maxext = { .rbm.rgd = rbm->rgd, };

	/* If we are not starting at the beginning of a bitmap, then we
	 * need to add one to the bitmap count to ensure that we search
	 * the starting bitmap twice.
	 */
	if (rbm->offset != 0)
		iters++;

	while(1) {
		bi = rbm_bi(rbm);
		if (test_bit(GBF_FULL, &bi->bi_flags) &&
		    (state == GFS2_BLKST_FREE))
			goto next_bitmap;

		bh = bi->bi_bh;
		buffer = bh->b_data + bi->bi_offset;
		WARN_ON(!buffer_uptodate(bh));
		if (state != GFS2_BLKST_UNLINKED && bi->bi_clone)
			buffer = bi->bi_clone + bi->bi_offset;
		initial_offset = rbm->offset;
		offset = gfs2_bitfit(buffer, bi->bi_len, rbm->offset, state);
		if (offset == BFITNOENT)
			goto bitmap_full;
		rbm->offset = offset;
		if (ip == NULL)
			return 0;

		initial_bii = rbm->bii;
		ret = gfs2_reservation_check_and_update(rbm, ip,
							minext ? *minext : 0,
							&maxext);
		if (ret == 0)
			return 0;
		if (ret > 0) {
			n += (rbm->bii - initial_bii);
			goto next_iter;
		}
		if (ret == -E2BIG) {
			rbm->bii = 0;
			rbm->offset = 0;
			n += (rbm->bii - initial_bii);
			goto res_covered_end_of_rgrp;
		}
		return ret;

bitmap_full:	/* Mark bitmap as full and fall through */
		if ((state == GFS2_BLKST_FREE) && initial_offset == 0) {
			struct gfs2_bitmap *bi = rbm_bi(rbm);
			set_bit(GBF_FULL, &bi->bi_flags);
		}

next_bitmap:	/* Find next bitmap in the rgrp */
		rbm->offset = 0;
		rbm->bii++;
		if (rbm->bii == rbm->rgd->rd_length)
			rbm->bii = 0;
res_covered_end_of_rgrp:
		if ((rbm->bii == 0) && nowrap)
			break;
		n++;
next_iter:
		if (n >= iters)
			break;
	}

	if (minext == NULL || state != GFS2_BLKST_FREE)
		return -ENOSPC;

	/* If the extent was too small, and it's smaller than the smallest
	   to have failed before, remember for future reference that it's
	   useless to search this rgrp again for this amount or more. */
	if ((first_offset == 0) && (first_bii == 0) &&
	    (*minext < rbm->rgd->rd_extfail_pt))
		rbm->rgd->rd_extfail_pt = *minext;

	/* If the maximum extent we found is big enough to fulfill the
	   minimum requirements, use it anyway. */
	if (maxext.len) {
		*rbm = maxext.rbm;
		*minext = maxext.len;
		return 0;
	}

	return -ENOSPC;
}

/**
 * try_rgrp_unlink - Look for any unlinked, allocated, but unused inodes
 * @rgd: The rgrp
 * @last_unlinked: block address of the last dinode we unlinked
 * @skip: block address we should explicitly not unlink
 *
 * Any unlinked inodes found are queued to the delete workqueue for
 * disposal; nothing is returned to the caller.
 */

static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip)
{
	u64 block;
	struct gfs2_sbd *sdp = rgd->rd_sbd;
	struct gfs2_glock *gl;
	struct gfs2_inode *ip;
	int error;
	int found = 0;
	struct gfs2_rbm rbm = { .rgd = rgd, .bii = 0, .offset = 0 };

	while (1) {
		down_write(&sdp->sd_log_flush_lock);
		error = gfs2_rbm_find(&rbm, GFS2_BLKST_UNLINKED, NULL, NULL,
				      true, NULL);
		up_write(&sdp->sd_log_flush_lock);
		if (error == -ENOSPC)
			break;
		if (WARN_ON_ONCE(error))
			break;

		block = gfs2_rbm_to_block(&rbm);
		if (gfs2_rbm_from_block(&rbm, block + 1))
			break;
		if (*last_unlinked != NO_BLOCK && block <= *last_unlinked)
			continue;
		if (block == skip)
			continue;
		*last_unlinked = block;

		error = gfs2_glock_get(sdp, block, &gfs2_inode_glops, CREATE, &gl);
		if (error)
			continue;

		/* If the inode is already in cache, we can ignore it here
		 * because the existing inode disposal code will deal with
		 * it when all refs have gone away. Accessing gl_object like
		 * this is not safe in general. Here it is ok because we do
		 * not dereference the pointer, and we only need an approx
		 * answer to whether it is NULL or not.
		 */
		ip = gl->gl_object;

		if (ip || queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0)
			gfs2_glock_put(gl);
		else
			found++;

		/* Limit reclaim to sensible number of tasks */
		if (found > NR_CPUS)
			return;
	}

	rgd->rd_flags &= ~GFS2_RDF_CHECK;
	return;
}

/**
 * gfs2_rgrp_congested - Use stats to figure out whether an rgrp is congested
 * @rgd: The rgrp in question
 * @loops: An indication of how picky we can be (0=very, 1=less so)
 *
 * This function uses the recently added glock statistics in order to
 * figure out whether a particular resource group is suffering from
 * contention from multiple nodes. This is done purely on the basis
 * of timings, since this is the only data we have to work with and
 * our aim here is to reject a resource group which is highly contended
 * but (very important) not to do this too often in order to ensure that
 * we do not end up introducing fragmentation by changing resource
 * groups when not actually required.
 *
 * The calculation is fairly simple, we want to know whether the SRTTB
 * (i.e. smoothed round trip time for blocking operations) to acquire
 * the lock for this rgrp's glock is significantly greater than the
 * time taken for resource groups on average. We introduce a margin in
 * the form of the variable @var which is computed as the sum of the two
 * respective variances, and multiplied by a factor depending on @loops
 * and whether we have a lot of data to base the decision on. This is
 * then tested against the square difference of the means in order to
 * decide whether the result is statistically significant or not.
 *
 * Returns: A boolean verdict on the congestion status
 */

static bool gfs2_rgrp_congested(const struct gfs2_rgrpd *rgd, int loops)
{
	const struct gfs2_glock *gl = rgd->rd_gl;
	const struct gfs2_sbd *sdp = gl->gl_sbd;
	struct gfs2_lkstats *st;
	s64 r_dcount, l_dcount;
	s64 r_srttb, l_srttb;
	s64 srttb_diff;
	s64 sqr_diff;
	s64 var;

	preempt_disable();
	st = &this_cpu_ptr(sdp->sd_lkstats)->lkstats[LM_TYPE_RGRP];
	r_srttb = st->stats[GFS2_LKS_SRTTB];
	r_dcount = st->stats[GFS2_LKS_DCOUNT];
	var = st->stats[GFS2_LKS_SRTTVARB] +
	      gl->gl_stats.stats[GFS2_LKS_SRTTVARB];
	preempt_enable();

	l_srttb = gl->gl_stats.stats[GFS2_LKS_SRTTB];
	l_dcount = gl->gl_stats.stats[GFS2_LKS_DCOUNT];

	if ((l_dcount < 1) || (r_dcount < 1) || (r_srttb == 0))
		return false;

	srttb_diff = r_srttb - l_srttb;
	sqr_diff = srttb_diff * srttb_diff;

	var *= 2;
	if (l_dcount < 8 || r_dcount < 8)
		var *= 2;
	if (loops == 1)
		var *= 2;

	return ((srttb_diff < 0) && (sqr_diff > var));
}
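
/*
 * In other words, the verdict above is "congested" only when the local
 * SRTTB exceeds the per-cpu global mean and
 *
 *   (r_srttb - l_srttb)^2 > k * (var_local + var_global)
 *
 * where k is 2, 4 or 8 depending on the sample counts and on @loops.
 */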

/**
 * gfs2_rgrp_used_recently
 * @rs: The block reservation with the rgrp to test
 * @msecs: The time limit in milliseconds
 *
 * Returns: True if the rgrp glock has been used within the time limit
 */
static bool gfs2_rgrp_used_recently(const struct gfs2_blkreserv *rs,
				    u64 msecs)
{
	u64 tdiff;

	tdiff = ktime_to_ns(ktime_sub(ktime_get_real(),
				      rs->rs_rbm.rgd->rd_gl->gl_dstamp));

	return tdiff > (msecs * 1000 * 1000);
}

static u32 gfs2_orlov_skip(const struct gfs2_inode *ip)
{
	const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	u32 skip;

	get_random_bytes(&skip, sizeof(skip));
	return skip % sdp->sd_rgrps;
}

static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *begin)
{
	struct gfs2_rgrpd *rgd = *pos;
	struct gfs2_sbd *sdp = rgd->rd_sbd;

	rgd = gfs2_rgrpd_get_next(rgd);
	if (rgd == NULL)
		rgd = gfs2_rgrpd_get_first(sdp);
	*pos = rgd;
	if (rgd != begin) /* If we didn't wrap */
		return true;
	return false;
}

/**
 * gfs2_inplace_reserve - Reserve space in the filesystem
 * @ip: the inode to reserve space for
 * @ap: the allocation parameters
 *
 * Returns: errno
 */

int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *ap)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct gfs2_rgrpd *begin = NULL;
	struct gfs2_blkreserv *rs = ip->i_res;
	int error = 0, rg_locked, flags = 0;
	u64 last_unlinked = NO_BLOCK;
	int loops = 0;
	u32 skip = 0;

	if (sdp->sd_args.ar_rgrplvb)
		flags |= GL_SKIP;
	if (gfs2_assert_warn(sdp, ap->target))
		return -EINVAL;
	if (gfs2_rs_active(rs)) {
		begin = rs->rs_rbm.rgd;
	} else if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) {
		rs->rs_rbm.rgd = begin = ip->i_rgd;
	} else {
		check_and_update_goal(ip);
		rs->rs_rbm.rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1);
	}
	if (S_ISDIR(ip->i_inode.i_mode) && (ap->aflags & GFS2_AF_ORLOV))
		skip = gfs2_orlov_skip(ip);
	if (rs->rs_rbm.rgd == NULL)
		return -EBADSLT;

	while (loops < 3) {
		rg_locked = 1;

		if (!gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) {
			rg_locked = 0;
			if (skip && skip--)
				goto next_rgrp;
			if (!gfs2_rs_active(rs) && (loops < 2) &&
			     gfs2_rgrp_used_recently(rs, 1000) &&
			     gfs2_rgrp_congested(rs->rs_rbm.rgd, loops))
				goto next_rgrp;
			error = gfs2_glock_nq_init(rs->rs_rbm.rgd->rd_gl,
						   LM_ST_EXCLUSIVE, flags,
						   &rs->rs_rgd_gh);
			if (unlikely(error))
				return error;
			if (!gfs2_rs_active(rs) && (loops < 2) &&
			    gfs2_rgrp_congested(rs->rs_rbm.rgd, loops))
				goto skip_rgrp;
			if (sdp->sd_args.ar_rgrplvb) {
				error = update_rgrp_lvb(rs->rs_rbm.rgd);
				if (unlikely(error)) {
					gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
					return error;
				}
			}
		}
1955
		/* Skip unusable resource groups */
		if ((rs->rs_rbm.rgd->rd_flags & (GFS2_RGF_NOALLOC |
						 GFS2_RDF_ERROR)) ||
		    (ap->target > rs->rs_rbm.rgd->rd_extfail_pt))
			goto skip_rgrp;

		if (sdp->sd_args.ar_rgrplvb)
			gfs2_rgrp_bh_get(rs->rs_rbm.rgd);

		/* Get a reservation if we don't already have one */
		if (!gfs2_rs_active(rs))
			rg_mblk_search(rs->rs_rbm.rgd, ip, ap);

		/* Skip rgrps when we can't get a reservation on first pass */
		if (!gfs2_rs_active(rs) && (loops < 1))
			goto check_rgrp;

		/* If rgrp has enough free space, use it */
		if (rs->rs_rbm.rgd->rd_free_clone >= ap->target) {
			ip->i_rgd = rs->rs_rbm.rgd;
			return 0;
		}

check_rgrp:
		/* Check for unlinked inodes which can be reclaimed */
		if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK)
			try_rgrp_unlink(rs->rs_rbm.rgd, &last_unlinked,
					ip->i_no_addr);
skip_rgrp:
		/* Drop reservation, if we couldn't use reserved rgrp */
		if (gfs2_rs_active(rs))
			gfs2_rs_deltree(rs);

		/* Unlock rgrp if required */
		if (!rg_locked)
			gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
next_rgrp:
		/* Find the next rgrp, and continue looking */
		if (gfs2_select_rgrp(&rs->rs_rbm.rgd, begin))
			continue;
		if (skip)
			continue;

		/* If we've scanned all the rgrps, but found no free blocks
		 * then this checks for some less likely conditions before
		 * trying again.
		 */
		loops++;
		/* Check that fs hasn't grown if writing to rindex */
		if (ip == GFS2_I(sdp->sd_rindex) && !sdp->sd_rindex_uptodate) {
			error = gfs2_ri_update(ip);
			if (error)
				return error;
		}
		/* Flushing the log may release space */
		if (loops == 2)
			gfs2_log_flush(sdp, NULL, NORMAL_FLUSH);
	}

	return -ENOSPC;
}
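
/*
 * A note on the retry strategy above (derived from the code, for the
 * reader's convenience): on the first pass (loops == 0) an rgrp is only
 * used if a multi-block reservation can be made in it, and on the first
 * two passes rgrps that look congested may be skipped (the significance
 * threshold in gfs2_rgrp_congested() doubles on the second pass).  Once
 * every rgrp has been scanned twice, the log is flushed, since that may
 * turn cloned bitmap blocks back into allocatable ones, and a final
 * pass is made before returning -ENOSPC.
 */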

/**
 * gfs2_inplace_release - release an inplace reservation
 * @ip: the inode the reservation was taken out on
 *
 * Release a reservation made by gfs2_inplace_reserve().
 */

void gfs2_inplace_release(struct gfs2_inode *ip)
{
	struct gfs2_blkreserv *rs = ip->i_res;

	if (rs->rs_rgd_gh.gh_gl)
		gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
}

/**
 * gfs2_get_block_type - Determine the type of a block in an rgrp
 * @rgd: the resource group holding the block
 * @block: the block number
 *
 * Returns: The block type (GFS2_BLKST_*)
 */

static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block)
{
	struct gfs2_rbm rbm = { .rgd = rgd, };
	int ret;

	ret = gfs2_rbm_from_block(&rbm, block);
	WARN_ON_ONCE(ret != 0);

	return gfs2_testbit(&rbm);
}

/**
 * gfs2_alloc_extent - allocate an extent from a given bitmap
 * @rbm: the position of the first block of the extent
 * @dinode: TRUE if the first block we allocate is for a dinode
 * @n: The extent length (value/result)
 *
 * Add the bitmap buffer to the transaction.
 * Set the bits of the allocated blocks to GFS2_BLKST_DINODE or
 * GFS2_BLKST_USED as appropriate to record their new allocation state.
 */
static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode,
			      unsigned int *n)
{
	struct gfs2_rbm pos = { .rgd = rbm->rgd, };
	const unsigned int elen = *n;
	u64 block;
	int ret;

	*n = 1;
	block = gfs2_rbm_to_block(rbm);
	gfs2_trans_add_meta(rbm->rgd->rd_gl, rbm_bi(rbm)->bi_bh);
	gfs2_setbit(rbm, true, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
	block++;
	while (*n < elen) {
		ret = gfs2_rbm_from_block(&pos, block);
		if (ret || gfs2_testbit(&pos) != GFS2_BLKST_FREE)
			break;
		gfs2_trans_add_meta(pos.rgd->rd_gl, rbm_bi(&pos)->bi_bh);
		gfs2_setbit(&pos, true, GFS2_BLKST_USED);
		(*n)++;
		block++;
	}
}
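
/*
 * For example (illustrative only): if @rbm points at a free block and
 * *n == 4 on entry, the first block is set to GFS2_BLKST_USED (or
 * GFS2_BLKST_DINODE) and the loop then tries to claim up to three
 * further blocks, stopping early if it runs off the end of the rgrp or
 * meets a block that is not free.  If only two of the following blocks
 * are free, *n is 3 on return, and the caller gets a shorter extent
 * than it asked for rather than an error.
 */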

/**
 * rgblk_free - Change alloc state of given block(s)
 * @sdp: the filesystem
 * @bstart: the start of a run of blocks to free
 * @blen: the length of the block run (all must lie within ONE RG!)
 * @new_state: the new block state (GFS2_BLKST_*)
 *
 * Returns: Resource group containing the block(s)
 */

static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
				     u32 blen, unsigned char new_state)
{
	struct gfs2_rbm rbm;
	struct gfs2_bitmap *bi, *bi_prev = NULL;

	rbm.rgd = gfs2_blk2rgrpd(sdp, bstart, 1);
	if (!rbm.rgd) {
		if (gfs2_consist(sdp))
			fs_err(sdp, "block = %llu\n", (unsigned long long)bstart);
		return NULL;
	}

	gfs2_rbm_from_block(&rbm, bstart);
	while (blen--) {
		bi = rbm_bi(&rbm);
		if (bi != bi_prev) {
			/*
			 * Take a clone of the bitmap before changing it, so
			 * that allocation keeps seeing the old state until
			 * the transaction has been committed and the clone
			 * is discarded; this stops just-freed blocks from
			 * being reused before their metadata hits the log.
			 */
			if (!bi->bi_clone) {
				bi->bi_clone = kmalloc(bi->bi_bh->b_size,
						       GFP_NOFS | __GFP_NOFAIL);
				memcpy(bi->bi_clone + bi->bi_offset,
				       bi->bi_bh->b_data + bi->bi_offset,
				       bi->bi_len);
			}
			gfs2_trans_add_meta(rbm.rgd->rd_gl, bi->bi_bh);
			bi_prev = bi;
		}
		gfs2_setbit(&rbm, false, new_state);
		gfs2_rbm_incr(&rbm);
	}

	return rbm.rgd;
}

/**
 * gfs2_rgrp_dump - print out an rgrp
 * @seq: The iterator
 * @gl: The glock in question
 *
 */

void gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl)
{
	struct gfs2_rgrpd *rgd = gl->gl_object;
	struct gfs2_blkreserv *trs;
	const struct rb_node *n;

	if (rgd == NULL)
		return;
	gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u r:%u e:%u\n",
		       (unsigned long long)rgd->rd_addr, rgd->rd_flags,
		       rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes,
		       rgd->rd_reserved, rgd->rd_extfail_pt);
	spin_lock(&rgd->rd_rsspin);
	for (n = rb_first(&rgd->rd_rstree); n; n = rb_next(&trs->rs_node)) {
		trs = rb_entry(n, struct gfs2_blkreserv, rs_node);
		dump_rs(seq, trs);
	}
	spin_unlock(&rgd->rd_rsspin);
}

static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd)
{
	struct gfs2_sbd *sdp = rgd->rd_sbd;

	fs_warn(sdp, "rgrp %llu has an error, marking it readonly until umount\n",
		(unsigned long long)rgd->rd_addr);
	fs_warn(sdp, "umount on all nodes and run fsck.gfs2 to fix the error\n");
	gfs2_rgrp_dump(NULL, rgd->rd_gl);
	rgd->rd_flags |= GFS2_RDF_ERROR;
}

/**
 * gfs2_adjust_reservation - Adjust (or remove) a reservation after allocation
 * @ip: The inode we have just allocated blocks for
 * @rbm: The start of the allocated blocks
 * @len: The extent length
 *
 * Adjusts a reservation after an allocation has taken place. If the
 * reservation does not match the allocation, or if it is now empty,
 * then it is removed.
 */

static void gfs2_adjust_reservation(struct gfs2_inode *ip,
				    const struct gfs2_rbm *rbm, unsigned len)
{
	struct gfs2_blkreserv *rs = ip->i_res;
	struct gfs2_rgrpd *rgd = rbm->rgd;
	unsigned rlen;
	u64 block;
	int ret;

	spin_lock(&rgd->rd_rsspin);
	if (gfs2_rs_active(rs)) {
		if (gfs2_rbm_eq(&rs->rs_rbm, rbm)) {
			block = gfs2_rbm_to_block(rbm);
			ret = gfs2_rbm_from_block(&rs->rs_rbm, block + len);
			rlen = min(rs->rs_free, len);
			rs->rs_free -= rlen;
			rgd->rd_reserved -= rlen;
			trace_gfs2_rs(rs, TRACE_RS_CLAIM);
			if (rs->rs_free && !ret)
				goto out;
		}
		__rs_deltree(rs);
	}
out:
	spin_unlock(&rgd->rd_rsspin);
}
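
/*
 * For example (illustrative only): if a reservation starts at block B
 * with rs_free == 10 and an allocation claims blocks B..B+2, the
 * reservation is advanced to start at B+3 with rs_free == 7 and
 * rgd->rd_reserved is reduced by 3.  Had the allocation not started at
 * the reserved position, or had it consumed the whole reservation, the
 * reservation would instead be removed from the tree.
 */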

/**
 * gfs2_set_alloc_start - Set starting point for block allocation
 * @rbm: The rbm which will be set to the required location
 * @ip: The gfs2 inode
 * @dinode: Flag to say if allocation includes a new inode
 *
 * This sets the starting point from the reservation if one is active;
 * otherwise it falls back to guessing a start point based on the
 * inode's goal block or the last allocation point in the rgrp.
 */

static void gfs2_set_alloc_start(struct gfs2_rbm *rbm,
				 const struct gfs2_inode *ip, bool dinode)
{
	u64 goal;

	if (gfs2_rs_active(ip->i_res)) {
		*rbm = ip->i_res->rs_rbm;
		return;
	}

	if (!dinode && rgrp_contains_block(rbm->rgd, ip->i_goal))
		goal = ip->i_goal;
	else
		goal = rbm->rgd->rd_last_alloc + rbm->rgd->rd_data0;

	gfs2_rbm_from_block(rbm, goal);
}

/**
 * gfs2_alloc_blocks - Allocate one or more blocks of data and/or a dinode
 * @ip: the inode to allocate the block for
 * @bn: Used to return the starting block number
 * @nblocks: requested number of blocks/extent length (value/result)
 * @dinode: true if we're allocating a dinode block, otherwise false
 * @generation: the generation number of the inode
 *
 * Returns: 0 or error
 */

int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
		      bool dinode, u64 *generation)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct buffer_head *dibh;
	struct gfs2_rbm rbm = { .rgd = ip->i_rgd, };
	unsigned int ndata;
	u64 block; /* block, within the file system scope */
	int error;

	gfs2_set_alloc_start(&rbm, ip, dinode);
	error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, ip, false, NULL);

	if (error == -ENOSPC) {
		gfs2_set_alloc_start(&rbm, ip, dinode);
		error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, NULL, false,
				      NULL);
	}

	/* Since all blocks are reserved in advance, this shouldn't happen */
	if (error) {
		fs_warn(sdp, "inum=%llu error=%d, nblocks=%u, full=%d fail_pt=%d\n",
			(unsigned long long)ip->i_no_addr, error, *nblocks,
			test_bit(GBF_FULL, &rbm.rgd->rd_bits->bi_flags),
			rbm.rgd->rd_extfail_pt);
		goto rgrp_error;
	}

	gfs2_alloc_extent(&rbm, dinode, nblocks);
	block = gfs2_rbm_to_block(&rbm);
	rbm.rgd->rd_last_alloc = block - rbm.rgd->rd_data0;
	if (gfs2_rs_active(ip->i_res))
		gfs2_adjust_reservation(ip, &rbm, *nblocks);
	ndata = *nblocks;
	if (dinode)
		ndata--;

	if (!dinode) {
		ip->i_goal = block + ndata - 1;
		error = gfs2_meta_inode_buffer(ip, &dibh);
		if (error == 0) {
			struct gfs2_dinode *di =
				(struct gfs2_dinode *)dibh->b_data;
			gfs2_trans_add_meta(ip->i_gl, dibh);
			di->di_goal_meta = di->di_goal_data =
				cpu_to_be64(ip->i_goal);
			brelse(dibh);
		}
	}
	if (rbm.rgd->rd_free < *nblocks) {
		pr_warn("nblocks=%u\n", *nblocks);
		goto rgrp_error;
	}

	rbm.rgd->rd_free -= *nblocks;
	if (dinode) {
		rbm.rgd->rd_dinodes++;
		*generation = rbm.rgd->rd_igeneration++;
		if (*generation == 0)
			*generation = rbm.rgd->rd_igeneration++;
	}

	gfs2_trans_add_meta(rbm.rgd->rd_gl, rbm.rgd->rd_bits[0].bi_bh);
	gfs2_rgrp_out(rbm.rgd, rbm.rgd->rd_bits[0].bi_bh->b_data);
	gfs2_rgrp_ondisk2lvb(rbm.rgd->rd_rgl, rbm.rgd->rd_bits[0].bi_bh->b_data);

	gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0);
	if (dinode)
		gfs2_trans_add_unrevoke(sdp, block, *nblocks);

	gfs2_quota_change(ip, *nblocks, ip->i_inode.i_uid, ip->i_inode.i_gid);

	rbm.rgd->rd_free_clone -= *nblocks;
	trace_gfs2_block_alloc(ip, rbm.rgd, block, *nblocks,
			       dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
	*bn = block;
	return 0;

rgrp_error:
	gfs2_rgrp_error(rbm.rgd);
	return -EIO;
}
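
/*
 * A minimal sketch of how the allocation entry points above fit
 * together, modelled loosely on the in-tree callers.  The function name
 * is hypothetical, and error handling, quota checks and transaction
 * setup are elided, so this is illustrative rather than a drop-in
 * example (hence the #if 0).
 */
#if 0
static int example_alloc_one_block(struct gfs2_inode *ip, u64 *blkno)
{
	struct gfs2_alloc_parms ap = { .target = 1 };
	unsigned int n = 1;
	int error;

	error = gfs2_inplace_reserve(ip, &ap);	/* pick and lock an rgrp */
	if (error)
		return error;

	/* a real caller would start a transaction here */
	error = gfs2_alloc_blocks(ip, blkno, &n, false, NULL);

	gfs2_inplace_release(ip);		/* drop the rgrp glock */
	return error;
}
#endif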

/**
 * __gfs2_free_blocks - free a contiguous run of block(s)
 * @ip: the inode these blocks are being freed from
 * @bstart: first block of a run of contiguous blocks
 * @blen: the length of the block run
 * @meta: 1 if the blocks represent metadata
 *
 */

void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct gfs2_rgrpd *rgd;

	rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE);
	if (!rgd)
		return;
	trace_gfs2_block_alloc(ip, rgd, bstart, blen, GFS2_BLKST_FREE);
	rgd->rd_free += blen;
	rgd->rd_flags &= ~GFS2_RGF_TRIMMED;
	gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
	gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);

	/* Directories keep their data in the metadata address space */
	if (meta || ip->i_depth)
		gfs2_meta_wipe(ip, bstart, blen);
}

/**
 * gfs2_free_meta - free a contiguous run of metadata block(s)
 * @ip: the inode these blocks are being freed from
 * @bstart: first block of a run of contiguous blocks
 * @blen: the length of the block run
 *
 */

void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);

	__gfs2_free_blocks(ip, bstart, blen, 1);
	gfs2_statfs_change(sdp, 0, +blen, 0);
	gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid);
}

void gfs2_unlink_di(struct inode *inode)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct gfs2_rgrpd *rgd;
	u64 blkno = ip->i_no_addr;

	rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_UNLINKED);
	if (!rgd)
		return;
	trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED);
	gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
	gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
	update_rgrp_lvb_unlinked(rgd, 1);
}

static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno)
{
	struct gfs2_sbd *sdp = rgd->rd_sbd;
	struct gfs2_rgrpd *tmp_rgd;

	tmp_rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_FREE);
	if (!tmp_rgd)
		return;
	gfs2_assert_withdraw(sdp, rgd == tmp_rgd);

	if (!rgd->rd_dinodes)
		gfs2_consist_rgrpd(rgd);
	rgd->rd_dinodes--;
	rgd->rd_free++;

	gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
	gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
	update_rgrp_lvb_unlinked(rgd, -1);

	gfs2_statfs_change(sdp, 0, +1, -1);
}

void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
{
	gfs2_free_uninit_di(rgd, ip->i_no_addr);
	trace_gfs2_block_alloc(ip, rgd, ip->i_no_addr, 1, GFS2_BLKST_FREE);
	gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid);
	gfs2_meta_wipe(ip, ip->i_no_addr, 1);
}

/**
 * gfs2_check_blk_type - Check the type of a block
 * @sdp: The superblock
 * @no_addr: The block number to check
 * @type: The block type we are looking for
 *
 * Returns: 0 if the block type matches the expected type
 *          -ESTALE if it doesn't match
 *          or -ve errno if something went wrong while checking
 */

int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type)
{
	struct gfs2_rgrpd *rgd;
	struct gfs2_holder rgd_gh;
	int error = -EINVAL;

	rgd = gfs2_blk2rgrpd(sdp, no_addr, 1);
	if (!rgd)
		goto fail;

	error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_SHARED, 0, &rgd_gh);
	if (error)
		goto fail;

	if (gfs2_get_block_type(rgd, no_addr) != type)
		error = -ESTALE;

	gfs2_glock_dq_uninit(&rgd_gh);
fail:
	return error;
}
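
/*
 * For example (this mirrors, but does not copy, how the inode lookup
 * path uses this helper): before resurrecting an on-disk inode by block
 * number, a caller can verify that the block really is an unlinked
 * dinode with
 *
 *	error = gfs2_check_blk_type(sdp, no_addr, GFS2_BLKST_UNLINKED);
 *
 * and treat -ESTALE as "this block no longer holds the inode we were
 * told about" rather than as filesystem corruption.
 */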

/**
 * gfs2_rlist_add - add a RG to a list of RGs
 * @ip: the inode
 * @rlist: the list of resource groups
 * @block: the block
 *
 * Figure out what RG a block belongs to and add that RG to the list
 *
 * FIXME: Don't use NOFAIL
 *
 */

void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist,
		    u64 block)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct gfs2_rgrpd *rgd;
	struct gfs2_rgrpd **tmp;
	unsigned int new_space;
	unsigned int x;

	if (gfs2_assert_warn(sdp, !rlist->rl_ghs))
		return;

	if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, block))
		rgd = ip->i_rgd;
	else
		rgd = gfs2_blk2rgrpd(sdp, block, 1);
	if (!rgd) {
		fs_err(sdp, "rlist_add: no rgrp for block %llu\n",
		       (unsigned long long)block);
		return;
	}
	ip->i_rgd = rgd;

	for (x = 0; x < rlist->rl_rgrps; x++)
		if (rlist->rl_rgd[x] == rgd)
			return;

	if (rlist->rl_rgrps == rlist->rl_space) {
		new_space = rlist->rl_space + 10;

		tmp = kcalloc(new_space, sizeof(struct gfs2_rgrpd *),
			      GFP_NOFS | __GFP_NOFAIL);

		if (rlist->rl_rgd) {
			memcpy(tmp, rlist->rl_rgd,
			       rlist->rl_space * sizeof(struct gfs2_rgrpd *));
			kfree(rlist->rl_rgd);
		}

		rlist->rl_space = new_space;
		rlist->rl_rgd = tmp;
	}

	rlist->rl_rgd[rlist->rl_rgrps++] = rgd;
}

/**
 * gfs2_rlist_alloc - all RGs have been added to the rlist, now allocate
 *      and initialize an array of glock holders for them
 * @rlist: the list of resource groups
 * @state: the lock state to acquire the RG lock in
 *
 * FIXME: Don't use NOFAIL
 *
 */

void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state)
{
	unsigned int x;

	rlist->rl_ghs = kcalloc(rlist->rl_rgrps, sizeof(struct gfs2_holder),
				GFP_NOFS | __GFP_NOFAIL);
	for (x = 0; x < rlist->rl_rgrps; x++)
		gfs2_holder_init(rlist->rl_rgd[x]->rd_gl, state, 0,
				 &rlist->rl_ghs[x]);
}

/**
 * gfs2_rlist_free - free a resource group list
 * @rlist: the list of resource groups
 *
 */

void gfs2_rlist_free(struct gfs2_rgrp_list *rlist)
{
	unsigned int x;

	kfree(rlist->rl_rgd);

	if (rlist->rl_ghs) {
		for (x = 0; x < rlist->rl_rgrps; x++)
			gfs2_holder_uninit(&rlist->rl_ghs[x]);
		kfree(rlist->rl_ghs);
		rlist->rl_ghs = NULL;
	}
}
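
/*
 * A minimal sketch of the rlist life cycle, loosely following the
 * pattern used when deallocating directory leaf blocks.  The function
 * name and the elided error handling are illustrative assumptions, not
 * in-tree code (hence the #if 0).
 */
#if 0
static void example_rlist_cycle(struct gfs2_inode *ip, const u64 *blocks,
				unsigned int nblocks)
{
	struct gfs2_rgrp_list rlist;
	unsigned int x;

	memset(&rlist, 0, sizeof(struct gfs2_rgrp_list));

	/* Work out which rgrps cover the blocks we want to touch */
	for (x = 0; x < nblocks; x++)
		gfs2_rlist_add(ip, &rlist, blocks[x]);

	/* Allocate holders and lock every rgrp in one go */
	gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE);
	if (gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs))
		goto out;	/* a real caller would report the error */

	/* ... build a transaction and free the blocks here ... */

	gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs);
out:
	gfs2_rlist_free(&rlist);
}
#endif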