punch.c revision f404167dda29a59d2be2882328aeb074b9899669
1/*
2 * punch.c --- deallocate blocks allocated to an inode
3 *
4 * Copyright (C) 2010 Theodore Ts'o.
5 *
6 * %Begin-Header%
7 * This file may be redistributed under the terms of the GNU Library
8 * General Public License, version 2.
9 * %End-Header%
10 */
11
12#include "config.h"
13#include <stdio.h>
14#include <string.h>
15#if HAVE_UNISTD_H
16#include <unistd.h>
17#endif
18#include <errno.h>
19
20#include "ext2_fs.h"
21#include "ext2fs.h"
22
23#undef PUNCH_DEBUG
24
25/*
26 * This function returns 1 if the specified block is all zeros
27 */
28static int check_zero_block(char *buf, int blocksize)
29{
30	char	*cp = buf;
31	int	left = blocksize;
32
33	while (left > 0) {
34		if (*cp++)
35			return 0;
36		left--;
37	}
38	return 1;
39}
40
41/*
42 * This clever recursive function handles i_blocks[] as well as
43 * indirect, double indirect, and triple indirect blocks.  It iterates
44 * over the entries in the i_blocks array or indirect blocks, and for
45 * each one, will recursively handle any indirect blocks and then
46 * frees and deallocates the blocks.
47 */
48static errcode_t ind_punch(ext2_filsys fs, struct ext2_inode *inode,
49			   char *block_buf, blk_t *p, int level,
50			   blk_t start, blk_t count, int max)
51{
52	errcode_t	retval;
53	blk_t		b;
54	int		i;
55	blk64_t		offset, incr;
56	int		freed = 0;
57
58#ifdef PUNCH_DEBUG
59	printf("Entering ind_punch, level %d, start %u, count %u, "
60	       "max %d\n", level, start, count, max);
61#endif
62	incr = 1ULL << ((EXT2_BLOCK_SIZE_BITS(fs->super)-2)*level);
63	for (i=0, offset=0; i < max; i++, p++, offset += incr) {
64		if (offset >= start + count)
65			break;
66		if (*p == 0 || (offset+incr) <= start)
67			continue;
68		b = *p;
69		if (level > 0) {
70			blk_t start2;
71#ifdef PUNCH_DEBUG
72			printf("Reading indirect block %u\n", b);
73#endif
74			retval = ext2fs_read_ind_block(fs, b, block_buf);
75			if (retval)
76				return retval;
77			start2 = (start > offset) ? start - offset : 0;
78			retval = ind_punch(fs, inode, block_buf + fs->blocksize,
79					   (blk_t *) block_buf, level - 1,
80					   start2, count - offset,
81					   fs->blocksize >> 2);
82			if (retval)
83				return retval;
84			retval = ext2fs_write_ind_block(fs, b, block_buf);
85			if (retval)
86				return retval;
87			if (!check_zero_block(block_buf, fs->blocksize))
88				continue;
89		}
90#ifdef PUNCH_DEBUG
91		printf("Freeing block %u (offset %llu)\n", b, offset);
92#endif
93		ext2fs_block_alloc_stats(fs, b, -1);
94		*p = 0;
95		freed++;
96	}
97#ifdef PUNCH_DEBUG
98	printf("Freed %d blocks\n", freed);
99#endif
100	return ext2fs_iblk_sub_blocks(fs, inode, freed);
101}
102
103static errcode_t ext2fs_punch_ind(ext2_filsys fs, struct ext2_inode *inode,
104				  char *block_buf, blk_t start, blk_t count)
105{
106	errcode_t		retval;
107	char			*buf = 0;
108	int			level;
109	int			num = EXT2_NDIR_BLOCKS;
110	blk_t			*bp = inode->i_block;
111	blk_t			addr_per_block;
112	blk64_t			max = EXT2_NDIR_BLOCKS;
113
114	if (!block_buf) {
115		retval = ext2fs_get_array(3, fs->blocksize, &buf);
116		if (retval)
117			return retval;
118		block_buf = buf;
119	}
120
121	addr_per_block = (blk_t) fs->blocksize >> 2;
122
123	for (level = 0; level < 4; level++, max *= (blk64_t)addr_per_block) {
124#ifdef PUNCH_DEBUG
125		printf("Main loop level %d, start %u count %u "
126		       "max %llu num %d\n", level, start, count, max, num);
127#endif
128		if (start < max) {
129			retval = ind_punch(fs, inode, block_buf, bp, level,
130					   start, count, num);
131			if (retval)
132				goto errout;
133			if (count > max)
134				count -= max - start;
135			else
136				break;
137			start = 0;
138		} else
139			start -= max;
140		bp += num;
141		if (level == 0) {
142			num = 1;
143			max = 1;
144		}
145	}
146	retval = 0;
147errout:
148	if (buf)
149		ext2fs_free_mem(&buf);
150	return retval;
151}
152
153#ifdef PUNCH_DEBUG
154
155#define dbg_printf(f, a...)  printf(f, ## a)
156
157static void dbg_print_extent(char *desc, struct ext2fs_extent *extent)
158{
159	if (desc)
160		printf("%s: ", desc);
161	printf("extent: lblk %llu--%llu, len %u, pblk %llu, flags: ",
162	       extent->e_lblk, extent->e_lblk + extent->e_len - 1,
163	       extent->e_len, extent->e_pblk);
164	if (extent->e_flags & EXT2_EXTENT_FLAGS_LEAF)
165		fputs("LEAF ", stdout);
166	if (extent->e_flags & EXT2_EXTENT_FLAGS_UNINIT)
167		fputs("UNINIT ", stdout);
168	if (extent->e_flags & EXT2_EXTENT_FLAGS_SECOND_VISIT)
169		fputs("2ND_VISIT ", stdout);
170	if (!extent->e_flags)
171		fputs("(none)", stdout);
172	fputc('\n', stdout);
173
174}
175#else
176#define dbg_print_extent(desc, ex)	do { } while (0)
177#define dbg_printf(f, a...)		do { } while (0)
178#endif
179
180/* Free a range of blocks, respecting cluster boundaries */
181static errcode_t punch_extent_blocks(ext2_filsys fs, ext2_ino_t ino,
182				     struct ext2_inode *inode,
183				     blk64_t lfree_start, blk64_t free_start,
184				     __u32 free_count, int *freed)
185{
186	blk64_t		pblk;
187	int		freed_now = 0;
188	__u32		cluster_freed;
189	errcode_t	retval = 0;
190
191	/* No bigalloc?  Just free each block. */
192	if (EXT2FS_CLUSTER_RATIO(fs) == 1) {
193		*freed += free_count;
194		while (free_count-- > 0)
195			ext2fs_block_alloc_stats2(fs, free_start++, -1);
196		return retval;
197	}
198
199	/*
200	 * Try to free up to the next cluster boundary.  We assume that all
201	 * blocks in a logical cluster map to blocks from the same physical
202	 * cluster, and that the offsets within the [pl]clusters match.
203	 */
204	if (free_start & EXT2FS_CLUSTER_MASK(fs)) {
205		retval = ext2fs_map_cluster_block(fs, ino, inode,
206						  lfree_start, &pblk);
207		if (retval)
208			goto errout;
209		if (!pblk) {
210			ext2fs_block_alloc_stats2(fs, free_start, -1);
211			freed_now++;
212		}
213		cluster_freed = EXT2FS_CLUSTER_RATIO(fs) -
214			(free_start & EXT2FS_CLUSTER_MASK(fs));
215		if (cluster_freed > free_count)
216			cluster_freed = free_count;
217		free_count -= cluster_freed;
218		free_start += cluster_freed;
219		lfree_start += cluster_freed;
220	}
221
222	/* Free whole clusters from the middle of the range. */
223	while (free_count > 0 && free_count >= EXT2FS_CLUSTER_RATIO(fs)) {
224		ext2fs_block_alloc_stats2(fs, free_start, -1);
225		freed_now++;
226		cluster_freed = EXT2FS_CLUSTER_RATIO(fs);
227		free_count -= cluster_freed;
228		free_start += cluster_freed;
229		lfree_start += cluster_freed;
230	}
231
232	/* Try to free the last cluster. */
233	if (free_count > 0) {
234		retval = ext2fs_map_cluster_block(fs, ino, inode,
235						  lfree_start, &pblk);
236		if (retval)
237			goto errout;
238		if (!pblk) {
239			ext2fs_block_alloc_stats2(fs, free_start, -1);
240			freed_now++;
241		}
242	}
243
244errout:
245	*freed += freed_now;
246	return retval;
247}
248
249static errcode_t ext2fs_punch_extent(ext2_filsys fs, ext2_ino_t ino,
250				     struct ext2_inode *inode,
251				     blk64_t start, blk64_t end)
252{
253	ext2_extent_handle_t	handle = 0;
254	struct ext2fs_extent	extent;
255	errcode_t		retval;
256	blk64_t			free_start, next, lfree_start;
257	__u32			free_count, newlen;
258	int			freed = 0;
259	int			op;
260
261	retval = ext2fs_extent_open2(fs, ino, inode, &handle);
262	if (retval)
263		return retval;
264	/*
265	 * Find the extent closest to the start of the punch range.  We don't
266	 * check the return value because _goto() sets the current node to the
267	 * next-lowest extent if 'start' is in a hole, and doesn't set a
268	 * current node if there was a real error reading the extent tree.
269	 * In that case, _get() will error out.
270	 *
271	 * Note: If _get() returns 'no current node', that simply means that
272	 * there aren't any blocks mapped past this point in the file, so we're
273	 * done.
274	 */
275	ext2fs_extent_goto(handle, start);
276	retval = ext2fs_extent_get(handle, EXT2_EXTENT_CURRENT, &extent);
277	if (retval == EXT2_ET_NO_CURRENT_NODE) {
278		retval = 0;
279		goto errout;
280	} else if (retval)
281		goto errout;
282	while (1) {
283		op = EXT2_EXTENT_NEXT_LEAF;
284		dbg_print_extent("main loop", &extent);
285		next = extent.e_lblk + extent.e_len;
286		dbg_printf("start %llu, end %llu, next %llu\n",
287			   (unsigned long long) start,
288			   (unsigned long long) end,
289			   (unsigned long long) next);
290		if (start <= extent.e_lblk) {
291			if (end < extent.e_lblk)
292				goto next_extent;
293			dbg_printf("Case #%d\n", 1);
294			/* Start of deleted region before extent;
295			   adjust beginning of extent */
296			free_start = extent.e_pblk;
297			lfree_start = extent.e_lblk;
298			if (next > end)
299				free_count = end - extent.e_lblk + 1;
300			else
301				free_count = extent.e_len;
302			extent.e_len -= free_count;
303			extent.e_lblk += free_count;
304			extent.e_pblk += free_count;
305		} else if (end >= next-1) {
306			if (start >= next)
307				break;
308			/* End of deleted region after extent;
309			   adjust end of extent */
310			dbg_printf("Case #%d\n", 2);
311			newlen = start - extent.e_lblk;
312			free_start = extent.e_pblk + newlen;
313			lfree_start = extent.e_lblk + newlen;
314			free_count = extent.e_len - newlen;
315			extent.e_len = newlen;
316		} else {
317			struct ext2fs_extent	newex;
318
319			dbg_printf("Case #%d\n", 3);
320			/* The hard case; we need to split the extent */
321			newex.e_pblk = extent.e_pblk +
322				(end + 1 - extent.e_lblk);
323			newex.e_lblk = end + 1;
324			newex.e_len = next - end - 1;
325			newex.e_flags = extent.e_flags;
326
327			extent.e_len = start - extent.e_lblk;
328			free_start = extent.e_pblk + extent.e_len;
329			lfree_start = extent.e_lblk + extent.e_len;
330			free_count = end - start + 1;
331
332			dbg_print_extent("inserting", &newex);
333			retval = ext2fs_extent_insert(handle,
334					EXT2_EXTENT_INSERT_AFTER, &newex);
335			if (retval)
336				goto errout;
337			/* Now pointing at inserted extent; so go back */
338			retval = ext2fs_extent_get(handle,
339						   EXT2_EXTENT_PREV_LEAF,
340						   &newex);
341			if (retval)
342				goto errout;
343		}
344		if (extent.e_len) {
345			dbg_print_extent("replacing", &extent);
346			retval = ext2fs_extent_replace(handle, 0, &extent);
347		} else {
348			struct ext2fs_extent	newex;
349			blk64_t			old_lblk, next_lblk;
350			dbg_printf("deleting current extent%s\n", "");
351
352			/*
353			 * Save the location of the next leaf, then slip
354			 * back to the current extent.
355			 */
356			retval = ext2fs_extent_get(handle, EXT2_EXTENT_CURRENT,
357						   &newex);
358			if (retval)
359				goto errout;
360			old_lblk = newex.e_lblk;
361
362			retval = ext2fs_extent_get(handle,
363						   EXT2_EXTENT_NEXT_LEAF,
364						   &newex);
365			if (retval == EXT2_ET_EXTENT_NO_NEXT)
366				next_lblk = old_lblk;
367			else if (retval)
368				goto errout;
369			else
370				next_lblk = newex.e_lblk;
371
372			retval = ext2fs_extent_goto(handle, old_lblk);
373			if (retval)
374				goto errout;
375
376			/* Now delete the extent. */
377			retval = ext2fs_extent_delete(handle, 0);
378			if (retval)
379				goto errout;
380
381			/* Jump forward to the next extent. */
382			ext2fs_extent_goto(handle, next_lblk);
383			op = EXT2_EXTENT_CURRENT;
384		}
385		if (retval)
386			goto errout;
387		dbg_printf("Free start %llu, free count = %u\n",
388		       free_start, free_count);
389		retval = punch_extent_blocks(fs, ino, inode, lfree_start,
390					     free_start, free_count, &freed);
391		if (retval)
392			goto errout;
393	next_extent:
394		retval = ext2fs_extent_get(handle, op,
395					   &extent);
396		if (retval == EXT2_ET_EXTENT_NO_NEXT ||
397		    retval == EXT2_ET_NO_CURRENT_NODE)
398			break;
399		if (retval)
400			goto errout;
401	}
402	dbg_printf("Freed %d blocks\n", freed);
403	retval = ext2fs_iblk_sub_blocks(fs, inode, freed);
404errout:
405	ext2fs_extent_free(handle);
406	return retval;
407}
408
409/*
410 * Deallocate all logical blocks starting at start to end, inclusive.
411 * If end is ~0, then this is effectively truncate.
412 */
413errcode_t ext2fs_punch(ext2_filsys fs, ext2_ino_t ino,
414		       struct ext2_inode *inode,
415		       char *block_buf, blk64_t start,
416		       blk64_t end)
417{
418	errcode_t		retval;
419	struct ext2_inode	inode_buf;
420
421	if (start > end)
422		return EINVAL;
423
424	/* Read inode structure if necessary */
425	if (!inode) {
426		retval = ext2fs_read_inode(fs, ino, &inode_buf);
427		if (retval)
428			return retval;
429		inode = &inode_buf;
430	}
431	if (inode->i_flags & EXT4_EXTENTS_FL)
432		retval = ext2fs_punch_extent(fs, ino, inode, start, end);
433	else {
434		blk_t	count;
435
436		if (start > ~0U)
437			return 0;
438		if (end > ~0U)
439			end = ~0U;
440		count = ((end - start + 1) < ~0U) ? (end - start + 1) : ~0U;
441		retval = ext2fs_punch_ind(fs, inode, block_buf,
442					  (blk_t) start, count);
443	}
444	if (retval)
445		return retval;
446
447	return ext2fs_write_inode(fs, ino, inode);
448}
449