1/*
2 * punch.c --- deallocate blocks allocated to an inode
3 *
4 * Copyright (C) 2010 Theodore Ts'o.
5 *
6 * %Begin-Header%
7 * This file may be redistributed under the terms of the GNU Library
8 * General Public License, version 2.
9 * %End-Header%
10 */
11
12#include <stdio.h>
13#include <string.h>
14#if HAVE_UNISTD_H
15#include <unistd.h>
16#endif
17#include <errno.h>
18
19#include "ext2_fs.h"
20#include "ext2fs.h"
21
22#undef PUNCH_DEBUG
23
24/*
25 * This function returns 1 if the specified block is all zeros
26 */
27static int check_zero_block(char *buf, int blocksize)
28{
29	char	*cp = buf;
30	int	left = blocksize;
31
32	while (left > 0) {
33		if (*cp++)
34			return 0;
35		left--;
36	}
37	return 1;
38}
39
40/*
41 * This clever recursive function handles i_blocks[] as well as
42 * indirect, double indirect, and triple indirect blocks.  It iterates
43 * over the entries in the i_blocks array or indirect blocks, and for
44 * each one, will recursively handle any indirect blocks and then
45 * frees and deallocates the blocks.
46 */
47static errcode_t ind_punch(ext2_filsys fs, struct ext2_inode *inode,
48			   char *block_buf, blk_t *p, int level,
49			   blk_t start, blk_t count, int max)
50{
51	errcode_t	retval;
52	blk_t		b;
53	int		i;
54	blk64_t		offset, incr;
55	int		freed = 0;
56
57#ifdef PUNCH_DEBUG
58	printf("Entering ind_punch, level %d, start %u, count %u, "
59	       "max %d\n", level, start, count, max);
60#endif
61	incr = 1ULL << ((EXT2_BLOCK_SIZE_BITS(fs->super)-2)*level);
62	for (i=0, offset=0; i < max; i++, p++, offset += incr) {
63		if (offset >= start + count)
64			break;
65		if (*p == 0 || (offset+incr) <= start)
66			continue;
67		b = *p;
68		if (level > 0) {
69			blk_t start2;
70#ifdef PUNCH_DEBUG
71			printf("Reading indirect block %u\n", b);
72#endif
73			retval = ext2fs_read_ind_block(fs, b, block_buf);
74			if (retval)
75				return retval;
76			start2 = (start > offset) ? start - offset : 0;
77			retval = ind_punch(fs, inode, block_buf + fs->blocksize,
78					   (blk_t *) block_buf, level - 1,
79					   start2, count - offset,
80					   fs->blocksize >> 2);
81			if (retval)
82				return retval;
83			retval = ext2fs_write_ind_block(fs, b, block_buf);
84			if (retval)
85				return retval;
86			if (!check_zero_block(block_buf, fs->blocksize))
87				continue;
88		}
89#ifdef PUNCH_DEBUG
90		printf("Freeing block %u (offset %llu)\n", b, offset);
91#endif
92		ext2fs_block_alloc_stats(fs, b, -1);
93		*p = 0;
94		freed++;
95	}
96#ifdef PUNCH_DEBUG
97	printf("Freed %d blocks\n", freed);
98#endif
99	return ext2fs_iblk_sub_blocks(fs, inode, freed);
100}
101
102static errcode_t ext2fs_punch_ind(ext2_filsys fs, struct ext2_inode *inode,
103				  char *block_buf, blk_t start, blk_t count)
104{
105	errcode_t		retval;
106	char			*buf = 0;
107	int			level;
108	int			num = EXT2_NDIR_BLOCKS;
109	blk_t			*bp = inode->i_block;
110	blk_t			addr_per_block;
111	blk64_t			max = EXT2_NDIR_BLOCKS;
112
113	if (!block_buf) {
114		retval = ext2fs_get_array(3, fs->blocksize, &buf);
115		if (retval)
116			return retval;
117		block_buf = buf;
118	}
119
120	addr_per_block = (blk_t) fs->blocksize >> 2;
121
122	for (level = 0; level < 4; level++, max *= (blk64_t)addr_per_block) {
123#ifdef PUNCH_DEBUG
124		printf("Main loop level %d, start %u count %u "
125		       "max %llu num %d\n", level, start, count, max, num);
126#endif
127		if (start < max) {
128			retval = ind_punch(fs, inode, block_buf, bp, level,
129					   start, count, num);
130			if (retval)
131				goto errout;
132			if (count > max)
133				count -= max - start;
134			else
135				break;
136			start = 0;
137		} else
138			start -= max;
139		bp += num;
140		if (level == 0) {
141			num = 1;
142			max = 1;
143		}
144	}
145	retval = 0;
146errout:
147	if (buf)
148		ext2fs_free_mem(&buf);
149	return retval;
150}
151
152#ifdef PUNCH_DEBUG
153
154#define dbg_printf(f, a...)  printf(f, ## a)
155
156static void dbg_print_extent(char *desc, struct ext2fs_extent *extent)
157{
158	if (desc)
159		printf("%s: ", desc);
160	printf("extent: lblk %llu--%llu, len %u, pblk %llu, flags: ",
161	       extent->e_lblk, extent->e_lblk + extent->e_len - 1,
162	       extent->e_len, extent->e_pblk);
163	if (extent->e_flags & EXT2_EXTENT_FLAGS_LEAF)
164		fputs("LEAF ", stdout);
165	if (extent->e_flags & EXT2_EXTENT_FLAGS_UNINIT)
166		fputs("UNINIT ", stdout);
167	if (extent->e_flags & EXT2_EXTENT_FLAGS_SECOND_VISIT)
168		fputs("2ND_VISIT ", stdout);
169	if (!extent->e_flags)
170		fputs("(none)", stdout);
171	fputc('\n', stdout);
172
173}
174#else
175#define dbg_print_extent(desc, ex)	do { } while (0)
176#define dbg_printf(f, a...)		do { } while (0)
177#endif
178
179/* Free a range of blocks, respecting cluster boundaries */
180static errcode_t punch_extent_blocks(ext2_filsys fs, ext2_ino_t ino,
181				     struct ext2_inode *inode,
182				     blk64_t lfree_start, blk64_t free_start,
183				     __u32 free_count, int *freed)
184{
185	blk64_t		pblk;
186	int		freed_now = 0;
187	__u32		cluster_freed;
188	errcode_t	retval = 0;
189
190	/* No bigalloc?  Just free each block. */
191	if (EXT2FS_CLUSTER_RATIO(fs) == 1) {
192		*freed += free_count;
193		while (free_count-- > 0)
194			ext2fs_block_alloc_stats2(fs, free_start++, -1);
195		return retval;
196	}
197
198	/*
199	 * Try to free up to the next cluster boundary.  We assume that all
200	 * blocks in a logical cluster map to blocks from the same physical
201	 * cluster, and that the offsets within the [pl]clusters match.
202	 */
203	if (free_start & EXT2FS_CLUSTER_MASK(fs)) {
204		retval = ext2fs_map_cluster_block(fs, ino, inode,
205						  lfree_start, &pblk);
206		if (retval)
207			goto errout;
208		if (!pblk) {
209			ext2fs_block_alloc_stats2(fs, free_start, -1);
210			freed_now++;
211		}
212		cluster_freed = EXT2FS_CLUSTER_RATIO(fs) -
213			(free_start & EXT2FS_CLUSTER_MASK(fs));
214		if (cluster_freed > free_count)
215			cluster_freed = free_count;
216		free_count -= cluster_freed;
217		free_start += cluster_freed;
218		lfree_start += cluster_freed;
219	}
220
221	/* Free whole clusters from the middle of the range. */
222	while (free_count > 0 && free_count >= EXT2FS_CLUSTER_RATIO(fs)) {
223		ext2fs_block_alloc_stats2(fs, free_start, -1);
224		freed_now++;
225		cluster_freed = EXT2FS_CLUSTER_RATIO(fs);
226		free_count -= cluster_freed;
227		free_start += cluster_freed;
228		lfree_start += cluster_freed;
229	}
230
231	/* Try to free the last cluster. */
232	if (free_count > 0) {
233		retval = ext2fs_map_cluster_block(fs, ino, inode,
234						  lfree_start, &pblk);
235		if (retval)
236			goto errout;
237		if (!pblk) {
238			ext2fs_block_alloc_stats2(fs, free_start, -1);
239			freed_now++;
240		}
241	}
242
243errout:
244	*freed += freed_now;
245	return retval;
246}
247
248static errcode_t ext2fs_punch_extent(ext2_filsys fs, ext2_ino_t ino,
249				     struct ext2_inode *inode,
250				     blk64_t start, blk64_t end)
251{
252	ext2_extent_handle_t	handle = 0;
253	struct ext2fs_extent	extent;
254	errcode_t		retval;
255	blk64_t			free_start, next, lfree_start;
256	__u32			free_count, newlen;
257	int			freed = 0;
258	int			op;
259
260	retval = ext2fs_extent_open2(fs, ino, inode, &handle);
261	if (retval)
262		return retval;
263	/*
264	 * Find the extent closest to the start of the punch range.  We don't
265	 * check the return value because _goto() sets the current node to the
266	 * next-lowest extent if 'start' is in a hole, and doesn't set a
267	 * current node if there was a real error reading the extent tree.
268	 * In that case, _get() will error out.
269	 *
270	 * Note: If _get() returns 'no current node', that simply means that
271	 * there aren't any blocks mapped past this point in the file, so we're
272	 * done.
273	 */
274	ext2fs_extent_goto(handle, start);
275	retval = ext2fs_extent_get(handle, EXT2_EXTENT_CURRENT, &extent);
276	if (retval == EXT2_ET_NO_CURRENT_NODE) {
277		retval = 0;
278		goto errout;
279	} else if (retval)
280		goto errout;
281	while (1) {
282		op = EXT2_EXTENT_NEXT_LEAF;
283		dbg_print_extent("main loop", &extent);
284		next = extent.e_lblk + extent.e_len;
285		dbg_printf("start %llu, end %llu, next %llu\n",
286			   (unsigned long long) start,
287			   (unsigned long long) end,
288			   (unsigned long long) next);
289		if (start <= extent.e_lblk) {
290			if (end < extent.e_lblk)
291				goto next_extent;
292			dbg_printf("Case #%d\n", 1);
293			/* Start of deleted region before extent;
294			   adjust beginning of extent */
295			free_start = extent.e_pblk;
296			lfree_start = extent.e_lblk;
297			if (next > end)
298				free_count = end - extent.e_lblk + 1;
299			else
300				free_count = extent.e_len;
301			extent.e_len -= free_count;
302			extent.e_lblk += free_count;
303			extent.e_pblk += free_count;
304		} else if (end >= next-1) {
305			if (start >= next)
306				break;
307			/* End of deleted region after extent;
308			   adjust end of extent */
309			dbg_printf("Case #%d\n", 2);
310			newlen = start - extent.e_lblk;
311			free_start = extent.e_pblk + newlen;
312			lfree_start = extent.e_lblk + newlen;
313			free_count = extent.e_len - newlen;
314			extent.e_len = newlen;
315		} else {
316			struct ext2fs_extent	newex;
317
318			dbg_printf("Case #%d\n", 3);
319			/* The hard case; we need to split the extent */
320			newex.e_pblk = extent.e_pblk +
321				(end + 1 - extent.e_lblk);
322			newex.e_lblk = end + 1;
323			newex.e_len = next - end - 1;
324			newex.e_flags = extent.e_flags;
325
326			extent.e_len = start - extent.e_lblk;
327			free_start = extent.e_pblk + extent.e_len;
328			lfree_start = extent.e_lblk + extent.e_len;
329			free_count = end - start + 1;
330
331			dbg_print_extent("inserting", &newex);
332			retval = ext2fs_extent_insert(handle,
333					EXT2_EXTENT_INSERT_AFTER, &newex);
334			if (retval)
335				goto errout;
336			/* Now pointing at inserted extent; so go back */
337			retval = ext2fs_extent_get(handle,
338						   EXT2_EXTENT_PREV_LEAF,
339						   &newex);
340			if (retval)
341				goto errout;
342		}
343		if (extent.e_len) {
344			dbg_print_extent("replacing", &extent);
345			retval = ext2fs_extent_replace(handle, 0, &extent);
346		} else {
347			struct ext2fs_extent	newex;
348			blk64_t			old_lblk, next_lblk;
349			dbg_printf("deleting current extent%s\n", "");
350
351			/*
352			 * Save the location of the next leaf, then slip
353			 * back to the current extent.
354			 */
355			retval = ext2fs_extent_get(handle, EXT2_EXTENT_CURRENT,
356						   &newex);
357			if (retval)
358				goto errout;
359			old_lblk = newex.e_lblk;
360
361			retval = ext2fs_extent_get(handle,
362						   EXT2_EXTENT_NEXT_LEAF,
363						   &newex);
364			if (retval == EXT2_ET_EXTENT_NO_NEXT)
365				next_lblk = old_lblk;
366			else if (retval)
367				goto errout;
368			else
369				next_lblk = newex.e_lblk;
370
371			retval = ext2fs_extent_goto(handle, old_lblk);
372			if (retval)
373				goto errout;
374
375			/* Now delete the extent. */
376			retval = ext2fs_extent_delete(handle, 0);
377			if (retval)
378				goto errout;
379
380			/* Jump forward to the next extent. */
381			ext2fs_extent_goto(handle, next_lblk);
382			op = EXT2_EXTENT_CURRENT;
383		}
384		if (retval)
385			goto errout;
386		dbg_printf("Free start %llu, free count = %u\n",
387		       free_start, free_count);
388		retval = punch_extent_blocks(fs, ino, inode, lfree_start,
389					     free_start, free_count, &freed);
390		if (retval)
391			goto errout;
392	next_extent:
393		retval = ext2fs_extent_get(handle, op,
394					   &extent);
395		if (retval == EXT2_ET_EXTENT_NO_NEXT ||
396		    retval == EXT2_ET_NO_CURRENT_NODE)
397			break;
398		if (retval)
399			goto errout;
400	}
401	dbg_printf("Freed %d blocks\n", freed);
402	retval = ext2fs_iblk_sub_blocks(fs, inode, freed);
403errout:
404	ext2fs_extent_free(handle);
405	return retval;
406}
407
408/*
409 * Deallocate all logical blocks starting at start to end, inclusive.
410 * If end is ~0, then this is effectively truncate.
411 */
412errcode_t ext2fs_punch(ext2_filsys fs, ext2_ino_t ino,
413		       struct ext2_inode *inode,
414		       char *block_buf, blk64_t start,
415		       blk64_t end)
416{
417	errcode_t		retval;
418	struct ext2_inode	inode_buf;
419
420	if (start > end)
421		return EINVAL;
422
423	/* Read inode structure if necessary */
424	if (!inode) {
425		retval = ext2fs_read_inode(fs, ino, &inode_buf);
426		if (retval)
427			return retval;
428		inode = &inode_buf;
429	}
430	if (inode->i_flags & EXT4_EXTENTS_FL)
431		retval = ext2fs_punch_extent(fs, ino, inode, start, end);
432	else {
433		blk_t	count;
434
435		if (start > ~0U)
436			return 0;
437		if (end > ~0U)
438			end = ~0U;
439		count = ((end - start + 1) < ~0U) ? (end - start + 1) : ~0U;
440		retval = ext2fs_punch_ind(fs, inode, block_buf,
441					  (blk_t) start, count);
442	}
443	if (retval)
444		return retval;
445
446	return ext2fs_write_inode(fs, ino, inode);
447}
448