1/*
2 * extents.c --- rebuild extent tree
3 *
4 * Copyright (C) 2014 Oracle.
5 *
6 * %Begin-Header%
7 * This file may be redistributed under the terms of the GNU Public
8 * License, version 2.
9 * %End-Header%
10 */
11
12#include "config.h"
13#include <string.h>
14#include <ctype.h>
15#include <errno.h>
16#include "e2fsck.h"
17#include "problem.h"
18
19#undef DEBUG
20#undef DEBUG_SUMMARY
21#undef DEBUG_FREE
22
23#define NUM_EXTENTS	341	/* about one ETB' worth of extents */
24
25static errcode_t e2fsck_rebuild_extents(e2fsck_t ctx, ext2_ino_t ino);
26
27/* Schedule an inode to have its extent tree rebuilt during pass 1E. */
28errcode_t e2fsck_rebuild_extents_later(e2fsck_t ctx, ext2_ino_t ino)
29{
30	errcode_t retval = 0;
31
32	if (!ext2fs_has_feature_extents(ctx->fs->super) ||
33	    (ctx->options & E2F_OPT_NO) ||
34	    (ino != EXT2_ROOT_INO && ino < ctx->fs->super->s_first_ino))
35		return 0;
36
37	if (ctx->flags & E2F_FLAG_ALLOC_OK)
38		return e2fsck_rebuild_extents(ctx, ino);
39
40	if (!ctx->inodes_to_rebuild)
41		retval = e2fsck_allocate_inode_bitmap(ctx->fs,
42					     _("extent rebuild inode map"),
43					     EXT2FS_BMAP64_RBTREE,
44					     "inodes_to_rebuild",
45					     &ctx->inodes_to_rebuild);
46	if (retval)
47		return retval;
48
49	ext2fs_mark_inode_bitmap2(ctx->inodes_to_rebuild, ino);
50	return 0;
51}
52
53/* Ask if an inode will have its extents rebuilt during pass 1E. */
54int e2fsck_ino_will_be_rebuilt(e2fsck_t ctx, ext2_ino_t ino)
55{
56	if (!ctx->inodes_to_rebuild)
57		return 0;
58	return ext2fs_test_inode_bitmap2(ctx->inodes_to_rebuild, ino);
59}
60
61struct extent_list {
62	blk64_t blocks_freed;
63	struct ext2fs_extent *extents;
64	unsigned int count;
65	unsigned int size;
66	unsigned int ext_read;
67	errcode_t retval;
68	ext2_ino_t ino;
69};
70
71static errcode_t load_extents(e2fsck_t ctx, struct extent_list *list)
72{
73	ext2_filsys		fs = ctx->fs;
74	ext2_extent_handle_t	handle;
75	struct ext2fs_extent	extent;
76	errcode_t		retval;
77
78	retval = ext2fs_extent_open(fs, list->ino, &handle);
79	if (retval)
80		return retval;
81
82	retval = ext2fs_extent_get(handle, EXT2_EXTENT_ROOT, &extent);
83	if (retval)
84		goto out;
85
86	do {
87		if (extent.e_flags & EXT2_EXTENT_FLAGS_SECOND_VISIT)
88			goto next;
89
90		/* Internal node; free it and we'll re-allocate it later */
91		if (!(extent.e_flags & EXT2_EXTENT_FLAGS_LEAF)) {
92#if defined(DEBUG) || defined(DEBUG_FREE)
93			printf("ino=%d free=%llu bf=%llu\n", list->ino,
94					extent.e_pblk, list->blocks_freed + 1);
95#endif
96			list->blocks_freed++;
97			ext2fs_block_alloc_stats2(fs, extent.e_pblk, -1);
98			goto next;
99		}
100
101		list->ext_read++;
102		/* Can we attach it to the previous extent? */
103		if (list->count) {
104			struct ext2fs_extent *last = list->extents +
105						     list->count - 1;
106			blk64_t end = last->e_len + extent.e_len;
107
108			if (last->e_pblk + last->e_len == extent.e_pblk &&
109			    last->e_lblk + last->e_len == extent.e_lblk &&
110			    (last->e_flags & EXT2_EXTENT_FLAGS_UNINIT) ==
111			    (extent.e_flags & EXT2_EXTENT_FLAGS_UNINIT) &&
112			    end < (1ULL << 32)) {
113				last->e_len += extent.e_len;
114#ifdef DEBUG
115				printf("R: ino=%d len=%u\n", list->ino,
116						last->e_len);
117#endif
118				goto next;
119			}
120		}
121
122		/* Do we need to expand? */
123		if (list->count == list->size) {
124			unsigned int new_size = (list->size + NUM_EXTENTS) *
125						sizeof(struct ext2fs_extent);
126			retval = ext2fs_resize_mem(0, new_size, &list->extents);
127			if (retval)
128				goto out;
129			list->size += NUM_EXTENTS;
130		}
131
132		/* Add a new extent */
133		memcpy(list->extents + list->count, &extent, sizeof(extent));
134#ifdef DEBUG
135		printf("R: ino=%d pblk=%llu lblk=%llu len=%u\n", list->ino,
136				extent.e_pblk, extent.e_lblk, extent.e_len);
137#endif
138		list->count++;
139next:
140		retval = ext2fs_extent_get(handle, EXT2_EXTENT_NEXT, &extent);
141	} while (retval == 0);
142
143out:
144	/* Ok if we run off the end */
145	if (retval == EXT2_ET_EXTENT_NO_NEXT)
146		retval = 0;
147	ext2fs_extent_free(handle);
148	return retval;
149}
150
151static int find_blocks(ext2_filsys fs, blk64_t *blocknr, e2_blkcnt_t blockcnt,
152		       blk64_t ref_blk EXT2FS_ATTR((unused)),
153		       int ref_offset EXT2FS_ATTR((unused)), void *priv_data)
154{
155	struct extent_list *list = priv_data;
156
157	/* Internal node? */
158	if (blockcnt < 0) {
159#if defined(DEBUG) || defined(DEBUG_FREE)
160		printf("ino=%d free=%llu bf=%llu\n", list->ino, *blocknr,
161				list->blocks_freed + 1);
162#endif
163		list->blocks_freed++;
164		ext2fs_block_alloc_stats2(fs, *blocknr, -1);
165		return 0;
166	}
167
168	/* Can we attach it to the previous extent? */
169	if (list->count) {
170		struct ext2fs_extent *last = list->extents +
171					     list->count - 1;
172		blk64_t end = last->e_len + 1;
173
174		if (last->e_pblk + last->e_len == *blocknr &&
175		    end < (1ULL << 32)) {
176			last->e_len++;
177#ifdef DEBUG
178			printf("R: ino=%d len=%u\n", list->ino, last->e_len);
179#endif
180			return 0;
181		}
182	}
183
184	/* Do we need to expand? */
185	if (list->count == list->size) {
186		unsigned int new_size = (list->size + NUM_EXTENTS) *
187					sizeof(struct ext2fs_extent);
188		list->retval = ext2fs_resize_mem(0, new_size, &list->extents);
189		if (list->retval)
190			return BLOCK_ABORT;
191		list->size += NUM_EXTENTS;
192	}
193
194	/* Add a new extent */
195	list->extents[list->count].e_pblk = *blocknr;
196	list->extents[list->count].e_lblk = blockcnt;
197	list->extents[list->count].e_len = 1;
198	list->extents[list->count].e_flags = 0;
199#ifdef DEBUG
200	printf("R: ino=%d pblk=%llu lblk=%llu len=%u\n", list->ino, *blocknr,
201			blockcnt, 1);
202#endif
203	list->count++;
204
205	return 0;
206}
207
208static errcode_t rebuild_extent_tree(e2fsck_t ctx, struct extent_list *list,
209				     ext2_ino_t ino)
210{
211	struct ext2_inode_large	inode;
212	errcode_t		retval;
213	ext2_extent_handle_t	handle;
214	unsigned int		i, ext_written;
215	struct ext2fs_extent	*ex, extent;
216	blk64_t			start_val, delta;
217
218	list->count = 0;
219	list->blocks_freed = 0;
220	list->ino = ino;
221	list->ext_read = 0;
222	e2fsck_read_inode_full(ctx, ino, EXT2_INODE(&inode), sizeof(inode),
223			       "rebuild_extents");
224
225	/* Skip deleted inodes and inline data files */
226	if (inode.i_links_count == 0 ||
227	    inode.i_flags & EXT4_INLINE_DATA_FL)
228		return 0;
229
230	/* Collect lblk->pblk mappings */
231	if (inode.i_flags & EXT4_EXTENTS_FL) {
232		retval = load_extents(ctx, list);
233		if (retval)
234			goto err;
235		goto extents_loaded;
236	}
237
238	retval = ext2fs_block_iterate3(ctx->fs, ino, BLOCK_FLAG_READ_ONLY, 0,
239				       find_blocks, list);
240	if (retval)
241		goto err;
242	if (list->retval) {
243		retval = list->retval;
244		goto err;
245	}
246
247extents_loaded:
248	/* Reset extent tree */
249	inode.i_flags &= ~EXT4_EXTENTS_FL;
250	memset(inode.i_block, 0, sizeof(inode.i_block));
251
252	/* Make a note of freed blocks */
253	quota_data_sub(ctx->qctx, &inode, ino,
254		       list->blocks_freed * ctx->fs->blocksize);
255	retval = ext2fs_iblk_sub_blocks(ctx->fs, EXT2_INODE(&inode),
256					list->blocks_freed);
257	if (retval)
258		goto err;
259
260	/* Now stuff extents into the file */
261	retval = ext2fs_extent_open2(ctx->fs, ino, EXT2_INODE(&inode), &handle);
262	if (retval)
263		goto err;
264
265	ext_written = 0;
266	start_val = ext2fs_inode_i_blocks(ctx->fs, EXT2_INODE(&inode));
267	for (i = 0, ex = list->extents; i < list->count; i++, ex++) {
268		memcpy(&extent, ex, sizeof(struct ext2fs_extent));
269		extent.e_flags &= EXT2_EXTENT_FLAGS_UNINIT;
270		if (extent.e_flags & EXT2_EXTENT_FLAGS_UNINIT) {
271			if (extent.e_len > EXT_UNINIT_MAX_LEN) {
272				extent.e_len = EXT_UNINIT_MAX_LEN;
273				ex->e_pblk += EXT_UNINIT_MAX_LEN;
274				ex->e_lblk += EXT_UNINIT_MAX_LEN;
275				ex->e_len -= EXT_UNINIT_MAX_LEN;
276				ex--;
277				i--;
278			}
279		} else {
280			if (extent.e_len > EXT_INIT_MAX_LEN) {
281				extent.e_len = EXT_INIT_MAX_LEN;
282				ex->e_pblk += EXT_INIT_MAX_LEN;
283				ex->e_lblk += EXT_INIT_MAX_LEN;
284				ex->e_len -= EXT_INIT_MAX_LEN;
285				ex--;
286				i--;
287			}
288		}
289
290#ifdef DEBUG
291		printf("W: ino=%d pblk=%llu lblk=%llu len=%u\n", ino,
292				extent.e_pblk, extent.e_lblk, extent.e_len);
293#endif
294		retval = ext2fs_extent_insert(handle, EXT2_EXTENT_INSERT_AFTER,
295					      &extent);
296		if (retval)
297			goto err2;
298		retval = ext2fs_extent_fix_parents(handle);
299		if (retval)
300			goto err2;
301		ext_written++;
302	}
303
304	delta = ext2fs_inode_i_blocks(ctx->fs, EXT2_INODE(&inode)) - start_val;
305	if (delta) {
306		if (!ext2fs_has_feature_huge_file(ctx->fs->super) ||
307		    !(inode.i_flags & EXT4_HUGE_FILE_FL))
308			delta <<= 9;
309		else
310			delta *= ctx->fs->blocksize;
311		quota_data_add(ctx->qctx, &inode, ino, delta);
312	}
313
314#if defined(DEBUG) || defined(DEBUG_SUMMARY)
315	printf("rebuild: ino=%d extents=%d->%d\n", ino, list->ext_read,
316	       ext_written);
317#endif
318	e2fsck_write_inode(ctx, ino, EXT2_INODE(&inode), "rebuild_extents");
319
320err2:
321	ext2fs_extent_free(handle);
322err:
323	return retval;
324}
325
326/* Rebuild the extents immediately */
327static errcode_t e2fsck_rebuild_extents(e2fsck_t ctx, ext2_ino_t ino)
328{
329	struct extent_list	list;
330	errcode_t err;
331
332	if (!ext2fs_has_feature_extents(ctx->fs->super) ||
333	    (ctx->options & E2F_OPT_NO) ||
334	    (ino != EXT2_ROOT_INO && ino < ctx->fs->super->s_first_ino))
335		return 0;
336
337	e2fsck_read_bitmaps(ctx);
338	memset(&list, 0, sizeof(list));
339	err = ext2fs_get_mem(sizeof(struct ext2fs_extent) * NUM_EXTENTS,
340				&list.extents);
341	if (err)
342		return err;
343	list.size = NUM_EXTENTS;
344	err = rebuild_extent_tree(ctx, &list, ino);
345	ext2fs_free_mem(&list.extents);
346
347	return err;
348}
349
350static void rebuild_extents(e2fsck_t ctx, const char *pass_name, int pr_header)
351{
352	struct problem_context	pctx;
353#ifdef RESOURCE_TRACK
354	struct resource_track	rtrack;
355#endif
356	struct extent_list	list;
357	int			first = 1;
358	ext2_ino_t		ino = 0;
359	errcode_t		retval;
360
361	if (!ext2fs_has_feature_extents(ctx->fs->super) ||
362	    !ext2fs_test_valid(ctx->fs) ||
363	    ctx->invalid_bitmaps) {
364		if (ctx->inodes_to_rebuild)
365			ext2fs_free_inode_bitmap(ctx->inodes_to_rebuild);
366		ctx->inodes_to_rebuild = NULL;
367	}
368
369	if (ctx->inodes_to_rebuild == NULL)
370		return;
371
372	init_resource_track(&rtrack, ctx->fs->io);
373	clear_problem_context(&pctx);
374	e2fsck_read_bitmaps(ctx);
375
376	memset(&list, 0, sizeof(list));
377	retval = ext2fs_get_mem(sizeof(struct ext2fs_extent) * NUM_EXTENTS,
378				&list.extents);
379	list.size = NUM_EXTENTS;
380	while (1) {
381		retval = ext2fs_find_first_set_inode_bitmap2(
382				ctx->inodes_to_rebuild, ino + 1,
383				ctx->fs->super->s_inodes_count, &ino);
384		if (retval)
385			break;
386		pctx.ino = ino;
387		if (first) {
388			fix_problem(ctx, pr_header, &pctx);
389			first = 0;
390		}
391		pctx.errcode = rebuild_extent_tree(ctx, &list, ino);
392		if (pctx.errcode) {
393			end_problem_latch(ctx, PR_LATCH_OPTIMIZE_EXT);
394			fix_problem(ctx, PR_1E_OPTIMIZE_EXT_ERR, &pctx);
395		}
396		if (ctx->progress && !ctx->progress_fd)
397			e2fsck_simple_progress(ctx, "Rebuilding extents",
398					100.0 * (float) ino /
399					(float) ctx->fs->super->s_inodes_count,
400					ino);
401	}
402	end_problem_latch(ctx, PR_LATCH_OPTIMIZE_EXT);
403
404	ext2fs_free_inode_bitmap(ctx->inodes_to_rebuild);
405	ctx->inodes_to_rebuild = NULL;
406	ext2fs_free_mem(&list.extents);
407
408	print_resource_track(ctx, pass_name, &rtrack, ctx->fs->io);
409}
410
411/* Scan a file to see if we should rebuild its extent tree */
412errcode_t e2fsck_check_rebuild_extents(e2fsck_t ctx, ext2_ino_t ino,
413				  struct ext2_inode *inode,
414				  struct problem_context *pctx)
415{
416	struct extent_tree_info	eti;
417	struct ext2_extent_info	info, top_info;
418	struct ext2fs_extent	extent;
419	ext2_extent_handle_t	ehandle;
420	ext2_filsys		fs = ctx->fs;
421	errcode_t		retval;
422
423	/* block map file and we want extent conversion */
424	if (!(inode->i_flags & EXT4_EXTENTS_FL) &&
425	    !(inode->i_flags & EXT4_INLINE_DATA_FL) &&
426	    (ctx->options & E2F_OPT_CONVERT_BMAP)) {
427		return e2fsck_rebuild_extents_later(ctx, ino);
428	}
429
430	if (!(inode->i_flags & EXT4_EXTENTS_FL))
431		return 0;
432	memset(&eti, 0, sizeof(eti));
433	eti.ino = ino;
434
435	/* Otherwise, go scan the extent tree... */
436	retval = ext2fs_extent_open2(fs, ino, inode, &ehandle);
437	if (retval)
438		return 0;
439
440	retval = ext2fs_extent_get_info(ehandle, &top_info);
441	if (retval)
442		goto out;
443
444	/* Check maximum extent depth */
445	pctx->ino = ino;
446	pctx->blk = top_info.max_depth;
447	pctx->blk2 = ext2fs_max_extent_depth(ehandle);
448	if (pctx->blk2 < pctx->blk &&
449	    fix_problem(ctx, PR_1_EXTENT_BAD_MAX_DEPTH, pctx))
450		eti.force_rebuild = 1;
451
452	/* Can we collect extent tree level stats? */
453	pctx->blk = MAX_EXTENT_DEPTH_COUNT;
454	if (pctx->blk2 > pctx->blk)
455		fix_problem(ctx, PR_1E_MAX_EXTENT_TREE_DEPTH, pctx);
456
457	/* We need to fix tree depth problems, but the scan isn't a fix. */
458	if (ctx->options & E2F_OPT_FIXES_ONLY)
459		goto out;
460
461	retval = ext2fs_extent_get(ehandle, EXT2_EXTENT_ROOT, &extent);
462	if (retval)
463		goto out;
464
465	do {
466		retval = ext2fs_extent_get_info(ehandle, &info);
467		if (retval)
468			break;
469
470		/*
471		 * If this is the first extent in an extent block that we
472		 * haven't visited, collect stats on the block.
473		 */
474		if (info.curr_entry == 1 &&
475		    !(extent.e_flags & EXT2_EXTENT_FLAGS_SECOND_VISIT) &&
476		    !eti.force_rebuild) {
477			struct extent_tree_level *etl;
478
479			etl = eti.ext_info + info.curr_level;
480			etl->num_extents += info.num_entries;
481			etl->max_extents += info.max_entries;
482			/*
483			 * Implementation wart: Splitting extent blocks when
484			 * appending will leave the old block with one free
485			 * entry.  Therefore unless the node is totally full,
486			 * pretend that a non-root extent block can hold one
487			 * fewer entry than it actually does, so that we don't
488			 * repeatedly rebuild the extent tree.
489			 */
490			if (info.curr_level &&
491			    info.num_entries < info.max_entries)
492				etl->max_extents--;
493		}
494
495		/* Skip to the end of a block of leaf nodes */
496		if (extent.e_flags & EXT2_EXTENT_FLAGS_LEAF) {
497			retval = ext2fs_extent_get(ehandle,
498						    EXT2_EXTENT_LAST_SIB,
499						    &extent);
500			if (retval)
501				break;
502		}
503
504		retval = ext2fs_extent_get(ehandle, EXT2_EXTENT_NEXT, &extent);
505	} while (retval == 0);
506out:
507	ext2fs_extent_free(ehandle);
508	return e2fsck_should_rebuild_extents(ctx, pctx, &eti, &top_info);
509}
510
511/* Having scanned a file's extent tree, decide if we should rebuild it */
512errcode_t e2fsck_should_rebuild_extents(e2fsck_t ctx,
513				   struct problem_context *pctx,
514				   struct extent_tree_info *eti,
515				   struct ext2_extent_info *info)
516{
517	struct extent_tree_level *ei;
518	int i, j, op;
519	unsigned int extents_per_block;
520
521	if (eti->force_rebuild)
522		goto rebuild;
523
524	extents_per_block = (ctx->fs->blocksize -
525			     sizeof(struct ext3_extent_header)) /
526			    sizeof(struct ext3_extent);
527	/*
528	 * If we can consolidate a level or shorten the tree, schedule the
529	 * extent tree to be rebuilt.
530	 */
531	for (i = 0, ei = eti->ext_info; i < info->max_depth + 1; i++, ei++) {
532		if (ei->max_extents - ei->num_extents > extents_per_block) {
533			pctx->blk = i;
534			op = PR_1E_CAN_NARROW_EXTENT_TREE;
535			goto rebuild;
536		}
537		for (j = 0; j < i; j++) {
538			if (ei->num_extents < eti->ext_info[j].max_extents) {
539				pctx->blk = i;
540				op = PR_1E_CAN_COLLAPSE_EXTENT_TREE;
541				goto rebuild;
542			}
543		}
544	}
545	return 0;
546
547rebuild:
548	if (eti->force_rebuild || fix_problem(ctx, op, pctx))
549		return e2fsck_rebuild_extents_later(ctx, eti->ino);
550	return 0;
551}
552
553void e2fsck_pass1e(e2fsck_t ctx)
554{
555	rebuild_extents(ctx, "Pass 1E", PR_1E_PASS_HEADER);
556}
557