1/*
2 * readahead.c -- Prefetch filesystem metadata to speed up fsck.
3 *
4 * Copyright (C) 2014 Oracle.
5 *
6 * %Begin-Header%
7 * This file may be redistributed under the terms of the GNU Library
8 * General Public License, version 2.
9 * %End-Header%
10 */
11
12#include "config.h"
13#include <string.h>
14
15#include "e2fsck.h"
16
17#undef DEBUG
18
19#ifdef DEBUG
20# define dbg_printf(f, a...)  do {printf(f, ## a); fflush(stdout); } while (0)
21#else
22# define dbg_printf(f, a...)
23#endif
24
25struct read_dblist {
26	errcode_t err;
27	blk64_t run_start;
28	blk64_t run_len;
29	int flags;
30};
31
32static int readahead_dir_block(ext2_filsys fs, struct ext2_db_entry2 *db,
33			       void *priv_data)
34{
35	struct read_dblist *pr = priv_data;
36	e2_blkcnt_t count = (pr->flags & E2FSCK_RA_DBLIST_IGNORE_BLOCKCNT ?
37			     1 : db->blockcnt);
38
39	if (!pr->run_len || db->blk != pr->run_start + pr->run_len) {
40		if (pr->run_len) {
41			pr->err = io_channel_cache_readahead(fs->io,
42							     pr->run_start,
43							     pr->run_len);
44			dbg_printf("readahead start=%llu len=%llu err=%d\n",
45				   pr->run_start, pr->run_len,
46				   (int)pr->err);
47		}
48		pr->run_start = db->blk;
49		pr->run_len = 0;
50	}
51	pr->run_len += count;
52
53	return pr->err ? DBLIST_ABORT : 0;
54}
55
56errcode_t e2fsck_readahead_dblist(ext2_filsys fs, int flags,
57				  ext2_dblist dblist,
58				  unsigned long long start,
59				  unsigned long long count)
60{
61	errcode_t err;
62	struct read_dblist pr;
63
64	dbg_printf("%s: flags=0x%x\n", __func__, flags);
65	if (flags & ~E2FSCK_RA_DBLIST_ALL_FLAGS)
66		return EXT2_ET_INVALID_ARGUMENT;
67
68	memset(&pr, 0, sizeof(pr));
69	pr.flags = flags;
70	err = ext2fs_dblist_iterate3(dblist, readahead_dir_block, start,
71				     count, &pr);
72	if (pr.err)
73		return pr.err;
74	if (err)
75		return err;
76
77	if (pr.run_len)
78		err = io_channel_cache_readahead(fs->io, pr.run_start,
79						 pr.run_len);
80
81	return err;
82}
83
84static errcode_t e2fsck_readahead_bitmap(ext2_filsys fs,
85					 ext2fs_block_bitmap ra_map)
86{
87	blk64_t start, end, out;
88	errcode_t err;
89
90	start = 1;
91	end = ext2fs_blocks_count(fs->super) - 1;
92
93	err = ext2fs_find_first_set_block_bitmap2(ra_map, start, end, &out);
94	while (err == 0) {
95		start = out;
96		err = ext2fs_find_first_zero_block_bitmap2(ra_map, start, end,
97							   &out);
98		if (err == ENOENT) {
99			out = end;
100			err = 0;
101		} else if (err)
102			break;
103
104		err = io_channel_cache_readahead(fs->io, start, out - start);
105		if (err)
106			break;
107		start = out;
108		err = ext2fs_find_first_set_block_bitmap2(ra_map, start, end,
109							  &out);
110	}
111
112	if (err == ENOENT)
113		err = 0;
114
115	return err;
116}
117
118/* Try not to spew bitmap range errors for readahead */
119static errcode_t mark_bmap_range(ext2fs_block_bitmap map,
120				 blk64_t blk, unsigned int num)
121{
122	if (blk >= ext2fs_get_generic_bmap_start(map) &&
123	    blk + num <= ext2fs_get_generic_bmap_end(map))
124		ext2fs_mark_block_bitmap_range2(map, blk, num);
125	else
126		return EXT2_ET_INVALID_ARGUMENT;
127	return 0;
128}
129
130static errcode_t mark_bmap(ext2fs_block_bitmap map, blk64_t blk)
131{
132	if (blk >= ext2fs_get_generic_bmap_start(map) &&
133	    blk <= ext2fs_get_generic_bmap_end(map))
134		ext2fs_mark_block_bitmap2(map, blk);
135	else
136		return EXT2_ET_INVALID_ARGUMENT;
137	return 0;
138}
139
140errcode_t e2fsck_readahead(ext2_filsys fs, int flags, dgrp_t start,
141			   dgrp_t ngroups)
142{
143	blk64_t		super, old_gdt, new_gdt;
144	blk_t		blocks;
145	dgrp_t		i;
146	ext2fs_block_bitmap		ra_map = NULL;
147	dgrp_t		end = start + ngroups;
148	errcode_t	err = 0;
149
150	dbg_printf("%s: flags=0x%x start=%d groups=%d\n", __func__, flags,
151		   start, ngroups);
152	if (flags & ~E2FSCK_READA_ALL_FLAGS)
153		return EXT2_ET_INVALID_ARGUMENT;
154
155	if (end > fs->group_desc_count)
156		end = fs->group_desc_count;
157
158	if (flags == 0)
159		return 0;
160
161	err = ext2fs_allocate_block_bitmap(fs, "readahead bitmap",
162					   &ra_map);
163	if (err)
164		return err;
165
166	for (i = start; i < end; i++) {
167		err = ext2fs_super_and_bgd_loc2(fs, i, &super, &old_gdt,
168						&new_gdt, &blocks);
169		if (err)
170			break;
171
172		if (flags & E2FSCK_READA_SUPER) {
173			err = mark_bmap(ra_map, super);
174			if (err)
175				break;
176		}
177
178		if (flags & E2FSCK_READA_GDT) {
179			err = mark_bmap_range(ra_map,
180					      old_gdt ? old_gdt : new_gdt,
181					      blocks);
182			if (err)
183				break;
184		}
185
186		if ((flags & E2FSCK_READA_BBITMAP) &&
187		    !ext2fs_bg_flags_test(fs, i, EXT2_BG_BLOCK_UNINIT) &&
188		    ext2fs_bg_free_blocks_count(fs, i) <
189				fs->super->s_blocks_per_group) {
190			super = ext2fs_block_bitmap_loc(fs, i);
191			err = mark_bmap(ra_map, super);
192			if (err)
193				break;
194		}
195
196		if ((flags & E2FSCK_READA_IBITMAP) &&
197		    !ext2fs_bg_flags_test(fs, i, EXT2_BG_INODE_UNINIT) &&
198		    ext2fs_bg_free_inodes_count(fs, i) <
199				fs->super->s_inodes_per_group) {
200			super = ext2fs_inode_bitmap_loc(fs, i);
201			err = mark_bmap(ra_map, super);
202			if (err)
203				break;
204		}
205
206		if ((flags & E2FSCK_READA_ITABLE) &&
207		    ext2fs_bg_free_inodes_count(fs, i) <
208				fs->super->s_inodes_per_group) {
209			super = ext2fs_inode_table_loc(fs, i);
210			blocks = fs->inode_blocks_per_group -
211				 (ext2fs_bg_itable_unused(fs, i) *
212				  EXT2_INODE_SIZE(fs->super) / fs->blocksize);
213			err = mark_bmap_range(ra_map, super, blocks);
214			if (err)
215				break;
216		}
217	}
218
219	if (!err)
220		err = e2fsck_readahead_bitmap(fs, ra_map);
221
222	ext2fs_free_block_bitmap(ra_map);
223	return err;
224}
225
226int e2fsck_can_readahead(ext2_filsys fs)
227{
228	errcode_t err;
229
230	err = io_channel_cache_readahead(fs->io, 0, 1);
231	dbg_printf("%s: supp=%d\n", __func__, err != EXT2_ET_OP_NOT_SUPPORTED);
232	return err != EXT2_ET_OP_NOT_SUPPORTED;
233}
234
235unsigned long long e2fsck_guess_readahead(ext2_filsys fs)
236{
237	unsigned long long guess;
238
239	/*
240	 * The optimal readahead sizes were experimentally determined by
241	 * djwong in August 2014.  Setting the RA size to two block groups'
242	 * worth of inode table blocks seems to yield the largest reductions
243	 * in e2fsck runtime.
244	 */
245	guess = 2ULL * fs->blocksize * fs->inode_blocks_per_group;
246
247	/* Disable RA if it'd use more 1/50th of RAM. */
248	if (get_memory_size() > (guess * 50))
249		return guess / 1024;
250
251	return 0;
252}
253