1/* 2 * readahead.c -- Prefetch filesystem metadata to speed up fsck. 3 * 4 * Copyright (C) 2014 Oracle. 5 * 6 * %Begin-Header% 7 * This file may be redistributed under the terms of the GNU Library 8 * General Public License, version 2. 9 * %End-Header% 10 */ 11 12#include "config.h" 13#include <string.h> 14 15#include "e2fsck.h" 16 17#undef DEBUG 18 19#ifdef DEBUG 20# define dbg_printf(f, a...) do {printf(f, ## a); fflush(stdout); } while (0) 21#else 22# define dbg_printf(f, a...) 23#endif 24 25struct read_dblist { 26 errcode_t err; 27 blk64_t run_start; 28 blk64_t run_len; 29 int flags; 30}; 31 32static int readahead_dir_block(ext2_filsys fs, struct ext2_db_entry2 *db, 33 void *priv_data) 34{ 35 struct read_dblist *pr = priv_data; 36 e2_blkcnt_t count = (pr->flags & E2FSCK_RA_DBLIST_IGNORE_BLOCKCNT ? 37 1 : db->blockcnt); 38 39 if (!pr->run_len || db->blk != pr->run_start + pr->run_len) { 40 if (pr->run_len) { 41 pr->err = io_channel_cache_readahead(fs->io, 42 pr->run_start, 43 pr->run_len); 44 dbg_printf("readahead start=%llu len=%llu err=%d\n", 45 pr->run_start, pr->run_len, 46 (int)pr->err); 47 } 48 pr->run_start = db->blk; 49 pr->run_len = 0; 50 } 51 pr->run_len += count; 52 53 return pr->err ? DBLIST_ABORT : 0; 54} 55 56errcode_t e2fsck_readahead_dblist(ext2_filsys fs, int flags, 57 ext2_dblist dblist, 58 unsigned long long start, 59 unsigned long long count) 60{ 61 errcode_t err; 62 struct read_dblist pr; 63 64 dbg_printf("%s: flags=0x%x\n", __func__, flags); 65 if (flags & ~E2FSCK_RA_DBLIST_ALL_FLAGS) 66 return EXT2_ET_INVALID_ARGUMENT; 67 68 memset(&pr, 0, sizeof(pr)); 69 pr.flags = flags; 70 err = ext2fs_dblist_iterate3(dblist, readahead_dir_block, start, 71 count, &pr); 72 if (pr.err) 73 return pr.err; 74 if (err) 75 return err; 76 77 if (pr.run_len) 78 err = io_channel_cache_readahead(fs->io, pr.run_start, 79 pr.run_len); 80 81 return err; 82} 83 84static errcode_t e2fsck_readahead_bitmap(ext2_filsys fs, 85 ext2fs_block_bitmap ra_map) 86{ 87 blk64_t start, end, out; 88 errcode_t err; 89 90 start = 1; 91 end = ext2fs_blocks_count(fs->super) - 1; 92 93 err = ext2fs_find_first_set_block_bitmap2(ra_map, start, end, &out); 94 while (err == 0) { 95 start = out; 96 err = ext2fs_find_first_zero_block_bitmap2(ra_map, start, end, 97 &out); 98 if (err == ENOENT) { 99 out = end; 100 err = 0; 101 } else if (err) 102 break; 103 104 err = io_channel_cache_readahead(fs->io, start, out - start); 105 if (err) 106 break; 107 start = out; 108 err = ext2fs_find_first_set_block_bitmap2(ra_map, start, end, 109 &out); 110 } 111 112 if (err == ENOENT) 113 err = 0; 114 115 return err; 116} 117 118/* Try not to spew bitmap range errors for readahead */ 119static errcode_t mark_bmap_range(ext2fs_block_bitmap map, 120 blk64_t blk, unsigned int num) 121{ 122 if (blk >= ext2fs_get_generic_bmap_start(map) && 123 blk + num <= ext2fs_get_generic_bmap_end(map)) 124 ext2fs_mark_block_bitmap_range2(map, blk, num); 125 else 126 return EXT2_ET_INVALID_ARGUMENT; 127 return 0; 128} 129 130static errcode_t mark_bmap(ext2fs_block_bitmap map, blk64_t blk) 131{ 132 if (blk >= ext2fs_get_generic_bmap_start(map) && 133 blk <= ext2fs_get_generic_bmap_end(map)) 134 ext2fs_mark_block_bitmap2(map, blk); 135 else 136 return EXT2_ET_INVALID_ARGUMENT; 137 return 0; 138} 139 140errcode_t e2fsck_readahead(ext2_filsys fs, int flags, dgrp_t start, 141 dgrp_t ngroups) 142{ 143 blk64_t super, old_gdt, new_gdt; 144 blk_t blocks; 145 dgrp_t i; 146 ext2fs_block_bitmap ra_map = NULL; 147 dgrp_t end = start + ngroups; 148 errcode_t err = 0; 149 150 dbg_printf("%s: flags=0x%x start=%d groups=%d\n", __func__, flags, 151 start, ngroups); 152 if (flags & ~E2FSCK_READA_ALL_FLAGS) 153 return EXT2_ET_INVALID_ARGUMENT; 154 155 if (end > fs->group_desc_count) 156 end = fs->group_desc_count; 157 158 if (flags == 0) 159 return 0; 160 161 err = ext2fs_allocate_block_bitmap(fs, "readahead bitmap", 162 &ra_map); 163 if (err) 164 return err; 165 166 for (i = start; i < end; i++) { 167 err = ext2fs_super_and_bgd_loc2(fs, i, &super, &old_gdt, 168 &new_gdt, &blocks); 169 if (err) 170 break; 171 172 if (flags & E2FSCK_READA_SUPER) { 173 err = mark_bmap(ra_map, super); 174 if (err) 175 break; 176 } 177 178 if (flags & E2FSCK_READA_GDT) { 179 err = mark_bmap_range(ra_map, 180 old_gdt ? old_gdt : new_gdt, 181 blocks); 182 if (err) 183 break; 184 } 185 186 if ((flags & E2FSCK_READA_BBITMAP) && 187 !ext2fs_bg_flags_test(fs, i, EXT2_BG_BLOCK_UNINIT) && 188 ext2fs_bg_free_blocks_count(fs, i) < 189 fs->super->s_blocks_per_group) { 190 super = ext2fs_block_bitmap_loc(fs, i); 191 err = mark_bmap(ra_map, super); 192 if (err) 193 break; 194 } 195 196 if ((flags & E2FSCK_READA_IBITMAP) && 197 !ext2fs_bg_flags_test(fs, i, EXT2_BG_INODE_UNINIT) && 198 ext2fs_bg_free_inodes_count(fs, i) < 199 fs->super->s_inodes_per_group) { 200 super = ext2fs_inode_bitmap_loc(fs, i); 201 err = mark_bmap(ra_map, super); 202 if (err) 203 break; 204 } 205 206 if ((flags & E2FSCK_READA_ITABLE) && 207 ext2fs_bg_free_inodes_count(fs, i) < 208 fs->super->s_inodes_per_group) { 209 super = ext2fs_inode_table_loc(fs, i); 210 blocks = fs->inode_blocks_per_group - 211 (ext2fs_bg_itable_unused(fs, i) * 212 EXT2_INODE_SIZE(fs->super) / fs->blocksize); 213 err = mark_bmap_range(ra_map, super, blocks); 214 if (err) 215 break; 216 } 217 } 218 219 if (!err) 220 err = e2fsck_readahead_bitmap(fs, ra_map); 221 222 ext2fs_free_block_bitmap(ra_map); 223 return err; 224} 225 226int e2fsck_can_readahead(ext2_filsys fs) 227{ 228 errcode_t err; 229 230 err = io_channel_cache_readahead(fs->io, 0, 1); 231 dbg_printf("%s: supp=%d\n", __func__, err != EXT2_ET_OP_NOT_SUPPORTED); 232 return err != EXT2_ET_OP_NOT_SUPPORTED; 233} 234 235unsigned long long e2fsck_guess_readahead(ext2_filsys fs) 236{ 237 unsigned long long guess; 238 239 /* 240 * The optimal readahead sizes were experimentally determined by 241 * djwong in August 2014. Setting the RA size to two block groups' 242 * worth of inode table blocks seems to yield the largest reductions 243 * in e2fsck runtime. 244 */ 245 guess = 2ULL * fs->blocksize * fs->inode_blocks_per_group; 246 247 /* Disable RA if it'd use more 1/50th of RAM. */ 248 if (get_memory_size() > (guess * 50)) 249 return guess / 1024; 250 251 return 0; 252} 253