1/* mke2fs.c - Create an ext2 filesystem image. 2 * 3 * Copyright 2006, 2007 Rob Landley <rob@landley.net> 4 5// Still to go: "E:jJ:L:m:O:" 6USE_MKE2FS(NEWTOY(mke2fs, "<1>2g:Fnqm#N#i#b#", TOYFLAG_SBIN)) 7 8config MKE2FS 9 bool "mke2fs" 10 default n 11 help 12 usage: mke2fs [-Fnq] [-b ###] [-N|i ###] [-m ###] device 13 14 Create an ext2 filesystem on a block device or filesystem image. 15 16 -F Force to run on a mounted device 17 -n Don't write to device 18 -q Quiet (no output) 19 -b size Block size (1024, 2048, or 4096) 20 -N inodes Allocate this many inodes 21 -i bytes Allocate one inode for every XXX bytes of device 22 -m percent Reserve this percent of filesystem space for root user 23 24config MKE2FS_JOURNAL 25 bool "Journaling support (ext3)" 26 default n 27 depends on MKE2FS 28 help 29 usage: mke2fs [-j] [-J size=###,device=XXX] 30 31 -j Create journal (ext3) 32 -J Journal options 33 size: Number of blocks (1024-102400) 34 device: Specify an external journal 35 36config MKE2FS_GEN 37 bool "Generate (gene2fs)" 38 default n 39 depends on MKE2FS 40 help 41 usage: gene2fs [options] device filename 42 43 The [options] are the same as mke2fs. 44 45config MKE2FS_LABEL 46 bool "Label support" 47 default n 48 depends on MKE2FS 49 help 50 usage: mke2fs [-L label] [-M path] [-o string] 51 52 -L Volume label 53 -M Path to mount point 54 -o Created by 55 56config MKE2FS_EXTENDED 57 bool "Extended options" 58 default n 59 depends on MKE2FS 60 help 61 usage: mke2fs [-E stride=###] [-O option[,option]] 62 63 -E stride= Set RAID stripe size (in blocks) 64 -O [opts] Specify fewer ext2 option flags (for old kernels) 65 All of these are on by default (as appropriate) 66 none Clear default options (all but journaling) 67 dir_index Use htree indexes for large directories 68 filetype Store file type info in directory entry 69 has_journal Set by -j 70 journal_dev Set by -J device=XXX 71 sparse_super Don't allocate huge numbers of redundant superblocks 72*/ 73 74#define FOR_mke2fs 75#include "toys.h" 76 77GLOBALS( 78 // Command line arguments. 79 long blocksize; 80 long bytes_per_inode; 81 long inodes; // Total inodes in filesystem. 82 long reserved_percent; // Integer precent of space to reserve for root. 83 char *gendir; // Where to read dirtree from. 84 85 // Internal data. 86 struct dirtree *dt; // Tree of files to copy into the new filesystem. 87 unsigned treeblocks; // Blocks used by dt 88 unsigned treeinodes; // Inodes used by dt 89 90 unsigned blocks; // Total blocks in the filesystem. 91 unsigned freeblocks; // Free blocks in the filesystem. 92 unsigned inodespg; // Inodes per group 93 unsigned groups; // Total number of block groups. 94 unsigned blockbits; // Bits per block. (Also blocks per group.) 95 96 // For gene2fs 97 unsigned nextblock; // Next data block to allocate 98 unsigned nextgroup; // Next group we'll be allocating from 99 int fsfd; // File descriptor of filesystem (to output to). 100 101 struct ext2_superblock sb; 102) 103 104#define INODES_RESERVED 10 105 106static uint32_t div_round_up(uint32_t a, uint32_t b) 107{ 108 uint32_t c = a/b; 109 110 if (a%b) c++; 111 return c; 112} 113 114// Calculate data blocks plus index blocks needed to hold a file. 115 116static uint32_t file_blocks_used(uint64_t size, uint32_t *blocklist) 117{ 118 uint32_t dblocks = (uint32_t)((size+(TT.blocksize-1))/TT.blocksize); 119 uint32_t idx=TT.blocksize/4, iblocks=0, diblocks=0, tiblocks=0; 120 121 // Fill out index blocks in inode. 122 123 if (blocklist) { 124 int i; 125 126 // Direct index blocks 127 for (i=0; i<13 && i<dblocks; i++) blocklist[i] = i; 128 // Singly indirect index blocks 129 if (dblocks > 13+idx) blocklist[13] = 13+idx; 130 // Doubly indirect index blocks 131 idx = 13 + idx + (idx*idx); 132 if (dblocks > idx) blocklist[14] = idx; 133 134 return 0; 135 } 136 137 // Account for direct, singly, doubly, and triply indirect index blocks 138 139 if (dblocks > 12) { 140 iblocks = ((dblocks-13)/idx)+1; 141 if (iblocks > 1) { 142 diblocks = ((iblocks-2)/idx)+1; 143 if (diblocks > 1) 144 tiblocks = ((diblocks-2)/idx)+1; 145 } 146 } 147 148 return dblocks + iblocks + diblocks + tiblocks; 149} 150 151// Use the parent pointer to iterate through the tree non-recursively. 152static struct dirtree *treenext(struct dirtree *this) 153{ 154 while (this && !this->next) this = this->parent; 155 if (this) this = this->next; 156 157 return this; 158} 159 160// Recursively calculate the number of blocks used by each inode in the tree. 161// Returns blocks used by this directory, assigns bytes used to *size. 162// Writes total block count to TT.treeblocks and inode count to TT.treeinodes. 163 164static long check_treesize(struct dirtree *that, off_t *size) 165{ 166 long blocks; 167 168 while (that) { 169 *size += sizeof(struct ext2_dentry) + strlen(that->name); 170 171 if (that->child) 172 that->st.st_blocks = check_treesize(that->child, &that->st.st_size); 173 else if (S_ISREG(that->st.st_mode)) { 174 that->st.st_blocks = file_blocks_used(that->st.st_size, 0); 175 TT.treeblocks += that->st.st_blocks; 176 } 177 that = that->next; 178 } 179 TT.treeblocks += blocks = file_blocks_used(*size, 0); 180 TT.treeinodes++; 181 182 return blocks; 183} 184 185// Calculate inode numbers and link counts. 186// 187// To do this right I need to copy the tree and sort it, but here's a really 188// ugly n^2 way of dealing with the problem that doesn't scale well to large 189// numbers of files (> 100,000) but can be done in very little code. 190// This rewrites inode numbers to their final values, allocating depth first. 191 192static void check_treelinks(struct dirtree *tree) 193{ 194 struct dirtree *current=tree, *that; 195 long inode = INODES_RESERVED; 196 197 while (current) { 198 ++inode; 199 // Since we can't hardlink to directories, we know their link count. 200 if (S_ISDIR(current->st.st_mode)) current->st.st_nlink = 2; 201 else { 202 dev_t new = current->st.st_dev; 203 204 if (!new) continue; 205 206 // Look for other copies of current node 207 current->st.st_nlink = 0; 208 for (that = tree; that; that = treenext(that)) { 209 if (current->st.st_ino == that->st.st_ino && 210 current->st.st_dev == that->st.st_dev) 211 { 212 current->st.st_nlink++; 213 current->st.st_ino = inode; 214 } 215 } 216 } 217 current->st.st_ino = inode; 218 current = treenext(current); 219 } 220} 221 222// According to http://www.opengroup.org/onlinepubs/9629399/apdxa.htm 223// we should generate a uuid structure by reading a clock with 100 nanosecond 224// precision, normalizing it to the start of the gregorian calendar in 1582, 225// and looking up our eth0 mac address. 226// 227// On the other hand, we have 128 bits to come up with a unique identifier, of 228// which 6 have a defined value. /dev/urandom it is. 229 230static void create_uuid(char *uuid) 231{ 232 // Read 128 random bits 233 int fd = xopen("/dev/urandom", O_RDONLY); 234 xreadall(fd, uuid, 16); 235 close(fd); 236 237 // Claim to be a DCE format UUID. 238 uuid[6] = (uuid[6] & 0x0F) | 0x40; 239 uuid[8] = (uuid[8] & 0x3F) | 0x80; 240 241 // rfc2518 section 6.4.1 suggests if we're not using a macaddr, we should 242 // set bit 1 of the node ID, which is the mac multicast bit. This means we 243 // should never collide with anybody actually using a macaddr. 244 uuid[11] = uuid[11] | 128; 245} 246 247// Calculate inodes per group from total inodes. 248static uint32_t get_inodespg(uint32_t inodes) 249{ 250 uint32_t temp; 251 252 // Round up to fill complete inode blocks. 253 temp = (inodes + TT.groups - 1) / TT.groups; 254 inodes = TT.blocksize/sizeof(struct ext2_inode); 255 return ((temp + inodes - 1)/inodes)*inodes; 256} 257 258// Fill out superblock and TT structures. 259 260static void init_superblock(struct ext2_superblock *sb) 261{ 262 uint32_t temp; 263 264 // Set log_block_size and log_frag_size. 265 266 for (temp = 0; temp < 4; temp++) if (TT.blocksize == 1024<<temp) break; 267 if (temp==4) error_exit("bad blocksize"); 268 sb->log_block_size = sb->log_frag_size = SWAP_LE32(temp); 269 270 // Fill out blocks_count, r_blocks_count, first_data_block 271 272 sb->blocks_count = SWAP_LE32(TT.blocks); 273 sb->free_blocks_count = SWAP_LE32(TT.freeblocks); 274 temp = (TT.blocks * (uint64_t)TT.reserved_percent) / 100; 275 sb->r_blocks_count = SWAP_LE32(temp); 276 277 sb->first_data_block = SWAP_LE32(TT.blocksize == 1024 ? 1 : 0); 278 279 // Set blocks_per_group and frags_per_group, which is the size of an 280 // allocation bitmap that fits in one block (I.E. how many bits per block)? 281 282 sb->blocks_per_group = sb->frags_per_group = SWAP_LE32(TT.blockbits); 283 284 // Set inodes_per_group and total inodes_count 285 sb->inodes_per_group = SWAP_LE32(TT.inodespg); 286 sb->inodes_count = SWAP_LE32(TT.inodespg * TT.groups); 287 288 // Determine free inodes. 289 temp = TT.inodespg*TT.groups - INODES_RESERVED; 290 if (temp < TT.treeinodes) error_exit("Not enough inodes.\n"); 291 sb->free_inodes_count = SWAP_LE32(temp - TT.treeinodes); 292 293 // Fill out the rest of the superblock. 294 sb->max_mnt_count=0xFFFF; 295 sb->wtime = sb->lastcheck = sb->mkfs_time = SWAP_LE32(time(NULL)); 296 sb->magic = SWAP_LE32(0xEF53); 297 sb->state = sb->errors = SWAP_LE16(1); 298 299 sb->rev_level = SWAP_LE32(1); 300 sb->first_ino = SWAP_LE32(INODES_RESERVED+1); 301 sb->inode_size = SWAP_LE16(sizeof(struct ext2_inode)); 302 sb->feature_incompat = SWAP_LE32(EXT2_FEATURE_INCOMPAT_FILETYPE); 303 sb->feature_ro_compat = SWAP_LE32(EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER); 304 305 create_uuid(sb->uuid); 306 307 // TODO If we're called as mke3fs or mkfs.ext3, do a journal. 308 309 //if (strchr(toys.which->name,'3')) 310 // sb->feature_compat |= SWAP_LE32(EXT3_FEATURE_COMPAT_HAS_JOURNAL); 311} 312 313// Does this group contain a superblock backup (and group descriptor table)? 314static int is_sb_group(uint32_t group) 315{ 316 int i; 317 318 // Superblock backups are on groups 0, 1, and powers of 3, 5, and 7. 319 if(!group || group==1) return 1; 320 for (i=3; i<9; i+=2) { 321 int j = i; 322 while (j<group) j*=i; 323 if (j==group) return 1; 324 } 325 return 0; 326} 327 328 329// Number of blocks used in group by optional superblock/group list backup. 330static int group_superblock_overhead(uint32_t group) 331{ 332 int used; 333 334 if (!is_sb_group(group)) return 0; 335 336 // How many blocks does the group descriptor table take up? 337 used = TT.groups * sizeof(struct ext2_group); 338 used += TT.blocksize - 1; 339 used /= TT.blocksize; 340 // Plus the superblock itself. 341 used++; 342 // And a corner case. 343 if (!group && TT.blocksize == 1024) used++; 344 345 return used; 346} 347 348// Number of blocks used in group to store superblock/group/inode list 349static int group_overhead(uint32_t group) 350{ 351 // Return superblock backup overhead (if any), plus block/inode 352 // allocation bitmaps, plus inode tables. 353 return group_superblock_overhead(group) + 2 + get_inodespg(TT.inodespg) 354 / (TT.blocksize/sizeof(struct ext2_inode)); 355} 356 357// In bitmap "array" set "len" bits starting at position "start" (from 0). 358static void bits_set(char *array, int start, int len) 359{ 360 while(len) { 361 if ((start&7) || len<8) { 362 array[start/8]|=(1<<(start&7)); 363 start++; 364 len--; 365 } else { 366 array[start/8]=255; 367 start+=8; 368 len-=8; 369 } 370 } 371} 372 373// Seek past len bytes (to maintain sparse file), or write zeroes if output 374// not seekable 375static void put_zeroes(int len) 376{ 377 if(-1 == lseek(TT.fsfd, len, SEEK_SET)) { 378 memset(toybuf, 0, sizeof(toybuf)); 379 while (len) { 380 int out = len > sizeof(toybuf) ? sizeof(toybuf) : len; 381 xwrite(TT.fsfd, toybuf, out); 382 len -= out; 383 } 384 } 385} 386 387// Fill out an inode structure from struct stat info in dirtree. 388static void fill_inode(struct ext2_inode *in, struct dirtree *that) 389{ 390 uint32_t fbu[15]; 391 int temp; 392 393 file_blocks_used(that->st.st_size, fbu); 394 395 // If that inode needs data blocks allocated to it. 396 if (that->st.st_size) { 397 int i, group = TT.nextblock/TT.blockbits; 398 399 // TODO: teach this about indirect blocks. 400 for (i=0; i<15; i++) { 401 // If we just jumped into a new group, skip group overhead blocks. 402 while (group >= TT.nextgroup) 403 TT.nextblock += group_overhead(TT.nextgroup++); 404 } 405 } 406 // TODO : S_ISREG/DIR/CHR/BLK/FIFO/LNK/SOCK(m) 407 in->mode = SWAP_LE32(that->st.st_mode); 408 409 in->uid = SWAP_LE16(that->st.st_uid & 0xFFFF); 410 in->uid_high = SWAP_LE16(that->st.st_uid >> 16); 411 in->gid = SWAP_LE16(that->st.st_gid & 0xFFFF); 412 in->gid_high = SWAP_LE16(that->st.st_gid >> 16); 413 in->size = SWAP_LE32(that->st.st_size & 0xFFFFFFFF); 414 415 // Contortions to make the compiler not generate a warning for x>>32 416 // when x is 32 bits. The optimizer should clean this up. 417 if (sizeof(that->st.st_size) > 4) temp = 32; 418 else temp = 0; 419 if (temp) in->dir_acl = SWAP_LE32(that->st.st_size >> temp); 420 421 in->atime = SWAP_LE32(that->st.st_atime); 422 in->ctime = SWAP_LE32(that->st.st_ctime); 423 in->mtime = SWAP_LE32(that->st.st_mtime); 424 425 in->links_count = SWAP_LE16(that->st.st_nlink); 426 in->blocks = SWAP_LE32(that->st.st_blocks); 427 // in->faddr 428} 429 430// Works like an archiver. 431// The first argument is the name of the file to create. If it already 432// exists, that size will be used. 433 434void mke2fs_main(void) 435{ 436 int i, temp; 437 off_t length; 438 uint32_t usedblocks, usedinodes, dtiblk, dtbblk; 439 struct dirtree *dti, *dtb; 440 441 // Handle command line arguments. 442 443 if (toys.optargs[1]) { 444 sscanf(toys.optargs[1], "%u", &TT.blocks); 445 temp = O_RDWR|O_CREAT; 446 } else temp = O_RDWR; 447 if (!TT.reserved_percent) TT.reserved_percent = 5; 448 449 // TODO: Check if filesystem is mounted here 450 451 // For mke?fs, open file. For gene?fs, create file. 452 TT.fsfd = xcreate(*toys.optargs, temp, 0777); 453 454 // Determine appropriate block size and block count from file length. 455 // (If no length, default to 4k. They can override it on the cmdline.) 456 457 length = fdlength(TT.fsfd); 458 if (!TT.blocksize) TT.blocksize = (length && length < 1<<29) ? 1024 : 4096; 459 TT.blockbits = 8*TT.blocksize; 460 if (!TT.blocks) TT.blocks = length/TT.blocksize; 461 462 // Collect gene2fs list or lost+found, calculate requirements. 463 464 if (TT.gendir) { 465 strncpy(toybuf, TT.gendir, sizeof(toybuf)); 466 dti = dirtree_read(toybuf, dirtree_notdotdot); 467 } else { 468 dti = xzalloc(sizeof(struct dirtree)+11); 469 strcpy(dti->name, "lost+found"); 470 dti->st.st_mode = S_IFDIR|0755; 471 dti->st.st_ctime = dti->st.st_mtime = time(NULL); 472 } 473 474 // Add root directory inode. This is iterated through for when finding 475 // blocks, but not when finding inodes. The tree's parent pointers don't 476 // point back into this. 477 478 dtb = xzalloc(sizeof(struct dirtree)+1); 479 dtb->st.st_mode = S_IFDIR|0755; 480 dtb->st.st_ctime = dtb->st.st_mtime = time(NULL); 481 dtb->child = dti; 482 483 // Figure out how much space is used by preset files 484 length = check_treesize(dtb, &(dtb->st.st_size)); 485 check_treelinks(dtb); 486 487 // Figure out how many total inodes we need. 488 489 if (!TT.inodes) { 490 if (!TT.bytes_per_inode) TT.bytes_per_inode = 8192; 491 TT.inodes = (TT.blocks * (uint64_t)TT.blocksize) / TT.bytes_per_inode; 492 } 493 494 // If we're generating a filesystem and have no idea how many blocks it 495 // needs, start with a minimal guess, find the overhead of that many 496 // groups, and loop until this is enough groups to store this many blocks. 497 if (!TT.blocks) TT.groups = (TT.treeblocks/TT.blockbits)+1; 498 else TT.groups = div_round_up(TT.blocks, TT.blockbits); 499 500 for (;;) { 501 temp = TT.treeblocks; 502 503 for (i = 0; i<TT.groups; i++) temp += group_overhead(i); 504 505 if (TT.blocks) { 506 if (TT.blocks < temp) error_exit("Not enough space.\n"); 507 break; 508 } 509 if (temp <= TT.groups * TT.blockbits) { 510 TT.blocks = temp; 511 break; 512 } 513 TT.groups++; 514 } 515 TT.freeblocks = TT.blocks - temp; 516 517 // Now we know all the TT data, initialize superblock structure. 518 519 init_superblock(&TT.sb); 520 521 // Start writing. Skip the first 1k to avoid the boot sector (if any). 522 put_zeroes(1024); 523 524 // Loop through block groups, write out each one. 525 dtiblk = dtbblk = usedblocks = usedinodes = 0; 526 for (i=0; i<TT.groups; i++) { 527 struct ext2_inode *in = (struct ext2_inode *)toybuf; 528 uint32_t start, itable, used, end; 529 int j, slot; 530 531 // Where does this group end? 532 end = TT.blockbits; 533 if ((i+1)*TT.blockbits > TT.blocks) end = TT.blocks & (TT.blockbits-1); 534 535 // Blocks used by inode table 536 itable = (TT.inodespg*sizeof(struct ext2_inode))/TT.blocksize; 537 538 // If a superblock goes here, write it out. 539 start = group_superblock_overhead(i); 540 if (start) { 541 struct ext2_group *bg = (struct ext2_group *)toybuf; 542 int treeblocks = TT.treeblocks, treeinodes = TT.treeinodes; 543 544 TT.sb.block_group_nr = SWAP_LE16(i); 545 546 // Write superblock and pad it up to block size 547 xwrite(TT.fsfd, &TT.sb, sizeof(struct ext2_superblock)); 548 temp = TT.blocksize - sizeof(struct ext2_superblock); 549 if (!i && TT.blocksize > 1024) temp -= 1024; 550 memset(toybuf, 0, TT.blocksize); 551 xwrite(TT.fsfd, toybuf, temp); 552 553 // Loop through groups to write group descriptor table. 554 for(j=0; j<TT.groups; j++) { 555 556 // Figure out what sector this group starts in. 557 used = group_superblock_overhead(j); 558 559 // Find next array slot in this block (flush block if full). 560 slot = j % (TT.blocksize/sizeof(struct ext2_group)); 561 if (!slot) { 562 if (j) xwrite(TT.fsfd, bg, TT.blocksize); 563 memset(bg, 0, TT.blocksize); 564 } 565 566 // How many free inodes in this group? 567 temp = TT.inodespg; 568 if (!i) temp -= INODES_RESERVED; 569 if (temp > treeinodes) { 570 treeinodes -= temp; 571 temp = 0; 572 } else { 573 temp -= treeinodes; 574 treeinodes = 0; 575 } 576 bg[slot].free_inodes_count = SWAP_LE16(temp); 577 578 // How many free blocks in this group? 579 temp = TT.inodespg/(TT.blocksize/sizeof(struct ext2_inode)) + 2; 580 temp = end-used-temp; 581 if (temp > treeblocks) { 582 treeblocks -= temp; 583 temp = 0; 584 } else { 585 temp -= treeblocks; 586 treeblocks = 0; 587 } 588 bg[slot].free_blocks_count = SWAP_LE32(temp); 589 590 // Fill out rest of group structure 591 used += j*TT.blockbits; 592 bg[slot].block_bitmap = SWAP_LE32(used++); 593 bg[slot].inode_bitmap = SWAP_LE32(used++); 594 bg[slot].inode_table = SWAP_LE32(used); 595 bg[slot].used_dirs_count = 0; // (TODO) 596 } 597 xwrite(TT.fsfd, bg, TT.blocksize); 598 } 599 600 // Now write out stuff that every block group has. 601 602 // Write block usage bitmap 603 604 start += 2 + itable; 605 memset(toybuf, 0, TT.blocksize); 606 bits_set(toybuf, 0, start); 607 bits_set(toybuf, end, TT.blockbits-end); 608 temp = TT.treeblocks - usedblocks; 609 if (temp) { 610 if (end-start > temp) temp = end-start; 611 bits_set(toybuf, start, temp); 612 } 613 xwrite(TT.fsfd, toybuf, TT.blocksize); 614 615 // Write inode bitmap 616 memset(toybuf, 0, TT.blocksize); 617 j = 0; 618 if (!i) bits_set(toybuf, 0, j = INODES_RESERVED); 619 bits_set(toybuf, TT.inodespg, slot = TT.blockbits-TT.inodespg); 620 temp = TT.treeinodes - usedinodes; 621 if (temp) { 622 if (slot-j > temp) temp = slot-j; 623 bits_set(toybuf, j, temp); 624 } 625 xwrite(TT.fsfd, toybuf, TT.blocksize); 626 627 // Write inode table for this group (TODO) 628 for (j = 0; j<TT.inodespg; j++) { 629 slot = j % (TT.blocksize/sizeof(struct ext2_inode)); 630 if (!slot) { 631 if (j) xwrite(TT.fsfd, in, TT.blocksize); 632 memset(in, 0, TT.blocksize); 633 } 634 if (!i && j<INODES_RESERVED) { 635 // Write root inode 636 if (j == 2) fill_inode(in+slot, dtb); 637 } else if (dti) { 638 fill_inode(in+slot, dti); 639 dti = treenext(dti); 640 } 641 } 642 xwrite(TT.fsfd, in, TT.blocksize); 643 644 while (dtb) { 645 // TODO write index data block 646 // TODO write root directory data block 647 // TODO write directory data block 648 // TODO write file data block 649 put_zeroes(TT.blocksize); 650 start++; 651 if (start == end) break; 652 } 653 // Write data blocks (TODO) 654 put_zeroes((end-start) * TT.blocksize); 655 } 656} 657