1/* mke2fs.c - Create an ext2 filesystem image.
2 *
3 * Copyright 2006, 2007 Rob Landley <rob@landley.net>
4
5// Still to go: "E:jJ:L:m:O:"
6USE_MKE2FS(NEWTOY(mke2fs, "<1>2g:Fnqm#N#i#b#", TOYFLAG_SBIN))
7
8config MKE2FS
9  bool "mke2fs"
10  default n
11  help
12    usage: mke2fs [-Fnq] [-b ###] [-N|i ###] [-m ###] device
13
14    Create an ext2 filesystem on a block device or filesystem image.
15
16    -F         Force to run on a mounted device
17    -n         Don't write to device
18    -q         Quiet (no output)
19    -b size    Block size (1024, 2048, or 4096)
20    -N inodes  Allocate this many inodes
21    -i bytes   Allocate one inode for every XXX bytes of device
22    -m percent Reserve this percent of filesystem space for root user
23
24config MKE2FS_JOURNAL
25  bool "Journaling support (ext3)"
26  default n
27  depends on MKE2FS
28  help
29    usage: mke2fs [-j] [-J size=###,device=XXX]
30
31    -j         Create journal (ext3)
32    -J         Journal options
33               size: Number of blocks (1024-102400)
34               device: Specify an external journal
35
36config MKE2FS_GEN
37  bool "Generate (gene2fs)"
38  default n
39  depends on MKE2FS
40  help
41    usage: gene2fs [options] device filename
42
43    The [options] are the same as mke2fs.
44
45config MKE2FS_LABEL
46  bool "Label support"
47  default n
48  depends on MKE2FS
49  help
50    usage: mke2fs [-L label] [-M path] [-o string]
51
52    -L         Volume label
53    -M         Path to mount point
54    -o         Created by
55
56config MKE2FS_EXTENDED
57  bool "Extended options"
58  default n
59  depends on MKE2FS
60  help
61    usage: mke2fs [-E stride=###] [-O option[,option]]
62
63    -E stride= Set RAID stripe size (in blocks)
64    -O [opts]  Specify fewer ext2 option flags (for old kernels)
65               All of these are on by default (as appropriate)
66       none         Clear default options (all but journaling)
67       dir_index    Use htree indexes for large directories
68       filetype     Store file type info in directory entry
69       has_journal  Set by -j
70       journal_dev  Set by -J device=XXX
71       sparse_super Don't allocate huge numbers of redundant superblocks
72*/
73
74#define FOR_mke2fs
75#include "toys.h"
76
77GLOBALS(
78  // Command line arguments.
79  long blocksize;
80  long bytes_per_inode;
81  long inodes;           // Total inodes in filesystem.
82  long reserved_percent; // Integer precent of space to reserve for root.
83  char *gendir;          // Where to read dirtree from.
84
85  // Internal data.
86  struct dirtree *dt;    // Tree of files to copy into the new filesystem.
87  unsigned treeblocks;   // Blocks used by dt
88  unsigned treeinodes;   // Inodes used by dt
89
90  unsigned blocks;       // Total blocks in the filesystem.
91  unsigned freeblocks;   // Free blocks in the filesystem.
92  unsigned inodespg;     // Inodes per group
93  unsigned groups;       // Total number of block groups.
94  unsigned blockbits;    // Bits per block.  (Also blocks per group.)
95
96  // For gene2fs
97  unsigned nextblock;    // Next data block to allocate
98  unsigned nextgroup;    // Next group we'll be allocating from
99  int fsfd;              // File descriptor of filesystem (to output to).
100
101  struct ext2_superblock sb;
102)
103
104#define INODES_RESERVED 10
105
106static uint32_t div_round_up(uint32_t a, uint32_t b)
107{
108  uint32_t c = a/b;
109
110  if (a%b) c++;
111  return c;
112}
113
114// Calculate data blocks plus index blocks needed to hold a file.
115
116static uint32_t file_blocks_used(uint64_t size, uint32_t *blocklist)
117{
118  uint32_t dblocks = (uint32_t)((size+(TT.blocksize-1))/TT.blocksize);
119  uint32_t idx=TT.blocksize/4, iblocks=0, diblocks=0, tiblocks=0;
120
121  // Fill out index blocks in inode.
122
123  if (blocklist) {
124    int i;
125
126    // Direct index blocks
127    for (i=0; i<13 && i<dblocks; i++) blocklist[i] = i;
128    // Singly indirect index blocks
129    if (dblocks > 13+idx) blocklist[13] = 13+idx;
130    // Doubly indirect index blocks
131    idx = 13 + idx + (idx*idx);
132    if (dblocks > idx) blocklist[14] = idx;
133
134    return 0;
135  }
136
137  // Account for direct, singly, doubly, and triply indirect index blocks
138
139  if (dblocks > 12) {
140    iblocks = ((dblocks-13)/idx)+1;
141    if (iblocks > 1) {
142      diblocks = ((iblocks-2)/idx)+1;
143      if (diblocks > 1)
144        tiblocks = ((diblocks-2)/idx)+1;
145    }
146  }
147
148  return dblocks + iblocks + diblocks + tiblocks;
149}
150
151// Use the parent pointer to iterate through the tree non-recursively.
152static struct dirtree *treenext(struct dirtree *this)
153{
154  while (this && !this->next) this = this->parent;
155  if (this) this = this->next;
156
157  return this;
158}
159
160// Recursively calculate the number of blocks used by each inode in the tree.
161// Returns blocks used by this directory, assigns bytes used to *size.
162// Writes total block count to TT.treeblocks and inode count to TT.treeinodes.
163
164static long check_treesize(struct dirtree *that, off_t *size)
165{
166  long blocks;
167
168  while (that) {
169    *size += sizeof(struct ext2_dentry) + strlen(that->name);
170
171    if (that->child)
172      that->st.st_blocks = check_treesize(that->child, &that->st.st_size);
173    else if (S_ISREG(that->st.st_mode)) {
174       that->st.st_blocks = file_blocks_used(that->st.st_size, 0);
175       TT.treeblocks += that->st.st_blocks;
176    }
177    that = that->next;
178  }
179  TT.treeblocks += blocks = file_blocks_used(*size, 0);
180  TT.treeinodes++;
181
182  return blocks;
183}
184
185// Calculate inode numbers and link counts.
186//
187// To do this right I need to copy the tree and sort it, but here's a really
188// ugly n^2 way of dealing with the problem that doesn't scale well to large
189// numbers of files (> 100,000) but can be done in very little code.
190// This rewrites inode numbers to their final values, allocating depth first.
191
192static void check_treelinks(struct dirtree *tree)
193{
194  struct dirtree *current=tree, *that;
195  long inode = INODES_RESERVED;
196
197  while (current) {
198    ++inode;
199    // Since we can't hardlink to directories, we know their link count.
200    if (S_ISDIR(current->st.st_mode)) current->st.st_nlink = 2;
201    else {
202      dev_t new = current->st.st_dev;
203
204      if (!new) continue;
205
206      // Look for other copies of current node
207      current->st.st_nlink = 0;
208      for (that = tree; that; that = treenext(that)) {
209        if (current->st.st_ino == that->st.st_ino &&
210          current->st.st_dev == that->st.st_dev)
211        {
212          current->st.st_nlink++;
213          current->st.st_ino = inode;
214        }
215      }
216    }
217    current->st.st_ino = inode;
218    current = treenext(current);
219  }
220}
221
222// According to http://www.opengroup.org/onlinepubs/9629399/apdxa.htm
223// we should generate a uuid structure by reading a clock with 100 nanosecond
224// precision, normalizing it to the start of the gregorian calendar in 1582,
225// and looking up our eth0 mac address.
226//
227// On the other hand, we have 128 bits to come up with a unique identifier, of
228// which 6 have a defined value.  /dev/urandom it is.
229
230static void create_uuid(char *uuid)
231{
232  // Read 128 random bits
233  int fd = xopen("/dev/urandom", O_RDONLY);
234  xreadall(fd, uuid, 16);
235  close(fd);
236
237  // Claim to be a DCE format UUID.
238  uuid[6] = (uuid[6] & 0x0F) | 0x40;
239  uuid[8] = (uuid[8] & 0x3F) | 0x80;
240
241  // rfc2518 section 6.4.1 suggests if we're not using a macaddr, we should
242  // set bit 1 of the node ID, which is the mac multicast bit.  This means we
243  // should never collide with anybody actually using a macaddr.
244  uuid[11] = uuid[11] | 128;
245}
246
247// Calculate inodes per group from total inodes.
248static uint32_t get_inodespg(uint32_t inodes)
249{
250  uint32_t temp;
251
252  // Round up to fill complete inode blocks.
253  temp = (inodes + TT.groups - 1) / TT.groups;
254  inodes = TT.blocksize/sizeof(struct ext2_inode);
255  return ((temp + inodes - 1)/inodes)*inodes;
256}
257
258// Fill out superblock and TT structures.
259
260static void init_superblock(struct ext2_superblock *sb)
261{
262  uint32_t temp;
263
264  // Set log_block_size and log_frag_size.
265
266  for (temp = 0; temp < 4; temp++) if (TT.blocksize == 1024<<temp) break;
267  if (temp==4) error_exit("bad blocksize");
268  sb->log_block_size = sb->log_frag_size = SWAP_LE32(temp);
269
270  // Fill out blocks_count, r_blocks_count, first_data_block
271
272  sb->blocks_count = SWAP_LE32(TT.blocks);
273  sb->free_blocks_count = SWAP_LE32(TT.freeblocks);
274  temp = (TT.blocks * (uint64_t)TT.reserved_percent) / 100;
275  sb->r_blocks_count = SWAP_LE32(temp);
276
277  sb->first_data_block = SWAP_LE32(TT.blocksize == 1024 ? 1 : 0);
278
279  // Set blocks_per_group and frags_per_group, which is the size of an
280  // allocation bitmap that fits in one block (I.E. how many bits per block)?
281
282  sb->blocks_per_group = sb->frags_per_group = SWAP_LE32(TT.blockbits);
283
284  // Set inodes_per_group and total inodes_count
285  sb->inodes_per_group = SWAP_LE32(TT.inodespg);
286  sb->inodes_count = SWAP_LE32(TT.inodespg * TT.groups);
287
288  // Determine free inodes.
289  temp = TT.inodespg*TT.groups - INODES_RESERVED;
290  if (temp < TT.treeinodes) error_exit("Not enough inodes.\n");
291  sb->free_inodes_count = SWAP_LE32(temp - TT.treeinodes);
292
293  // Fill out the rest of the superblock.
294  sb->max_mnt_count=0xFFFF;
295  sb->wtime = sb->lastcheck = sb->mkfs_time = SWAP_LE32(time(NULL));
296  sb->magic = SWAP_LE32(0xEF53);
297  sb->state = sb->errors = SWAP_LE16(1);
298
299  sb->rev_level = SWAP_LE32(1);
300  sb->first_ino = SWAP_LE32(INODES_RESERVED+1);
301  sb->inode_size = SWAP_LE16(sizeof(struct ext2_inode));
302  sb->feature_incompat = SWAP_LE32(EXT2_FEATURE_INCOMPAT_FILETYPE);
303  sb->feature_ro_compat = SWAP_LE32(EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER);
304
305  create_uuid(sb->uuid);
306
307  // TODO If we're called as mke3fs or mkfs.ext3, do a journal.
308
309  //if (strchr(toys.which->name,'3'))
310  //	sb->feature_compat |= SWAP_LE32(EXT3_FEATURE_COMPAT_HAS_JOURNAL);
311}
312
313// Does this group contain a superblock backup (and group descriptor table)?
314static int is_sb_group(uint32_t group)
315{
316  int i;
317
318  // Superblock backups are on groups 0, 1, and powers of 3, 5, and 7.
319  if(!group || group==1) return 1;
320  for (i=3; i<9; i+=2) {
321    int j = i;
322    while (j<group) j*=i;
323    if (j==group) return 1;
324  }
325  return 0;
326}
327
328
329// Number of blocks used in group by optional superblock/group list backup.
330static int group_superblock_overhead(uint32_t group)
331{
332  int used;
333
334  if (!is_sb_group(group)) return 0;
335
336  // How many blocks does the group descriptor table take up?
337  used = TT.groups * sizeof(struct ext2_group);
338  used += TT.blocksize - 1;
339  used /= TT.blocksize;
340  // Plus the superblock itself.
341  used++;
342  // And a corner case.
343  if (!group && TT.blocksize == 1024) used++;
344
345  return used;
346}
347
348// Number of blocks used in group to store superblock/group/inode list
349static int group_overhead(uint32_t group)
350{
351  // Return superblock backup overhead (if any), plus block/inode
352  // allocation bitmaps, plus inode tables.
353  return group_superblock_overhead(group) + 2 + get_inodespg(TT.inodespg)
354        / (TT.blocksize/sizeof(struct ext2_inode));
355}
356
357// In bitmap "array" set "len" bits starting at position "start" (from 0).
358static void bits_set(char *array, int start, int len)
359{
360  while(len) {
361    if ((start&7) || len<8) {
362      array[start/8]|=(1<<(start&7));
363      start++;
364      len--;
365    } else {
366      array[start/8]=255;
367      start+=8;
368      len-=8;
369    }
370  }
371}
372
373// Seek past len bytes (to maintain sparse file), or write zeroes if output
374// not seekable
375static void put_zeroes(int len)
376{
377  if(-1 == lseek(TT.fsfd, len, SEEK_SET)) {
378    memset(toybuf, 0, sizeof(toybuf));
379    while (len) {
380      int out = len > sizeof(toybuf) ? sizeof(toybuf) : len;
381      xwrite(TT.fsfd, toybuf, out);
382      len -= out;
383    }
384  }
385}
386
387// Fill out an inode structure from struct stat info in dirtree.
388static void fill_inode(struct ext2_inode *in, struct dirtree *that)
389{
390  uint32_t fbu[15];
391  int temp;
392
393  file_blocks_used(that->st.st_size, fbu);
394
395  // If that inode needs data blocks allocated to it.
396  if (that->st.st_size) {
397    int i, group = TT.nextblock/TT.blockbits;
398
399    // TODO: teach this about indirect blocks.
400    for (i=0; i<15; i++) {
401      // If we just jumped into a new group, skip group overhead blocks.
402      while (group >= TT.nextgroup)
403        TT.nextblock += group_overhead(TT.nextgroup++);
404    }
405  }
406  // TODO :  S_ISREG/DIR/CHR/BLK/FIFO/LNK/SOCK(m)
407  in->mode = SWAP_LE32(that->st.st_mode);
408
409  in->uid = SWAP_LE16(that->st.st_uid & 0xFFFF);
410  in->uid_high = SWAP_LE16(that->st.st_uid >> 16);
411  in->gid = SWAP_LE16(that->st.st_gid & 0xFFFF);
412  in->gid_high = SWAP_LE16(that->st.st_gid >> 16);
413  in->size = SWAP_LE32(that->st.st_size & 0xFFFFFFFF);
414
415  // Contortions to make the compiler not generate a warning for x>>32
416  // when x is 32 bits.  The optimizer should clean this up.
417  if (sizeof(that->st.st_size) > 4) temp = 32;
418  else temp = 0;
419  if (temp) in->dir_acl = SWAP_LE32(that->st.st_size >> temp);
420
421  in->atime = SWAP_LE32(that->st.st_atime);
422  in->ctime = SWAP_LE32(that->st.st_ctime);
423  in->mtime = SWAP_LE32(that->st.st_mtime);
424
425  in->links_count = SWAP_LE16(that->st.st_nlink);
426  in->blocks = SWAP_LE32(that->st.st_blocks);
427  // in->faddr
428}
429
430// Works like an archiver.
431// The first argument is the name of the file to create.  If it already
432// exists, that size will be used.
433
434void mke2fs_main(void)
435{
436  int i, temp;
437  off_t length;
438  uint32_t usedblocks, usedinodes, dtiblk, dtbblk;
439  struct dirtree *dti, *dtb;
440
441  // Handle command line arguments.
442
443  if (toys.optargs[1]) {
444    sscanf(toys.optargs[1], "%u", &TT.blocks);
445    temp = O_RDWR|O_CREAT;
446  } else temp = O_RDWR;
447  if (!TT.reserved_percent) TT.reserved_percent = 5;
448
449  // TODO: Check if filesystem is mounted here
450
451  // For mke?fs, open file.  For gene?fs, create file.
452  TT.fsfd = xcreate(*toys.optargs, temp, 0777);
453
454  // Determine appropriate block size and block count from file length.
455  // (If no length, default to 4k.  They can override it on the cmdline.)
456
457  length = fdlength(TT.fsfd);
458  if (!TT.blocksize) TT.blocksize = (length && length < 1<<29) ? 1024 : 4096;
459  TT.blockbits = 8*TT.blocksize;
460  if (!TT.blocks) TT.blocks = length/TT.blocksize;
461
462  // Collect gene2fs list or lost+found, calculate requirements.
463
464  if (TT.gendir) {
465    strncpy(toybuf, TT.gendir, sizeof(toybuf));
466    dti = dirtree_read(toybuf, dirtree_notdotdot);
467  } else {
468    dti = xzalloc(sizeof(struct dirtree)+11);
469    strcpy(dti->name, "lost+found");
470    dti->st.st_mode = S_IFDIR|0755;
471    dti->st.st_ctime = dti->st.st_mtime = time(NULL);
472  }
473
474  // Add root directory inode.  This is iterated through for when finding
475  // blocks, but not when finding inodes.  The tree's parent pointers don't
476  // point back into this.
477
478  dtb = xzalloc(sizeof(struct dirtree)+1);
479  dtb->st.st_mode = S_IFDIR|0755;
480  dtb->st.st_ctime = dtb->st.st_mtime = time(NULL);
481  dtb->child = dti;
482
483  // Figure out how much space is used by preset files
484  length = check_treesize(dtb, &(dtb->st.st_size));
485  check_treelinks(dtb);
486
487  // Figure out how many total inodes we need.
488
489  if (!TT.inodes) {
490    if (!TT.bytes_per_inode) TT.bytes_per_inode = 8192;
491    TT.inodes = (TT.blocks * (uint64_t)TT.blocksize) / TT.bytes_per_inode;
492  }
493
494  // If we're generating a filesystem and have no idea how many blocks it
495  // needs, start with a minimal guess, find the overhead of that many
496  // groups, and loop until this is enough groups to store this many blocks.
497  if (!TT.blocks) TT.groups = (TT.treeblocks/TT.blockbits)+1;
498  else TT.groups = div_round_up(TT.blocks, TT.blockbits);
499
500  for (;;) {
501    temp = TT.treeblocks;
502
503    for (i = 0; i<TT.groups; i++) temp += group_overhead(i);
504
505    if (TT.blocks) {
506      if (TT.blocks < temp) error_exit("Not enough space.\n");
507      break;
508    }
509    if (temp <= TT.groups * TT.blockbits) {
510      TT.blocks = temp;
511      break;
512    }
513    TT.groups++;
514  }
515  TT.freeblocks = TT.blocks - temp;
516
517  // Now we know all the TT data, initialize superblock structure.
518
519  init_superblock(&TT.sb);
520
521  // Start writing.  Skip the first 1k to avoid the boot sector (if any).
522  put_zeroes(1024);
523
524  // Loop through block groups, write out each one.
525  dtiblk = dtbblk = usedblocks = usedinodes = 0;
526  for (i=0; i<TT.groups; i++) {
527    struct ext2_inode *in = (struct ext2_inode *)toybuf;
528    uint32_t start, itable, used, end;
529    int j, slot;
530
531    // Where does this group end?
532    end = TT.blockbits;
533    if ((i+1)*TT.blockbits > TT.blocks) end = TT.blocks & (TT.blockbits-1);
534
535    // Blocks used by inode table
536    itable = (TT.inodespg*sizeof(struct ext2_inode))/TT.blocksize;
537
538    // If a superblock goes here, write it out.
539    start = group_superblock_overhead(i);
540    if (start) {
541      struct ext2_group *bg = (struct ext2_group *)toybuf;
542      int treeblocks = TT.treeblocks, treeinodes = TT.treeinodes;
543
544      TT.sb.block_group_nr = SWAP_LE16(i);
545
546      // Write superblock and pad it up to block size
547      xwrite(TT.fsfd, &TT.sb, sizeof(struct ext2_superblock));
548      temp = TT.blocksize - sizeof(struct ext2_superblock);
549      if (!i && TT.blocksize > 1024) temp -= 1024;
550      memset(toybuf, 0, TT.blocksize);
551      xwrite(TT.fsfd, toybuf, temp);
552
553      // Loop through groups to write group descriptor table.
554      for(j=0; j<TT.groups; j++) {
555
556        // Figure out what sector this group starts in.
557        used = group_superblock_overhead(j);
558
559        // Find next array slot in this block (flush block if full).
560        slot = j % (TT.blocksize/sizeof(struct ext2_group));
561        if (!slot) {
562          if (j) xwrite(TT.fsfd, bg, TT.blocksize);
563          memset(bg, 0, TT.blocksize);
564        }
565
566        // How many free inodes in this group?
567        temp = TT.inodespg;
568        if (!i) temp -= INODES_RESERVED;
569        if (temp > treeinodes) {
570          treeinodes -= temp;
571          temp = 0;
572        } else {
573          temp -= treeinodes;
574          treeinodes = 0;
575        }
576        bg[slot].free_inodes_count = SWAP_LE16(temp);
577
578        // How many free blocks in this group?
579        temp = TT.inodespg/(TT.blocksize/sizeof(struct ext2_inode)) + 2;
580        temp = end-used-temp;
581        if (temp > treeblocks) {
582          treeblocks -= temp;
583          temp = 0;
584        } else {
585          temp -= treeblocks;
586          treeblocks = 0;
587        }
588        bg[slot].free_blocks_count = SWAP_LE32(temp);
589
590        // Fill out rest of group structure
591        used += j*TT.blockbits;
592        bg[slot].block_bitmap = SWAP_LE32(used++);
593        bg[slot].inode_bitmap = SWAP_LE32(used++);
594        bg[slot].inode_table = SWAP_LE32(used);
595        bg[slot].used_dirs_count = 0;  // (TODO)
596      }
597      xwrite(TT.fsfd, bg, TT.blocksize);
598    }
599
600    // Now write out stuff that every block group has.
601
602    // Write block usage bitmap
603
604    start += 2 + itable;
605    memset(toybuf, 0, TT.blocksize);
606    bits_set(toybuf, 0, start);
607    bits_set(toybuf, end, TT.blockbits-end);
608    temp = TT.treeblocks - usedblocks;
609    if (temp) {
610      if (end-start > temp) temp = end-start;
611      bits_set(toybuf, start, temp);
612    }
613    xwrite(TT.fsfd, toybuf, TT.blocksize);
614
615    // Write inode bitmap
616    memset(toybuf, 0, TT.blocksize);
617    j = 0;
618    if (!i) bits_set(toybuf, 0, j = INODES_RESERVED);
619    bits_set(toybuf, TT.inodespg, slot = TT.blockbits-TT.inodespg);
620    temp = TT.treeinodes - usedinodes;
621    if (temp) {
622      if (slot-j > temp) temp = slot-j;
623      bits_set(toybuf, j, temp);
624    }
625    xwrite(TT.fsfd, toybuf, TT.blocksize);
626
627    // Write inode table for this group (TODO)
628    for (j = 0; j<TT.inodespg; j++) {
629      slot = j % (TT.blocksize/sizeof(struct ext2_inode));
630      if (!slot) {
631        if (j) xwrite(TT.fsfd, in, TT.blocksize);
632        memset(in, 0, TT.blocksize);
633      }
634      if (!i && j<INODES_RESERVED) {
635        // Write root inode
636        if (j == 2) fill_inode(in+slot, dtb);
637      } else if (dti) {
638        fill_inode(in+slot, dti);
639        dti = treenext(dti);
640      }
641    }
642    xwrite(TT.fsfd, in, TT.blocksize);
643
644    while (dtb) {
645      // TODO write index data block
646      // TODO write root directory data block
647      // TODO write directory data block
648      // TODO write file data block
649      put_zeroes(TT.blocksize);
650      start++;
651      if (start == end) break;
652    }
653    // Write data blocks (TODO)
654    put_zeroes((end-start) * TT.blocksize);
655  }
656}
657