1/*
2 * mkjournal.c --- make a journal for a filesystem
3 *
4 * Copyright (C) 2000 Theodore Ts'o.
5 *
6 * %Begin-Header%
7 * This file may be redistributed under the terms of the GNU Library
8 * General Public License, version 2.
9 * %End-Header%
10 */
11
12#include <stdio.h>
13#include <string.h>
14#if HAVE_UNISTD_H
15#include <unistd.h>
16#endif
17#if HAVE_ERRNO_H
18#include <errno.h>
19#endif
20#include <fcntl.h>
21#include <time.h>
22#if HAVE_SYS_STAT_H
23#include <sys/stat.h>
24#endif
25#if HAVE_SYS_TYPES_H
26#include <sys/types.h>
27#endif
28#if HAVE_SYS_IOCTL_H
29#include <sys/ioctl.h>
30#endif
31#if HAVE_NETINET_IN_H
32#include <netinet/in.h>
33#endif
34
35#include "ext2_fs.h"
36#include "e2p/e2p.h"
37#include "ext2fs.h"
38#include "jfs_user.h"
39
40/*
41 * This function automatically sets up the journal superblock and
42 * returns it as an allocated block.
43 */
44errcode_t ext2fs_create_journal_superblock(ext2_filsys fs,
45					   __u32 num_blocks, int flags,
46					   char  **ret_jsb)
47{
48	errcode_t		retval;
49	journal_superblock_t	*jsb;
50
51	if (num_blocks < 1024)
52		return EXT2_ET_JOURNAL_TOO_SMALL;
53
54	if ((retval = ext2fs_get_mem(fs->blocksize, &jsb)))
55		return retval;
56
57	memset (jsb, 0, fs->blocksize);
58
59	jsb->s_header.h_magic = htonl(JFS_MAGIC_NUMBER);
60	if (flags & EXT2_MKJOURNAL_V1_SUPER)
61		jsb->s_header.h_blocktype = htonl(JFS_SUPERBLOCK_V1);
62	else
63		jsb->s_header.h_blocktype = htonl(JFS_SUPERBLOCK_V2);
64	jsb->s_blocksize = htonl(fs->blocksize);
65	jsb->s_maxlen = htonl(num_blocks);
66	jsb->s_nr_users = htonl(1);
67	jsb->s_first = htonl(1);
68	jsb->s_sequence = htonl(1);
69	memcpy(jsb->s_uuid, fs->super->s_uuid, sizeof(fs->super->s_uuid));
70	/*
71	 * If we're creating an external journal device, we need to
72	 * adjust these fields.
73	 */
74	if (fs->super->s_feature_incompat &
75	    EXT3_FEATURE_INCOMPAT_JOURNAL_DEV) {
76		jsb->s_nr_users = 0;
77		if (fs->blocksize == 1024)
78			jsb->s_first = htonl(3);
79		else
80			jsb->s_first = htonl(2);
81	}
82
83	*ret_jsb = (char *) jsb;
84	return 0;
85}
86
87/*
88 * This function writes a journal using POSIX routines.  It is used
89 * for creating external journals and creating journals on live
90 * filesystems.
91 */
92static errcode_t write_journal_file(ext2_filsys fs, char *filename,
93				    blk_t num_blocks, int flags)
94{
95	errcode_t	retval;
96	char		*buf = 0;
97	int		fd, ret_size;
98	blk_t		i;
99
100	if ((retval = ext2fs_create_journal_superblock(fs, num_blocks, flags,
101						       &buf)))
102		return retval;
103
104	/* Open the device or journal file */
105	if ((fd = open(filename, O_WRONLY)) < 0) {
106		retval = errno;
107		goto errfree;
108	}
109
110	/* Write the superblock out */
111	retval = EXT2_ET_SHORT_WRITE;
112	ret_size = write(fd, buf, fs->blocksize);
113	if (ret_size < 0) {
114		retval = errno;
115		goto errout;
116	}
117	if (ret_size != (int) fs->blocksize)
118		goto errout;
119	memset(buf, 0, fs->blocksize);
120
121	if (flags & EXT2_MKJOURNAL_LAZYINIT)
122		goto success;
123
124	for (i = 1; i < num_blocks; i++) {
125		ret_size = write(fd, buf, fs->blocksize);
126		if (ret_size < 0) {
127			retval = errno;
128			goto errout;
129		}
130		if (ret_size != (int) fs->blocksize)
131			goto errout;
132	}
133
134success:
135	retval = 0;
136errout:
137	close(fd);
138errfree:
139	ext2fs_free_mem(&buf);
140	return retval;
141}
142
143/*
144 * Convenience function which zeros out _num_ blocks starting at
145 * _blk_.  In case of an error, the details of the error is returned
146 * via _ret_blk_ and _ret_count_ if they are non-NULL pointers.
147 * Returns 0 on success, and an error code on an error.
148 *
149 * As a special case, if the first argument is NULL, then it will
150 * attempt to free the static zeroizing buffer.  (This is to keep
151 * programs that check for memory leaks happy.)
152 */
153#define STRIDE_LENGTH 8
154errcode_t ext2fs_zero_blocks2(ext2_filsys fs, blk64_t blk, int num,
155			      blk64_t *ret_blk, int *ret_count)
156{
157	int		j, count;
158	static char	*buf;
159	errcode_t	retval;
160
161	/* If fs is null, clean up the static buffer and return */
162	if (!fs) {
163		if (buf) {
164			free(buf);
165			buf = 0;
166		}
167		return 0;
168	}
169	/* Allocate the zeroizing buffer if necessary */
170	if (!buf) {
171		buf = malloc(fs->blocksize * STRIDE_LENGTH);
172		if (!buf)
173			return ENOMEM;
174		memset(buf, 0, fs->blocksize * STRIDE_LENGTH);
175	}
176	/* OK, do the write loop */
177	j=0;
178	while (j < num) {
179		if (blk % STRIDE_LENGTH) {
180			count = STRIDE_LENGTH - (blk % STRIDE_LENGTH);
181			if (count > (num - j))
182				count = num - j;
183		} else {
184			count = num - j;
185			if (count > STRIDE_LENGTH)
186				count = STRIDE_LENGTH;
187		}
188		retval = io_channel_write_blk64(fs->io, blk, count, buf);
189		if (retval) {
190			if (ret_count)
191				*ret_count = count;
192			if (ret_blk)
193				*ret_blk = blk;
194			return retval;
195		}
196		j += count; blk += count;
197	}
198	return 0;
199}
200
201errcode_t ext2fs_zero_blocks(ext2_filsys fs, blk_t blk, int num,
202			     blk_t *ret_blk, int *ret_count)
203{
204	blk64_t ret_blk2;
205	errcode_t retval;
206
207	retval = ext2fs_zero_blocks2(fs, blk, num, &ret_blk2, ret_count);
208	if (retval)
209		*ret_blk = (blk_t) ret_blk2;
210	return retval;
211}
212
213/*
214 * Helper function for creating the journal using direct I/O routines
215 */
216struct mkjournal_struct {
217	int		num_blocks;
218	int		newblocks;
219	blk64_t		goal;
220	blk64_t		blk_to_zero;
221	int		zero_count;
222	int		flags;
223	char		*buf;
224	errcode_t	err;
225};
226
227static int mkjournal_proc(ext2_filsys	fs,
228			  blk64_t	*blocknr,
229			  e2_blkcnt_t	blockcnt,
230			  blk64_t	ref_block EXT2FS_ATTR((unused)),
231			  int		ref_offset EXT2FS_ATTR((unused)),
232			  void		*priv_data)
233{
234	struct mkjournal_struct *es = (struct mkjournal_struct *) priv_data;
235	blk64_t	new_blk;
236	errcode_t	retval;
237
238	if (*blocknr) {
239		es->goal = *blocknr;
240		return 0;
241	}
242	if (blockcnt &&
243	    (EXT2FS_B2C(fs, es->goal) == EXT2FS_B2C(fs, es->goal+1)))
244		new_blk = es->goal+1;
245	else {
246		es->goal &= ~EXT2FS_CLUSTER_MASK(fs);
247		retval = ext2fs_new_block2(fs, es->goal, 0, &new_blk);
248		if (retval) {
249			es->err = retval;
250			return BLOCK_ABORT;
251		}
252		ext2fs_block_alloc_stats2(fs, new_blk, +1);
253		es->newblocks++;
254	}
255	if (blockcnt >= 0)
256		es->num_blocks--;
257
258	retval = 0;
259	if (blockcnt <= 0)
260		retval = io_channel_write_blk64(fs->io, new_blk, 1, es->buf);
261	else if (!(es->flags & EXT2_MKJOURNAL_LAZYINIT)) {
262		if (es->zero_count) {
263			if ((es->blk_to_zero + es->zero_count == new_blk) &&
264			    (es->zero_count < 1024))
265				es->zero_count++;
266			else {
267				retval = ext2fs_zero_blocks2(fs,
268							     es->blk_to_zero,
269							     es->zero_count,
270							     0, 0);
271				es->zero_count = 0;
272			}
273		}
274		if (es->zero_count == 0) {
275			es->blk_to_zero = new_blk;
276			es->zero_count = 1;
277		}
278	}
279
280	if (blockcnt == 0)
281		memset(es->buf, 0, fs->blocksize);
282
283	if (retval) {
284		es->err = retval;
285		return BLOCK_ABORT;
286	}
287	*blocknr = es->goal = new_blk;
288
289	if (es->num_blocks == 0)
290		return (BLOCK_CHANGED | BLOCK_ABORT);
291	else
292		return BLOCK_CHANGED;
293
294}
295
296/*
297 * This function creates a journal using direct I/O routines.
298 */
299static errcode_t write_journal_inode(ext2_filsys fs, ext2_ino_t journal_ino,
300				     blk_t num_blocks, int flags)
301{
302	char			*buf;
303	dgrp_t			group, start, end, i, log_flex;
304	errcode_t		retval;
305	struct ext2_inode	inode;
306	unsigned long long	inode_size;
307	struct mkjournal_struct	es;
308
309	if ((retval = ext2fs_create_journal_superblock(fs, num_blocks, flags,
310						       &buf)))
311		return retval;
312
313	if ((retval = ext2fs_read_bitmaps(fs)))
314		goto out2;
315
316	if ((retval = ext2fs_read_inode(fs, journal_ino, &inode)))
317		goto out2;
318
319	if (inode.i_blocks > 0) {
320		retval = EEXIST;
321		goto out2;
322	}
323
324	es.num_blocks = num_blocks;
325	es.newblocks = 0;
326	es.buf = buf;
327	es.err = 0;
328	es.flags = flags;
329	es.zero_count = 0;
330
331	if (fs->super->s_feature_incompat & EXT3_FEATURE_INCOMPAT_EXTENTS) {
332		inode.i_flags |= EXT4_EXTENTS_FL;
333		if ((retval = ext2fs_write_inode(fs, journal_ino, &inode)))
334			goto out2;
335	}
336
337	/*
338	 * Set the initial goal block to be roughly at the middle of
339	 * the filesystem.  Pick a group that has the largest number
340	 * of free blocks.
341	 */
342	group = ext2fs_group_of_blk2(fs, (ext2fs_blocks_count(fs->super) -
343					 fs->super->s_first_data_block) / 2);
344	log_flex = 1 << fs->super->s_log_groups_per_flex;
345	if (fs->super->s_log_groups_per_flex && (group > log_flex)) {
346		group = group & ~(log_flex - 1);
347		while ((group < fs->group_desc_count) &&
348		       ext2fs_bg_free_blocks_count(fs, group) == 0)
349			group++;
350		if (group == fs->group_desc_count)
351			group = 0;
352		start = group;
353	} else
354		start = (group > 0) ? group-1 : group;
355	end = ((group+1) < fs->group_desc_count) ? group+1 : group;
356	group = start;
357	for (i=start+1; i <= end; i++)
358		if (ext2fs_bg_free_blocks_count(fs, i) >
359		    ext2fs_bg_free_blocks_count(fs, group))
360			group = i;
361
362	es.goal = ext2fs_group_first_block2(fs, group);
363	retval = ext2fs_block_iterate3(fs, journal_ino, BLOCK_FLAG_APPEND,
364				       0, mkjournal_proc, &es);
365	if (es.err) {
366		retval = es.err;
367		goto errout;
368	}
369	if (es.zero_count) {
370		retval = ext2fs_zero_blocks2(fs, es.blk_to_zero,
371					    es.zero_count, 0, 0);
372		if (retval)
373			goto errout;
374	}
375
376	if ((retval = ext2fs_read_inode(fs, journal_ino, &inode)))
377		goto errout;
378
379	inode_size = (unsigned long long)fs->blocksize * num_blocks;
380	inode.i_size = inode_size & 0xFFFFFFFF;
381	inode.i_size_high = (inode_size >> 32) & 0xFFFFFFFF;
382	if (ext2fs_needs_large_file_feature(inode_size))
383		fs->super->s_feature_ro_compat |=
384			EXT2_FEATURE_RO_COMPAT_LARGE_FILE;
385	ext2fs_iblk_add_blocks(fs, &inode, es.newblocks);
386	inode.i_mtime = inode.i_ctime = fs->now ? fs->now : time(0);
387	inode.i_links_count = 1;
388	inode.i_mode = LINUX_S_IFREG | 0600;
389
390	if ((retval = ext2fs_write_new_inode(fs, journal_ino, &inode)))
391		goto errout;
392	retval = 0;
393
394	memcpy(fs->super->s_jnl_blocks, inode.i_block, EXT2_N_BLOCKS*4);
395	fs->super->s_jnl_blocks[15] = inode.i_size_high;
396	fs->super->s_jnl_blocks[16] = inode.i_size;
397	fs->super->s_jnl_backup_type = EXT3_JNL_BACKUP_BLOCKS;
398	ext2fs_mark_super_dirty(fs);
399
400errout:
401	ext2fs_zero_blocks2(0, 0, 0, 0, 0);
402out2:
403	ext2fs_free_mem(&buf);
404	return retval;
405}
406
/*
 * Suggest a journal size, in blocks, for a filesystem of num_blocks
 * blocks.  Filesystems below 2048 blocks are too small for a journal
 * and yield -1; larger filesystems step up from 1024 to a maximum of
 * 32768 journal blocks.
 */
int ext2fs_default_journal_size(__u64 num_blocks)
{
	/* Each row applies to filesystems smaller than 'below' blocks */
	static const struct {
		__u64	below;
		int	journal_blocks;
	} tiers[] = {
		{ 2048,		-1 },
		{ 32768,	1024 },
		{ 256*1024,	4096 },
		{ 512*1024,	8192 },
		{ 1024*1024,	16384 },
	};
	unsigned int t;

	for (t = 0; t < sizeof(tiers) / sizeof(tiers[0]); t++) {
		if (num_blocks < tiers[t].below)
			return tiers[t].journal_blocks;
	}
	return 32768;
}
426
427/*
428 * This function adds a journal device to a filesystem
429 */
430errcode_t ext2fs_add_journal_device(ext2_filsys fs, ext2_filsys journal_dev)
431{
432	struct stat	st;
433	errcode_t	retval;
434	char		buf[1024];
435	journal_superblock_t	*jsb;
436	int		start;
437	__u32		i, nr_users;
438
439	/* Make sure the device exists and is a block device */
440	if (stat(journal_dev->device_name, &st) < 0)
441		return errno;
442
443	if (!S_ISBLK(st.st_mode))
444		return EXT2_ET_JOURNAL_NOT_BLOCK; /* Must be a block device */
445
446	/* Get the journal superblock */
447	start = 1;
448	if (journal_dev->blocksize == 1024)
449		start++;
450	if ((retval = io_channel_read_blk64(journal_dev->io, start, -1024,
451					    buf)))
452		return retval;
453
454	jsb = (journal_superblock_t *) buf;
455	if ((jsb->s_header.h_magic != (unsigned) ntohl(JFS_MAGIC_NUMBER)) ||
456	    (jsb->s_header.h_blocktype != (unsigned) ntohl(JFS_SUPERBLOCK_V2)))
457		return EXT2_ET_NO_JOURNAL_SB;
458
459	if (ntohl(jsb->s_blocksize) != (unsigned long) fs->blocksize)
460		return EXT2_ET_UNEXPECTED_BLOCK_SIZE;
461
462	/* Check and see if this filesystem has already been added */
463	nr_users = ntohl(jsb->s_nr_users);
464	for (i=0; i < nr_users; i++) {
465		if (memcmp(fs->super->s_uuid,
466			   &jsb->s_users[i*16], 16) == 0)
467			break;
468	}
469	if (i >= nr_users) {
470		memcpy(&jsb->s_users[nr_users*16],
471		       fs->super->s_uuid, 16);
472		jsb->s_nr_users = htonl(nr_users+1);
473	}
474
475	/* Writeback the journal superblock */
476	if ((retval = io_channel_write_blk64(journal_dev->io, start, -1024, buf)))
477		return retval;
478
479	fs->super->s_journal_inum = 0;
480	fs->super->s_journal_dev = st.st_rdev;
481	memcpy(fs->super->s_journal_uuid, jsb->s_uuid,
482	       sizeof(fs->super->s_journal_uuid));
483	fs->super->s_feature_compat |= EXT3_FEATURE_COMPAT_HAS_JOURNAL;
484	ext2fs_mark_super_dirty(fs);
485	return 0;
486}
487
488/*
489 * This function adds a journal inode to a filesystem, using either
490 * POSIX routines if the filesystem is mounted, or using direct I/O
491 * functions if it is not.
492 */
493errcode_t ext2fs_add_journal_inode(ext2_filsys fs, blk_t num_blocks, int flags)
494{
495	errcode_t		retval;
496	ext2_ino_t		journal_ino;
497	struct stat		st;
498	char			jfile[1024];
499	int			mount_flags;
500	int			fd = -1;
501
502	if (flags & EXT2_MKJOURNAL_NO_MNT_CHECK)
503		mount_flags = 0;
504	else if ((retval = ext2fs_check_mount_point(fs->device_name,
505						    &mount_flags,
506						    jfile, sizeof(jfile)-10)))
507		return retval;
508
509	if (mount_flags & EXT2_MF_MOUNTED) {
510#if HAVE_EXT2_IOCTLS
511		int f = 0;
512#endif
513		strcat(jfile, "/.journal");
514
515		/*
516		 * If .../.journal already exists, make sure any
517		 * immutable or append-only flags are cleared.
518		 */
519#if defined(HAVE_CHFLAGS) && defined(UF_NODUMP)
520		(void) chflags (jfile, 0);
521#else
522#if HAVE_EXT2_IOCTLS
523		fd = open(jfile, O_RDONLY);
524		if (fd >= 0) {
525			retval = ioctl(fd, EXT2_IOC_SETFLAGS, &f);
526			close(fd);
527			if (retval)
528				return retval;
529		}
530#endif
531#endif
532
533		/* Create the journal file */
534		if ((fd = open(jfile, O_CREAT|O_WRONLY, 0600)) < 0)
535			return errno;
536
537		/* Note that we can't do lazy journal initialization for mounted
538		 * filesystems, since the zero writing is also allocating the
539		 * journal blocks.  We could use fallocate, but not all kernels
540		 * support that, and creating a journal on a mounted ext2
541		 * filesystems is extremely rare these days...  Ignore it. */
542		flags &= ~EXT2_MKJOURNAL_LAZYINIT;
543
544		if ((retval = write_journal_file(fs, jfile, num_blocks, flags)))
545			goto errout;
546
547		/* Get inode number of the journal file */
548		if (fstat(fd, &st) < 0) {
549			retval = errno;
550			goto errout;
551		}
552
553#if defined(HAVE_CHFLAGS) && defined(UF_NODUMP)
554		retval = fchflags (fd, UF_NODUMP|UF_IMMUTABLE);
555#else
556#if HAVE_EXT2_IOCTLS
557		if (ioctl(fd, EXT2_IOC_GETFLAGS, &f) < 0) {
558			retval = errno;
559			goto errout;
560		}
561		f |= EXT2_NODUMP_FL | EXT2_IMMUTABLE_FL;
562		retval = ioctl(fd, EXT2_IOC_SETFLAGS, &f);
563#endif
564#endif
565		if (retval) {
566			retval = errno;
567			goto errout;
568		}
569
570		if (close(fd) < 0) {
571			retval = errno;
572			fd = -1;
573			goto errout;
574		}
575		journal_ino = st.st_ino;
576	} else {
577		if ((mount_flags & EXT2_MF_BUSY) &&
578		    !(fs->flags & EXT2_FLAG_EXCLUSIVE)) {
579			retval = EBUSY;
580			goto errout;
581		}
582		journal_ino = EXT2_JOURNAL_INO;
583		if ((retval = write_journal_inode(fs, journal_ino,
584						  num_blocks, flags)))
585			return retval;
586	}
587
588	fs->super->s_journal_inum = journal_ino;
589	fs->super->s_journal_dev = 0;
590	memset(fs->super->s_journal_uuid, 0,
591	       sizeof(fs->super->s_journal_uuid));
592	fs->super->s_feature_compat |= EXT3_FEATURE_COMPAT_HAS_JOURNAL;
593
594	ext2fs_mark_super_dirty(fs);
595	return 0;
596errout:
597	if (fd >= 0)
598		close(fd);
599	return retval;
600}
601
#ifdef DEBUG
/*
 * Debug driver: opens the filesystem named by argv[1] read-write,
 * adds a 1024-block journal inode to it and flushes the result.
 * Returns 0 on success and 1 on a usage, open or journal error.
 */
int main(int argc, char **argv)
{
	errcode_t	retval;
	char		*device_name;
	ext2_filsys	fs;

	if (argc < 2) {
		fprintf(stderr, "Usage: %s filesystem\n", argv[0]);
		return 1;
	}
	device_name = argv[1];

	retval = ext2fs_open (device_name, EXT2_FLAG_RW, 0, 0,
			      unix_io_manager, &fs);
	if (retval) {
		com_err(argv[0], retval, "while opening %s", device_name);
		return 1;
	}

	retval = ext2fs_add_journal_inode(fs, 1024, 0);
	if (retval) {
		com_err(argv[0], retval, "while adding journal to %s",
			device_name);
		return 1;
	}
	/* A flush failure is only warned about; the exit status stays 0 */
	retval = ext2fs_flush(fs);
	if (retval) {
		printf("Warning, had trouble writing out superblocks.\n");
	}
	ext2fs_close(fs);
	return 0;
}
#endif
637