/* mkjournal.c revision 6c54689fadc3fe0b0bcae2cc93946cb7a28b9f15 */
/*
 * mkjournal.c --- make a journal for a filesystem
 *
 * Copyright (C) 2000 Theodore Ts'o.
 *
 * %Begin-Header%
 * This file may be redistributed under the terms of the GNU Library
 * General Public License, version 2.
 * %End-Header%
 */
11
12#include <stdio.h>
13#include <string.h>
14#if HAVE_UNISTD_H
15#include <unistd.h>
16#endif
17#if HAVE_ERRNO_H
18#include <errno.h>
19#endif
20#include <fcntl.h>
21#include <time.h>
22#if HAVE_SYS_STAT_H
23#include <sys/stat.h>
24#endif
25#if HAVE_SYS_TYPES_H
26#include <sys/types.h>
27#endif
28#if HAVE_SYS_IOCTL_H
29#include <sys/ioctl.h>
30#endif
31#if HAVE_NETINET_IN_H
32#include <netinet/in.h>
33#endif
34
35#include "ext2_fs.h"
36#include "e2p/e2p.h"
37#include "ext2fs.h"
38#include "jfs_user.h"
39
40/*
41 * This function automatically sets up the journal superblock and
42 * returns it as an allocated block.
43 */
44errcode_t ext2fs_create_journal_superblock(ext2_filsys fs,
45					   __u32 num_blocks, int flags,
46					   char  **ret_jsb)
47{
48	errcode_t		retval;
49	journal_superblock_t	*jsb;
50
51	if (num_blocks < 1024)
52		return EXT2_ET_JOURNAL_TOO_SMALL;
53
54	if ((retval = ext2fs_get_mem(fs->blocksize, &jsb)))
55		return retval;
56
57	memset (jsb, 0, fs->blocksize);
58
59	jsb->s_header.h_magic = htonl(JFS_MAGIC_NUMBER);
60	if (flags & EXT2_MKJOURNAL_V1_SUPER)
61		jsb->s_header.h_blocktype = htonl(JFS_SUPERBLOCK_V1);
62	else
63		jsb->s_header.h_blocktype = htonl(JFS_SUPERBLOCK_V2);
64	jsb->s_blocksize = htonl(fs->blocksize);
65	jsb->s_maxlen = htonl(num_blocks);
66	jsb->s_nr_users = htonl(1);
67	jsb->s_first = htonl(1);
68	jsb->s_sequence = htonl(1);
69	memcpy(jsb->s_uuid, fs->super->s_uuid, sizeof(fs->super->s_uuid));
70	/*
71	 * If we're creating an external journal device, we need to
72	 * adjust these fields.
73	 */
74	if (fs->super->s_feature_incompat &
75	    EXT3_FEATURE_INCOMPAT_JOURNAL_DEV) {
76		jsb->s_nr_users = 0;
77		if (fs->blocksize == 1024)
78			jsb->s_first = htonl(3);
79		else
80			jsb->s_first = htonl(2);
81	}
82
83	*ret_jsb = (char *) jsb;
84	return 0;
85}
86
87/*
88 * This function writes a journal using POSIX routines.  It is used
89 * for creating external journals and creating journals on live
90 * filesystems.
91 */
92static errcode_t write_journal_file(ext2_filsys fs, char *filename,
93				    blk_t num_blocks, int flags)
94{
95	errcode_t	retval;
96	char		*buf = 0;
97	int		fd, ret_size;
98	blk_t		i;
99
100	if ((retval = ext2fs_create_journal_superblock(fs, num_blocks, flags,
101						       &buf)))
102		return retval;
103
104	/* Open the device or journal file */
105	if ((fd = open(filename, O_WRONLY)) < 0) {
106		retval = errno;
107		goto errfree;
108	}
109
110	/* Write the superblock out */
111	retval = EXT2_ET_SHORT_WRITE;
112	ret_size = write(fd, buf, fs->blocksize);
113	if (ret_size < 0) {
114		retval = errno;
115		goto errout;
116	}
117	if (ret_size != (int) fs->blocksize)
118		goto errout;
119	memset(buf, 0, fs->blocksize);
120
121	if (flags & EXT2_MKJOURNAL_LAZYINIT)
122		goto success;
123
124	for (i = 1; i < num_blocks; i++) {
125		ret_size = write(fd, buf, fs->blocksize);
126		if (ret_size < 0) {
127			retval = errno;
128			goto errout;
129		}
130		if (ret_size != (int) fs->blocksize)
131			goto errout;
132	}
133
134success:
135	retval = 0;
136errout:
137	close(fd);
138errfree:
139	ext2fs_free_mem(&buf);
140	return retval;
141}
142
143/*
144 * Convenience function which zeros out _num_ blocks starting at
145 * _blk_.  In case of an error, the details of the error is returned
146 * via _ret_blk_ and _ret_count_ if they are non-NULL pointers.
147 * Returns 0 on success, and an error code on an error.
148 *
149 * As a special case, if the first argument is NULL, then it will
150 * attempt to free the static zeroizing buffer.  (This is to keep
151 * programs that check for memory leaks happy.)
152 */
153#define STRIDE_LENGTH 8
154errcode_t ext2fs_zero_blocks2(ext2_filsys fs, blk64_t blk, int num,
155			      blk64_t *ret_blk, int *ret_count)
156{
157	int		j, count;
158	static char	*buf;
159	errcode_t	retval;
160
161	/* If fs is null, clean up the static buffer and return */
162	if (!fs) {
163		if (buf) {
164			free(buf);
165			buf = 0;
166		}
167		return 0;
168	}
169	/* Allocate the zeroizing buffer if necessary */
170	if (!buf) {
171		buf = malloc(fs->blocksize * STRIDE_LENGTH);
172		if (!buf)
173			return ENOMEM;
174		memset(buf, 0, fs->blocksize * STRIDE_LENGTH);
175	}
176	/* OK, do the write loop */
177	j=0;
178	while (j < num) {
179		if (blk % STRIDE_LENGTH) {
180			count = STRIDE_LENGTH - (blk % STRIDE_LENGTH);
181			if (count > (num - j))
182				count = num - j;
183		} else {
184			count = num - j;
185			if (count > STRIDE_LENGTH)
186				count = STRIDE_LENGTH;
187		}
188		retval = io_channel_write_blk64(fs->io, blk, count, buf);
189		if (retval) {
190			if (ret_count)
191				*ret_count = count;
192			if (ret_blk)
193				*ret_blk = blk;
194			return retval;
195		}
196		j += count; blk += count;
197	}
198	return 0;
199}
200
201errcode_t ext2fs_zero_blocks(ext2_filsys fs, blk_t blk, int num,
202			     blk_t *ret_blk, int *ret_count)
203{
204	blk64_t ret_blk2;
205	errcode_t retval;
206
207	retval = ext2fs_zero_blocks2(fs, blk, num, &ret_blk2, ret_count);
208	if (retval)
209		*ret_blk = (blk_t) ret_blk2;
210	return retval;
211}
212
213/*
214 * Helper function for creating the journal using direct I/O routines
215 */
216struct mkjournal_struct {
217	int		num_blocks;
218	int		newblocks;
219	blk64_t		goal;
220	blk64_t		blk_to_zero;
221	int		zero_count;
222	int		flags;
223	char		*buf;
224	errcode_t	err;
225};
226
227static int mkjournal_proc(ext2_filsys	fs,
228			  blk64_t	*blocknr,
229			  e2_blkcnt_t	blockcnt,
230			  blk64_t	ref_block EXT2FS_ATTR((unused)),
231			  int		ref_offset EXT2FS_ATTR((unused)),
232			  void		*priv_data)
233{
234	struct mkjournal_struct *es = (struct mkjournal_struct *) priv_data;
235	blk64_t	new_blk;
236	errcode_t	retval;
237
238	if (*blocknr) {
239		es->goal = *blocknr;
240		return 0;
241	}
242	if (blockcnt &&
243	    (EXT2FS_B2C(fs, es->goal) == EXT2FS_B2C(fs, es->goal+1)))
244		new_blk = es->goal+1;
245	else {
246		es->goal &= ~EXT2FS_CLUSTER_MASK(fs);
247		retval = ext2fs_new_block2(fs, es->goal, 0, &new_blk);
248		if (retval) {
249			es->err = retval;
250			return BLOCK_ABORT;
251		}
252	}
253	if (blockcnt >= 0)
254		es->num_blocks--;
255
256	es->newblocks++;
257	retval = 0;
258	if (blockcnt <= 0)
259		retval = io_channel_write_blk64(fs->io, new_blk, 1, es->buf);
260	else if (!(es->flags & EXT2_MKJOURNAL_LAZYINIT)) {
261		if (es->zero_count) {
262			if ((es->blk_to_zero + es->zero_count == new_blk) &&
263			    (es->zero_count < 1024))
264				es->zero_count++;
265			else {
266				retval = ext2fs_zero_blocks2(fs,
267							     es->blk_to_zero,
268							     es->zero_count,
269							     0, 0);
270				es->zero_count = 0;
271			}
272		}
273		if (es->zero_count == 0) {
274			es->blk_to_zero = new_blk;
275			es->zero_count = 1;
276		}
277	}
278
279	if (blockcnt == 0)
280		memset(es->buf, 0, fs->blocksize);
281
282	if (retval) {
283		es->err = retval;
284		return BLOCK_ABORT;
285	}
286	*blocknr = es->goal = new_blk;
287	ext2fs_block_alloc_stats2(fs, new_blk, +1);
288
289	if (es->num_blocks == 0)
290		return (BLOCK_CHANGED | BLOCK_ABORT);
291	else
292		return BLOCK_CHANGED;
293
294}
295
296/*
297 * This function creates a journal using direct I/O routines.
298 */
299static errcode_t write_journal_inode(ext2_filsys fs, ext2_ino_t journal_ino,
300				     blk_t num_blocks, int flags)
301{
302	char			*buf;
303	dgrp_t			group, start, end, i, log_flex;
304	errcode_t		retval;
305	struct ext2_inode	inode;
306	unsigned long long	inode_size;
307	struct mkjournal_struct	es;
308
309	if ((retval = ext2fs_create_journal_superblock(fs, num_blocks, flags,
310						       &buf)))
311		return retval;
312
313	if ((retval = ext2fs_read_bitmaps(fs)))
314		return retval;
315
316	if ((retval = ext2fs_read_inode(fs, journal_ino, &inode)))
317		return retval;
318
319	if (inode.i_blocks > 0)
320		return EEXIST;
321
322	es.num_blocks = num_blocks;
323	es.newblocks = 0;
324	es.buf = buf;
325	es.err = 0;
326	es.flags = flags;
327	es.zero_count = 0;
328
329	if (fs->super->s_feature_incompat & EXT3_FEATURE_INCOMPAT_EXTENTS) {
330		inode.i_flags |= EXT4_EXTENTS_FL;
331		if ((retval = ext2fs_write_inode(fs, journal_ino, &inode)))
332			return retval;
333	}
334
335	/*
336	 * Set the initial goal block to be roughly at the middle of
337	 * the filesystem.  Pick a group that has the largest number
338	 * of free blocks.
339	 */
340	group = ext2fs_group_of_blk2(fs, (ext2fs_blocks_count(fs->super) -
341					 fs->super->s_first_data_block) / 2);
342	log_flex = 1 << fs->super->s_log_groups_per_flex;
343	if (fs->super->s_log_groups_per_flex && (group > log_flex)) {
344		group = group & ~(log_flex - 1);
345		while ((group < fs->group_desc_count) &&
346		       ext2fs_bg_free_blocks_count(fs, group) == 0)
347			group++;
348		if (group == fs->group_desc_count)
349			group = 0;
350		start = group;
351	} else
352		start = (group > 0) ? group-1 : group;
353	end = ((group+1) < fs->group_desc_count) ? group+1 : group;
354	group = start;
355	for (i=start+1; i <= end; i++)
356		if (ext2fs_bg_free_blocks_count(fs, i) >
357		    ext2fs_bg_free_blocks_count(fs, group))
358			group = i;
359
360	es.goal = (fs->super->s_blocks_per_group * group) +
361		fs->super->s_first_data_block;
362
363	retval = ext2fs_block_iterate3(fs, journal_ino, BLOCK_FLAG_APPEND,
364				       0, mkjournal_proc, &es);
365	if (es.err) {
366		retval = es.err;
367		goto errout;
368	}
369	if (es.zero_count) {
370		retval = ext2fs_zero_blocks2(fs, es.blk_to_zero,
371					    es.zero_count, 0, 0);
372		if (retval)
373			goto errout;
374	}
375
376	if ((retval = ext2fs_read_inode(fs, journal_ino, &inode)))
377		goto errout;
378
379	inode_size = (unsigned long long)fs->blocksize * num_blocks;
380	inode.i_size = inode_size & 0xFFFFFFFF;
381	inode.i_size_high = (inode_size >> 32) & 0xFFFFFFFF;
382	if (inode.i_size_high)
383		fs->super->s_feature_ro_compat |=
384			EXT2_FEATURE_RO_COMPAT_LARGE_FILE;
385	ext2fs_iblk_add_blocks(fs, &inode, es.newblocks);
386	inode.i_mtime = inode.i_ctime = fs->now ? fs->now : time(0);
387	inode.i_links_count = 1;
388	inode.i_mode = LINUX_S_IFREG | 0600;
389
390	if ((retval = ext2fs_write_new_inode(fs, journal_ino, &inode)))
391		goto errout;
392	retval = 0;
393
394	memcpy(fs->super->s_jnl_blocks, inode.i_block, EXT2_N_BLOCKS*4);
395	fs->super->s_jnl_blocks[15] = inode.i_size_high;
396	fs->super->s_jnl_blocks[16] = inode.i_size;
397	fs->super->s_jnl_backup_type = EXT3_JNL_BACKUP_BLOCKS;
398	ext2fs_mark_super_dirty(fs);
399
400errout:
401	ext2fs_zero_blocks2(0, 0, 0, 0, 0);
402	ext2fs_free_mem(&buf);
403	return retval;
404}
405
/*
 * Pick a reasonable journal size (in blocks) for a filesystem of
 * _num_blocks_ blocks.  For very small filesystems, it is not
 * reasonable to have a journal that fills more than half of the
 * filesystem, so -1 is returned when there are fewer than 2048 blocks.
 * Otherwise the result grows in steps from 1024 up to 32768 blocks.
 */
int ext2fs_default_journal_size(__u64 num_blocks)
{
	/* Each entry applies to filesystems smaller than its limit */
	static const struct {
		__u64	limit;
		int	journal_blocks;
	} tiers[] = {
		{ 2048,		-1 },
		{ 32768,	1024 },
		{ 256 * 1024,	4096 },
		{ 512 * 1024,	8192 },
		{ 1024 * 1024,	16384 },
	};
	unsigned int	t;

	for (t = 0; t < sizeof(tiers) / sizeof(tiers[0]); t++) {
		if (num_blocks < tiers[t].limit)
			return tiers[t].journal_blocks;
	}
	return 32768;
}
425
426/*
427 * This function adds a journal device to a filesystem
428 */
429errcode_t ext2fs_add_journal_device(ext2_filsys fs, ext2_filsys journal_dev)
430{
431	struct stat	st;
432	errcode_t	retval;
433	char		buf[1024];
434	journal_superblock_t	*jsb;
435	int		start;
436	__u32		i, nr_users;
437
438	/* Make sure the device exists and is a block device */
439	if (stat(journal_dev->device_name, &st) < 0)
440		return errno;
441
442	if (!S_ISBLK(st.st_mode))
443		return EXT2_ET_JOURNAL_NOT_BLOCK; /* Must be a block device */
444
445	/* Get the journal superblock */
446	start = 1;
447	if (journal_dev->blocksize == 1024)
448		start++;
449	if ((retval = io_channel_read_blk64(journal_dev->io, start, -1024,
450					    buf)))
451		return retval;
452
453	jsb = (journal_superblock_t *) buf;
454	if ((jsb->s_header.h_magic != (unsigned) ntohl(JFS_MAGIC_NUMBER)) ||
455	    (jsb->s_header.h_blocktype != (unsigned) ntohl(JFS_SUPERBLOCK_V2)))
456		return EXT2_ET_NO_JOURNAL_SB;
457
458	if (ntohl(jsb->s_blocksize) != (unsigned long) fs->blocksize)
459		return EXT2_ET_UNEXPECTED_BLOCK_SIZE;
460
461	/* Check and see if this filesystem has already been added */
462	nr_users = ntohl(jsb->s_nr_users);
463	for (i=0; i < nr_users; i++) {
464		if (memcmp(fs->super->s_uuid,
465			   &jsb->s_users[i*16], 16) == 0)
466			break;
467	}
468	if (i >= nr_users) {
469		memcpy(&jsb->s_users[nr_users*16],
470		       fs->super->s_uuid, 16);
471		jsb->s_nr_users = htonl(nr_users+1);
472	}
473
474	/* Writeback the journal superblock */
475	if ((retval = io_channel_write_blk64(journal_dev->io, start, -1024, buf)))
476		return retval;
477
478	fs->super->s_journal_inum = 0;
479	fs->super->s_journal_dev = st.st_rdev;
480	memcpy(fs->super->s_journal_uuid, jsb->s_uuid,
481	       sizeof(fs->super->s_journal_uuid));
482	fs->super->s_feature_compat |= EXT3_FEATURE_COMPAT_HAS_JOURNAL;
483	ext2fs_mark_super_dirty(fs);
484	return 0;
485}
486
487/*
488 * This function adds a journal inode to a filesystem, using either
489 * POSIX routines if the filesystem is mounted, or using direct I/O
490 * functions if it is not.
491 */
492errcode_t ext2fs_add_journal_inode(ext2_filsys fs, blk_t num_blocks, int flags)
493{
494	errcode_t		retval;
495	ext2_ino_t		journal_ino;
496	struct stat		st;
497	char			jfile[1024];
498	int			mount_flags, f;
499	int			fd = -1;
500
501	if ((retval = ext2fs_check_mount_point(fs->device_name, &mount_flags,
502					       jfile, sizeof(jfile)-10)))
503		return retval;
504
505	if (mount_flags & EXT2_MF_MOUNTED) {
506		strcat(jfile, "/.journal");
507
508		/*
509		 * If .../.journal already exists, make sure any
510		 * immutable or append-only flags are cleared.
511		 */
512#if defined(HAVE_CHFLAGS) && defined(UF_NODUMP)
513		(void) chflags (jfile, 0);
514#else
515#if HAVE_EXT2_IOCTLS
516		fd = open(jfile, O_RDONLY);
517		if (fd >= 0) {
518			f = 0;
519			ioctl(fd, EXT2_IOC_SETFLAGS, &f);
520			close(fd);
521		}
522#endif
523#endif
524
525		/* Create the journal file */
526		if ((fd = open(jfile, O_CREAT|O_WRONLY, 0600)) < 0)
527			return errno;
528
529		/* Note that we can't do lazy journal initialization for mounted
530		 * filesystems, since the zero writing is also allocating the
531		 * journal blocks.  We could use fallocate, but not all kernels
532		 * support that, and creating a journal on a mounted ext2
533		 * filesystems is extremely rare these days...  Ignore it. */
534		flags &= ~EXT2_MKJOURNAL_LAZYINIT;
535
536		if ((retval = write_journal_file(fs, jfile, num_blocks, flags)))
537			goto errout;
538
539		/* Get inode number of the journal file */
540		if (fstat(fd, &st) < 0) {
541			retval = errno;
542			goto errout;
543		}
544
545#if defined(HAVE_CHFLAGS) && defined(UF_NODUMP)
546		retval = fchflags (fd, UF_NODUMP|UF_IMMUTABLE);
547#else
548#if HAVE_EXT2_IOCTLS
549		if (ioctl(fd, EXT2_IOC_GETFLAGS, &f) < 0) {
550			retval = errno;
551			goto errout;
552		}
553		f |= EXT2_NODUMP_FL | EXT2_IMMUTABLE_FL;
554		retval = ioctl(fd, EXT2_IOC_SETFLAGS, &f);
555#endif
556#endif
557		if (retval) {
558			retval = errno;
559			goto errout;
560		}
561
562		if (close(fd) < 0) {
563			retval = errno;
564			fd = -1;
565			goto errout;
566		}
567		journal_ino = st.st_ino;
568	} else {
569		if ((mount_flags & EXT2_MF_BUSY) &&
570		    !(fs->flags & EXT2_FLAG_EXCLUSIVE)) {
571			retval = EBUSY;
572			goto errout;
573		}
574		journal_ino = EXT2_JOURNAL_INO;
575		if ((retval = write_journal_inode(fs, journal_ino,
576						  num_blocks, flags)))
577			return retval;
578	}
579
580	fs->super->s_journal_inum = journal_ino;
581	fs->super->s_journal_dev = 0;
582	memset(fs->super->s_journal_uuid, 0,
583	       sizeof(fs->super->s_journal_uuid));
584	fs->super->s_feature_compat |= EXT3_FEATURE_COMPAT_HAS_JOURNAL;
585
586	ext2fs_mark_super_dirty(fs);
587	return 0;
588errout:
589	if (fd > 0)
590		close(fd);
591	return retval;
592}
593
#ifdef DEBUG
/*
 * Debugging driver: open the filesystem named on the command line
 * read-write, add a 1024-block journal inode with default flags,
 * flush the filesystem and close it.  Exits with status 1 if the
 * filesystem cannot be opened or the journal cannot be added.
 */
int main(int argc, char **argv)
{
	errcode_t	retval;
	char		*device_name;
	ext2_filsys	fs;

	if (argc < 2) {
		fprintf(stderr, "Usage: %s filesystem\n", argv[0]);
		exit(1);
	}
	device_name = argv[1];

	retval = ext2fs_open (device_name, EXT2_FLAG_RW, 0, 0,
			      unix_io_manager, &fs);
	if (retval) {
		com_err(argv[0], retval, "while opening %s", device_name);
		exit(1);
	}

	/* Third argument: no EXT2_MKJOURNAL_* flags */
	retval = ext2fs_add_journal_inode(fs, 1024, 0);
	if (retval) {
		com_err(argv[0], retval, "while adding journal to %s",
			device_name);
		exit(1);
	}
	retval = ext2fs_flush(fs);
	if (retval) {
		printf("Warning, had trouble writing out superblocks.\n");
	}
	ext2fs_close(fs);
	exit(0);

}
#endif
629