1/*
2 * journal.c --- code for handling the "ext3" journal
3 *
4 * Copyright (C) 2000 Andreas Dilger
5 * Copyright (C) 2000 Theodore Ts'o
6 *
7 * Parts of the code are based on fs/jfs/journal.c by Stephen C. Tweedie
8 * Copyright (C) 1999 Red Hat Software
9 *
10 * This file may be redistributed under the terms of the
11 * GNU General Public License version 2 or at your discretion
12 * any later version.
13 */
14
15#ifdef HAVE_SYS_MOUNT_H
16#include <sys/param.h>
17#include <sys/mount.h>
18#define MNT_FL (MS_MGC_VAL | MS_RDONLY)
19#endif
20#ifdef HAVE_SYS_STAT_H
21#include <sys/stat.h>
22#endif
23
24#define E2FSCK_INCLUDE_INLINE_FUNCS
25#include "jfs_user.h"
26#include "problem.h"
27#include "uuid/uuid.h"
28
#ifdef CONFIG_JBD_DEBUG		/* Enabled by configure --enable-jfs-debug */
/*
 * Debug-only counter of buffer heads: bumped in getblk() and decremented
 * from the debug message in brelse().  Static storage starts at zero.
 */
static int bh_count;
#endif
32
33/*
34 * Define USE_INODE_IO to use the inode_io.c / fileio.c codepaths.
35 * This creates a larger static binary, and a smaller binary using
36 * shared libraries.  It's also probably slightly less CPU-efficient,
37 * which is why it's not on by default.  But, it's a good way of
38 * testing the functions in inode_io.c and fileio.c.
39 */
40#undef USE_INODE_IO
41
42/* Kernel compatibility functions for handling the journal.  These allow us
43 * to use the recovery.c file virtually unchanged from the kernel, so we
44 * don't have to do much to keep kernel and user recovery in sync.
45 */
46int journal_bmap(journal_t *journal, blk64_t block, unsigned long long *phys)
47{
48#ifdef USE_INODE_IO
49	*phys = block;
50	return 0;
51#else
52	struct inode 	*inode = journal->j_inode;
53	errcode_t	retval;
54	blk64_t		pblk;
55
56	if (!inode) {
57		*phys = block;
58		return 0;
59	}
60
61	retval= ext2fs_bmap2(inode->i_ctx->fs, inode->i_ino,
62			     &inode->i_ext2, NULL, 0, block, 0, &pblk);
63	*phys = pblk;
64	return (int) retval;
65#endif
66}
67
68struct buffer_head *getblk(kdev_t kdev, blk64_t blocknr, int blocksize)
69{
70	struct buffer_head *bh;
71	int bufsize = sizeof(*bh) + kdev->k_ctx->fs->blocksize -
72		sizeof(bh->b_data);
73
74	bh = e2fsck_allocate_memory(kdev->k_ctx, bufsize, "block buffer");
75	if (!bh)
76		return NULL;
77
78#ifdef CONFIG_JBD_DEBUG
79	if (journal_enable_debug >= 3)
80		bh_count++;
81#endif
82	jfs_debug(4, "getblk for block %llu (%d bytes)(total %d)\n",
83		  (unsigned long long) blocknr, blocksize, bh_count);
84
85	bh->b_ctx = kdev->k_ctx;
86	if (kdev->k_dev == K_DEV_FS)
87		bh->b_io = kdev->k_ctx->fs->io;
88	else
89		bh->b_io = kdev->k_ctx->journal_io;
90	bh->b_size = blocksize;
91	bh->b_blocknr = blocknr;
92
93	return bh;
94}
95
96void sync_blockdev(kdev_t kdev)
97{
98	io_channel	io;
99
100	if (kdev->k_dev == K_DEV_FS)
101		io = kdev->k_ctx->fs->io;
102	else
103		io = kdev->k_ctx->journal_io;
104
105	io_channel_flush(io);
106}
107
108void ll_rw_block(int rw, int nr, struct buffer_head *bhp[])
109{
110	errcode_t retval;
111	struct buffer_head *bh;
112
113	for (; nr > 0; --nr) {
114		bh = *bhp++;
115		if (rw == READ && !bh->b_uptodate) {
116			jfs_debug(3, "reading block %llu/%p\n",
117				  bh->b_blocknr, (void *) bh);
118			retval = io_channel_read_blk64(bh->b_io,
119						     bh->b_blocknr,
120						     1, bh->b_data);
121			if (retval) {
122				com_err(bh->b_ctx->device_name, retval,
123					"while reading block %llu\n",
124					bh->b_blocknr);
125				bh->b_err = (int) retval;
126				continue;
127			}
128			bh->b_uptodate = 1;
129		} else if (rw == WRITE && bh->b_dirty) {
130			jfs_debug(3, "writing block %llu/%p\n",
131				  bh->b_blocknr,
132				  (void *) bh);
133			retval = io_channel_write_blk64(bh->b_io,
134						      bh->b_blocknr,
135						      1, bh->b_data);
136			if (retval) {
137				com_err(bh->b_ctx->device_name, retval,
138					"while writing block %llu\n",
139					bh->b_blocknr);
140				bh->b_err = (int) retval;
141				continue;
142			}
143			bh->b_dirty = 0;
144			bh->b_uptodate = 1;
145		} else {
146			jfs_debug(3, "no-op %s for block %llu\n",
147				  rw == READ ? "read" : "write",
148				  bh->b_blocknr);
149		}
150	}
151}
152
153void mark_buffer_dirty(struct buffer_head *bh)
154{
155	bh->b_dirty = 1;
156}
157
158static void mark_buffer_clean(struct buffer_head * bh)
159{
160	bh->b_dirty = 0;
161}
162
163void brelse(struct buffer_head *bh)
164{
165	if (bh->b_dirty)
166		ll_rw_block(WRITE, 1, &bh);
167	jfs_debug(3, "freeing block %llu/%p (total %d)\n",
168		  bh->b_blocknr, (void *) bh, --bh_count);
169	ext2fs_free_mem(&bh);
170}
171
172int buffer_uptodate(struct buffer_head *bh)
173{
174	return bh->b_uptodate;
175}
176
177void mark_buffer_uptodate(struct buffer_head *bh, int val)
178{
179	bh->b_uptodate = val;
180}
181
182void wait_on_buffer(struct buffer_head *bh)
183{
184	if (!bh->b_uptodate)
185		ll_rw_block(READ, 1, &bh);
186}
187
188
189static void e2fsck_clear_recover(e2fsck_t ctx, int error)
190{
191	ctx->fs->super->s_feature_incompat &= ~EXT3_FEATURE_INCOMPAT_RECOVER;
192
193	/* if we had an error doing journal recovery, we need a full fsck */
194	if (error)
195		ctx->fs->super->s_state &= ~EXT2_VALID_FS;
196	ext2fs_mark_super_dirty(ctx->fs);
197}
198
199/*
200 * This is a helper function to check the validity of the journal.
201 */
202struct process_block_struct {
203	e2_blkcnt_t	last_block;
204};
205
206static int process_journal_block(ext2_filsys fs,
207				 blk64_t	*block_nr,
208				 e2_blkcnt_t blockcnt,
209				 blk64_t ref_block EXT2FS_ATTR((unused)),
210				 int ref_offset EXT2FS_ATTR((unused)),
211				 void *priv_data)
212{
213	struct process_block_struct *p;
214	blk64_t	blk = *block_nr;
215
216	p = (struct process_block_struct *) priv_data;
217
218	if (!blk || blk < fs->super->s_first_data_block ||
219	    blk >= ext2fs_blocks_count(fs->super))
220		return BLOCK_ABORT;
221
222	if (blockcnt >= 0)
223		p->last_block = blockcnt;
224	return 0;
225}
226
227static errcode_t e2fsck_get_journal(e2fsck_t ctx, journal_t **ret_journal)
228{
229	struct process_block_struct pb;
230	struct ext2_super_block *sb = ctx->fs->super;
231	struct ext2_super_block jsuper;
232	struct problem_context	pctx;
233	struct buffer_head 	*bh;
234	struct inode		*j_inode = NULL;
235	struct kdev_s		*dev_fs = NULL, *dev_journal;
236	const char		*journal_name = 0;
237	journal_t		*journal = NULL;
238	errcode_t		retval = 0;
239	io_manager		io_ptr = 0;
240	unsigned long long	start = 0;
241	int			ext_journal = 0;
242	int			tried_backup_jnl = 0;
243
244	clear_problem_context(&pctx);
245
246	journal = e2fsck_allocate_memory(ctx, sizeof(journal_t), "journal");
247	if (!journal) {
248		return EXT2_ET_NO_MEMORY;
249	}
250
251	dev_fs = e2fsck_allocate_memory(ctx, 2*sizeof(struct kdev_s), "kdev");
252	if (!dev_fs) {
253		retval = EXT2_ET_NO_MEMORY;
254		goto errout;
255	}
256	dev_journal = dev_fs+1;
257
258	dev_fs->k_ctx = dev_journal->k_ctx = ctx;
259	dev_fs->k_dev = K_DEV_FS;
260	dev_journal->k_dev = K_DEV_JOURNAL;
261
262	journal->j_dev = dev_journal;
263	journal->j_fs_dev = dev_fs;
264	journal->j_inode = NULL;
265	journal->j_blocksize = ctx->fs->blocksize;
266
267	if (uuid_is_null(sb->s_journal_uuid)) {
268		if (!sb->s_journal_inum) {
269			retval = EXT2_ET_BAD_INODE_NUM;
270			goto errout;
271		}
272		j_inode = e2fsck_allocate_memory(ctx, sizeof(*j_inode),
273						 "journal inode");
274		if (!j_inode) {
275			retval = EXT2_ET_NO_MEMORY;
276			goto errout;
277		}
278
279		j_inode->i_ctx = ctx;
280		j_inode->i_ino = sb->s_journal_inum;
281
282		if ((retval = ext2fs_read_inode(ctx->fs,
283						sb->s_journal_inum,
284						&j_inode->i_ext2))) {
285		try_backup_journal:
286			if (sb->s_jnl_backup_type != EXT3_JNL_BACKUP_BLOCKS ||
287			    tried_backup_jnl)
288				goto errout;
289			memset(&j_inode->i_ext2, 0, sizeof(struct ext2_inode));
290			memcpy(&j_inode->i_ext2.i_block[0], sb->s_jnl_blocks,
291			       EXT2_N_BLOCKS*4);
292			j_inode->i_ext2.i_size_high = sb->s_jnl_blocks[15];
293			j_inode->i_ext2.i_size = sb->s_jnl_blocks[16];
294			j_inode->i_ext2.i_links_count = 1;
295			j_inode->i_ext2.i_mode = LINUX_S_IFREG | 0600;
296			e2fsck_use_inode_shortcuts(ctx, 1);
297			ctx->stashed_ino = j_inode->i_ino;
298			ctx->stashed_inode = &j_inode->i_ext2;
299			tried_backup_jnl++;
300		}
301		if (!j_inode->i_ext2.i_links_count ||
302		    !LINUX_S_ISREG(j_inode->i_ext2.i_mode)) {
303			retval = EXT2_ET_NO_JOURNAL;
304			goto try_backup_journal;
305		}
306		if (EXT2_I_SIZE(&j_inode->i_ext2) / journal->j_blocksize <
307		    JFS_MIN_JOURNAL_BLOCKS) {
308			retval = EXT2_ET_JOURNAL_TOO_SMALL;
309			goto try_backup_journal;
310		}
311		pb.last_block = -1;
312		retval = ext2fs_block_iterate3(ctx->fs, j_inode->i_ino,
313					       BLOCK_FLAG_HOLE, 0,
314					       process_journal_block, &pb);
315		if ((pb.last_block + 1) * ctx->fs->blocksize <
316		    (int) EXT2_I_SIZE(&j_inode->i_ext2)) {
317			retval = EXT2_ET_JOURNAL_TOO_SMALL;
318			goto try_backup_journal;
319		}
320		if (tried_backup_jnl && !(ctx->options & E2F_OPT_READONLY)) {
321			retval = ext2fs_write_inode(ctx->fs, sb->s_journal_inum,
322						    &j_inode->i_ext2);
323			if (retval)
324				goto errout;
325		}
326
327		journal->j_maxlen = EXT2_I_SIZE(&j_inode->i_ext2) /
328			journal->j_blocksize;
329
330#ifdef USE_INODE_IO
331		retval = ext2fs_inode_io_intern2(ctx->fs, sb->s_journal_inum,
332						 &j_inode->i_ext2,
333						 &journal_name);
334		if (retval)
335			goto errout;
336
337		io_ptr = inode_io_manager;
338#else
339		journal->j_inode = j_inode;
340		ctx->journal_io = ctx->fs->io;
341		if ((retval = (errcode_t) journal_bmap(journal, 0, &start)) != 0)
342			goto errout;
343#endif
344	} else {
345		ext_journal = 1;
346		if (!ctx->journal_name) {
347			char uuid[37];
348
349			uuid_unparse(sb->s_journal_uuid, uuid);
350			ctx->journal_name = blkid_get_devname(ctx->blkid,
351							      "UUID", uuid);
352			if (!ctx->journal_name)
353				ctx->journal_name = blkid_devno_to_devname(sb->s_journal_dev);
354		}
355		journal_name = ctx->journal_name;
356
357		if (!journal_name) {
358			fix_problem(ctx, PR_0_CANT_FIND_JOURNAL, &pctx);
359			retval = EXT2_ET_LOAD_EXT_JOURNAL;
360			goto errout;
361		}
362
363		jfs_debug(1, "Using journal file %s\n", journal_name);
364		io_ptr = unix_io_manager;
365	}
366
367#if 0
368	test_io_backing_manager = io_ptr;
369	io_ptr = test_io_manager;
370#endif
371#ifndef USE_INODE_IO
372	if (ext_journal)
373#endif
374	{
375		int flags = IO_FLAG_RW;
376		if (!(ctx->mount_flags & EXT2_MF_ISROOT &&
377		      ctx->mount_flags & EXT2_MF_READONLY))
378			flags |= IO_FLAG_EXCLUSIVE;
379		if ((ctx->mount_flags & EXT2_MF_READONLY) &&
380		    (ctx->options & E2F_OPT_FORCE))
381			flags &= ~IO_FLAG_EXCLUSIVE;
382
383
384		retval = io_ptr->open(journal_name, flags,
385				      &ctx->journal_io);
386	}
387	if (retval)
388		goto errout;
389
390	io_channel_set_blksize(ctx->journal_io, ctx->fs->blocksize);
391
392	if (ext_journal) {
393		blk64_t maxlen;
394
395		if (ctx->fs->blocksize == 1024)
396			start = 1;
397		bh = getblk(dev_journal, start, ctx->fs->blocksize);
398		if (!bh) {
399			retval = EXT2_ET_NO_MEMORY;
400			goto errout;
401		}
402		ll_rw_block(READ, 1, &bh);
403		if ((retval = bh->b_err) != 0) {
404			brelse(bh);
405			goto errout;
406		}
407		memcpy(&jsuper, start ? bh->b_data :  bh->b_data + 1024,
408		       sizeof(jsuper));
409		brelse(bh);
410#ifdef WORDS_BIGENDIAN
411		if (jsuper.s_magic == ext2fs_swab16(EXT2_SUPER_MAGIC))
412			ext2fs_swap_super(&jsuper);
413#endif
414		if (jsuper.s_magic != EXT2_SUPER_MAGIC ||
415		    !(jsuper.s_feature_incompat & EXT3_FEATURE_INCOMPAT_JOURNAL_DEV)) {
416			fix_problem(ctx, PR_0_EXT_JOURNAL_BAD_SUPER, &pctx);
417			retval = EXT2_ET_LOAD_EXT_JOURNAL;
418			goto errout;
419		}
420		/* Make sure the journal UUID is correct */
421		if (memcmp(jsuper.s_uuid, ctx->fs->super->s_journal_uuid,
422			   sizeof(jsuper.s_uuid))) {
423			fix_problem(ctx, PR_0_JOURNAL_BAD_UUID, &pctx);
424			retval = EXT2_ET_LOAD_EXT_JOURNAL;
425			goto errout;
426		}
427
428		maxlen = ext2fs_blocks_count(&jsuper);
429		journal->j_maxlen = (maxlen < 1ULL << 32) ? maxlen : (1ULL << 32) - 1;
430		start++;
431	}
432
433	if (!(bh = getblk(dev_journal, start, journal->j_blocksize))) {
434		retval = EXT2_ET_NO_MEMORY;
435		goto errout;
436	}
437
438	journal->j_sb_buffer = bh;
439	journal->j_superblock = (journal_superblock_t *)bh->b_data;
440
441#ifdef USE_INODE_IO
442	if (j_inode)
443		ext2fs_free_mem(&j_inode);
444#endif
445
446	*ret_journal = journal;
447	e2fsck_use_inode_shortcuts(ctx, 0);
448	return 0;
449
450errout:
451	e2fsck_use_inode_shortcuts(ctx, 0);
452	if (dev_fs)
453		ext2fs_free_mem(&dev_fs);
454	if (j_inode)
455		ext2fs_free_mem(&j_inode);
456	if (journal)
457		ext2fs_free_mem(&journal);
458	return retval;
459}
460
461static errcode_t e2fsck_journal_fix_bad_inode(e2fsck_t ctx,
462					      struct problem_context *pctx)
463{
464	struct ext2_super_block *sb = ctx->fs->super;
465	int recover = ctx->fs->super->s_feature_incompat &
466		EXT3_FEATURE_INCOMPAT_RECOVER;
467	int has_journal = ctx->fs->super->s_feature_compat &
468		EXT3_FEATURE_COMPAT_HAS_JOURNAL;
469
470	if (has_journal || sb->s_journal_inum) {
471		/* The journal inode is bogus, remove and force full fsck */
472		pctx->ino = sb->s_journal_inum;
473		if (fix_problem(ctx, PR_0_JOURNAL_BAD_INODE, pctx)) {
474			if (has_journal && sb->s_journal_inum)
475				printf("*** ext3 journal has been deleted - "
476				       "filesystem is now ext2 only ***\n\n");
477			sb->s_feature_compat &= ~EXT3_FEATURE_COMPAT_HAS_JOURNAL;
478			sb->s_journal_inum = 0;
479			ctx->flags |= E2F_FLAG_JOURNAL_INODE;
480			ctx->fs->flags &= ~EXT2_FLAG_MASTER_SB_ONLY;
481			e2fsck_clear_recover(ctx, 1);
482			return 0;
483		}
484		return EXT2_ET_BAD_INODE_NUM;
485	} else if (recover) {
486		if (fix_problem(ctx, PR_0_JOURNAL_RECOVER_SET, pctx)) {
487			e2fsck_clear_recover(ctx, 1);
488			return 0;
489		}
490		return EXT2_ET_UNSUPP_FEATURE;
491	}
492	return 0;
493}
494
495#define V1_SB_SIZE	0x0024
496static void clear_v2_journal_fields(journal_t *journal)
497{
498	e2fsck_t ctx = journal->j_dev->k_ctx;
499	struct problem_context pctx;
500
501	clear_problem_context(&pctx);
502
503	if (!fix_problem(ctx, PR_0_CLEAR_V2_JOURNAL, &pctx))
504		return;
505
506	memset(((char *) journal->j_superblock) + V1_SB_SIZE, 0,
507	       ctx->fs->blocksize-V1_SB_SIZE);
508	mark_buffer_dirty(journal->j_sb_buffer);
509}
510
511
512static errcode_t e2fsck_journal_load(journal_t *journal)
513{
514	e2fsck_t ctx = journal->j_dev->k_ctx;
515	journal_superblock_t *jsb;
516	struct buffer_head *jbh = journal->j_sb_buffer;
517	struct problem_context pctx;
518
519	clear_problem_context(&pctx);
520
521	ll_rw_block(READ, 1, &jbh);
522	if (jbh->b_err) {
523		com_err(ctx->device_name, jbh->b_err, "%s",
524			_("reading journal superblock\n"));
525		return jbh->b_err;
526	}
527
528	jsb = journal->j_superblock;
529	/* If we don't even have JFS_MAGIC, we probably have a wrong inode */
530	if (jsb->s_header.h_magic != htonl(JFS_MAGIC_NUMBER))
531		return e2fsck_journal_fix_bad_inode(ctx, &pctx);
532
533	switch (ntohl(jsb->s_header.h_blocktype)) {
534	case JFS_SUPERBLOCK_V1:
535		journal->j_format_version = 1;
536		if (jsb->s_feature_compat ||
537		    jsb->s_feature_incompat ||
538		    jsb->s_feature_ro_compat ||
539		    jsb->s_nr_users)
540			clear_v2_journal_fields(journal);
541		break;
542
543	case JFS_SUPERBLOCK_V2:
544		journal->j_format_version = 2;
545		if (ntohl(jsb->s_nr_users) > 1 &&
546		    uuid_is_null(ctx->fs->super->s_journal_uuid))
547			clear_v2_journal_fields(journal);
548		if (ntohl(jsb->s_nr_users) > 1) {
549			fix_problem(ctx, PR_0_JOURNAL_UNSUPP_MULTIFS, &pctx);
550			return EXT2_ET_JOURNAL_UNSUPP_VERSION;
551		}
552		break;
553
554	/*
555	 * These should never appear in a journal super block, so if
556	 * they do, the journal is badly corrupted.
557	 */
558	case JFS_DESCRIPTOR_BLOCK:
559	case JFS_COMMIT_BLOCK:
560	case JFS_REVOKE_BLOCK:
561		return EXT2_ET_CORRUPT_SUPERBLOCK;
562
563	/* If we don't understand the superblock major type, but there
564	 * is a magic number, then it is likely to be a new format we
565	 * just don't understand, so leave it alone. */
566	default:
567		return EXT2_ET_JOURNAL_UNSUPP_VERSION;
568	}
569
570	if (JFS_HAS_INCOMPAT_FEATURE(journal, ~JFS_KNOWN_INCOMPAT_FEATURES))
571		return EXT2_ET_UNSUPP_FEATURE;
572
573	if (JFS_HAS_RO_COMPAT_FEATURE(journal, ~JFS_KNOWN_ROCOMPAT_FEATURES))
574		return EXT2_ET_RO_UNSUPP_FEATURE;
575
576	/* We have now checked whether we know enough about the journal
577	 * format to be able to proceed safely, so any other checks that
578	 * fail we should attempt to recover from. */
579	if (jsb->s_blocksize != htonl(journal->j_blocksize)) {
580		com_err(ctx->program_name, EXT2_ET_CORRUPT_SUPERBLOCK,
581			_("%s: no valid journal superblock found\n"),
582			ctx->device_name);
583		return EXT2_ET_CORRUPT_SUPERBLOCK;
584	}
585
586	if (ntohl(jsb->s_maxlen) < journal->j_maxlen)
587		journal->j_maxlen = ntohl(jsb->s_maxlen);
588	else if (ntohl(jsb->s_maxlen) > journal->j_maxlen) {
589		com_err(ctx->program_name, EXT2_ET_CORRUPT_SUPERBLOCK,
590			_("%s: journal too short\n"),
591			ctx->device_name);
592		return EXT2_ET_CORRUPT_SUPERBLOCK;
593	}
594
595	journal->j_tail_sequence = ntohl(jsb->s_sequence);
596	journal->j_transaction_sequence = journal->j_tail_sequence;
597	journal->j_tail = ntohl(jsb->s_start);
598	journal->j_first = ntohl(jsb->s_first);
599	journal->j_last = ntohl(jsb->s_maxlen);
600
601	return 0;
602}
603
604static void e2fsck_journal_reset_super(e2fsck_t ctx, journal_superblock_t *jsb,
605				       journal_t *journal)
606{
607	char *p;
608	union {
609		uuid_t uuid;
610		__u32 val[4];
611	} u;
612	__u32 new_seq = 0;
613	int i;
614
615	/* Leave a valid existing V1 superblock signature alone.
616	 * Anything unrecognisable we overwrite with a new V2
617	 * signature. */
618
619	if (jsb->s_header.h_magic != htonl(JFS_MAGIC_NUMBER) ||
620	    jsb->s_header.h_blocktype != htonl(JFS_SUPERBLOCK_V1)) {
621		jsb->s_header.h_magic = htonl(JFS_MAGIC_NUMBER);
622		jsb->s_header.h_blocktype = htonl(JFS_SUPERBLOCK_V2);
623	}
624
625	/* Zero out everything else beyond the superblock header */
626
627	p = ((char *) jsb) + sizeof(journal_header_t);
628	memset (p, 0, ctx->fs->blocksize-sizeof(journal_header_t));
629
630	jsb->s_blocksize = htonl(ctx->fs->blocksize);
631	jsb->s_maxlen = htonl(journal->j_maxlen);
632	jsb->s_first = htonl(1);
633
634	/* Initialize the journal sequence number so that there is "no"
635	 * chance we will find old "valid" transactions in the journal.
636	 * This avoids the need to zero the whole journal (slow to do,
637	 * and risky when we are just recovering the filesystem).
638	 */
639	uuid_generate(u.uuid);
640	for (i = 0; i < 4; i ++)
641		new_seq ^= u.val[i];
642	jsb->s_sequence = htonl(new_seq);
643
644	mark_buffer_dirty(journal->j_sb_buffer);
645	ll_rw_block(WRITE, 1, &journal->j_sb_buffer);
646}
647
648static errcode_t e2fsck_journal_fix_corrupt_super(e2fsck_t ctx,
649						  journal_t *journal,
650						  struct problem_context *pctx)
651{
652	struct ext2_super_block *sb = ctx->fs->super;
653	int recover = ctx->fs->super->s_feature_incompat &
654		EXT3_FEATURE_INCOMPAT_RECOVER;
655
656	if (sb->s_feature_compat & EXT3_FEATURE_COMPAT_HAS_JOURNAL) {
657		if (fix_problem(ctx, PR_0_JOURNAL_BAD_SUPER, pctx)) {
658			e2fsck_journal_reset_super(ctx, journal->j_superblock,
659						   journal);
660			journal->j_transaction_sequence = 1;
661			e2fsck_clear_recover(ctx, recover);
662			return 0;
663		}
664		return EXT2_ET_CORRUPT_SUPERBLOCK;
665	} else if (e2fsck_journal_fix_bad_inode(ctx, pctx))
666		return EXT2_ET_CORRUPT_SUPERBLOCK;
667
668	return 0;
669}
670
671static void e2fsck_journal_release(e2fsck_t ctx, journal_t *journal,
672				   int reset, int drop)
673{
674	journal_superblock_t *jsb;
675
676	if (drop)
677		mark_buffer_clean(journal->j_sb_buffer);
678	else if (!(ctx->options & E2F_OPT_READONLY)) {
679		jsb = journal->j_superblock;
680		jsb->s_sequence = htonl(journal->j_transaction_sequence);
681		if (reset)
682			jsb->s_start = 0; /* this marks the journal as empty */
683		mark_buffer_dirty(journal->j_sb_buffer);
684	}
685	brelse(journal->j_sb_buffer);
686
687	if (ctx->journal_io) {
688		if (ctx->fs && ctx->fs->io != ctx->journal_io)
689			io_channel_close(ctx->journal_io);
690		ctx->journal_io = 0;
691	}
692
693#ifndef USE_INODE_IO
694	if (journal->j_inode)
695		ext2fs_free_mem(&journal->j_inode);
696#endif
697	if (journal->j_fs_dev)
698		ext2fs_free_mem(&journal->j_fs_dev);
699	ext2fs_free_mem(&journal);
700}
701
702/*
703 * This function makes sure that the superblock fields regarding the
704 * journal are consistent.
705 */
706errcode_t e2fsck_check_ext3_journal(e2fsck_t ctx)
707{
708	struct ext2_super_block *sb = ctx->fs->super;
709	journal_t *journal;
710	int recover = ctx->fs->super->s_feature_incompat &
711		EXT3_FEATURE_INCOMPAT_RECOVER;
712	struct problem_context pctx;
713	problem_t problem;
714	int reset = 0, force_fsck = 0;
715	errcode_t retval;
716
717	/* If we don't have any journal features, don't do anything more */
718	if (!(sb->s_feature_compat & EXT3_FEATURE_COMPAT_HAS_JOURNAL) &&
719	    !recover && sb->s_journal_inum == 0 && sb->s_journal_dev == 0 &&
720	    uuid_is_null(sb->s_journal_uuid))
721 		return 0;
722
723	clear_problem_context(&pctx);
724	pctx.num = sb->s_journal_inum;
725
726	retval = e2fsck_get_journal(ctx, &journal);
727	if (retval) {
728		if ((retval == EXT2_ET_BAD_INODE_NUM) ||
729		    (retval == EXT2_ET_BAD_BLOCK_NUM) ||
730		    (retval == EXT2_ET_JOURNAL_TOO_SMALL) ||
731		    (retval == EXT2_ET_NO_JOURNAL))
732			return e2fsck_journal_fix_bad_inode(ctx, &pctx);
733		return retval;
734	}
735
736	retval = e2fsck_journal_load(journal);
737	if (retval) {
738		if ((retval == EXT2_ET_CORRUPT_SUPERBLOCK) ||
739		    ((retval == EXT2_ET_UNSUPP_FEATURE) &&
740		    (!fix_problem(ctx, PR_0_JOURNAL_UNSUPP_INCOMPAT,
741				  &pctx))) ||
742		    ((retval == EXT2_ET_RO_UNSUPP_FEATURE) &&
743		    (!fix_problem(ctx, PR_0_JOURNAL_UNSUPP_ROCOMPAT,
744				  &pctx))) ||
745		    ((retval == EXT2_ET_JOURNAL_UNSUPP_VERSION) &&
746		    (!fix_problem(ctx, PR_0_JOURNAL_UNSUPP_VERSION, &pctx))))
747			retval = e2fsck_journal_fix_corrupt_super(ctx, journal,
748								  &pctx);
749		e2fsck_journal_release(ctx, journal, 0, 1);
750		return retval;
751	}
752
753	/*
754	 * We want to make the flags consistent here.  We will not leave with
755	 * needs_recovery set but has_journal clear.  We can't get in a loop
756	 * with -y, -n, or -p, only if a user isn't making up their mind.
757	 */
758no_has_journal:
759	if (!(sb->s_feature_compat & EXT3_FEATURE_COMPAT_HAS_JOURNAL)) {
760		recover = sb->s_feature_incompat & EXT3_FEATURE_INCOMPAT_RECOVER;
761		pctx.str = "inode";
762		if (fix_problem(ctx, PR_0_JOURNAL_HAS_JOURNAL, &pctx)) {
763			if (recover &&
764			    !fix_problem(ctx, PR_0_JOURNAL_RECOVER_SET, &pctx))
765				goto no_has_journal;
766			/*
767			 * Need a full fsck if we are releasing a
768			 * journal stored on a reserved inode.
769			 */
770			force_fsck = recover ||
771				(sb->s_journal_inum < EXT2_FIRST_INODE(sb));
772			/* Clear all of the journal fields */
773			sb->s_journal_inum = 0;
774			sb->s_journal_dev = 0;
775			memset(sb->s_journal_uuid, 0,
776			       sizeof(sb->s_journal_uuid));
777			e2fsck_clear_recover(ctx, force_fsck);
778		} else if (!(ctx->options & E2F_OPT_READONLY)) {
779			sb->s_feature_compat |= EXT3_FEATURE_COMPAT_HAS_JOURNAL;
780			ctx->fs->flags &= ~EXT2_FLAG_MASTER_SB_ONLY;
781			ext2fs_mark_super_dirty(ctx->fs);
782		}
783	}
784
785	if (sb->s_feature_compat & EXT3_FEATURE_COMPAT_HAS_JOURNAL &&
786	    !(sb->s_feature_incompat & EXT3_FEATURE_INCOMPAT_RECOVER) &&
787	    journal->j_superblock->s_start != 0) {
788		/* Print status information */
789		fix_problem(ctx, PR_0_JOURNAL_RECOVERY_CLEAR, &pctx);
790		if (ctx->superblock)
791			problem = PR_0_JOURNAL_RUN_DEFAULT;
792		else
793			problem = PR_0_JOURNAL_RUN;
794		if (fix_problem(ctx, problem, &pctx)) {
795			ctx->options |= E2F_OPT_FORCE;
796			sb->s_feature_incompat |=
797				EXT3_FEATURE_INCOMPAT_RECOVER;
798			ext2fs_mark_super_dirty(ctx->fs);
799		} else if (fix_problem(ctx,
800				       PR_0_JOURNAL_RESET_JOURNAL, &pctx)) {
801			reset = 1;
802			sb->s_state &= ~EXT2_VALID_FS;
803			ext2fs_mark_super_dirty(ctx->fs);
804		}
805		/*
806		 * If the user answers no to the above question, we
807		 * ignore the fact that journal apparently has data;
808		 * accidentally replaying over valid data would be far
809		 * worse than skipping a questionable recovery.
810		 *
811		 * XXX should we abort with a fatal error here?  What
812		 * will the ext3 kernel code do if a filesystem with
813		 * !NEEDS_RECOVERY but with a non-zero
814		 * journal->j_superblock->s_start is mounted?
815		 */
816	}
817
818	/*
819	 * If we don't need to do replay the journal, check to see if
820	 * the journal's errno is set; if so, we need to mark the file
821	 * system as being corrupt and clear the journal's s_errno.
822	 */
823	if (!(sb->s_feature_incompat & EXT3_FEATURE_INCOMPAT_RECOVER) &&
824	    journal->j_superblock->s_errno) {
825		ctx->fs->super->s_state |= EXT2_ERROR_FS;
826		ext2fs_mark_super_dirty(ctx->fs);
827		journal->j_superblock->s_errno = 0;
828		mark_buffer_dirty(journal->j_sb_buffer);
829	}
830
831	e2fsck_journal_release(ctx, journal, reset, 0);
832	return retval;
833}
834
835static errcode_t recover_ext3_journal(e2fsck_t ctx)
836{
837	struct problem_context	pctx;
838	journal_t *journal;
839	errcode_t retval;
840
841	clear_problem_context(&pctx);
842
843	journal_init_revoke_caches();
844	retval = e2fsck_get_journal(ctx, &journal);
845	if (retval)
846		return retval;
847
848	retval = e2fsck_journal_load(journal);
849	if (retval)
850		goto errout;
851
852	retval = journal_init_revoke(journal, 1024);
853	if (retval)
854		goto errout;
855
856	retval = -journal_recover(journal);
857	if (retval)
858		goto errout;
859
860	if (journal->j_failed_commit) {
861		pctx.ino = journal->j_failed_commit;
862		fix_problem(ctx, PR_0_JNL_TXN_CORRUPT, &pctx);
863		journal->j_superblock->s_errno = -EINVAL;
864		mark_buffer_dirty(journal->j_sb_buffer);
865	}
866
867errout:
868	journal_destroy_revoke(journal);
869	journal_destroy_revoke_caches();
870	e2fsck_journal_release(ctx, journal, 1, 0);
871	return retval;
872}
873
874errcode_t e2fsck_run_ext3_journal(e2fsck_t ctx)
875{
876	io_manager io_ptr = ctx->fs->io->manager;
877	int blocksize = ctx->fs->blocksize;
878	errcode_t	retval, recover_retval;
879	io_stats	stats = 0;
880	unsigned long long kbytes_written = 0;
881
882	printf(_("%s: recovering journal\n"), ctx->device_name);
883	if (ctx->options & E2F_OPT_READONLY) {
884		printf(_("%s: won't do journal recovery while read-only\n"),
885		       ctx->device_name);
886		return EXT2_ET_FILE_RO;
887	}
888
889	if (ctx->fs->flags & EXT2_FLAG_DIRTY)
890		ext2fs_flush(ctx->fs);	/* Force out any modifications */
891
892	recover_retval = recover_ext3_journal(ctx);
893
894	/*
895	 * Reload the filesystem context to get up-to-date data from disk
896	 * because journal recovery will change the filesystem under us.
897	 */
898	if (ctx->fs->super->s_kbytes_written &&
899	    ctx->fs->io->manager->get_stats)
900		ctx->fs->io->manager->get_stats(ctx->fs->io, &stats);
901	if (stats && stats->bytes_written)
902		kbytes_written = stats->bytes_written >> 10;
903
904	ext2fs_mmp_stop(ctx->fs);
905	ext2fs_free(ctx->fs);
906	retval = ext2fs_open(ctx->filesystem_name, EXT2_FLAG_RW,
907			     ctx->superblock, blocksize, io_ptr,
908			     &ctx->fs);
909	if (retval) {
910		com_err(ctx->program_name, retval,
911			_("while trying to re-open %s"),
912			ctx->device_name);
913		fatal_error(ctx, 0);
914	}
915	ctx->fs->priv_data = ctx;
916	ctx->fs->now = ctx->now;
917	ctx->fs->flags |= EXT2_FLAG_MASTER_SB_ONLY;
918	ctx->fs->super->s_kbytes_written += kbytes_written;
919
920	/* Set the superblock flags */
921	e2fsck_clear_recover(ctx, recover_retval != 0);
922
923	/*
924	 * Do one last sanity check, and propagate journal->s_errno to
925	 * the EXT2_ERROR_FS flag in the fs superblock if needed.
926	 */
927	retval = e2fsck_check_ext3_journal(ctx);
928	return retval ? retval : recover_retval;
929}
930
/*
 * File names that e2fsck_move_ext3_journal() looks up in the root
 * directory when searching for a visible journal file to move to the
 * reserved journal inode.  The list is terminated by a null entry.
 */
static const char * const journal_names[] = {
	".journal",
	"journal",
	".journal.dat",
	"journal.dat",
	0
};
937
938void e2fsck_move_ext3_journal(e2fsck_t ctx)
939{
940	struct ext2_super_block *sb = ctx->fs->super;
941	struct problem_context	pctx;
942	struct ext2_inode 	inode;
943	ext2_filsys		fs = ctx->fs;
944	ext2_ino_t		ino;
945	errcode_t		retval;
946	const char * const *	cpp;
947	dgrp_t			group;
948	int			mount_flags;
949
950	clear_problem_context(&pctx);
951
952	/*
953	 * If the filesystem is opened read-only, or there is no
954	 * journal, then do nothing.
955	 */
956	if ((ctx->options & E2F_OPT_READONLY) ||
957	    (sb->s_journal_inum == 0) ||
958	    !(sb->s_feature_compat & EXT3_FEATURE_COMPAT_HAS_JOURNAL))
959		return;
960
961	/*
962	 * Read in the journal inode
963	 */
964	if (ext2fs_read_inode(fs, sb->s_journal_inum, &inode) != 0)
965		return;
966
967	/*
968	 * If it's necessary to backup the journal inode, do so.
969	 */
970	if ((sb->s_jnl_backup_type == 0) ||
971	    ((sb->s_jnl_backup_type == EXT3_JNL_BACKUP_BLOCKS) &&
972	     memcmp(inode.i_block, sb->s_jnl_blocks, EXT2_N_BLOCKS*4))) {
973		if (fix_problem(ctx, PR_0_BACKUP_JNL, &pctx)) {
974			memcpy(sb->s_jnl_blocks, inode.i_block,
975			       EXT2_N_BLOCKS*4);
976			sb->s_jnl_blocks[15] = inode.i_size_high;
977			sb->s_jnl_blocks[16] = inode.i_size;
978			sb->s_jnl_backup_type = EXT3_JNL_BACKUP_BLOCKS;
979			ext2fs_mark_super_dirty(fs);
980			fs->flags &= ~EXT2_FLAG_MASTER_SB_ONLY;
981		}
982	}
983
984	/*
985	 * If the journal is already the hidden inode, then do nothing
986	 */
987	if (sb->s_journal_inum == EXT2_JOURNAL_INO)
988		return;
989
990	/*
991	 * The journal inode had better have only one link and not be readable.
992	 */
993	if (inode.i_links_count != 1)
994		return;
995
996	/*
997	 * If the filesystem is mounted, or we can't tell whether
998	 * or not it's mounted, do nothing.
999	 */
1000	retval = ext2fs_check_if_mounted(ctx->filesystem_name, &mount_flags);
1001	if (retval || (mount_flags & EXT2_MF_MOUNTED))
1002		return;
1003
1004	/*
1005	 * If we can't find the name of the journal inode, then do
1006	 * nothing.
1007	 */
1008	for (cpp = journal_names; *cpp; cpp++) {
1009		retval = ext2fs_lookup(fs, EXT2_ROOT_INO, *cpp,
1010				       strlen(*cpp), 0, &ino);
1011		if ((retval == 0) && (ino == sb->s_journal_inum))
1012			break;
1013	}
1014	if (*cpp == 0)
1015		return;
1016
1017	/* We need the inode bitmap to be loaded */
1018	retval = ext2fs_read_bitmaps(fs);
1019	if (retval)
1020		return;
1021
1022	pctx.str = *cpp;
1023	if (!fix_problem(ctx, PR_0_MOVE_JOURNAL, &pctx))
1024		return;
1025
1026	/*
1027	 * OK, we've done all the checks, let's actually move the
1028	 * journal inode.  Errors at this point mean we need to force
1029	 * an ext2 filesystem check.
1030	 */
1031	if ((retval = ext2fs_unlink(fs, EXT2_ROOT_INO, *cpp, ino, 0)) != 0)
1032		goto err_out;
1033	if ((retval = ext2fs_write_inode(fs, EXT2_JOURNAL_INO, &inode)) != 0)
1034		goto err_out;
1035	sb->s_journal_inum = EXT2_JOURNAL_INO;
1036	ext2fs_mark_super_dirty(fs);
1037	fs->flags &= ~EXT2_FLAG_MASTER_SB_ONLY;
1038	inode.i_links_count = 0;
1039	inode.i_dtime = ctx->now;
1040	if ((retval = ext2fs_write_inode(fs, ino, &inode)) != 0)
1041		goto err_out;
1042
1043	group = ext2fs_group_of_ino(fs, ino);
1044	ext2fs_unmark_inode_bitmap2(fs->inode_map, ino);
1045	ext2fs_mark_ib_dirty(fs);
1046	ext2fs_bg_free_inodes_count_set(fs, group, ext2fs_bg_free_inodes_count(fs, group) + 1);
1047	ext2fs_group_desc_csum_set(fs, group);
1048	fs->super->s_free_inodes_count++;
1049	return;
1050
1051err_out:
1052	pctx.errcode = retval;
1053	fix_problem(ctx, PR_0_ERR_MOVE_JOURNAL, &pctx);
1054	fs->super->s_state &= ~EXT2_VALID_FS;
1055	ext2fs_mark_super_dirty(fs);
1056	return;
1057}
1058
1059/*
1060 * This function makes sure the superblock hint for the external
1061 * journal is correct.
1062 */
1063int e2fsck_fix_ext3_journal_hint(e2fsck_t ctx)
1064{
1065	struct ext2_super_block *sb = ctx->fs->super;
1066	struct problem_context pctx;
1067	char uuid[37], *journal_name;
1068	struct stat st;
1069
1070	if (!(sb->s_feature_compat & EXT3_FEATURE_COMPAT_HAS_JOURNAL) ||
1071	    uuid_is_null(sb->s_journal_uuid))
1072 		return 0;
1073
1074	uuid_unparse(sb->s_journal_uuid, uuid);
1075	journal_name = blkid_get_devname(ctx->blkid, "UUID", uuid);
1076	if (!journal_name)
1077		return 0;
1078
1079	if (stat(journal_name, &st) < 0) {
1080		free(journal_name);
1081		return 0;
1082	}
1083
1084	if (st.st_rdev != sb->s_journal_dev) {
1085		clear_problem_context(&pctx);
1086		pctx.num = st.st_rdev;
1087		if (fix_problem(ctx, PR_0_EXTERNAL_JOURNAL_HINT, &pctx)) {
1088			sb->s_journal_dev = st.st_rdev;
1089			ext2fs_mark_super_dirty(ctx->fs);
1090		}
1091	}
1092
1093	free(journal_name);
1094	return 0;
1095}
1096