segment.c revision c212991a6bc3ba120d41205a294c5b89f05f1535
1/*
2 * fs/f2fs/segment.c
3 *
4 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5 *             http://www.samsung.com/
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11#include <linux/fs.h>
12#include <linux/f2fs_fs.h>
13#include <linux/bio.h>
14#include <linux/blkdev.h>
15#include <linux/vmalloc.h>
16
17#include "f2fs.h"
18#include "segment.h"
19#include "node.h"
20
21static int need_to_flush(struct f2fs_sb_info *sbi)
22{
23	unsigned int pages_per_sec = (1 << sbi->log_blocks_per_seg) *
24			sbi->segs_per_sec;
25	int node_secs = ((get_pages(sbi, F2FS_DIRTY_NODES) + pages_per_sec - 1)
26		>> sbi->log_blocks_per_seg) / sbi->segs_per_sec;
27	int dent_secs = ((get_pages(sbi, F2FS_DIRTY_DENTS) + pages_per_sec - 1)
28		>> sbi->log_blocks_per_seg) / sbi->segs_per_sec;
29
30	if (sbi->por_doing)
31		return 0;
32
33	if (free_sections(sbi) <= (node_secs + 2 * dent_secs +
34						reserved_sections(sbi)))
35		return 1;
36	return 0;
37}
38
39/*
40 * This function balances dirty node and dentry pages.
41 * In addition, it controls garbage collection.
42 */
43void f2fs_balance_fs(struct f2fs_sb_info *sbi)
44{
45	struct writeback_control wbc = {
46		.sync_mode = WB_SYNC_ALL,
47		.nr_to_write = LONG_MAX,
48		.for_reclaim = 0,
49	};
50
51	if (sbi->por_doing)
52		return;
53
54	/*
55	 * We should do checkpoint when there are so many dirty node pages
56	 * with enough free segments. After then, we should do GC.
57	 */
58	if (need_to_flush(sbi)) {
59		sync_dirty_dir_inodes(sbi);
60		sync_node_pages(sbi, 0, &wbc);
61	}
62
63	if (has_not_enough_free_secs(sbi)) {
64		mutex_lock(&sbi->gc_mutex);
65		f2fs_gc(sbi, 1);
66	}
67}
68
69static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
70		enum dirty_type dirty_type)
71{
72	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
73
74	/* need not be added */
75	if (IS_CURSEG(sbi, segno))
76		return;
77
78	if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type]))
79		dirty_i->nr_dirty[dirty_type]++;
80
81	if (dirty_type == DIRTY) {
82		struct seg_entry *sentry = get_seg_entry(sbi, segno);
83		dirty_type = sentry->type;
84		if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type]))
85			dirty_i->nr_dirty[dirty_type]++;
86	}
87}
88
89static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
90		enum dirty_type dirty_type)
91{
92	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
93
94	if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type]))
95		dirty_i->nr_dirty[dirty_type]--;
96
97	if (dirty_type == DIRTY) {
98		struct seg_entry *sentry = get_seg_entry(sbi, segno);
99		dirty_type = sentry->type;
100		if (test_and_clear_bit(segno,
101					dirty_i->dirty_segmap[dirty_type]))
102			dirty_i->nr_dirty[dirty_type]--;
103		clear_bit(segno, dirty_i->victim_segmap[FG_GC]);
104		clear_bit(segno, dirty_i->victim_segmap[BG_GC]);
105	}
106}
107
108/*
109 * Should not occur error such as -ENOMEM.
110 * Adding dirty entry into seglist is not critical operation.
111 * If a given segment is one of current working segments, it won't be added.
112 */
113void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
114{
115	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
116	unsigned short valid_blocks;
117
118	if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
119		return;
120
121	mutex_lock(&dirty_i->seglist_lock);
122
123	valid_blocks = get_valid_blocks(sbi, segno, 0);
124
125	if (valid_blocks == 0) {
126		__locate_dirty_segment(sbi, segno, PRE);
127		__remove_dirty_segment(sbi, segno, DIRTY);
128	} else if (valid_blocks < sbi->blocks_per_seg) {
129		__locate_dirty_segment(sbi, segno, DIRTY);
130	} else {
131		/* Recovery routine with SSR needs this */
132		__remove_dirty_segment(sbi, segno, DIRTY);
133	}
134
135	mutex_unlock(&dirty_i->seglist_lock);
136	return;
137}
138
139/*
140 * Should call clear_prefree_segments after checkpoint is done.
141 */
142static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
143{
144	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
145	unsigned int segno, offset = 0;
146	unsigned int total_segs = TOTAL_SEGS(sbi);
147
148	mutex_lock(&dirty_i->seglist_lock);
149	while (1) {
150		segno = find_next_bit(dirty_i->dirty_segmap[PRE], total_segs,
151				offset);
152		if (segno >= total_segs)
153			break;
154		__set_test_and_free(sbi, segno);
155		offset = segno + 1;
156	}
157	mutex_unlock(&dirty_i->seglist_lock);
158}
159
160void clear_prefree_segments(struct f2fs_sb_info *sbi)
161{
162	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
163	unsigned int segno, offset = 0;
164	unsigned int total_segs = TOTAL_SEGS(sbi);
165
166	mutex_lock(&dirty_i->seglist_lock);
167	while (1) {
168		segno = find_next_bit(dirty_i->dirty_segmap[PRE], total_segs,
169				offset);
170		if (segno >= total_segs)
171			break;
172
173		offset = segno + 1;
174		if (test_and_clear_bit(segno, dirty_i->dirty_segmap[PRE]))
175			dirty_i->nr_dirty[PRE]--;
176
177		/* Let's use trim */
178		if (test_opt(sbi, DISCARD))
179			blkdev_issue_discard(sbi->sb->s_bdev,
180					START_BLOCK(sbi, segno) <<
181					sbi->log_sectors_per_block,
182					1 << (sbi->log_sectors_per_block +
183						sbi->log_blocks_per_seg),
184					GFP_NOFS, 0);
185	}
186	mutex_unlock(&dirty_i->seglist_lock);
187}
188
189static void __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno)
190{
191	struct sit_info *sit_i = SIT_I(sbi);
192	if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap))
193		sit_i->dirty_sentries++;
194}
195
196static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type,
197					unsigned int segno, int modified)
198{
199	struct seg_entry *se = get_seg_entry(sbi, segno);
200	se->type = type;
201	if (modified)
202		__mark_sit_entry_dirty(sbi, segno);
203}
204
205static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
206{
207	struct seg_entry *se;
208	unsigned int segno, offset;
209	long int new_vblocks;
210
211	segno = GET_SEGNO(sbi, blkaddr);
212
213	se = get_seg_entry(sbi, segno);
214	new_vblocks = se->valid_blocks + del;
215	offset = GET_SEGOFF_FROM_SEG0(sbi, blkaddr) & (sbi->blocks_per_seg - 1);
216
217	BUG_ON((new_vblocks >> (sizeof(unsigned short) << 3) ||
218				(new_vblocks > sbi->blocks_per_seg)));
219
220	se->valid_blocks = new_vblocks;
221	se->mtime = get_mtime(sbi);
222	SIT_I(sbi)->max_mtime = se->mtime;
223
224	/* Update valid block bitmap */
225	if (del > 0) {
226		if (f2fs_set_bit(offset, se->cur_valid_map))
227			BUG();
228	} else {
229		if (!f2fs_clear_bit(offset, se->cur_valid_map))
230			BUG();
231	}
232	if (!f2fs_test_bit(offset, se->ckpt_valid_map))
233		se->ckpt_valid_blocks += del;
234
235	__mark_sit_entry_dirty(sbi, segno);
236
237	/* update total number of valid blocks to be written in ckpt area */
238	SIT_I(sbi)->written_valid_blocks += del;
239
240	if (sbi->segs_per_sec > 1)
241		get_sec_entry(sbi, segno)->valid_blocks += del;
242}
243
244static void refresh_sit_entry(struct f2fs_sb_info *sbi,
245			block_t old_blkaddr, block_t new_blkaddr)
246{
247	update_sit_entry(sbi, new_blkaddr, 1);
248	if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
249		update_sit_entry(sbi, old_blkaddr, -1);
250}
251
252void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
253{
254	unsigned int segno = GET_SEGNO(sbi, addr);
255	struct sit_info *sit_i = SIT_I(sbi);
256
257	BUG_ON(addr == NULL_ADDR);
258	if (addr == NEW_ADDR)
259		return;
260
261	/* add it into sit main buffer */
262	mutex_lock(&sit_i->sentry_lock);
263
264	update_sit_entry(sbi, addr, -1);
265
266	/* add it into dirty seglist */
267	locate_dirty_segment(sbi, segno);
268
269	mutex_unlock(&sit_i->sentry_lock);
270}
271
272/*
273 * This function should be resided under the curseg_mutex lock
274 */
275static void __add_sum_entry(struct f2fs_sb_info *sbi, int type,
276		struct f2fs_summary *sum, unsigned short offset)
277{
278	struct curseg_info *curseg = CURSEG_I(sbi, type);
279	void *addr = curseg->sum_blk;
280	addr += offset * sizeof(struct f2fs_summary);
281	memcpy(addr, sum, sizeof(struct f2fs_summary));
282	return;
283}
284
285/*
286 * Calculate the number of current summary pages for writing
287 */
288int npages_for_summary_flush(struct f2fs_sb_info *sbi)
289{
290	int total_size_bytes = 0;
291	int valid_sum_count = 0;
292	int i, sum_space;
293
294	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
295		if (sbi->ckpt->alloc_type[i] == SSR)
296			valid_sum_count += sbi->blocks_per_seg;
297		else
298			valid_sum_count += curseg_blkoff(sbi, i);
299	}
300
301	total_size_bytes = valid_sum_count * (SUMMARY_SIZE + 1)
302			+ sizeof(struct nat_journal) + 2
303			+ sizeof(struct sit_journal) + 2;
304	sum_space = PAGE_CACHE_SIZE - SUM_FOOTER_SIZE;
305	if (total_size_bytes < sum_space)
306		return 1;
307	else if (total_size_bytes < 2 * sum_space)
308		return 2;
309	return 3;
310}
311
312/*
313 * Caller should put this summary page
314 */
315struct page *get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno)
316{
317	return get_meta_page(sbi, GET_SUM_BLOCK(sbi, segno));
318}
319
320static void write_sum_page(struct f2fs_sb_info *sbi,
321			struct f2fs_summary_block *sum_blk, block_t blk_addr)
322{
323	struct page *page = grab_meta_page(sbi, blk_addr);
324	void *kaddr = page_address(page);
325	memcpy(kaddr, sum_blk, PAGE_CACHE_SIZE);
326	set_page_dirty(page);
327	f2fs_put_page(page, 1);
328}
329
330static unsigned int check_prefree_segments(struct f2fs_sb_info *sbi,
331					int ofs_unit, int type)
332{
333	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
334	unsigned long *prefree_segmap = dirty_i->dirty_segmap[PRE];
335	unsigned int segno, next_segno, i;
336	int ofs = 0;
337
338	/*
339	 * If there is not enough reserved sections,
340	 * we should not reuse prefree segments.
341	 */
342	if (has_not_enough_free_secs(sbi))
343		return NULL_SEGNO;
344
345	/*
346	 * NODE page should not reuse prefree segment,
347	 * since those information is used for SPOR.
348	 */
349	if (IS_NODESEG(type))
350		return NULL_SEGNO;
351next:
352	segno = find_next_bit(prefree_segmap, TOTAL_SEGS(sbi), ofs++);
353	ofs = ((segno / ofs_unit) * ofs_unit) + ofs_unit;
354	if (segno < TOTAL_SEGS(sbi)) {
355		/* skip intermediate segments in a section */
356		if (segno % ofs_unit)
357			goto next;
358
359		/* skip if whole section is not prefree */
360		next_segno = find_next_zero_bit(prefree_segmap,
361						TOTAL_SEGS(sbi), segno + 1);
362		if (next_segno - segno < ofs_unit)
363			goto next;
364
365		/* skip if whole section was not free at the last checkpoint */
366		for (i = 0; i < ofs_unit; i++)
367			if (get_seg_entry(sbi, segno)->ckpt_valid_blocks)
368				goto next;
369		return segno;
370	}
371	return NULL_SEGNO;
372}
373
/*
 * Pick a free segment, mark it in use and return it through @newseg.
 *
 * @newseg:  in: segment used as the search hint (its section and zone are
 *           derived from it); out: the segment that was chosen.
 * @new_sec: when false, the next free segment after *newseg is tried
 *           first, unless *newseg is the last segment of its section.
 * @dir:     ALLOC_RIGHT wraps the section search around to section 0 when
 *           nothing free is found at or after the hint; otherwise the
 *           search walks leftwards from the hint.
 *
 * The whole search runs with free_i->segmap_lock write-locked. There is no
 * error return: failing to find a free section, or picking a segment that
 * is already set in free_segmap, hits BUG_ON().
 */
static void get_new_segment(struct f2fs_sb_info *sbi,
			unsigned int *newseg, bool new_sec, int dir)
{
	struct free_segmap_info *free_i = FREE_I(sbi);
	unsigned int total_secs = sbi->total_sections;
	unsigned int segno, secno, zoneno;
	unsigned int total_zones = sbi->total_sections / sbi->secs_per_zone;
	unsigned int hint = *newseg / sbi->segs_per_sec;
	unsigned int old_zoneno = GET_ZONENO_FROM_SEGNO(sbi, *newseg);
	unsigned int left_start = hint;
	bool init = true;
	int go_left = 0;
	int i;

	write_lock(&free_i->segmap_lock);

	/* try the next free segment following *newseg first */
	if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) {
		segno = find_next_zero_bit(free_i->free_segmap,
					TOTAL_SEGS(sbi), *newseg + 1);
		if (segno < TOTAL_SEGS(sbi))
			goto got_it;
	}
find_other_zone:
	/* look for a free section at or to the right of the hint */
	secno = find_next_zero_bit(free_i->free_secmap, total_secs, hint);
	if (secno >= total_secs) {
		if (dir == ALLOC_RIGHT) {
			/* wrap around and search from the first section */
			secno = find_next_zero_bit(free_i->free_secmap,
						total_secs, 0);
			BUG_ON(secno >= total_secs);
		} else {
			go_left = 1;
			/*
			 * NOTE(review): hint is unsigned, so this wraps if
			 * hint is 0 - confirm that cannot be reached.
			 */
			left_start = hint - 1;
		}
	}
	if (go_left == 0)
		goto skip_left;

	/* walk left until a clear (free) bit is found in free_secmap */
	while (test_bit(left_start, free_i->free_secmap)) {
		if (left_start > 0) {
			left_start--;
			continue;
		}
		/* reached section 0 without luck: search from the start */
		left_start = find_next_zero_bit(free_i->free_secmap,
						total_secs, 0);
		BUG_ON(left_start >= total_secs);
		break;
	}
	secno = left_start;
skip_left:
	hint = secno;
	segno = secno * sbi->segs_per_sec;
	zoneno = secno / sbi->secs_per_zone;

	/* give up on finding another zone */
	if (!init)
		goto got_it;
	if (sbi->secs_per_zone == 1)
		goto got_it;
	if (zoneno == old_zoneno)
		goto got_it;
	if (dir == ALLOC_LEFT) {
		if (!go_left && zoneno + 1 >= total_zones)
			goto got_it;
		if (go_left && zoneno == 0)
			goto got_it;
	}
	/* is any current segment already placed in this zone? */
	for (i = 0; i < NR_CURSEG_TYPE; i++)
		if (CURSEG_I(sbi, i)->zone == zoneno)
			break;

	if (i < NR_CURSEG_TYPE) {
		/* zone is in use, try another (only one retry: init is cleared) */
		if (go_left)
			hint = zoneno * sbi->secs_per_zone - 1;
		else if (zoneno + 1 >= total_zones)
			hint = 0;
		else
			hint = (zoneno + 1) * sbi->secs_per_zone;
		init = false;
		goto find_other_zone;
	}
got_it:
	/* set it as dirty segment in free segmap */
	BUG_ON(test_bit(segno, free_i->free_segmap));
	__set_inuse(sbi, segno);
	*newseg = segno;
	write_unlock(&free_i->segmap_lock);
}
466
467static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
468{
469	struct curseg_info *curseg = CURSEG_I(sbi, type);
470	struct summary_footer *sum_footer;
471
472	curseg->segno = curseg->next_segno;
473	curseg->zone = GET_ZONENO_FROM_SEGNO(sbi, curseg->segno);
474	curseg->next_blkoff = 0;
475	curseg->next_segno = NULL_SEGNO;
476
477	sum_footer = &(curseg->sum_blk->footer);
478	memset(sum_footer, 0, sizeof(struct summary_footer));
479	if (IS_DATASEG(type))
480		SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA);
481	if (IS_NODESEG(type))
482		SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE);
483	__set_sit_entry_type(sbi, type, curseg->segno, modified);
484}
485
486/*
487 * Allocate a current working segment.
488 * This function always allocates a free segment in LFS manner.
489 */
490static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
491{
492	struct curseg_info *curseg = CURSEG_I(sbi, type);
493	unsigned int segno = curseg->segno;
494	int dir = ALLOC_LEFT;
495
496	write_sum_page(sbi, curseg->sum_blk,
497				GET_SUM_BLOCK(sbi, curseg->segno));
498	if (type == CURSEG_WARM_DATA || type == CURSEG_COLD_DATA)
499		dir = ALLOC_RIGHT;
500
501	if (test_opt(sbi, NOHEAP))
502		dir = ALLOC_RIGHT;
503
504	get_new_segment(sbi, &segno, new_sec, dir);
505	curseg->next_segno = segno;
506	reset_curseg(sbi, type, 1);
507	curseg->alloc_type = LFS;
508}
509
510static void __next_free_blkoff(struct f2fs_sb_info *sbi,
511			struct curseg_info *seg, block_t start)
512{
513	struct seg_entry *se = get_seg_entry(sbi, seg->segno);
514	block_t ofs;
515	for (ofs = start; ofs < sbi->blocks_per_seg; ofs++) {
516		if (!f2fs_test_bit(ofs, se->ckpt_valid_map)
517			&& !f2fs_test_bit(ofs, se->cur_valid_map))
518			break;
519	}
520	seg->next_blkoff = ofs;
521}
522
523/*
524 * If a segment is written by LFS manner, next block offset is just obtained
525 * by increasing the current block offset. However, if a segment is written by
526 * SSR manner, next block offset obtained by calling __next_free_blkoff
527 */
528static void __refresh_next_blkoff(struct f2fs_sb_info *sbi,
529				struct curseg_info *seg)
530{
531	if (seg->alloc_type == SSR)
532		__next_free_blkoff(sbi, seg, seg->next_blkoff + 1);
533	else
534		seg->next_blkoff++;
535}
536
537/*
538 * This function always allocates a used segment (from dirty seglist) by SSR
539 * manner, so it should recover the existing segment information of valid blocks
540 */
541static void change_curseg(struct f2fs_sb_info *sbi, int type, bool reuse)
542{
543	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
544	struct curseg_info *curseg = CURSEG_I(sbi, type);
545	unsigned int new_segno = curseg->next_segno;
546	struct f2fs_summary_block *sum_node;
547	struct page *sum_page;
548
549	write_sum_page(sbi, curseg->sum_blk,
550				GET_SUM_BLOCK(sbi, curseg->segno));
551	__set_test_and_inuse(sbi, new_segno);
552
553	mutex_lock(&dirty_i->seglist_lock);
554	__remove_dirty_segment(sbi, new_segno, PRE);
555	__remove_dirty_segment(sbi, new_segno, DIRTY);
556	mutex_unlock(&dirty_i->seglist_lock);
557
558	reset_curseg(sbi, type, 1);
559	curseg->alloc_type = SSR;
560	__next_free_blkoff(sbi, curseg, 0);
561
562	if (reuse) {
563		sum_page = get_sum_page(sbi, new_segno);
564		sum_node = (struct f2fs_summary_block *)page_address(sum_page);
565		memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE);
566		f2fs_put_page(sum_page, 1);
567	}
568}
569
570/*
571 * flush out current segment and replace it with new segment
572 * This function should be returned with success, otherwise BUG
573 */
574static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
575						int type, bool force)
576{
577	struct curseg_info *curseg = CURSEG_I(sbi, type);
578	unsigned int ofs_unit;
579
580	if (force) {
581		new_curseg(sbi, type, true);
582		goto out;
583	}
584
585	ofs_unit = need_SSR(sbi) ? 1 : sbi->segs_per_sec;
586	curseg->next_segno = check_prefree_segments(sbi, ofs_unit, type);
587
588	if (curseg->next_segno != NULL_SEGNO)
589		change_curseg(sbi, type, false);
590	else if (type == CURSEG_WARM_NODE)
591		new_curseg(sbi, type, false);
592	else if (need_SSR(sbi) && get_ssr_segment(sbi, type))
593		change_curseg(sbi, type, true);
594	else
595		new_curseg(sbi, type, false);
596out:
597	sbi->segment_count[curseg->alloc_type]++;
598}
599
600void allocate_new_segments(struct f2fs_sb_info *sbi)
601{
602	struct curseg_info *curseg;
603	unsigned int old_curseg;
604	int i;
605
606	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
607		curseg = CURSEG_I(sbi, i);
608		old_curseg = curseg->segno;
609		SIT_I(sbi)->s_ops->allocate_segment(sbi, i, true);
610		locate_dirty_segment(sbi, old_curseg);
611	}
612}
613
/* Segment allocation operations backed by allocate_segment_by_default(). */
static const struct segment_allocation default_salloc_ops = {
	.allocate_segment = allocate_segment_by_default,
};
617
/*
 * Completion handler installed on write bios by do_submit_bio().
 *
 * Pages are visited from the last bio_vec back to the first. If the bio is
 * not up to date, each page gets its error flag set, AS_EIO is recorded on
 * its mapping (if any), CP_ERROR_FLAG is set on the checkpoint and the page
 * is re-dirtied. Writeback is ended on every page and the F2FS_WRITEBACK
 * page count is decremented. A synchronous submitter is woken through
 * p->wait; finally the private data is freed and the bio is released.
 */
static void f2fs_end_io_write(struct bio *bio, int err)
{
	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
	struct bio_private *p = bio->bi_private;

	do {
		struct page *page = bvec->bv_page;

		/* step to the previous vector and prefetch its page flags */
		if (--bvec >= bio->bi_io_vec)
			prefetchw(&bvec->bv_page->flags);
		if (!uptodate) {
			SetPageError(page);
			if (page->mapping)
				set_bit(AS_EIO, &page->mapping->flags);
			set_ckpt_flags(p->sbi->ckpt, CP_ERROR_FLAG);
			set_page_dirty(page);
		}
		end_page_writeback(page);
		dec_page_count(p->sbi, F2FS_WRITEBACK);
	} while (bvec >= bio->bi_io_vec);

	/* wake up the submitter waiting in do_submit_bio() */
	if (p->is_sync)
		complete(p->wait);
	kfree(p);
	bio_put(bio);
}
645
646struct bio *f2fs_bio_alloc(struct block_device *bdev, sector_t first_sector,
647					int nr_vecs, gfp_t gfp_flags)
648{
649	struct bio *bio;
650
651	/* allocate new bio */
652	bio = bio_alloc(gfp_flags, nr_vecs);
653
654	bio->bi_bdev = bdev;
655	bio->bi_sector = first_sector;
656retry:
657	bio->bi_private = kmalloc(sizeof(struct bio_private),
658					GFP_NOFS | __GFP_HIGH);
659	if (!bio->bi_private) {
660		cond_resched();
661		goto retry;
662	}
663	return bio;
664}
665
666static void do_submit_bio(struct f2fs_sb_info *sbi,
667				enum page_type type, bool sync)
668{
669	int rw = sync ? WRITE_SYNC : WRITE;
670	enum page_type btype = type > META ? META : type;
671
672	if (type >= META_FLUSH)
673		rw = WRITE_FLUSH_FUA;
674
675	if (sbi->bio[btype]) {
676		struct bio_private *p = sbi->bio[btype]->bi_private;
677		p->sbi = sbi;
678		sbi->bio[btype]->bi_end_io = f2fs_end_io_write;
679		if (type == META_FLUSH) {
680			DECLARE_COMPLETION_ONSTACK(wait);
681			p->is_sync = true;
682			p->wait = &wait;
683			submit_bio(rw, sbi->bio[btype]);
684			wait_for_completion(&wait);
685		} else {
686			p->is_sync = false;
687			submit_bio(rw, sbi->bio[btype]);
688		}
689		sbi->bio[btype] = NULL;
690	}
691}
692
/*
 * Submit the pending bio of @type (see do_submit_bio()) while holding
 * sbi->bio_sem for writing.
 */
void f2fs_submit_bio(struct f2fs_sb_info *sbi, enum page_type type, bool sync)
{
	down_write(&sbi->bio_sem);
	do_submit_bio(sbi, type, sync);
	up_write(&sbi->bio_sem);
}
699
700static void submit_write_page(struct f2fs_sb_info *sbi, struct page *page,
701				block_t blk_addr, enum page_type type)
702{
703	struct block_device *bdev = sbi->sb->s_bdev;
704
705	verify_block_addr(sbi, blk_addr);
706
707	down_write(&sbi->bio_sem);
708
709	inc_page_count(sbi, F2FS_WRITEBACK);
710
711	if (sbi->bio[type] && sbi->last_block_in_bio[type] != blk_addr - 1)
712		do_submit_bio(sbi, type, false);
713alloc_new:
714	if (sbi->bio[type] == NULL)
715		sbi->bio[type] = f2fs_bio_alloc(bdev,
716				blk_addr << (sbi->log_blocksize - 9),
717				bio_get_nr_vecs(bdev), GFP_NOFS | __GFP_HIGH);
718
719	if (bio_add_page(sbi->bio[type], page, PAGE_CACHE_SIZE, 0) <
720							PAGE_CACHE_SIZE) {
721		do_submit_bio(sbi, type, false);
722		goto alloc_new;
723	}
724
725	sbi->last_block_in_bio[type] = blk_addr;
726
727	up_write(&sbi->bio_sem);
728}
729
730static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type)
731{
732	struct curseg_info *curseg = CURSEG_I(sbi, type);
733	if (curseg->next_blkoff < sbi->blocks_per_seg)
734		return true;
735	return false;
736}
737
738static int __get_segment_type_2(struct page *page, enum page_type p_type)
739{
740	if (p_type == DATA)
741		return CURSEG_HOT_DATA;
742	else
743		return CURSEG_HOT_NODE;
744}
745
746static int __get_segment_type_4(struct page *page, enum page_type p_type)
747{
748	if (p_type == DATA) {
749		struct inode *inode = page->mapping->host;
750
751		if (S_ISDIR(inode->i_mode))
752			return CURSEG_HOT_DATA;
753		else
754			return CURSEG_COLD_DATA;
755	} else {
756		if (IS_DNODE(page) && !is_cold_node(page))
757			return CURSEG_HOT_NODE;
758		else
759			return CURSEG_COLD_NODE;
760	}
761}
762
763static int __get_segment_type_6(struct page *page, enum page_type p_type)
764{
765	if (p_type == DATA) {
766		struct inode *inode = page->mapping->host;
767
768		if (S_ISDIR(inode->i_mode))
769			return CURSEG_HOT_DATA;
770		else if (is_cold_data(page) || is_cold_file(inode))
771			return CURSEG_COLD_DATA;
772		else
773			return CURSEG_WARM_DATA;
774	} else {
775		if (IS_DNODE(page))
776			return is_cold_node(page) ? CURSEG_WARM_NODE :
777						CURSEG_HOT_NODE;
778		else
779			return CURSEG_COLD_NODE;
780	}
781}
782
783static int __get_segment_type(struct page *page, enum page_type p_type)
784{
785	struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb);
786	switch (sbi->active_logs) {
787	case 2:
788		return __get_segment_type_2(page, p_type);
789	case 4:
790		return __get_segment_type_4(page, p_type);
791	case 6:
792		return __get_segment_type_6(page, p_type);
793	default:
794		BUG();
795	}
796}
797
/*
 * Allocate the next block of the proper log for @page and queue the write.
 *
 * @old_blkaddr: block the data currently lives at; it loses a valid block
 *               unless it maps to NULL_SEGNO (see refresh_sit_entry()).
 * @new_blkaddr: out: the block address chosen for the page.
 * @sum:         summary entry recorded for the new block.
 * @p_type:      DATA or NODE; for NODE the node footer is pointed at the
 *               block that will be allocated next.
 *
 * Locking: curseg_mutex is held for the whole operation, with sentry_lock
 * nested inside it around the SIT and dirty-list updates.
 */
static void do_write_page(struct f2fs_sb_info *sbi, struct page *page,
			block_t old_blkaddr, block_t *new_blkaddr,
			struct f2fs_summary *sum, enum page_type p_type)
{
	struct sit_info *sit_i = SIT_I(sbi);
	struct curseg_info *curseg;
	unsigned int old_cursegno;
	int type;

	type = __get_segment_type(page, p_type);
	curseg = CURSEG_I(sbi, type);

	mutex_lock(&curseg->curseg_mutex);

	*new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
	/* remembered because the current segment may change below */
	old_cursegno = curseg->segno;

	/*
	 * __add_sum_entry should be resided under the curseg_mutex
	 * because, this function updates a summary entry in the
	 * current summary block.
	 */
	__add_sum_entry(sbi, type, sum, curseg->next_blkoff);

	mutex_lock(&sit_i->sentry_lock);
	__refresh_next_blkoff(sbi, curseg);
	sbi->block_count[curseg->alloc_type]++;

	/*
	 * SIT information should be updated before segment allocation,
	 * since SSR needs latest valid block information.
	 */
	refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr);

	/* the current segment is exhausted: move on to another one */
	if (!__has_curseg_space(sbi, type))
		sit_i->s_ops->allocate_segment(sbi, type, false);

	locate_dirty_segment(sbi, old_cursegno);
	locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
	mutex_unlock(&sit_i->sentry_lock);

	if (p_type == NODE)
		fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg));

	/* writeout dirty page into bdev */
	submit_write_page(sbi, page, *new_blkaddr, p_type);

	mutex_unlock(&curseg->curseg_mutex);
}
847
848int write_meta_page(struct f2fs_sb_info *sbi, struct page *page,
849			struct writeback_control *wbc)
850{
851	if (wbc->for_reclaim)
852		return AOP_WRITEPAGE_ACTIVATE;
853
854	set_page_writeback(page);
855	submit_write_page(sbi, page, page->index, META);
856	return 0;
857}
858
859void write_node_page(struct f2fs_sb_info *sbi, struct page *page,
860		unsigned int nid, block_t old_blkaddr, block_t *new_blkaddr)
861{
862	struct f2fs_summary sum;
863	set_summary(&sum, nid, 0, 0);
864	do_write_page(sbi, page, old_blkaddr, new_blkaddr, &sum, NODE);
865}
866
867void write_data_page(struct inode *inode, struct page *page,
868		struct dnode_of_data *dn, block_t old_blkaddr,
869		block_t *new_blkaddr)
870{
871	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
872	struct f2fs_summary sum;
873	struct node_info ni;
874
875	BUG_ON(old_blkaddr == NULL_ADDR);
876	get_node_info(sbi, dn->nid, &ni);
877	set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
878
879	do_write_page(sbi, page, old_blkaddr,
880			new_blkaddr, &sum, DATA);
881}
882
/*
 * Queue a data page for writing at its existing block address
 * @old_blk_addr; no new block is allocated and no SIT update is made here.
 */
void rewrite_data_page(struct f2fs_sb_info *sbi, struct page *page,
					block_t old_blk_addr)
{
	submit_write_page(sbi, page, old_blk_addr, DATA);
}
888
/*
 * Re-register a data block at @new_blkaddr in the segment bookkeeping.
 *
 * The log is taken from the type stored in the segment entry of
 * @new_blkaddr; if that segment has no valid blocks and is not a current
 * segment, the cold data log is used when @old_blkaddr is NULL_ADDR and
 * the warm data log otherwise. The log is switched onto that segment if
 * necessary, @sum is stored at the block's offset, and SIT and dirty lists
 * are refreshed. No page I/O is issued here (@page is unused).
 *
 * Locking: curseg_mutex, then sentry_lock.
 */
void recover_data_page(struct f2fs_sb_info *sbi,
			struct page *page, struct f2fs_summary *sum,
			block_t old_blkaddr, block_t new_blkaddr)
{
	struct sit_info *sit_i = SIT_I(sbi);
	struct curseg_info *curseg;
	unsigned int segno, old_cursegno;
	struct seg_entry *se;
	int type;

	segno = GET_SEGNO(sbi, new_blkaddr);
	se = get_seg_entry(sbi, segno);
	type = se->type;

	/* empty, non-current segment: pick the log from old_blkaddr instead */
	if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) {
		if (old_blkaddr == NULL_ADDR)
			type = CURSEG_COLD_DATA;
		else
			type = CURSEG_WARM_DATA;
	}
	curseg = CURSEG_I(sbi, type);

	mutex_lock(&curseg->curseg_mutex);
	mutex_lock(&sit_i->sentry_lock);

	old_cursegno = curseg->segno;

	/* change the current segment */
	if (segno != curseg->segno) {
		curseg->next_segno = segno;
		change_curseg(sbi, type, true);
	}

	/* point the log at the exact block being recovered */
	curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, new_blkaddr) &
					(sbi->blocks_per_seg - 1);
	__add_sum_entry(sbi, type, sum, curseg->next_blkoff);

	refresh_sit_entry(sbi, old_blkaddr, new_blkaddr);

	locate_dirty_segment(sbi, old_cursegno);
	locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));

	mutex_unlock(&sit_i->sentry_lock);
	mutex_unlock(&curseg->curseg_mutex);
}
934
/*
 * Write a node page at the fixed address @new_blkaddr using the warm node
 * log, then leave that log positioned on the page's next block address.
 *
 * The log is switched to the segment of @new_blkaddr if needed and @sum is
 * stored at the block's offset. It is then moved on to the segment and
 * offset of the address returned by next_blkaddr_of_node(). The page is
 * put under writeback, queued, and the NODE bio is submitted with
 * sync = true before SIT and dirty lists are refreshed.
 *
 * Locking: curseg_mutex, then sentry_lock.
 */
void rewrite_node_page(struct f2fs_sb_info *sbi,
			struct page *page, struct f2fs_summary *sum,
			block_t old_blkaddr, block_t new_blkaddr)
{
	struct sit_info *sit_i = SIT_I(sbi);
	int type = CURSEG_WARM_NODE;
	struct curseg_info *curseg;
	unsigned int segno, old_cursegno;
	block_t next_blkaddr = next_blkaddr_of_node(page);
	unsigned int next_segno = GET_SEGNO(sbi, next_blkaddr);

	curseg = CURSEG_I(sbi, type);

	mutex_lock(&curseg->curseg_mutex);
	mutex_lock(&sit_i->sentry_lock);

	segno = GET_SEGNO(sbi, new_blkaddr);
	old_cursegno = curseg->segno;

	/* change the current segment */
	if (segno != curseg->segno) {
		curseg->next_segno = segno;
		change_curseg(sbi, type, true);
	}
	curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, new_blkaddr) &
					(sbi->blocks_per_seg - 1);
	__add_sum_entry(sbi, type, sum, curseg->next_blkoff);

	/* change the current log to the next block addr in advance */
	if (next_segno != segno) {
		curseg->next_segno = next_segno;
		change_curseg(sbi, type, true);
	}
	curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, next_blkaddr) &
					(sbi->blocks_per_seg - 1);

	/* rewrite node page */
	set_page_writeback(page);
	submit_write_page(sbi, page, new_blkaddr, NODE);
	f2fs_submit_bio(sbi, NODE, true);
	refresh_sit_entry(sbi, old_blkaddr, new_blkaddr);

	locate_dirty_segment(sbi, old_cursegno);
	locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));

	mutex_unlock(&sit_i->sentry_lock);
	mutex_unlock(&curseg->curseg_mutex);
}
983
/*
 * Restore the data logs from the compacted summary area.
 *
 * Layout read from the first summary page: SUM_JOURNAL_SIZE bytes copied
 * to n_nats of the hot data log's summary block, then SUM_JOURNAL_SIZE
 * bytes copied to n_sits of the cold data log's summary block, then the
 * summary entries of the hot, warm and cold data logs back to back. The
 * entries continue at offset 0 of the following meta page whenever another
 * entry would no longer fit before the footer area.
 *
 * Each data log is reset onto the segment, allocation type and block offset
 * recorded in the checkpoint. Always returns 0.
 */
static int read_compacted_summaries(struct f2fs_sb_info *sbi)
{
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
	struct curseg_info *seg_i;
	unsigned char *kaddr;
	struct page *page;
	block_t start;
	int i, j, offset;

	start = start_sum_block(sbi);

	page = get_meta_page(sbi, start++);
	kaddr = (unsigned char *)page_address(page);

	/* Step 1: restore nat cache */
	seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
	memcpy(&seg_i->sum_blk->n_nats, kaddr, SUM_JOURNAL_SIZE);

	/* Step 2: restore sit cache */
	seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
	memcpy(&seg_i->sum_blk->n_sits, kaddr + SUM_JOURNAL_SIZE,
						SUM_JOURNAL_SIZE);
	offset = 2 * SUM_JOURNAL_SIZE;

	/* Step 3: restore summary entries */
	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
		unsigned short blk_off;
		unsigned int segno;

		seg_i = CURSEG_I(sbi, i);
		segno = le32_to_cpu(ckpt->cur_data_segno[i]);
		blk_off = le16_to_cpu(ckpt->cur_data_blkoff[i]);
		seg_i->next_segno = segno;
		reset_curseg(sbi, i, 0);
		seg_i->alloc_type = ckpt->alloc_type[i];
		seg_i->next_blkoff = blk_off;

		/* an SSR log has entries for every block of the segment */
		if (seg_i->alloc_type == SSR)
			blk_off = sbi->blocks_per_seg;

		for (j = 0; j < blk_off; j++) {
			struct f2fs_summary *s;
			s = (struct f2fs_summary *)(kaddr + offset);
			seg_i->sum_blk->entries[j] = *s;
			offset += SUMMARY_SIZE;
			/* still room for one more entry in this page */
			if (offset + SUMMARY_SIZE <= PAGE_CACHE_SIZE -
						SUM_FOOTER_SIZE)
				continue;

			f2fs_put_page(page, 1);
			page = NULL;

			page = get_meta_page(sbi, start++);
			kaddr = (unsigned char *)page_address(page);
			offset = 0;
		}
	}
	f2fs_put_page(page, 1);
	return 0;
}
1044
/*
 * Restore the log of @type from a full (non-compacted) summary block.
 *
 * The segment number and block offset come from the checkpoint. The
 * summary block address depends on the log kind and on CP_UMOUNT_FLAG;
 * for node logs without that flag the segment's own summary block
 * (GET_SUM_BLOCK) is used and its entries are rebuilt through
 * restore_node_summary(). For node logs with the flag, version and
 * ofs_in_node of every entry are cleared.
 *
 * Returns 0 on success, -EINVAL if restore_node_summary() fails.
 */
static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
{
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
	struct f2fs_summary_block *sum;
	struct curseg_info *curseg;
	struct page *new;
	unsigned short blk_off;
	unsigned int segno = 0;
	block_t blk_addr = 0;

	/* get segment number and block addr */
	if (IS_DATASEG(type)) {
		segno = le32_to_cpu(ckpt->cur_data_segno[type]);
		blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type -
							CURSEG_HOT_DATA]);
		if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG))
			blk_addr = sum_blk_addr(sbi, NR_CURSEG_TYPE, type);
		else
			blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type);
	} else {
		segno = le32_to_cpu(ckpt->cur_node_segno[type -
							CURSEG_HOT_NODE]);
		blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type -
							CURSEG_HOT_NODE]);
		if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG))
			blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE,
							type - CURSEG_HOT_NODE);
		else
			blk_addr = GET_SUM_BLOCK(sbi, segno);
	}

	new = get_meta_page(sbi, blk_addr);
	sum = (struct f2fs_summary_block *)page_address(new);

	if (IS_NODESEG(type)) {
		if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG)) {
			/* clear version and ofs_in_node of every entry */
			struct f2fs_summary *ns = &sum->entries[0];
			int i;
			for (i = 0; i < sbi->blocks_per_seg; i++, ns++) {
				ns->version = 0;
				ns->ofs_in_node = 0;
			}
		} else {
			if (restore_node_summary(sbi, segno, sum)) {
				f2fs_put_page(new, 1);
				return -EINVAL;
			}
		}
	}

	/* set uncompleted segment to curseg */
	curseg = CURSEG_I(sbi, type);
	mutex_lock(&curseg->curseg_mutex);
	memcpy(curseg->sum_blk, sum, PAGE_CACHE_SIZE);
	curseg->next_segno = segno;
	reset_curseg(sbi, type, 0);
	curseg->alloc_type = ckpt->alloc_type[type];
	curseg->next_blkoff = blk_off;
	mutex_unlock(&curseg->curseg_mutex);
	f2fs_put_page(new, 1);
	return 0;
}
1107
1108static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
1109{
1110	int type = CURSEG_HOT_DATA;
1111
1112	if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG)) {
1113		/* restore for compacted data summary */
1114		if (read_compacted_summaries(sbi))
1115			return -EINVAL;
1116		type = CURSEG_HOT_NODE;
1117	}
1118
1119	for (; type <= CURSEG_COLD_NODE; type++)
1120		if (read_normal_summaries(sbi, type))
1121			return -EINVAL;
1122	return 0;
1123}
1124
1125static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr)
1126{
1127	struct page *page;
1128	unsigned char *kaddr;
1129	struct f2fs_summary *summary;
1130	struct curseg_info *seg_i;
1131	int written_size = 0;
1132	int i, j;
1133
1134	page = grab_meta_page(sbi, blkaddr++);
1135	kaddr = (unsigned char *)page_address(page);
1136
1137	/* Step 1: write nat cache */
1138	seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
1139	memcpy(kaddr, &seg_i->sum_blk->n_nats, SUM_JOURNAL_SIZE);
1140	written_size += SUM_JOURNAL_SIZE;
1141
1142	/* Step 2: write sit cache */
1143	seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
1144	memcpy(kaddr + written_size, &seg_i->sum_blk->n_sits,
1145						SUM_JOURNAL_SIZE);
1146	written_size += SUM_JOURNAL_SIZE;
1147
1148	set_page_dirty(page);
1149
1150	/* Step 3: write summary entries */
1151	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
1152		unsigned short blkoff;
1153		seg_i = CURSEG_I(sbi, i);
1154		if (sbi->ckpt->alloc_type[i] == SSR)
1155			blkoff = sbi->blocks_per_seg;
1156		else
1157			blkoff = curseg_blkoff(sbi, i);
1158
1159		for (j = 0; j < blkoff; j++) {
1160			if (!page) {
1161				page = grab_meta_page(sbi, blkaddr++);
1162				kaddr = (unsigned char *)page_address(page);
1163				written_size = 0;
1164			}
1165			summary = (struct f2fs_summary *)(kaddr + written_size);
1166			*summary = seg_i->sum_blk->entries[j];
1167			written_size += SUMMARY_SIZE;
1168			set_page_dirty(page);
1169
1170			if (written_size + SUMMARY_SIZE <= PAGE_CACHE_SIZE -
1171							SUM_FOOTER_SIZE)
1172				continue;
1173
1174			f2fs_put_page(page, 1);
1175			page = NULL;
1176		}
1177	}
1178	if (page)
1179		f2fs_put_page(page, 1);
1180}
1181
1182static void write_normal_summaries(struct f2fs_sb_info *sbi,
1183					block_t blkaddr, int type)
1184{
1185	int i, end;
1186	if (IS_DATASEG(type))
1187		end = type + NR_CURSEG_DATA_TYPE;
1188	else
1189		end = type + NR_CURSEG_NODE_TYPE;
1190
1191	for (i = type; i < end; i++) {
1192		struct curseg_info *sum = CURSEG_I(sbi, i);
1193		mutex_lock(&sum->curseg_mutex);
1194		write_sum_page(sbi, sum->sum_blk, blkaddr + (i - type));
1195		mutex_unlock(&sum->curseg_mutex);
1196	}
1197}
1198
1199void write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
1200{
1201	if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG))
1202		write_compacted_summaries(sbi, start_blk);
1203	else
1204		write_normal_summaries(sbi, start_blk, CURSEG_HOT_DATA);
1205}
1206
1207void write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
1208{
1209	if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG))
1210		write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE);
1211	return;
1212}
1213
1214int lookup_journal_in_cursum(struct f2fs_summary_block *sum, int type,
1215					unsigned int val, int alloc)
1216{
1217	int i;
1218
1219	if (type == NAT_JOURNAL) {
1220		for (i = 0; i < nats_in_cursum(sum); i++) {
1221			if (le32_to_cpu(nid_in_journal(sum, i)) == val)
1222				return i;
1223		}
1224		if (alloc && nats_in_cursum(sum) < NAT_JOURNAL_ENTRIES)
1225			return update_nats_in_cursum(sum, 1);
1226	} else if (type == SIT_JOURNAL) {
1227		for (i = 0; i < sits_in_cursum(sum); i++)
1228			if (le32_to_cpu(segno_in_journal(sum, i)) == val)
1229				return i;
1230		if (alloc && sits_in_cursum(sum) < SIT_JOURNAL_ENTRIES)
1231			return update_sits_in_cursum(sum, 1);
1232	}
1233	return -1;
1234}
1235
1236static struct page *get_current_sit_page(struct f2fs_sb_info *sbi,
1237					unsigned int segno)
1238{
1239	struct sit_info *sit_i = SIT_I(sbi);
1240	unsigned int offset = SIT_BLOCK_OFFSET(sit_i, segno);
1241	block_t blk_addr = sit_i->sit_base_addr + offset;
1242
1243	check_seg_range(sbi, segno);
1244
1245	/* calculate sit block address */
1246	if (f2fs_test_bit(offset, sit_i->sit_bitmap))
1247		blk_addr += sit_i->sit_blocks;
1248
1249	return get_meta_page(sbi, blk_addr);
1250}
1251
1252static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
1253					unsigned int start)
1254{
1255	struct sit_info *sit_i = SIT_I(sbi);
1256	struct page *src_page, *dst_page;
1257	pgoff_t src_off, dst_off;
1258	void *src_addr, *dst_addr;
1259
1260	src_off = current_sit_addr(sbi, start);
1261	dst_off = next_sit_addr(sbi, src_off);
1262
1263	/* get current sit block page without lock */
1264	src_page = get_meta_page(sbi, src_off);
1265	dst_page = grab_meta_page(sbi, dst_off);
1266	BUG_ON(PageDirty(src_page));
1267
1268	src_addr = page_address(src_page);
1269	dst_addr = page_address(dst_page);
1270	memcpy(dst_addr, src_addr, PAGE_CACHE_SIZE);
1271
1272	set_page_dirty(dst_page);
1273	f2fs_put_page(src_page, 1);
1274
1275	set_to_next_sit(sit_i, start);
1276
1277	return dst_page;
1278}
1279
1280static bool flush_sits_in_journal(struct f2fs_sb_info *sbi)
1281{
1282	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
1283	struct f2fs_summary_block *sum = curseg->sum_blk;
1284	int i;
1285
1286	/*
1287	 * If the journal area in the current summary is full of sit entries,
1288	 * all the sit entries will be flushed. Otherwise the sit entries
1289	 * are not able to replace with newly hot sit entries.
1290	 */
1291	if (sits_in_cursum(sum) >= SIT_JOURNAL_ENTRIES) {
1292		for (i = sits_in_cursum(sum) - 1; i >= 0; i--) {
1293			unsigned int segno;
1294			segno = le32_to_cpu(segno_in_journal(sum, i));
1295			__mark_sit_entry_dirty(sbi, segno);
1296		}
1297		update_sits_in_cursum(sum, -sits_in_cursum(sum));
1298		return 1;
1299	}
1300	return 0;
1301}
1302
1303/*
1304 * CP calls this function, which flushes SIT entries including sit_journal,
1305 * and moves prefree segs to free segs.
1306 */
1307void flush_sit_entries(struct f2fs_sb_info *sbi)
1308{
1309	struct sit_info *sit_i = SIT_I(sbi);
1310	unsigned long *bitmap = sit_i->dirty_sentries_bitmap;
1311	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
1312	struct f2fs_summary_block *sum = curseg->sum_blk;
1313	unsigned long nsegs = TOTAL_SEGS(sbi);
1314	struct page *page = NULL;
1315	struct f2fs_sit_block *raw_sit = NULL;
1316	unsigned int start = 0, end = 0;
1317	unsigned int segno = -1;
1318	bool flushed;
1319
1320	mutex_lock(&curseg->curseg_mutex);
1321	mutex_lock(&sit_i->sentry_lock);
1322
1323	/*
1324	 * "flushed" indicates whether sit entries in journal are flushed
1325	 * to the SIT area or not.
1326	 */
1327	flushed = flush_sits_in_journal(sbi);
1328
1329	while ((segno = find_next_bit(bitmap, nsegs, segno + 1)) < nsegs) {
1330		struct seg_entry *se = get_seg_entry(sbi, segno);
1331		int sit_offset, offset;
1332
1333		sit_offset = SIT_ENTRY_OFFSET(sit_i, segno);
1334
1335		if (flushed)
1336			goto to_sit_page;
1337
1338		offset = lookup_journal_in_cursum(sum, SIT_JOURNAL, segno, 1);
1339		if (offset >= 0) {
1340			segno_in_journal(sum, offset) = cpu_to_le32(segno);
1341			seg_info_to_raw_sit(se, &sit_in_journal(sum, offset));
1342			goto flush_done;
1343		}
1344to_sit_page:
1345		if (!page || (start > segno) || (segno > end)) {
1346			if (page) {
1347				f2fs_put_page(page, 1);
1348				page = NULL;
1349			}
1350
1351			start = START_SEGNO(sit_i, segno);
1352			end = start + SIT_ENTRY_PER_BLOCK - 1;
1353
1354			/* read sit block that will be updated */
1355			page = get_next_sit_page(sbi, start);
1356			raw_sit = page_address(page);
1357		}
1358
1359		/* udpate entry in SIT block */
1360		seg_info_to_raw_sit(se, &raw_sit->entries[sit_offset]);
1361flush_done:
1362		__clear_bit(segno, bitmap);
1363		sit_i->dirty_sentries--;
1364	}
1365	mutex_unlock(&sit_i->sentry_lock);
1366	mutex_unlock(&curseg->curseg_mutex);
1367
1368	/* writeout last modified SIT block */
1369	f2fs_put_page(page, 1);
1370
1371	set_prefree_as_free_segments(sbi);
1372}
1373
1374static int build_sit_info(struct f2fs_sb_info *sbi)
1375{
1376	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
1377	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
1378	struct sit_info *sit_i;
1379	unsigned int sit_segs, start;
1380	char *src_bitmap, *dst_bitmap;
1381	unsigned int bitmap_size;
1382
1383	/* allocate memory for SIT information */
1384	sit_i = kzalloc(sizeof(struct sit_info), GFP_KERNEL);
1385	if (!sit_i)
1386		return -ENOMEM;
1387
1388	SM_I(sbi)->sit_info = sit_i;
1389
1390	sit_i->sentries = vzalloc(TOTAL_SEGS(sbi) * sizeof(struct seg_entry));
1391	if (!sit_i->sentries)
1392		return -ENOMEM;
1393
1394	bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi));
1395	sit_i->dirty_sentries_bitmap = kzalloc(bitmap_size, GFP_KERNEL);
1396	if (!sit_i->dirty_sentries_bitmap)
1397		return -ENOMEM;
1398
1399	for (start = 0; start < TOTAL_SEGS(sbi); start++) {
1400		sit_i->sentries[start].cur_valid_map
1401			= kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
1402		sit_i->sentries[start].ckpt_valid_map
1403			= kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
1404		if (!sit_i->sentries[start].cur_valid_map
1405				|| !sit_i->sentries[start].ckpt_valid_map)
1406			return -ENOMEM;
1407	}
1408
1409	if (sbi->segs_per_sec > 1) {
1410		sit_i->sec_entries = vzalloc(sbi->total_sections *
1411					sizeof(struct sec_entry));
1412		if (!sit_i->sec_entries)
1413			return -ENOMEM;
1414	}
1415
1416	/* get information related with SIT */
1417	sit_segs = le32_to_cpu(raw_super->segment_count_sit) >> 1;
1418
1419	/* setup SIT bitmap from ckeckpoint pack */
1420	bitmap_size = __bitmap_size(sbi, SIT_BITMAP);
1421	src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP);
1422
1423	dst_bitmap = kzalloc(bitmap_size, GFP_KERNEL);
1424	if (!dst_bitmap)
1425		return -ENOMEM;
1426	memcpy(dst_bitmap, src_bitmap, bitmap_size);
1427
1428	/* init SIT information */
1429	sit_i->s_ops = &default_salloc_ops;
1430
1431	sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr);
1432	sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg;
1433	sit_i->written_valid_blocks = le64_to_cpu(ckpt->valid_block_count);
1434	sit_i->sit_bitmap = dst_bitmap;
1435	sit_i->bitmap_size = bitmap_size;
1436	sit_i->dirty_sentries = 0;
1437	sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK;
1438	sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time);
1439	sit_i->mounted_time = CURRENT_TIME_SEC.tv_sec;
1440	mutex_init(&sit_i->sentry_lock);
1441	return 0;
1442}
1443
1444static int build_free_segmap(struct f2fs_sb_info *sbi)
1445{
1446	struct f2fs_sm_info *sm_info = SM_I(sbi);
1447	struct free_segmap_info *free_i;
1448	unsigned int bitmap_size, sec_bitmap_size;
1449
1450	/* allocate memory for free segmap information */
1451	free_i = kzalloc(sizeof(struct free_segmap_info), GFP_KERNEL);
1452	if (!free_i)
1453		return -ENOMEM;
1454
1455	SM_I(sbi)->free_info = free_i;
1456
1457	bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi));
1458	free_i->free_segmap = kmalloc(bitmap_size, GFP_KERNEL);
1459	if (!free_i->free_segmap)
1460		return -ENOMEM;
1461
1462	sec_bitmap_size = f2fs_bitmap_size(sbi->total_sections);
1463	free_i->free_secmap = kmalloc(sec_bitmap_size, GFP_KERNEL);
1464	if (!free_i->free_secmap)
1465		return -ENOMEM;
1466
1467	/* set all segments as dirty temporarily */
1468	memset(free_i->free_segmap, 0xff, bitmap_size);
1469	memset(free_i->free_secmap, 0xff, sec_bitmap_size);
1470
1471	/* init free segmap information */
1472	free_i->start_segno =
1473		(unsigned int) GET_SEGNO_FROM_SEG0(sbi, sm_info->main_blkaddr);
1474	free_i->free_segments = 0;
1475	free_i->free_sections = 0;
1476	rwlock_init(&free_i->segmap_lock);
1477	return 0;
1478}
1479
1480static int build_curseg(struct f2fs_sb_info *sbi)
1481{
1482	struct curseg_info *array;
1483	int i;
1484
1485	array = kzalloc(sizeof(*array) * NR_CURSEG_TYPE, GFP_KERNEL);
1486	if (!array)
1487		return -ENOMEM;
1488
1489	SM_I(sbi)->curseg_array = array;
1490
1491	for (i = 0; i < NR_CURSEG_TYPE; i++) {
1492		mutex_init(&array[i].curseg_mutex);
1493		array[i].sum_blk = kzalloc(PAGE_CACHE_SIZE, GFP_KERNEL);
1494		if (!array[i].sum_blk)
1495			return -ENOMEM;
1496		array[i].segno = NULL_SEGNO;
1497		array[i].next_blkoff = 0;
1498	}
1499	return restore_curseg_summaries(sbi);
1500}
1501
1502static void build_sit_entries(struct f2fs_sb_info *sbi)
1503{
1504	struct sit_info *sit_i = SIT_I(sbi);
1505	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
1506	struct f2fs_summary_block *sum = curseg->sum_blk;
1507	unsigned int start;
1508
1509	for (start = 0; start < TOTAL_SEGS(sbi); start++) {
1510		struct seg_entry *se = &sit_i->sentries[start];
1511		struct f2fs_sit_block *sit_blk;
1512		struct f2fs_sit_entry sit;
1513		struct page *page;
1514		int i;
1515
1516		mutex_lock(&curseg->curseg_mutex);
1517		for (i = 0; i < sits_in_cursum(sum); i++) {
1518			if (le32_to_cpu(segno_in_journal(sum, i)) == start) {
1519				sit = sit_in_journal(sum, i);
1520				mutex_unlock(&curseg->curseg_mutex);
1521				goto got_it;
1522			}
1523		}
1524		mutex_unlock(&curseg->curseg_mutex);
1525		page = get_current_sit_page(sbi, start);
1526		sit_blk = (struct f2fs_sit_block *)page_address(page);
1527		sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)];
1528		f2fs_put_page(page, 1);
1529got_it:
1530		check_block_count(sbi, start, &sit);
1531		seg_info_from_raw_sit(se, &sit);
1532		if (sbi->segs_per_sec > 1) {
1533			struct sec_entry *e = get_sec_entry(sbi, start);
1534			e->valid_blocks += se->valid_blocks;
1535		}
1536	}
1537}
1538
1539static void init_free_segmap(struct f2fs_sb_info *sbi)
1540{
1541	unsigned int start;
1542	int type;
1543
1544	for (start = 0; start < TOTAL_SEGS(sbi); start++) {
1545		struct seg_entry *sentry = get_seg_entry(sbi, start);
1546		if (!sentry->valid_blocks)
1547			__set_free(sbi, start);
1548	}
1549
1550	/* set use the current segments */
1551	for (type = CURSEG_HOT_DATA; type <= CURSEG_COLD_NODE; type++) {
1552		struct curseg_info *curseg_t = CURSEG_I(sbi, type);
1553		__set_test_and_inuse(sbi, curseg_t->segno);
1554	}
1555}
1556
1557static void init_dirty_segmap(struct f2fs_sb_info *sbi)
1558{
1559	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
1560	struct free_segmap_info *free_i = FREE_I(sbi);
1561	unsigned int segno = 0, offset = 0;
1562	unsigned short valid_blocks;
1563
1564	while (segno < TOTAL_SEGS(sbi)) {
1565		/* find dirty segment based on free segmap */
1566		segno = find_next_inuse(free_i, TOTAL_SEGS(sbi), offset);
1567		if (segno >= TOTAL_SEGS(sbi))
1568			break;
1569		offset = segno + 1;
1570		valid_blocks = get_valid_blocks(sbi, segno, 0);
1571		if (valid_blocks >= sbi->blocks_per_seg || !valid_blocks)
1572			continue;
1573		mutex_lock(&dirty_i->seglist_lock);
1574		__locate_dirty_segment(sbi, segno, DIRTY);
1575		mutex_unlock(&dirty_i->seglist_lock);
1576	}
1577}
1578
1579static int init_victim_segmap(struct f2fs_sb_info *sbi)
1580{
1581	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
1582	unsigned int bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi));
1583
1584	dirty_i->victim_segmap[FG_GC] = kzalloc(bitmap_size, GFP_KERNEL);
1585	dirty_i->victim_segmap[BG_GC] = kzalloc(bitmap_size, GFP_KERNEL);
1586	if (!dirty_i->victim_segmap[FG_GC] || !dirty_i->victim_segmap[BG_GC])
1587		return -ENOMEM;
1588	return 0;
1589}
1590
1591static int build_dirty_segmap(struct f2fs_sb_info *sbi)
1592{
1593	struct dirty_seglist_info *dirty_i;
1594	unsigned int bitmap_size, i;
1595
1596	/* allocate memory for dirty segments list information */
1597	dirty_i = kzalloc(sizeof(struct dirty_seglist_info), GFP_KERNEL);
1598	if (!dirty_i)
1599		return -ENOMEM;
1600
1601	SM_I(sbi)->dirty_info = dirty_i;
1602	mutex_init(&dirty_i->seglist_lock);
1603
1604	bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi));
1605
1606	for (i = 0; i < NR_DIRTY_TYPE; i++) {
1607		dirty_i->dirty_segmap[i] = kzalloc(bitmap_size, GFP_KERNEL);
1608		dirty_i->nr_dirty[i] = 0;
1609		if (!dirty_i->dirty_segmap[i])
1610			return -ENOMEM;
1611	}
1612
1613	init_dirty_segmap(sbi);
1614	return init_victim_segmap(sbi);
1615}
1616
1617/*
1618 * Update min, max modified time for cost-benefit GC algorithm
1619 */
1620static void init_min_max_mtime(struct f2fs_sb_info *sbi)
1621{
1622	struct sit_info *sit_i = SIT_I(sbi);
1623	unsigned int segno;
1624
1625	mutex_lock(&sit_i->sentry_lock);
1626
1627	sit_i->min_mtime = LLONG_MAX;
1628
1629	for (segno = 0; segno < TOTAL_SEGS(sbi); segno += sbi->segs_per_sec) {
1630		unsigned int i;
1631		unsigned long long mtime = 0;
1632
1633		for (i = 0; i < sbi->segs_per_sec; i++)
1634			mtime += get_seg_entry(sbi, segno + i)->mtime;
1635
1636		mtime = div_u64(mtime, sbi->segs_per_sec);
1637
1638		if (sit_i->min_mtime > mtime)
1639			sit_i->min_mtime = mtime;
1640	}
1641	sit_i->max_mtime = get_mtime(sbi);
1642	mutex_unlock(&sit_i->sentry_lock);
1643}
1644
1645int build_segment_manager(struct f2fs_sb_info *sbi)
1646{
1647	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
1648	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
1649	struct f2fs_sm_info *sm_info;
1650	int err;
1651
1652	sm_info = kzalloc(sizeof(struct f2fs_sm_info), GFP_KERNEL);
1653	if (!sm_info)
1654		return -ENOMEM;
1655
1656	/* init sm info */
1657	sbi->sm_info = sm_info;
1658	INIT_LIST_HEAD(&sm_info->wblist_head);
1659	spin_lock_init(&sm_info->wblist_lock);
1660	sm_info->seg0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
1661	sm_info->main_blkaddr = le32_to_cpu(raw_super->main_blkaddr);
1662	sm_info->segment_count = le32_to_cpu(raw_super->segment_count);
1663	sm_info->reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count);
1664	sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
1665	sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main);
1666	sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
1667
1668	err = build_sit_info(sbi);
1669	if (err)
1670		return err;
1671	err = build_free_segmap(sbi);
1672	if (err)
1673		return err;
1674	err = build_curseg(sbi);
1675	if (err)
1676		return err;
1677
1678	/* reinit free segmap based on SIT */
1679	build_sit_entries(sbi);
1680
1681	init_free_segmap(sbi);
1682	err = build_dirty_segmap(sbi);
1683	if (err)
1684		return err;
1685
1686	init_min_max_mtime(sbi);
1687	return 0;
1688}
1689
1690static void discard_dirty_segmap(struct f2fs_sb_info *sbi,
1691		enum dirty_type dirty_type)
1692{
1693	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
1694
1695	mutex_lock(&dirty_i->seglist_lock);
1696	kfree(dirty_i->dirty_segmap[dirty_type]);
1697	dirty_i->nr_dirty[dirty_type] = 0;
1698	mutex_unlock(&dirty_i->seglist_lock);
1699}
1700
1701void reset_victim_segmap(struct f2fs_sb_info *sbi)
1702{
1703	unsigned int bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi));
1704	memset(DIRTY_I(sbi)->victim_segmap[FG_GC], 0, bitmap_size);
1705}
1706
1707static void destroy_victim_segmap(struct f2fs_sb_info *sbi)
1708{
1709	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
1710
1711	kfree(dirty_i->victim_segmap[FG_GC]);
1712	kfree(dirty_i->victim_segmap[BG_GC]);
1713}
1714
1715static void destroy_dirty_segmap(struct f2fs_sb_info *sbi)
1716{
1717	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
1718	int i;
1719
1720	if (!dirty_i)
1721		return;
1722
1723	/* discard pre-free/dirty segments list */
1724	for (i = 0; i < NR_DIRTY_TYPE; i++)
1725		discard_dirty_segmap(sbi, i);
1726
1727	destroy_victim_segmap(sbi);
1728	SM_I(sbi)->dirty_info = NULL;
1729	kfree(dirty_i);
1730}
1731
1732static void destroy_curseg(struct f2fs_sb_info *sbi)
1733{
1734	struct curseg_info *array = SM_I(sbi)->curseg_array;
1735	int i;
1736
1737	if (!array)
1738		return;
1739	SM_I(sbi)->curseg_array = NULL;
1740	for (i = 0; i < NR_CURSEG_TYPE; i++)
1741		kfree(array[i].sum_blk);
1742	kfree(array);
1743}
1744
1745static void destroy_free_segmap(struct f2fs_sb_info *sbi)
1746{
1747	struct free_segmap_info *free_i = SM_I(sbi)->free_info;
1748	if (!free_i)
1749		return;
1750	SM_I(sbi)->free_info = NULL;
1751	kfree(free_i->free_segmap);
1752	kfree(free_i->free_secmap);
1753	kfree(free_i);
1754}
1755
1756static void destroy_sit_info(struct f2fs_sb_info *sbi)
1757{
1758	struct sit_info *sit_i = SIT_I(sbi);
1759	unsigned int start;
1760
1761	if (!sit_i)
1762		return;
1763
1764	if (sit_i->sentries) {
1765		for (start = 0; start < TOTAL_SEGS(sbi); start++) {
1766			kfree(sit_i->sentries[start].cur_valid_map);
1767			kfree(sit_i->sentries[start].ckpt_valid_map);
1768		}
1769	}
1770	vfree(sit_i->sentries);
1771	vfree(sit_i->sec_entries);
1772	kfree(sit_i->dirty_sentries_bitmap);
1773
1774	SM_I(sbi)->sit_info = NULL;
1775	kfree(sit_i->sit_bitmap);
1776	kfree(sit_i);
1777}
1778
1779void destroy_segment_manager(struct f2fs_sb_info *sbi)
1780{
1781	struct f2fs_sm_info *sm_info = SM_I(sbi);
1782	destroy_dirty_segmap(sbi);
1783	destroy_curseg(sbi);
1784	destroy_free_segmap(sbi);
1785	destroy_sit_info(sbi);
1786	sbi->sm_info = NULL;
1787	kfree(sm_info);
1788}
1789