segment.c revision 53cf95222fad7a962cc03fb61a33e37bcf4f5c9d
1/*
2 * fs/f2fs/segment.c
3 *
4 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5 *             http://www.samsung.com/
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11#include <linux/fs.h>
12#include <linux/f2fs_fs.h>
13#include <linux/bio.h>
14#include <linux/blkdev.h>
15#include <linux/prefetch.h>
16#include <linux/vmalloc.h>
17
18#include "f2fs.h"
19#include "segment.h"
20#include "node.h"
21
22/*
23 * This function balances dirty node and dentry pages.
24 * In addition, it controls garbage collection.
25 */
26void f2fs_balance_fs(struct f2fs_sb_info *sbi)
27{
28	/*
29	 * We should do GC or end up with checkpoint, if there are so many dirty
30	 * dir/node pages without enough free segments.
31	 */
32	if (has_not_enough_free_secs(sbi, 0)) {
33		mutex_lock(&sbi->gc_mutex);
34		f2fs_gc(sbi);
35	}
36}
37
38static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
39		enum dirty_type dirty_type)
40{
41	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
42
43	/* need not be added */
44	if (IS_CURSEG(sbi, segno))
45		return;
46
47	if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type]))
48		dirty_i->nr_dirty[dirty_type]++;
49
50	if (dirty_type == DIRTY) {
51		struct seg_entry *sentry = get_seg_entry(sbi, segno);
52		dirty_type = sentry->type;
53		if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type]))
54			dirty_i->nr_dirty[dirty_type]++;
55	}
56}
57
58static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
59		enum dirty_type dirty_type)
60{
61	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
62
63	if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type]))
64		dirty_i->nr_dirty[dirty_type]--;
65
66	if (dirty_type == DIRTY) {
67		struct seg_entry *sentry = get_seg_entry(sbi, segno);
68		dirty_type = sentry->type;
69		if (test_and_clear_bit(segno,
70					dirty_i->dirty_segmap[dirty_type]))
71			dirty_i->nr_dirty[dirty_type]--;
72		clear_bit(segno, dirty_i->victim_segmap[FG_GC]);
73		clear_bit(segno, dirty_i->victim_segmap[BG_GC]);
74	}
75}
76
77/*
78 * Should not occur error such as -ENOMEM.
79 * Adding dirty entry into seglist is not critical operation.
80 * If a given segment is one of current working segments, it won't be added.
81 */
82void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
83{
84	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
85	unsigned short valid_blocks;
86
87	if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
88		return;
89
90	mutex_lock(&dirty_i->seglist_lock);
91
92	valid_blocks = get_valid_blocks(sbi, segno, 0);
93
94	if (valid_blocks == 0) {
95		__locate_dirty_segment(sbi, segno, PRE);
96		__remove_dirty_segment(sbi, segno, DIRTY);
97	} else if (valid_blocks < sbi->blocks_per_seg) {
98		__locate_dirty_segment(sbi, segno, DIRTY);
99	} else {
100		/* Recovery routine with SSR needs this */
101		__remove_dirty_segment(sbi, segno, DIRTY);
102	}
103
104	mutex_unlock(&dirty_i->seglist_lock);
105	return;
106}
107
108/*
109 * Should call clear_prefree_segments after checkpoint is done.
110 */
111static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
112{
113	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
114	unsigned int segno, offset = 0;
115	unsigned int total_segs = TOTAL_SEGS(sbi);
116
117	mutex_lock(&dirty_i->seglist_lock);
118	while (1) {
119		segno = find_next_bit(dirty_i->dirty_segmap[PRE], total_segs,
120				offset);
121		if (segno >= total_segs)
122			break;
123		__set_test_and_free(sbi, segno);
124		offset = segno + 1;
125	}
126	mutex_unlock(&dirty_i->seglist_lock);
127}
128
129void clear_prefree_segments(struct f2fs_sb_info *sbi)
130{
131	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
132	unsigned int segno, offset = 0;
133	unsigned int total_segs = TOTAL_SEGS(sbi);
134
135	mutex_lock(&dirty_i->seglist_lock);
136	while (1) {
137		segno = find_next_bit(dirty_i->dirty_segmap[PRE], total_segs,
138				offset);
139		if (segno >= total_segs)
140			break;
141
142		offset = segno + 1;
143		if (test_and_clear_bit(segno, dirty_i->dirty_segmap[PRE]))
144			dirty_i->nr_dirty[PRE]--;
145
146		/* Let's use trim */
147		if (test_opt(sbi, DISCARD))
148			blkdev_issue_discard(sbi->sb->s_bdev,
149					START_BLOCK(sbi, segno) <<
150					sbi->log_sectors_per_block,
151					1 << (sbi->log_sectors_per_block +
152						sbi->log_blocks_per_seg),
153					GFP_NOFS, 0);
154	}
155	mutex_unlock(&dirty_i->seglist_lock);
156}
157
158static void __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno)
159{
160	struct sit_info *sit_i = SIT_I(sbi);
161	if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap))
162		sit_i->dirty_sentries++;
163}
164
165static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type,
166					unsigned int segno, int modified)
167{
168	struct seg_entry *se = get_seg_entry(sbi, segno);
169	se->type = type;
170	if (modified)
171		__mark_sit_entry_dirty(sbi, segno);
172}
173
174static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
175{
176	struct seg_entry *se;
177	unsigned int segno, offset;
178	long int new_vblocks;
179
180	segno = GET_SEGNO(sbi, blkaddr);
181
182	se = get_seg_entry(sbi, segno);
183	new_vblocks = se->valid_blocks + del;
184	offset = GET_SEGOFF_FROM_SEG0(sbi, blkaddr) & (sbi->blocks_per_seg - 1);
185
186	BUG_ON((new_vblocks >> (sizeof(unsigned short) << 3) ||
187				(new_vblocks > sbi->blocks_per_seg)));
188
189	se->valid_blocks = new_vblocks;
190	se->mtime = get_mtime(sbi);
191	SIT_I(sbi)->max_mtime = se->mtime;
192
193	/* Update valid block bitmap */
194	if (del > 0) {
195		if (f2fs_set_bit(offset, se->cur_valid_map))
196			BUG();
197	} else {
198		if (!f2fs_clear_bit(offset, se->cur_valid_map))
199			BUG();
200	}
201	if (!f2fs_test_bit(offset, se->ckpt_valid_map))
202		se->ckpt_valid_blocks += del;
203
204	__mark_sit_entry_dirty(sbi, segno);
205
206	/* update total number of valid blocks to be written in ckpt area */
207	SIT_I(sbi)->written_valid_blocks += del;
208
209	if (sbi->segs_per_sec > 1)
210		get_sec_entry(sbi, segno)->valid_blocks += del;
211}
212
213static void refresh_sit_entry(struct f2fs_sb_info *sbi,
214			block_t old_blkaddr, block_t new_blkaddr)
215{
216	update_sit_entry(sbi, new_blkaddr, 1);
217	if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
218		update_sit_entry(sbi, old_blkaddr, -1);
219}
220
221void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
222{
223	unsigned int segno = GET_SEGNO(sbi, addr);
224	struct sit_info *sit_i = SIT_I(sbi);
225
226	BUG_ON(addr == NULL_ADDR);
227	if (addr == NEW_ADDR)
228		return;
229
230	/* add it into sit main buffer */
231	mutex_lock(&sit_i->sentry_lock);
232
233	update_sit_entry(sbi, addr, -1);
234
235	/* add it into dirty seglist */
236	locate_dirty_segment(sbi, segno);
237
238	mutex_unlock(&sit_i->sentry_lock);
239}
240
241/*
242 * This function should be resided under the curseg_mutex lock
243 */
244static void __add_sum_entry(struct f2fs_sb_info *sbi, int type,
245		struct f2fs_summary *sum, unsigned short offset)
246{
247	struct curseg_info *curseg = CURSEG_I(sbi, type);
248	void *addr = curseg->sum_blk;
249	addr += offset * sizeof(struct f2fs_summary);
250	memcpy(addr, sum, sizeof(struct f2fs_summary));
251	return;
252}
253
254/*
255 * Calculate the number of current summary pages for writing
256 */
257int npages_for_summary_flush(struct f2fs_sb_info *sbi)
258{
259	int total_size_bytes = 0;
260	int valid_sum_count = 0;
261	int i, sum_space;
262
263	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
264		if (sbi->ckpt->alloc_type[i] == SSR)
265			valid_sum_count += sbi->blocks_per_seg;
266		else
267			valid_sum_count += curseg_blkoff(sbi, i);
268	}
269
270	total_size_bytes = valid_sum_count * (SUMMARY_SIZE + 1)
271			+ sizeof(struct nat_journal) + 2
272			+ sizeof(struct sit_journal) + 2;
273	sum_space = PAGE_CACHE_SIZE - SUM_FOOTER_SIZE;
274	if (total_size_bytes < sum_space)
275		return 1;
276	else if (total_size_bytes < 2 * sum_space)
277		return 2;
278	return 3;
279}
280
/*
 * Return the meta page located at GET_SUM_BLOCK(sbi, segno), i.e. the
 * summary block address of @segno, as obtained from get_meta_page().
 * Caller should put this summary page.
 */
struct page *get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno)
{
	return get_meta_page(sbi, GET_SUM_BLOCK(sbi, segno));
}
288
289static void write_sum_page(struct f2fs_sb_info *sbi,
290			struct f2fs_summary_block *sum_blk, block_t blk_addr)
291{
292	struct page *page = grab_meta_page(sbi, blk_addr);
293	void *kaddr = page_address(page);
294	memcpy(kaddr, sum_blk, PAGE_CACHE_SIZE);
295	set_page_dirty(page);
296	f2fs_put_page(page, 1);
297}
298
299static unsigned int check_prefree_segments(struct f2fs_sb_info *sbi,
300					int ofs_unit, int type)
301{
302	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
303	unsigned long *prefree_segmap = dirty_i->dirty_segmap[PRE];
304	unsigned int segno, next_segno, i;
305	int ofs = 0;
306
307	/*
308	 * If there is not enough reserved sections,
309	 * we should not reuse prefree segments.
310	 */
311	if (has_not_enough_free_secs(sbi, 0))
312		return NULL_SEGNO;
313
314	/*
315	 * NODE page should not reuse prefree segment,
316	 * since those information is used for SPOR.
317	 */
318	if (IS_NODESEG(type))
319		return NULL_SEGNO;
320next:
321	segno = find_next_bit(prefree_segmap, TOTAL_SEGS(sbi), ofs++);
322	ofs = ((segno / ofs_unit) * ofs_unit) + ofs_unit;
323	if (segno < TOTAL_SEGS(sbi)) {
324		/* skip intermediate segments in a section */
325		if (segno % ofs_unit)
326			goto next;
327
328		/* skip if whole section is not prefree */
329		next_segno = find_next_zero_bit(prefree_segmap,
330						TOTAL_SEGS(sbi), segno + 1);
331		if (next_segno - segno < ofs_unit)
332			goto next;
333
334		/* skip if whole section was not free at the last checkpoint */
335		for (i = 0; i < ofs_unit; i++)
336			if (get_seg_entry(sbi, segno)->ckpt_valid_blocks)
337				goto next;
338		return segno;
339	}
340	return NULL_SEGNO;
341}
342
/*
 * Pick a free segment using the free segment/section bitmaps and mark it
 * in use.
 *
 * @newseg:  in:  segment number used as the search hint
 *           out: segment number that was allocated
 * @new_sec: when false, and *newseg is not the last segment of its section,
 *           the next zero bit of free_segmap after *newseg is taken if
 *           there is one
 * @dir:     ALLOC_RIGHT restarts the section search from 0 when no section
 *           is free at or after the hint; otherwise the search continues
 *           to the left of the hint
 *
 * The whole search runs with free_i->segmap_lock write-held.
 * This function should be returned with success, otherwise BUG.
 */
static void get_new_segment(struct f2fs_sb_info *sbi,
			unsigned int *newseg, bool new_sec, int dir)
{
	struct free_segmap_info *free_i = FREE_I(sbi);
	unsigned int segno, secno, zoneno;
	unsigned int total_zones = TOTAL_SECS(sbi) / sbi->secs_per_zone;
	unsigned int hint = *newseg / sbi->segs_per_sec;
	unsigned int old_zoneno = GET_ZONENO_FROM_SEGNO(sbi, *newseg);
	unsigned int left_start = hint;
	bool init = true;
	int go_left = 0;
	int i;

	write_lock(&free_i->segmap_lock);

	/* try the next free segment before looking for a whole free section */
	if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) {
		segno = find_next_zero_bit(free_i->free_segmap,
					TOTAL_SEGS(sbi), *newseg + 1);
		if (segno < TOTAL_SEGS(sbi))
			goto got_it;
	}
find_other_zone:
	/* search a free section at or to the right of the hint */
	secno = find_next_zero_bit(free_i->free_secmap, TOTAL_SECS(sbi), hint);
	if (secno >= TOTAL_SECS(sbi)) {
		if (dir == ALLOC_RIGHT) {
			/* wrap around to the first section */
			secno = find_next_zero_bit(free_i->free_secmap,
							TOTAL_SECS(sbi), 0);
			BUG_ON(secno >= TOTAL_SECS(sbi));
		} else {
			go_left = 1;
			left_start = hint - 1;
		}
	}
	if (go_left == 0)
		goto skip_left;

	/*
	 * walk leftwards until a free section is met; once section 0 is
	 * reached and still in use, fall back to a search from the start
	 */
	while (test_bit(left_start, free_i->free_secmap)) {
		if (left_start > 0) {
			left_start--;
			continue;
		}
		left_start = find_next_zero_bit(free_i->free_secmap,
							TOTAL_SECS(sbi), 0);
		BUG_ON(left_start >= TOTAL_SECS(sbi));
		break;
	}
	secno = left_start;
skip_left:
	hint = secno;
	segno = secno * sbi->segs_per_sec;
	zoneno = secno / sbi->secs_per_zone;

	/* give up on finding another zone */
	if (!init)
		goto got_it;
	if (sbi->secs_per_zone == 1)
		goto got_it;
	if (zoneno == old_zoneno)
		goto got_it;
	if (dir == ALLOC_LEFT) {
		if (!go_left && zoneno + 1 >= total_zones)
			goto got_it;
		if (go_left && zoneno == 0)
			goto got_it;
	}
	/* check whether any current segment already lives in this zone */
	for (i = 0; i < NR_CURSEG_TYPE; i++)
		if (CURSEG_I(sbi, i)->zone == zoneno)
			break;

	if (i < NR_CURSEG_TYPE) {
		/* zone is in use, try another */
		if (go_left)
			hint = zoneno * sbi->secs_per_zone - 1;
		else if (zoneno + 1 >= total_zones)
			hint = 0;
		else
			hint = (zoneno + 1) * sbi->secs_per_zone;
		/* only one retry: the next pass accepts whatever it finds */
		init = false;
		goto find_other_zone;
	}
got_it:
	/* set it as dirty segment in free segmap */
	BUG_ON(test_bit(segno, free_i->free_segmap));
	__set_inuse(sbi, segno);
	*newseg = segno;
	write_unlock(&free_i->segmap_lock);
}
434
435static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
436{
437	struct curseg_info *curseg = CURSEG_I(sbi, type);
438	struct summary_footer *sum_footer;
439
440	curseg->segno = curseg->next_segno;
441	curseg->zone = GET_ZONENO_FROM_SEGNO(sbi, curseg->segno);
442	curseg->next_blkoff = 0;
443	curseg->next_segno = NULL_SEGNO;
444
445	sum_footer = &(curseg->sum_blk->footer);
446	memset(sum_footer, 0, sizeof(struct summary_footer));
447	if (IS_DATASEG(type))
448		SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA);
449	if (IS_NODESEG(type))
450		SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE);
451	__set_sit_entry_type(sbi, type, curseg->segno, modified);
452}
453
454/*
455 * Allocate a current working segment.
456 * This function always allocates a free segment in LFS manner.
457 */
458static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
459{
460	struct curseg_info *curseg = CURSEG_I(sbi, type);
461	unsigned int segno = curseg->segno;
462	int dir = ALLOC_LEFT;
463
464	write_sum_page(sbi, curseg->sum_blk,
465				GET_SUM_BLOCK(sbi, curseg->segno));
466	if (type == CURSEG_WARM_DATA || type == CURSEG_COLD_DATA)
467		dir = ALLOC_RIGHT;
468
469	if (test_opt(sbi, NOHEAP))
470		dir = ALLOC_RIGHT;
471
472	get_new_segment(sbi, &segno, new_sec, dir);
473	curseg->next_segno = segno;
474	reset_curseg(sbi, type, 1);
475	curseg->alloc_type = LFS;
476}
477
478static void __next_free_blkoff(struct f2fs_sb_info *sbi,
479			struct curseg_info *seg, block_t start)
480{
481	struct seg_entry *se = get_seg_entry(sbi, seg->segno);
482	block_t ofs;
483	for (ofs = start; ofs < sbi->blocks_per_seg; ofs++) {
484		if (!f2fs_test_bit(ofs, se->ckpt_valid_map)
485			&& !f2fs_test_bit(ofs, se->cur_valid_map))
486			break;
487	}
488	seg->next_blkoff = ofs;
489}
490
491/*
492 * If a segment is written by LFS manner, next block offset is just obtained
493 * by increasing the current block offset. However, if a segment is written by
494 * SSR manner, next block offset obtained by calling __next_free_blkoff
495 */
496static void __refresh_next_blkoff(struct f2fs_sb_info *sbi,
497				struct curseg_info *seg)
498{
499	if (seg->alloc_type == SSR)
500		__next_free_blkoff(sbi, seg, seg->next_blkoff + 1);
501	else
502		seg->next_blkoff++;
503}
504
505/*
506 * This function always allocates a used segment (from dirty seglist) by SSR
507 * manner, so it should recover the existing segment information of valid blocks
508 */
509static void change_curseg(struct f2fs_sb_info *sbi, int type, bool reuse)
510{
511	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
512	struct curseg_info *curseg = CURSEG_I(sbi, type);
513	unsigned int new_segno = curseg->next_segno;
514	struct f2fs_summary_block *sum_node;
515	struct page *sum_page;
516
517	write_sum_page(sbi, curseg->sum_blk,
518				GET_SUM_BLOCK(sbi, curseg->segno));
519	__set_test_and_inuse(sbi, new_segno);
520
521	mutex_lock(&dirty_i->seglist_lock);
522	__remove_dirty_segment(sbi, new_segno, PRE);
523	__remove_dirty_segment(sbi, new_segno, DIRTY);
524	mutex_unlock(&dirty_i->seglist_lock);
525
526	reset_curseg(sbi, type, 1);
527	curseg->alloc_type = SSR;
528	__next_free_blkoff(sbi, curseg, 0);
529
530	if (reuse) {
531		sum_page = get_sum_page(sbi, new_segno);
532		sum_node = (struct f2fs_summary_block *)page_address(sum_page);
533		memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE);
534		f2fs_put_page(sum_page, 1);
535	}
536}
537
538static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
539{
540	struct curseg_info *curseg = CURSEG_I(sbi, type);
541	const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops;
542
543	if (IS_NODESEG(type) || !has_not_enough_free_secs(sbi, 0))
544		return v_ops->get_victim(sbi,
545				&(curseg)->next_segno, BG_GC, type, SSR);
546
547	/* For data segments, let's do SSR more intensively */
548	for (; type >= CURSEG_HOT_DATA; type--)
549		if (v_ops->get_victim(sbi, &(curseg)->next_segno,
550						BG_GC, type, SSR))
551			return 1;
552	return 0;
553}
554
555/*
556 * flush out current segment and replace it with new segment
557 * This function should be returned with success, otherwise BUG
558 */
559static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
560						int type, bool force)
561{
562	struct curseg_info *curseg = CURSEG_I(sbi, type);
563	unsigned int ofs_unit;
564
565	if (force) {
566		new_curseg(sbi, type, true);
567		goto out;
568	}
569
570	ofs_unit = need_SSR(sbi) ? 1 : sbi->segs_per_sec;
571	curseg->next_segno = check_prefree_segments(sbi, ofs_unit, type);
572
573	if (curseg->next_segno != NULL_SEGNO)
574		change_curseg(sbi, type, false);
575	else if (type == CURSEG_WARM_NODE)
576		new_curseg(sbi, type, false);
577	else if (need_SSR(sbi) && get_ssr_segment(sbi, type))
578		change_curseg(sbi, type, true);
579	else
580		new_curseg(sbi, type, false);
581out:
582	sbi->segment_count[curseg->alloc_type]++;
583}
584
585void allocate_new_segments(struct f2fs_sb_info *sbi)
586{
587	struct curseg_info *curseg;
588	unsigned int old_curseg;
589	int i;
590
591	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
592		curseg = CURSEG_I(sbi, i);
593		old_curseg = curseg->segno;
594		SIT_I(sbi)->s_ops->allocate_segment(sbi, i, true);
595		locate_dirty_segment(sbi, old_curseg);
596	}
597}
598
/*
 * Default segment allocation operations: allocate_segment is served by
 * allocate_segment_by_default().
 */
static const struct segment_allocation default_salloc_ops = {
	.allocate_segment = allocate_segment_by_default,
};
602
/*
 * Completion handler installed on write bios by do_submit_bio().
 *
 * Walks the bio's pages from the last vector back to the first.  When the
 * bio is not up to date, each page is flagged with an error, AS_EIO is set
 * on its mapping (if any), CP_ERROR_FLAG is set on the checkpoint and the
 * superblock is switched to MS_RDONLY.  Every page then has its writeback
 * ended and the F2FS_WRITEBACK page count decremented.  Finally a
 * synchronous submitter is woken up, and the private data and the bio are
 * released.
 */
static void f2fs_end_io_write(struct bio *bio, int err)
{
	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
	struct bio_private *p = bio->bi_private;

	do {
		struct page *page = bvec->bv_page;

		/* step to the previous vector and prefetch its page flags */
		if (--bvec >= bio->bi_io_vec)
			prefetchw(&bvec->bv_page->flags);
		if (!uptodate) {
			SetPageError(page);
			if (page->mapping)
				set_bit(AS_EIO, &page->mapping->flags);
			set_ckpt_flags(p->sbi->ckpt, CP_ERROR_FLAG);
			p->sbi->sb->s_flags |= MS_RDONLY;
		}
		end_page_writeback(page);
		dec_page_count(p->sbi, F2FS_WRITEBACK);
	} while (bvec >= bio->bi_io_vec);

	/* wake up the submitter waiting in do_submit_bio() */
	if (p->is_sync)
		complete(p->wait);
	kfree(p);
	bio_put(bio);
}
630
631struct bio *f2fs_bio_alloc(struct block_device *bdev, int npages)
632{
633	struct bio *bio;
634	struct bio_private *priv;
635retry:
636	priv = kmalloc(sizeof(struct bio_private), GFP_NOFS);
637	if (!priv) {
638		cond_resched();
639		goto retry;
640	}
641
642	/* No failure on bio allocation */
643	bio = bio_alloc(GFP_NOIO, npages);
644	bio->bi_bdev = bdev;
645	bio->bi_private = priv;
646	return bio;
647}
648
649static void do_submit_bio(struct f2fs_sb_info *sbi,
650				enum page_type type, bool sync)
651{
652	int rw = sync ? WRITE_SYNC : WRITE;
653	enum page_type btype = type > META ? META : type;
654
655	if (type >= META_FLUSH)
656		rw = WRITE_FLUSH_FUA;
657
658	if (sbi->bio[btype]) {
659		struct bio_private *p = sbi->bio[btype]->bi_private;
660		p->sbi = sbi;
661		sbi->bio[btype]->bi_end_io = f2fs_end_io_write;
662		if (type == META_FLUSH) {
663			DECLARE_COMPLETION_ONSTACK(wait);
664			p->is_sync = true;
665			p->wait = &wait;
666			submit_bio(rw, sbi->bio[btype]);
667			wait_for_completion(&wait);
668		} else {
669			p->is_sync = false;
670			submit_bio(rw, sbi->bio[btype]);
671		}
672		sbi->bio[btype] = NULL;
673	}
674}
675
/*
 * Submit the pending write bio of @type, if any, while holding bio_sem for
 * writing.  @type and @sync are handed through to do_submit_bio().
 */
void f2fs_submit_bio(struct f2fs_sb_info *sbi, enum page_type type, bool sync)
{
	down_write(&sbi->bio_sem);
	do_submit_bio(sbi, type, sync);
	up_write(&sbi->bio_sem);
}
682
683static void submit_write_page(struct f2fs_sb_info *sbi, struct page *page,
684				block_t blk_addr, enum page_type type)
685{
686	struct block_device *bdev = sbi->sb->s_bdev;
687
688	verify_block_addr(sbi, blk_addr);
689
690	down_write(&sbi->bio_sem);
691
692	inc_page_count(sbi, F2FS_WRITEBACK);
693
694	if (sbi->bio[type] && sbi->last_block_in_bio[type] != blk_addr - 1)
695		do_submit_bio(sbi, type, false);
696alloc_new:
697	if (sbi->bio[type] == NULL) {
698		sbi->bio[type] = f2fs_bio_alloc(bdev, bio_get_nr_vecs(bdev));
699		sbi->bio[type]->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr);
700		/*
701		 * The end_io will be assigned at the sumbission phase.
702		 * Until then, let bio_add_page() merge consecutive IOs as much
703		 * as possible.
704		 */
705	}
706
707	if (bio_add_page(sbi->bio[type], page, PAGE_CACHE_SIZE, 0) <
708							PAGE_CACHE_SIZE) {
709		do_submit_bio(sbi, type, false);
710		goto alloc_new;
711	}
712
713	sbi->last_block_in_bio[type] = blk_addr;
714
715	up_write(&sbi->bio_sem);
716}
717
718static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type)
719{
720	struct curseg_info *curseg = CURSEG_I(sbi, type);
721	if (curseg->next_blkoff < sbi->blocks_per_seg)
722		return true;
723	return false;
724}
725
726static int __get_segment_type_2(struct page *page, enum page_type p_type)
727{
728	if (p_type == DATA)
729		return CURSEG_HOT_DATA;
730	else
731		return CURSEG_HOT_NODE;
732}
733
734static int __get_segment_type_4(struct page *page, enum page_type p_type)
735{
736	if (p_type == DATA) {
737		struct inode *inode = page->mapping->host;
738
739		if (S_ISDIR(inode->i_mode))
740			return CURSEG_HOT_DATA;
741		else
742			return CURSEG_COLD_DATA;
743	} else {
744		if (IS_DNODE(page) && !is_cold_node(page))
745			return CURSEG_HOT_NODE;
746		else
747			return CURSEG_COLD_NODE;
748	}
749}
750
751static int __get_segment_type_6(struct page *page, enum page_type p_type)
752{
753	if (p_type == DATA) {
754		struct inode *inode = page->mapping->host;
755
756		if (S_ISDIR(inode->i_mode))
757			return CURSEG_HOT_DATA;
758		else if (is_cold_data(page) || is_cold_file(inode))
759			return CURSEG_COLD_DATA;
760		else
761			return CURSEG_WARM_DATA;
762	} else {
763		if (IS_DNODE(page))
764			return is_cold_node(page) ? CURSEG_WARM_NODE :
765						CURSEG_HOT_NODE;
766		else
767			return CURSEG_COLD_NODE;
768	}
769}
770
771static int __get_segment_type(struct page *page, enum page_type p_type)
772{
773	struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb);
774	switch (sbi->active_logs) {
775	case 2:
776		return __get_segment_type_2(page, p_type);
777	case 4:
778		return __get_segment_type_4(page, p_type);
779	}
780	/* NR_CURSEG_TYPE(6) logs by default */
781	BUG_ON(sbi->active_logs != NR_CURSEG_TYPE);
782	return __get_segment_type_6(page, p_type);
783}
784
/*
 * Assign the next free block of the matching log to @page, update the
 * summary and SIT information, and queue the page for writing.
 *
 * @old_blkaddr: block address whose accounting is dropped through
 *               refresh_sit_entry() (skipped there if it maps to NULL_SEGNO)
 * @new_blkaddr: out: block address assigned to the page
 * @sum:         summary entry recorded for the new block
 * @p_type:      page type; selects the log and, for NODE, makes the node
 *               footer point at the following free block
 *
 * Locking: curseg_mutex of the chosen log is held for the whole operation,
 * sit_i->sentry_lock is nested inside it for the SIT/dirty list updates.
 */
static void do_write_page(struct f2fs_sb_info *sbi, struct page *page,
			block_t old_blkaddr, block_t *new_blkaddr,
			struct f2fs_summary *sum, enum page_type p_type)
{
	struct sit_info *sit_i = SIT_I(sbi);
	struct curseg_info *curseg;
	unsigned int old_cursegno;
	int type;

	type = __get_segment_type(page, p_type);
	curseg = CURSEG_I(sbi, type);

	mutex_lock(&curseg->curseg_mutex);

	*new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
	/* remembered because the log may move to another segment below */
	old_cursegno = curseg->segno;

	/*
	 * __add_sum_entry should be resided under the curseg_mutex
	 * because, this function updates a summary entry in the
	 * current summary block.
	 */
	__add_sum_entry(sbi, type, sum, curseg->next_blkoff);

	mutex_lock(&sit_i->sentry_lock);
	__refresh_next_blkoff(sbi, curseg);
	sbi->block_count[curseg->alloc_type]++;

	/*
	 * SIT information should be updated before segment allocation,
	 * since SSR needs latest valid block information.
	 */
	refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr);

	/* the current segment is exhausted: move the log to another one */
	if (!__has_curseg_space(sbi, type))
		sit_i->s_ops->allocate_segment(sbi, type, false);

	locate_dirty_segment(sbi, old_cursegno);
	locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
	mutex_unlock(&sit_i->sentry_lock);

	if (p_type == NODE)
		fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg));

	/* writeout dirty page into bdev */
	submit_write_page(sbi, page, *new_blkaddr, p_type);

	mutex_unlock(&curseg->curseg_mutex);
}
834
/*
 * Put @page under writeback and queue it as a META page; the page's index
 * is used as the block address.
 */
void write_meta_page(struct f2fs_sb_info *sbi, struct page *page)
{
	set_page_writeback(page);
	submit_write_page(sbi, page, page->index, META);
}
840
/*
 * Write a node page out of place through do_write_page().
 *
 * The summary is built from @nid with the two remaining set_summary()
 * arguments zero.  The block address chosen for the page is returned
 * through @new_blkaddr.
 */
void write_node_page(struct f2fs_sb_info *sbi, struct page *page,
		unsigned int nid, block_t old_blkaddr, block_t *new_blkaddr)
{
	struct f2fs_summary sum;
	set_summary(&sum, nid, 0, 0);
	do_write_page(sbi, page, old_blkaddr, new_blkaddr, &sum, NODE);
}
848
/*
 * Write a data page out of place through do_write_page().
 *
 * The summary is built from the node id and in-node offset held in @dn
 * plus the version obtained from get_node_info() for that node.  The block
 * address chosen for the page is returned through @new_blkaddr.
 * @old_blkaddr must not be NULL_ADDR (BUG otherwise).
 */
void write_data_page(struct inode *inode, struct page *page,
		struct dnode_of_data *dn, block_t old_blkaddr,
		block_t *new_blkaddr)
{
	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
	struct f2fs_summary sum;
	struct node_info ni;

	BUG_ON(old_blkaddr == NULL_ADDR);
	get_node_info(sbi, dn->nid, &ni);
	set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);

	do_write_page(sbi, page, old_blkaddr,
			new_blkaddr, &sum, DATA);
}
864
/*
 * Queue @page for an in-place write at @old_blk_addr as a DATA page.
 * No summary or SIT information is touched by this function.
 */
void rewrite_data_page(struct f2fs_sb_info *sbi, struct page *page,
					block_t old_blk_addr)
{
	submit_write_page(sbi, page, old_blk_addr, DATA);
}
870
/*
 * Re-establish the bookkeeping for a data block located at @new_blkaddr.
 *
 * The log is chosen from the type stored in the segment entry of the
 * target segment; for a segment without valid blocks that is not a current
 * segment, cold data is used when @old_blkaddr is NULL_ADDR and warm data
 * otherwise.  The chosen log is switched to the target segment if needed,
 * its write position is set to the block's offset, @sum is recorded in the
 * summary block, and the SIT and dirty list information is refreshed.
 * No I/O is submitted for @page here.
 *
 * Locking: curseg_mutex, then sit_i->sentry_lock.
 */
void recover_data_page(struct f2fs_sb_info *sbi,
			struct page *page, struct f2fs_summary *sum,
			block_t old_blkaddr, block_t new_blkaddr)
{
	struct sit_info *sit_i = SIT_I(sbi);
	struct curseg_info *curseg;
	unsigned int segno, old_cursegno;
	struct seg_entry *se;
	int type;

	segno = GET_SEGNO(sbi, new_blkaddr);
	se = get_seg_entry(sbi, segno);
	type = se->type;

	/* the stored type is not relied upon for an empty, inactive segment */
	if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) {
		if (old_blkaddr == NULL_ADDR)
			type = CURSEG_COLD_DATA;
		else
			type = CURSEG_WARM_DATA;
	}
	curseg = CURSEG_I(sbi, type);

	mutex_lock(&curseg->curseg_mutex);
	mutex_lock(&sit_i->sentry_lock);

	old_cursegno = curseg->segno;

	/* change the current segment */
	if (segno != curseg->segno) {
		curseg->next_segno = segno;
		change_curseg(sbi, type, true);
	}

	/* point the log at the exact block inside the segment */
	curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, new_blkaddr) &
					(sbi->blocks_per_seg - 1);
	__add_sum_entry(sbi, type, sum, curseg->next_blkoff);

	refresh_sit_entry(sbi, old_blkaddr, new_blkaddr);

	locate_dirty_segment(sbi, old_cursegno);
	locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));

	mutex_unlock(&sit_i->sentry_lock);
	mutex_unlock(&curseg->curseg_mutex);
}
916
/*
 * Write a node page to the fixed location @new_blkaddr using the warm node
 * log.
 *
 * The log is switched to the segment of @new_blkaddr if needed and @sum is
 * recorded at the block's offset.  The log is then moved on to the block
 * address returned by next_blkaddr_of_node() for the page, switching
 * segments once more if that address lies in another segment.  The page is
 * put under writeback, queued, and the NODE bio is submitted with the sync
 * flag set before the SIT and dirty list information is refreshed.
 *
 * Locking: curseg_mutex, then sit_i->sentry_lock; both are held across the
 * bio submission.
 */
void rewrite_node_page(struct f2fs_sb_info *sbi,
			struct page *page, struct f2fs_summary *sum,
			block_t old_blkaddr, block_t new_blkaddr)
{
	struct sit_info *sit_i = SIT_I(sbi);
	int type = CURSEG_WARM_NODE;
	struct curseg_info *curseg;
	unsigned int segno, old_cursegno;
	block_t next_blkaddr = next_blkaddr_of_node(page);
	unsigned int next_segno = GET_SEGNO(sbi, next_blkaddr);

	curseg = CURSEG_I(sbi, type);

	mutex_lock(&curseg->curseg_mutex);
	mutex_lock(&sit_i->sentry_lock);

	segno = GET_SEGNO(sbi, new_blkaddr);
	old_cursegno = curseg->segno;

	/* change the current segment */
	if (segno != curseg->segno) {
		curseg->next_segno = segno;
		change_curseg(sbi, type, true);
	}
	curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, new_blkaddr) &
					(sbi->blocks_per_seg - 1);
	__add_sum_entry(sbi, type, sum, curseg->next_blkoff);

	/* change the current log to the next block addr in advance */
	if (next_segno != segno) {
		curseg->next_segno = next_segno;
		change_curseg(sbi, type, true);
	}
	curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, next_blkaddr) &
					(sbi->blocks_per_seg - 1);

	/* rewrite node page */
	set_page_writeback(page);
	submit_write_page(sbi, page, new_blkaddr, NODE);
	f2fs_submit_bio(sbi, NODE, true);
	refresh_sit_entry(sbi, old_blkaddr, new_blkaddr);

	locate_dirty_segment(sbi, old_cursegno);
	locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));

	mutex_unlock(&sit_i->sentry_lock);
	mutex_unlock(&curseg->curseg_mutex);
}
965
966static int read_compacted_summaries(struct f2fs_sb_info *sbi)
967{
968	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
969	struct curseg_info *seg_i;
970	unsigned char *kaddr;
971	struct page *page;
972	block_t start;
973	int i, j, offset;
974
975	start = start_sum_block(sbi);
976
977	page = get_meta_page(sbi, start++);
978	kaddr = (unsigned char *)page_address(page);
979
980	/* Step 1: restore nat cache */
981	seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
982	memcpy(&seg_i->sum_blk->n_nats, kaddr, SUM_JOURNAL_SIZE);
983
984	/* Step 2: restore sit cache */
985	seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
986	memcpy(&seg_i->sum_blk->n_sits, kaddr + SUM_JOURNAL_SIZE,
987						SUM_JOURNAL_SIZE);
988	offset = 2 * SUM_JOURNAL_SIZE;
989
990	/* Step 3: restore summary entries */
991	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
992		unsigned short blk_off;
993		unsigned int segno;
994
995		seg_i = CURSEG_I(sbi, i);
996		segno = le32_to_cpu(ckpt->cur_data_segno[i]);
997		blk_off = le16_to_cpu(ckpt->cur_data_blkoff[i]);
998		seg_i->next_segno = segno;
999		reset_curseg(sbi, i, 0);
1000		seg_i->alloc_type = ckpt->alloc_type[i];
1001		seg_i->next_blkoff = blk_off;
1002
1003		if (seg_i->alloc_type == SSR)
1004			blk_off = sbi->blocks_per_seg;
1005
1006		for (j = 0; j < blk_off; j++) {
1007			struct f2fs_summary *s;
1008			s = (struct f2fs_summary *)(kaddr + offset);
1009			seg_i->sum_blk->entries[j] = *s;
1010			offset += SUMMARY_SIZE;
1011			if (offset + SUMMARY_SIZE <= PAGE_CACHE_SIZE -
1012						SUM_FOOTER_SIZE)
1013				continue;
1014
1015			f2fs_put_page(page, 1);
1016			page = NULL;
1017
1018			page = get_meta_page(sbi, start++);
1019			kaddr = (unsigned char *)page_address(page);
1020			offset = 0;
1021		}
1022	}
1023	f2fs_put_page(page, 1);
1024	return 0;
1025}
1026
1027static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
1028{
1029	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
1030	struct f2fs_summary_block *sum;
1031	struct curseg_info *curseg;
1032	struct page *new;
1033	unsigned short blk_off;
1034	unsigned int segno = 0;
1035	block_t blk_addr = 0;
1036
1037	/* get segment number and block addr */
1038	if (IS_DATASEG(type)) {
1039		segno = le32_to_cpu(ckpt->cur_data_segno[type]);
1040		blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type -
1041							CURSEG_HOT_DATA]);
1042		if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG))
1043			blk_addr = sum_blk_addr(sbi, NR_CURSEG_TYPE, type);
1044		else
1045			blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type);
1046	} else {
1047		segno = le32_to_cpu(ckpt->cur_node_segno[type -
1048							CURSEG_HOT_NODE]);
1049		blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type -
1050							CURSEG_HOT_NODE]);
1051		if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG))
1052			blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE,
1053							type - CURSEG_HOT_NODE);
1054		else
1055			blk_addr = GET_SUM_BLOCK(sbi, segno);
1056	}
1057
1058	new = get_meta_page(sbi, blk_addr);
1059	sum = (struct f2fs_summary_block *)page_address(new);
1060
1061	if (IS_NODESEG(type)) {
1062		if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG)) {
1063			struct f2fs_summary *ns = &sum->entries[0];
1064			int i;
1065			for (i = 0; i < sbi->blocks_per_seg; i++, ns++) {
1066				ns->version = 0;
1067				ns->ofs_in_node = 0;
1068			}
1069		} else {
1070			if (restore_node_summary(sbi, segno, sum)) {
1071				f2fs_put_page(new, 1);
1072				return -EINVAL;
1073			}
1074		}
1075	}
1076
1077	/* set uncompleted segment to curseg */
1078	curseg = CURSEG_I(sbi, type);
1079	mutex_lock(&curseg->curseg_mutex);
1080	memcpy(curseg->sum_blk, sum, PAGE_CACHE_SIZE);
1081	curseg->next_segno = segno;
1082	reset_curseg(sbi, type, 0);
1083	curseg->alloc_type = ckpt->alloc_type[type];
1084	curseg->next_blkoff = blk_off;
1085	mutex_unlock(&curseg->curseg_mutex);
1086	f2fs_put_page(new, 1);
1087	return 0;
1088}
1089
1090static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
1091{
1092	int type = CURSEG_HOT_DATA;
1093
1094	if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG)) {
1095		/* restore for compacted data summary */
1096		if (read_compacted_summaries(sbi))
1097			return -EINVAL;
1098		type = CURSEG_HOT_NODE;
1099	}
1100
1101	for (; type <= CURSEG_COLD_NODE; type++)
1102		if (read_normal_summaries(sbi, type))
1103			return -EINVAL;
1104	return 0;
1105}
1106
1107static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr)
1108{
1109	struct page *page;
1110	unsigned char *kaddr;
1111	struct f2fs_summary *summary;
1112	struct curseg_info *seg_i;
1113	int written_size = 0;
1114	int i, j;
1115
1116	page = grab_meta_page(sbi, blkaddr++);
1117	kaddr = (unsigned char *)page_address(page);
1118
1119	/* Step 1: write nat cache */
1120	seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
1121	memcpy(kaddr, &seg_i->sum_blk->n_nats, SUM_JOURNAL_SIZE);
1122	written_size += SUM_JOURNAL_SIZE;
1123
1124	/* Step 2: write sit cache */
1125	seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
1126	memcpy(kaddr + written_size, &seg_i->sum_blk->n_sits,
1127						SUM_JOURNAL_SIZE);
1128	written_size += SUM_JOURNAL_SIZE;
1129
1130	set_page_dirty(page);
1131
1132	/* Step 3: write summary entries */
1133	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
1134		unsigned short blkoff;
1135		seg_i = CURSEG_I(sbi, i);
1136		if (sbi->ckpt->alloc_type[i] == SSR)
1137			blkoff = sbi->blocks_per_seg;
1138		else
1139			blkoff = curseg_blkoff(sbi, i);
1140
1141		for (j = 0; j < blkoff; j++) {
1142			if (!page) {
1143				page = grab_meta_page(sbi, blkaddr++);
1144				kaddr = (unsigned char *)page_address(page);
1145				written_size = 0;
1146			}
1147			summary = (struct f2fs_summary *)(kaddr + written_size);
1148			*summary = seg_i->sum_blk->entries[j];
1149			written_size += SUMMARY_SIZE;
1150			set_page_dirty(page);
1151
1152			if (written_size + SUMMARY_SIZE <= PAGE_CACHE_SIZE -
1153							SUM_FOOTER_SIZE)
1154				continue;
1155
1156			f2fs_put_page(page, 1);
1157			page = NULL;
1158		}
1159	}
1160	if (page)
1161		f2fs_put_page(page, 1);
1162}
1163
1164static void write_normal_summaries(struct f2fs_sb_info *sbi,
1165					block_t blkaddr, int type)
1166{
1167	int i, end;
1168	if (IS_DATASEG(type))
1169		end = type + NR_CURSEG_DATA_TYPE;
1170	else
1171		end = type + NR_CURSEG_NODE_TYPE;
1172
1173	for (i = type; i < end; i++) {
1174		struct curseg_info *sum = CURSEG_I(sbi, i);
1175		mutex_lock(&sum->curseg_mutex);
1176		write_sum_page(sbi, sum->sum_blk, blkaddr + (i - type));
1177		mutex_unlock(&sum->curseg_mutex);
1178	}
1179}
1180
1181void write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
1182{
1183	if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG))
1184		write_compacted_summaries(sbi, start_blk);
1185	else
1186		write_normal_summaries(sbi, start_blk, CURSEG_HOT_DATA);
1187}
1188
1189void write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
1190{
1191	if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG))
1192		write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE);
1193	return;
1194}
1195
1196int lookup_journal_in_cursum(struct f2fs_summary_block *sum, int type,
1197					unsigned int val, int alloc)
1198{
1199	int i;
1200
1201	if (type == NAT_JOURNAL) {
1202		for (i = 0; i < nats_in_cursum(sum); i++) {
1203			if (le32_to_cpu(nid_in_journal(sum, i)) == val)
1204				return i;
1205		}
1206		if (alloc && nats_in_cursum(sum) < NAT_JOURNAL_ENTRIES)
1207			return update_nats_in_cursum(sum, 1);
1208	} else if (type == SIT_JOURNAL) {
1209		for (i = 0; i < sits_in_cursum(sum); i++)
1210			if (le32_to_cpu(segno_in_journal(sum, i)) == val)
1211				return i;
1212		if (alloc && sits_in_cursum(sum) < SIT_JOURNAL_ENTRIES)
1213			return update_sits_in_cursum(sum, 1);
1214	}
1215	return -1;
1216}
1217
1218static struct page *get_current_sit_page(struct f2fs_sb_info *sbi,
1219					unsigned int segno)
1220{
1221	struct sit_info *sit_i = SIT_I(sbi);
1222	unsigned int offset = SIT_BLOCK_OFFSET(sit_i, segno);
1223	block_t blk_addr = sit_i->sit_base_addr + offset;
1224
1225	check_seg_range(sbi, segno);
1226
1227	/* calculate sit block address */
1228	if (f2fs_test_bit(offset, sit_i->sit_bitmap))
1229		blk_addr += sit_i->sit_blocks;
1230
1231	return get_meta_page(sbi, blk_addr);
1232}
1233
1234static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
1235					unsigned int start)
1236{
1237	struct sit_info *sit_i = SIT_I(sbi);
1238	struct page *src_page, *dst_page;
1239	pgoff_t src_off, dst_off;
1240	void *src_addr, *dst_addr;
1241
1242	src_off = current_sit_addr(sbi, start);
1243	dst_off = next_sit_addr(sbi, src_off);
1244
1245	/* get current sit block page without lock */
1246	src_page = get_meta_page(sbi, src_off);
1247	dst_page = grab_meta_page(sbi, dst_off);
1248	BUG_ON(PageDirty(src_page));
1249
1250	src_addr = page_address(src_page);
1251	dst_addr = page_address(dst_page);
1252	memcpy(dst_addr, src_addr, PAGE_CACHE_SIZE);
1253
1254	set_page_dirty(dst_page);
1255	f2fs_put_page(src_page, 1);
1256
1257	set_to_next_sit(sit_i, start);
1258
1259	return dst_page;
1260}
1261
1262static bool flush_sits_in_journal(struct f2fs_sb_info *sbi)
1263{
1264	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
1265	struct f2fs_summary_block *sum = curseg->sum_blk;
1266	int i;
1267
1268	/*
1269	 * If the journal area in the current summary is full of sit entries,
1270	 * all the sit entries will be flushed. Otherwise the sit entries
1271	 * are not able to replace with newly hot sit entries.
1272	 */
1273	if (sits_in_cursum(sum) >= SIT_JOURNAL_ENTRIES) {
1274		for (i = sits_in_cursum(sum) - 1; i >= 0; i--) {
1275			unsigned int segno;
1276			segno = le32_to_cpu(segno_in_journal(sum, i));
1277			__mark_sit_entry_dirty(sbi, segno);
1278		}
1279		update_sits_in_cursum(sum, -sits_in_cursum(sum));
1280		return 1;
1281	}
1282	return 0;
1283}
1284
1285/*
1286 * CP calls this function, which flushes SIT entries including sit_journal,
1287 * and moves prefree segs to free segs.
1288 */
1289void flush_sit_entries(struct f2fs_sb_info *sbi)
1290{
1291	struct sit_info *sit_i = SIT_I(sbi);
1292	unsigned long *bitmap = sit_i->dirty_sentries_bitmap;
1293	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
1294	struct f2fs_summary_block *sum = curseg->sum_blk;
1295	unsigned long nsegs = TOTAL_SEGS(sbi);
1296	struct page *page = NULL;
1297	struct f2fs_sit_block *raw_sit = NULL;
1298	unsigned int start = 0, end = 0;
1299	unsigned int segno = -1;
1300	bool flushed;
1301
1302	mutex_lock(&curseg->curseg_mutex);
1303	mutex_lock(&sit_i->sentry_lock);
1304
1305	/*
1306	 * "flushed" indicates whether sit entries in journal are flushed
1307	 * to the SIT area or not.
1308	 */
1309	flushed = flush_sits_in_journal(sbi);
1310
1311	while ((segno = find_next_bit(bitmap, nsegs, segno + 1)) < nsegs) {
1312		struct seg_entry *se = get_seg_entry(sbi, segno);
1313		int sit_offset, offset;
1314
1315		sit_offset = SIT_ENTRY_OFFSET(sit_i, segno);
1316
1317		if (flushed)
1318			goto to_sit_page;
1319
1320		offset = lookup_journal_in_cursum(sum, SIT_JOURNAL, segno, 1);
1321		if (offset >= 0) {
1322			segno_in_journal(sum, offset) = cpu_to_le32(segno);
1323			seg_info_to_raw_sit(se, &sit_in_journal(sum, offset));
1324			goto flush_done;
1325		}
1326to_sit_page:
1327		if (!page || (start > segno) || (segno > end)) {
1328			if (page) {
1329				f2fs_put_page(page, 1);
1330				page = NULL;
1331			}
1332
1333			start = START_SEGNO(sit_i, segno);
1334			end = start + SIT_ENTRY_PER_BLOCK - 1;
1335
1336			/* read sit block that will be updated */
1337			page = get_next_sit_page(sbi, start);
1338			raw_sit = page_address(page);
1339		}
1340
1341		/* udpate entry in SIT block */
1342		seg_info_to_raw_sit(se, &raw_sit->entries[sit_offset]);
1343flush_done:
1344		__clear_bit(segno, bitmap);
1345		sit_i->dirty_sentries--;
1346	}
1347	mutex_unlock(&sit_i->sentry_lock);
1348	mutex_unlock(&curseg->curseg_mutex);
1349
1350	/* writeout last modified SIT block */
1351	f2fs_put_page(page, 1);
1352
1353	set_prefree_as_free_segments(sbi);
1354}
1355
1356static int build_sit_info(struct f2fs_sb_info *sbi)
1357{
1358	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
1359	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
1360	struct sit_info *sit_i;
1361	unsigned int sit_segs, start;
1362	char *src_bitmap, *dst_bitmap;
1363	unsigned int bitmap_size;
1364
1365	/* allocate memory for SIT information */
1366	sit_i = kzalloc(sizeof(struct sit_info), GFP_KERNEL);
1367	if (!sit_i)
1368		return -ENOMEM;
1369
1370	SM_I(sbi)->sit_info = sit_i;
1371
1372	sit_i->sentries = vzalloc(TOTAL_SEGS(sbi) * sizeof(struct seg_entry));
1373	if (!sit_i->sentries)
1374		return -ENOMEM;
1375
1376	bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi));
1377	sit_i->dirty_sentries_bitmap = kzalloc(bitmap_size, GFP_KERNEL);
1378	if (!sit_i->dirty_sentries_bitmap)
1379		return -ENOMEM;
1380
1381	for (start = 0; start < TOTAL_SEGS(sbi); start++) {
1382		sit_i->sentries[start].cur_valid_map
1383			= kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
1384		sit_i->sentries[start].ckpt_valid_map
1385			= kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
1386		if (!sit_i->sentries[start].cur_valid_map
1387				|| !sit_i->sentries[start].ckpt_valid_map)
1388			return -ENOMEM;
1389	}
1390
1391	if (sbi->segs_per_sec > 1) {
1392		sit_i->sec_entries = vzalloc(TOTAL_SECS(sbi) *
1393					sizeof(struct sec_entry));
1394		if (!sit_i->sec_entries)
1395			return -ENOMEM;
1396	}
1397
1398	/* get information related with SIT */
1399	sit_segs = le32_to_cpu(raw_super->segment_count_sit) >> 1;
1400
1401	/* setup SIT bitmap from ckeckpoint pack */
1402	bitmap_size = __bitmap_size(sbi, SIT_BITMAP);
1403	src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP);
1404
1405	dst_bitmap = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL);
1406	if (!dst_bitmap)
1407		return -ENOMEM;
1408
1409	/* init SIT information */
1410	sit_i->s_ops = &default_salloc_ops;
1411
1412	sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr);
1413	sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg;
1414	sit_i->written_valid_blocks = le64_to_cpu(ckpt->valid_block_count);
1415	sit_i->sit_bitmap = dst_bitmap;
1416	sit_i->bitmap_size = bitmap_size;
1417	sit_i->dirty_sentries = 0;
1418	sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK;
1419	sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time);
1420	sit_i->mounted_time = CURRENT_TIME_SEC.tv_sec;
1421	mutex_init(&sit_i->sentry_lock);
1422	return 0;
1423}
1424
1425static int build_free_segmap(struct f2fs_sb_info *sbi)
1426{
1427	struct f2fs_sm_info *sm_info = SM_I(sbi);
1428	struct free_segmap_info *free_i;
1429	unsigned int bitmap_size, sec_bitmap_size;
1430
1431	/* allocate memory for free segmap information */
1432	free_i = kzalloc(sizeof(struct free_segmap_info), GFP_KERNEL);
1433	if (!free_i)
1434		return -ENOMEM;
1435
1436	SM_I(sbi)->free_info = free_i;
1437
1438	bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi));
1439	free_i->free_segmap = kmalloc(bitmap_size, GFP_KERNEL);
1440	if (!free_i->free_segmap)
1441		return -ENOMEM;
1442
1443	sec_bitmap_size = f2fs_bitmap_size(TOTAL_SECS(sbi));
1444	free_i->free_secmap = kmalloc(sec_bitmap_size, GFP_KERNEL);
1445	if (!free_i->free_secmap)
1446		return -ENOMEM;
1447
1448	/* set all segments as dirty temporarily */
1449	memset(free_i->free_segmap, 0xff, bitmap_size);
1450	memset(free_i->free_secmap, 0xff, sec_bitmap_size);
1451
1452	/* init free segmap information */
1453	free_i->start_segno =
1454		(unsigned int) GET_SEGNO_FROM_SEG0(sbi, sm_info->main_blkaddr);
1455	free_i->free_segments = 0;
1456	free_i->free_sections = 0;
1457	rwlock_init(&free_i->segmap_lock);
1458	return 0;
1459}
1460
1461static int build_curseg(struct f2fs_sb_info *sbi)
1462{
1463	struct curseg_info *array;
1464	int i;
1465
1466	array = kzalloc(sizeof(*array) * NR_CURSEG_TYPE, GFP_KERNEL);
1467	if (!array)
1468		return -ENOMEM;
1469
1470	SM_I(sbi)->curseg_array = array;
1471
1472	for (i = 0; i < NR_CURSEG_TYPE; i++) {
1473		mutex_init(&array[i].curseg_mutex);
1474		array[i].sum_blk = kzalloc(PAGE_CACHE_SIZE, GFP_KERNEL);
1475		if (!array[i].sum_blk)
1476			return -ENOMEM;
1477		array[i].segno = NULL_SEGNO;
1478		array[i].next_blkoff = 0;
1479	}
1480	return restore_curseg_summaries(sbi);
1481}
1482
1483static void build_sit_entries(struct f2fs_sb_info *sbi)
1484{
1485	struct sit_info *sit_i = SIT_I(sbi);
1486	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
1487	struct f2fs_summary_block *sum = curseg->sum_blk;
1488	unsigned int start;
1489
1490	for (start = 0; start < TOTAL_SEGS(sbi); start++) {
1491		struct seg_entry *se = &sit_i->sentries[start];
1492		struct f2fs_sit_block *sit_blk;
1493		struct f2fs_sit_entry sit;
1494		struct page *page;
1495		int i;
1496
1497		mutex_lock(&curseg->curseg_mutex);
1498		for (i = 0; i < sits_in_cursum(sum); i++) {
1499			if (le32_to_cpu(segno_in_journal(sum, i)) == start) {
1500				sit = sit_in_journal(sum, i);
1501				mutex_unlock(&curseg->curseg_mutex);
1502				goto got_it;
1503			}
1504		}
1505		mutex_unlock(&curseg->curseg_mutex);
1506		page = get_current_sit_page(sbi, start);
1507		sit_blk = (struct f2fs_sit_block *)page_address(page);
1508		sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)];
1509		f2fs_put_page(page, 1);
1510got_it:
1511		check_block_count(sbi, start, &sit);
1512		seg_info_from_raw_sit(se, &sit);
1513		if (sbi->segs_per_sec > 1) {
1514			struct sec_entry *e = get_sec_entry(sbi, start);
1515			e->valid_blocks += se->valid_blocks;
1516		}
1517	}
1518}
1519
1520static void init_free_segmap(struct f2fs_sb_info *sbi)
1521{
1522	unsigned int start;
1523	int type;
1524
1525	for (start = 0; start < TOTAL_SEGS(sbi); start++) {
1526		struct seg_entry *sentry = get_seg_entry(sbi, start);
1527		if (!sentry->valid_blocks)
1528			__set_free(sbi, start);
1529	}
1530
1531	/* set use the current segments */
1532	for (type = CURSEG_HOT_DATA; type <= CURSEG_COLD_NODE; type++) {
1533		struct curseg_info *curseg_t = CURSEG_I(sbi, type);
1534		__set_test_and_inuse(sbi, curseg_t->segno);
1535	}
1536}
1537
1538static void init_dirty_segmap(struct f2fs_sb_info *sbi)
1539{
1540	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
1541	struct free_segmap_info *free_i = FREE_I(sbi);
1542	unsigned int segno = 0, offset = 0;
1543	unsigned short valid_blocks;
1544
1545	while (segno < TOTAL_SEGS(sbi)) {
1546		/* find dirty segment based on free segmap */
1547		segno = find_next_inuse(free_i, TOTAL_SEGS(sbi), offset);
1548		if (segno >= TOTAL_SEGS(sbi))
1549			break;
1550		offset = segno + 1;
1551		valid_blocks = get_valid_blocks(sbi, segno, 0);
1552		if (valid_blocks >= sbi->blocks_per_seg || !valid_blocks)
1553			continue;
1554		mutex_lock(&dirty_i->seglist_lock);
1555		__locate_dirty_segment(sbi, segno, DIRTY);
1556		mutex_unlock(&dirty_i->seglist_lock);
1557	}
1558}
1559
1560static int init_victim_segmap(struct f2fs_sb_info *sbi)
1561{
1562	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
1563	unsigned int bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi));
1564
1565	dirty_i->victim_segmap[FG_GC] = kzalloc(bitmap_size, GFP_KERNEL);
1566	dirty_i->victim_segmap[BG_GC] = kzalloc(bitmap_size, GFP_KERNEL);
1567	if (!dirty_i->victim_segmap[FG_GC] || !dirty_i->victim_segmap[BG_GC])
1568		return -ENOMEM;
1569	return 0;
1570}
1571
1572static int build_dirty_segmap(struct f2fs_sb_info *sbi)
1573{
1574	struct dirty_seglist_info *dirty_i;
1575	unsigned int bitmap_size, i;
1576
1577	/* allocate memory for dirty segments list information */
1578	dirty_i = kzalloc(sizeof(struct dirty_seglist_info), GFP_KERNEL);
1579	if (!dirty_i)
1580		return -ENOMEM;
1581
1582	SM_I(sbi)->dirty_info = dirty_i;
1583	mutex_init(&dirty_i->seglist_lock);
1584
1585	bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi));
1586
1587	for (i = 0; i < NR_DIRTY_TYPE; i++) {
1588		dirty_i->dirty_segmap[i] = kzalloc(bitmap_size, GFP_KERNEL);
1589		if (!dirty_i->dirty_segmap[i])
1590			return -ENOMEM;
1591	}
1592
1593	init_dirty_segmap(sbi);
1594	return init_victim_segmap(sbi);
1595}
1596
1597/*
1598 * Update min, max modified time for cost-benefit GC algorithm
1599 */
1600static void init_min_max_mtime(struct f2fs_sb_info *sbi)
1601{
1602	struct sit_info *sit_i = SIT_I(sbi);
1603	unsigned int segno;
1604
1605	mutex_lock(&sit_i->sentry_lock);
1606
1607	sit_i->min_mtime = LLONG_MAX;
1608
1609	for (segno = 0; segno < TOTAL_SEGS(sbi); segno += sbi->segs_per_sec) {
1610		unsigned int i;
1611		unsigned long long mtime = 0;
1612
1613		for (i = 0; i < sbi->segs_per_sec; i++)
1614			mtime += get_seg_entry(sbi, segno + i)->mtime;
1615
1616		mtime = div_u64(mtime, sbi->segs_per_sec);
1617
1618		if (sit_i->min_mtime > mtime)
1619			sit_i->min_mtime = mtime;
1620	}
1621	sit_i->max_mtime = get_mtime(sbi);
1622	mutex_unlock(&sit_i->sentry_lock);
1623}
1624
1625int build_segment_manager(struct f2fs_sb_info *sbi)
1626{
1627	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
1628	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
1629	struct f2fs_sm_info *sm_info;
1630	int err;
1631
1632	sm_info = kzalloc(sizeof(struct f2fs_sm_info), GFP_KERNEL);
1633	if (!sm_info)
1634		return -ENOMEM;
1635
1636	/* init sm info */
1637	sbi->sm_info = sm_info;
1638	INIT_LIST_HEAD(&sm_info->wblist_head);
1639	spin_lock_init(&sm_info->wblist_lock);
1640	sm_info->seg0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
1641	sm_info->main_blkaddr = le32_to_cpu(raw_super->main_blkaddr);
1642	sm_info->segment_count = le32_to_cpu(raw_super->segment_count);
1643	sm_info->reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count);
1644	sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
1645	sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main);
1646	sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
1647
1648	err = build_sit_info(sbi);
1649	if (err)
1650		return err;
1651	err = build_free_segmap(sbi);
1652	if (err)
1653		return err;
1654	err = build_curseg(sbi);
1655	if (err)
1656		return err;
1657
1658	/* reinit free segmap based on SIT */
1659	build_sit_entries(sbi);
1660
1661	init_free_segmap(sbi);
1662	err = build_dirty_segmap(sbi);
1663	if (err)
1664		return err;
1665
1666	init_min_max_mtime(sbi);
1667	return 0;
1668}
1669
1670static void discard_dirty_segmap(struct f2fs_sb_info *sbi,
1671		enum dirty_type dirty_type)
1672{
1673	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
1674
1675	mutex_lock(&dirty_i->seglist_lock);
1676	kfree(dirty_i->dirty_segmap[dirty_type]);
1677	dirty_i->nr_dirty[dirty_type] = 0;
1678	mutex_unlock(&dirty_i->seglist_lock);
1679}
1680
1681void reset_victim_segmap(struct f2fs_sb_info *sbi)
1682{
1683	unsigned int bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi));
1684	memset(DIRTY_I(sbi)->victim_segmap[FG_GC], 0, bitmap_size);
1685}
1686
1687static void destroy_victim_segmap(struct f2fs_sb_info *sbi)
1688{
1689	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
1690
1691	kfree(dirty_i->victim_segmap[FG_GC]);
1692	kfree(dirty_i->victim_segmap[BG_GC]);
1693}
1694
1695static void destroy_dirty_segmap(struct f2fs_sb_info *sbi)
1696{
1697	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
1698	int i;
1699
1700	if (!dirty_i)
1701		return;
1702
1703	/* discard pre-free/dirty segments list */
1704	for (i = 0; i < NR_DIRTY_TYPE; i++)
1705		discard_dirty_segmap(sbi, i);
1706
1707	destroy_victim_segmap(sbi);
1708	SM_I(sbi)->dirty_info = NULL;
1709	kfree(dirty_i);
1710}
1711
1712static void destroy_curseg(struct f2fs_sb_info *sbi)
1713{
1714	struct curseg_info *array = SM_I(sbi)->curseg_array;
1715	int i;
1716
1717	if (!array)
1718		return;
1719	SM_I(sbi)->curseg_array = NULL;
1720	for (i = 0; i < NR_CURSEG_TYPE; i++)
1721		kfree(array[i].sum_blk);
1722	kfree(array);
1723}
1724
1725static void destroy_free_segmap(struct f2fs_sb_info *sbi)
1726{
1727	struct free_segmap_info *free_i = SM_I(sbi)->free_info;
1728	if (!free_i)
1729		return;
1730	SM_I(sbi)->free_info = NULL;
1731	kfree(free_i->free_segmap);
1732	kfree(free_i->free_secmap);
1733	kfree(free_i);
1734}
1735
1736static void destroy_sit_info(struct f2fs_sb_info *sbi)
1737{
1738	struct sit_info *sit_i = SIT_I(sbi);
1739	unsigned int start;
1740
1741	if (!sit_i)
1742		return;
1743
1744	if (sit_i->sentries) {
1745		for (start = 0; start < TOTAL_SEGS(sbi); start++) {
1746			kfree(sit_i->sentries[start].cur_valid_map);
1747			kfree(sit_i->sentries[start].ckpt_valid_map);
1748		}
1749	}
1750	vfree(sit_i->sentries);
1751	vfree(sit_i->sec_entries);
1752	kfree(sit_i->dirty_sentries_bitmap);
1753
1754	SM_I(sbi)->sit_info = NULL;
1755	kfree(sit_i->sit_bitmap);
1756	kfree(sit_i);
1757}
1758
1759void destroy_segment_manager(struct f2fs_sb_info *sbi)
1760{
1761	struct f2fs_sm_info *sm_info = SM_I(sbi);
1762	destroy_dirty_segmap(sbi);
1763	destroy_curseg(sbi);
1764	destroy_free_segmap(sbi);
1765	destroy_sit_info(sbi);
1766	sbi->sm_info = NULL;
1767	kfree(sm_info);
1768}
1769