e4defrag.c revision b1667425f8cd8b3c3d6b205ac9f3dee2e085058c
1/*
2 * e4defrag.c - ext4 filesystem defragmenter
3 *
4 * Copyright (C) 2009 NEC Software Tohoku, Ltd.
5 *
6 * Author: Akira Fujita	<a-fujita@rs.jp.nec.com>
7 *         Takashi Sato	<t-sato@yk.jp.nec.com>
8 */
9
10#ifndef _LARGEFILE_SOURCE
11#define _LARGEFILE_SOURCE
12#endif
13
14#ifndef _LARGEFILE64_SOURCE
15#define _LARGEFILE64_SOURCE
16#endif
17
18#ifndef _GNU_SOURCE
19#define _GNU_SOURCE
20#endif
21
22#include "config.h"
23#include <ctype.h>
24#include <dirent.h>
25#include <endian.h>
26#include <errno.h>
27#include <fcntl.h>
28#include <ftw.h>
29#include <limits.h>
30#include <mntent.h>
31#include <stdio.h>
32#include <stdlib.h>
33#include <string.h>
34#include <unistd.h>
35#include <ext2fs/ext2_types.h>
36#include <ext2fs/ext2fs.h>
37#include <linux/fs.h>
38#include <sys/ioctl.h>
39#include <ext2fs/fiemap.h>
40#include <sys/mman.h>
41#include <sys/stat.h>
42#include <sys/statfs.h>
43#include <sys/syscall.h>
44#include <sys/vfs.h>
45
46/* A relatively new ioctl interface ... */
47#ifndef EXT4_IOC_MOVE_EXT
48#define EXT4_IOC_MOVE_EXT      _IOWR('f', 15, struct move_extent)
49#endif
50
51/* Macro functions */
52#define PRINT_ERR_MSG(msg)	fprintf(stderr, "%s\n", (msg))
53#define IN_FTW_PRINT_ERR_MSG(msg)	\
54	fprintf(stderr, "\t%s\t\t[ NG ]\n", (msg))
55#define PRINT_FILE_NAME(file)	fprintf(stderr, " \"%s\"\n", (file))
56#define PRINT_ERR_MSG_WITH_ERRNO(msg)	\
57	fprintf(stderr, "\t%s:%s\t[ NG ]\n", (msg), strerror(errno))
58#define STATISTIC_ERR_MSG(msg)	\
59	fprintf(stderr, "\t%s\n", (msg))
60#define STATISTIC_ERR_MSG_WITH_ERRNO(msg)	\
61	fprintf(stderr, "\t%s:%s\n", (msg), strerror(errno))
62#define min(x, y) (((x) > (y)) ? (y) : (x))
63#define CALC_SCORE(ratio) \
64	((ratio) > 10 ? (80 + 20 * (ratio) / 100) : (8 * (ratio)))
65/* Wrap up the free function */
66#define FREE(tmp)				\
67	do {					\
68		if ((tmp) != NULL)		\
69			free(tmp);		\
70	} while (0)				\
71/* Insert list2 after list1 */
72#define insert(list1, list2)			\
73	do {					\
74		list2->next = list1->next;	\
75		list1->next->prev = list2;	\
76		list2->prev = list1;		\
77		list1->next = list2;		\
78	} while (0)
79
80/* To delete unused warning */
81#ifdef __GNUC__
82#define EXT2FS_ATTR(x) __attribute__(x)
83#else
84#define EXT2FS_ATTR(x)
85#endif
86
87/* The mode of defrag */
88#define DETAIL			0x01
89#define STATISTIC		0x02
90
91#define DEVNAME			0
92#define DIRNAME			1
93#define FILENAME		2
94
95#define FTW_OPEN_FD		2000
96
97#define FS_EXT4			"ext4"
98#define ROOT_UID		0
99
100#define BOUND_SCORE		55
101#define SHOW_FRAG_FILES	5
102
103/* Magic number for ext4 */
104#define EXT4_SUPER_MAGIC	0xEF53
105
106/* Definition of flex_bg */
107#define EXT4_FEATURE_INCOMPAT_FLEX_BG		0x0200
108
109/* The following macro is used for ioctl FS_IOC_FIEMAP
110 * EXTENT_MAX_COUNT:	the maximum number of extents for exchanging between
111 *			kernel-space and user-space per ioctl
112 */
113#define EXTENT_MAX_COUNT	512
114
115/* The following macros are error message */
116#define MSG_USAGE		\
117"Usage	: e4defrag [-v] file...| directory...| device...\n\
118	: e4defrag  -c  file...| directory...| device...\n"
119
120#define NGMSG_EXT4		"Filesystem is not ext4 filesystem"
121#define NGMSG_FILE_EXTENT	"Failed to get file extents"
122#define NGMSG_FILE_INFO		"Failed to get file information"
123#define NGMSG_FILE_OPEN		"Failed to open"
124#define NGMSG_FILE_UNREG	"File is not regular file"
125#define NGMSG_LOST_FOUND	"Can not process \"lost+found\""
126
127/* Data type for filesystem-wide blocks number */
128typedef unsigned long long ext4_fsblk_t;
129
130struct fiemap_extent_data {
131	__u64 len;			/* blocks count */
132	__u64 logical;		/* start logical block number */
133	ext4_fsblk_t physical;		/* start physical block number */
134};
135
136struct fiemap_extent_list {
137	struct fiemap_extent_list *prev;
138	struct fiemap_extent_list *next;
139	struct fiemap_extent_data data;	/* extent belong to file */
140};
141
142struct fiemap_extent_group {
143	struct fiemap_extent_group *prev;
144	struct fiemap_extent_group *next;
145	__u64 len;	/* length of this continuous region */
146	struct fiemap_extent_list *start;	/* start ext */
147	struct fiemap_extent_list *end;		/* end ext */
148};
149
150struct move_extent {
151	__s32 reserved;	/* original file descriptor */
152	__u32 donor_fd;	/* donor file descriptor */
153	__u64 orig_start;	/* logical start offset in block for orig */
154	__u64 donor_start;	/* logical start offset in block for donor */
155	__u64 len;	/* block length to be moved */
156	__u64 moved_len;	/* moved block length */
157};
158
159struct frag_statistic_ino {
160	int now_count;	/* the file's extents count of before defrag */
161	int best_count; /* the best file's extents count */
162	__u64 size_per_ext;	/* size(KB) per extent */
163	float ratio;	/* the ratio of fragmentation */
164	char msg_buffer[PATH_MAX + 1];	/* pathname of the file */
165};
166
167char	lost_found_dir[PATH_MAX + 1];
168int	block_size;
169int	extents_before_defrag;
170int	extents_after_defrag;
171int	mode_flag;
172unsigned int	current_uid;
173unsigned int	defraged_file_count;
174unsigned int	frag_files_before_defrag;
175unsigned int	frag_files_after_defrag;
176unsigned int	regular_count;
177unsigned int	succeed_cnt;
178unsigned int	total_count;
179__u8 log_groups_per_flex;
180__u32 blocks_per_group;
181__u32 feature_incompat;
182ext4_fsblk_t	files_block_count;
183struct frag_statistic_ino	frag_rank[SHOW_FRAG_FILES];
184
185
186/* Local definitions of some syscalls glibc may not yet have */
187
188#ifndef HAVE_POSIX_FADVISE
189#warning Using locally defined posix_fadvise interface.
190
191#ifndef __NR_fadvise64_64
192#error Your kernel headers dont define __NR_fadvise64_64
193#endif
194
195/*
196 * fadvise() -		Give advice about file access.
197 *
198 * @fd:			defrag target file's descriptor.
199 * @offset:		file offset.
200 * @len:		area length.
201 * @advise:		process flag.
202 */
203static int posix_fadvise(int fd, loff_t offset, size_t len, int advise)
204{
205	return syscall(__NR_fadvise64_64, fd, offset, len, advise);
206}
207#endif /* ! HAVE_FADVISE64_64 */
208
209#ifndef HAVE_SYNC_FILE_RANGE
210#warning Using locally defined sync_file_range interface.
211
212#ifndef __NR_sync_file_range
213#ifndef __NR_sync_file_range2 /* ppc */
214#error Your kernel headers dont define __NR_sync_file_range
215#endif
216#endif
217
218/*
219 * sync_file_range() -	Sync file region.
220 *
221 * @fd:			defrag target file's descriptor.
222 * @offset:		file offset.
223 * @length:		area length.
224 * @flag:		process flag.
225 */
226int sync_file_range(int fd, loff_t offset, loff_t length, unsigned int flag)
227{
228#ifdef __NR_sync_file_range
229	return syscall(__NR_sync_file_range, fd, offset, length, flag);
230#else
231	return syscall(__NR_sync_file_range2, fd, flag, offset, length);
232#endif
233}
234#endif /* ! HAVE_SYNC_FILE_RANGE */
235
236#ifndef HAVE_FALLOCATE64
237#warning Using locally defined fallocate syscall interface.
238
239#ifndef __NR_fallocate
240#error Your kernel headers dont define __NR_fallocate
241#endif
242
243/*
244 * fallocate64() -	Manipulate file space.
245 *
246 * @fd:			defrag target file's descriptor.
247 * @mode:		process flag.
248 * @offset:		file offset.
249 * @len:		file size.
250 */
251static int fallocate64(int fd, int mode, loff_t offset, loff_t len)
252{
253	return syscall(__NR_fallocate, fd, mode, offset, len);
254}
255#endif /* ! HAVE_FALLOCATE */
256
257/*
258 * get_mount_point() -	Get device's mount point.
259 *
260 * @devname:		the device's name.
261 * @mount_point:	the mount point.
262 * @dir_path_len:	the length of directory.
263 */
264static int get_mount_point(const char *devname, char *mount_point,
265							int dir_path_len)
266{
267	/* Refer to /etc/mtab */
268	const char	*mtab = MOUNTED;
269	FILE		*fp = NULL;
270	struct mntent	*mnt = NULL;
271	struct stat64	sb;
272
273	if (stat64(devname, &sb) < 0) {
274		perror(NGMSG_FILE_INFO);
275		PRINT_FILE_NAME(devname);
276		return -1;
277	}
278
279	fp = setmntent(mtab, "r");
280	if (fp == NULL) {
281		perror("Couldn't access /etc/mtab");
282		return -1;
283	}
284
285	while ((mnt = getmntent(fp)) != NULL) {
286		struct stat64 ms;
287
288		/*
289		 * To handle device symlinks, we see if the
290		 * device number matches, not the name
291		 */
292		if (stat64(mnt->mnt_fsname, &ms) < 0)
293			continue;
294		if (sb.st_rdev != ms.st_rdev)
295			continue;
296
297		endmntent(fp);
298		if (strcmp(mnt->mnt_type, FS_EXT4) == 0) {
299			strncpy(mount_point, mnt->mnt_dir,
300				dir_path_len);
301			return 0;
302		}
303		PRINT_ERR_MSG(NGMSG_EXT4);
304		return -1;
305	}
306	endmntent(fp);
307	PRINT_ERR_MSG("Filesystem is not mounted");
308	return -1;
309}
310
311/*
312 * is_ext4() -		Whether on an ext4 filesystem.
313 *
314 * @file:		the file's name.
315 */
316static int is_ext4(const char *file, char *devname)
317{
318	int 	maxlen = 0;
319	int	len, ret;
320	FILE	*fp = NULL;
321	char	*mnt_type = NULL;
322	/* Refer to /etc/mtab */
323	const char	*mtab = MOUNTED;
324	char	file_path[PATH_MAX + 1];
325	struct mntent	*mnt = NULL;
326	struct statfs64	fsbuf;
327
328	/* Get full path */
329	if (realpath(file, file_path) == NULL) {
330		perror("Couldn't get full path");
331		PRINT_FILE_NAME(file);
332		return -1;
333	}
334
335	if (statfs64(file_path, &fsbuf) < 0) {
336		perror("Failed to get filesystem information");
337		PRINT_FILE_NAME(file);
338		return -1;
339	}
340
341	if (fsbuf.f_type != EXT4_SUPER_MAGIC) {
342		PRINT_ERR_MSG(NGMSG_EXT4);
343		return -1;
344	}
345
346	fp = setmntent(mtab, "r");
347	if (fp == NULL) {
348		perror("Couldn't access /etc/mtab");
349		return -1;
350	}
351
352	while ((mnt = getmntent(fp)) != NULL) {
353		if (mnt->mnt_fsname[0] != '/')
354			continue;
355		len = strlen(mnt->mnt_dir);
356		ret = memcmp(file_path, mnt->mnt_dir, len);
357		if (ret != 0)
358			continue;
359
360		if (maxlen >= len)
361			continue;
362
363		maxlen = len;
364
365		mnt_type = realloc(mnt_type, strlen(mnt->mnt_type) + 1);
366		if (mnt_type == NULL) {
367			endmntent(fp);
368			return -1;
369		}
370		memset(mnt_type, 0, strlen(mnt->mnt_type) + 1);
371		strncpy(mnt_type, mnt->mnt_type, strlen(mnt->mnt_type));
372		strncpy(lost_found_dir, mnt->mnt_dir, PATH_MAX);
373		strncpy(devname, mnt->mnt_fsname, strlen(mnt->mnt_fsname) + 1);
374	}
375
376	endmntent(fp);
377	if (mnt_type && strcmp(mnt_type, FS_EXT4) == 0) {
378		FREE(mnt_type);
379		return 0;
380	} else {
381		FREE(mnt_type);
382		PRINT_ERR_MSG(NGMSG_EXT4);
383		return -1;
384	}
385}
386
387/*
388 * calc_entry_counts() -	Calculate file counts.
389 *
390 * @file:		file name.
391 * @buf:		file info.
392 * @flag:		file type.
393 * @ftwbuf:		the pointer of a struct FTW.
394 */
395static int calc_entry_counts(const char *file EXT2FS_ATTR((unused)),
396		const struct stat64 *buf, int flag EXT2FS_ATTR((unused)),
397		struct FTW *ftwbuf EXT2FS_ATTR((unused)))
398{
399	if (S_ISREG(buf->st_mode))
400		regular_count++;
401
402	total_count++;
403
404	return 0;
405}
406
407/*
408 * page_in_core() -	Get information on whether pages are in core.
409 *
410 * @fd:			defrag target file's descriptor.
411 * @defrag_data:	data used for defrag.
412 * @vec:		page state array.
413 * @page_num:		page number.
414 */
415static int page_in_core(int fd, struct move_extent defrag_data,
416			unsigned char **vec, unsigned int *page_num)
417{
418	long	pagesize;
419	void	*page = NULL;
420	loff_t	offset, end_offset, length;
421
422	if (vec == NULL || *vec != NULL)
423		return -1;
424
425	pagesize = sysconf(_SC_PAGESIZE);
426	if (pagesize < 0)
427		return -1;
428	/* In mmap, offset should be a multiple of the page size */
429	offset = (loff_t)defrag_data.orig_start * block_size;
430	length = (loff_t)defrag_data.len * block_size;
431	end_offset = offset + length;
432	/* Round the offset down to the nearest multiple of pagesize */
433	offset = (offset / pagesize) * pagesize;
434	length = end_offset - offset;
435
436	page = mmap(NULL, length, PROT_READ, MAP_SHARED, fd, offset);
437	if (page == MAP_FAILED)
438		return -1;
439
440	*page_num = 0;
441	*page_num = (length + pagesize - 1) / pagesize;
442	*vec = (unsigned char *)calloc(*page_num, 1);
443	if (*vec == NULL)
444		return -1;
445
446	/* Get information on whether pages are in core */
447	if (mincore(page, (size_t)length, *vec) == -1 ||
448		munmap(page, length) == -1) {
449		FREE(*vec);
450		return -1;
451	}
452
453	return 0;
454}
455
456/*
457 * defrag_fadvise() -	Predeclare an access pattern for file data.
458 *
459 * @fd:			defrag target file's descriptor.
460 * @defrag_data:	data used for defrag.
461 * @vec:		page state array.
462 * @page_num:		page number.
463 */
464static int defrag_fadvise(int fd, struct move_extent defrag_data,
465		   unsigned char *vec, unsigned int page_num)
466{
467	int	flag = 1;
468	long	pagesize = sysconf(_SC_PAGESIZE);
469	int	fadvise_flag = POSIX_FADV_DONTNEED;
470	int	sync_flag = SYNC_FILE_RANGE_WAIT_BEFORE |
471			    SYNC_FILE_RANGE_WRITE |
472			    SYNC_FILE_RANGE_WAIT_AFTER;
473	unsigned int	i;
474	loff_t	offset;
475
476	if (pagesize < 1)
477		return -1;
478
479	offset = (loff_t)defrag_data.orig_start * block_size;
480	offset = (offset / pagesize) * pagesize;
481
482	/* Sync file for fadvise process */
483	if (sync_file_range(fd, offset,
484		(loff_t)pagesize * page_num, sync_flag) < 0)
485		return -1;
486
487	/* Try to release buffer cache which this process used,
488	 * then other process can use the released buffer
489	 */
490	for (i = 0; i < page_num; i++) {
491		if ((vec[i] & 0x1) == 0) {
492			offset += pagesize;
493			continue;
494		}
495		if (posix_fadvise(fd, offset, pagesize, fadvise_flag) < 0) {
496			if ((mode_flag & DETAIL) && flag) {
497				perror("\tFailed to fadvise");
498				flag = 0;
499			}
500		}
501		offset += pagesize;
502	}
503
504	return 0;
505}
506
507/*
508 * check_free_size() -	Check if there's enough disk space.
509 *
510 * @fd:			defrag target file's descriptor.
511 * @file:		file name.
512 * @blk_count:		file blocks.
513 */
514static int check_free_size(int fd, const char *file, ext4_fsblk_t blk_count)
515{
516	ext4_fsblk_t	free_blk_count;
517	struct statfs64	fsbuf;
518
519	if (fstatfs64(fd, &fsbuf) < 0) {
520		if (mode_flag & DETAIL) {
521			PRINT_FILE_NAME(file);
522			PRINT_ERR_MSG_WITH_ERRNO(
523				"Failed to get filesystem information");
524		}
525		return -1;
526	}
527
528	/* Compute free space for root and normal user separately */
529	if (current_uid == ROOT_UID)
530		free_blk_count = fsbuf.f_bfree;
531	else
532		free_blk_count = fsbuf.f_bavail;
533
534	if (free_blk_count >= blk_count)
535		return 0;
536
537	return -ENOSPC;
538}
539
540/*
541 * file_frag_count() -	Get file fragment count.
542 *
543 * @fd:			defrag target file's descriptor.
544 */
545static int file_frag_count(int fd)
546{
547	int	ret;
548	struct fiemap	fiemap_buf;
549
550	/* When fm_extent_count is 0,
551	 * ioctl just get file fragment count.
552	 */
553	memset(&fiemap_buf, 0, sizeof(struct fiemap));
554	fiemap_buf.fm_start = 0;
555	fiemap_buf.fm_length = FIEMAP_MAX_OFFSET;
556	fiemap_buf.fm_flags |= FIEMAP_FLAG_SYNC;
557
558	ret = ioctl(fd, FS_IOC_FIEMAP, &fiemap_buf);
559	if (ret < 0)
560		return ret;
561
562	return fiemap_buf.fm_mapped_extents;
563}
564
565/*
566 * file_check() -	Check file's attributes.
567 *
568 * @fd:			defrag target file's descriptor.
569 * @buf:		a pointer of the struct stat64.
570 * @file:		file name.
571 * @extents:		file extents.
572 * @blk_count:		file blocks.
573 */
574static int file_check(int fd, const struct stat64 *buf, const char *file,
575		int extents, ext4_fsblk_t blk_count)
576{
577	int	ret;
578	struct flock	lock;
579
580	/* Write-lock check is more reliable */
581	lock.l_type = F_WRLCK;
582	lock.l_start = 0;
583	lock.l_whence = SEEK_SET;
584	lock.l_len = 0;
585
586	/* Free space */
587	ret = check_free_size(fd, file, blk_count);
588	if (ret < 0) {
589		if ((mode_flag & DETAIL) && ret == -ENOSPC) {
590			printf("\033[79;0H\033[K[%u/%u] \"%s\"\t\t"
591				"  extents: %d -> %d\n", defraged_file_count,
592				total_count, file, extents, extents);
593			IN_FTW_PRINT_ERR_MSG(
594			"Defrag size is larger than filesystem's free space");
595		}
596		return -1;
597	}
598
599	/* Access authority */
600	if (current_uid != ROOT_UID &&
601		buf->st_uid != current_uid) {
602		if (mode_flag & DETAIL) {
603			printf("\033[79;0H\033[K[%u/%u] \"%s\"\t\t"
604				"  extents: %d -> %d\n", defraged_file_count,
605				total_count, file, extents, extents);
606			IN_FTW_PRINT_ERR_MSG(
607				"File is not current user's file"
608				" or current user is not root");
609		}
610		return -1;
611	}
612
613	/* Lock status */
614	if (fcntl(fd, F_GETLK, &lock) < 0) {
615		if (mode_flag & DETAIL) {
616			PRINT_FILE_NAME(file);
617			PRINT_ERR_MSG_WITH_ERRNO(
618				"Failed to get lock information");
619		}
620		return -1;
621	} else if (lock.l_type != F_UNLCK) {
622		if (mode_flag & DETAIL) {
623			PRINT_FILE_NAME(file);
624			IN_FTW_PRINT_ERR_MSG("File has been locked");
625		}
626		return -1;
627	}
628
629	return 0;
630}
631
632/*
633 * insert_extent_by_logical() -	Sequentially insert extent by logical.
634 *
635 * @ext_list_head:	the head of logical extent list.
636 * @ext:		the extent element which will be inserted.
637 */
638static int insert_extent_by_logical(struct fiemap_extent_list **ext_list_head,
639			struct fiemap_extent_list *ext)
640{
641	struct fiemap_extent_list	*ext_list_tmp = *ext_list_head;
642
643	if (ext == NULL)
644		goto out;
645
646	/* First element */
647	if (*ext_list_head == NULL) {
648		(*ext_list_head) = ext;
649		(*ext_list_head)->prev = *ext_list_head;
650		(*ext_list_head)->next = *ext_list_head;
651		return 0;
652	}
653
654	if (ext->data.logical <= ext_list_tmp->data.logical) {
655		/* Insert before head */
656		if (ext_list_tmp->data.logical <
657			ext->data.logical + ext->data.len)
658			/* Overlap */
659			goto out;
660		/* Adjust head */
661		*ext_list_head = ext;
662	} else {
663		/* Insert into the middle or last of the list */
664		do {
665			if (ext->data.logical < ext_list_tmp->data.logical)
666				break;
667			ext_list_tmp = ext_list_tmp->next;
668		} while (ext_list_tmp != (*ext_list_head));
669		if (ext->data.logical <
670		    ext_list_tmp->prev->data.logical +
671			ext_list_tmp->prev->data.len)
672			/* Overlap */
673			goto out;
674
675		if (ext_list_tmp != *ext_list_head &&
676		    ext_list_tmp->data.logical <
677		    ext->data.logical + ext->data.len)
678			/* Overlap */
679			goto out;
680	}
681	ext_list_tmp = ext_list_tmp->prev;
682	/* Insert "ext" after "ext_list_tmp" */
683	insert(ext_list_tmp, ext);
684	return 0;
685out:
686	errno = EINVAL;
687	return -1;
688}
689
690/*
691 * insert_extent_by_physical() -	Sequentially insert extent by physical.
692 *
693 * @ext_list_head:	the head of physical extent list.
694 * @ext:		the extent element which will be inserted.
695 */
696static int insert_extent_by_physical(struct fiemap_extent_list **ext_list_head,
697			struct fiemap_extent_list *ext)
698{
699	struct fiemap_extent_list	*ext_list_tmp = *ext_list_head;
700
701	if (ext == NULL)
702		goto out;
703
704	/* First element */
705	if (*ext_list_head == NULL) {
706		(*ext_list_head) = ext;
707		(*ext_list_head)->prev = *ext_list_head;
708		(*ext_list_head)->next = *ext_list_head;
709		return 0;
710	}
711
712	if (ext->data.physical <= ext_list_tmp->data.physical) {
713		/* Insert before head */
714		if (ext_list_tmp->data.physical <
715					ext->data.physical + ext->data.len)
716			/* Overlap */
717			goto out;
718		/* Adjust head */
719		*ext_list_head = ext;
720	} else {
721		/* Insert into the middle or last of the list */
722		do {
723			if (ext->data.physical < ext_list_tmp->data.physical)
724				break;
725			ext_list_tmp = ext_list_tmp->next;
726		} while (ext_list_tmp != (*ext_list_head));
727		if (ext->data.physical <
728		    ext_list_tmp->prev->data.physical +
729				ext_list_tmp->prev->data.len)
730			/* Overlap */
731			goto out;
732
733		if (ext_list_tmp != *ext_list_head &&
734		    ext_list_tmp->data.physical <
735				ext->data.physical + ext->data.len)
736			/* Overlap */
737			goto out;
738	}
739	ext_list_tmp = ext_list_tmp->prev;
740	/* Insert "ext" after "ext_list_tmp" */
741	insert(ext_list_tmp, ext);
742	return 0;
743out:
744	errno = EINVAL;
745	return -1;
746}
747
748/*
749 * insert_exts_group() -	Insert a exts_group.
750 *
751 * @ext_group_head:		the head of a exts_group list.
752 * @exts_group:			the exts_group element which will be inserted.
753 */
754static int insert_exts_group(struct fiemap_extent_group **ext_group_head,
755				struct fiemap_extent_group *exts_group)
756{
757	struct fiemap_extent_group	*ext_group_tmp = NULL;
758
759	if (exts_group == NULL) {
760		errno = EINVAL;
761		return -1;
762	}
763
764	/* Initialize list */
765	if (*ext_group_head == NULL) {
766		(*ext_group_head) = exts_group;
767		(*ext_group_head)->prev = *ext_group_head;
768		(*ext_group_head)->next = *ext_group_head;
769		return 0;
770	}
771
772	ext_group_tmp = (*ext_group_head)->prev;
773	insert(ext_group_tmp, exts_group);
774
775	return 0;
776}
777
778/*
779 * join_extents() -		Find continuous region(exts_group).
780 *
781 * @ext_list_head:		the head of the extent list.
782 * @ext_group_head:		the head of the target exts_group list.
783 */
784static int join_extents(struct fiemap_extent_list *ext_list_head,
785		struct fiemap_extent_group **ext_group_head)
786{
787	__u64	len = ext_list_head->data.len;
788	struct fiemap_extent_list *ext_list_start = ext_list_head;
789	struct fiemap_extent_list *ext_list_tmp = ext_list_head->next;
790
791	do {
792		struct fiemap_extent_group	*ext_group_tmp = NULL;
793
794		/* This extent and previous extent are not continuous,
795		 * so, all previous extents are treated as an extent group.
796		 */
797		if ((ext_list_tmp->prev->data.logical +
798			ext_list_tmp->prev->data.len)
799				!= ext_list_tmp->data.logical) {
800			ext_group_tmp =
801				malloc(sizeof(struct fiemap_extent_group));
802			if (ext_group_tmp == NULL)
803				return -1;
804
805			memset(ext_group_tmp, 0,
806				sizeof(struct fiemap_extent_group));
807			ext_group_tmp->len = len;
808			ext_group_tmp->start = ext_list_start;
809			ext_group_tmp->end = ext_list_tmp->prev;
810
811			if (insert_exts_group(ext_group_head,
812				ext_group_tmp) < 0) {
813				FREE(ext_group_tmp);
814				return -1;
815			}
816			ext_list_start = ext_list_tmp;
817			len = ext_list_tmp->data.len;
818			ext_list_tmp = ext_list_tmp->next;
819			continue;
820		}
821
822		/* This extent and previous extent are continuous,
823		 * so, they belong to the same extent group, and we check
824		 * if the next extent belongs to the same extent group.
825		 */
826		len += ext_list_tmp->data.len;
827		ext_list_tmp = ext_list_tmp->next;
828	} while (ext_list_tmp != ext_list_head->next);
829
830	return 0;
831}
832
833/*
834 * get_file_extents() -	Get file's extent list.
835 *
836 * @fd:			defrag target file's descriptor.
837 * @ext_list_head:	the head of the extent list.
838 */
839static int get_file_extents(int fd, struct fiemap_extent_list **ext_list_head)
840{
841	__u32	i;
842	int	ret;
843	int	ext_buf_size, fie_buf_size;
844	__u64	pos = 0;
845	struct fiemap	*fiemap_buf = NULL;
846	struct fiemap_extent	*ext_buf = NULL;
847	struct fiemap_extent_list	*ext_list = NULL;
848
849	/* Convert units, in bytes.
850	 * Be careful : now, physical block number in extent is 48bit,
851	 * and the maximum blocksize for ext4 is 4K(12bit),
852	 * so there is no overflow, but in future it may be changed.
853	 */
854
855	/* Alloc space for fiemap */
856	ext_buf_size = EXTENT_MAX_COUNT * sizeof(struct fiemap_extent);
857	fie_buf_size = sizeof(struct fiemap) + ext_buf_size;
858
859	fiemap_buf = malloc(fie_buf_size);
860	if (fiemap_buf == NULL)
861		return -1;
862
863	ext_buf = fiemap_buf->fm_extents;
864	memset(fiemap_buf, 0, fie_buf_size);
865	fiemap_buf->fm_length = FIEMAP_MAX_OFFSET;
866	fiemap_buf->fm_flags |= FIEMAP_FLAG_SYNC;
867	fiemap_buf->fm_extent_count = EXTENT_MAX_COUNT;
868
869	do {
870		fiemap_buf->fm_start = pos;
871		memset(ext_buf, 0, ext_buf_size);
872		ret = ioctl(fd, FS_IOC_FIEMAP, fiemap_buf);
873		if (ret < 0 || fiemap_buf->fm_mapped_extents == 0)
874			goto out;
875		for (i = 0; i < fiemap_buf->fm_mapped_extents; i++) {
876			ext_list = NULL;
877			ext_list = malloc(sizeof(struct fiemap_extent_list));
878			if (ext_list == NULL)
879				goto out;
880
881			ext_list->data.physical = ext_buf[i].fe_physical
882						/ block_size;
883			ext_list->data.logical = ext_buf[i].fe_logical
884						/ block_size;
885			ext_list->data.len = ext_buf[i].fe_length
886						/ block_size;
887
888			ret = insert_extent_by_physical(
889					ext_list_head, ext_list);
890			if (ret < 0) {
891				FREE(ext_list);
892				goto out;
893			}
894		}
895		/* Record file's logical offset this time */
896		pos = ext_buf[EXTENT_MAX_COUNT-1].fe_logical +
897			ext_buf[EXTENT_MAX_COUNT-1].fe_length;
898		/*
899		 * If fm_extents array has been filled and
900		 * there are extents left, continue to cycle.
901		 */
902	} while (fiemap_buf->fm_mapped_extents
903					== EXTENT_MAX_COUNT &&
904		!(ext_buf[EXTENT_MAX_COUNT-1].fe_flags
905					& FIEMAP_EXTENT_LAST));
906
907	FREE(fiemap_buf);
908	return 0;
909out:
910	FREE(fiemap_buf);
911	return -1;
912}
913
914/*
915 * get_logical_count() -	Get the file logical extents count.
916 *
917 * @logical_list_head:	the head of the logical extent list.
918 */
919static int get_logical_count(struct fiemap_extent_list *logical_list_head)
920{
921	int ret = 0;
922	struct fiemap_extent_list *ext_list_tmp  = logical_list_head;
923
924	do {
925		ret++;
926		ext_list_tmp = ext_list_tmp->next;
927	} while (ext_list_tmp != logical_list_head);
928
929	return ret;
930}
931
932/*
933 * get_physical_count() -	Get the file physical extents count.
934 *
935 * @physical_list_head:	the head of the physical extent list.
936 */
937static int get_physical_count(struct fiemap_extent_list *physical_list_head)
938{
939	int ret = 0;
940	struct fiemap_extent_list *ext_list_tmp = physical_list_head;
941
942	do {
943		if ((ext_list_tmp->data.physical + ext_list_tmp->data.len)
944				!= ext_list_tmp->next->data.physical) {
945			/* This extent and next extent are not continuous. */
946			ret++;
947		}
948
949		ext_list_tmp = ext_list_tmp->next;
950	} while (ext_list_tmp != physical_list_head);
951
952	return ret;
953}
954
955/*
956 * change_physical_to_logical() -	Change list from physical to logical.
957 *
958 * @physical_list_head:	the head of physical extent list.
959 * @logical_list_head:	the head of logical extent list.
960 */
961static int change_physical_to_logical(
962			struct fiemap_extent_list **physical_list_head,
963			struct fiemap_extent_list **logical_list_head)
964{
965	int ret;
966	struct fiemap_extent_list *ext_list_tmp = *physical_list_head;
967	struct fiemap_extent_list *ext_list_next = ext_list_tmp->next;
968
969	while (1) {
970		if (ext_list_tmp == ext_list_next) {
971			ret = insert_extent_by_logical(
972				logical_list_head, ext_list_tmp);
973			if (ret < 0)
974				return -1;
975
976			*physical_list_head = NULL;
977			break;
978		}
979
980		ext_list_tmp->prev->next = ext_list_tmp->next;
981		ext_list_tmp->next->prev = ext_list_tmp->prev;
982		*physical_list_head = ext_list_next;
983
984		ret = insert_extent_by_logical(
985			logical_list_head, ext_list_tmp);
986		if (ret < 0) {
987			FREE(ext_list_tmp);
988			return -1;
989		}
990		ext_list_tmp = ext_list_next;
991		ext_list_next = ext_list_next->next;
992	}
993
994	return 0;
995}
996
997/* get_file_blocks() -  Get total file blocks.
998 *
999 * @ext_list_head:	the extent list head of the target file
1000 */
1001static ext4_fsblk_t get_file_blocks(struct fiemap_extent_list *ext_list_head)
1002{
1003	ext4_fsblk_t blk_count = 0;
1004	struct fiemap_extent_list *ext_list_tmp = ext_list_head;
1005
1006	do {
1007		blk_count += ext_list_tmp->data.len;
1008		ext_list_tmp = ext_list_tmp->next;
1009	} while (ext_list_tmp != ext_list_head);
1010
1011	return blk_count;
1012}
1013
1014/*
1015 * free_ext() -		Free the extent list.
1016 *
1017 * @ext_list_head:	the extent list head of which will be free.
1018 */
1019static void free_ext(struct fiemap_extent_list *ext_list_head)
1020{
1021	struct fiemap_extent_list	*ext_list_tmp = NULL;
1022
1023	if (ext_list_head == NULL)
1024		return;
1025
1026	while (ext_list_head->next != ext_list_head) {
1027		ext_list_tmp = ext_list_head;
1028		ext_list_head->prev->next = ext_list_head->next;
1029		ext_list_head->next->prev = ext_list_head->prev;
1030		ext_list_head = ext_list_head->next;
1031		free(ext_list_tmp);
1032	}
1033	free(ext_list_head);
1034}
1035
1036/*
1037 * free_exts_group() -		Free the exts_group.
1038 *
1039 * @*ext_group_head:	the exts_group list head which will be free.
1040 */
1041static void free_exts_group(struct fiemap_extent_group *ext_group_head)
1042{
1043	struct fiemap_extent_group	*ext_group_tmp = NULL;
1044
1045	if (ext_group_head == NULL)
1046		return;
1047
1048	while (ext_group_head->next != ext_group_head) {
1049		ext_group_tmp = ext_group_head;
1050		ext_group_head->prev->next = ext_group_head->next;
1051		ext_group_head->next->prev = ext_group_head->prev;
1052		ext_group_head = ext_group_head->next;
1053		free(ext_group_tmp);
1054	}
1055	free(ext_group_head);
1056}
1057
1058/*
1059 * get_best_count() -	Get the file best extents count.
1060 *
1061 * @block_count:		the file's physical block count.
1062 */
1063static int get_best_count(ext4_fsblk_t block_count)
1064{
1065	int ret;
1066	unsigned int flex_bg_num;
1067
1068	/* Calcuate best extents count */
1069	if (feature_incompat & EXT4_FEATURE_INCOMPAT_FLEX_BG) {
1070		flex_bg_num = 1 << log_groups_per_flex;
1071		ret = ((block_count - 1) /
1072			((ext4_fsblk_t)blocks_per_group *
1073				flex_bg_num)) + 1;
1074	} else
1075		ret = ((block_count - 1) / blocks_per_group) + 1;
1076
1077	return ret;
1078}
1079
1080
1081/*
1082 * file_statistic() -	Get statistic info of the file's fragments.
1083 *
1084 * @file:		the file's name.
1085 * @buf:		the pointer of the struct stat64.
1086 * @flag:		file type.
1087 * @ftwbuf:		the pointer of a struct FTW.
1088 */
1089static int file_statistic(const char *file, const struct stat64 *buf,
1090			int flag EXT2FS_ATTR((unused)),
1091			struct FTW *ftwbuf EXT2FS_ATTR((unused)))
1092{
1093	int	fd;
1094	int	ret;
1095	int	now_ext_count, best_ext_count = 0, physical_ext_count;
1096	int	i, j;
1097	__u64	size_per_ext = 0;
1098	float	ratio = 0.0;
1099	ext4_fsblk_t	blk_count = 0;
1100	char	msg_buffer[PATH_MAX + 24];
1101	struct fiemap_extent_list *physical_list_head = NULL;
1102	struct fiemap_extent_list *logical_list_head = NULL;
1103
1104	defraged_file_count++;
1105
1106	if (mode_flag & DETAIL) {
1107		if (total_count == 1 && regular_count == 1)
1108			printf("<File>\n");
1109		else {
1110			printf("[%u/%u]", defraged_file_count, total_count);
1111			fflush(stdout);
1112		}
1113	}
1114	if (lost_found_dir[0] != '\0' &&
1115	    !memcmp(file, lost_found_dir, strnlen(lost_found_dir, PATH_MAX))) {
1116		if (mode_flag & DETAIL) {
1117			PRINT_FILE_NAME(file);
1118			STATISTIC_ERR_MSG(NGMSG_LOST_FOUND);
1119		}
1120			return 0;
1121	}
1122
1123	if (!S_ISREG(buf->st_mode)) {
1124		if (mode_flag & DETAIL) {
1125			PRINT_FILE_NAME(file);
1126			STATISTIC_ERR_MSG(NGMSG_FILE_UNREG);
1127		}
1128		return 0;
1129	}
1130
1131	/* Access authority */
1132	if (current_uid != ROOT_UID &&
1133		buf->st_uid != current_uid) {
1134		if (mode_flag & DETAIL) {
1135			PRINT_FILE_NAME(file);
1136			STATISTIC_ERR_MSG(
1137				"File is not current user's file"
1138				" or current user is not root");
1139		}
1140		return 0;
1141	}
1142
1143	/* Empty file */
1144	if (buf->st_size == 0) {
1145		if (mode_flag & DETAIL) {
1146			PRINT_FILE_NAME(file);
1147			STATISTIC_ERR_MSG("File size is 0");
1148		}
1149		return 0;
1150	}
1151
1152	/* Has no blocks */
1153	if (buf->st_blocks == 0) {
1154		if (mode_flag & DETAIL) {
1155			PRINT_FILE_NAME(file);
1156			STATISTIC_ERR_MSG("File has no blocks");
1157		}
1158		return 0;
1159	}
1160
1161	fd = open64(file, O_RDONLY);
1162	if (fd < 0) {
1163		if (mode_flag & DETAIL) {
1164			PRINT_FILE_NAME(file);
1165			STATISTIC_ERR_MSG_WITH_ERRNO(NGMSG_FILE_OPEN);
1166		}
1167		return 0;
1168	}
1169
1170	/* Get file's physical extents  */
1171	ret = get_file_extents(fd, &physical_list_head);
1172	if (ret < 0) {
1173		if (mode_flag & DETAIL) {
1174			PRINT_FILE_NAME(file);
1175			STATISTIC_ERR_MSG_WITH_ERRNO(NGMSG_FILE_EXTENT);
1176		}
1177		goto out;
1178	}
1179
1180	/* Get the count of file's continuous physical region */
1181	physical_ext_count = get_physical_count(physical_list_head);
1182
1183	/* Change list from physical to logical */
1184	ret = change_physical_to_logical(&physical_list_head,
1185							&logical_list_head);
1186	if (ret < 0) {
1187		if (mode_flag & DETAIL) {
1188			PRINT_FILE_NAME(file);
1189			STATISTIC_ERR_MSG_WITH_ERRNO(NGMSG_FILE_EXTENT);
1190		}
1191		goto out;
1192	}
1193
1194	/* Count file fragments before defrag */
1195	now_ext_count = get_logical_count(logical_list_head);
1196
1197	if (current_uid == ROOT_UID) {
1198		/* Calculate the size per extent */
1199		blk_count = get_file_blocks(logical_list_head);
1200
1201		best_ext_count = get_best_count(blk_count);
1202
1203		/* e4defrag rounds size_per_ext up to a block size boundary */
1204		size_per_ext = blk_count * (buf->st_blksize / 1024) /
1205							now_ext_count;
1206
1207		ratio = (float)(physical_ext_count - best_ext_count) * 100 /
1208							blk_count;
1209
1210		extents_before_defrag += now_ext_count;
1211		extents_after_defrag += best_ext_count;
1212		files_block_count += blk_count;
1213	}
1214
1215	if (total_count == 1 && regular_count == 1) {
1216		/* File only */
1217		if (mode_flag & DETAIL) {
1218			int count = 0;
1219			struct fiemap_extent_list *ext_list_tmp =
1220						logical_list_head;
1221
1222			/* Print extents info */
1223			do {
1224				count++;
1225				printf("[ext %d]:\tstart %llu:\tlogical "
1226						"%llu:\tlen %llu\n", count,
1227						ext_list_tmp->data.physical,
1228						ext_list_tmp->data.logical,
1229						ext_list_tmp->data.len);
1230				ext_list_tmp = ext_list_tmp->next;
1231			} while (ext_list_tmp != logical_list_head);
1232
1233		} else {
1234			printf("%-40s%10s/%-10s%9s\n",
1235					"<File>", "now", "best", "size/ext");
1236			if (current_uid == ROOT_UID) {
1237				if (strlen(file) > 40)
1238					printf("%s\n%50d/%-10d%6llu KB\n",
1239						file, now_ext_count,
1240						best_ext_count, size_per_ext);
1241				else
1242					printf("%-40s%10d/%-10d%6llu KB\n",
1243						file, now_ext_count,
1244						best_ext_count, size_per_ext);
1245			} else {
1246				if (strlen(file) > 40)
1247					printf("%s\n%50d/%-10s%7s\n",
1248							file, now_ext_count,
1249							"-", "-");
1250				else
1251					printf("%-40s%10d/%-10s%7s\n",
1252							file, now_ext_count,
1253							"-", "-");
1254			}
1255		}
1256		succeed_cnt++;
1257		goto out;
1258	}
1259
1260	if (mode_flag & DETAIL) {
1261		/* Print statistic info */
1262		sprintf(msg_buffer, "[%u/%u]%s",
1263				defraged_file_count, total_count, file);
1264		if (current_uid == ROOT_UID) {
1265			if (strlen(msg_buffer) > 40)
1266				printf("\033[79;0H\033[K%s\n"
1267						"%50d/%-10d%6llu KB\n",
1268						msg_buffer, now_ext_count,
1269						best_ext_count, size_per_ext);
1270			else
1271				printf("\033[79;0H\033[K%-40s"
1272						"%10d/%-10d%6llu KB\n",
1273						msg_buffer, now_ext_count,
1274						best_ext_count, size_per_ext);
1275		} else {
1276			if (strlen(msg_buffer) > 40)
1277				printf("\033[79;0H\033[K%s\n%50d/%-10s%7s\n",
1278						msg_buffer, now_ext_count,
1279							"-", "-");
1280			else
1281				printf("\033[79;0H\033[K%-40s%10d/%-10s%7s\n",
1282						msg_buffer, now_ext_count,
1283							"-", "-");
1284		}
1285	}
1286
1287	for (i = 0; i < SHOW_FRAG_FILES; i++) {
1288		if (ratio >= frag_rank[i].ratio) {
1289			for (j = SHOW_FRAG_FILES - 1; j > i; j--) {
1290				memset(&frag_rank[j], 0,
1291					sizeof(struct frag_statistic_ino));
1292				strncpy(frag_rank[j].msg_buffer,
1293					frag_rank[j - 1].msg_buffer,
1294					strnlen(frag_rank[j - 1].msg_buffer,
1295					PATH_MAX));
1296				frag_rank[j].now_count =
1297					frag_rank[j - 1].now_count;
1298				frag_rank[j].best_count =
1299					frag_rank[j - 1].best_count;
1300				frag_rank[j].size_per_ext =
1301					frag_rank[j - 1].size_per_ext;
1302				frag_rank[j].ratio =
1303					frag_rank[j - 1].ratio;
1304			}
1305			memset(&frag_rank[i], 0,
1306					sizeof(struct frag_statistic_ino));
1307			strncpy(frag_rank[i].msg_buffer, file,
1308						strnlen(file, PATH_MAX));
1309			frag_rank[i].now_count = now_ext_count;
1310			frag_rank[i].best_count = best_ext_count;
1311			frag_rank[i].size_per_ext = size_per_ext;
1312			frag_rank[i].ratio = ratio;
1313			break;
1314		}
1315	}
1316
1317	succeed_cnt++;
1318
1319out:
1320	close(fd);
1321	free_ext(physical_list_head);
1322	free_ext(logical_list_head);
1323	return 0;
1324}
1325
1326/*
1327 * print_progress -	Print defrag progress
1328 *
1329 * @file:		file name.
1330 * @start:		logical offset for defrag target file
1331 * @file_size:		defrag target filesize
1332 */
1333static void print_progress(const char *file, loff_t start, loff_t file_size)
1334{
1335	int percent = (start * 100) / file_size;
1336	printf("\033[79;0H\033[K[%u/%u]%s:\t%3d%%",
1337		defraged_file_count, total_count, file, min(percent, 100));
1338	fflush(stdout);
1339
1340	return;
1341}
1342
1343/*
1344 * call_defrag() -	Execute the defrag program.
1345 *
1346 * @fd:			target file descriptor.
1347 * @donor_fd:		donor file descriptor.
1348 * @file:			target file name.
1349 * @buf:			pointer of the struct stat64.
1350 * @ext_list_head:	head of the extent list.
1351 */
1352static int call_defrag(int fd, int donor_fd, const char *file,
1353	const struct stat64 *buf, struct fiemap_extent_list *ext_list_head)
1354{
1355	loff_t	start = 0;
1356	unsigned int	page_num;
1357	unsigned char	*vec = NULL;
1358	int	defraged_ret = 0;
1359	int	ret;
1360	struct move_extent	move_data;
1361	struct fiemap_extent_list	*ext_list_tmp = NULL;
1362
1363	memset(&move_data, 0, sizeof(struct move_extent));
1364	move_data.donor_fd = donor_fd;
1365
1366	/* Print defrag progress */
1367	print_progress(file, start, buf->st_size);
1368
1369	ext_list_tmp = ext_list_head;
1370	do {
1371		move_data.orig_start = ext_list_tmp->data.logical;
1372		/* Logical offset of orig and donor should be same */
1373		move_data.donor_start = move_data.orig_start;
1374		move_data.len = ext_list_tmp->data.len;
1375		move_data.moved_len = 0;
1376
1377		ret = page_in_core(fd, move_data, &vec, &page_num);
1378		if (ret < 0) {
1379			if (mode_flag & DETAIL) {
1380				printf("\n");
1381				PRINT_ERR_MSG_WITH_ERRNO(
1382						"Failed to get file map");
1383			} else {
1384				printf("\t[ NG ]\n");
1385			}
1386			return -1;
1387		}
1388
1389		/* EXT4_IOC_MOVE_EXT */
1390		defraged_ret =
1391			ioctl(fd, EXT4_IOC_MOVE_EXT, &move_data);
1392
1393		/* Free pages */
1394		ret = defrag_fadvise(fd, move_data, vec, page_num);
1395		if (vec) {
1396			free(vec);
1397			vec = NULL;
1398		}
1399		if (ret < 0) {
1400			if (mode_flag & DETAIL) {
1401				printf("\n");
1402				PRINT_ERR_MSG_WITH_ERRNO(
1403					"Failed to free page");
1404			} else {
1405				printf("\t[ NG ]\n");
1406			}
1407			return -1;
1408		}
1409
1410		if (defraged_ret < 0) {
1411			if (mode_flag & DETAIL) {
1412				printf("\n");
1413				PRINT_ERR_MSG_WITH_ERRNO(
1414					"Failed to defrag with "
1415					"EXT4_IOC_MOVE_EXT ioctl");
1416				if (errno == ENOTTY)
1417					printf("\tAt least 2.6.31-rc1 of "
1418						"vanilla kernel is required\n");
1419			} else {
1420				printf("\t[ NG ]\n");
1421			}
1422			return -1;
1423		}
1424		/* Adjust logical offset for next ioctl */
1425		move_data.orig_start += move_data.moved_len;
1426		move_data.donor_start = move_data.orig_start;
1427
1428		start = move_data.orig_start * buf->st_blksize;
1429
1430		/* Print defrag progress */
1431		print_progress(file, start, buf->st_size);
1432
1433		/* End of file */
1434		if (start >= buf->st_size)
1435			break;
1436
1437		ext_list_tmp = ext_list_tmp->next;
1438	} while (ext_list_tmp != ext_list_head);
1439
1440	return 0;
1441}
1442
1443/*
1444 * file_defrag() -		Check file attributes and call ioctl to defrag.
1445 *
1446 * @file:		the file's name.
1447 * @buf:		the pointer of the struct stat64.
1448 * @flag:		file type.
1449 * @ftwbuf:		the pointer of a struct FTW.
1450 */
1451static int file_defrag(const char *file, const struct stat64 *buf,
1452			int flag EXT2FS_ATTR((unused)),
1453			struct FTW *ftwbuf EXT2FS_ATTR((unused)))
1454{
1455	int	fd;
1456	int	donor_fd = -1;
1457	int	ret;
1458	int	best;
1459	int	file_frags_start, file_frags_end;
1460	int	orig_physical_cnt, donor_physical_cnt = 0;
1461	char	tmp_inode_name[PATH_MAX + 8];
1462	ext4_fsblk_t			blk_count = 0;
1463	struct fiemap_extent_list	*orig_list_physical = NULL;
1464	struct fiemap_extent_list	*orig_list_logical = NULL;
1465	struct fiemap_extent_list	*donor_list_physical = NULL;
1466	struct fiemap_extent_list	*donor_list_logical = NULL;
1467	struct fiemap_extent_group	*orig_group_head = NULL;
1468	struct fiemap_extent_group	*orig_group_tmp = NULL;
1469
1470	defraged_file_count++;
1471
1472	if (mode_flag & DETAIL) {
1473		printf("[%u/%u]", defraged_file_count, total_count);
1474		fflush(stdout);
1475	}
1476
1477	if (lost_found_dir[0] != '\0' &&
1478	    !memcmp(file, lost_found_dir, strnlen(lost_found_dir, PATH_MAX))) {
1479		if (mode_flag & DETAIL) {
1480			PRINT_FILE_NAME(file);
1481			IN_FTW_PRINT_ERR_MSG(NGMSG_LOST_FOUND);
1482		}
1483		return 0;
1484	}
1485
1486	if (!S_ISREG(buf->st_mode)) {
1487		if (mode_flag & DETAIL) {
1488			PRINT_FILE_NAME(file);
1489			IN_FTW_PRINT_ERR_MSG(NGMSG_FILE_UNREG);
1490		}
1491		return 0;
1492	}
1493
1494	/* Empty file */
1495	if (buf->st_size == 0) {
1496		if (mode_flag & DETAIL) {
1497			PRINT_FILE_NAME(file);
1498			IN_FTW_PRINT_ERR_MSG("File size is 0");
1499		}
1500		return 0;
1501	}
1502
1503	/* Has no blocks */
1504	if (buf->st_blocks == 0) {
1505		if (mode_flag & DETAIL) {
1506			PRINT_FILE_NAME(file);
1507			STATISTIC_ERR_MSG("File has no blocks");
1508		}
1509		return 0;
1510	}
1511
1512	fd = open64(file, O_RDWR);
1513	if (fd < 0) {
1514		if (mode_flag & DETAIL) {
1515			PRINT_FILE_NAME(file);
1516			PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_OPEN);
1517		}
1518		return 0;
1519	}
1520
1521	/* Get file's extents */
1522	ret = get_file_extents(fd, &orig_list_physical);
1523	if (ret < 0) {
1524		if (mode_flag & DETAIL) {
1525			PRINT_FILE_NAME(file);
1526			PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_EXTENT);
1527		}
1528		goto out;
1529	}
1530
1531	/* Get the count of file's continuous physical region */
1532	orig_physical_cnt = get_physical_count(orig_list_physical);
1533
1534	/* Change list from physical to logical */
1535	ret = change_physical_to_logical(&orig_list_physical,
1536							&orig_list_logical);
1537	if (ret < 0) {
1538		if (mode_flag & DETAIL) {
1539			PRINT_FILE_NAME(file);
1540			PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_EXTENT);
1541		}
1542		goto out;
1543	}
1544
1545	/* Count file fragments before defrag */
1546	file_frags_start = get_logical_count(orig_list_logical);
1547
1548	blk_count = get_file_blocks(orig_list_logical);
1549	if (file_check(fd, buf, file, file_frags_start, blk_count) < 0)
1550		goto out;
1551
1552	if (fsync(fd) < 0) {
1553		if (mode_flag & DETAIL) {
1554			PRINT_FILE_NAME(file);
1555			PRINT_ERR_MSG_WITH_ERRNO("Failed to sync(fsync)");
1556		}
1557		goto out;
1558	}
1559
1560	if (current_uid == ROOT_UID)
1561		best = get_best_count(blk_count);
1562	else
1563		best = 1;
1564
1565	if (file_frags_start <= best)
1566		goto check_improvement;
1567
1568	/* Combine extents to group */
1569	ret = join_extents(orig_list_logical, &orig_group_head);
1570	if (ret < 0) {
1571		if (mode_flag & DETAIL) {
1572			PRINT_FILE_NAME(file);
1573			PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_EXTENT);
1574		}
1575		goto out;
1576	}
1577
1578	/* Create donor inode */
1579	memset(tmp_inode_name, 0, PATH_MAX + 8);
1580	sprintf(tmp_inode_name, "%.*s.defrag",
1581				(int)strnlen(file, PATH_MAX), file);
1582	donor_fd = open64(tmp_inode_name, O_WRONLY | O_CREAT | O_EXCL, S_IRUSR);
1583	if (donor_fd < 0) {
1584		if (mode_flag & DETAIL) {
1585			PRINT_FILE_NAME(file);
1586			if (errno == EEXIST)
1587				PRINT_ERR_MSG_WITH_ERRNO(
1588				"File is being defraged by other program");
1589			else
1590				PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_OPEN);
1591		}
1592		goto out;
1593	}
1594
1595	/* Unlink donor inode */
1596	ret = unlink(tmp_inode_name);
1597	if (ret < 0) {
1598		if (mode_flag & DETAIL) {
1599			PRINT_FILE_NAME(file);
1600			PRINT_ERR_MSG_WITH_ERRNO("Failed to unlink");
1601		}
1602		goto out;
1603	}
1604
1605	/* Allocate space for donor inode */
1606	orig_group_tmp = orig_group_head;
1607	do {
1608		ret = fallocate64(donor_fd, 0,
1609		  (loff_t)orig_group_tmp->start->data.logical * block_size,
1610		  (loff_t)orig_group_tmp->len * block_size);
1611		if (ret < 0) {
1612			if (mode_flag & DETAIL) {
1613				PRINT_FILE_NAME(file);
1614				PRINT_ERR_MSG_WITH_ERRNO("Failed to fallocate");
1615			}
1616			goto out;
1617		}
1618
1619		orig_group_tmp = orig_group_tmp->next;
1620	} while (orig_group_tmp != orig_group_head);
1621
1622	/* Get donor inode's extents */
1623	ret = get_file_extents(donor_fd, &donor_list_physical);
1624	if (ret < 0) {
1625		if (mode_flag & DETAIL) {
1626			PRINT_FILE_NAME(file);
1627			PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_EXTENT);
1628		}
1629		goto out;
1630	}
1631
1632	/* Calcuate donor inode's continuous physical region */
1633	donor_physical_cnt = get_physical_count(donor_list_physical);
1634
1635	/* Change donor extent list from physical to logical */
1636	ret = change_physical_to_logical(&donor_list_physical,
1637							&donor_list_logical);
1638	if (ret < 0) {
1639		if (mode_flag & DETAIL) {
1640			PRINT_FILE_NAME(file);
1641			PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_EXTENT);
1642		}
1643		goto out;
1644	}
1645
1646check_improvement:
1647	if (mode_flag & DETAIL) {
1648		if (file_frags_start != 1)
1649			frag_files_before_defrag++;
1650
1651		extents_before_defrag += file_frags_start;
1652	}
1653
1654	if (file_frags_start <= best ||
1655			orig_physical_cnt <= donor_physical_cnt) {
1656		printf("\033[79;0H\033[K[%u/%u]%s:\t%3d%%",
1657			defraged_file_count, total_count, file, 100);
1658		if (mode_flag & DETAIL)
1659			printf("  extents: %d -> %d",
1660				file_frags_start, file_frags_start);
1661
1662		printf("\t[ OK ]\n");
1663		succeed_cnt++;
1664
1665		if (file_frags_start != 1)
1666			frag_files_after_defrag++;
1667
1668		extents_after_defrag += file_frags_start;
1669		goto out;
1670	}
1671
1672	/* Defrag the file */
1673	ret = call_defrag(fd, donor_fd, file, buf, donor_list_logical);
1674
1675	/* Count file fragments after defrag and print extents info */
1676	if (mode_flag & DETAIL) {
1677		file_frags_end = file_frag_count(fd);
1678		if (file_frags_end < 0) {
1679			printf("\n");
1680			PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_INFO);
1681			goto out;
1682		}
1683
1684		if (file_frags_end != 1)
1685			frag_files_after_defrag++;
1686
1687		extents_after_defrag += file_frags_end;
1688
1689		if (ret < 0)
1690			goto out;
1691
1692		printf("  extents: %d -> %d",
1693			file_frags_start, file_frags_end);
1694		fflush(stdout);
1695	}
1696
1697	if (ret < 0)
1698		goto out;
1699
1700	printf("\t[ OK ]\n");
1701	fflush(stdout);
1702	succeed_cnt++;
1703
1704out:
1705	close(fd);
1706	if (donor_fd != -1)
1707		close(donor_fd);
1708	free_ext(orig_list_physical);
1709	free_ext(orig_list_logical);
1710	free_ext(donor_list_physical);
1711	free_exts_group(orig_group_head);
1712	return 0;
1713}
1714
1715/*
1716 * main() -		Ext4 online defrag.
1717 *
1718 * @argc:		the number of parameter.
1719 * @argv[]:		the pointer array of parameter.
1720 */
1721int main(int argc, char *argv[])
1722{
1723	int	opt;
1724	int	i, j, ret = 0;
1725	int	flags = FTW_PHYS | FTW_MOUNT;
1726	int	arg_type = -1;
1727	int	success_flag = 0;
1728	char	dir_name[PATH_MAX + 1];
1729	char	dev_name[PATH_MAX + 1];
1730	struct stat64	buf;
1731	ext2_filsys fs = NULL;
1732
1733	/* Parse arguments */
1734	if (argc == 1)
1735		goto out;
1736
1737	while ((opt = getopt(argc, argv, "vc")) != EOF) {
1738		switch (opt) {
1739		case 'v':
1740			mode_flag |= DETAIL;
1741			break;
1742		case 'c':
1743			mode_flag |= STATISTIC;
1744			break;
1745		default:
1746			goto out;
1747		}
1748	}
1749
1750	if (argc == optind)
1751		goto out;
1752
1753	current_uid = getuid();
1754
1755	/* Main process */
1756	for (i = optind; i < argc; i++) {
1757		succeed_cnt = 0;
1758		regular_count = 0;
1759		total_count = 0;
1760		frag_files_before_defrag = 0;
1761		frag_files_after_defrag = 0;
1762		extents_before_defrag = 0;
1763		extents_after_defrag = 0;
1764		defraged_file_count = 0;
1765		files_block_count = 0;
1766		blocks_per_group = 0;
1767		feature_incompat = 0;
1768		log_groups_per_flex = 0;
1769
1770		memset(dir_name, 0, PATH_MAX + 1);
1771		memset(dev_name, 0, PATH_MAX + 1);
1772		memset(lost_found_dir, 0, PATH_MAX + 1);
1773		memset(frag_rank, 0,
1774			sizeof(struct frag_statistic_ino) * SHOW_FRAG_FILES);
1775
1776		if ((mode_flag & STATISTIC) && i > optind)
1777			printf("\n");
1778
1779#if BYTE_ORDER != BIG_ENDIAN && BYTE_ORDER != LITTLE_ENDIAN
1780		PRINT_ERR_MSG("Endian's type is not big/little endian");
1781		PRINT_FILE_NAME(argv[i]);
1782		continue;
1783#endif
1784
1785		if (lstat64(argv[i], &buf) < 0) {
1786			perror(NGMSG_FILE_INFO);
1787			PRINT_FILE_NAME(argv[i]);
1788			continue;
1789		}
1790
1791		/* Handle i.e. lvm device symlinks */
1792		if (S_ISLNK(buf.st_mode)) {
1793			struct stat64	buf2;
1794
1795			if (stat64(argv[i], &buf2) == 0 &&
1796			    S_ISBLK(buf2.st_mode))
1797				buf = buf2;
1798		}
1799
1800		if (S_ISBLK(buf.st_mode)) {
1801			/* Block device */
1802			strncpy(dev_name, argv[i], strnlen(argv[i], PATH_MAX));
1803			if (get_mount_point(argv[i], dir_name, PATH_MAX) < 0)
1804				continue;
1805			if (lstat64(dir_name, &buf) < 0) {
1806				perror(NGMSG_FILE_INFO);
1807				PRINT_FILE_NAME(argv[i]);
1808				continue;
1809			}
1810			arg_type = DEVNAME;
1811			if (!(mode_flag & STATISTIC))
1812				printf("ext4 defragmentation for device(%s)\n",
1813					argv[i]);
1814		} else if (S_ISDIR(buf.st_mode)) {
1815			/* Directory */
1816			if (access(argv[i], R_OK) < 0) {
1817				perror(argv[i]);
1818				continue;
1819			}
1820			arg_type = DIRNAME;
1821			strncpy(dir_name, argv[i], strnlen(argv[i], PATH_MAX));
1822		} else if (S_ISREG(buf.st_mode)) {
1823			/* Regular file */
1824			arg_type = FILENAME;
1825		} else {
1826			/* Irregular file */
1827			PRINT_ERR_MSG(NGMSG_FILE_UNREG);
1828			PRINT_FILE_NAME(argv[i]);
1829			continue;
1830		}
1831
1832		/* Set blocksize */
1833		block_size = buf.st_blksize;
1834
1835		/* For device case,
1836		 * filesystem type checked in get_mount_point()
1837		 */
1838		if (arg_type == FILENAME || arg_type == DIRNAME) {
1839			if (is_ext4(argv[i], dev_name) < 0)
1840				continue;
1841			if (realpath(argv[i], dir_name) == NULL) {
1842				perror("Couldn't get full path");
1843				PRINT_FILE_NAME(argv[i]);
1844				continue;
1845			}
1846		}
1847
1848		if (current_uid == ROOT_UID) {
1849			/* Get super block info */
1850			ret = ext2fs_open(dev_name, 0, 0, block_size,
1851					unix_io_manager, &fs);
1852			if (ret) {
1853				if (mode_flag & DETAIL) {
1854					perror("Can't get super block info");
1855					PRINT_FILE_NAME(argv[i]);
1856				}
1857				continue;
1858			}
1859
1860			blocks_per_group = fs->super->s_blocks_per_group;
1861			feature_incompat = fs->super->s_feature_incompat;
1862			log_groups_per_flex = fs->super->s_log_groups_per_flex;
1863
1864			ext2fs_close(fs);
1865		}
1866
1867		switch (arg_type) {
1868		case DIRNAME:
1869			if (!(mode_flag & STATISTIC))
1870				printf("ext4 defragmentation "
1871					"for directory(%s)\n", argv[i]);
1872
1873			int mount_dir_len = 0;
1874			mount_dir_len = strnlen(lost_found_dir, PATH_MAX);
1875
1876			strncat(lost_found_dir, "/lost+found",
1877				PATH_MAX - strnlen(lost_found_dir, PATH_MAX));
1878
1879			/* Not the case("e4defrag  mount_piont_dir") */
1880			if (dir_name[mount_dir_len] != '\0') {
1881				/*
1882				 * "e4defrag mount_piont_dir/lost+found"
1883				 * or "e4defrag mount_piont_dir/lost+found/"
1884				 */
1885				if (strncmp(lost_found_dir, dir_name,
1886					    strnlen(lost_found_dir,
1887						    PATH_MAX)) == 0 &&
1888				    (dir_name[strnlen(lost_found_dir,
1889						      PATH_MAX)] == '\0' ||
1890				     dir_name[strnlen(lost_found_dir,
1891						      PATH_MAX)] == '/')) {
1892					PRINT_ERR_MSG(NGMSG_LOST_FOUND);
1893					PRINT_FILE_NAME(argv[i]);
1894					continue;
1895				}
1896
1897				/* "e4defrag mount_piont_dir/else_dir" */
1898				memset(lost_found_dir, 0, PATH_MAX + 1);
1899			}
1900		case DEVNAME:
1901			if (arg_type == DEVNAME) {
1902				strncpy(lost_found_dir, dir_name,
1903					strnlen(dir_name, PATH_MAX));
1904				strncat(lost_found_dir, "/lost+found/",
1905					PATH_MAX - strnlen(lost_found_dir,
1906							   PATH_MAX));
1907			}
1908
1909			nftw64(dir_name, calc_entry_counts, FTW_OPEN_FD, flags);
1910
1911			if (mode_flag & STATISTIC) {
1912				if (mode_flag & DETAIL)
1913					printf("%-40s%10s/%-10s%9s\n",
1914					"<File>", "now", "best", "size/ext");
1915
1916				if (!(mode_flag & DETAIL) &&
1917						current_uid != ROOT_UID) {
1918					printf(" Done.\n");
1919					success_flag = 1;
1920					continue;
1921				}
1922
1923				nftw64(dir_name, file_statistic,
1924							FTW_OPEN_FD, flags);
1925
1926				if (succeed_cnt != 0 &&
1927					current_uid == ROOT_UID) {
1928					if (mode_flag & DETAIL)
1929						printf("\n");
1930					printf("%-40s%10s/%-10s%9s\n",
1931						"<Fragmented files>", "now",
1932						"best", "size/ext");
1933					for (j = 0; j < SHOW_FRAG_FILES; j++) {
1934						if (strlen(frag_rank[j].
1935							msg_buffer) > 37) {
1936							printf("%d. %s\n%50d/"
1937							"%-10d%6llu KB\n",
1938							j + 1,
1939							frag_rank[j].msg_buffer,
1940							frag_rank[j].now_count,
1941							frag_rank[j].best_count,
1942							frag_rank[j].
1943								size_per_ext);
1944						} else if (strlen(frag_rank[j].
1945							msg_buffer) > 0) {
1946							printf("%d. %-37s%10d/"
1947							"%-10d%6llu KB\n",
1948							j + 1,
1949							frag_rank[j].msg_buffer,
1950							frag_rank[j].now_count,
1951							frag_rank[j].best_count,
1952							frag_rank[j].
1953								size_per_ext);
1954						} else
1955							break;
1956					}
1957				}
1958				break;
1959			}
1960			/* File tree walk */
1961			nftw64(dir_name, file_defrag, FTW_OPEN_FD, flags);
1962			printf("\n\tSuccess:\t\t\t[ %u/%u ]\n", succeed_cnt,
1963				total_count);
1964			printf("\tFailure:\t\t\t[ %u/%u ]\n",
1965				total_count - succeed_cnt, total_count);
1966			if (mode_flag & DETAIL) {
1967				printf("\tTotal extents:\t\t\t%4d->%d\n",
1968					extents_before_defrag,
1969					extents_after_defrag);
1970				printf("\tFragmented percentage:\t\t"
1971					"%3llu%%->%llu%%\n",
1972					!regular_count ? 0 :
1973					((unsigned long long)
1974					frag_files_before_defrag * 100) /
1975					regular_count,
1976					!regular_count ? 0 :
1977					((unsigned long long)
1978					frag_files_after_defrag * 100) /
1979					regular_count);
1980			}
1981			break;
1982		case FILENAME:
1983			total_count = 1;
1984			regular_count = 1;
1985			strncat(lost_found_dir, "/lost+found/",
1986				PATH_MAX - strnlen(lost_found_dir,
1987						   PATH_MAX));
1988			if (strncmp(lost_found_dir, dir_name,
1989				    strnlen(lost_found_dir,
1990					    PATH_MAX)) == 0) {
1991				PRINT_ERR_MSG(NGMSG_LOST_FOUND);
1992				PRINT_FILE_NAME(argv[i]);
1993				continue;
1994			}
1995
1996			if (mode_flag & STATISTIC) {
1997				file_statistic(argv[i], &buf, FTW_F, NULL);
1998				break;
1999			} else
2000				printf("ext4 defragmentation for %s\n",
2001								 argv[i]);
2002			/* Defrag single file process */
2003			file_defrag(argv[i], &buf, FTW_F, NULL);
2004			if (succeed_cnt != 0)
2005				printf(" Success:\t\t\t[1/1]\n");
2006			else
2007				printf(" Success:\t\t\t[0/1]\n");
2008
2009			break;
2010		}
2011
2012		if (succeed_cnt != 0)
2013			success_flag = 1;
2014		if (mode_flag & STATISTIC) {
2015			if (current_uid != ROOT_UID) {
2016				printf(" Done.\n");
2017				continue;
2018			}
2019
2020			if (!succeed_cnt) {
2021				if (mode_flag & DETAIL)
2022					printf("\n");
2023
2024				if (arg_type == DEVNAME)
2025					printf(" In this device(%s), "
2026					"none can be defragmented.\n", argv[i]);
2027				else if (arg_type == DIRNAME)
2028					printf(" In this directory(%s), "
2029					"none can be defragmented.\n", argv[i]);
2030				else
2031					printf(" This file(%s) "
2032					"can't be defragmented.\n", argv[i]);
2033			} else {
2034				float files_ratio = 0.0;
2035				float score = 0.0;
2036				__u64 size_per_ext = files_block_count *
2037						(buf.st_blksize / 1024) /
2038						extents_before_defrag;
2039				files_ratio = (float)(extents_before_defrag -
2040						extents_after_defrag) *
2041						100 / files_block_count;
2042				score = CALC_SCORE(files_ratio);
2043				printf("\n Total/best extents\t\t\t\t%d/%d\n"
2044					" Average size per extent"
2045					"\t\t\t%llu KB\n"
2046					" Fragmentation score\t\t\t\t%.0f\n",
2047						extents_before_defrag,
2048						extents_after_defrag,
2049						size_per_ext, score);
2050				printf(" [0-30 no problem:"
2051					" 31-55 a little bit fragmented:"
2052					" 56- needs defrag]\n");
2053
2054				if (arg_type == DEVNAME)
2055					printf(" This device (%s) ", argv[i]);
2056				else if (arg_type == DIRNAME)
2057					printf(" This directory (%s) ",
2058								argv[i]);
2059				else
2060					printf(" This file (%s) ", argv[i]);
2061
2062				if (score > BOUND_SCORE)
2063					printf("needs defragmentation.\n");
2064				else
2065					printf("does not need "
2066							"defragmentation.\n");
2067			}
2068			printf(" Done.\n");
2069		}
2070
2071	}
2072
2073	if (success_flag)
2074		return 0;
2075
2076	exit(1);
2077
2078out:
2079	printf(MSG_USAGE);
2080	exit(1);
2081}
2082
2083