e4defrag.c revision 18a1444b4f1e6a0948fd38fa0de382d86cfe04de
1/*
2 * e4defrag.c - ext4 filesystem defragmenter
3 *
4 * Copyright (C) 2009 NEC Software Tohoku, Ltd.
5 *
6 * Author: Akira Fujita	<a-fujita@rs.jp.nec.com>
7 *         Takashi Sato	<t-sato@yk.jp.nec.com>
8 */
9
10#ifndef _LARGEFILE_SOURCE
11#define _LARGEFILE_SOURCE
12#endif
13
14#ifndef _LARGEFILE64_SOURCE
15#define _LARGEFILE64_SOURCE
16#endif
17
18#ifndef _GNU_SOURCE
19#define _GNU_SOURCE
20#endif
21
22#include <ctype.h>
23#include <dirent.h>
24#include <endian.h>
25#include <errno.h>
26#include <fcntl.h>
27#include <ftw.h>
28#include <limits.h>
29#include <mntent.h>
30#include <stdio.h>
31#include <stdlib.h>
32#include <string.h>
33#include <unistd.h>
34#include <ext2fs/ext2_types.h>
35#include <ext2fs/ext2fs.h>
36#include <linux/fs.h>
37#include <sys/ioctl.h>
38#include <ext2fs/fiemap.h>
39#include <sys/mman.h>
40#include <sys/stat.h>
41#include <sys/statfs.h>
42#include <sys/syscall.h>
43#include <sys/vfs.h>
44
45/* A relatively new ioctl interface ... */
46#ifndef EXT4_IOC_MOVE_EXT
47#define EXT4_IOC_MOVE_EXT      _IOWR('f', 15, struct move_extent)
48#endif
49
50/* Macro functions */
51#define PRINT_ERR_MSG(msg)	fprintf(stderr, "%s\n", (msg))
52#define IN_FTW_PRINT_ERR_MSG(msg)	\
53	fprintf(stderr, "\t%s\t\t[ NG ]\n", (msg))
54#define PRINT_FILE_NAME(file)	fprintf(stderr, " \"%s\"\n", (file))
55#define PRINT_ERR_MSG_WITH_ERRNO(msg)	\
56	fprintf(stderr, "\t%s:%s\t[ NG ]\n", (msg), strerror(errno))
57#define STATISTIC_ERR_MSG(msg)	\
58	fprintf(stderr, "\t%s\n", (msg))
59#define STATISTIC_ERR_MSG_WITH_ERRNO(msg)	\
60	fprintf(stderr, "\t%s:%s\n", (msg), strerror(errno))
61#define min(x, y) (((x) > (y)) ? (y) : (x))
62#define CALC_SCORE(ratio) \
63	((ratio) > 10 ? (80 + 20 * (ratio) / 100) : (8 * (ratio)))
64/* Wrap up the free function */
65#define FREE(tmp)				\
66	do {					\
67		if ((tmp) != NULL)		\
68			free(tmp);		\
69	} while (0)				\
70/* Insert list2 after list1 */
71#define insert(list1, list2)			\
72	do {					\
73		list2->next = list1->next;	\
74		list1->next->prev = list2;	\
75		list2->prev = list1;		\
76		list1->next = list2;		\
77	} while (0)
78
79/* To delete unused warning */
80#ifdef __GNUC__
81#define EXT2FS_ATTR(x) __attribute__(x)
82#else
83#define EXT2FS_ATTR(x)
84#endif
85
86/* The mode of defrag */
87#define DETAIL			0x01
88#define STATISTIC		0x02
89
90#define DEVNAME			0
91#define DIRNAME			1
92#define FILENAME		2
93
94#define FTW_OPEN_FD		2000
95
96#define FS_EXT4			"ext4"
97#define ROOT_UID		0
98
99#define BOUND_SCORE		55
100#define SHOW_FRAG_FILES	5
101
102/* Magic number for ext4 */
103#define EXT4_SUPER_MAGIC	0xEF53
104
105/* Definition of flex_bg */
106#define EXT4_FEATURE_INCOMPAT_FLEX_BG		0x0200
107
108/* The following macro is used for ioctl FS_IOC_FIEMAP
109 * EXTENT_MAX_COUNT:	the maximum number of extents for exchanging between
110 *			kernel-space and user-space per ioctl
111 */
112#define EXTENT_MAX_COUNT	512
113
114/* The following macros are error message */
115#define MSG_USAGE		\
116"Usage	: e4defrag [-v] file...| directory...| device...\n\
117	: e4defrag  -c  file...| directory...| device...\n"
118
119#define NGMSG_EXT4		"Filesystem is not ext4 filesystem"
120#define NGMSG_FILE_EXTENT	"Failed to get file extents"
121#define NGMSG_FILE_INFO		"Failed to get file information"
122#define NGMSG_FILE_OPEN		"Failed to open"
123#define NGMSG_FILE_UNREG	"File is not regular file"
124#define NGMSG_LOST_FOUND	"Can not process \"lost+found\""
125
126/* Data type for filesystem-wide blocks number */
127typedef unsigned long long ext4_fsblk_t;
128
129struct fiemap_extent_data {
130	__u64 len;			/* blocks count */
131	__u64 logical;		/* start logical block number */
132	ext4_fsblk_t physical;		/* start physical block number */
133};
134
135struct fiemap_extent_list {
136	struct fiemap_extent_list *prev;
137	struct fiemap_extent_list *next;
138	struct fiemap_extent_data data;	/* extent belong to file */
139};
140
141struct fiemap_extent_group {
142	struct fiemap_extent_group *prev;
143	struct fiemap_extent_group *next;
144	__u64 len;	/* length of this continuous region */
145	struct fiemap_extent_list *start;	/* start ext */
146	struct fiemap_extent_list *end;		/* end ext */
147};
148
149struct move_extent {
150	__s32 reserved;	/* original file descriptor */
151	__u32 donor_fd;	/* donor file descriptor */
152	__u64 orig_start;	/* logical start offset in block for orig */
153	__u64 donor_start;	/* logical start offset in block for donor */
154	__u64 len;	/* block length to be moved */
155	__u64 moved_len;	/* moved block length */
156};
157
158struct frag_statistic_ino {
159	int now_count;	/* the file's extents count of before defrag */
160	int best_count; /* the best file's extents count */
161	__u64 size_per_ext;	/* size(KB) per extent */
162	float ratio;	/* the ratio of fragmentation */
163	char msg_buffer[PATH_MAX + 1];	/* pathname of the file */
164};
165
166static char	lost_found_dir[PATH_MAX + 1];
167static int	block_size;
168static int	extents_before_defrag;
169static int	extents_after_defrag;
170static int	mode_flag;
171static unsigned int	current_uid;
172static unsigned int	defraged_file_count;
173static unsigned int	frag_files_before_defrag;
174static unsigned int	frag_files_after_defrag;
175static unsigned int	regular_count;
176static unsigned int	succeed_cnt;
177static unsigned int	total_count;
178static __u8 log_groups_per_flex;
179static __u32 blocks_per_group;
180static __u32 feature_incompat;
181static ext4_fsblk_t	files_block_count;
182static struct frag_statistic_ino	frag_rank[SHOW_FRAG_FILES];
183
184
185/* Local definitions of some syscalls glibc may not yet have */
186
187#ifndef HAVE_POSIX_FADVISE
188#warning Using locally defined posix_fadvise interface.
189
190#ifndef __NR_fadvise64_64
191#error Your kernel headers dont define __NR_fadvise64_64
192#endif
193
194/*
195 * fadvise() -		Give advice about file access.
196 *
197 * @fd:			defrag target file's descriptor.
198 * @offset:		file offset.
199 * @len:		area length.
200 * @advise:		process flag.
201 */
202static int posix_fadvise(int fd, loff_t offset, size_t len, int advise)
203{
204	return syscall(__NR_fadvise64_64, fd, offset, len, advise);
205}
206#endif /* ! HAVE_FADVISE64_64 */
207
208#ifndef HAVE_SYNC_FILE_RANGE
209#warning Using locally defined sync_file_range interface.
210
211#ifndef __NR_sync_file_range
212#ifndef __NR_sync_file_range2 /* ppc */
213#error Your kernel headers dont define __NR_sync_file_range
214#endif
215#endif
216
217/*
218 * sync_file_range() -	Sync file region.
219 *
220 * @fd:			defrag target file's descriptor.
221 * @offset:		file offset.
222 * @length:		area length.
223 * @flag:		process flag.
224 */
225int sync_file_range(int fd, loff_t offset, loff_t length, unsigned int flag)
226{
227#ifdef __NR_sync_file_range
228	return syscall(__NR_sync_file_range, fd, offset, length, flag);
229#else
230	return syscall(__NR_sync_file_range2, fd, flag, offset, length);
231#endif
232}
233#endif /* ! HAVE_SYNC_FILE_RANGE */
234
235#ifndef HAVE_FALLOCATE64
236#warning Using locally defined fallocate syscall interface.
237
238#ifndef __NR_fallocate
239#error Your kernel headers dont define __NR_fallocate
240#endif
241
242/*
243 * fallocate64() -	Manipulate file space.
244 *
245 * @fd:			defrag target file's descriptor.
246 * @mode:		process flag.
247 * @offset:		file offset.
248 * @len:		file size.
249 */
250static int fallocate64(int fd, int mode, loff_t offset, loff_t len)
251{
252	return syscall(__NR_fallocate, fd, mode, offset, len);
253}
254#endif /* ! HAVE_FALLOCATE */
255
256/*
257 * get_mount_point() -	Get device's mount point.
258 *
259 * @devname:		the device's name.
260 * @mount_point:	the mount point.
261 * @dir_path_len:	the length of directory.
262 */
263static int get_mount_point(const char *devname, char *mount_point,
264							int dir_path_len)
265{
266	/* Refer to /etc/mtab */
267	const char	*mtab = MOUNTED;
268	FILE		*fp = NULL;
269	struct mntent	*mnt = NULL;
270	struct stat64	sb;
271
272	if (stat64(devname, &sb) < 0) {
273		perror(NGMSG_FILE_INFO);
274		PRINT_FILE_NAME(devname);
275		return -1;
276	}
277
278	fp = setmntent(mtab, "r");
279	if (fp == NULL) {
280		perror("Couldn't access /etc/mtab");
281		return -1;
282	}
283
284	while ((mnt = getmntent(fp)) != NULL) {
285		struct stat64 ms;
286
287		/*
288		 * To handle device symlinks, we see if the
289		 * device number matches, not the name
290		 */
291		if (stat64(mnt->mnt_fsname, &ms) < 0)
292			continue;
293		if (sb.st_rdev != ms.st_rdev)
294			continue;
295
296		endmntent(fp);
297		if (strcmp(mnt->mnt_type, FS_EXT4) == 0) {
298			strncpy(mount_point, mnt->mnt_dir,
299				dir_path_len);
300			return 0;
301		}
302		PRINT_ERR_MSG(NGMSG_EXT4);
303		return -1;
304	}
305	endmntent(fp);
306	PRINT_ERR_MSG("Filesystem is not mounted");
307	return -1;
308}
309
310/*
311 * is_ext4() -		Whether on an ext4 filesystem.
312 *
313 * @file:		the file's name.
314 */
315static int is_ext4(const char *file, char *devname)
316{
317	int 	maxlen = 0;
318	int	len, ret;
319	FILE	*fp = NULL;
320	char	*mnt_type = NULL;
321	/* Refer to /etc/mtab */
322	const char	*mtab = MOUNTED;
323	char	file_path[PATH_MAX + 1];
324	struct mntent	*mnt = NULL;
325	struct statfs64	fsbuf;
326
327	/* Get full path */
328	if (realpath(file, file_path) == NULL) {
329		perror("Couldn't get full path");
330		PRINT_FILE_NAME(file);
331		return -1;
332	}
333
334	if (statfs64(file_path, &fsbuf) < 0) {
335		perror("Failed to get filesystem information");
336		PRINT_FILE_NAME(file);
337		return -1;
338	}
339
340	if (fsbuf.f_type != EXT4_SUPER_MAGIC) {
341		PRINT_ERR_MSG(NGMSG_EXT4);
342		return -1;
343	}
344
345	fp = setmntent(mtab, "r");
346	if (fp == NULL) {
347		perror("Couldn't access /etc/mtab");
348		return -1;
349	}
350
351	while ((mnt = getmntent(fp)) != NULL) {
352		if (mnt->mnt_fsname[0] != '/')
353			continue;
354		len = strlen(mnt->mnt_dir);
355		ret = memcmp(file_path, mnt->mnt_dir, len);
356		if (ret != 0)
357			continue;
358
359		if (maxlen >= len)
360			continue;
361
362		maxlen = len;
363
364		mnt_type = realloc(mnt_type, strlen(mnt->mnt_type) + 1);
365		if (mnt_type == NULL) {
366			endmntent(fp);
367			return -1;
368		}
369		memset(mnt_type, 0, strlen(mnt->mnt_type) + 1);
370		strncpy(mnt_type, mnt->mnt_type, strlen(mnt->mnt_type));
371		strncpy(lost_found_dir, mnt->mnt_dir, PATH_MAX);
372		strncpy(devname, mnt->mnt_fsname, strlen(mnt->mnt_fsname) + 1);
373	}
374
375	endmntent(fp);
376	if (mnt_type && strcmp(mnt_type, FS_EXT4) == 0) {
377		FREE(mnt_type);
378		return 0;
379	} else {
380		FREE(mnt_type);
381		PRINT_ERR_MSG(NGMSG_EXT4);
382		return -1;
383	}
384}
385
386/*
387 * calc_entry_counts() -	Calculate file counts.
388 *
389 * @file:		file name.
390 * @buf:		file info.
391 * @flag:		file type.
392 * @ftwbuf:		the pointer of a struct FTW.
393 */
394static int calc_entry_counts(const char *file EXT2FS_ATTR((unused)),
395		const struct stat64 *buf, int flag EXT2FS_ATTR((unused)),
396		struct FTW *ftwbuf EXT2FS_ATTR((unused)))
397{
398	if (S_ISREG(buf->st_mode))
399		regular_count++;
400
401	total_count++;
402
403	return 0;
404}
405
406/*
407 * page_in_core() -	Get information on whether pages are in core.
408 *
409 * @fd:			defrag target file's descriptor.
410 * @defrag_data:	data used for defrag.
411 * @vec:		page state array.
412 * @page_num:		page number.
413 */
414static int page_in_core(int fd, struct move_extent defrag_data,
415			unsigned char **vec, unsigned int *page_num)
416{
417	long	pagesize;
418	void	*page = NULL;
419	loff_t	offset, end_offset, length;
420
421	if (vec == NULL || *vec != NULL)
422		return -1;
423
424	pagesize = sysconf(_SC_PAGESIZE);
425	if (pagesize < 0)
426		return -1;
427	/* In mmap, offset should be a multiple of the page size */
428	offset = (loff_t)defrag_data.orig_start * block_size;
429	length = (loff_t)defrag_data.len * block_size;
430	end_offset = offset + length;
431	/* Round the offset down to the nearest multiple of pagesize */
432	offset = (offset / pagesize) * pagesize;
433	length = end_offset - offset;
434
435	page = mmap(NULL, length, PROT_READ, MAP_SHARED, fd, offset);
436	if (page == MAP_FAILED)
437		return -1;
438
439	*page_num = 0;
440	*page_num = (length + pagesize - 1) / pagesize;
441	*vec = (unsigned char *)calloc(*page_num, 1);
442	if (*vec == NULL)
443		return -1;
444
445	/* Get information on whether pages are in core */
446	if (mincore(page, (size_t)length, *vec) == -1 ||
447		munmap(page, length) == -1) {
448		FREE(*vec);
449		return -1;
450	}
451
452	return 0;
453}
454
455/*
456 * defrag_fadvise() -	Predeclare an access pattern for file data.
457 *
458 * @fd:			defrag target file's descriptor.
459 * @defrag_data:	data used for defrag.
460 * @vec:		page state array.
461 * @page_num:		page number.
462 */
463static int defrag_fadvise(int fd, struct move_extent defrag_data,
464		   unsigned char *vec, unsigned int page_num)
465{
466	int	flag = 1;
467	long	pagesize = sysconf(_SC_PAGESIZE);
468	int	fadvise_flag = POSIX_FADV_DONTNEED;
469	int	sync_flag = SYNC_FILE_RANGE_WAIT_BEFORE |
470			    SYNC_FILE_RANGE_WRITE |
471			    SYNC_FILE_RANGE_WAIT_AFTER;
472	unsigned int	i;
473	loff_t	offset;
474
475	if (pagesize < 1)
476		return -1;
477
478	offset = (loff_t)defrag_data.orig_start * block_size;
479	offset = (offset / pagesize) * pagesize;
480
481	/* Sync file for fadvise process */
482	if (sync_file_range(fd, offset,
483		(loff_t)pagesize * page_num, sync_flag) < 0)
484		return -1;
485
486	/* Try to release buffer cache which this process used,
487	 * then other process can use the released buffer
488	 */
489	for (i = 0; i < page_num; i++) {
490		if ((vec[i] & 0x1) == 0) {
491			offset += pagesize;
492			continue;
493		}
494		if (posix_fadvise(fd, offset, pagesize, fadvise_flag) < 0) {
495			if ((mode_flag & DETAIL) && flag) {
496				perror("\tFailed to fadvise");
497				flag = 0;
498			}
499		}
500		offset += pagesize;
501	}
502
503	return 0;
504}
505
506/*
507 * check_free_size() -	Check if there's enough disk space.
508 *
509 * @fd:			defrag target file's descriptor.
510 * @file:		file name.
511 * @blk_count:		file blocks.
512 */
513static int check_free_size(int fd, const char *file, ext4_fsblk_t blk_count)
514{
515	ext4_fsblk_t	free_blk_count;
516	struct statfs64	fsbuf;
517
518	if (fstatfs64(fd, &fsbuf) < 0) {
519		if (mode_flag & DETAIL) {
520			PRINT_FILE_NAME(file);
521			PRINT_ERR_MSG_WITH_ERRNO(
522				"Failed to get filesystem information");
523		}
524		return -1;
525	}
526
527	/* Compute free space for root and normal user separately */
528	if (current_uid == ROOT_UID)
529		free_blk_count = fsbuf.f_bfree;
530	else
531		free_blk_count = fsbuf.f_bavail;
532
533	if (free_blk_count >= blk_count)
534		return 0;
535
536	return -ENOSPC;
537}
538
539/*
540 * file_frag_count() -	Get file fragment count.
541 *
542 * @fd:			defrag target file's descriptor.
543 */
544static int file_frag_count(int fd)
545{
546	int	ret;
547	struct fiemap	fiemap_buf;
548
549	/* When fm_extent_count is 0,
550	 * ioctl just get file fragment count.
551	 */
552	memset(&fiemap_buf, 0, sizeof(struct fiemap));
553	fiemap_buf.fm_start = 0;
554	fiemap_buf.fm_length = FIEMAP_MAX_OFFSET;
555	fiemap_buf.fm_flags |= FIEMAP_FLAG_SYNC;
556
557	ret = ioctl(fd, FS_IOC_FIEMAP, &fiemap_buf);
558	if (ret < 0)
559		return ret;
560
561	return fiemap_buf.fm_mapped_extents;
562}
563
564/*
565 * file_check() -	Check file's attributes.
566 *
567 * @fd:			defrag target file's descriptor.
568 * @buf:		a pointer of the struct stat64.
569 * @file:		file name.
570 * @extents:		file extents.
571 * @blk_count:		file blocks.
572 */
573static int file_check(int fd, const struct stat64 *buf, const char *file,
574		int extents, ext4_fsblk_t blk_count)
575{
576	int	ret;
577	struct flock	lock;
578
579	/* Write-lock check is more reliable */
580	lock.l_type = F_WRLCK;
581	lock.l_start = 0;
582	lock.l_whence = SEEK_SET;
583	lock.l_len = 0;
584
585	/* Free space */
586	ret = check_free_size(fd, file, blk_count);
587	if (ret < 0) {
588		if ((mode_flag & DETAIL) && ret == -ENOSPC) {
589			printf("\033[79;0H\033[K[%u/%u] \"%s\"\t\t"
590				"  extents: %d -> %d\n", defraged_file_count,
591				total_count, file, extents, extents);
592			IN_FTW_PRINT_ERR_MSG(
593			"Defrag size is larger than filesystem's free space");
594		}
595		return -1;
596	}
597
598	/* Access authority */
599	if (current_uid != ROOT_UID &&
600		buf->st_uid != current_uid) {
601		if (mode_flag & DETAIL) {
602			printf("\033[79;0H\033[K[%u/%u] \"%s\"\t\t"
603				"  extents: %d -> %d\n", defraged_file_count,
604				total_count, file, extents, extents);
605			IN_FTW_PRINT_ERR_MSG(
606				"File is not current user's file"
607				" or current user is not root");
608		}
609		return -1;
610	}
611
612	/* Lock status */
613	if (fcntl(fd, F_GETLK, &lock) < 0) {
614		if (mode_flag & DETAIL) {
615			PRINT_FILE_NAME(file);
616			PRINT_ERR_MSG_WITH_ERRNO(
617				"Failed to get lock information");
618		}
619		return -1;
620	} else if (lock.l_type != F_UNLCK) {
621		if (mode_flag & DETAIL) {
622			PRINT_FILE_NAME(file);
623			IN_FTW_PRINT_ERR_MSG("File has been locked");
624		}
625		return -1;
626	}
627
628	return 0;
629}
630
631/*
632 * insert_extent_by_logical() -	Sequentially insert extent by logical.
633 *
634 * @ext_list_head:	the head of logical extent list.
635 * @ext:		the extent element which will be inserted.
636 */
637static int insert_extent_by_logical(struct fiemap_extent_list **ext_list_head,
638			struct fiemap_extent_list *ext)
639{
640	struct fiemap_extent_list	*ext_list_tmp = *ext_list_head;
641
642	if (ext == NULL)
643		goto out;
644
645	/* First element */
646	if (*ext_list_head == NULL) {
647		(*ext_list_head) = ext;
648		(*ext_list_head)->prev = *ext_list_head;
649		(*ext_list_head)->next = *ext_list_head;
650		return 0;
651	}
652
653	if (ext->data.logical <= ext_list_tmp->data.logical) {
654		/* Insert before head */
655		if (ext_list_tmp->data.logical <
656			ext->data.logical + ext->data.len)
657			/* Overlap */
658			goto out;
659		/* Adjust head */
660		*ext_list_head = ext;
661	} else {
662		/* Insert into the middle or last of the list */
663		do {
664			if (ext->data.logical < ext_list_tmp->data.logical)
665				break;
666			ext_list_tmp = ext_list_tmp->next;
667		} while (ext_list_tmp != (*ext_list_head));
668		if (ext->data.logical <
669		    ext_list_tmp->prev->data.logical +
670			ext_list_tmp->prev->data.len)
671			/* Overlap */
672			goto out;
673
674		if (ext_list_tmp != *ext_list_head &&
675		    ext_list_tmp->data.logical <
676		    ext->data.logical + ext->data.len)
677			/* Overlap */
678			goto out;
679	}
680	ext_list_tmp = ext_list_tmp->prev;
681	/* Insert "ext" after "ext_list_tmp" */
682	insert(ext_list_tmp, ext);
683	return 0;
684out:
685	errno = EINVAL;
686	return -1;
687}
688
689/*
690 * insert_extent_by_physical() -	Sequentially insert extent by physical.
691 *
692 * @ext_list_head:	the head of physical extent list.
693 * @ext:		the extent element which will be inserted.
694 */
695static int insert_extent_by_physical(struct fiemap_extent_list **ext_list_head,
696			struct fiemap_extent_list *ext)
697{
698	struct fiemap_extent_list	*ext_list_tmp = *ext_list_head;
699
700	if (ext == NULL)
701		goto out;
702
703	/* First element */
704	if (*ext_list_head == NULL) {
705		(*ext_list_head) = ext;
706		(*ext_list_head)->prev = *ext_list_head;
707		(*ext_list_head)->next = *ext_list_head;
708		return 0;
709	}
710
711	if (ext->data.physical <= ext_list_tmp->data.physical) {
712		/* Insert before head */
713		if (ext_list_tmp->data.physical <
714					ext->data.physical + ext->data.len)
715			/* Overlap */
716			goto out;
717		/* Adjust head */
718		*ext_list_head = ext;
719	} else {
720		/* Insert into the middle or last of the list */
721		do {
722			if (ext->data.physical < ext_list_tmp->data.physical)
723				break;
724			ext_list_tmp = ext_list_tmp->next;
725		} while (ext_list_tmp != (*ext_list_head));
726		if (ext->data.physical <
727		    ext_list_tmp->prev->data.physical +
728				ext_list_tmp->prev->data.len)
729			/* Overlap */
730			goto out;
731
732		if (ext_list_tmp != *ext_list_head &&
733		    ext_list_tmp->data.physical <
734				ext->data.physical + ext->data.len)
735			/* Overlap */
736			goto out;
737	}
738	ext_list_tmp = ext_list_tmp->prev;
739	/* Insert "ext" after "ext_list_tmp" */
740	insert(ext_list_tmp, ext);
741	return 0;
742out:
743	errno = EINVAL;
744	return -1;
745}
746
747/*
748 * insert_exts_group() -	Insert a exts_group.
749 *
750 * @ext_group_head:		the head of a exts_group list.
751 * @exts_group:			the exts_group element which will be inserted.
752 */
753static int insert_exts_group(struct fiemap_extent_group **ext_group_head,
754				struct fiemap_extent_group *exts_group)
755{
756	struct fiemap_extent_group	*ext_group_tmp = NULL;
757
758	if (exts_group == NULL) {
759		errno = EINVAL;
760		return -1;
761	}
762
763	/* Initialize list */
764	if (*ext_group_head == NULL) {
765		(*ext_group_head) = exts_group;
766		(*ext_group_head)->prev = *ext_group_head;
767		(*ext_group_head)->next = *ext_group_head;
768		return 0;
769	}
770
771	ext_group_tmp = (*ext_group_head)->prev;
772	insert(ext_group_tmp, exts_group);
773
774	return 0;
775}
776
777/*
778 * join_extents() -		Find continuous region(exts_group).
779 *
780 * @ext_list_head:		the head of the extent list.
781 * @ext_group_head:		the head of the target exts_group list.
782 */
783static int join_extents(struct fiemap_extent_list *ext_list_head,
784		struct fiemap_extent_group **ext_group_head)
785{
786	__u64	len = ext_list_head->data.len;
787	struct fiemap_extent_list *ext_list_start = ext_list_head;
788	struct fiemap_extent_list *ext_list_tmp = ext_list_head->next;
789
790	do {
791		struct fiemap_extent_group	*ext_group_tmp = NULL;
792
793		/* This extent and previous extent are not continuous,
794		 * so, all previous extents are treated as an extent group.
795		 */
796		if ((ext_list_tmp->prev->data.logical +
797			ext_list_tmp->prev->data.len)
798				!= ext_list_tmp->data.logical) {
799			ext_group_tmp =
800				malloc(sizeof(struct fiemap_extent_group));
801			if (ext_group_tmp == NULL)
802				return -1;
803
804			memset(ext_group_tmp, 0,
805				sizeof(struct fiemap_extent_group));
806			ext_group_tmp->len = len;
807			ext_group_tmp->start = ext_list_start;
808			ext_group_tmp->end = ext_list_tmp->prev;
809
810			if (insert_exts_group(ext_group_head,
811				ext_group_tmp) < 0) {
812				FREE(ext_group_tmp);
813				return -1;
814			}
815			ext_list_start = ext_list_tmp;
816			len = ext_list_tmp->data.len;
817			ext_list_tmp = ext_list_tmp->next;
818			continue;
819		}
820
821		/* This extent and previous extent are continuous,
822		 * so, they belong to the same extent group, and we check
823		 * if the next extent belongs to the same extent group.
824		 */
825		len += ext_list_tmp->data.len;
826		ext_list_tmp = ext_list_tmp->next;
827	} while (ext_list_tmp != ext_list_head->next);
828
829	return 0;
830}
831
832/*
833 * get_file_extents() -	Get file's extent list.
834 *
835 * @fd:			defrag target file's descriptor.
836 * @ext_list_head:	the head of the extent list.
837 */
838static int get_file_extents(int fd, struct fiemap_extent_list **ext_list_head)
839{
840	__u32	i;
841	int	ret;
842	int	ext_buf_size, fie_buf_size;
843	__u64	pos = 0;
844	struct fiemap	*fiemap_buf = NULL;
845	struct fiemap_extent	*ext_buf = NULL;
846	struct fiemap_extent_list	*ext_list = NULL;
847
848	/* Convert units, in bytes.
849	 * Be careful : now, physical block number in extent is 48bit,
850	 * and the maximum blocksize for ext4 is 4K(12bit),
851	 * so there is no overflow, but in future it may be changed.
852	 */
853
854	/* Alloc space for fiemap */
855	ext_buf_size = EXTENT_MAX_COUNT * sizeof(struct fiemap_extent);
856	fie_buf_size = sizeof(struct fiemap) + ext_buf_size;
857
858	fiemap_buf = malloc(fie_buf_size);
859	if (fiemap_buf == NULL)
860		return -1;
861
862	ext_buf = fiemap_buf->fm_extents;
863	memset(fiemap_buf, 0, fie_buf_size);
864	fiemap_buf->fm_length = FIEMAP_MAX_OFFSET;
865	fiemap_buf->fm_flags |= FIEMAP_FLAG_SYNC;
866	fiemap_buf->fm_extent_count = EXTENT_MAX_COUNT;
867
868	do {
869		fiemap_buf->fm_start = pos;
870		memset(ext_buf, 0, ext_buf_size);
871		ret = ioctl(fd, FS_IOC_FIEMAP, fiemap_buf);
872		if (ret < 0 || fiemap_buf->fm_mapped_extents == 0)
873			goto out;
874		for (i = 0; i < fiemap_buf->fm_mapped_extents; i++) {
875			ext_list = NULL;
876			ext_list = malloc(sizeof(struct fiemap_extent_list));
877			if (ext_list == NULL)
878				goto out;
879
880			ext_list->data.physical = ext_buf[i].fe_physical
881						/ block_size;
882			ext_list->data.logical = ext_buf[i].fe_logical
883						/ block_size;
884			ext_list->data.len = ext_buf[i].fe_length
885						/ block_size;
886
887			ret = insert_extent_by_physical(
888					ext_list_head, ext_list);
889			if (ret < 0) {
890				FREE(ext_list);
891				goto out;
892			}
893		}
894		/* Record file's logical offset this time */
895		pos = ext_buf[EXTENT_MAX_COUNT-1].fe_logical +
896			ext_buf[EXTENT_MAX_COUNT-1].fe_length;
897		/*
898		 * If fm_extents array has been filled and
899		 * there are extents left, continue to cycle.
900		 */
901	} while (fiemap_buf->fm_mapped_extents
902					== EXTENT_MAX_COUNT &&
903		!(ext_buf[EXTENT_MAX_COUNT-1].fe_flags
904					& FIEMAP_EXTENT_LAST));
905
906	FREE(fiemap_buf);
907	return 0;
908out:
909	FREE(fiemap_buf);
910	return -1;
911}
912
913/*
914 * get_logical_count() -	Get the file logical extents count.
915 *
916 * @logical_list_head:	the head of the logical extent list.
917 */
918static int get_logical_count(struct fiemap_extent_list *logical_list_head)
919{
920	int ret = 0;
921	struct fiemap_extent_list *ext_list_tmp  = logical_list_head;
922
923	do {
924		ret++;
925		ext_list_tmp = ext_list_tmp->next;
926	} while (ext_list_tmp != logical_list_head);
927
928	return ret;
929}
930
931/*
932 * get_physical_count() -	Get the file physical extents count.
933 *
934 * @physical_list_head:	the head of the physical extent list.
935 */
936static int get_physical_count(struct fiemap_extent_list *physical_list_head)
937{
938	int ret = 0;
939	struct fiemap_extent_list *ext_list_tmp = physical_list_head;
940
941	do {
942		if ((ext_list_tmp->data.physical + ext_list_tmp->data.len)
943				!= ext_list_tmp->next->data.physical) {
944			/* This extent and next extent are not continuous. */
945			ret++;
946		}
947
948		ext_list_tmp = ext_list_tmp->next;
949	} while (ext_list_tmp != physical_list_head);
950
951	return ret;
952}
953
954/*
955 * change_physical_to_logical() -	Change list from physical to logical.
956 *
957 * @physical_list_head:	the head of physical extent list.
958 * @logical_list_head:	the head of logical extent list.
959 */
960static int change_physical_to_logical(
961			struct fiemap_extent_list **physical_list_head,
962			struct fiemap_extent_list **logical_list_head)
963{
964	int ret;
965	struct fiemap_extent_list *ext_list_tmp = *physical_list_head;
966	struct fiemap_extent_list *ext_list_next = ext_list_tmp->next;
967
968	while (1) {
969		if (ext_list_tmp == ext_list_next) {
970			ret = insert_extent_by_logical(
971				logical_list_head, ext_list_tmp);
972			if (ret < 0)
973				return -1;
974
975			*physical_list_head = NULL;
976			break;
977		}
978
979		ext_list_tmp->prev->next = ext_list_tmp->next;
980		ext_list_tmp->next->prev = ext_list_tmp->prev;
981		*physical_list_head = ext_list_next;
982
983		ret = insert_extent_by_logical(
984			logical_list_head, ext_list_tmp);
985		if (ret < 0) {
986			FREE(ext_list_tmp);
987			return -1;
988		}
989		ext_list_tmp = ext_list_next;
990		ext_list_next = ext_list_next->next;
991	}
992
993	return 0;
994}
995
996/* get_file_blocks() -  Get total file blocks.
997 *
998 * @ext_list_head:	the extent list head of the target file
999 */
1000static ext4_fsblk_t get_file_blocks(struct fiemap_extent_list *ext_list_head)
1001{
1002	ext4_fsblk_t blk_count = 0;
1003	struct fiemap_extent_list *ext_list_tmp = ext_list_head;
1004
1005	do {
1006		blk_count += ext_list_tmp->data.len;
1007		ext_list_tmp = ext_list_tmp->next;
1008	} while (ext_list_tmp != ext_list_head);
1009
1010	return blk_count;
1011}
1012
1013/*
1014 * free_ext() -		Free the extent list.
1015 *
1016 * @ext_list_head:	the extent list head of which will be free.
1017 */
1018static void free_ext(struct fiemap_extent_list *ext_list_head)
1019{
1020	struct fiemap_extent_list	*ext_list_tmp = NULL;
1021
1022	if (ext_list_head == NULL)
1023		return;
1024
1025	while (ext_list_head->next != ext_list_head) {
1026		ext_list_tmp = ext_list_head;
1027		ext_list_head->prev->next = ext_list_head->next;
1028		ext_list_head->next->prev = ext_list_head->prev;
1029		ext_list_head = ext_list_head->next;
1030		free(ext_list_tmp);
1031	}
1032	free(ext_list_head);
1033}
1034
1035/*
1036 * free_exts_group() -		Free the exts_group.
1037 *
1038 * @*ext_group_head:	the exts_group list head which will be free.
1039 */
1040static void free_exts_group(struct fiemap_extent_group *ext_group_head)
1041{
1042	struct fiemap_extent_group	*ext_group_tmp = NULL;
1043
1044	if (ext_group_head == NULL)
1045		return;
1046
1047	while (ext_group_head->next != ext_group_head) {
1048		ext_group_tmp = ext_group_head;
1049		ext_group_head->prev->next = ext_group_head->next;
1050		ext_group_head->next->prev = ext_group_head->prev;
1051		ext_group_head = ext_group_head->next;
1052		free(ext_group_tmp);
1053	}
1054	free(ext_group_head);
1055}
1056
1057/*
1058 * get_best_count() -	Get the file best extents count.
1059 *
1060 * @block_count:		the file's physical block count.
1061 */
1062static int get_best_count(ext4_fsblk_t block_count)
1063{
1064	int ret;
1065	unsigned int flex_bg_num;
1066
1067	/* Calcuate best extents count */
1068	if (feature_incompat & EXT4_FEATURE_INCOMPAT_FLEX_BG) {
1069		flex_bg_num = 1 << log_groups_per_flex;
1070		ret = ((block_count - 1) /
1071			((ext4_fsblk_t)blocks_per_group *
1072				flex_bg_num)) + 1;
1073	} else
1074		ret = ((block_count - 1) / blocks_per_group) + 1;
1075
1076	return ret;
1077}
1078
1079
1080/*
1081 * file_statistic() -	Get statistic info of the file's fragments.
1082 *
1083 * @file:		the file's name.
1084 * @buf:		the pointer of the struct stat64.
1085 * @flag:		file type.
1086 * @ftwbuf:		the pointer of a struct FTW.
1087 */
1088static int file_statistic(const char *file, const struct stat64 *buf,
1089			int flag EXT2FS_ATTR((unused)),
1090			struct FTW *ftwbuf EXT2FS_ATTR((unused)))
1091{
1092	int	fd;
1093	int	ret;
1094	int	now_ext_count, best_ext_count = 0, physical_ext_count;
1095	int	i, j;
1096	__u64	size_per_ext = 0;
1097	float	ratio = 0.0;
1098	ext4_fsblk_t	blk_count = 0;
1099	char	msg_buffer[PATH_MAX + 24];
1100	struct fiemap_extent_list *physical_list_head = NULL;
1101	struct fiemap_extent_list *logical_list_head = NULL;
1102
1103	defraged_file_count++;
1104
1105	if (mode_flag & DETAIL) {
1106		if (total_count == 1 && regular_count == 1)
1107			printf("<File>\n");
1108		else {
1109			printf("[%u/%u]", defraged_file_count, total_count);
1110			fflush(stdout);
1111		}
1112	}
1113	if (lost_found_dir[0] != '\0' &&
1114	    !memcmp(file, lost_found_dir, strnlen(lost_found_dir, PATH_MAX))) {
1115		if (mode_flag & DETAIL) {
1116			PRINT_FILE_NAME(file);
1117			STATISTIC_ERR_MSG(NGMSG_LOST_FOUND);
1118		}
1119			return 0;
1120	}
1121
1122	if (!S_ISREG(buf->st_mode)) {
1123		if (mode_flag & DETAIL) {
1124			PRINT_FILE_NAME(file);
1125			STATISTIC_ERR_MSG(NGMSG_FILE_UNREG);
1126		}
1127		return 0;
1128	}
1129
1130	/* Access authority */
1131	if (current_uid != ROOT_UID &&
1132		buf->st_uid != current_uid) {
1133		if (mode_flag & DETAIL) {
1134			PRINT_FILE_NAME(file);
1135			STATISTIC_ERR_MSG(
1136				"File is not current user's file"
1137				" or current user is not root");
1138		}
1139		return 0;
1140	}
1141
1142	/* Empty file */
1143	if (buf->st_size == 0) {
1144		if (mode_flag & DETAIL) {
1145			PRINT_FILE_NAME(file);
1146			STATISTIC_ERR_MSG("File size is 0");
1147		}
1148		return 0;
1149	}
1150
1151	/* Has no blocks */
1152	if (buf->st_blocks == 0) {
1153		if (mode_flag & DETAIL) {
1154			PRINT_FILE_NAME(file);
1155			STATISTIC_ERR_MSG("File has no blocks");
1156		}
1157		return 0;
1158	}
1159
1160	fd = open64(file, O_RDONLY);
1161	if (fd < 0) {
1162		if (mode_flag & DETAIL) {
1163			PRINT_FILE_NAME(file);
1164			STATISTIC_ERR_MSG_WITH_ERRNO(NGMSG_FILE_OPEN);
1165		}
1166		return 0;
1167	}
1168
1169	/* Get file's physical extents  */
1170	ret = get_file_extents(fd, &physical_list_head);
1171	if (ret < 0) {
1172		if (mode_flag & DETAIL) {
1173			PRINT_FILE_NAME(file);
1174			STATISTIC_ERR_MSG_WITH_ERRNO(NGMSG_FILE_EXTENT);
1175		}
1176		goto out;
1177	}
1178
1179	/* Get the count of file's continuous physical region */
1180	physical_ext_count = get_physical_count(physical_list_head);
1181
1182	/* Change list from physical to logical */
1183	ret = change_physical_to_logical(&physical_list_head,
1184							&logical_list_head);
1185	if (ret < 0) {
1186		if (mode_flag & DETAIL) {
1187			PRINT_FILE_NAME(file);
1188			STATISTIC_ERR_MSG_WITH_ERRNO(NGMSG_FILE_EXTENT);
1189		}
1190		goto out;
1191	}
1192
1193	/* Count file fragments before defrag */
1194	now_ext_count = get_logical_count(logical_list_head);
1195
1196	if (current_uid == ROOT_UID) {
1197		/* Calculate the size per extent */
1198		blk_count = get_file_blocks(logical_list_head);
1199
1200		best_ext_count = get_best_count(blk_count);
1201
1202		/* e4defrag rounds size_per_ext up to a block size boundary */
1203		size_per_ext = blk_count * (buf->st_blksize / 1024) /
1204							now_ext_count;
1205
1206		ratio = (float)(physical_ext_count - best_ext_count) * 100 /
1207							blk_count;
1208
1209		extents_before_defrag += now_ext_count;
1210		extents_after_defrag += best_ext_count;
1211		files_block_count += blk_count;
1212	}
1213
1214	if (total_count == 1 && regular_count == 1) {
1215		/* File only */
1216		if (mode_flag & DETAIL) {
1217			int count = 0;
1218			struct fiemap_extent_list *ext_list_tmp =
1219						logical_list_head;
1220
1221			/* Print extents info */
1222			do {
1223				count++;
1224				printf("[ext %d]:\tstart %llu:\tlogical "
1225						"%llu:\tlen %llu\n", count,
1226						ext_list_tmp->data.physical,
1227						ext_list_tmp->data.logical,
1228						ext_list_tmp->data.len);
1229				ext_list_tmp = ext_list_tmp->next;
1230			} while (ext_list_tmp != logical_list_head);
1231
1232		} else {
1233			printf("%-40s%10s/%-10s%9s\n",
1234					"<File>", "now", "best", "size/ext");
1235			if (current_uid == ROOT_UID) {
1236				if (strlen(file) > 40)
1237					printf("%s\n%50d/%-10d%6llu KB\n",
1238						file, now_ext_count,
1239						best_ext_count, size_per_ext);
1240				else
1241					printf("%-40s%10d/%-10d%6llu KB\n",
1242						file, now_ext_count,
1243						best_ext_count, size_per_ext);
1244			} else {
1245				if (strlen(file) > 40)
1246					printf("%s\n%50d/%-10s%7s\n",
1247							file, now_ext_count,
1248							"-", "-");
1249				else
1250					printf("%-40s%10d/%-10s%7s\n",
1251							file, now_ext_count,
1252							"-", "-");
1253			}
1254		}
1255		succeed_cnt++;
1256		goto out;
1257	}
1258
1259	if (mode_flag & DETAIL) {
1260		/* Print statistic info */
1261		sprintf(msg_buffer, "[%u/%u]%s",
1262				defraged_file_count, total_count, file);
1263		if (current_uid == ROOT_UID) {
1264			if (strlen(msg_buffer) > 40)
1265				printf("\033[79;0H\033[K%s\n"
1266						"%50d/%-10d%6llu KB\n",
1267						msg_buffer, now_ext_count,
1268						best_ext_count, size_per_ext);
1269			else
1270				printf("\033[79;0H\033[K%-40s"
1271						"%10d/%-10d%6llu KB\n",
1272						msg_buffer, now_ext_count,
1273						best_ext_count, size_per_ext);
1274		} else {
1275			if (strlen(msg_buffer) > 40)
1276				printf("\033[79;0H\033[K%s\n%50d/%-10s%7s\n",
1277						msg_buffer, now_ext_count,
1278							"-", "-");
1279			else
1280				printf("\033[79;0H\033[K%-40s%10d/%-10s%7s\n",
1281						msg_buffer, now_ext_count,
1282							"-", "-");
1283		}
1284	}
1285
1286	for (i = 0; i < SHOW_FRAG_FILES; i++) {
1287		if (ratio >= frag_rank[i].ratio) {
1288			for (j = SHOW_FRAG_FILES - 1; j > i; j--) {
1289				memset(&frag_rank[j], 0,
1290					sizeof(struct frag_statistic_ino));
1291				strncpy(frag_rank[j].msg_buffer,
1292					frag_rank[j - 1].msg_buffer,
1293					strnlen(frag_rank[j - 1].msg_buffer,
1294					PATH_MAX));
1295				frag_rank[j].now_count =
1296					frag_rank[j - 1].now_count;
1297				frag_rank[j].best_count =
1298					frag_rank[j - 1].best_count;
1299				frag_rank[j].size_per_ext =
1300					frag_rank[j - 1].size_per_ext;
1301				frag_rank[j].ratio =
1302					frag_rank[j - 1].ratio;
1303			}
1304			memset(&frag_rank[i], 0,
1305					sizeof(struct frag_statistic_ino));
1306			strncpy(frag_rank[i].msg_buffer, file,
1307						strnlen(file, PATH_MAX));
1308			frag_rank[i].now_count = now_ext_count;
1309			frag_rank[i].best_count = best_ext_count;
1310			frag_rank[i].size_per_ext = size_per_ext;
1311			frag_rank[i].ratio = ratio;
1312			break;
1313		}
1314	}
1315
1316	succeed_cnt++;
1317
1318out:
1319	close(fd);
1320	free_ext(physical_list_head);
1321	free_ext(logical_list_head);
1322	return 0;
1323}
1324
1325/*
1326 * print_progress -	Print defrag progress
1327 *
1328 * @file:		file name.
1329 * @start:		logical offset for defrag target file
1330 * @file_size:		defrag target filesize
1331 */
1332static void print_progress(const char *file, loff_t start, loff_t file_size)
1333{
1334	int percent = (start * 100) / file_size;
1335	printf("\033[79;0H\033[K[%u/%u]%s:\t%3d%%",
1336		defraged_file_count, total_count, file, min(percent, 100));
1337	fflush(stdout);
1338
1339	return;
1340}
1341
1342/*
1343 * call_defrag() -	Execute the defrag program.
1344 *
1345 * @fd:			target file descriptor.
1346 * @donor_fd:		donor file descriptor.
1347 * @file:			target file name.
1348 * @buf:			pointer of the struct stat64.
1349 * @ext_list_head:	head of the extent list.
1350 */
1351static int call_defrag(int fd, int donor_fd, const char *file,
1352	const struct stat64 *buf, struct fiemap_extent_list *ext_list_head)
1353{
1354	loff_t	start = 0;
1355	unsigned int	page_num;
1356	unsigned char	*vec = NULL;
1357	int	defraged_ret = 0;
1358	int	ret;
1359	struct move_extent	move_data;
1360	struct fiemap_extent_list	*ext_list_tmp = NULL;
1361
1362	memset(&move_data, 0, sizeof(struct move_extent));
1363	move_data.donor_fd = donor_fd;
1364
1365	/* Print defrag progress */
1366	print_progress(file, start, buf->st_size);
1367
1368	ext_list_tmp = ext_list_head;
1369	do {
1370		move_data.orig_start = ext_list_tmp->data.logical;
1371		/* Logical offset of orig and donor should be same */
1372		move_data.donor_start = move_data.orig_start;
1373		move_data.len = ext_list_tmp->data.len;
1374		move_data.moved_len = 0;
1375
1376		ret = page_in_core(fd, move_data, &vec, &page_num);
1377		if (ret < 0) {
1378			if (mode_flag & DETAIL) {
1379				printf("\n");
1380				PRINT_ERR_MSG_WITH_ERRNO(
1381						"Failed to get file map");
1382			} else {
1383				printf("\t[ NG ]\n");
1384			}
1385			return -1;
1386		}
1387
1388		/* EXT4_IOC_MOVE_EXT */
1389		defraged_ret =
1390			ioctl(fd, EXT4_IOC_MOVE_EXT, &move_data);
1391
1392		/* Free pages */
1393		ret = defrag_fadvise(fd, move_data, vec, page_num);
1394		if (vec) {
1395			free(vec);
1396			vec = NULL;
1397		}
1398		if (ret < 0) {
1399			if (mode_flag & DETAIL) {
1400				printf("\n");
1401				PRINT_ERR_MSG_WITH_ERRNO(
1402					"Failed to free page");
1403			} else {
1404				printf("\t[ NG ]\n");
1405			}
1406			return -1;
1407		}
1408
1409		if (defraged_ret < 0) {
1410			if (mode_flag & DETAIL) {
1411				printf("\n");
1412				PRINT_ERR_MSG_WITH_ERRNO(
1413					"Failed to defrag with "
1414					"EXT4_IOC_MOVE_EXT ioctl");
1415				if (errno == ENOTTY)
1416					printf("\tAt least 2.6.31-rc1 of "
1417						"vanilla kernel is required\n");
1418			} else {
1419				printf("\t[ NG ]\n");
1420			}
1421			return -1;
1422		}
1423		/* Adjust logical offset for next ioctl */
1424		move_data.orig_start += move_data.moved_len;
1425		move_data.donor_start = move_data.orig_start;
1426
1427		start = move_data.orig_start * buf->st_blksize;
1428
1429		/* Print defrag progress */
1430		print_progress(file, start, buf->st_size);
1431
1432		/* End of file */
1433		if (start >= buf->st_size)
1434			break;
1435
1436		ext_list_tmp = ext_list_tmp->next;
1437	} while (ext_list_tmp != ext_list_head);
1438
1439	return 0;
1440}
1441
1442/*
1443 * file_defrag() -		Check file attributes and call ioctl to defrag.
1444 *
1445 * @file:		the file's name.
1446 * @buf:		the pointer of the struct stat64.
1447 * @flag:		file type.
1448 * @ftwbuf:		the pointer of a struct FTW.
1449 */
1450static int file_defrag(const char *file, const struct stat64 *buf,
1451			int flag EXT2FS_ATTR((unused)),
1452			struct FTW *ftwbuf EXT2FS_ATTR((unused)))
1453{
1454	int	fd;
1455	int	donor_fd = -1;
1456	int	ret;
1457	int	best;
1458	int	file_frags_start, file_frags_end;
1459	int	orig_physical_cnt, donor_physical_cnt = 0;
1460	char	tmp_inode_name[PATH_MAX + 8];
1461	ext4_fsblk_t			blk_count = 0;
1462	struct fiemap_extent_list	*orig_list_physical = NULL;
1463	struct fiemap_extent_list	*orig_list_logical = NULL;
1464	struct fiemap_extent_list	*donor_list_physical = NULL;
1465	struct fiemap_extent_list	*donor_list_logical = NULL;
1466	struct fiemap_extent_group	*orig_group_head = NULL;
1467	struct fiemap_extent_group	*orig_group_tmp = NULL;
1468
1469	defraged_file_count++;
1470
1471	if (mode_flag & DETAIL) {
1472		printf("[%u/%u]", defraged_file_count, total_count);
1473		fflush(stdout);
1474	}
1475
1476	if (lost_found_dir[0] != '\0' &&
1477	    !memcmp(file, lost_found_dir, strnlen(lost_found_dir, PATH_MAX))) {
1478		if (mode_flag & DETAIL) {
1479			PRINT_FILE_NAME(file);
1480			IN_FTW_PRINT_ERR_MSG(NGMSG_LOST_FOUND);
1481		}
1482		return 0;
1483	}
1484
1485	if (!S_ISREG(buf->st_mode)) {
1486		if (mode_flag & DETAIL) {
1487			PRINT_FILE_NAME(file);
1488			IN_FTW_PRINT_ERR_MSG(NGMSG_FILE_UNREG);
1489		}
1490		return 0;
1491	}
1492
1493	/* Empty file */
1494	if (buf->st_size == 0) {
1495		if (mode_flag & DETAIL) {
1496			PRINT_FILE_NAME(file);
1497			IN_FTW_PRINT_ERR_MSG("File size is 0");
1498		}
1499		return 0;
1500	}
1501
1502	/* Has no blocks */
1503	if (buf->st_blocks == 0) {
1504		if (mode_flag & DETAIL) {
1505			PRINT_FILE_NAME(file);
1506			STATISTIC_ERR_MSG("File has no blocks");
1507		}
1508		return 0;
1509	}
1510
1511	fd = open64(file, O_RDWR);
1512	if (fd < 0) {
1513		if (mode_flag & DETAIL) {
1514			PRINT_FILE_NAME(file);
1515			PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_OPEN);
1516		}
1517		return 0;
1518	}
1519
1520	/* Get file's extents */
1521	ret = get_file_extents(fd, &orig_list_physical);
1522	if (ret < 0) {
1523		if (mode_flag & DETAIL) {
1524			PRINT_FILE_NAME(file);
1525			PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_EXTENT);
1526		}
1527		goto out;
1528	}
1529
1530	/* Get the count of file's continuous physical region */
1531	orig_physical_cnt = get_physical_count(orig_list_physical);
1532
1533	/* Change list from physical to logical */
1534	ret = change_physical_to_logical(&orig_list_physical,
1535							&orig_list_logical);
1536	if (ret < 0) {
1537		if (mode_flag & DETAIL) {
1538			PRINT_FILE_NAME(file);
1539			PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_EXTENT);
1540		}
1541		goto out;
1542	}
1543
1544	/* Count file fragments before defrag */
1545	file_frags_start = get_logical_count(orig_list_logical);
1546
1547	blk_count = get_file_blocks(orig_list_logical);
1548	if (file_check(fd, buf, file, file_frags_start, blk_count) < 0)
1549		goto out;
1550
1551	if (fsync(fd) < 0) {
1552		if (mode_flag & DETAIL) {
1553			PRINT_FILE_NAME(file);
1554			PRINT_ERR_MSG_WITH_ERRNO("Failed to sync(fsync)");
1555		}
1556		goto out;
1557	}
1558
1559	if (current_uid == ROOT_UID)
1560		best = get_best_count(blk_count);
1561	else
1562		best = 1;
1563
1564	if (file_frags_start <= best)
1565		goto check_improvement;
1566
1567	/* Combine extents to group */
1568	ret = join_extents(orig_list_logical, &orig_group_head);
1569	if (ret < 0) {
1570		if (mode_flag & DETAIL) {
1571			PRINT_FILE_NAME(file);
1572			PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_EXTENT);
1573		}
1574		goto out;
1575	}
1576
1577	/* Create donor inode */
1578	memset(tmp_inode_name, 0, PATH_MAX + 8);
1579	sprintf(tmp_inode_name, "%.*s.defrag",
1580				(int)strnlen(file, PATH_MAX), file);
1581	donor_fd = open64(tmp_inode_name, O_WRONLY | O_CREAT | O_EXCL, S_IRUSR);
1582	if (donor_fd < 0) {
1583		if (mode_flag & DETAIL) {
1584			PRINT_FILE_NAME(file);
1585			if (errno == EEXIST)
1586				PRINT_ERR_MSG_WITH_ERRNO(
1587				"File is being defraged by other program");
1588			else
1589				PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_OPEN);
1590		}
1591		goto out;
1592	}
1593
1594	/* Unlink donor inode */
1595	ret = unlink(tmp_inode_name);
1596	if (ret < 0) {
1597		if (mode_flag & DETAIL) {
1598			PRINT_FILE_NAME(file);
1599			PRINT_ERR_MSG_WITH_ERRNO("Failed to unlink");
1600		}
1601		goto out;
1602	}
1603
1604	/* Allocate space for donor inode */
1605	orig_group_tmp = orig_group_head;
1606	do {
1607		ret = fallocate64(donor_fd, 0,
1608		  (loff_t)orig_group_tmp->start->data.logical * block_size,
1609		  (loff_t)orig_group_tmp->len * block_size);
1610		if (ret < 0) {
1611			if (mode_flag & DETAIL) {
1612				PRINT_FILE_NAME(file);
1613				PRINT_ERR_MSG_WITH_ERRNO("Failed to fallocate");
1614			}
1615			goto out;
1616		}
1617
1618		orig_group_tmp = orig_group_tmp->next;
1619	} while (orig_group_tmp != orig_group_head);
1620
1621	/* Get donor inode's extents */
1622	ret = get_file_extents(donor_fd, &donor_list_physical);
1623	if (ret < 0) {
1624		if (mode_flag & DETAIL) {
1625			PRINT_FILE_NAME(file);
1626			PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_EXTENT);
1627		}
1628		goto out;
1629	}
1630
1631	/* Calcuate donor inode's continuous physical region */
1632	donor_physical_cnt = get_physical_count(donor_list_physical);
1633
1634	/* Change donor extent list from physical to logical */
1635	ret = change_physical_to_logical(&donor_list_physical,
1636							&donor_list_logical);
1637	if (ret < 0) {
1638		if (mode_flag & DETAIL) {
1639			PRINT_FILE_NAME(file);
1640			PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_EXTENT);
1641		}
1642		goto out;
1643	}
1644
1645check_improvement:
1646	if (mode_flag & DETAIL) {
1647		if (file_frags_start != 1)
1648			frag_files_before_defrag++;
1649
1650		extents_before_defrag += file_frags_start;
1651	}
1652
1653	if (file_frags_start <= best ||
1654			orig_physical_cnt <= donor_physical_cnt) {
1655		printf("\033[79;0H\033[K[%u/%u]%s:\t%3d%%",
1656			defraged_file_count, total_count, file, 100);
1657		if (mode_flag & DETAIL)
1658			printf("  extents: %d -> %d",
1659				file_frags_start, file_frags_start);
1660
1661		printf("\t[ OK ]\n");
1662		succeed_cnt++;
1663
1664		if (file_frags_start != 1)
1665			frag_files_after_defrag++;
1666
1667		extents_after_defrag += file_frags_start;
1668		goto out;
1669	}
1670
1671	/* Defrag the file */
1672	ret = call_defrag(fd, donor_fd, file, buf, donor_list_logical);
1673
1674	/* Count file fragments after defrag and print extents info */
1675	if (mode_flag & DETAIL) {
1676		file_frags_end = file_frag_count(fd);
1677		if (file_frags_end < 0) {
1678			printf("\n");
1679			PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_INFO);
1680			goto out;
1681		}
1682
1683		if (file_frags_end != 1)
1684			frag_files_after_defrag++;
1685
1686		extents_after_defrag += file_frags_end;
1687
1688		if (ret < 0)
1689			goto out;
1690
1691		printf("  extents: %d -> %d",
1692			file_frags_start, file_frags_end);
1693		fflush(stdout);
1694	}
1695
1696	if (ret < 0)
1697		goto out;
1698
1699	printf("\t[ OK ]\n");
1700	fflush(stdout);
1701	succeed_cnt++;
1702
1703out:
1704	close(fd);
1705	if (donor_fd != -1)
1706		close(donor_fd);
1707	free_ext(orig_list_physical);
1708	free_ext(orig_list_logical);
1709	free_ext(donor_list_physical);
1710	free_exts_group(orig_group_head);
1711	return 0;
1712}
1713
1714/*
1715 * main() -		Ext4 online defrag.
1716 *
1717 * @argc:		the number of parameter.
1718 * @argv[]:		the pointer array of parameter.
1719 */
1720int main(int argc, char *argv[])
1721{
1722	int	opt;
1723	int	i, j, ret = 0;
1724	int	flags = FTW_PHYS | FTW_MOUNT;
1725	int	arg_type = -1;
1726	int	success_flag = 0;
1727	char	dir_name[PATH_MAX + 1];
1728	char	dev_name[PATH_MAX + 1];
1729	struct stat64	buf;
1730	ext2_filsys fs = NULL;
1731
1732	/* Parse arguments */
1733	if (argc == 1)
1734		goto out;
1735
1736	while ((opt = getopt(argc, argv, "vc")) != EOF) {
1737		switch (opt) {
1738		case 'v':
1739			mode_flag |= DETAIL;
1740			break;
1741		case 'c':
1742			mode_flag |= STATISTIC;
1743			break;
1744		default:
1745			goto out;
1746		}
1747	}
1748
1749	if (argc == optind)
1750		goto out;
1751
1752	current_uid = getuid();
1753
1754	/* Main process */
1755	for (i = optind; i < argc; i++) {
1756		succeed_cnt = 0;
1757		regular_count = 0;
1758		total_count = 0;
1759		frag_files_before_defrag = 0;
1760		frag_files_after_defrag = 0;
1761		extents_before_defrag = 0;
1762		extents_after_defrag = 0;
1763		defraged_file_count = 0;
1764		files_block_count = 0;
1765		blocks_per_group = 0;
1766		feature_incompat = 0;
1767		log_groups_per_flex = 0;
1768
1769		memset(dir_name, 0, PATH_MAX + 1);
1770		memset(dev_name, 0, PATH_MAX + 1);
1771		memset(lost_found_dir, 0, PATH_MAX + 1);
1772		memset(frag_rank, 0,
1773			sizeof(struct frag_statistic_ino) * SHOW_FRAG_FILES);
1774
1775		if ((mode_flag & STATISTIC) && i > optind)
1776			printf("\n");
1777
1778#if BYTE_ORDER != BIG_ENDIAN && BYTE_ORDER != LITTLE_ENDIAN
1779		PRINT_ERR_MSG("Endian's type is not big/little endian");
1780		PRINT_FILE_NAME(argv[i]);
1781		continue;
1782#endif
1783
1784		if (lstat64(argv[i], &buf) < 0) {
1785			perror(NGMSG_FILE_INFO);
1786			PRINT_FILE_NAME(argv[i]);
1787			continue;
1788		}
1789
1790		/* Handle i.e. lvm device symlinks */
1791		if (S_ISLNK(buf.st_mode)) {
1792			struct stat64	buf2;
1793
1794			if (stat64(argv[i], &buf2) == 0 &&
1795			    S_ISBLK(buf2.st_mode))
1796				buf = buf2;
1797		}
1798
1799		if (S_ISBLK(buf.st_mode)) {
1800			/* Block device */
1801			strncpy(dev_name, argv[i], strnlen(argv[i], PATH_MAX));
1802			if (get_mount_point(argv[i], dir_name, PATH_MAX) < 0)
1803				continue;
1804			if (lstat64(dir_name, &buf) < 0) {
1805				perror(NGMSG_FILE_INFO);
1806				PRINT_FILE_NAME(argv[i]);
1807				continue;
1808			}
1809			arg_type = DEVNAME;
1810			if (!(mode_flag & STATISTIC))
1811				printf("ext4 defragmentation for device(%s)\n",
1812					argv[i]);
1813		} else if (S_ISDIR(buf.st_mode)) {
1814			/* Directory */
1815			if (access(argv[i], R_OK) < 0) {
1816				perror(argv[i]);
1817				continue;
1818			}
1819			arg_type = DIRNAME;
1820			strncpy(dir_name, argv[i], strnlen(argv[i], PATH_MAX));
1821		} else if (S_ISREG(buf.st_mode)) {
1822			/* Regular file */
1823			arg_type = FILENAME;
1824		} else {
1825			/* Irregular file */
1826			PRINT_ERR_MSG(NGMSG_FILE_UNREG);
1827			PRINT_FILE_NAME(argv[i]);
1828			continue;
1829		}
1830
1831		/* Set blocksize */
1832		block_size = buf.st_blksize;
1833
1834		/* For device case,
1835		 * filesystem type checked in get_mount_point()
1836		 */
1837		if (arg_type == FILENAME || arg_type == DIRNAME) {
1838			if (is_ext4(argv[i], dev_name) < 0)
1839				continue;
1840			if (realpath(argv[i], dir_name) == NULL) {
1841				perror("Couldn't get full path");
1842				PRINT_FILE_NAME(argv[i]);
1843				continue;
1844			}
1845		}
1846
1847		if (current_uid == ROOT_UID) {
1848			/* Get super block info */
1849			ret = ext2fs_open(dev_name, 0, 0, block_size,
1850					unix_io_manager, &fs);
1851			if (ret) {
1852				if (mode_flag & DETAIL) {
1853					perror("Can't get super block info");
1854					PRINT_FILE_NAME(argv[i]);
1855				}
1856				continue;
1857			}
1858
1859			blocks_per_group = fs->super->s_blocks_per_group;
1860			feature_incompat = fs->super->s_feature_incompat;
1861			log_groups_per_flex = fs->super->s_log_groups_per_flex;
1862
1863			ext2fs_close(fs);
1864		}
1865
1866		switch (arg_type) {
1867			int mount_dir_len = 0;
1868
1869		case DIRNAME:
1870			if (!(mode_flag & STATISTIC))
1871				printf("ext4 defragmentation "
1872					"for directory(%s)\n", argv[i]);
1873
1874			mount_dir_len = strnlen(lost_found_dir, PATH_MAX);
1875
1876			strncat(lost_found_dir, "/lost+found",
1877				PATH_MAX - strnlen(lost_found_dir, PATH_MAX));
1878
1879			/* Not the case("e4defrag  mount_piont_dir") */
1880			if (dir_name[mount_dir_len] != '\0') {
1881				/*
1882				 * "e4defrag mount_piont_dir/lost+found"
1883				 * or "e4defrag mount_piont_dir/lost+found/"
1884				 */
1885				if (strncmp(lost_found_dir, dir_name,
1886					    strnlen(lost_found_dir,
1887						    PATH_MAX)) == 0 &&
1888				    (dir_name[strnlen(lost_found_dir,
1889						      PATH_MAX)] == '\0' ||
1890				     dir_name[strnlen(lost_found_dir,
1891						      PATH_MAX)] == '/')) {
1892					PRINT_ERR_MSG(NGMSG_LOST_FOUND);
1893					PRINT_FILE_NAME(argv[i]);
1894					continue;
1895				}
1896
1897				/* "e4defrag mount_piont_dir/else_dir" */
1898				memset(lost_found_dir, 0, PATH_MAX + 1);
1899			}
1900		case DEVNAME:
1901			if (arg_type == DEVNAME) {
1902				strncpy(lost_found_dir, dir_name,
1903					strnlen(dir_name, PATH_MAX));
1904				strncat(lost_found_dir, "/lost+found/",
1905					PATH_MAX - strnlen(lost_found_dir,
1906							   PATH_MAX));
1907			}
1908
1909			nftw64(dir_name, calc_entry_counts, FTW_OPEN_FD, flags);
1910
1911			if (mode_flag & STATISTIC) {
1912				if (mode_flag & DETAIL)
1913					printf("%-40s%10s/%-10s%9s\n",
1914					"<File>", "now", "best", "size/ext");
1915
1916				if (!(mode_flag & DETAIL) &&
1917						current_uid != ROOT_UID) {
1918					printf(" Done.\n");
1919					success_flag = 1;
1920					continue;
1921				}
1922
1923				nftw64(dir_name, file_statistic,
1924							FTW_OPEN_FD, flags);
1925
1926				if (succeed_cnt != 0 &&
1927					current_uid == ROOT_UID) {
1928					if (mode_flag & DETAIL)
1929						printf("\n");
1930					printf("%-40s%10s/%-10s%9s\n",
1931						"<Fragmented files>", "now",
1932						"best", "size/ext");
1933					for (j = 0; j < SHOW_FRAG_FILES; j++) {
1934						if (strlen(frag_rank[j].
1935							msg_buffer) > 37) {
1936							printf("%d. %s\n%50d/"
1937							"%-10d%6llu KB\n",
1938							j + 1,
1939							frag_rank[j].msg_buffer,
1940							frag_rank[j].now_count,
1941							frag_rank[j].best_count,
1942							frag_rank[j].
1943								size_per_ext);
1944						} else if (strlen(frag_rank[j].
1945							msg_buffer) > 0) {
1946							printf("%d. %-37s%10d/"
1947							"%-10d%6llu KB\n",
1948							j + 1,
1949							frag_rank[j].msg_buffer,
1950							frag_rank[j].now_count,
1951							frag_rank[j].best_count,
1952							frag_rank[j].
1953								size_per_ext);
1954						} else
1955							break;
1956					}
1957				}
1958				break;
1959			}
1960			/* File tree walk */
1961			nftw64(dir_name, file_defrag, FTW_OPEN_FD, flags);
1962			printf("\n\tSuccess:\t\t\t[ %u/%u ]\n", succeed_cnt,
1963				total_count);
1964			printf("\tFailure:\t\t\t[ %u/%u ]\n",
1965				total_count - succeed_cnt, total_count);
1966			if (mode_flag & DETAIL) {
1967				printf("\tTotal extents:\t\t\t%4d->%d\n",
1968					extents_before_defrag,
1969					extents_after_defrag);
1970				printf("\tFragmented percentage:\t\t"
1971					"%3llu%%->%llu%%\n",
1972					!regular_count ? 0 :
1973					((unsigned long long)
1974					frag_files_before_defrag * 100) /
1975					regular_count,
1976					!regular_count ? 0 :
1977					((unsigned long long)
1978					frag_files_after_defrag * 100) /
1979					regular_count);
1980			}
1981			break;
1982		case FILENAME:
1983			total_count = 1;
1984			regular_count = 1;
1985			strncat(lost_found_dir, "/lost+found/",
1986				PATH_MAX - strnlen(lost_found_dir,
1987						   PATH_MAX));
1988			if (strncmp(lost_found_dir, dir_name,
1989				    strnlen(lost_found_dir,
1990					    PATH_MAX)) == 0) {
1991				PRINT_ERR_MSG(NGMSG_LOST_FOUND);
1992				PRINT_FILE_NAME(argv[i]);
1993				continue;
1994			}
1995
1996			if (mode_flag & STATISTIC) {
1997				file_statistic(argv[i], &buf, FTW_F, NULL);
1998				break;
1999			} else
2000				printf("ext4 defragmentation for %s\n",
2001								 argv[i]);
2002			/* Defrag single file process */
2003			file_defrag(argv[i], &buf, FTW_F, NULL);
2004			if (succeed_cnt != 0)
2005				printf(" Success:\t\t\t[1/1]\n");
2006			else
2007				printf(" Success:\t\t\t[0/1]\n");
2008
2009			break;
2010		}
2011
2012		if (succeed_cnt != 0)
2013			success_flag = 1;
2014		if (mode_flag & STATISTIC) {
2015			if (current_uid != ROOT_UID) {
2016				printf(" Done.\n");
2017				continue;
2018			}
2019
2020			if (!succeed_cnt) {
2021				if (mode_flag & DETAIL)
2022					printf("\n");
2023
2024				if (arg_type == DEVNAME)
2025					printf(" In this device(%s), "
2026					"none can be defragmented.\n", argv[i]);
2027				else if (arg_type == DIRNAME)
2028					printf(" In this directory(%s), "
2029					"none can be defragmented.\n", argv[i]);
2030				else
2031					printf(" This file(%s) "
2032					"can't be defragmented.\n", argv[i]);
2033			} else {
2034				float files_ratio = 0.0;
2035				float score = 0.0;
2036				__u64 size_per_ext = files_block_count *
2037						(buf.st_blksize / 1024) /
2038						extents_before_defrag;
2039				files_ratio = (float)(extents_before_defrag -
2040						extents_after_defrag) *
2041						100 / files_block_count;
2042				score = CALC_SCORE(files_ratio);
2043				printf("\n Total/best extents\t\t\t\t%d/%d\n"
2044					" Average size per extent"
2045					"\t\t\t%llu KB\n"
2046					" Fragmentation score\t\t\t\t%.0f\n",
2047						extents_before_defrag,
2048						extents_after_defrag,
2049						size_per_ext, score);
2050				printf(" [0-30 no problem:"
2051					" 31-55 a little bit fragmented:"
2052					" 56- needs defrag]\n");
2053
2054				if (arg_type == DEVNAME)
2055					printf(" This device (%s) ", argv[i]);
2056				else if (arg_type == DIRNAME)
2057					printf(" This directory (%s) ",
2058								argv[i]);
2059				else
2060					printf(" This file (%s) ", argv[i]);
2061
2062				if (score > BOUND_SCORE)
2063					printf("needs defragmentation.\n");
2064				else
2065					printf("does not need "
2066							"defragmentation.\n");
2067			}
2068			printf(" Done.\n");
2069		}
2070
2071	}
2072
2073	if (success_flag)
2074		return 0;
2075
2076	exit(1);
2077
2078out:
2079	printf(MSG_USAGE);
2080	exit(1);
2081}
2082
2083