1/*
2 * e4defrag.c - ext4 filesystem defragmenter
3 *
4 * Copyright (C) 2009 NEC Software Tohoku, Ltd.
5 *
6 * Author: Akira Fujita	<a-fujita@rs.jp.nec.com>
7 *         Takashi Sato	<t-sato@yk.jp.nec.com>
8 */
9
10#ifndef _LARGEFILE_SOURCE
11#define _LARGEFILE_SOURCE
12#endif
13
14#ifndef _LARGEFILE64_SOURCE
15#define _LARGEFILE64_SOURCE
16#endif
17
18#ifndef _GNU_SOURCE
19#define _GNU_SOURCE
20#endif
21
22#include "config.h"
23#include <ctype.h>
24#include <dirent.h>
25#include <endian.h>
26#include <errno.h>
27#include <fcntl.h>
28#include <ftw.h>
29#include <limits.h>
30#include <mntent.h>
31#include <stdio.h>
32#include <stdlib.h>
33#include <string.h>
34#include <unistd.h>
35#include <ext2fs/ext2_types.h>
36#include <ext2fs/ext2fs.h>
37#include <sys/ioctl.h>
38#include <ext2fs/fiemap.h>
39#include <sys/mman.h>
40#include <sys/stat.h>
41#include <sys/statfs.h>
42#include <sys/vfs.h>
43
/* A relatively new ioctl interface ... */
#ifndef EXT4_IOC_MOVE_EXT
#define EXT4_IOC_MOVE_EXT      _IOWR('f', 15, struct move_extent)
#endif

/* Macro functions */

/* Print "msg" followed by a newline on stderr */
#define PRINT_ERR_MSG(msg)	fprintf(stderr, "%s\n", (msg))
/* Print an indented "msg" with a trailing "[ NG ]" marker on stderr */
#define IN_FTW_PRINT_ERR_MSG(msg)	\
	fprintf(stderr, "\t%s\t\t[ NG ]\n", (msg))
/* Print a quoted file name on stderr */
#define PRINT_FILE_NAME(file)	fprintf(stderr, " \"%s\"\n", (file))
/* Like IN_FTW_PRINT_ERR_MSG, with strerror(errno) appended to "msg" */
#define PRINT_ERR_MSG_WITH_ERRNO(msg)	\
	fprintf(stderr, "\t%s:%s\t[ NG ]\n", (msg), strerror(errno))
/* Print an indented "msg" on stderr (no "[ NG ]" marker) */
#define STATISTIC_ERR_MSG(msg)	\
	fprintf(stderr, "\t%s\n", (msg))
/* Like STATISTIC_ERR_MSG, with strerror(errno) appended to "msg" */
#define STATISTIC_ERR_MSG_WITH_ERRNO(msg)	\
	fprintf(stderr, "\t%s:%s\n", (msg), strerror(errno))
/* Smaller of two values; note that both arguments are evaluated twice */
#define min(x, y) (((x) > (y)) ? (y) : (x))
/*
 * Map a ratio to a score: 8 * ratio up to a ratio of 10,
 * 80 + 20 * ratio / 100 above that.
 */
#define CALC_SCORE(ratio) \
	((ratio) > 10 ? (80 + 20 * (ratio) / 100) : (8 * (ratio)))
/*
 * Wrap up the free function.
 * free(NULL) is a no-op, so no NULL test is needed and "tmp" is
 * evaluated exactly once.
 */
#define FREE(tmp)				\
	do {					\
		free(tmp);			\
	} while (0)
/*
 * Insert list2 after list1 in a circular doubly-linked list.
 * Arguments are parenthesized so that expressions such as "&node"
 * can be passed; each argument is evaluated more than once.
 */
#define insert(list1, list2)				\
	do {						\
		(list2)->next = (list1)->next;		\
		(list1)->next->prev = (list2);		\
		(list2)->prev = (list1);		\
		(list1)->next = (list2);		\
	} while (0)
77
/*
 * To delete unused warning: EXT2FS_ATTR((unused)) marks a parameter as
 * intentionally unused on GNU-compatible compilers and expands to nothing
 * elsewhere.
 */
#ifdef __GNUC__
#define EXT2FS_ATTR(x) __attribute__(x)
#else
#define EXT2FS_ATTR(x)
#endif

/* The mode of defrag: bit flags tested against mode_flag */
#define DETAIL			0x01
#define STATISTIC		0x02

/* Kinds of command line argument -- NOTE(review): users are outside this view */
#define DEVNAME			0
#define DIRNAME			1
#define FILENAME		2

/* NOTE(review): presumably the descriptor budget of the tree walk -- confirm */
#define FTW_OPEN_FD		2000

/* Filesystem type name compared against the mount table's type field */
#define FS_EXT4			"ext4"
/* User id that is treated as root */
#define ROOT_UID		0

/* NOTE(review): score threshold; its use is outside this view */
#define BOUND_SCORE		55
/* Number of entries in the frag_rank table */
#define SHOW_FRAG_FILES	5

/* Magic number for ext4, compared against statfs f_type */
#define EXT4_SUPER_MAGIC	0xEF53

/* Definition of flex_bg: bit tested in feature_incompat */
#define EXT4_FEATURE_INCOMPAT_FLEX_BG		0x0200

/* The following macro is used for ioctl FS_IOC_FIEMAP
 * EXTENT_MAX_COUNT:	the maximum number of extents for exchanging between
 *			kernel-space and user-space per ioctl
 */
#define EXTENT_MAX_COUNT	512

/* The following macros are error messages */
#define MSG_USAGE		\
"Usage	: e4defrag [-v] file...| directory...| device...\n\
	: e4defrag  -c  file...| directory...| device...\n"

#define NGMSG_EXT4		"Filesystem is not ext4 filesystem"
#define NGMSG_FILE_EXTENT	"Failed to get file extents"
#define NGMSG_FILE_INFO		"Failed to get file information"
#define NGMSG_FILE_OPEN		"Failed to open"
#define NGMSG_FILE_UNREG	"File is not regular file"
#define NGMSG_LOST_FOUND	"Can not process \"lost+found\""
124
/* Data type for filesystem-wide block numbers */
typedef unsigned long long ext4_fsblk_t;

/* One extent of a file, expressed in filesystem blocks */
struct fiemap_extent_data {
	__u64 len;			/* blocks count */
	__u64 logical;		/* start logical block number */
	ext4_fsblk_t physical;		/* start physical block number */
};

/* Node of a circular doubly-linked list of extents */
struct fiemap_extent_list {
	struct fiemap_extent_list *prev;
	struct fiemap_extent_list *next;
	struct fiemap_extent_data data;	/* extent belonging to the file */
};

/*
 * Node of a circular doubly-linked list of extent groups; a group is a
 * run of extents whose logical ranges follow each other without a gap.
 */
struct fiemap_extent_group {
	struct fiemap_extent_group *prev;
	struct fiemap_extent_group *next;
	__u64 len;	/* length of this continuous region */
	struct fiemap_extent_list *start;	/* start ext */
	struct fiemap_extent_list *end;		/* end ext */
};

/* Argument of the EXT4_IOC_MOVE_EXT ioctl */
struct move_extent {
	/*
	 * NOTE(review): the original comment called this the "original file
	 * descriptor", which does not match the field name -- confirm
	 * against the kernel's definition of struct move_extent.
	 */
	__s32 reserved;	/* original file descriptor */
	__u32 donor_fd;	/* donor file descriptor */
	__u64 orig_start;	/* logical start offset in block for orig */
	__u64 donor_start;	/* logical start offset in block for donor */
	__u64 len;	/* block length to be moved */
	__u64 moved_len;	/* moved block length */
};

/* Per-file fragmentation figures */
struct frag_statistic_ino {
	int now_count;	/* the file's extents count before defrag */
	int best_count; /* the best file's extents count */
	__u64 size_per_ext;	/* size(KB) per extent */
	float ratio;	/* the ratio of fragmentation */
	char msg_buffer[PATH_MAX + 1];	/* pathname of the file */
};
164
/*
 * Mount directory recorded by is_ext4(); file_statistic() skips files whose
 * path starts with this string.
 */
static char	lost_found_dir[PATH_MAX + 1];
/* Bytes per filesystem block; converts between byte offsets and blocks */
static int	block_size;
/* Running sums of current / best extent counts (see file_statistic()) */
static int	extents_before_defrag;
static int	extents_after_defrag;
/* Bit mask of DETAIL / STATISTIC */
static int	mode_flag;
/* User id compared against ROOT_UID and against each file's owner */
static unsigned int	current_uid;
/* Number of files visited so far; printed as the progress numerator */
static unsigned int	defraged_file_count;
/* NOTE(review): users of the next two counters are outside this view */
static unsigned int	frag_files_before_defrag;
static unsigned int	frag_files_after_defrag;
/* Regular-file and total entry counts, tallied by calc_entry_counts() */
static unsigned int	regular_count;
/* Number of files processed successfully */
static unsigned int	succeed_cnt;
static unsigned int	total_count;
/*
 * Filesystem geometry read by get_best_count().
 * NOTE(review): presumably loaded from the superblock -- the code that
 * fills these in is outside this view.
 */
static __u8 log_groups_per_flex;
static __u32 blocks_per_group;
static __u32 feature_incompat;
/* Running sum of the block counts of the examined files */
static ext4_fsblk_t	files_block_count;
/* NOTE(review): presumably the most fragmented files found -- confirm */
static struct frag_statistic_ino	frag_rank[SHOW_FRAG_FILES];


/*
 * We prefer posix_fadvise64 when available, as it allows 64bit offset on
 * 32bit systems.  Otherwise fall back to fadvise64, then to the plain
 * posix_fadvise; building fails if none of them was detected.
 */
#if defined(HAVE_POSIX_FADVISE64)
#define posix_fadvise	posix_fadvise64
#elif defined(HAVE_FADVISE64)
#define posix_fadvise	fadvise64
#elif !defined(HAVE_POSIX_FADVISE)
#error posix_fadvise not available!
#endif

/* fallocate64 is mandatory for this program */
#ifndef HAVE_FALLOCATE64
#error fallocate64 not available!
#endif /* ! HAVE_FALLOCATE64 */
199
200/*
201 * get_mount_point() -	Get device's mount point.
202 *
203 * @devname:		the device's name.
204 * @mount_point:	the mount point.
205 * @dir_path_len:	the length of directory.
206 */
207static int get_mount_point(const char *devname, char *mount_point,
208							int dir_path_len)
209{
210	/* Refer to /etc/mtab */
211	const char	*mtab = MOUNTED;
212	FILE		*fp = NULL;
213	struct mntent	*mnt = NULL;
214	struct stat64	sb;
215
216	if (stat64(devname, &sb) < 0) {
217		perror(NGMSG_FILE_INFO);
218		PRINT_FILE_NAME(devname);
219		return -1;
220	}
221
222	fp = setmntent(mtab, "r");
223	if (fp == NULL) {
224		perror("Couldn't access /etc/mtab");
225		return -1;
226	}
227
228	while ((mnt = getmntent(fp)) != NULL) {
229		struct stat64 ms;
230
231		/*
232		 * To handle device symlinks, we see if the
233		 * device number matches, not the name
234		 */
235		if (stat64(mnt->mnt_fsname, &ms) < 0)
236			continue;
237		if (sb.st_rdev != ms.st_rdev)
238			continue;
239
240		endmntent(fp);
241		if (strcmp(mnt->mnt_type, FS_EXT4) == 0) {
242			strncpy(mount_point, mnt->mnt_dir,
243				dir_path_len);
244			return 0;
245		}
246		PRINT_ERR_MSG(NGMSG_EXT4);
247		return -1;
248	}
249	endmntent(fp);
250	PRINT_ERR_MSG("Filesystem is not mounted");
251	return -1;
252}
253
254/*
255 * is_ext4() -		Whether on an ext4 filesystem.
256 *
257 * @file:		the file's name.
258 */
259static int is_ext4(const char *file, char *devname)
260{
261	int 	maxlen = 0;
262	int	len, ret;
263	FILE	*fp = NULL;
264	char	*mnt_type = NULL;
265	/* Refer to /etc/mtab */
266	const char	*mtab = MOUNTED;
267	char	file_path[PATH_MAX + 1];
268	struct mntent	*mnt = NULL;
269	struct statfs64	fsbuf;
270
271	/* Get full path */
272	if (realpath(file, file_path) == NULL) {
273		perror("Couldn't get full path");
274		PRINT_FILE_NAME(file);
275		return -1;
276	}
277
278	if (statfs64(file_path, &fsbuf) < 0) {
279		perror("Failed to get filesystem information");
280		PRINT_FILE_NAME(file);
281		return -1;
282	}
283
284	if (fsbuf.f_type != EXT4_SUPER_MAGIC) {
285		PRINT_ERR_MSG(NGMSG_EXT4);
286		return -1;
287	}
288
289	fp = setmntent(mtab, "r");
290	if (fp == NULL) {
291		perror("Couldn't access /etc/mtab");
292		return -1;
293	}
294
295	while ((mnt = getmntent(fp)) != NULL) {
296		if (mnt->mnt_fsname[0] != '/')
297			continue;
298		len = strlen(mnt->mnt_dir);
299		ret = memcmp(file_path, mnt->mnt_dir, len);
300		if (ret != 0)
301			continue;
302
303		if (maxlen >= len)
304			continue;
305
306		maxlen = len;
307
308		mnt_type = realloc(mnt_type, strlen(mnt->mnt_type) + 1);
309		if (mnt_type == NULL) {
310			endmntent(fp);
311			return -1;
312		}
313		memset(mnt_type, 0, strlen(mnt->mnt_type) + 1);
314		strncpy(mnt_type, mnt->mnt_type, strlen(mnt->mnt_type));
315		strncpy(lost_found_dir, mnt->mnt_dir, PATH_MAX);
316		strncpy(devname, mnt->mnt_fsname, strlen(mnt->mnt_fsname) + 1);
317	}
318
319	endmntent(fp);
320	if (mnt_type && strcmp(mnt_type, FS_EXT4) == 0) {
321		FREE(mnt_type);
322		return 0;
323	} else {
324		FREE(mnt_type);
325		PRINT_ERR_MSG(NGMSG_EXT4);
326		return -1;
327	}
328}
329
330/*
331 * calc_entry_counts() -	Calculate file counts.
332 *
333 * @file:		file name.
334 * @buf:		file info.
335 * @flag:		file type.
336 * @ftwbuf:		the pointer of a struct FTW.
337 */
338static int calc_entry_counts(const char *file EXT2FS_ATTR((unused)),
339		const struct stat64 *buf, int flag EXT2FS_ATTR((unused)),
340		struct FTW *ftwbuf EXT2FS_ATTR((unused)))
341{
342	if (S_ISREG(buf->st_mode))
343		regular_count++;
344
345	total_count++;
346
347	return 0;
348}
349
350/*
351 * page_in_core() -	Get information on whether pages are in core.
352 *
353 * @fd:			defrag target file's descriptor.
354 * @defrag_data:	data used for defrag.
355 * @vec:		page state array.
356 * @page_num:		page number.
357 */
358static int page_in_core(int fd, struct move_extent defrag_data,
359			unsigned char **vec, unsigned int *page_num)
360{
361	long	pagesize;
362	void	*page = NULL;
363	loff_t	offset, end_offset, length;
364
365	if (vec == NULL || *vec != NULL)
366		return -1;
367
368	pagesize = sysconf(_SC_PAGESIZE);
369	if (pagesize < 0)
370		return -1;
371	/* In mmap, offset should be a multiple of the page size */
372	offset = (loff_t)defrag_data.orig_start * block_size;
373	length = (loff_t)defrag_data.len * block_size;
374	end_offset = offset + length;
375	/* Round the offset down to the nearest multiple of pagesize */
376	offset = (offset / pagesize) * pagesize;
377	length = end_offset - offset;
378
379	page = mmap(NULL, length, PROT_READ, MAP_SHARED, fd, offset);
380	if (page == MAP_FAILED)
381		return -1;
382
383	*page_num = 0;
384	*page_num = (length + pagesize - 1) / pagesize;
385	*vec = (unsigned char *)calloc(*page_num, 1);
386	if (*vec == NULL) {
387		munmap(page, length);
388		return -1;
389	}
390
391	/* Get information on whether pages are in core */
392	if (mincore(page, (size_t)length, *vec) == -1 ||
393		munmap(page, length) == -1) {
394		FREE(*vec);
395		return -1;
396	}
397
398	return 0;
399}
400
401/*
402 * defrag_fadvise() -	Predeclare an access pattern for file data.
403 *
404 * @fd:			defrag target file's descriptor.
405 * @defrag_data:	data used for defrag.
406 * @vec:		page state array.
407 * @page_num:		page number.
408 */
409static int defrag_fadvise(int fd, struct move_extent defrag_data,
410		   unsigned char *vec, unsigned int page_num)
411{
412	int	flag = 1;
413	long	pagesize = sysconf(_SC_PAGESIZE);
414	int	fadvise_flag = POSIX_FADV_DONTNEED;
415	int	sync_flag = SYNC_FILE_RANGE_WAIT_BEFORE |
416			    SYNC_FILE_RANGE_WRITE |
417			    SYNC_FILE_RANGE_WAIT_AFTER;
418	unsigned int	i;
419	loff_t	offset;
420
421	if (pagesize < 1)
422		return -1;
423
424	offset = (loff_t)defrag_data.orig_start * block_size;
425	offset = (offset / pagesize) * pagesize;
426
427#ifdef HAVE_SYNC_FILE_RANGE
428	/* Sync file for fadvise process */
429	if (sync_file_range(fd, offset,
430		(loff_t)pagesize * page_num, sync_flag) < 0)
431		return -1;
432#endif
433
434	/* Try to release buffer cache which this process used,
435	 * then other process can use the released buffer
436	 */
437	for (i = 0; i < page_num; i++) {
438		if ((vec[i] & 0x1) == 0) {
439			offset += pagesize;
440			continue;
441		}
442		if (posix_fadvise(fd, offset, pagesize, fadvise_flag) < 0) {
443			if ((mode_flag & DETAIL) && flag) {
444				perror("\tFailed to fadvise");
445				flag = 0;
446			}
447		}
448		offset += pagesize;
449	}
450
451	return 0;
452}
453
454/*
455 * check_free_size() -	Check if there's enough disk space.
456 *
457 * @fd:			defrag target file's descriptor.
458 * @file:		file name.
459 * @blk_count:		file blocks.
460 */
461static int check_free_size(int fd, const char *file, ext4_fsblk_t blk_count)
462{
463	ext4_fsblk_t	free_blk_count;
464	struct statfs64	fsbuf;
465
466	if (fstatfs64(fd, &fsbuf) < 0) {
467		if (mode_flag & DETAIL) {
468			PRINT_FILE_NAME(file);
469			PRINT_ERR_MSG_WITH_ERRNO(
470				"Failed to get filesystem information");
471		}
472		return -1;
473	}
474
475	/* Compute free space for root and normal user separately */
476	if (current_uid == ROOT_UID)
477		free_blk_count = fsbuf.f_bfree;
478	else
479		free_blk_count = fsbuf.f_bavail;
480
481	if (free_blk_count >= blk_count)
482		return 0;
483
484	return -ENOSPC;
485}
486
487/*
488 * file_frag_count() -	Get file fragment count.
489 *
490 * @fd:			defrag target file's descriptor.
491 */
492static int file_frag_count(int fd)
493{
494	int	ret;
495	struct fiemap	fiemap_buf;
496
497	/* When fm_extent_count is 0,
498	 * ioctl just get file fragment count.
499	 */
500	memset(&fiemap_buf, 0, sizeof(struct fiemap));
501	fiemap_buf.fm_start = 0;
502	fiemap_buf.fm_length = FIEMAP_MAX_OFFSET;
503	fiemap_buf.fm_flags |= FIEMAP_FLAG_SYNC;
504
505	ret = ioctl(fd, FS_IOC_FIEMAP, &fiemap_buf);
506	if (ret < 0)
507		return ret;
508
509	return fiemap_buf.fm_mapped_extents;
510}
511
512/*
513 * file_check() -	Check file's attributes.
514 *
515 * @fd:			defrag target file's descriptor.
516 * @buf:		a pointer of the struct stat64.
517 * @file:		file name.
518 * @extents:		file extents.
519 * @blk_count:		file blocks.
520 */
521static int file_check(int fd, const struct stat64 *buf, const char *file,
522		int extents, ext4_fsblk_t blk_count)
523{
524	int	ret;
525	struct flock	lock;
526
527	/* Write-lock check is more reliable */
528	lock.l_type = F_WRLCK;
529	lock.l_start = 0;
530	lock.l_whence = SEEK_SET;
531	lock.l_len = 0;
532
533	/* Free space */
534	ret = check_free_size(fd, file, blk_count);
535	if (ret < 0) {
536		if ((mode_flag & DETAIL) && ret == -ENOSPC) {
537			printf("\033[79;0H\033[K[%u/%u] \"%s\"\t\t"
538				"  extents: %d -> %d\n", defraged_file_count,
539				total_count, file, extents, extents);
540			IN_FTW_PRINT_ERR_MSG(
541			"Defrag size is larger than filesystem's free space");
542		}
543		return -1;
544	}
545
546	/* Access authority */
547	if (current_uid != ROOT_UID &&
548		buf->st_uid != current_uid) {
549		if (mode_flag & DETAIL) {
550			printf("\033[79;0H\033[K[%u/%u] \"%s\"\t\t"
551				"  extents: %d -> %d\n", defraged_file_count,
552				total_count, file, extents, extents);
553			IN_FTW_PRINT_ERR_MSG(
554				"File is not current user's file"
555				" or current user is not root");
556		}
557		return -1;
558	}
559
560	/* Lock status */
561	if (fcntl(fd, F_GETLK, &lock) < 0) {
562		if (mode_flag & DETAIL) {
563			PRINT_FILE_NAME(file);
564			PRINT_ERR_MSG_WITH_ERRNO(
565				"Failed to get lock information");
566		}
567		return -1;
568	} else if (lock.l_type != F_UNLCK) {
569		if (mode_flag & DETAIL) {
570			PRINT_FILE_NAME(file);
571			IN_FTW_PRINT_ERR_MSG("File has been locked");
572		}
573		return -1;
574	}
575
576	return 0;
577}
578
579/*
580 * insert_extent_by_logical() -	Sequentially insert extent by logical.
581 *
582 * @ext_list_head:	the head of logical extent list.
583 * @ext:		the extent element which will be inserted.
584 */
585static int insert_extent_by_logical(struct fiemap_extent_list **ext_list_head,
586			struct fiemap_extent_list *ext)
587{
588	struct fiemap_extent_list	*ext_list_tmp = *ext_list_head;
589
590	if (ext == NULL)
591		goto out;
592
593	/* First element */
594	if (*ext_list_head == NULL) {
595		(*ext_list_head) = ext;
596		(*ext_list_head)->prev = *ext_list_head;
597		(*ext_list_head)->next = *ext_list_head;
598		return 0;
599	}
600
601	if (ext->data.logical <= ext_list_tmp->data.logical) {
602		/* Insert before head */
603		if (ext_list_tmp->data.logical <
604			ext->data.logical + ext->data.len)
605			/* Overlap */
606			goto out;
607		/* Adjust head */
608		*ext_list_head = ext;
609	} else {
610		/* Insert into the middle or last of the list */
611		do {
612			if (ext->data.logical < ext_list_tmp->data.logical)
613				break;
614			ext_list_tmp = ext_list_tmp->next;
615		} while (ext_list_tmp != (*ext_list_head));
616		if (ext->data.logical <
617		    ext_list_tmp->prev->data.logical +
618			ext_list_tmp->prev->data.len)
619			/* Overlap */
620			goto out;
621
622		if (ext_list_tmp != *ext_list_head &&
623		    ext_list_tmp->data.logical <
624		    ext->data.logical + ext->data.len)
625			/* Overlap */
626			goto out;
627	}
628	ext_list_tmp = ext_list_tmp->prev;
629	/* Insert "ext" after "ext_list_tmp" */
630	insert(ext_list_tmp, ext);
631	return 0;
632out:
633	errno = EINVAL;
634	return -1;
635}
636
637/*
638 * insert_extent_by_physical() -	Sequentially insert extent by physical.
639 *
640 * @ext_list_head:	the head of physical extent list.
641 * @ext:		the extent element which will be inserted.
642 */
643static int insert_extent_by_physical(struct fiemap_extent_list **ext_list_head,
644			struct fiemap_extent_list *ext)
645{
646	struct fiemap_extent_list	*ext_list_tmp = *ext_list_head;
647
648	if (ext == NULL)
649		goto out;
650
651	/* First element */
652	if (*ext_list_head == NULL) {
653		(*ext_list_head) = ext;
654		(*ext_list_head)->prev = *ext_list_head;
655		(*ext_list_head)->next = *ext_list_head;
656		return 0;
657	}
658
659	if (ext->data.physical <= ext_list_tmp->data.physical) {
660		/* Insert before head */
661		if (ext_list_tmp->data.physical <
662					ext->data.physical + ext->data.len)
663			/* Overlap */
664			goto out;
665		/* Adjust head */
666		*ext_list_head = ext;
667	} else {
668		/* Insert into the middle or last of the list */
669		do {
670			if (ext->data.physical < ext_list_tmp->data.physical)
671				break;
672			ext_list_tmp = ext_list_tmp->next;
673		} while (ext_list_tmp != (*ext_list_head));
674		if (ext->data.physical <
675		    ext_list_tmp->prev->data.physical +
676				ext_list_tmp->prev->data.len)
677			/* Overlap */
678			goto out;
679
680		if (ext_list_tmp != *ext_list_head &&
681		    ext_list_tmp->data.physical <
682				ext->data.physical + ext->data.len)
683			/* Overlap */
684			goto out;
685	}
686	ext_list_tmp = ext_list_tmp->prev;
687	/* Insert "ext" after "ext_list_tmp" */
688	insert(ext_list_tmp, ext);
689	return 0;
690out:
691	errno = EINVAL;
692	return -1;
693}
694
695/*
696 * insert_exts_group() -	Insert a exts_group.
697 *
698 * @ext_group_head:		the head of a exts_group list.
699 * @exts_group:			the exts_group element which will be inserted.
700 */
701static int insert_exts_group(struct fiemap_extent_group **ext_group_head,
702				struct fiemap_extent_group *exts_group)
703{
704	struct fiemap_extent_group	*ext_group_tmp = NULL;
705
706	if (exts_group == NULL) {
707		errno = EINVAL;
708		return -1;
709	}
710
711	/* Initialize list */
712	if (*ext_group_head == NULL) {
713		(*ext_group_head) = exts_group;
714		(*ext_group_head)->prev = *ext_group_head;
715		(*ext_group_head)->next = *ext_group_head;
716		return 0;
717	}
718
719	ext_group_tmp = (*ext_group_head)->prev;
720	insert(ext_group_tmp, exts_group);
721
722	return 0;
723}
724
725/*
726 * join_extents() -		Find continuous region(exts_group).
727 *
728 * @ext_list_head:		the head of the extent list.
729 * @ext_group_head:		the head of the target exts_group list.
730 */
731static int join_extents(struct fiemap_extent_list *ext_list_head,
732		struct fiemap_extent_group **ext_group_head)
733{
734	__u64	len = ext_list_head->data.len;
735	struct fiemap_extent_list *ext_list_start = ext_list_head;
736	struct fiemap_extent_list *ext_list_tmp = ext_list_head->next;
737
738	do {
739		struct fiemap_extent_group	*ext_group_tmp = NULL;
740
741		/* This extent and previous extent are not continuous,
742		 * so, all previous extents are treated as an extent group.
743		 */
744		if ((ext_list_tmp->prev->data.logical +
745			ext_list_tmp->prev->data.len)
746				!= ext_list_tmp->data.logical) {
747			ext_group_tmp =
748				malloc(sizeof(struct fiemap_extent_group));
749			if (ext_group_tmp == NULL)
750				return -1;
751
752			memset(ext_group_tmp, 0,
753				sizeof(struct fiemap_extent_group));
754			ext_group_tmp->len = len;
755			ext_group_tmp->start = ext_list_start;
756			ext_group_tmp->end = ext_list_tmp->prev;
757
758			if (insert_exts_group(ext_group_head,
759				ext_group_tmp) < 0) {
760				FREE(ext_group_tmp);
761				return -1;
762			}
763			ext_list_start = ext_list_tmp;
764			len = ext_list_tmp->data.len;
765			ext_list_tmp = ext_list_tmp->next;
766			continue;
767		}
768
769		/* This extent and previous extent are continuous,
770		 * so, they belong to the same extent group, and we check
771		 * if the next extent belongs to the same extent group.
772		 */
773		len += ext_list_tmp->data.len;
774		ext_list_tmp = ext_list_tmp->next;
775	} while (ext_list_tmp != ext_list_head->next);
776
777	return 0;
778}
779
780/*
781 * get_file_extents() -	Get file's extent list.
782 *
783 * @fd:			defrag target file's descriptor.
784 * @ext_list_head:	the head of the extent list.
785 */
786static int get_file_extents(int fd, struct fiemap_extent_list **ext_list_head)
787{
788	__u32	i;
789	int	ret;
790	int	ext_buf_size, fie_buf_size;
791	__u64	pos = 0;
792	struct fiemap	*fiemap_buf = NULL;
793	struct fiemap_extent	*ext_buf = NULL;
794	struct fiemap_extent_list	*ext_list = NULL;
795
796	/* Convert units, in bytes.
797	 * Be careful : now, physical block number in extent is 48bit,
798	 * and the maximum blocksize for ext4 is 4K(12bit),
799	 * so there is no overflow, but in future it may be changed.
800	 */
801
802	/* Alloc space for fiemap */
803	ext_buf_size = EXTENT_MAX_COUNT * sizeof(struct fiemap_extent);
804	fie_buf_size = sizeof(struct fiemap) + ext_buf_size;
805
806	fiemap_buf = malloc(fie_buf_size);
807	if (fiemap_buf == NULL)
808		return -1;
809
810	ext_buf = fiemap_buf->fm_extents;
811	memset(fiemap_buf, 0, fie_buf_size);
812	fiemap_buf->fm_length = FIEMAP_MAX_OFFSET;
813	fiemap_buf->fm_flags |= FIEMAP_FLAG_SYNC;
814	fiemap_buf->fm_extent_count = EXTENT_MAX_COUNT;
815
816	do {
817		fiemap_buf->fm_start = pos;
818		memset(ext_buf, 0, ext_buf_size);
819		ret = ioctl(fd, FS_IOC_FIEMAP, fiemap_buf);
820		if (ret < 0 || fiemap_buf->fm_mapped_extents == 0)
821			goto out;
822		for (i = 0; i < fiemap_buf->fm_mapped_extents; i++) {
823			ext_list = NULL;
824			ext_list = malloc(sizeof(struct fiemap_extent_list));
825			if (ext_list == NULL)
826				goto out;
827
828			ext_list->data.physical = ext_buf[i].fe_physical
829						/ block_size;
830			ext_list->data.logical = ext_buf[i].fe_logical
831						/ block_size;
832			ext_list->data.len = ext_buf[i].fe_length
833						/ block_size;
834
835			ret = insert_extent_by_physical(
836					ext_list_head, ext_list);
837			if (ret < 0) {
838				FREE(ext_list);
839				goto out;
840			}
841		}
842		/* Record file's logical offset this time */
843		pos = ext_buf[EXTENT_MAX_COUNT-1].fe_logical +
844			ext_buf[EXTENT_MAX_COUNT-1].fe_length;
845		/*
846		 * If fm_extents array has been filled and
847		 * there are extents left, continue to cycle.
848		 */
849	} while (fiemap_buf->fm_mapped_extents
850					== EXTENT_MAX_COUNT &&
851		!(ext_buf[EXTENT_MAX_COUNT-1].fe_flags
852					& FIEMAP_EXTENT_LAST));
853
854	FREE(fiemap_buf);
855	return 0;
856out:
857	FREE(fiemap_buf);
858	return -1;
859}
860
861/*
862 * get_logical_count() -	Get the file logical extents count.
863 *
864 * @logical_list_head:	the head of the logical extent list.
865 */
866static int get_logical_count(struct fiemap_extent_list *logical_list_head)
867{
868	int ret = 0;
869	struct fiemap_extent_list *ext_list_tmp  = logical_list_head;
870
871	do {
872		ret++;
873		ext_list_tmp = ext_list_tmp->next;
874	} while (ext_list_tmp != logical_list_head);
875
876	return ret;
877}
878
879/*
880 * get_physical_count() -	Get the file physical extents count.
881 *
882 * @physical_list_head:	the head of the physical extent list.
883 */
884static int get_physical_count(struct fiemap_extent_list *physical_list_head)
885{
886	int ret = 0;
887	struct fiemap_extent_list *ext_list_tmp = physical_list_head;
888
889	do {
890		if ((ext_list_tmp->data.physical + ext_list_tmp->data.len)
891				!= ext_list_tmp->next->data.physical ||
892		    (ext_list_tmp->data.logical + ext_list_tmp->data.len)
893				!= ext_list_tmp->next->data.logical) {
894			/* This extent and next extent are not continuous. */
895			ret++;
896		}
897
898		ext_list_tmp = ext_list_tmp->next;
899	} while (ext_list_tmp != physical_list_head);
900
901	return ret;
902}
903
904/*
905 * change_physical_to_logical() -	Change list from physical to logical.
906 *
907 * @physical_list_head:	the head of physical extent list.
908 * @logical_list_head:	the head of logical extent list.
909 */
910static int change_physical_to_logical(
911			struct fiemap_extent_list **physical_list_head,
912			struct fiemap_extent_list **logical_list_head)
913{
914	int ret;
915	struct fiemap_extent_list *ext_list_tmp = *physical_list_head;
916	struct fiemap_extent_list *ext_list_next = ext_list_tmp->next;
917
918	while (1) {
919		if (ext_list_tmp == ext_list_next) {
920			ret = insert_extent_by_logical(
921				logical_list_head, ext_list_tmp);
922			if (ret < 0)
923				return -1;
924
925			*physical_list_head = NULL;
926			break;
927		}
928
929		ext_list_tmp->prev->next = ext_list_tmp->next;
930		ext_list_tmp->next->prev = ext_list_tmp->prev;
931		*physical_list_head = ext_list_next;
932
933		ret = insert_extent_by_logical(
934			logical_list_head, ext_list_tmp);
935		if (ret < 0) {
936			FREE(ext_list_tmp);
937			return -1;
938		}
939		ext_list_tmp = ext_list_next;
940		ext_list_next = ext_list_next->next;
941	}
942
943	return 0;
944}
945
946/* get_file_blocks() -  Get total file blocks.
947 *
948 * @ext_list_head:	the extent list head of the target file
949 */
950static ext4_fsblk_t get_file_blocks(struct fiemap_extent_list *ext_list_head)
951{
952	ext4_fsblk_t blk_count = 0;
953	struct fiemap_extent_list *ext_list_tmp = ext_list_head;
954
955	do {
956		blk_count += ext_list_tmp->data.len;
957		ext_list_tmp = ext_list_tmp->next;
958	} while (ext_list_tmp != ext_list_head);
959
960	return blk_count;
961}
962
963/*
964 * free_ext() -		Free the extent list.
965 *
966 * @ext_list_head:	the extent list head of which will be free.
967 */
968static void free_ext(struct fiemap_extent_list *ext_list_head)
969{
970	struct fiemap_extent_list	*ext_list_tmp = NULL;
971
972	if (ext_list_head == NULL)
973		return;
974
975	while (ext_list_head->next != ext_list_head) {
976		ext_list_tmp = ext_list_head;
977		ext_list_head->prev->next = ext_list_head->next;
978		ext_list_head->next->prev = ext_list_head->prev;
979		ext_list_head = ext_list_head->next;
980		free(ext_list_tmp);
981	}
982	free(ext_list_head);
983}
984
985/*
986 * free_exts_group() -		Free the exts_group.
987 *
988 * @*ext_group_head:	the exts_group list head which will be free.
989 */
990static void free_exts_group(struct fiemap_extent_group *ext_group_head)
991{
992	struct fiemap_extent_group	*ext_group_tmp = NULL;
993
994	if (ext_group_head == NULL)
995		return;
996
997	while (ext_group_head->next != ext_group_head) {
998		ext_group_tmp = ext_group_head;
999		ext_group_head->prev->next = ext_group_head->next;
1000		ext_group_head->next->prev = ext_group_head->prev;
1001		ext_group_head = ext_group_head->next;
1002		free(ext_group_tmp);
1003	}
1004	free(ext_group_head);
1005}
1006
1007/*
1008 * get_best_count() -	Get the file best extents count.
1009 *
1010 * @block_count:		the file's physical block count.
1011 */
1012static int get_best_count(ext4_fsblk_t block_count)
1013{
1014	int ret;
1015	unsigned int flex_bg_num;
1016
1017	/* Calcuate best extents count */
1018	if (feature_incompat & EXT4_FEATURE_INCOMPAT_FLEX_BG) {
1019		flex_bg_num = 1 << log_groups_per_flex;
1020		ret = ((block_count - 1) /
1021			((ext4_fsblk_t)blocks_per_group *
1022				flex_bg_num)) + 1;
1023	} else
1024		ret = ((block_count - 1) / blocks_per_group) + 1;
1025
1026	return ret;
1027}
1028
1029
1030/*
1031 * file_statistic() -	Get statistic info of the file's fragments.
1032 *
1033 * @file:		the file's name.
1034 * @buf:		the pointer of the struct stat64.
1035 * @flag:		file type.
1036 * @ftwbuf:		the pointer of a struct FTW.
1037 */
1038static int file_statistic(const char *file, const struct stat64 *buf,
1039			int flag EXT2FS_ATTR((unused)),
1040			struct FTW *ftwbuf EXT2FS_ATTR((unused)))
1041{
1042	int	fd;
1043	int	ret;
1044	int	now_ext_count, best_ext_count = 0, physical_ext_count;
1045	int	i, j;
1046	__u64	size_per_ext = 0;
1047	float	ratio = 0.0;
1048	ext4_fsblk_t	blk_count = 0;
1049	char	msg_buffer[PATH_MAX + 24];
1050	struct fiemap_extent_list *physical_list_head = NULL;
1051	struct fiemap_extent_list *logical_list_head = NULL;
1052
1053	defraged_file_count++;
1054
1055	if (mode_flag & DETAIL) {
1056		if (total_count == 1 && regular_count == 1)
1057			printf("<File>\n");
1058		else {
1059			printf("[%u/%u]", defraged_file_count, total_count);
1060			fflush(stdout);
1061		}
1062	}
1063	if (lost_found_dir[0] != '\0' &&
1064	    !memcmp(file, lost_found_dir, strnlen(lost_found_dir, PATH_MAX))) {
1065		if (mode_flag & DETAIL) {
1066			PRINT_FILE_NAME(file);
1067			STATISTIC_ERR_MSG(NGMSG_LOST_FOUND);
1068		}
1069			return 0;
1070	}
1071
1072	if (!S_ISREG(buf->st_mode)) {
1073		if (mode_flag & DETAIL) {
1074			PRINT_FILE_NAME(file);
1075			STATISTIC_ERR_MSG(NGMSG_FILE_UNREG);
1076		}
1077		return 0;
1078	}
1079
1080	/* Access authority */
1081	if (current_uid != ROOT_UID &&
1082		buf->st_uid != current_uid) {
1083		if (mode_flag & DETAIL) {
1084			PRINT_FILE_NAME(file);
1085			STATISTIC_ERR_MSG(
1086				"File is not current user's file"
1087				" or current user is not root");
1088		}
1089		return 0;
1090	}
1091
1092	/* Empty file */
1093	if (buf->st_size == 0) {
1094		if (mode_flag & DETAIL) {
1095			PRINT_FILE_NAME(file);
1096			STATISTIC_ERR_MSG("File size is 0");
1097		}
1098		return 0;
1099	}
1100
1101	/* Has no blocks */
1102	if (buf->st_blocks == 0) {
1103		if (mode_flag & DETAIL) {
1104			PRINT_FILE_NAME(file);
1105			STATISTIC_ERR_MSG("File has no blocks");
1106		}
1107		return 0;
1108	}
1109
1110	fd = open64(file, O_RDONLY);
1111	if (fd < 0) {
1112		if (mode_flag & DETAIL) {
1113			PRINT_FILE_NAME(file);
1114			STATISTIC_ERR_MSG_WITH_ERRNO(NGMSG_FILE_OPEN);
1115		}
1116		return 0;
1117	}
1118
1119	/* Get file's physical extents  */
1120	ret = get_file_extents(fd, &physical_list_head);
1121	if (ret < 0) {
1122		if (mode_flag & DETAIL) {
1123			PRINT_FILE_NAME(file);
1124			STATISTIC_ERR_MSG_WITH_ERRNO(NGMSG_FILE_EXTENT);
1125		}
1126		goto out;
1127	}
1128
1129	/* Get the count of file's continuous physical region */
1130	physical_ext_count = get_physical_count(physical_list_head);
1131
1132	/* Change list from physical to logical */
1133	ret = change_physical_to_logical(&physical_list_head,
1134							&logical_list_head);
1135	if (ret < 0) {
1136		if (mode_flag & DETAIL) {
1137			PRINT_FILE_NAME(file);
1138			STATISTIC_ERR_MSG_WITH_ERRNO(NGMSG_FILE_EXTENT);
1139		}
1140		goto out;
1141	}
1142
1143	/* Count file fragments before defrag */
1144	now_ext_count = get_logical_count(logical_list_head);
1145
1146	if (current_uid == ROOT_UID) {
1147		/* Calculate the size per extent */
1148		blk_count = get_file_blocks(logical_list_head);
1149
1150		best_ext_count = get_best_count(blk_count);
1151
1152		/* e4defrag rounds size_per_ext up to a block size boundary */
1153		size_per_ext = blk_count * (buf->st_blksize / 1024) /
1154							now_ext_count;
1155
1156		ratio = (float)(physical_ext_count - best_ext_count) * 100 /
1157							blk_count;
1158
1159		extents_before_defrag += now_ext_count;
1160		extents_after_defrag += best_ext_count;
1161		files_block_count += blk_count;
1162	}
1163
1164	if (total_count == 1 && regular_count == 1) {
1165		/* File only */
1166		if (mode_flag & DETAIL) {
1167			int count = 0;
1168			struct fiemap_extent_list *ext_list_tmp =
1169						logical_list_head;
1170
1171			/* Print extents info */
1172			do {
1173				count++;
1174				printf("[ext %d]:\tstart %llu:\tlogical "
1175						"%llu:\tlen %llu\n", count,
1176						ext_list_tmp->data.physical,
1177						ext_list_tmp->data.logical,
1178						ext_list_tmp->data.len);
1179				ext_list_tmp = ext_list_tmp->next;
1180			} while (ext_list_tmp != logical_list_head);
1181
1182		} else {
1183			printf("%-40s%10s/%-10s%9s\n",
1184					"<File>", "now", "best", "size/ext");
1185			if (current_uid == ROOT_UID) {
1186				if (strlen(file) > 40)
1187					printf("%s\n%50d/%-10d%6llu KB\n",
1188						file, now_ext_count,
1189						best_ext_count, size_per_ext);
1190				else
1191					printf("%-40s%10d/%-10d%6llu KB\n",
1192						file, now_ext_count,
1193						best_ext_count, size_per_ext);
1194			} else {
1195				if (strlen(file) > 40)
1196					printf("%s\n%50d/%-10s%7s\n",
1197							file, now_ext_count,
1198							"-", "-");
1199				else
1200					printf("%-40s%10d/%-10s%7s\n",
1201							file, now_ext_count,
1202							"-", "-");
1203			}
1204		}
1205		succeed_cnt++;
1206		goto out;
1207	}
1208
1209	if (mode_flag & DETAIL) {
1210		/* Print statistic info */
1211		sprintf(msg_buffer, "[%u/%u]%s",
1212				defraged_file_count, total_count, file);
1213		if (current_uid == ROOT_UID) {
1214			if (strlen(msg_buffer) > 40)
1215				printf("\033[79;0H\033[K%s\n"
1216						"%50d/%-10d%6llu KB\n",
1217						msg_buffer, now_ext_count,
1218						best_ext_count, size_per_ext);
1219			else
1220				printf("\033[79;0H\033[K%-40s"
1221						"%10d/%-10d%6llu KB\n",
1222						msg_buffer, now_ext_count,
1223						best_ext_count, size_per_ext);
1224		} else {
1225			if (strlen(msg_buffer) > 40)
1226				printf("\033[79;0H\033[K%s\n%50d/%-10s%7s\n",
1227						msg_buffer, now_ext_count,
1228							"-", "-");
1229			else
1230				printf("\033[79;0H\033[K%-40s%10d/%-10s%7s\n",
1231						msg_buffer, now_ext_count,
1232							"-", "-");
1233		}
1234	}
1235
1236	for (i = 0; i < SHOW_FRAG_FILES; i++) {
1237		if (ratio >= frag_rank[i].ratio) {
1238			for (j = SHOW_FRAG_FILES - 1; j > i; j--) {
1239				memset(&frag_rank[j], 0,
1240					sizeof(struct frag_statistic_ino));
1241				strncpy(frag_rank[j].msg_buffer,
1242					frag_rank[j - 1].msg_buffer,
1243					strnlen(frag_rank[j - 1].msg_buffer,
1244					PATH_MAX));
1245				frag_rank[j].now_count =
1246					frag_rank[j - 1].now_count;
1247				frag_rank[j].best_count =
1248					frag_rank[j - 1].best_count;
1249				frag_rank[j].size_per_ext =
1250					frag_rank[j - 1].size_per_ext;
1251				frag_rank[j].ratio =
1252					frag_rank[j - 1].ratio;
1253			}
1254			memset(&frag_rank[i], 0,
1255					sizeof(struct frag_statistic_ino));
1256			strncpy(frag_rank[i].msg_buffer, file,
1257						strnlen(file, PATH_MAX));
1258			frag_rank[i].now_count = now_ext_count;
1259			frag_rank[i].best_count = best_ext_count;
1260			frag_rank[i].size_per_ext = size_per_ext;
1261			frag_rank[i].ratio = ratio;
1262			break;
1263		}
1264	}
1265
1266	succeed_cnt++;
1267
1268out:
1269	close(fd);
1270	free_ext(physical_list_head);
1271	free_ext(logical_list_head);
1272	return 0;
1273}
1274
1275/*
1276 * print_progress -	Print defrag progress
1277 *
1278 * @file:		file name.
1279 * @start:		logical offset for defrag target file
1280 * @file_size:		defrag target filesize
1281 */
1282static void print_progress(const char *file, loff_t start, loff_t file_size)
1283{
1284	int percent = (start * 100) / file_size;
1285	printf("\033[79;0H\033[K[%u/%u]%s:\t%3d%%",
1286		defraged_file_count, total_count, file, min(percent, 100));
1287	fflush(stdout);
1288
1289	return;
1290}
1291
1292/*
1293 * call_defrag() -	Execute the defrag program.
1294 *
1295 * @fd:			target file descriptor.
1296 * @donor_fd:		donor file descriptor.
1297 * @file:			target file name.
1298 * @buf:			pointer of the struct stat64.
1299 * @ext_list_head:	head of the extent list.
1300 */
1301static int call_defrag(int fd, int donor_fd, const char *file,
1302	const struct stat64 *buf, struct fiemap_extent_list *ext_list_head)
1303{
1304	loff_t	start = 0;
1305	unsigned int	page_num;
1306	unsigned char	*vec = NULL;
1307	int	defraged_ret = 0;
1308	int	ret;
1309	struct move_extent	move_data;
1310	struct fiemap_extent_list	*ext_list_tmp = NULL;
1311
1312	memset(&move_data, 0, sizeof(struct move_extent));
1313	move_data.donor_fd = donor_fd;
1314
1315	/* Print defrag progress */
1316	print_progress(file, start, buf->st_size);
1317
1318	ext_list_tmp = ext_list_head;
1319	do {
1320		move_data.orig_start = ext_list_tmp->data.logical;
1321		/* Logical offset of orig and donor should be same */
1322		move_data.donor_start = move_data.orig_start;
1323		move_data.len = ext_list_tmp->data.len;
1324		move_data.moved_len = 0;
1325
1326		ret = page_in_core(fd, move_data, &vec, &page_num);
1327		if (ret < 0) {
1328			if (mode_flag & DETAIL) {
1329				printf("\n");
1330				PRINT_ERR_MSG_WITH_ERRNO(
1331						"Failed to get file map");
1332			} else {
1333				printf("\t[ NG ]\n");
1334			}
1335			return -1;
1336		}
1337
1338		/* EXT4_IOC_MOVE_EXT */
1339		defraged_ret =
1340			ioctl(fd, EXT4_IOC_MOVE_EXT, &move_data);
1341
1342		/* Free pages */
1343		ret = defrag_fadvise(fd, move_data, vec, page_num);
1344		if (vec) {
1345			free(vec);
1346			vec = NULL;
1347		}
1348		if (ret < 0) {
1349			if (mode_flag & DETAIL) {
1350				printf("\n");
1351				PRINT_ERR_MSG_WITH_ERRNO(
1352					"Failed to free page");
1353			} else {
1354				printf("\t[ NG ]\n");
1355			}
1356			return -1;
1357		}
1358
1359		if (defraged_ret < 0) {
1360			if (mode_flag & DETAIL) {
1361				printf("\n");
1362				PRINT_ERR_MSG_WITH_ERRNO(
1363					"Failed to defrag with "
1364					"EXT4_IOC_MOVE_EXT ioctl");
1365				if (errno == ENOTTY)
1366					printf("\tAt least 2.6.31-rc1 of "
1367						"vanilla kernel is required\n");
1368			} else {
1369				printf("\t[ NG ]\n");
1370			}
1371			return -1;
1372		}
1373		/* Adjust logical offset for next ioctl */
1374		move_data.orig_start += move_data.moved_len;
1375		move_data.donor_start = move_data.orig_start;
1376
1377		start = move_data.orig_start * buf->st_blksize;
1378
1379		/* Print defrag progress */
1380		print_progress(file, start, buf->st_size);
1381
1382		/* End of file */
1383		if (start >= buf->st_size)
1384			break;
1385
1386		ext_list_tmp = ext_list_tmp->next;
1387	} while (ext_list_tmp != ext_list_head);
1388
1389	return 0;
1390}
1391
1392/*
1393 * file_defrag() -		Check file attributes and call ioctl to defrag.
1394 *
1395 * @file:		the file's name.
1396 * @buf:		the pointer of the struct stat64.
1397 * @flag:		file type.
1398 * @ftwbuf:		the pointer of a struct FTW.
1399 */
1400static int file_defrag(const char *file, const struct stat64 *buf,
1401			int flag EXT2FS_ATTR((unused)),
1402			struct FTW *ftwbuf EXT2FS_ATTR((unused)))
1403{
1404	int	fd;
1405	int	donor_fd = -1;
1406	int	ret;
1407	int	best;
1408	int	file_frags_start, file_frags_end;
1409	int	orig_physical_cnt, donor_physical_cnt = 0;
1410	char	tmp_inode_name[PATH_MAX + 8];
1411	ext4_fsblk_t			blk_count = 0;
1412	struct fiemap_extent_list	*orig_list_physical = NULL;
1413	struct fiemap_extent_list	*orig_list_logical = NULL;
1414	struct fiemap_extent_list	*donor_list_physical = NULL;
1415	struct fiemap_extent_list	*donor_list_logical = NULL;
1416	struct fiemap_extent_group	*orig_group_head = NULL;
1417	struct fiemap_extent_group	*orig_group_tmp = NULL;
1418
1419	defraged_file_count++;
1420
1421	if (mode_flag & DETAIL) {
1422		printf("[%u/%u]", defraged_file_count, total_count);
1423		fflush(stdout);
1424	}
1425
1426	if (lost_found_dir[0] != '\0' &&
1427	    !memcmp(file, lost_found_dir, strnlen(lost_found_dir, PATH_MAX))) {
1428		if (mode_flag & DETAIL) {
1429			PRINT_FILE_NAME(file);
1430			IN_FTW_PRINT_ERR_MSG(NGMSG_LOST_FOUND);
1431		}
1432		return 0;
1433	}
1434
1435	if (!S_ISREG(buf->st_mode)) {
1436		if (mode_flag & DETAIL) {
1437			PRINT_FILE_NAME(file);
1438			IN_FTW_PRINT_ERR_MSG(NGMSG_FILE_UNREG);
1439		}
1440		return 0;
1441	}
1442
1443	/* Empty file */
1444	if (buf->st_size == 0) {
1445		if (mode_flag & DETAIL) {
1446			PRINT_FILE_NAME(file);
1447			IN_FTW_PRINT_ERR_MSG("File size is 0");
1448		}
1449		return 0;
1450	}
1451
1452	/* Has no blocks */
1453	if (buf->st_blocks == 0) {
1454		if (mode_flag & DETAIL) {
1455			PRINT_FILE_NAME(file);
1456			STATISTIC_ERR_MSG("File has no blocks");
1457		}
1458		return 0;
1459	}
1460
1461	fd = open64(file, O_RDWR);
1462	if (fd < 0) {
1463		if (mode_flag & DETAIL) {
1464			PRINT_FILE_NAME(file);
1465			PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_OPEN);
1466		}
1467		return 0;
1468	}
1469
1470	/* Get file's extents */
1471	ret = get_file_extents(fd, &orig_list_physical);
1472	if (ret < 0) {
1473		if (mode_flag & DETAIL) {
1474			PRINT_FILE_NAME(file);
1475			PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_EXTENT);
1476		}
1477		goto out;
1478	}
1479
1480	/* Get the count of file's continuous physical region */
1481	orig_physical_cnt = get_physical_count(orig_list_physical);
1482
1483	/* Change list from physical to logical */
1484	ret = change_physical_to_logical(&orig_list_physical,
1485							&orig_list_logical);
1486	if (ret < 0) {
1487		if (mode_flag & DETAIL) {
1488			PRINT_FILE_NAME(file);
1489			PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_EXTENT);
1490		}
1491		goto out;
1492	}
1493
1494	/* Count file fragments before defrag */
1495	file_frags_start = get_logical_count(orig_list_logical);
1496
1497	blk_count = get_file_blocks(orig_list_logical);
1498	if (file_check(fd, buf, file, file_frags_start, blk_count) < 0)
1499		goto out;
1500
1501	if (fsync(fd) < 0) {
1502		if (mode_flag & DETAIL) {
1503			PRINT_FILE_NAME(file);
1504			PRINT_ERR_MSG_WITH_ERRNO("Failed to sync(fsync)");
1505		}
1506		goto out;
1507	}
1508
1509	if (current_uid == ROOT_UID)
1510		best = get_best_count(blk_count);
1511	else
1512		best = 1;
1513
1514	if (file_frags_start <= best)
1515		goto check_improvement;
1516
1517	/* Combine extents to group */
1518	ret = join_extents(orig_list_logical, &orig_group_head);
1519	if (ret < 0) {
1520		if (mode_flag & DETAIL) {
1521			PRINT_FILE_NAME(file);
1522			PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_EXTENT);
1523		}
1524		goto out;
1525	}
1526
1527	/* Create donor inode */
1528	memset(tmp_inode_name, 0, PATH_MAX + 8);
1529	sprintf(tmp_inode_name, "%.*s.defrag",
1530				(int)strnlen(file, PATH_MAX), file);
1531	donor_fd = open64(tmp_inode_name, O_WRONLY | O_CREAT | O_EXCL, S_IRUSR);
1532	if (donor_fd < 0) {
1533		if (mode_flag & DETAIL) {
1534			PRINT_FILE_NAME(file);
1535			if (errno == EEXIST)
1536				PRINT_ERR_MSG_WITH_ERRNO(
1537				"File is being defraged by other program");
1538			else
1539				PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_OPEN);
1540		}
1541		goto out;
1542	}
1543
1544	/* Unlink donor inode */
1545	ret = unlink(tmp_inode_name);
1546	if (ret < 0) {
1547		if (mode_flag & DETAIL) {
1548			PRINT_FILE_NAME(file);
1549			PRINT_ERR_MSG_WITH_ERRNO("Failed to unlink");
1550		}
1551		goto out;
1552	}
1553
1554	/* Allocate space for donor inode */
1555	orig_group_tmp = orig_group_head;
1556	do {
1557		ret = fallocate64(donor_fd, 0,
1558		  (loff_t)orig_group_tmp->start->data.logical * block_size,
1559		  (loff_t)orig_group_tmp->len * block_size);
1560		if (ret < 0) {
1561			if (mode_flag & DETAIL) {
1562				PRINT_FILE_NAME(file);
1563				PRINT_ERR_MSG_WITH_ERRNO("Failed to fallocate");
1564			}
1565			goto out;
1566		}
1567
1568		orig_group_tmp = orig_group_tmp->next;
1569	} while (orig_group_tmp != orig_group_head);
1570
1571	/* Get donor inode's extents */
1572	ret = get_file_extents(donor_fd, &donor_list_physical);
1573	if (ret < 0) {
1574		if (mode_flag & DETAIL) {
1575			PRINT_FILE_NAME(file);
1576			PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_EXTENT);
1577		}
1578		goto out;
1579	}
1580
1581	/* Calcuate donor inode's continuous physical region */
1582	donor_physical_cnt = get_physical_count(donor_list_physical);
1583
1584	/* Change donor extent list from physical to logical */
1585	ret = change_physical_to_logical(&donor_list_physical,
1586							&donor_list_logical);
1587	if (ret < 0) {
1588		if (mode_flag & DETAIL) {
1589			PRINT_FILE_NAME(file);
1590			PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_EXTENT);
1591		}
1592		goto out;
1593	}
1594
1595check_improvement:
1596	if (mode_flag & DETAIL) {
1597		if (file_frags_start != 1)
1598			frag_files_before_defrag++;
1599
1600		extents_before_defrag += file_frags_start;
1601	}
1602
1603	if (file_frags_start <= best ||
1604			orig_physical_cnt <= donor_physical_cnt) {
1605		printf("\033[79;0H\033[K[%u/%u]%s:\t%3d%%",
1606			defraged_file_count, total_count, file, 100);
1607		if (mode_flag & DETAIL)
1608			printf("  extents: %d -> %d",
1609				file_frags_start, file_frags_start);
1610
1611		printf("\t[ OK ]\n");
1612		succeed_cnt++;
1613
1614		if (file_frags_start != 1)
1615			frag_files_after_defrag++;
1616
1617		extents_after_defrag += file_frags_start;
1618		goto out;
1619	}
1620
1621	/* Defrag the file */
1622	ret = call_defrag(fd, donor_fd, file, buf, donor_list_logical);
1623
1624	/* Count file fragments after defrag and print extents info */
1625	if (mode_flag & DETAIL) {
1626		file_frags_end = file_frag_count(fd);
1627		if (file_frags_end < 0) {
1628			printf("\n");
1629			PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_INFO);
1630			goto out;
1631		}
1632
1633		if (file_frags_end != 1)
1634			frag_files_after_defrag++;
1635
1636		extents_after_defrag += file_frags_end;
1637
1638		if (ret < 0)
1639			goto out;
1640
1641		printf("  extents: %d -> %d",
1642			file_frags_start, file_frags_end);
1643		fflush(stdout);
1644	}
1645
1646	if (ret < 0)
1647		goto out;
1648
1649	printf("\t[ OK ]\n");
1650	fflush(stdout);
1651	succeed_cnt++;
1652
1653out:
1654	close(fd);
1655	if (donor_fd != -1)
1656		close(donor_fd);
1657	free_ext(orig_list_physical);
1658	free_ext(orig_list_logical);
1659	free_ext(donor_list_physical);
1660	free_exts_group(orig_group_head);
1661	return 0;
1662}
1663
1664/*
1665 * main() -		Ext4 online defrag.
1666 *
1667 * @argc:		the number of parameter.
1668 * @argv[]:		the pointer array of parameter.
1669 */
1670int main(int argc, char *argv[])
1671{
1672	int	opt;
1673	int	i, j, ret = 0;
1674	int	flags = FTW_PHYS | FTW_MOUNT;
1675	int	arg_type = -1;
1676	int	success_flag = 0;
1677	char	dir_name[PATH_MAX + 1];
1678	char	dev_name[PATH_MAX + 1];
1679	struct stat64	buf;
1680	ext2_filsys fs = NULL;
1681
1682	/* Parse arguments */
1683	if (argc == 1)
1684		goto out;
1685
1686	while ((opt = getopt(argc, argv, "vc")) != EOF) {
1687		switch (opt) {
1688		case 'v':
1689			mode_flag |= DETAIL;
1690			break;
1691		case 'c':
1692			mode_flag |= STATISTIC;
1693			break;
1694		default:
1695			goto out;
1696		}
1697	}
1698
1699	if (argc == optind)
1700		goto out;
1701
1702	current_uid = getuid();
1703
1704	/* Main process */
1705	for (i = optind; i < argc; i++) {
1706		succeed_cnt = 0;
1707		regular_count = 0;
1708		total_count = 0;
1709		frag_files_before_defrag = 0;
1710		frag_files_after_defrag = 0;
1711		extents_before_defrag = 0;
1712		extents_after_defrag = 0;
1713		defraged_file_count = 0;
1714		files_block_count = 0;
1715		blocks_per_group = 0;
1716		feature_incompat = 0;
1717		log_groups_per_flex = 0;
1718
1719		memset(dir_name, 0, PATH_MAX + 1);
1720		memset(dev_name, 0, PATH_MAX + 1);
1721		memset(lost_found_dir, 0, PATH_MAX + 1);
1722		memset(frag_rank, 0,
1723			sizeof(struct frag_statistic_ino) * SHOW_FRAG_FILES);
1724
1725		if ((mode_flag & STATISTIC) && i > optind)
1726			printf("\n");
1727
1728#if BYTE_ORDER != BIG_ENDIAN && BYTE_ORDER != LITTLE_ENDIAN
1729		PRINT_ERR_MSG("Endian's type is not big/little endian");
1730		PRINT_FILE_NAME(argv[i]);
1731		continue;
1732#endif
1733
1734		if (lstat64(argv[i], &buf) < 0) {
1735			perror(NGMSG_FILE_INFO);
1736			PRINT_FILE_NAME(argv[i]);
1737			continue;
1738		}
1739
1740		/* Handle i.e. lvm device symlinks */
1741		if (S_ISLNK(buf.st_mode)) {
1742			struct stat64	buf2;
1743
1744			if (stat64(argv[i], &buf2) == 0 &&
1745			    S_ISBLK(buf2.st_mode))
1746				buf = buf2;
1747		}
1748
1749		if (S_ISBLK(buf.st_mode)) {
1750			/* Block device */
1751			strncpy(dev_name, argv[i], strnlen(argv[i], PATH_MAX));
1752			if (get_mount_point(argv[i], dir_name, PATH_MAX) < 0)
1753				continue;
1754			if (lstat64(dir_name, &buf) < 0) {
1755				perror(NGMSG_FILE_INFO);
1756				PRINT_FILE_NAME(argv[i]);
1757				continue;
1758			}
1759			arg_type = DEVNAME;
1760			if (!(mode_flag & STATISTIC))
1761				printf("ext4 defragmentation for device(%s)\n",
1762					argv[i]);
1763		} else if (S_ISDIR(buf.st_mode)) {
1764			/* Directory */
1765			if (access(argv[i], R_OK) < 0) {
1766				perror(argv[i]);
1767				continue;
1768			}
1769			arg_type = DIRNAME;
1770			strncpy(dir_name, argv[i], strnlen(argv[i], PATH_MAX));
1771		} else if (S_ISREG(buf.st_mode)) {
1772			/* Regular file */
1773			arg_type = FILENAME;
1774		} else {
1775			/* Irregular file */
1776			PRINT_ERR_MSG(NGMSG_FILE_UNREG);
1777			PRINT_FILE_NAME(argv[i]);
1778			continue;
1779		}
1780
1781		/* Set blocksize */
1782		block_size = buf.st_blksize;
1783
1784		/* For device case,
1785		 * filesystem type checked in get_mount_point()
1786		 */
1787		if (arg_type == FILENAME || arg_type == DIRNAME) {
1788			if (is_ext4(argv[i], dev_name) < 0)
1789				continue;
1790			if (realpath(argv[i], dir_name) == NULL) {
1791				perror("Couldn't get full path");
1792				PRINT_FILE_NAME(argv[i]);
1793				continue;
1794			}
1795		}
1796
1797		if (current_uid == ROOT_UID) {
1798			/* Get super block info */
1799			ret = ext2fs_open(dev_name, EXT2_FLAG_64BITS, 0,
1800					  block_size, unix_io_manager, &fs);
1801			if (ret) {
1802				if (mode_flag & DETAIL)
1803					com_err(argv[1], ret,
1804						"while trying to open file system: %s",
1805						dev_name);
1806				continue;
1807			}
1808
1809			blocks_per_group = fs->super->s_blocks_per_group;
1810			feature_incompat = fs->super->s_feature_incompat;
1811			log_groups_per_flex = fs->super->s_log_groups_per_flex;
1812
1813			ext2fs_close_free(&fs);
1814		}
1815
1816		switch (arg_type) {
1817			int mount_dir_len = 0;
1818
1819		case DIRNAME:
1820			if (!(mode_flag & STATISTIC))
1821				printf("ext4 defragmentation "
1822					"for directory(%s)\n", argv[i]);
1823
1824			mount_dir_len = strnlen(lost_found_dir, PATH_MAX);
1825
1826			strncat(lost_found_dir, "/lost+found",
1827				PATH_MAX - strnlen(lost_found_dir, PATH_MAX));
1828
1829			/* Not the case("e4defrag  mount_piont_dir") */
1830			if (dir_name[mount_dir_len] != '\0') {
1831				/*
1832				 * "e4defrag mount_piont_dir/lost+found"
1833				 * or "e4defrag mount_piont_dir/lost+found/"
1834				 */
1835				if (strncmp(lost_found_dir, dir_name,
1836					    strnlen(lost_found_dir,
1837						    PATH_MAX)) == 0 &&
1838				    (dir_name[strnlen(lost_found_dir,
1839						      PATH_MAX)] == '\0' ||
1840				     dir_name[strnlen(lost_found_dir,
1841						      PATH_MAX)] == '/')) {
1842					PRINT_ERR_MSG(NGMSG_LOST_FOUND);
1843					PRINT_FILE_NAME(argv[i]);
1844					continue;
1845				}
1846
1847				/* "e4defrag mount_piont_dir/else_dir" */
1848				memset(lost_found_dir, 0, PATH_MAX + 1);
1849			}
1850		case DEVNAME:
1851			if (arg_type == DEVNAME) {
1852				strncpy(lost_found_dir, dir_name,
1853					strnlen(dir_name, PATH_MAX));
1854				strncat(lost_found_dir, "/lost+found/",
1855					PATH_MAX - strnlen(lost_found_dir,
1856							   PATH_MAX));
1857			}
1858
1859			nftw64(dir_name, calc_entry_counts, FTW_OPEN_FD, flags);
1860
1861			if (mode_flag & STATISTIC) {
1862				if (mode_flag & DETAIL)
1863					printf("%-40s%10s/%-10s%9s\n",
1864					"<File>", "now", "best", "size/ext");
1865
1866				if (!(mode_flag & DETAIL) &&
1867						current_uid != ROOT_UID) {
1868					printf(" Done.\n");
1869					success_flag = 1;
1870					continue;
1871				}
1872
1873				nftw64(dir_name, file_statistic,
1874							FTW_OPEN_FD, flags);
1875
1876				if (succeed_cnt != 0 &&
1877					current_uid == ROOT_UID) {
1878					if (mode_flag & DETAIL)
1879						printf("\n");
1880					printf("%-40s%10s/%-10s%9s\n",
1881						"<Fragmented files>", "now",
1882						"best", "size/ext");
1883					for (j = 0; j < SHOW_FRAG_FILES; j++) {
1884						if (strlen(frag_rank[j].
1885							msg_buffer) > 37) {
1886							printf("%d. %s\n%50d/"
1887							"%-10d%6llu KB\n",
1888							j + 1,
1889							frag_rank[j].msg_buffer,
1890							frag_rank[j].now_count,
1891							frag_rank[j].best_count,
1892							frag_rank[j].
1893								size_per_ext);
1894						} else if (strlen(frag_rank[j].
1895							msg_buffer) > 0) {
1896							printf("%d. %-37s%10d/"
1897							"%-10d%6llu KB\n",
1898							j + 1,
1899							frag_rank[j].msg_buffer,
1900							frag_rank[j].now_count,
1901							frag_rank[j].best_count,
1902							frag_rank[j].
1903								size_per_ext);
1904						} else
1905							break;
1906					}
1907				}
1908				break;
1909			}
1910			/* File tree walk */
1911			nftw64(dir_name, file_defrag, FTW_OPEN_FD, flags);
1912			printf("\n\tSuccess:\t\t\t[ %u/%u ]\n", succeed_cnt,
1913				total_count);
1914			printf("\tFailure:\t\t\t[ %u/%u ]\n",
1915				total_count - succeed_cnt, total_count);
1916			if (mode_flag & DETAIL) {
1917				printf("\tTotal extents:\t\t\t%4d->%d\n",
1918					extents_before_defrag,
1919					extents_after_defrag);
1920				printf("\tFragmented percentage:\t\t"
1921					"%3llu%%->%llu%%\n",
1922					!regular_count ? 0 :
1923					((unsigned long long)
1924					frag_files_before_defrag * 100) /
1925					regular_count,
1926					!regular_count ? 0 :
1927					((unsigned long long)
1928					frag_files_after_defrag * 100) /
1929					regular_count);
1930			}
1931			break;
1932		case FILENAME:
1933			total_count = 1;
1934			regular_count = 1;
1935			strncat(lost_found_dir, "/lost+found/",
1936				PATH_MAX - strnlen(lost_found_dir,
1937						   PATH_MAX));
1938			if (strncmp(lost_found_dir, dir_name,
1939				    strnlen(lost_found_dir,
1940					    PATH_MAX)) == 0) {
1941				PRINT_ERR_MSG(NGMSG_LOST_FOUND);
1942				PRINT_FILE_NAME(argv[i]);
1943				continue;
1944			}
1945
1946			if (mode_flag & STATISTIC) {
1947				file_statistic(argv[i], &buf, FTW_F, NULL);
1948				break;
1949			} else
1950				printf("ext4 defragmentation for %s\n",
1951								 argv[i]);
1952			/* Defrag single file process */
1953			file_defrag(argv[i], &buf, FTW_F, NULL);
1954			if (succeed_cnt != 0)
1955				printf(" Success:\t\t\t[1/1]\n");
1956			else
1957				printf(" Success:\t\t\t[0/1]\n");
1958
1959			break;
1960		}
1961
1962		if (succeed_cnt != 0)
1963			success_flag = 1;
1964		if (mode_flag & STATISTIC) {
1965			if (current_uid != ROOT_UID) {
1966				printf(" Done.\n");
1967				continue;
1968			}
1969
1970			if (!succeed_cnt) {
1971				if (mode_flag & DETAIL)
1972					printf("\n");
1973
1974				if (arg_type == DEVNAME)
1975					printf(" In this device(%s), "
1976					"none can be defragmented.\n", argv[i]);
1977				else if (arg_type == DIRNAME)
1978					printf(" In this directory(%s), "
1979					"none can be defragmented.\n", argv[i]);
1980				else
1981					printf(" This file(%s) "
1982					"can't be defragmented.\n", argv[i]);
1983			} else {
1984				float files_ratio = 0.0;
1985				float score = 0.0;
1986				__u64 size_per_ext = files_block_count *
1987						(buf.st_blksize / 1024) /
1988						extents_before_defrag;
1989				files_ratio = (float)(extents_before_defrag -
1990						extents_after_defrag) *
1991						100 / files_block_count;
1992				score = CALC_SCORE(files_ratio);
1993				printf("\n Total/best extents\t\t\t\t%d/%d\n"
1994					" Average size per extent"
1995					"\t\t\t%llu KB\n"
1996					" Fragmentation score\t\t\t\t%.0f\n",
1997						extents_before_defrag,
1998						extents_after_defrag,
1999						size_per_ext, score);
2000				printf(" [0-30 no problem:"
2001					" 31-55 a little bit fragmented:"
2002					" 56- needs defrag]\n");
2003
2004				if (arg_type == DEVNAME)
2005					printf(" This device (%s) ", argv[i]);
2006				else if (arg_type == DIRNAME)
2007					printf(" This directory (%s) ",
2008								argv[i]);
2009				else
2010					printf(" This file (%s) ", argv[i]);
2011
2012				if (score > BOUND_SCORE)
2013					printf("needs defragmentation.\n");
2014				else
2015					printf("does not need "
2016							"defragmentation.\n");
2017			}
2018			printf(" Done.\n");
2019		}
2020
2021	}
2022
2023	if (success_flag)
2024		return 0;
2025
2026	exit(1);
2027
2028out:
2029	printf(MSG_USAGE);
2030	exit(1);
2031}
2032
2033