e4defrag.c revision e661d2815194d6db3f3e38223277569ad55bb420
1/*
2 * e4defrag.c - ext4 filesystem defragmenter
3 *
4 * Copyright (C) 2009 NEC Software Tohoku, Ltd.
5 *
6 * Author: Akira Fujita	<a-fujita@rs.jp.nec.com>
7 *         Takashi Sato	<t-sato@yk.jp.nec.com>
8 */
9
10#ifndef _LARGEFILE_SOURCE
11#define _LARGEFILE_SOURCE
12#endif
13
14#ifndef _LARGEFILE64_SOURCE
15#define _LARGEFILE64_SOURCE
16#endif
17
18#ifndef _GNU_SOURCE
19#define _GNU_SOURCE
20#endif
21
22#include "config.h"
23#include <ctype.h>
24#include <dirent.h>
25#include <endian.h>
26#include <errno.h>
27#include <fcntl.h>
28#include <ftw.h>
29#include <limits.h>
30#include <mntent.h>
31#include <stdio.h>
32#include <stdlib.h>
33#include <string.h>
34#include <unistd.h>
35#include <ext2fs/ext2_types.h>
36#include <ext2fs/ext2fs.h>
37#include <linux/fs.h>
38#include <sys/ioctl.h>
39#include <ext2fs/fiemap.h>
40#include <sys/mman.h>
41#include <sys/stat.h>
42#include <sys/statfs.h>
43#include <sys/syscall.h>
44#include <sys/vfs.h>
45
46/* A relatively new ioctl interface ... */
47#ifndef EXT4_IOC_MOVE_EXT
48#define EXT4_IOC_MOVE_EXT      _IOWR('f', 15, struct move_extent)
49#endif
50
51/* Macro functions */
52#define PRINT_ERR_MSG(msg)	fprintf(stderr, "%s\n", (msg))
53#define IN_FTW_PRINT_ERR_MSG(msg)	\
54	fprintf(stderr, "\t%s\t\t[ NG ]\n", (msg))
55#define PRINT_FILE_NAME(file)	fprintf(stderr, " \"%s\"\n", (file))
56#define PRINT_ERR_MSG_WITH_ERRNO(msg)	\
57	fprintf(stderr, "\t%s:%s\t[ NG ]\n", (msg), strerror(errno))
58#define STATISTIC_ERR_MSG(msg)	\
59	fprintf(stderr, "\t%s\n", (msg))
60#define STATISTIC_ERR_MSG_WITH_ERRNO(msg)	\
61	fprintf(stderr, "\t%s:%s\n", (msg), strerror(errno))
62#define min(x, y) (((x) > (y)) ? (y) : (x))
63#define CALC_SCORE(ratio) \
64	((ratio) > 10 ? (80 + 20 * (ratio) / 100) : (8 * (ratio)))
65/* Wrap up the free function */
66#define FREE(tmp)				\
67	do {					\
68		if ((tmp) != NULL)		\
69			free(tmp);		\
70	} while (0)				\
71/* Insert list2 after list1 */
72#define insert(list1, list2)			\
73	do {					\
74		list2->next = list1->next;	\
75		list1->next->prev = list2;	\
76		list2->prev = list1;		\
77		list1->next = list2;		\
78	} while (0)
79
80/* To delete unused warning */
81#ifdef __GNUC__
82#define EXT2FS_ATTR(x) __attribute__(x)
83#else
84#define EXT2FS_ATTR(x)
85#endif
86
87/* The mode of defrag */
88#define DETAIL			0x01
89#define STATISTIC		0x02
90
91#define DEVNAME			0
92#define DIRNAME			1
93#define FILENAME		2
94
95#define FTW_OPEN_FD		2000
96
97#define FS_EXT4			"ext4"
98#define ROOT_UID		0
99
100#define BOUND_SCORE		55
101#define SHOW_FRAG_FILES	5
102
103/* Magic number for ext4 */
104#define EXT4_SUPER_MAGIC	0xEF53
105
106/* Definition of flex_bg */
107#define EXT4_FEATURE_INCOMPAT_FLEX_BG		0x0200
108
109/* The following macro is used for ioctl FS_IOC_FIEMAP
110 * EXTENT_MAX_COUNT:	the maximum number of extents for exchanging between
111 *			kernel-space and user-space per ioctl
112 */
113#define EXTENT_MAX_COUNT	512
114
115/* The following macros are error message */
116#define MSG_USAGE		\
117"Usage	: e4defrag [-v] file...| directory...| device...\n\
118	: e4defrag  -c  file...| directory...| device...\n"
119
120#define NGMSG_EXT4		"Filesystem is not ext4 filesystem"
121#define NGMSG_FILE_EXTENT	"Failed to get file extents"
122#define NGMSG_FILE_INFO		"Failed to get file information"
123#define NGMSG_FILE_OPEN		"Failed to open"
124#define NGMSG_FILE_UNREG	"File is not regular file"
125#define NGMSG_LOST_FOUND	"Can not process \"lost+found\""
126
127/* Data type for filesystem-wide blocks number */
128typedef unsigned long long ext4_fsblk_t;
129
130struct fiemap_extent_data {
131	__u64 len;			/* blocks count */
132	__u64 logical;		/* start logical block number */
133	ext4_fsblk_t physical;		/* start physical block number */
134};
135
136struct fiemap_extent_list {
137	struct fiemap_extent_list *prev;
138	struct fiemap_extent_list *next;
139	struct fiemap_extent_data data;	/* extent belong to file */
140};
141
142struct fiemap_extent_group {
143	struct fiemap_extent_group *prev;
144	struct fiemap_extent_group *next;
145	__u64 len;	/* length of this continuous region */
146	struct fiemap_extent_list *start;	/* start ext */
147	struct fiemap_extent_list *end;		/* end ext */
148};
149
150struct move_extent {
151	__s32 reserved;	/* original file descriptor */
152	__u32 donor_fd;	/* donor file descriptor */
153	__u64 orig_start;	/* logical start offset in block for orig */
154	__u64 donor_start;	/* logical start offset in block for donor */
155	__u64 len;	/* block length to be moved */
156	__u64 moved_len;	/* moved block length */
157};
158
159struct frag_statistic_ino {
160	int now_count;	/* the file's extents count of before defrag */
161	int best_count; /* the best file's extents count */
162	__u64 size_per_ext;	/* size(KB) per extent */
163	float ratio;	/* the ratio of fragmentation */
164	char msg_buffer[PATH_MAX + 1];	/* pathname of the file */
165};
166
167char	lost_found_dir[PATH_MAX + 1];
168int	block_size;
169int	extents_before_defrag;
170int	extents_after_defrag;
171int	mode_flag;
172unsigned int	current_uid;
173unsigned int	defraged_file_count;
174unsigned int	frag_files_before_defrag;
175unsigned int	frag_files_after_defrag;
176unsigned int	regular_count;
177unsigned int	succeed_cnt;
178unsigned int	total_count;
179__u8 log_groups_per_flex;
180__u32 blocks_per_group;
181__u32 feature_incompat;
182ext4_fsblk_t	files_block_count;
183struct frag_statistic_ino	frag_rank[SHOW_FRAG_FILES];
184
185
186/* Local definitions of some syscalls glibc may not yet have */
187
188#ifndef HAVE_POSIX_FADVISE
189#warning Using locally defined posix_fadvise interface.
190
191#ifndef __NR_fadvise64_64
192#error Your kernel headers dont define __NR_fadvise64_64
193#endif
194
195/*
196 * fadvise() -		Give advice about file access.
197 *
198 * @fd:			defrag target file's descriptor.
199 * @offset:		file offset.
200 * @len:		area length.
201 * @advise:		process flag.
202 */
203static int posix_fadvise(int fd, loff_t offset, size_t len, int advise)
204{
205	return syscall(__NR_fadvise64_64, fd, offset, len, advise);
206}
207#endif /* ! HAVE_FADVISE64_64 */
208
209#ifndef HAVE_SYNC_FILE_RANGE
210#warning Using locally defined sync_file_range interface.
211
212#ifndef __NR_sync_file_range
213#ifndef __NR_sync_file_range2 /* ppc */
214#error Your kernel headers dont define __NR_sync_file_range
215#endif
216#endif
217
218/*
219 * sync_file_range() -	Sync file region.
220 *
221 * @fd:			defrag target file's descriptor.
222 * @offset:		file offset.
223 * @length:		area length.
224 * @flag:		process flag.
225 */
226int sync_file_range(int fd, loff_t offset, loff_t length, unsigned int flag)
227{
228#ifdef __NR_sync_file_range
229	return syscall(__NR_sync_file_range, fd, offset, length, flag);
230#else
231	return syscall(__NR_sync_file_range2, fd, flag, offset, length);
232#endif
233}
234#endif /* ! HAVE_SYNC_FILE_RANGE */
235
236#ifndef HAVE_FALLOCATE64
237#warning Using locally defined fallocate syscall interface.
238
239#ifndef __NR_fallocate
240#error Your kernel headers dont define __NR_fallocate
241#endif
242
243/*
244 * fallocate64() -	Manipulate file space.
245 *
246 * @fd:			defrag target file's descriptor.
247 * @mode:		process flag.
248 * @offset:		file offset.
249 * @len:		file size.
250 */
251static int fallocate64(int fd, int mode, loff_t offset, loff_t len)
252{
253	return syscall(__NR_fallocate, fd, mode, offset, len);
254}
255#endif /* ! HAVE_FALLOCATE */
256
257/*
258 * get_mount_point() -	Get device's mount point.
259 *
260 * @devname:		the device's name.
261 * @mount_point:	the mount point.
262 * @dir_path_len:	the length of directory.
263 */
264static int get_mount_point(const char *devname, char *mount_point,
265							int dir_path_len)
266{
267	/* Refer to /etc/mtab */
268	const char	*mtab = MOUNTED;
269	FILE		*fp = NULL;
270	struct mntent	*mnt = NULL;
271	struct stat64	sb;
272
273	if (stat64(devname, &sb) < 0) {
274		perror(NGMSG_FILE_INFO);
275		PRINT_FILE_NAME(devname);
276		return -1;
277	}
278
279	fp = setmntent(mtab, "r");
280	if (fp == NULL) {
281		perror("Couldn't access /etc/mtab");
282		return -1;
283	}
284
285	while ((mnt = getmntent(fp)) != NULL) {
286		struct stat64 ms;
287
288		/*
289		 * To handle device symlinks, we see if the
290		 * device number matches, not the name
291		 */
292		if (stat64(mnt->mnt_fsname, &ms) < 0)
293			continue;
294		if (sb.st_rdev != ms.st_rdev)
295			continue;
296
297		endmntent(fp);
298		if (strcmp(mnt->mnt_type, FS_EXT4) == 0) {
299			strncpy(mount_point, mnt->mnt_dir,
300				dir_path_len);
301			return 0;
302		}
303		PRINT_ERR_MSG(NGMSG_EXT4);
304		return -1;
305	}
306	endmntent(fp);
307	PRINT_ERR_MSG("Filesystem is not mounted");
308	return -1;
309}
310
311/*
312 * is_ext4() -		Whether on an ext4 filesystem.
313 *
314 * @file:		the file's name.
315 */
316static int is_ext4(const char *file, char *devname)
317{
318	int 	maxlen = 0;
319	int	len, ret;
320	FILE	*fp = NULL;
321	char	*mnt_type = NULL;
322	/* Refer to /etc/mtab */
323	const char	*mtab = MOUNTED;
324	char	file_path[PATH_MAX + 1];
325	struct mntent	*mnt = NULL;
326	struct statfs64	fsbuf;
327
328	/* Get full path */
329	if (realpath(file, file_path) == NULL) {
330		perror("Couldn't get full path");
331		PRINT_FILE_NAME(file);
332		return -1;
333	}
334
335	if (statfs64(file_path, &fsbuf) < 0) {
336		perror("Failed to get filesystem information");
337		PRINT_FILE_NAME(file);
338		return -1;
339	}
340
341	if (fsbuf.f_type != EXT4_SUPER_MAGIC) {
342		PRINT_ERR_MSG(NGMSG_EXT4);
343		return -1;
344	}
345
346	fp = setmntent(mtab, "r");
347	if (fp == NULL) {
348		perror("Couldn't access /etc/mtab");
349		return -1;
350	}
351
352	while ((mnt = getmntent(fp)) != NULL) {
353		if (mnt->mnt_fsname[0] != '/')
354			continue;
355		len = strlen(mnt->mnt_dir);
356		ret = memcmp(file_path, mnt->mnt_dir, len);
357		if (ret != 0)
358			continue;
359
360		if (maxlen >= len)
361			continue;
362
363		maxlen = len;
364
365		mnt_type = realloc(mnt_type, strlen(mnt->mnt_type) + 1);
366		if (mnt_type == NULL) {
367			endmntent(fp);
368			return -1;
369		}
370		memset(mnt_type, 0, strlen(mnt->mnt_type) + 1);
371		strncpy(mnt_type, mnt->mnt_type, strlen(mnt->mnt_type));
372		strncpy(lost_found_dir, mnt->mnt_dir, PATH_MAX);
373		strncpy(devname, mnt->mnt_fsname, strlen(mnt->mnt_fsname) + 1);
374	}
375
376	endmntent(fp);
377	if (mnt_type && strcmp(mnt_type, FS_EXT4) == 0) {
378		FREE(mnt_type);
379		return 0;
380	} else {
381		FREE(mnt_type);
382		PRINT_ERR_MSG(NGMSG_EXT4);
383		return -1;
384	}
385}
386
387/*
388 * calc_entry_counts() -	Calculate file counts.
389 *
390 * @file:		file name.
391 * @buf:		file info.
392 * @flag:		file type.
393 * @ftwbuf:		the pointer of a struct FTW.
394 */
395static int calc_entry_counts(const char *file EXT2FS_ATTR((unused)),
396		const struct stat64 *buf, int flag EXT2FS_ATTR((unused)),
397		struct FTW *ftwbuf EXT2FS_ATTR((unused)))
398{
399	if (S_ISREG(buf->st_mode))
400		regular_count++;
401
402	total_count++;
403
404	return 0;
405}
406
407/*
408 * page_in_core() -	Get information on whether pages are in core.
409 *
410 * @fd:			defrag target file's descriptor.
411 * @defrag_data:	data used for defrag.
412 * @vec:		page state array.
413 * @page_num:		page number.
414 */
415static int page_in_core(int fd, struct move_extent defrag_data,
416			unsigned char **vec, unsigned int *page_num)
417{
418	long	pagesize;
419	void	*page = NULL;
420	loff_t	offset, end_offset, length;
421
422	if (vec == NULL || *vec != NULL)
423		return -1;
424
425	pagesize = sysconf(_SC_PAGESIZE);
426	if (pagesize < 0)
427		return -1;
428	/* In mmap, offset should be a multiple of the page size */
429	offset = (loff_t)defrag_data.orig_start * block_size;
430	length = (loff_t)defrag_data.len * block_size;
431	end_offset = offset + length;
432	/* Round the offset down to the nearest multiple of pagesize */
433	offset = (offset / pagesize) * pagesize;
434	length = end_offset - offset;
435
436	page = mmap(NULL, length, PROT_READ, MAP_SHARED, fd, offset);
437	if (page == MAP_FAILED)
438		return -1;
439
440	*page_num = 0;
441	*page_num = (length + pagesize - 1) / pagesize;
442	*vec = (unsigned char *)calloc(*page_num, 1);
443	if (*vec == NULL)
444		return -1;
445
446	/* Get information on whether pages are in core */
447	if (mincore(page, (size_t)length, *vec) == -1 ||
448		munmap(page, length) == -1) {
449		FREE(*vec);
450		return -1;
451	}
452
453	return 0;
454}
455
456/*
457 * defrag_fadvise() -	Predeclare an access pattern for file data.
458 *
459 * @fd:			defrag target file's descriptor.
460 * @defrag_data:	data used for defrag.
461 * @vec:		page state array.
462 * @page_num:		page number.
463 */
464static int defrag_fadvise(int fd, struct move_extent defrag_data,
465		   unsigned char *vec, unsigned int page_num)
466{
467	int	flag = 1;
468	long	pagesize = sysconf(_SC_PAGESIZE);
469	int	fadvise_flag = POSIX_FADV_DONTNEED;
470	int	sync_flag = SYNC_FILE_RANGE_WAIT_BEFORE |
471			    SYNC_FILE_RANGE_WRITE |
472			    SYNC_FILE_RANGE_WAIT_AFTER;
473	unsigned int	i;
474	loff_t	offset;
475
476	offset = (loff_t)defrag_data.orig_start * block_size;
477	offset = (offset / pagesize) * pagesize;
478
479	/* Sync file for fadvise process */
480	if (sync_file_range(fd, offset,
481		(loff_t)pagesize * page_num, sync_flag) < 0)
482		return -1;
483
484	/* Try to release buffer cache which this process used,
485	 * then other process can use the released buffer
486	 */
487	for (i = 0; i < page_num; i++) {
488		if ((vec[i] & 0x1) == 0) {
489			offset += pagesize;
490			continue;
491		}
492		if (posix_fadvise(fd, offset, pagesize, fadvise_flag) < 0) {
493			if ((mode_flag & DETAIL) && flag) {
494				perror("\tFailed to fadvise");
495				flag = 0;
496			}
497		}
498		offset += pagesize;
499	}
500
501	return 0;
502}
503
504/*
505 * check_free_size() -	Check if there's enough disk space.
506 *
507 * @fd:			defrag target file's descriptor.
508 * @file:		file name.
509 * @blk_count:		file blocks.
510 */
511static int check_free_size(int fd, const char *file, ext4_fsblk_t blk_count)
512{
513	ext4_fsblk_t	free_blk_count;
514	struct statfs64	fsbuf;
515
516	if (fstatfs64(fd, &fsbuf) < 0) {
517		if (mode_flag & DETAIL) {
518			PRINT_FILE_NAME(file);
519			PRINT_ERR_MSG_WITH_ERRNO(
520				"Failed to get filesystem information");
521		}
522		return -1;
523	}
524
525	/* Compute free space for root and normal user separately */
526	if (current_uid == ROOT_UID)
527		free_blk_count = fsbuf.f_bfree;
528	else
529		free_blk_count = fsbuf.f_bavail;
530
531	if (free_blk_count >= blk_count)
532		return 0;
533
534	return -ENOSPC;
535}
536
537/*
538 * file_frag_count() -	Get file fragment count.
539 *
540 * @fd:			defrag target file's descriptor.
541 */
542static int file_frag_count(int fd)
543{
544	int	ret;
545	struct fiemap	fiemap_buf;
546
547	/* When fm_extent_count is 0,
548	 * ioctl just get file fragment count.
549	 */
550	memset(&fiemap_buf, 0, sizeof(struct fiemap));
551	fiemap_buf.fm_start = 0;
552	fiemap_buf.fm_length = FIEMAP_MAX_OFFSET;
553	fiemap_buf.fm_flags |= FIEMAP_FLAG_SYNC;
554
555	ret = ioctl(fd, FS_IOC_FIEMAP, &fiemap_buf);
556	if (ret < 0)
557		return ret;
558
559	return fiemap_buf.fm_mapped_extents;
560}
561
562/*
563 * file_check() -	Check file's attributes.
564 *
565 * @fd:			defrag target file's descriptor.
566 * @buf:		a pointer of the struct stat64.
567 * @file:		file name.
568 * @extents:		file extents.
569 * @blk_count:		file blocks.
570 */
571static int file_check(int fd, const struct stat64 *buf, const char *file,
572		int extents, ext4_fsblk_t blk_count)
573{
574	int	ret;
575	struct flock	lock;
576
577	/* Write-lock check is more reliable */
578	lock.l_type = F_WRLCK;
579	lock.l_start = 0;
580	lock.l_whence = SEEK_SET;
581	lock.l_len = 0;
582
583	/* Free space */
584	ret = check_free_size(fd, file, blk_count);
585	if (ret < 0) {
586		if ((mode_flag & DETAIL) && ret == -ENOSPC) {
587			printf("\033[79;0H\033[K[%u/%u] \"%s\"\t\t"
588				"  extents: %d -> %d\n", defraged_file_count,
589				total_count, file, extents, extents);
590			IN_FTW_PRINT_ERR_MSG(
591			"Defrag size is larger than filesystem's free space");
592		}
593		return -1;
594	}
595
596	/* Access authority */
597	if (current_uid != ROOT_UID &&
598		buf->st_uid != current_uid) {
599		if (mode_flag & DETAIL) {
600			printf("\033[79;0H\033[K[%u/%u] \"%s\"\t\t"
601				"  extents: %d -> %d\n", defraged_file_count,
602				total_count, file, extents, extents);
603			IN_FTW_PRINT_ERR_MSG(
604				"File is not current user's file"
605				" or current user is not root");
606		}
607		return -1;
608	}
609
610	/* Lock status */
611	if (fcntl(fd, F_GETLK, &lock) < 0) {
612		if (mode_flag & DETAIL) {
613			PRINT_FILE_NAME(file);
614			PRINT_ERR_MSG_WITH_ERRNO(
615				"Failed to get lock information");
616		}
617		return -1;
618	} else if (lock.l_type != F_UNLCK) {
619		if (mode_flag & DETAIL) {
620			PRINT_FILE_NAME(file);
621			IN_FTW_PRINT_ERR_MSG("File has been locked");
622		}
623		return -1;
624	}
625
626	return 0;
627}
628
629/*
630 * insert_extent_by_logical() -	Sequentially insert extent by logical.
631 *
632 * @ext_list_head:	the head of logical extent list.
633 * @ext:		the extent element which will be inserted.
634 */
635static int insert_extent_by_logical(struct fiemap_extent_list **ext_list_head,
636			struct fiemap_extent_list *ext)
637{
638	struct fiemap_extent_list	*ext_list_tmp = *ext_list_head;
639
640	if (ext == NULL)
641		goto out;
642
643	/* First element */
644	if (*ext_list_head == NULL) {
645		(*ext_list_head) = ext;
646		(*ext_list_head)->prev = *ext_list_head;
647		(*ext_list_head)->next = *ext_list_head;
648		return 0;
649	}
650
651	if (ext->data.logical <= ext_list_tmp->data.logical) {
652		/* Insert before head */
653		if (ext_list_tmp->data.logical <
654			ext->data.logical + ext->data.len)
655			/* Overlap */
656			goto out;
657		/* Adjust head */
658		*ext_list_head = ext;
659	} else {
660		/* Insert into the middle or last of the list */
661		do {
662			if (ext->data.logical < ext_list_tmp->data.logical)
663				break;
664			ext_list_tmp = ext_list_tmp->next;
665		} while (ext_list_tmp != (*ext_list_head));
666		if (ext->data.logical <
667		    ext_list_tmp->prev->data.logical +
668			ext_list_tmp->prev->data.len)
669			/* Overlap */
670			goto out;
671
672		if (ext_list_tmp != *ext_list_head &&
673		    ext_list_tmp->data.logical <
674		    ext->data.logical + ext->data.len)
675			/* Overlap */
676			goto out;
677	}
678	ext_list_tmp = ext_list_tmp->prev;
679	/* Insert "ext" after "ext_list_tmp" */
680	insert(ext_list_tmp, ext);
681	return 0;
682out:
683	errno = EINVAL;
684	return -1;
685}
686
687/*
688 * insert_extent_by_physical() -	Sequentially insert extent by physical.
689 *
690 * @ext_list_head:	the head of physical extent list.
691 * @ext:		the extent element which will be inserted.
692 */
693static int insert_extent_by_physical(struct fiemap_extent_list **ext_list_head,
694			struct fiemap_extent_list *ext)
695{
696	struct fiemap_extent_list	*ext_list_tmp = *ext_list_head;
697
698	if (ext == NULL)
699		goto out;
700
701	/* First element */
702	if (*ext_list_head == NULL) {
703		(*ext_list_head) = ext;
704		(*ext_list_head)->prev = *ext_list_head;
705		(*ext_list_head)->next = *ext_list_head;
706		return 0;
707	}
708
709	if (ext->data.physical <= ext_list_tmp->data.physical) {
710		/* Insert before head */
711		if (ext_list_tmp->data.physical <
712					ext->data.physical + ext->data.len)
713			/* Overlap */
714			goto out;
715		/* Adjust head */
716		*ext_list_head = ext;
717	} else {
718		/* Insert into the middle or last of the list */
719		do {
720			if (ext->data.physical < ext_list_tmp->data.physical)
721				break;
722			ext_list_tmp = ext_list_tmp->next;
723		} while (ext_list_tmp != (*ext_list_head));
724		if (ext->data.physical <
725		    ext_list_tmp->prev->data.physical +
726				ext_list_tmp->prev->data.len)
727			/* Overlap */
728			goto out;
729
730		if (ext_list_tmp != *ext_list_head &&
731		    ext_list_tmp->data.physical <
732				ext->data.physical + ext->data.len)
733			/* Overlap */
734			goto out;
735	}
736	ext_list_tmp = ext_list_tmp->prev;
737	/* Insert "ext" after "ext_list_tmp" */
738	insert(ext_list_tmp, ext);
739	return 0;
740out:
741	errno = EINVAL;
742	return -1;
743}
744
745/*
746 * insert_exts_group() -	Insert a exts_group.
747 *
748 * @ext_group_head:		the head of a exts_group list.
749 * @exts_group:			the exts_group element which will be inserted.
750 */
751static int insert_exts_group(struct fiemap_extent_group **ext_group_head,
752				struct fiemap_extent_group *exts_group)
753{
754	struct fiemap_extent_group	*ext_group_tmp = NULL;
755
756	if (exts_group == NULL) {
757		errno = EINVAL;
758		return -1;
759	}
760
761	/* Initialize list */
762	if (*ext_group_head == NULL) {
763		(*ext_group_head) = exts_group;
764		(*ext_group_head)->prev = *ext_group_head;
765		(*ext_group_head)->next = *ext_group_head;
766		return 0;
767	}
768
769	ext_group_tmp = (*ext_group_head)->prev;
770	insert(ext_group_tmp, exts_group);
771
772	return 0;
773}
774
775/*
776 * join_extents() -		Find continuous region(exts_group).
777 *
778 * @ext_list_head:		the head of the extent list.
779 * @ext_group_head:		the head of the target exts_group list.
780 */
781static int join_extents(struct fiemap_extent_list *ext_list_head,
782		struct fiemap_extent_group **ext_group_head)
783{
784	__u64	len = ext_list_head->data.len;
785	struct fiemap_extent_list *ext_list_start = ext_list_head;
786	struct fiemap_extent_list *ext_list_tmp = ext_list_head->next;
787
788	do {
789		struct fiemap_extent_group	*ext_group_tmp = NULL;
790
791		/* This extent and previous extent are not continuous,
792		 * so, all previous extents are treated as an extent group.
793		 */
794		if ((ext_list_tmp->prev->data.logical +
795			ext_list_tmp->prev->data.len)
796				!= ext_list_tmp->data.logical) {
797			ext_group_tmp =
798				malloc(sizeof(struct fiemap_extent_group));
799			if (ext_group_tmp == NULL)
800				return -1;
801
802			memset(ext_group_tmp, 0,
803				sizeof(struct fiemap_extent_group));
804			ext_group_tmp->len = len;
805			ext_group_tmp->start = ext_list_start;
806			ext_group_tmp->end = ext_list_tmp->prev;
807
808			if (insert_exts_group(ext_group_head,
809				ext_group_tmp) < 0) {
810				FREE(ext_group_tmp);
811				return -1;
812			}
813			ext_list_start = ext_list_tmp;
814			len = ext_list_tmp->data.len;
815			ext_list_tmp = ext_list_tmp->next;
816			continue;
817		}
818
819		/* This extent and previous extent are continuous,
820		 * so, they belong to the same extent group, and we check
821		 * if the next extent belongs to the same extent group.
822		 */
823		len += ext_list_tmp->data.len;
824		ext_list_tmp = ext_list_tmp->next;
825	} while (ext_list_tmp != ext_list_head->next);
826
827	return 0;
828}
829
830/*
831 * get_file_extents() -	Get file's extent list.
832 *
833 * @fd:			defrag target file's descriptor.
834 * @ext_list_head:	the head of the extent list.
835 */
836static int get_file_extents(int fd, struct fiemap_extent_list **ext_list_head)
837{
838	__u32	i;
839	int	ret;
840	int	ext_buf_size, fie_buf_size;
841	__u64	pos = 0;
842	struct fiemap	*fiemap_buf = NULL;
843	struct fiemap_extent	*ext_buf = NULL;
844	struct fiemap_extent_list	*ext_list = NULL;
845
846	/* Convert units, in bytes.
847	 * Be careful : now, physical block number in extent is 48bit,
848	 * and the maximum blocksize for ext4 is 4K(12bit),
849	 * so there is no overflow, but in future it may be changed.
850	 */
851
852	/* Alloc space for fiemap */
853	ext_buf_size = EXTENT_MAX_COUNT * sizeof(struct fiemap_extent);
854	fie_buf_size = sizeof(struct fiemap) + ext_buf_size;
855
856	fiemap_buf = malloc(fie_buf_size);
857	if (fiemap_buf == NULL)
858		return -1;
859
860	ext_buf = fiemap_buf->fm_extents;
861	memset(fiemap_buf, 0, fie_buf_size);
862	fiemap_buf->fm_length = FIEMAP_MAX_OFFSET;
863	fiemap_buf->fm_flags |= FIEMAP_FLAG_SYNC;
864	fiemap_buf->fm_extent_count = EXTENT_MAX_COUNT;
865
866	do {
867		fiemap_buf->fm_start = pos;
868		memset(ext_buf, 0, ext_buf_size);
869		ret = ioctl(fd, FS_IOC_FIEMAP, fiemap_buf);
870		if (ret < 0 || fiemap_buf->fm_mapped_extents == 0)
871			goto out;
872		for (i = 0; i < fiemap_buf->fm_mapped_extents; i++) {
873			ext_list = NULL;
874			ext_list = malloc(sizeof(struct fiemap_extent_list));
875			if (ext_list == NULL)
876				goto out;
877
878			ext_list->data.physical = ext_buf[i].fe_physical
879						/ block_size;
880			ext_list->data.logical = ext_buf[i].fe_logical
881						/ block_size;
882			ext_list->data.len = ext_buf[i].fe_length
883						/ block_size;
884
885			ret = insert_extent_by_physical(
886					ext_list_head, ext_list);
887			if (ret < 0) {
888				FREE(ext_list);
889				goto out;
890			}
891		}
892		/* Record file's logical offset this time */
893		pos = ext_buf[EXTENT_MAX_COUNT-1].fe_logical +
894			ext_buf[EXTENT_MAX_COUNT-1].fe_length;
895		/*
896		 * If fm_extents array has been filled and
897		 * there are extents left, continue to cycle.
898		 */
899	} while (fiemap_buf->fm_mapped_extents
900					== EXTENT_MAX_COUNT &&
901		!(ext_buf[EXTENT_MAX_COUNT-1].fe_flags
902					& FIEMAP_EXTENT_LAST));
903
904	FREE(fiemap_buf);
905	return 0;
906out:
907	FREE(fiemap_buf);
908	return -1;
909}
910
911/*
912 * get_logical_count() -	Get the file logical extents count.
913 *
914 * @logical_list_head:	the head of the logical extent list.
915 */
916static int get_logical_count(struct fiemap_extent_list *logical_list_head)
917{
918	int ret = 0;
919	struct fiemap_extent_list *ext_list_tmp  = logical_list_head;
920
921	do {
922		ret++;
923		ext_list_tmp = ext_list_tmp->next;
924	} while (ext_list_tmp != logical_list_head);
925
926	return ret;
927}
928
929/*
930 * get_physical_count() -	Get the file physical extents count.
931 *
932 * @physical_list_head:	the head of the physical extent list.
933 */
934static int get_physical_count(struct fiemap_extent_list *physical_list_head)
935{
936	int ret = 0;
937	struct fiemap_extent_list *ext_list_tmp = physical_list_head;
938
939	do {
940		if ((ext_list_tmp->data.physical + ext_list_tmp->data.len)
941				!= ext_list_tmp->next->data.physical) {
942			/* This extent and next extent are not continuous. */
943			ret++;
944		}
945
946		ext_list_tmp = ext_list_tmp->next;
947	} while (ext_list_tmp != physical_list_head);
948
949	return ret;
950}
951
952/*
953 * change_physical_to_logical() -	Change list from physical to logical.
954 *
955 * @physical_list_head:	the head of physical extent list.
956 * @logical_list_head:	the head of logical extent list.
957 */
958static int change_physical_to_logical(
959			struct fiemap_extent_list **physical_list_head,
960			struct fiemap_extent_list **logical_list_head)
961{
962	int ret;
963	struct fiemap_extent_list *ext_list_tmp = *physical_list_head;
964	struct fiemap_extent_list *ext_list_next = ext_list_tmp->next;
965
966	while (1) {
967		if (ext_list_tmp == ext_list_next) {
968			ret = insert_extent_by_logical(
969				logical_list_head, ext_list_tmp);
970			if (ret < 0)
971				return -1;
972
973			*physical_list_head = NULL;
974			break;
975		}
976
977		ext_list_tmp->prev->next = ext_list_tmp->next;
978		ext_list_tmp->next->prev = ext_list_tmp->prev;
979		*physical_list_head = ext_list_next;
980
981		ret = insert_extent_by_logical(
982			logical_list_head, ext_list_tmp);
983		if (ret < 0) {
984			FREE(ext_list_tmp);
985			return -1;
986		}
987		ext_list_tmp = ext_list_next;
988		ext_list_next = ext_list_next->next;
989	}
990
991	return 0;
992}
993
994/* get_file_blocks() -  Get total file blocks.
995 *
996 * @ext_list_head:	the extent list head of the target file
997 */
998static ext4_fsblk_t get_file_blocks(struct fiemap_extent_list *ext_list_head)
999{
1000	ext4_fsblk_t blk_count = 0;
1001	struct fiemap_extent_list *ext_list_tmp = ext_list_head;
1002
1003	do {
1004		blk_count += ext_list_tmp->data.len;
1005		ext_list_tmp = ext_list_tmp->next;
1006	} while (ext_list_tmp != ext_list_head);
1007
1008	return blk_count;
1009}
1010
1011/*
1012 * free_ext() -		Free the extent list.
1013 *
1014 * @ext_list_head:	the extent list head of which will be free.
1015 */
1016static void free_ext(struct fiemap_extent_list *ext_list_head)
1017{
1018	struct fiemap_extent_list	*ext_list_tmp = NULL;
1019
1020	if (ext_list_head == NULL)
1021		return;
1022
1023	while (ext_list_head->next != ext_list_head) {
1024		ext_list_tmp = ext_list_head;
1025		ext_list_head->prev->next = ext_list_head->next;
1026		ext_list_head->next->prev = ext_list_head->prev;
1027		ext_list_head = ext_list_head->next;
1028		free(ext_list_tmp);
1029	}
1030	free(ext_list_head);
1031}
1032
1033/*
1034 * free_exts_group() -		Free the exts_group.
1035 *
1036 * @*ext_group_head:	the exts_group list head which will be free.
1037 */
1038static void free_exts_group(struct fiemap_extent_group *ext_group_head)
1039{
1040	struct fiemap_extent_group	*ext_group_tmp = NULL;
1041
1042	if (ext_group_head == NULL)
1043		return;
1044
1045	while (ext_group_head->next != ext_group_head) {
1046		ext_group_tmp = ext_group_head;
1047		ext_group_head->prev->next = ext_group_head->next;
1048		ext_group_head->next->prev = ext_group_head->prev;
1049		ext_group_head = ext_group_head->next;
1050		free(ext_group_tmp);
1051	}
1052	free(ext_group_head);
1053}
1054
1055/*
1056 * get_best_count() -	Get the file best extents count.
1057 *
1058 * @block_count:		the file's physical block count.
1059 */
1060static int get_best_count(ext4_fsblk_t block_count)
1061{
1062	int ret;
1063	unsigned int flex_bg_num;
1064
1065	/* Calcuate best extents count */
1066	if (feature_incompat & EXT4_FEATURE_INCOMPAT_FLEX_BG) {
1067		flex_bg_num = 1 << log_groups_per_flex;
1068		ret = ((block_count - 1) /
1069			((ext4_fsblk_t)blocks_per_group *
1070				flex_bg_num)) + 1;
1071	} else
1072		ret = ((block_count - 1) / blocks_per_group) + 1;
1073
1074	return ret;
1075}
1076
1077
1078/*
1079 * file_statistic() -	Get statistic info of the file's fragments.
1080 *
1081 * @file:		the file's name.
1082 * @buf:		the pointer of the struct stat64.
1083 * @flag:		file type.
1084 * @ftwbuf:		the pointer of a struct FTW.
1085 */
1086static int file_statistic(const char *file, const struct stat64 *buf,
1087			int flag EXT2FS_ATTR((unused)),
1088			struct FTW *ftwbuf EXT2FS_ATTR((unused)))
1089{
1090	int	fd;
1091	int	ret;
1092	int	now_ext_count, best_ext_count = 0, physical_ext_count;
1093	int	i, j;
1094	__u64	size_per_ext = 0;
1095	float	ratio = 0.0;
1096	ext4_fsblk_t	blk_count = 0;
1097	char	msg_buffer[PATH_MAX + 24];
1098	struct fiemap_extent_list *physical_list_head = NULL;
1099	struct fiemap_extent_list *logical_list_head = NULL;
1100
1101	defraged_file_count++;
1102
1103	if (mode_flag & DETAIL) {
1104		if (total_count == 1 && regular_count == 1)
1105			printf("<File>\n");
1106		else {
1107			printf("[%u/%u]", defraged_file_count, total_count);
1108			fflush(stdout);
1109		}
1110	}
1111	if (lost_found_dir[0] != '\0' &&
1112	    !memcmp(file, lost_found_dir, strnlen(lost_found_dir, PATH_MAX))) {
1113		if (mode_flag & DETAIL) {
1114			PRINT_FILE_NAME(file);
1115			STATISTIC_ERR_MSG(NGMSG_LOST_FOUND);
1116		}
1117			return 0;
1118	}
1119
1120	if (!S_ISREG(buf->st_mode)) {
1121		if (mode_flag & DETAIL) {
1122			PRINT_FILE_NAME(file);
1123			STATISTIC_ERR_MSG(NGMSG_FILE_UNREG);
1124		}
1125		return 0;
1126	}
1127
1128	/* Access authority */
1129	if (current_uid != ROOT_UID &&
1130		buf->st_uid != current_uid) {
1131		if (mode_flag & DETAIL) {
1132			PRINT_FILE_NAME(file);
1133			STATISTIC_ERR_MSG(
1134				"File is not current user's file"
1135				" or current user is not root");
1136		}
1137		return 0;
1138	}
1139
1140	/* Empty file */
1141	if (buf->st_size == 0) {
1142		if (mode_flag & DETAIL) {
1143			PRINT_FILE_NAME(file);
1144			STATISTIC_ERR_MSG("File size is 0");
1145		}
1146		return 0;
1147	}
1148
1149	/* Has no blocks */
1150	if (buf->st_blocks == 0) {
1151		if (mode_flag & DETAIL) {
1152			PRINT_FILE_NAME(file);
1153			STATISTIC_ERR_MSG("File has no blocks");
1154		}
1155		return 0;
1156	}
1157
1158	fd = open64(file, O_RDONLY);
1159	if (fd < 0) {
1160		if (mode_flag & DETAIL) {
1161			PRINT_FILE_NAME(file);
1162			STATISTIC_ERR_MSG_WITH_ERRNO(NGMSG_FILE_OPEN);
1163		}
1164		return 0;
1165	}
1166
1167	/* Get file's physical extents  */
1168	ret = get_file_extents(fd, &physical_list_head);
1169	if (ret < 0) {
1170		if (mode_flag & DETAIL) {
1171			PRINT_FILE_NAME(file);
1172			STATISTIC_ERR_MSG_WITH_ERRNO(NGMSG_FILE_EXTENT);
1173		}
1174		goto out;
1175	}
1176
1177	/* Get the count of file's continuous physical region */
1178	physical_ext_count = get_physical_count(physical_list_head);
1179
1180	/* Change list from physical to logical */
1181	ret = change_physical_to_logical(&physical_list_head,
1182							&logical_list_head);
1183	if (ret < 0) {
1184		if (mode_flag & DETAIL) {
1185			PRINT_FILE_NAME(file);
1186			STATISTIC_ERR_MSG_WITH_ERRNO(NGMSG_FILE_EXTENT);
1187		}
1188		goto out;
1189	}
1190
1191	/* Count file fragments before defrag */
1192	now_ext_count = get_logical_count(logical_list_head);
1193
1194	if (current_uid == ROOT_UID) {
1195		/* Calculate the size per extent */
1196		blk_count = get_file_blocks(logical_list_head);
1197
1198		best_ext_count = get_best_count(blk_count);
1199
1200		/* e4defrag rounds size_per_ext up to a block size boundary */
1201		size_per_ext = blk_count * (buf->st_blksize / 1024) /
1202							now_ext_count;
1203
1204		ratio = (float)(physical_ext_count - best_ext_count) * 100 /
1205							blk_count;
1206
1207		extents_before_defrag += now_ext_count;
1208		extents_after_defrag += best_ext_count;
1209		files_block_count += blk_count;
1210	}
1211
1212	if (total_count == 1 && regular_count == 1) {
1213		/* File only */
1214		if (mode_flag & DETAIL) {
1215			int count = 0;
1216			struct fiemap_extent_list *ext_list_tmp =
1217						logical_list_head;
1218
1219			/* Print extents info */
1220			do {
1221				count++;
1222				printf("[ext %d]:\tstart %llu:\tlogical "
1223						"%llu:\tlen %llu\n", count,
1224						ext_list_tmp->data.physical,
1225						ext_list_tmp->data.logical,
1226						ext_list_tmp->data.len);
1227				ext_list_tmp = ext_list_tmp->next;
1228			} while (ext_list_tmp != logical_list_head);
1229
1230		} else {
1231			printf("%-40s%10s/%-10s%9s\n",
1232					"<File>", "now", "best", "size/ext");
1233			if (current_uid == ROOT_UID) {
1234				if (strlen(file) > 40)
1235					printf("%s\n%50d/%-10d%6llu KB\n",
1236						file, now_ext_count,
1237						best_ext_count, size_per_ext);
1238				else
1239					printf("%-40s%10d/%-10d%6llu KB\n",
1240						file, now_ext_count,
1241						best_ext_count, size_per_ext);
1242			} else {
1243				if (strlen(file) > 40)
1244					printf("%s\n%50d/%-10s%7s\n",
1245							file, now_ext_count,
1246							"-", "-");
1247				else
1248					printf("%-40s%10d/%-10s%7s\n",
1249							file, now_ext_count,
1250							"-", "-");
1251			}
1252		}
1253		succeed_cnt++;
1254		goto out;
1255	}
1256
1257	if (mode_flag & DETAIL) {
1258		/* Print statistic info */
1259		sprintf(msg_buffer, "[%u/%u]%s",
1260				defraged_file_count, total_count, file);
1261		if (current_uid == ROOT_UID) {
1262			if (strlen(msg_buffer) > 40)
1263				printf("\033[79;0H\033[K%s\n"
1264						"%50d/%-10d%6llu KB\n",
1265						msg_buffer, now_ext_count,
1266						best_ext_count, size_per_ext);
1267			else
1268				printf("\033[79;0H\033[K%-40s"
1269						"%10d/%-10d%6llu KB\n",
1270						msg_buffer, now_ext_count,
1271						best_ext_count, size_per_ext);
1272		} else {
1273			if (strlen(msg_buffer) > 40)
1274				printf("\033[79;0H\033[K%s\n%50d/%-10s%7s\n",
1275						msg_buffer, now_ext_count,
1276							"-", "-");
1277			else
1278				printf("\033[79;0H\033[K%-40s%10d/%-10s%7s\n",
1279						msg_buffer, now_ext_count,
1280							"-", "-");
1281		}
1282	}
1283
1284	for (i = 0; i < SHOW_FRAG_FILES; i++) {
1285		if (ratio >= frag_rank[i].ratio) {
1286			for (j = SHOW_FRAG_FILES - 1; j > i; j--) {
1287				memset(&frag_rank[j], 0,
1288					sizeof(struct frag_statistic_ino));
1289				strncpy(frag_rank[j].msg_buffer,
1290					frag_rank[j - 1].msg_buffer,
1291					strnlen(frag_rank[j - 1].msg_buffer,
1292					PATH_MAX));
1293				frag_rank[j].now_count =
1294					frag_rank[j - 1].now_count;
1295				frag_rank[j].best_count =
1296					frag_rank[j - 1].best_count;
1297				frag_rank[j].size_per_ext =
1298					frag_rank[j - 1].size_per_ext;
1299				frag_rank[j].ratio =
1300					frag_rank[j - 1].ratio;
1301			}
1302			memset(&frag_rank[i], 0,
1303					sizeof(struct frag_statistic_ino));
1304			strncpy(frag_rank[i].msg_buffer, file,
1305						strnlen(file, PATH_MAX));
1306			frag_rank[i].now_count = now_ext_count;
1307			frag_rank[i].best_count = best_ext_count;
1308			frag_rank[i].size_per_ext = size_per_ext;
1309			frag_rank[i].ratio = ratio;
1310			break;
1311		}
1312	}
1313
1314	succeed_cnt++;
1315
1316out:
1317	close(fd);
1318	free_ext(physical_list_head);
1319	free_ext(logical_list_head);
1320	return 0;
1321}
1322
1323/*
1324 * print_progress -	Print defrag progress
1325 *
1326 * @file:		file name.
1327 * @start:		logical offset for defrag target file
1328 * @file_size:		defrag target filesize
1329 */
1330static void print_progress(const char *file, loff_t start, loff_t file_size)
1331{
1332	int percent = (start * 100) / file_size;
1333	printf("\033[79;0H\033[K[%u/%u]%s:\t%3d%%",
1334		defraged_file_count, total_count, file, min(percent, 100));
1335	fflush(stdout);
1336
1337	return;
1338}
1339
1340/*
1341 * call_defrag() -	Execute the defrag program.
1342 *
1343 * @fd:			target file descriptor.
1344 * @donor_fd:		donor file descriptor.
1345 * @file:			target file name.
1346 * @buf:			pointer of the struct stat64.
1347 * @ext_list_head:	head of the extent list.
1348 */
1349static int call_defrag(int fd, int donor_fd, const char *file,
1350	const struct stat64 *buf, struct fiemap_extent_list *ext_list_head)
1351{
1352	loff_t	start = 0;
1353	unsigned int	page_num;
1354	unsigned char	*vec = NULL;
1355	int	defraged_ret = 0;
1356	int	ret;
1357	struct move_extent	move_data;
1358	struct fiemap_extent_list	*ext_list_tmp = NULL;
1359
1360	memset(&move_data, 0, sizeof(struct move_extent));
1361	move_data.donor_fd = donor_fd;
1362
1363	/* Print defrag progress */
1364	print_progress(file, start, buf->st_size);
1365
1366	ext_list_tmp = ext_list_head;
1367	do {
1368		move_data.orig_start = ext_list_tmp->data.logical;
1369		/* Logical offset of orig and donor should be same */
1370		move_data.donor_start = move_data.orig_start;
1371		move_data.len = ext_list_tmp->data.len;
1372		move_data.moved_len = 0;
1373
1374		ret = page_in_core(fd, move_data, &vec, &page_num);
1375		if (ret < 0) {
1376			if (mode_flag & DETAIL) {
1377				printf("\n");
1378				PRINT_ERR_MSG_WITH_ERRNO(
1379						"Failed to get file map");
1380			} else {
1381				printf("\t[ NG ]\n");
1382			}
1383			return -1;
1384		}
1385
1386		/* EXT4_IOC_MOVE_EXT */
1387		defraged_ret =
1388			ioctl(fd, EXT4_IOC_MOVE_EXT, &move_data);
1389
1390		/* Free pages */
1391		ret = defrag_fadvise(fd, move_data, vec, page_num);
1392		if (vec) {
1393			free(vec);
1394			vec = NULL;
1395		}
1396		if (ret < 0) {
1397			if (mode_flag & DETAIL) {
1398				printf("\n");
1399				PRINT_ERR_MSG_WITH_ERRNO(
1400					"Failed to free page");
1401			} else {
1402				printf("\t[ NG ]\n");
1403			}
1404			return -1;
1405		}
1406
1407		if (defraged_ret < 0) {
1408			if (mode_flag & DETAIL) {
1409				printf("\n");
1410				PRINT_ERR_MSG_WITH_ERRNO(
1411					"Failed to defrag with "
1412					"EXT4_IOC_MOVE_EXT ioctl");
1413				if (errno == ENOTTY)
1414					printf("\tAt least 2.6.31-rc1 of "
1415						"vanilla kernel is required\n");
1416			} else {
1417				printf("\t[ NG ]\n");
1418			}
1419			return -1;
1420		}
1421		/* Adjust logical offset for next ioctl */
1422		move_data.orig_start += move_data.moved_len;
1423		move_data.donor_start = move_data.orig_start;
1424
1425		start = move_data.orig_start * buf->st_blksize;
1426
1427		/* Print defrag progress */
1428		print_progress(file, start, buf->st_size);
1429
1430		/* End of file */
1431		if (start >= buf->st_size)
1432			break;
1433
1434		ext_list_tmp = ext_list_tmp->next;
1435	} while (ext_list_tmp != ext_list_head);
1436
1437	return 0;
1438}
1439
1440/*
1441 * file_defrag() -		Check file attributes and call ioctl to defrag.
1442 *
1443 * @file:		the file's name.
1444 * @buf:		the pointer of the struct stat64.
1445 * @flag:		file type.
1446 * @ftwbuf:		the pointer of a struct FTW.
1447 */
1448static int file_defrag(const char *file, const struct stat64 *buf,
1449			int flag EXT2FS_ATTR((unused)),
1450			struct FTW *ftwbuf EXT2FS_ATTR((unused)))
1451{
1452	int	fd;
1453	int	donor_fd = -1;
1454	int	ret;
1455	int	best;
1456	int	file_frags_start, file_frags_end;
1457	int	orig_physical_cnt, donor_physical_cnt = 0;
1458	char	tmp_inode_name[PATH_MAX + 8];
1459	ext4_fsblk_t			blk_count = 0;
1460	struct fiemap_extent_list	*orig_list_physical = NULL;
1461	struct fiemap_extent_list	*orig_list_logical = NULL;
1462	struct fiemap_extent_list	*donor_list_physical = NULL;
1463	struct fiemap_extent_list	*donor_list_logical = NULL;
1464	struct fiemap_extent_group	*orig_group_head = NULL;
1465	struct fiemap_extent_group	*orig_group_tmp = NULL;
1466
1467	defraged_file_count++;
1468
1469	if (mode_flag & DETAIL) {
1470		printf("[%u/%u]", defraged_file_count, total_count);
1471		fflush(stdout);
1472	}
1473
1474	if (lost_found_dir[0] != '\0' &&
1475	    !memcmp(file, lost_found_dir, strnlen(lost_found_dir, PATH_MAX))) {
1476		if (mode_flag & DETAIL) {
1477			PRINT_FILE_NAME(file);
1478			IN_FTW_PRINT_ERR_MSG(NGMSG_LOST_FOUND);
1479		}
1480		return 0;
1481	}
1482
1483	if (!S_ISREG(buf->st_mode)) {
1484		if (mode_flag & DETAIL) {
1485			PRINT_FILE_NAME(file);
1486			IN_FTW_PRINT_ERR_MSG(NGMSG_FILE_UNREG);
1487		}
1488		return 0;
1489	}
1490
1491	/* Empty file */
1492	if (buf->st_size == 0) {
1493		if (mode_flag & DETAIL) {
1494			PRINT_FILE_NAME(file);
1495			IN_FTW_PRINT_ERR_MSG("File size is 0");
1496		}
1497		return 0;
1498	}
1499
1500	/* Has no blocks */
1501	if (buf->st_blocks == 0) {
1502		if (mode_flag & DETAIL) {
1503			PRINT_FILE_NAME(file);
1504			STATISTIC_ERR_MSG("File has no blocks");
1505		}
1506		return 0;
1507	}
1508
1509	fd = open64(file, O_RDWR);
1510	if (fd < 0) {
1511		if (mode_flag & DETAIL) {
1512			PRINT_FILE_NAME(file);
1513			PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_OPEN);
1514		}
1515		return 0;
1516	}
1517
1518	/* Get file's extents */
1519	ret = get_file_extents(fd, &orig_list_physical);
1520	if (ret < 0) {
1521		if (mode_flag & DETAIL) {
1522			PRINT_FILE_NAME(file);
1523			PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_EXTENT);
1524		}
1525		goto out;
1526	}
1527
1528	/* Get the count of file's continuous physical region */
1529	orig_physical_cnt = get_physical_count(orig_list_physical);
1530
1531	/* Change list from physical to logical */
1532	ret = change_physical_to_logical(&orig_list_physical,
1533							&orig_list_logical);
1534	if (ret < 0) {
1535		if (mode_flag & DETAIL) {
1536			PRINT_FILE_NAME(file);
1537			PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_EXTENT);
1538		}
1539		goto out;
1540	}
1541
1542	/* Count file fragments before defrag */
1543	file_frags_start = get_logical_count(orig_list_logical);
1544
1545	blk_count = get_file_blocks(orig_list_logical);
1546	if (file_check(fd, buf, file, file_frags_start, blk_count) < 0)
1547		goto out;
1548
1549	if (fsync(fd) < 0) {
1550		if (mode_flag & DETAIL) {
1551			PRINT_FILE_NAME(file);
1552			PRINT_ERR_MSG_WITH_ERRNO("Failed to sync(fsync)");
1553		}
1554		goto out;
1555	}
1556
1557	if (current_uid == ROOT_UID)
1558		best = get_best_count(blk_count);
1559	else
1560		best = 1;
1561
1562	if (file_frags_start <= best)
1563		goto check_improvement;
1564
1565	/* Combine extents to group */
1566	ret = join_extents(orig_list_logical, &orig_group_head);
1567	if (ret < 0) {
1568		if (mode_flag & DETAIL) {
1569			PRINT_FILE_NAME(file);
1570			PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_EXTENT);
1571		}
1572		goto out;
1573	}
1574
1575	/* Create donor inode */
1576	memset(tmp_inode_name, 0, PATH_MAX + 8);
1577	sprintf(tmp_inode_name, "%.*s.defrag",
1578				(int)strnlen(file, PATH_MAX), file);
1579	donor_fd = open64(tmp_inode_name, O_WRONLY | O_CREAT | O_EXCL, S_IRUSR);
1580	if (donor_fd < 0) {
1581		if (mode_flag & DETAIL) {
1582			PRINT_FILE_NAME(file);
1583			if (errno == EEXIST)
1584				PRINT_ERR_MSG_WITH_ERRNO(
1585				"File is being defraged by other program");
1586			else
1587				PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_OPEN);
1588		}
1589		goto out;
1590	}
1591
1592	/* Unlink donor inode */
1593	ret = unlink(tmp_inode_name);
1594	if (ret < 0) {
1595		if (mode_flag & DETAIL) {
1596			PRINT_FILE_NAME(file);
1597			PRINT_ERR_MSG_WITH_ERRNO("Failed to unlink");
1598		}
1599		goto out;
1600	}
1601
1602	/* Allocate space for donor inode */
1603	orig_group_tmp = orig_group_head;
1604	do {
1605		ret = fallocate64(donor_fd, 0,
1606		  (loff_t)orig_group_tmp->start->data.logical * block_size,
1607		  (loff_t)orig_group_tmp->len * block_size);
1608		if (ret < 0) {
1609			if (mode_flag & DETAIL) {
1610				PRINT_FILE_NAME(file);
1611				PRINT_ERR_MSG_WITH_ERRNO("Failed to fallocate");
1612			}
1613			goto out;
1614		}
1615
1616		orig_group_tmp = orig_group_tmp->next;
1617	} while (orig_group_tmp != orig_group_head);
1618
1619	/* Get donor inode's extents */
1620	ret = get_file_extents(donor_fd, &donor_list_physical);
1621	if (ret < 0) {
1622		if (mode_flag & DETAIL) {
1623			PRINT_FILE_NAME(file);
1624			PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_EXTENT);
1625		}
1626		goto out;
1627	}
1628
1629	/* Calcuate donor inode's continuous physical region */
1630	donor_physical_cnt = get_physical_count(donor_list_physical);
1631
1632	/* Change donor extent list from physical to logical */
1633	ret = change_physical_to_logical(&donor_list_physical,
1634							&donor_list_logical);
1635	if (ret < 0) {
1636		if (mode_flag & DETAIL) {
1637			PRINT_FILE_NAME(file);
1638			PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_EXTENT);
1639		}
1640		goto out;
1641	}
1642
1643check_improvement:
1644	if (mode_flag & DETAIL) {
1645		if (file_frags_start != 1)
1646			frag_files_before_defrag++;
1647
1648		extents_before_defrag += file_frags_start;
1649	}
1650
1651	if (file_frags_start <= best ||
1652			orig_physical_cnt <= donor_physical_cnt) {
1653		printf("\033[79;0H\033[K[%u/%u]%s:\t%3d%%",
1654			defraged_file_count, total_count, file, 100);
1655		if (mode_flag & DETAIL)
1656			printf("  extents: %d -> %d",
1657				file_frags_start, file_frags_start);
1658
1659		printf("\t[ OK ]\n");
1660		succeed_cnt++;
1661
1662		if (file_frags_start != 1)
1663			frag_files_after_defrag++;
1664
1665		extents_after_defrag += file_frags_start;
1666		goto out;
1667	}
1668
1669	/* Defrag the file */
1670	ret = call_defrag(fd, donor_fd, file, buf, donor_list_logical);
1671
1672	/* Count file fragments after defrag and print extents info */
1673	if (mode_flag & DETAIL) {
1674		file_frags_end = file_frag_count(fd);
1675		if (file_frags_end < 0) {
1676			printf("\n");
1677			PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_INFO);
1678			goto out;
1679		}
1680
1681		if (file_frags_end != 1)
1682			frag_files_after_defrag++;
1683
1684		extents_after_defrag += file_frags_end;
1685
1686		if (ret < 0)
1687			goto out;
1688
1689		printf("  extents: %d -> %d",
1690			file_frags_start, file_frags_end);
1691		fflush(stdout);
1692	}
1693
1694	if (ret < 0)
1695		goto out;
1696
1697	printf("\t[ OK ]\n");
1698	fflush(stdout);
1699	succeed_cnt++;
1700
1701out:
1702	close(fd);
1703	if (donor_fd != -1)
1704		close(donor_fd);
1705	free_ext(orig_list_physical);
1706	free_ext(orig_list_logical);
1707	free_ext(donor_list_physical);
1708	free_exts_group(orig_group_head);
1709	return 0;
1710}
1711
1712/*
1713 * main() -		Ext4 online defrag.
1714 *
1715 * @argc:		the number of parameter.
1716 * @argv[]:		the pointer array of parameter.
1717 */
1718int main(int argc, char *argv[])
1719{
1720	int	opt;
1721	int	i, j, ret = 0;
1722	int	flags = FTW_PHYS | FTW_MOUNT;
1723	int	arg_type = -1;
1724	int	success_flag = 0;
1725	char	dir_name[PATH_MAX + 1];
1726	char	dev_name[PATH_MAX + 1];
1727	struct stat64	buf;
1728	ext2_filsys fs = NULL;
1729
1730	/* Parse arguments */
1731	if (argc == 1)
1732		goto out;
1733
1734	while ((opt = getopt(argc, argv, "vc")) != EOF) {
1735		switch (opt) {
1736		case 'v':
1737			mode_flag |= DETAIL;
1738			break;
1739		case 'c':
1740			mode_flag |= STATISTIC;
1741			break;
1742		default:
1743			goto out;
1744		}
1745	}
1746
1747	if (argc == optind)
1748		goto out;
1749
1750	current_uid = getuid();
1751
1752	/* Main process */
1753	for (i = optind; i < argc; i++) {
1754		succeed_cnt = 0;
1755		regular_count = 0;
1756		total_count = 0;
1757		frag_files_before_defrag = 0;
1758		frag_files_after_defrag = 0;
1759		extents_before_defrag = 0;
1760		extents_after_defrag = 0;
1761		defraged_file_count = 0;
1762		files_block_count = 0;
1763		blocks_per_group = 0;
1764		feature_incompat = 0;
1765		log_groups_per_flex = 0;
1766
1767		memset(dir_name, 0, PATH_MAX + 1);
1768		memset(dev_name, 0, PATH_MAX + 1);
1769		memset(lost_found_dir, 0, PATH_MAX + 1);
1770		memset(frag_rank, 0,
1771			sizeof(struct frag_statistic_ino) * SHOW_FRAG_FILES);
1772
1773		if ((mode_flag & STATISTIC) && i > optind)
1774			printf("\n");
1775
1776#if BYTE_ORDER != BIG_ENDIAN && BYTE_ORDER != LITTLE_ENDIAN
1777		PRINT_ERR_MSG("Endian's type is not big/little endian");
1778		PRINT_FILE_NAME(argv[i]);
1779		continue;
1780#endif
1781
1782		if (lstat64(argv[i], &buf) < 0) {
1783			perror(NGMSG_FILE_INFO);
1784			PRINT_FILE_NAME(argv[i]);
1785			continue;
1786		}
1787
1788		/* Handle i.e. lvm device symlinks */
1789		if (S_ISLNK(buf.st_mode)) {
1790			struct stat64	buf2;
1791
1792			if (stat64(argv[i], &buf2) == 0 &&
1793			    S_ISBLK(buf2.st_mode))
1794				buf = buf2;
1795		}
1796
1797		if (S_ISBLK(buf.st_mode)) {
1798			/* Block device */
1799			strncpy(dev_name, argv[i], strnlen(argv[i], PATH_MAX));
1800			if (get_mount_point(argv[i], dir_name, PATH_MAX) < 0)
1801				continue;
1802			if (lstat64(dir_name, &buf) < 0) {
1803				perror(NGMSG_FILE_INFO);
1804				PRINT_FILE_NAME(argv[i]);
1805				continue;
1806			}
1807			arg_type = DEVNAME;
1808			if (!(mode_flag & STATISTIC))
1809				printf("ext4 defragmentation for device(%s)\n",
1810					argv[i]);
1811		} else if (S_ISDIR(buf.st_mode)) {
1812			/* Directory */
1813			if (access(argv[i], R_OK) < 0) {
1814				perror(argv[i]);
1815				continue;
1816			}
1817			arg_type = DIRNAME;
1818			strncpy(dir_name, argv[i], strnlen(argv[i], PATH_MAX));
1819		} else if (S_ISREG(buf.st_mode)) {
1820			/* Regular file */
1821			arg_type = FILENAME;
1822		} else {
1823			/* Irregular file */
1824			PRINT_ERR_MSG(NGMSG_FILE_UNREG);
1825			PRINT_FILE_NAME(argv[i]);
1826			continue;
1827		}
1828
1829		/* Set blocksize */
1830		block_size = buf.st_blksize;
1831
1832		/* For device case,
1833		 * filesystem type checked in get_mount_point()
1834		 */
1835		if (arg_type == FILENAME || arg_type == DIRNAME) {
1836			if (is_ext4(argv[i], dev_name) < 0)
1837				continue;
1838			if (realpath(argv[i], dir_name) == NULL) {
1839				perror("Couldn't get full path");
1840				PRINT_FILE_NAME(argv[i]);
1841				continue;
1842			}
1843		}
1844
1845		if (current_uid == ROOT_UID) {
1846			/* Get super block info */
1847			ret = ext2fs_open(dev_name, 0, 0, block_size,
1848					unix_io_manager, &fs);
1849			if (ret) {
1850				if (mode_flag & DETAIL) {
1851					perror("Can't get super block info");
1852					PRINT_FILE_NAME(argv[i]);
1853				}
1854				continue;
1855			}
1856
1857			blocks_per_group = fs->super->s_blocks_per_group;
1858			feature_incompat = fs->super->s_feature_incompat;
1859			log_groups_per_flex = fs->super->s_log_groups_per_flex;
1860
1861			ext2fs_close(fs);
1862		}
1863
1864		switch (arg_type) {
1865		case DIRNAME:
1866			if (!(mode_flag & STATISTIC))
1867				printf("ext4 defragmentation "
1868					"for directory(%s)\n", argv[i]);
1869
1870			int mount_dir_len = 0;
1871			mount_dir_len = strnlen(lost_found_dir, PATH_MAX);
1872
1873			strncat(lost_found_dir, "/lost+found",
1874				PATH_MAX - strnlen(lost_found_dir, PATH_MAX));
1875
1876			/* Not the case("e4defrag  mount_piont_dir") */
1877			if (dir_name[mount_dir_len] != '\0') {
1878				/*
1879				 * "e4defrag mount_piont_dir/lost+found"
1880				 * or "e4defrag mount_piont_dir/lost+found/"
1881				 */
1882				if (strncmp(lost_found_dir, dir_name,
1883					    strnlen(lost_found_dir,
1884						    PATH_MAX)) == 0 &&
1885				    (dir_name[strnlen(lost_found_dir,
1886						      PATH_MAX)] == '\0' ||
1887				     dir_name[strnlen(lost_found_dir,
1888						      PATH_MAX)] == '/')) {
1889					PRINT_ERR_MSG(NGMSG_LOST_FOUND);
1890					PRINT_FILE_NAME(argv[i]);
1891					continue;
1892				}
1893
1894				/* "e4defrag mount_piont_dir/else_dir" */
1895				memset(lost_found_dir, 0, PATH_MAX + 1);
1896			}
1897		case DEVNAME:
1898			if (arg_type == DEVNAME) {
1899				strncpy(lost_found_dir, dir_name,
1900					strnlen(dir_name, PATH_MAX));
1901				strncat(lost_found_dir, "/lost+found/",
1902					PATH_MAX - strnlen(lost_found_dir,
1903							   PATH_MAX));
1904			}
1905
1906			nftw64(dir_name, calc_entry_counts, FTW_OPEN_FD, flags);
1907
1908			if (mode_flag & STATISTIC) {
1909				if (mode_flag & DETAIL)
1910					printf("%-40s%10s/%-10s%9s\n",
1911					"<File>", "now", "best", "size/ext");
1912
1913				if (!(mode_flag & DETAIL) &&
1914						current_uid != ROOT_UID) {
1915					printf(" Done.\n");
1916					success_flag = 1;
1917					continue;
1918				}
1919
1920				nftw64(dir_name, file_statistic,
1921							FTW_OPEN_FD, flags);
1922
1923				if (succeed_cnt != 0 &&
1924					current_uid == ROOT_UID) {
1925					if (mode_flag & DETAIL)
1926						printf("\n");
1927					printf("%-40s%10s/%-10s%9s\n",
1928						"<Fragmented files>", "now",
1929						"best", "size/ext");
1930					for (j = 0; j < SHOW_FRAG_FILES; j++) {
1931						if (strlen(frag_rank[j].
1932							msg_buffer) > 37) {
1933							printf("%d. %s\n%50d/"
1934							"%-10d%6llu KB\n",
1935							j + 1,
1936							frag_rank[j].msg_buffer,
1937							frag_rank[j].now_count,
1938							frag_rank[j].best_count,
1939							frag_rank[j].
1940								size_per_ext);
1941						} else if (strlen(frag_rank[j].
1942							msg_buffer) > 0) {
1943							printf("%d. %-37s%10d/"
1944							"%-10d%6llu KB\n",
1945							j + 1,
1946							frag_rank[j].msg_buffer,
1947							frag_rank[j].now_count,
1948							frag_rank[j].best_count,
1949							frag_rank[j].
1950								size_per_ext);
1951						} else
1952							break;
1953					}
1954				}
1955				break;
1956			}
1957			/* File tree walk */
1958			nftw64(dir_name, file_defrag, FTW_OPEN_FD, flags);
1959			printf("\n\tSuccess:\t\t\t[ %u/%u ]\n", succeed_cnt,
1960				total_count);
1961			printf("\tFailure:\t\t\t[ %u/%u ]\n",
1962				total_count - succeed_cnt, total_count);
1963			if (mode_flag & DETAIL) {
1964				printf("\tTotal extents:\t\t\t%4d->%d\n",
1965					extents_before_defrag,
1966					extents_after_defrag);
1967				printf("\tFragmented percentage:\t\t"
1968					"%3llu%%->%llu%%\n",
1969					!regular_count ? 0 :
1970					((unsigned long long)
1971					frag_files_before_defrag * 100) /
1972					regular_count,
1973					!regular_count ? 0 :
1974					((unsigned long long)
1975					frag_files_after_defrag * 100) /
1976					regular_count);
1977			}
1978			break;
1979		case FILENAME:
1980			total_count = 1;
1981			regular_count = 1;
1982			strncat(lost_found_dir, "/lost+found/",
1983				PATH_MAX - strnlen(lost_found_dir,
1984						   PATH_MAX));
1985			if (strncmp(lost_found_dir, dir_name,
1986				    strnlen(lost_found_dir,
1987					    PATH_MAX)) == 0) {
1988				PRINT_ERR_MSG(NGMSG_LOST_FOUND);
1989				PRINT_FILE_NAME(argv[i]);
1990				continue;
1991			}
1992
1993			if (mode_flag & STATISTIC) {
1994				file_statistic(argv[i], &buf, FTW_F, NULL);
1995				break;
1996			} else
1997				printf("ext4 defragmentation for %s\n",
1998								 argv[i]);
1999			/* Defrag single file process */
2000			file_defrag(argv[i], &buf, FTW_F, NULL);
2001			if (succeed_cnt != 0)
2002				printf(" Success:\t\t\t[1/1]\n");
2003			else
2004				printf(" Success:\t\t\t[0/1]\n");
2005
2006			break;
2007		}
2008
2009		if (succeed_cnt != 0)
2010			success_flag = 1;
2011		if (mode_flag & STATISTIC) {
2012			if (current_uid != ROOT_UID) {
2013				printf(" Done.\n");
2014				continue;
2015			}
2016
2017			if (!succeed_cnt) {
2018				if (mode_flag & DETAIL)
2019					printf("\n");
2020
2021				if (arg_type == DEVNAME)
2022					printf(" In this device(%s), "
2023					"none can be defragmented.\n", argv[i]);
2024				else if (arg_type == DIRNAME)
2025					printf(" In this directory(%s), "
2026					"none can be defragmented.\n", argv[i]);
2027				else
2028					printf(" This file(%s) "
2029					"can't be defragmented.\n", argv[i]);
2030			} else {
2031				float files_ratio = 0.0;
2032				float score = 0.0;
2033				__u64 size_per_ext = files_block_count *
2034						(buf.st_blksize / 1024) /
2035						extents_before_defrag;
2036				files_ratio = (float)(extents_before_defrag -
2037						extents_after_defrag) *
2038						100 / files_block_count;
2039				score = CALC_SCORE(files_ratio);
2040				printf("\n Total/best extents\t\t\t\t%d/%d\n"
2041					" Average size per extent"
2042					"\t\t\t%llu KB\n"
2043					" Fragmentation score\t\t\t\t%.0f\n",
2044						extents_before_defrag,
2045						extents_after_defrag,
2046						size_per_ext, score);
2047				printf(" [0-30 no problem:"
2048					" 31-55 a little bit fragmented:"
2049					" 56- needs defrag]\n");
2050
2051				if (arg_type == DEVNAME)
2052					printf(" This device (%s) ", argv[i]);
2053				else if (arg_type == DIRNAME)
2054					printf(" This directory (%s) ",
2055								argv[i]);
2056				else
2057					printf(" This file (%s) ", argv[i]);
2058
2059				if (score > BOUND_SCORE)
2060					printf("needs defragmentation.\n");
2061				else
2062					printf("does not need "
2063							"defragmentation.\n");
2064			}
2065			printf(" Done.\n");
2066		}
2067
2068	}
2069
2070	if (success_flag)
2071		return 0;
2072
2073	exit(1);
2074
2075out:
2076	printf(MSG_USAGE);
2077	exit(1);
2078}
2079
2080