/*
 * Small tool to check for dedupable blocks in a file or device. Basically
 * just scans the file for extents of the given size, checksums them,
 * and orders them up.
 */
65aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe#include <stdio.h>
75aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe#include <stdio.h>
85aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe#include <unistd.h>
95aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe#include <inttypes.h>
105aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe#include <assert.h>
115aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe#include <sys/types.h>
125aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe#include <sys/stat.h>
135aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe#include <sys/ioctl.h>
145aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe#include <fcntl.h>
155aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe#include <string.h>
165aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
175aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe#include "../lib/rbtree.h"
185aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe#include "../flist.h"
195aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe#include "../log.h"
205aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe#include "../mutex.h"
215aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe#include "../smalloc.h"
225aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe#include "../minmax.h"
235aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe#include "../crc/md5.h"
245aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe#include "../memalign.h"
255aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe#include "../os/os.h"
26d11a563f99321a4844d52000211af9e772a1f415Jens Axboe#include "../gettime.h"
27d11a563f99321a4844d52000211af9e772a1f415Jens Axboe#include "../fio_time.h"
285aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
2976b9b8306580bf99000be844fae6f91ba08e4ee3Jens Axboe#include "../lib/bloom.h"
307a74197fa3c678aef89a4645c6cd26bfff2ff13aJens Axboe#include "debug.h"
315aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
/*
 * State owned by one scanning thread. The array of these is created by
 * run_dedupe_threads() and each element is handed to thread_fn().
 */
struct worker_thread {
	pthread_t thread;	/* handle used for pthread_create()/pthread_join() */

	volatile int done;	/* set to 1 by thread_fn() right before it returns */

	int fd;			/* descriptor the worker reads blocks from */
	uint64_t cur_offset;	/* start of the work unit handed out by get_work() */
	uint64_t size;		/* length in bytes of that work unit */

	unsigned long items;	/* number of blocks hashed so far */
	unsigned long dupes;	/* accumulated bloom_set() results */
	int err;		/* set to 1 by thread_fn() when its loop stops */
};
455aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
465aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboestruct extent {
475aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	struct flist_head list;
485aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	uint64_t offset;
495aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe};
505aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
515aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboestruct chunk {
525aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	struct rb_node rb_node;
535aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	uint64_t count;
545aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	uint32_t hash[MD5_HASH_WORDS];
55d11a563f99321a4844d52000211af9e772a1f415Jens Axboe	struct flist_head extent_list[0];
565aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe};
575aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
585aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboestruct item {
595aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	uint64_t offset;
605aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	uint32_t hash[MD5_HASH_WORDS];
615aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe};
625aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
635aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboestatic struct rb_root rb_root;
6476b9b8306580bf99000be844fae6f91ba08e4ee3Jens Axboestatic struct bloom *bloom;
655aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboestatic struct fio_mutex *rb_lock;
665aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
675aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboestatic unsigned int blocksize = 4096;
685aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboestatic unsigned int num_threads;
695aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboestatic unsigned int chunk_size = 1048576;
705aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboestatic unsigned int dump_output;
715aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboestatic unsigned int odirect;
725aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboestatic unsigned int collision_check;
733f3415fe484d4c9427f9ab5cc52390df7c54e9a3Jens Axboestatic unsigned int print_progress = 1;
7476b9b8306580bf99000be844fae6f91ba08e4ee3Jens Axboestatic unsigned int use_bloom = 1;
755aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
765aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboestatic uint64_t total_size;
775aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboestatic uint64_t cur_offset;
785aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboestatic struct fio_mutex *size_lock;
795aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
806d686d99c8f32cd23aae98bca7fd2ece348ffa5eJens Axboestatic struct fio_file file;
815aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
826d686d99c8f32cd23aae98bca7fd2ece348ffa5eJens Axboestatic uint64_t get_size(struct fio_file *f, struct stat *sb)
835aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe{
845aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	uint64_t ret;
855aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
865aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	if (S_ISBLK(sb->st_mode)) {
876d686d99c8f32cd23aae98bca7fd2ece348ffa5eJens Axboe		unsigned long long bytes;
886d686d99c8f32cd23aae98bca7fd2ece348ffa5eJens Axboe
896d686d99c8f32cd23aae98bca7fd2ece348ffa5eJens Axboe		if (blockdev_size(f, &bytes)) {
906d686d99c8f32cd23aae98bca7fd2ece348ffa5eJens Axboe			log_err("dedupe: failed getting bdev size\n");
915aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe			return 0;
925aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe		}
936d686d99c8f32cd23aae98bca7fd2ece348ffa5eJens Axboe		ret = bytes;
945aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	} else
955aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe		ret = sb->st_size;
965aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
975aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	return (ret & ~((uint64_t)blocksize - 1));
985aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe}
995aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
1005aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboestatic int get_work(uint64_t *offset, uint64_t *size)
1015aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe{
1025aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	uint64_t this_chunk;
1035aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	int ret = 1;
1045aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
1055aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	fio_mutex_down(size_lock);
1065aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
1075aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	if (cur_offset < total_size) {
1085aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe		*offset = cur_offset;
1095aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe		this_chunk = min((uint64_t)chunk_size, total_size - cur_offset);
1105aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe		*size = this_chunk;
1115aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe		cur_offset += this_chunk;
1125aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe		ret = 0;
1135aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	}
1145aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
1155aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	fio_mutex_up(size_lock);
1165aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	return ret;
1175aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe}
1185aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
/*
 * Read exactly @count bytes from @fd at @offset into @buf.
 *
 * pread() is allowed to return fewer bytes than requested, so keep
 * reading until the request is satisfied instead of failing on the first
 * partial transfer.
 *
 * Returns 0 on success and 1 on failure: a pread() error (reported via
 * perror), end of file before any data was read (silent), or end of file
 * after a partial read (reported as a short read).
 */
static int __read_block(int fd, void *buf, off_t offset, size_t count)
{
	char *dst = buf;
	size_t done = 0;

	/* do/while so a zero-length request still fails, as it always has */
	do {
		ssize_t ret;

		ret = pread(fd, dst + done, count - done, offset + (off_t)done);
		if (ret < 0) {
			perror("pread");
			return 1;
		}
		if (!ret) {
			if (done)
				log_err("dedupe: short read on block\n");
			return 1;
		}

		done += (size_t)ret;
	} while (done < count);

	return 0;
}
1365aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
137d393bcd743c553c5c6b047ceb5b104679df004c5Jens Axboestatic int read_block(int fd, void *buf, off_t offset)
138d393bcd743c553c5c6b047ceb5b104679df004c5Jens Axboe{
139d393bcd743c553c5c6b047ceb5b104679df004c5Jens Axboe	return __read_block(fd, buf, offset, blocksize);
140d393bcd743c553c5c6b047ceb5b104679df004c5Jens Axboe}
141d393bcd743c553c5c6b047ceb5b104679df004c5Jens Axboe
1425aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboestatic void add_item(struct chunk *c, struct item *i)
1435aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe{
1444deca3622b28244fd394242f091445d37b45fbdaJens Axboe	/*
1454deca3622b28244fd394242f091445d37b45fbdaJens Axboe	 * Save some memory and don't add extent items, if we don't
1464deca3622b28244fd394242f091445d37b45fbdaJens Axboe	 * use them.
1474deca3622b28244fd394242f091445d37b45fbdaJens Axboe	 */
1484deca3622b28244fd394242f091445d37b45fbdaJens Axboe	if (dump_output || collision_check) {
1494deca3622b28244fd394242f091445d37b45fbdaJens Axboe		struct extent *e;
1504deca3622b28244fd394242f091445d37b45fbdaJens Axboe
1514deca3622b28244fd394242f091445d37b45fbdaJens Axboe		e = malloc(sizeof(*e));
1524deca3622b28244fd394242f091445d37b45fbdaJens Axboe		e->offset = i->offset;
153d11a563f99321a4844d52000211af9e772a1f415Jens Axboe		flist_add_tail(&e->list, &c->extent_list[0]);
1544deca3622b28244fd394242f091445d37b45fbdaJens Axboe	}
1555aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
1565aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	c->count++;
1575aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe}
1585aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
1595aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboestatic int col_check(struct chunk *c, struct item *i)
1605aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe{
1615aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	struct extent *e;
1625aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	char *cbuf, *ibuf;
1635aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	int ret = 1;
1645aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
1655aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	cbuf = fio_memalign(blocksize, blocksize);
1665aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	ibuf = fio_memalign(blocksize, blocksize);
1675aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
168d11a563f99321a4844d52000211af9e772a1f415Jens Axboe	e = flist_entry(c->extent_list[0].next, struct extent, list);
1696d686d99c8f32cd23aae98bca7fd2ece348ffa5eJens Axboe	if (read_block(file.fd, cbuf, e->offset))
1705aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe		goto out;
1715aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
1726d686d99c8f32cd23aae98bca7fd2ece348ffa5eJens Axboe	if (read_block(file.fd, ibuf, i->offset))
1735aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe		goto out;
1745aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
1755aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	ret = memcmp(ibuf, cbuf, blocksize);
1765aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboeout:
1775aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	fio_memfree(cbuf, blocksize);
1785aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	fio_memfree(ibuf, blocksize);
1795aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	return ret;
1805aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe}
1815aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
182d11a563f99321a4844d52000211af9e772a1f415Jens Axboestatic struct chunk *alloc_chunk(void)
183d11a563f99321a4844d52000211af9e772a1f415Jens Axboe{
184d11a563f99321a4844d52000211af9e772a1f415Jens Axboe	struct chunk *c;
185d11a563f99321a4844d52000211af9e772a1f415Jens Axboe
186d11a563f99321a4844d52000211af9e772a1f415Jens Axboe	if (collision_check || dump_output) {
187d11a563f99321a4844d52000211af9e772a1f415Jens Axboe		c = malloc(sizeof(struct chunk) + sizeof(struct flist_head));
188d11a563f99321a4844d52000211af9e772a1f415Jens Axboe		INIT_FLIST_HEAD(&c->extent_list[0]);
189d11a563f99321a4844d52000211af9e772a1f415Jens Axboe	} else
190d11a563f99321a4844d52000211af9e772a1f415Jens Axboe		c = malloc(sizeof(struct chunk));
191d11a563f99321a4844d52000211af9e772a1f415Jens Axboe
192d11a563f99321a4844d52000211af9e772a1f415Jens Axboe	return c;
193d11a563f99321a4844d52000211af9e772a1f415Jens Axboe}
194d11a563f99321a4844d52000211af9e772a1f415Jens Axboe
1955aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboestatic void insert_chunk(struct item *i)
1965aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe{
1975aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	struct rb_node **p, *parent;
1985aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	struct chunk *c;
1995aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	int diff;
2005aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
2015aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	p = &rb_root.rb_node;
2025aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	parent = NULL;
2035aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	while (*p) {
2045aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe		parent = *p;
2055aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
2065aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe		c = rb_entry(parent, struct chunk, rb_node);
2075aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe		diff = memcmp(i->hash, c->hash, sizeof(i->hash));
2085aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe		if (diff < 0)
2095aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe			p = &(*p)->rb_left;
2105aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe		else if (diff > 0)
2115aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe			p = &(*p)->rb_right;
2125aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe		else {
2135aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe			int ret;
2145aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
2155aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe			if (!collision_check)
2165aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe				goto add;
2175aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
2185aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe			fio_mutex_up(rb_lock);
2195aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe			ret = col_check(c, i);
2205aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe			fio_mutex_down(rb_lock);
2215aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
2225aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe			if (!ret)
2235aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe				goto add;
2245aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
2255aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe			p = &(*p)->rb_right;
2265aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe		}
2275aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	}
2285aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
229d11a563f99321a4844d52000211af9e772a1f415Jens Axboe	c = alloc_chunk();
2305aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	RB_CLEAR_NODE(&c->rb_node);
2315aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	c->count = 0;
2325aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	memcpy(c->hash, i->hash, sizeof(i->hash));
2335aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	rb_link_node(&c->rb_node, parent, p);
2345aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	rb_insert_color(&c->rb_node, &rb_root);
2355aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboeadd:
2365aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	add_item(c, i);
2375aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe}
2385aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
23976b9b8306580bf99000be844fae6f91ba08e4ee3Jens Axboestatic void insert_chunks(struct item *items, unsigned int nitems,
24076b9b8306580bf99000be844fae6f91ba08e4ee3Jens Axboe			  uint64_t *ndupes)
2415aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe{
2425aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	int i;
2435aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
2445aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	fio_mutex_down(rb_lock);
2455aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
24676b9b8306580bf99000be844fae6f91ba08e4ee3Jens Axboe	for (i = 0; i < nitems; i++) {
24776b9b8306580bf99000be844fae6f91ba08e4ee3Jens Axboe		if (bloom) {
24876b9b8306580bf99000be844fae6f91ba08e4ee3Jens Axboe			unsigned int s;
24976b9b8306580bf99000be844fae6f91ba08e4ee3Jens Axboe			int r;
25076b9b8306580bf99000be844fae6f91ba08e4ee3Jens Axboe
25176b9b8306580bf99000be844fae6f91ba08e4ee3Jens Axboe			s = sizeof(items[i].hash) / sizeof(uint32_t);
25276b9b8306580bf99000be844fae6f91ba08e4ee3Jens Axboe			r = bloom_set(bloom, items[i].hash, s);
25376b9b8306580bf99000be844fae6f91ba08e4ee3Jens Axboe			*ndupes += r;
25476b9b8306580bf99000be844fae6f91ba08e4ee3Jens Axboe		} else
25576b9b8306580bf99000be844fae6f91ba08e4ee3Jens Axboe			insert_chunk(&items[i]);
25676b9b8306580bf99000be844fae6f91ba08e4ee3Jens Axboe	}
2575aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
2585aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	fio_mutex_up(rb_lock);
2595aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe}
2605aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
2615aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboestatic void crc_buf(void *buf, uint32_t *hash)
2625aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe{
2635aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	struct fio_md5_ctx ctx = { .hash = hash };
2645aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
2655aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	fio_md5_init(&ctx);
2665aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	fio_md5_update(&ctx, buf, blocksize);
2675aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	fio_md5_final(&ctx);
2685aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe}
2695aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
270d393bcd743c553c5c6b047ceb5b104679df004c5Jens Axboestatic unsigned int read_blocks(int fd, void *buf, off_t offset, size_t size)
271d393bcd743c553c5c6b047ceb5b104679df004c5Jens Axboe{
272d393bcd743c553c5c6b047ceb5b104679df004c5Jens Axboe	if (__read_block(fd, buf, offset, size))
273d393bcd743c553c5c6b047ceb5b104679df004c5Jens Axboe		return 0;
274d393bcd743c553c5c6b047ceb5b104679df004c5Jens Axboe
275d393bcd743c553c5c6b047ceb5b104679df004c5Jens Axboe	return size / blocksize;
276d393bcd743c553c5c6b047ceb5b104679df004c5Jens Axboe}
277d393bcd743c553c5c6b047ceb5b104679df004c5Jens Axboe
2785aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboestatic int do_work(struct worker_thread *thread, void *buf)
2795aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe{
2805aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	unsigned int nblocks, i;
2815aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	off_t offset;
282d393bcd743c553c5c6b047ceb5b104679df004c5Jens Axboe	int nitems = 0;
28376b9b8306580bf99000be844fae6f91ba08e4ee3Jens Axboe	uint64_t ndupes = 0;
2845aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	struct item *items;
2855aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
2865aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	offset = thread->cur_offset;
287d393bcd743c553c5c6b047ceb5b104679df004c5Jens Axboe
288d393bcd743c553c5c6b047ceb5b104679df004c5Jens Axboe	nblocks = read_blocks(thread->fd, buf, offset, min(thread->size, (uint64_t)chunk_size));
289d393bcd743c553c5c6b047ceb5b104679df004c5Jens Axboe	if (!nblocks)
290d393bcd743c553c5c6b047ceb5b104679df004c5Jens Axboe		return 1;
291d393bcd743c553c5c6b047ceb5b104679df004c5Jens Axboe
2925aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	items = malloc(sizeof(*items) * nblocks);
2935aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
2945aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	for (i = 0; i < nblocks; i++) {
295d393bcd743c553c5c6b047ceb5b104679df004c5Jens Axboe		void *thisptr = buf + (i * blocksize);
296d393bcd743c553c5c6b047ceb5b104679df004c5Jens Axboe
297bf6238ad99d4c07f3548de957f60b1f1d2848822Jens Axboe		items[i].offset = offset;
298d393bcd743c553c5c6b047ceb5b104679df004c5Jens Axboe		crc_buf(thisptr, items[i].hash);
2995aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe		offset += blocksize;
3005aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe		nitems++;
3015aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	}
3025aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
30376b9b8306580bf99000be844fae6f91ba08e4ee3Jens Axboe	insert_chunks(items, nitems, &ndupes);
30476b9b8306580bf99000be844fae6f91ba08e4ee3Jens Axboe
3055aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	free(items);
30676b9b8306580bf99000be844fae6f91ba08e4ee3Jens Axboe	thread->items += nitems;
30776b9b8306580bf99000be844fae6f91ba08e4ee3Jens Axboe	thread->dupes += ndupes;
308d393bcd743c553c5c6b047ceb5b104679df004c5Jens Axboe	return 0;
3095aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe}
3105aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
3115aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboestatic void *thread_fn(void *data)
3125aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe{
3135aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	struct worker_thread *thread = data;
3145aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	void *buf;
3155aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
316d393bcd743c553c5c6b047ceb5b104679df004c5Jens Axboe	buf = fio_memalign(blocksize, chunk_size);
3175aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
3185aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	do {
3195aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe		if (get_work(&thread->cur_offset, &thread->size)) {
3205aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe			thread->err = 1;
3215aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe			break;
3225aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe		}
3235aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe		if (do_work(thread, buf)) {
3245aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe			thread->err = 1;
3255aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe			break;
3265aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe		}
3275aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	} while (1);
3285aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
3293f3415fe484d4c9427f9ab5cc52390df7c54e9a3Jens Axboe	thread->done = 1;
330d393bcd743c553c5c6b047ceb5b104679df004c5Jens Axboe	fio_memfree(buf, chunk_size);
3315aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	return NULL;
3325aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe}
3335aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
334d11a563f99321a4844d52000211af9e772a1f415Jens Axboestatic void show_progress(struct worker_thread *threads, unsigned long total)
335d11a563f99321a4844d52000211af9e772a1f415Jens Axboe{
336d11a563f99321a4844d52000211af9e772a1f415Jens Axboe	unsigned long last_nitems = 0;
337d11a563f99321a4844d52000211af9e772a1f415Jens Axboe	struct timeval last_tv;
338d11a563f99321a4844d52000211af9e772a1f415Jens Axboe
339d11a563f99321a4844d52000211af9e772a1f415Jens Axboe	fio_gettime(&last_tv, NULL);
340d11a563f99321a4844d52000211af9e772a1f415Jens Axboe
341d11a563f99321a4844d52000211af9e772a1f415Jens Axboe	while (print_progress) {
342d11a563f99321a4844d52000211af9e772a1f415Jens Axboe		unsigned long this_items;
343d11a563f99321a4844d52000211af9e772a1f415Jens Axboe		unsigned long nitems = 0;
344d11a563f99321a4844d52000211af9e772a1f415Jens Axboe		uint64_t tdiff;
345d11a563f99321a4844d52000211af9e772a1f415Jens Axboe		float perc;
346cb8a41db4d9fd90dde70fe19a6994a3da8cb330cJens Axboe		int some_done = 0;
347d11a563f99321a4844d52000211af9e772a1f415Jens Axboe		int i;
348d11a563f99321a4844d52000211af9e772a1f415Jens Axboe
349d11a563f99321a4844d52000211af9e772a1f415Jens Axboe		for (i = 0; i < num_threads; i++) {
350d11a563f99321a4844d52000211af9e772a1f415Jens Axboe			nitems += threads[i].items;
351d11a563f99321a4844d52000211af9e772a1f415Jens Axboe			some_done = threads[i].done;
352d11a563f99321a4844d52000211af9e772a1f415Jens Axboe			if (some_done)
353d11a563f99321a4844d52000211af9e772a1f415Jens Axboe				break;
354d11a563f99321a4844d52000211af9e772a1f415Jens Axboe		}
355d11a563f99321a4844d52000211af9e772a1f415Jens Axboe
356d11a563f99321a4844d52000211af9e772a1f415Jens Axboe		if (some_done)
357d11a563f99321a4844d52000211af9e772a1f415Jens Axboe			break;
358d11a563f99321a4844d52000211af9e772a1f415Jens Axboe
359d11a563f99321a4844d52000211af9e772a1f415Jens Axboe		perc = (float) nitems / (float) total;
360d11a563f99321a4844d52000211af9e772a1f415Jens Axboe		perc *= 100.0;
361d11a563f99321a4844d52000211af9e772a1f415Jens Axboe		this_items = nitems - last_nitems;
362d11a563f99321a4844d52000211af9e772a1f415Jens Axboe		this_items *= blocksize;
363d11a563f99321a4844d52000211af9e772a1f415Jens Axboe		tdiff = mtime_since_now(&last_tv);
364d11a563f99321a4844d52000211af9e772a1f415Jens Axboe		if (tdiff) {
36578340c01f77dc2cbb15ab8ef560d379582bfd5c8Jens Axboe			this_items = (this_items * 1000) / (tdiff * 1024);
366d11a563f99321a4844d52000211af9e772a1f415Jens Axboe			printf("%3.2f%% done (%luKB/sec)\r", perc, this_items);
367d11a563f99321a4844d52000211af9e772a1f415Jens Axboe			last_nitems = nitems;
368d11a563f99321a4844d52000211af9e772a1f415Jens Axboe			fio_gettime(&last_tv, NULL);
369d11a563f99321a4844d52000211af9e772a1f415Jens Axboe		} else
370d11a563f99321a4844d52000211af9e772a1f415Jens Axboe			printf("%3.2f%% done\r", perc);
371d11a563f99321a4844d52000211af9e772a1f415Jens Axboe		fflush(stdout);
372d11a563f99321a4844d52000211af9e772a1f415Jens Axboe		usleep(250000);
373d11a563f99321a4844d52000211af9e772a1f415Jens Axboe	};
374d11a563f99321a4844d52000211af9e772a1f415Jens Axboe}
375d11a563f99321a4844d52000211af9e772a1f415Jens Axboe
3766d686d99c8f32cd23aae98bca7fd2ece348ffa5eJens Axboestatic int run_dedupe_threads(struct fio_file *f, uint64_t dev_size,
3776d686d99c8f32cd23aae98bca7fd2ece348ffa5eJens Axboe			      uint64_t *nextents, uint64_t *nchunks)
3785aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe{
3795aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	struct worker_thread *threads;
3803f3415fe484d4c9427f9ab5cc52390df7c54e9a3Jens Axboe	unsigned long nitems, total_items;
3815aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	int i, err = 0;
3825aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
3835aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	total_size = dev_size;
3843f3415fe484d4c9427f9ab5cc52390df7c54e9a3Jens Axboe	total_items = dev_size / blocksize;
3855aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	cur_offset = 0;
3865aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	size_lock = fio_mutex_init(FIO_MUTEX_UNLOCKED);
3875aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
3885aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	threads = malloc(num_threads * sizeof(struct worker_thread));
3895aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	for (i = 0; i < num_threads; i++) {
390acf7ead2975987d8fa8749f06595123d2f789560Jens Axboe		memset(&threads[i], 0, sizeof(struct worker_thread));
3916d686d99c8f32cd23aae98bca7fd2ece348ffa5eJens Axboe		threads[i].fd = f->fd;
3925aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
3935aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe		err = pthread_create(&threads[i].thread, NULL, thread_fn, &threads[i]);
3945aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe		if (err) {
3955aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe			log_err("fio: thread startup failed\n");
3965aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe			break;
3975aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe		}
3985aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	}
3995aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
400d11a563f99321a4844d52000211af9e772a1f415Jens Axboe	show_progress(threads, total_items);
4013f3415fe484d4c9427f9ab5cc52390df7c54e9a3Jens Axboe
4025aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	nitems = 0;
40376b9b8306580bf99000be844fae6f91ba08e4ee3Jens Axboe	*nextents = 0;
40476b9b8306580bf99000be844fae6f91ba08e4ee3Jens Axboe	*nchunks = 1;
4055aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	for (i = 0; i < num_threads; i++) {
4065aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe		void *ret;
4075aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe		pthread_join(threads[i].thread, &ret);
4085aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe		nitems += threads[i].items;
40976b9b8306580bf99000be844fae6f91ba08e4ee3Jens Axboe		*nchunks += threads[i].dupes;
4105aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	}
4115aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
4125aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	printf("Threads(%u): %lu items processed\n", num_threads, nitems);
4135aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
41476b9b8306580bf99000be844fae6f91ba08e4ee3Jens Axboe	*nextents = nitems;
41576b9b8306580bf99000be844fae6f91ba08e4ee3Jens Axboe	*nchunks = nitems - *nchunks;
41676b9b8306580bf99000be844fae6f91ba08e4ee3Jens Axboe
4175aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	fio_mutex_remove(size_lock);
418d11a563f99321a4844d52000211af9e772a1f415Jens Axboe	free(threads);
4195aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	return err;
4205aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe}
4215aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
42276b9b8306580bf99000be844fae6f91ba08e4ee3Jens Axboestatic int dedupe_check(const char *filename, uint64_t *nextents,
42376b9b8306580bf99000be844fae6f91ba08e4ee3Jens Axboe			uint64_t *nchunks)
4245aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe{
4255aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	uint64_t dev_size;
4265aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	struct stat sb;
4275aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	int flags;
4285aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
4295aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	flags = O_RDONLY;
4305aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	if (odirect)
43118a7ca52492e3139056c9cfba8bf4087fba75e9dJens Axboe		flags |= OS_O_DIRECT;
4325aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
4336d686d99c8f32cd23aae98bca7fd2ece348ffa5eJens Axboe	memset(&file, 0, sizeof(file));
4346d686d99c8f32cd23aae98bca7fd2ece348ffa5eJens Axboe	file.file_name = strdup(filename);
4356d686d99c8f32cd23aae98bca7fd2ece348ffa5eJens Axboe
4366d686d99c8f32cd23aae98bca7fd2ece348ffa5eJens Axboe	file.fd = open(filename, flags);
4376d686d99c8f32cd23aae98bca7fd2ece348ffa5eJens Axboe	if (file.fd == -1) {
4385aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe		perror("open");
4396d686d99c8f32cd23aae98bca7fd2ece348ffa5eJens Axboe		goto err;
4405aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	}
4415aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
4426d686d99c8f32cd23aae98bca7fd2ece348ffa5eJens Axboe	if (fstat(file.fd, &sb) < 0) {
4435aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe		perror("fstat");
4446d686d99c8f32cd23aae98bca7fd2ece348ffa5eJens Axboe		goto err;
4455aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	}
4465aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
4476d686d99c8f32cd23aae98bca7fd2ece348ffa5eJens Axboe	dev_size = get_size(&file, &sb);
4486d686d99c8f32cd23aae98bca7fd2ece348ffa5eJens Axboe	if (!dev_size)
4496d686d99c8f32cd23aae98bca7fd2ece348ffa5eJens Axboe		goto err;
4505aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
45176b9b8306580bf99000be844fae6f91ba08e4ee3Jens Axboe	if (use_bloom) {
45276b9b8306580bf99000be844fae6f91ba08e4ee3Jens Axboe		uint64_t bloom_entries;
45376b9b8306580bf99000be844fae6f91ba08e4ee3Jens Axboe
4542ccf91acdec8b977c630238cee65c59b1778311aJens Axboe		bloom_entries = 8 * (dev_size / blocksize);
45576b9b8306580bf99000be844fae6f91ba08e4ee3Jens Axboe		bloom = bloom_new(bloom_entries);
45676b9b8306580bf99000be844fae6f91ba08e4ee3Jens Axboe	}
45776b9b8306580bf99000be844fae6f91ba08e4ee3Jens Axboe
4580e794083fb05e9867dc621ee495bad2da99bb49bJens Axboe	printf("Will check <%s>, size <%llu>, using %u threads\n", filename, (unsigned long long) dev_size, num_threads);
4595aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
4606d686d99c8f32cd23aae98bca7fd2ece348ffa5eJens Axboe	return run_dedupe_threads(&file, dev_size, nextents, nchunks);
4616d686d99c8f32cd23aae98bca7fd2ece348ffa5eJens Axboeerr:
4626d686d99c8f32cd23aae98bca7fd2ece348ffa5eJens Axboe	if (file.fd != -1)
4636d686d99c8f32cd23aae98bca7fd2ece348ffa5eJens Axboe		close(file.fd);
4646d686d99c8f32cd23aae98bca7fd2ece348ffa5eJens Axboe	free(file.file_name);
4656d686d99c8f32cd23aae98bca7fd2ece348ffa5eJens Axboe	return 1;
4665aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe}
4675aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
4685aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboestatic void show_chunk(struct chunk *c)
4695aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe{
4705aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	struct flist_head *n;
4715aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	struct extent *e;
4725aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
473b2a657f3ed44547dc3a99c23a9839ae2c32cff74Jens Axboe	printf("c hash %8x %8x %8x %8x, count %lu\n", c->hash[0], c->hash[1], c->hash[2], c->hash[3], (unsigned long) c->count);
474d11a563f99321a4844d52000211af9e772a1f415Jens Axboe	flist_for_each(n, &c->extent_list[0]) {
4755aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe		e = flist_entry(n, struct extent, list);
476b2a657f3ed44547dc3a99c23a9839ae2c32cff74Jens Axboe		printf("\toffset %llu\n", (unsigned long long) e->offset);
4775aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	}
4785aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe}
4795aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
/*
 * Print the scan summary.
 *
 * nextents: total number of extents seen
 * nchunks:  number of unique extents among them
 *
 * Emits the raw counts, the de-dupe ratio ("infinite" when there are no
 * unique extents) and the matching fio dedupe_percentage setting, rounded
 * to the nearest integer.
 */
static void show_stat(uint64_t nextents, uint64_t nchunks)
{
	double perc = 0.0;

	printf("Extents=%" PRIu64 ", Unique extents=%" PRIu64 "\n", nextents, nchunks);

	if (nchunks) {
		double ratio = (double) nextents / (double) nchunks;

		printf("De-dupe ratio: 1:%3.2f\n", ratio - 1.0);
	} else
		printf("De-dupe ratio: 1:infinite\n");

	/*
	 * With zero extents the division would be 0/0 (NaN), and converting
	 * NaN to an integer is undefined; report 0% for an empty scan.
	 */
	if (nextents)
		perc = 100.0 * (1.00 - ((double) nchunks / (double) nextents));

	/* a negative value cannot be converted to unsigned safely */
	if (perc < 0.0)
		perc = 0.0;

	printf("Fio setting: dedupe_percentage=%u\n", (unsigned int) (perc + 0.50));
}
49776b9b8306580bf99000be844fae6f91ba08e4ee3Jens Axboe
49876b9b8306580bf99000be844fae6f91ba08e4ee3Jens Axboestatic void iter_rb_tree(uint64_t *nextents, uint64_t *nchunks)
49976b9b8306580bf99000be844fae6f91ba08e4ee3Jens Axboe{
50076b9b8306580bf99000be844fae6f91ba08e4ee3Jens Axboe	struct rb_node *n;
50176b9b8306580bf99000be844fae6f91ba08e4ee3Jens Axboe
50276b9b8306580bf99000be844fae6f91ba08e4ee3Jens Axboe	*nchunks = *nextents = 0;
5035aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
5045aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	n = rb_first(&rb_root);
5055aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	if (!n)
5065aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe		return;
5075aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
5085aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	do {
5095aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe		struct chunk *c;
5105aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
5115aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe		c = rb_entry(n, struct chunk, rb_node);
51276b9b8306580bf99000be844fae6f91ba08e4ee3Jens Axboe		(*nchunks)++;
51376b9b8306580bf99000be844fae6f91ba08e4ee3Jens Axboe		*nextents += c->count;
5145aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
5155aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe		if (dump_output)
5165aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe			show_chunk(c);
5175aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
5185aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	} while ((n = rb_next(n)) != NULL);
5195aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe}
5205aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
/*
 * Print the command line help through log_err(): a short description, the
 * invocation synopsis using argv[0] as the program name, and one line per
 * supported option. Always returns 1 so callers can 'return usage(argv);'.
 */
static int usage(char *argv[])
{
	static const char *const option_help[] = {
		"\t-b\tChunk size to use\n",
		"\t-t\tNumber of threads to use\n",
		"\t-d\tFull extent/chunk debug output\n",
		"\t-o\tUse O_DIRECT\n",
		"\t-c\tFull collision check\n",
		"\t-B\tUse probabilistic bloom filter\n",
		"\t-p\tPrint progress indicator\n",
	};
	unsigned int i;

	log_err("Check for dedupable blocks on a device/file\n\n");
	log_err("%s: [options] <device or file>\n", argv[0]);

	for (i = 0; i < sizeof(option_help) / sizeof(option_help[0]); i++)
		log_err("%s", option_help[i]);

	return 1;
}
5345aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
5355aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboeint main(int argc, char *argv[])
5365aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe{
537c6f0f390c7072c91a9ac0d980e078e842226b9c1Jens Axboe	uint64_t nextents = 0, nchunks = 0;
5385aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	int c, ret;
5395aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
5407a74197fa3c678aef89a4645c6cd26bfff2ff13aJens Axboe	debug_init();
5417a74197fa3c678aef89a4645c6cd26bfff2ff13aJens Axboe
54276b9b8306580bf99000be844fae6f91ba08e4ee3Jens Axboe	while ((c = getopt(argc, argv, "b:t:d:o:c:p:B:")) != -1) {
5435aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe		switch (c) {
5445aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe		case 'b':
5455aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe			blocksize = atoi(optarg);
5465aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe			break;
5475aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe		case 't':
5485aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe			num_threads = atoi(optarg);
5495aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe			break;
5505aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe		case 'd':
5515aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe			dump_output = atoi(optarg);
5525aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe			break;
5535aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe		case 'o':
5545aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe			odirect = atoi(optarg);
5555aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe			break;
5565aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe		case 'c':
5575aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe			collision_check = atoi(optarg);
5585aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe			break;
5593f3415fe484d4c9427f9ab5cc52390df7c54e9a3Jens Axboe		case 'p':
5603f3415fe484d4c9427f9ab5cc52390df7c54e9a3Jens Axboe			print_progress = atoi(optarg);
5613f3415fe484d4c9427f9ab5cc52390df7c54e9a3Jens Axboe			break;
56276b9b8306580bf99000be844fae6f91ba08e4ee3Jens Axboe		case 'B':
56376b9b8306580bf99000be844fae6f91ba08e4ee3Jens Axboe			use_bloom = atoi(optarg);
56476b9b8306580bf99000be844fae6f91ba08e4ee3Jens Axboe			break;
5655aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe		case '?':
5665aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe		default:
5675aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe			return usage(argv);
5685aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe		}
5695aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	}
5705aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
57176b9b8306580bf99000be844fae6f91ba08e4ee3Jens Axboe	if (collision_check || dump_output)
57276b9b8306580bf99000be844fae6f91ba08e4ee3Jens Axboe		use_bloom = 0;
57376b9b8306580bf99000be844fae6f91ba08e4ee3Jens Axboe
5745aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	if (!num_threads)
5755aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe		num_threads = cpus_online();
5765aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
5775aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	if (argc == optind)
5785aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe		return usage(argv);
5795aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
5805aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	sinit();
5815aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
5825aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	rb_root = RB_ROOT;
5835aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	rb_lock = fio_mutex_init(FIO_MUTEX_UNLOCKED);
5845aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
58576b9b8306580bf99000be844fae6f91ba08e4ee3Jens Axboe	ret = dedupe_check(argv[optind], &nextents, &nchunks);
58676b9b8306580bf99000be844fae6f91ba08e4ee3Jens Axboe
587343334a87f2ef7541635af4009ead4ea6521ada6Jens Axboe	if (!ret) {
588343334a87f2ef7541635af4009ead4ea6521ada6Jens Axboe		if (!bloom)
589343334a87f2ef7541635af4009ead4ea6521ada6Jens Axboe			iter_rb_tree(&nextents, &nchunks);
5905aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
591343334a87f2ef7541635af4009ead4ea6521ada6Jens Axboe		show_stat(nextents, nchunks);
592343334a87f2ef7541635af4009ead4ea6521ada6Jens Axboe	}
5935aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe
594997c9c3f81288ffb9c3714a153f25b9f2500cb48Jens Axboe	fio_mutex_remove(rb_lock);
595c6f0f390c7072c91a9ac0d980e078e842226b9c1Jens Axboe	if (bloom)
596c6f0f390c7072c91a9ac0d980e078e842226b9c1Jens Axboe		bloom_free(bloom);
5975aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	scleanup();
5985aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe	return ret;
5995aa702ce283d9a4152989bcd72ed918b056b6e88Jens Axboe}
600