send.c revision 35075bb046cc91f42a0e5336bdc07f3279061add
1/*
2 * Copyright (C) 2012 Alexander Block.  All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#include <linux/bsearch.h>
20#include <linux/fs.h>
21#include <linux/file.h>
22#include <linux/sort.h>
23#include <linux/mount.h>
24#include <linux/xattr.h>
25#include <linux/posix_acl_xattr.h>
26#include <linux/radix-tree.h>
27#include <linux/crc32c.h>
28#include <linux/vmalloc.h>
29
30#include "send.h"
31#include "backref.h"
32#include "locking.h"
33#include "disk-io.h"
34#include "btrfs_inode.h"
35#include "transaction.h"
36
/* Set to non-zero to enable the verbose_printk() debug messages. */
static int g_verbose = 0;

/*
 * printk() wrapper that only emits output while g_verbose is non-zero.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * exactly one statement. A bare "if (...) printk(...)" would silently
 * capture the "else" branch of an enclosing if/else at the call site.
 */
#define verbose_printk(...) \
	do { \
		if (g_verbose) \
			printk(__VA_ARGS__); \
	} while (0)
40
41/*
42 * A fs_path is a helper to dynamically build path names with unknown size.
43 * It reallocates the internal buffer on demand.
44 * It allows fast adding of path elements on the right side (normal path) and
45 * fast adding to the left side (reversed path). A reversed path can also be
46 * unreversed if needed.
47 */
48struct fs_path {
49	union {
50		struct {
51			char *start;
52			char *end;
53			char *prepared;
54
55			char *buf;
56			int buf_len;
57			int reversed:1;
58			int virtual_mem:1;
59			char inline_buf[];
60		};
61		char pad[PAGE_SIZE];
62	};
63};
64#define FS_PATH_INLINE_SIZE \
65	(sizeof(struct fs_path) - offsetof(struct fs_path, inline_buf))
66
67
68/* reused for each extent */
69struct clone_root {
70	struct btrfs_root *root;
71	u64 ino;
72	u64 offset;
73
74	u64 found_refs;
75};
76
77#define SEND_CTX_MAX_NAME_CACHE_SIZE 128
78#define SEND_CTX_NAME_CACHE_CLEAN_SIZE (SEND_CTX_MAX_NAME_CACHE_SIZE * 2)
79
80struct send_ctx {
81	struct file *send_filp;
82	loff_t send_off;
83	char *send_buf;
84	u32 send_size;
85	u32 send_max_size;
86	u64 total_send_size;
87	u64 cmd_send_size[BTRFS_SEND_C_MAX + 1];
88
89	struct vfsmount *mnt;
90
91	struct btrfs_root *send_root;
92	struct btrfs_root *parent_root;
93	struct clone_root *clone_roots;
94	int clone_roots_cnt;
95
96	/* current state of the compare_tree call */
97	struct btrfs_path *left_path;
98	struct btrfs_path *right_path;
99	struct btrfs_key *cmp_key;
100
101	/*
102	 * infos of the currently processed inode. In case of deleted inodes,
103	 * these are the values from the deleted inode.
104	 */
105	u64 cur_ino;
106	u64 cur_inode_gen;
107	int cur_inode_new;
108	int cur_inode_new_gen;
109	int cur_inode_deleted;
110	u64 cur_inode_size;
111	u64 cur_inode_mode;
112
113	u64 send_progress;
114
115	struct list_head new_refs;
116	struct list_head deleted_refs;
117
118	struct radix_tree_root name_cache;
119	struct list_head name_cache_list;
120	int name_cache_size;
121
122	struct file *cur_inode_filp;
123	char *read_buf;
124};
125
126struct name_cache_entry {
127	struct list_head list;
128	u64 ino;
129	u64 gen;
130	u64 parent_ino;
131	u64 parent_gen;
132	int ret;
133	int need_later_update;
134	int name_len;
135	char name[];
136};
137
138static void fs_path_reset(struct fs_path *p)
139{
140	if (p->reversed) {
141		p->start = p->buf + p->buf_len - 1;
142		p->end = p->start;
143		*p->start = 0;
144	} else {
145		p->start = p->buf;
146		p->end = p->start;
147		*p->start = 0;
148	}
149}
150
151static struct fs_path *fs_path_alloc(struct send_ctx *sctx)
152{
153	struct fs_path *p;
154
155	p = kmalloc(sizeof(*p), GFP_NOFS);
156	if (!p)
157		return NULL;
158	p->reversed = 0;
159	p->virtual_mem = 0;
160	p->buf = p->inline_buf;
161	p->buf_len = FS_PATH_INLINE_SIZE;
162	fs_path_reset(p);
163	return p;
164}
165
166static struct fs_path *fs_path_alloc_reversed(struct send_ctx *sctx)
167{
168	struct fs_path *p;
169
170	p = fs_path_alloc(sctx);
171	if (!p)
172		return NULL;
173	p->reversed = 1;
174	fs_path_reset(p);
175	return p;
176}
177
178static void fs_path_free(struct send_ctx *sctx, struct fs_path *p)
179{
180	if (!p)
181		return;
182	if (p->buf != p->inline_buf) {
183		if (p->virtual_mem)
184			vfree(p->buf);
185		else
186			kfree(p->buf);
187	}
188	kfree(p);
189}
190
191static int fs_path_len(struct fs_path *p)
192{
193	return p->end - p->start;
194}
195
196static int fs_path_ensure_buf(struct fs_path *p, int len)
197{
198	char *tmp_buf;
199	int path_len;
200	int old_buf_len;
201
202	len++;
203
204	if (p->buf_len >= len)
205		return 0;
206
207	path_len = p->end - p->start;
208	old_buf_len = p->buf_len;
209	len = PAGE_ALIGN(len);
210
211	if (p->buf == p->inline_buf) {
212		tmp_buf = kmalloc(len, GFP_NOFS);
213		if (!tmp_buf) {
214			tmp_buf = vmalloc(len);
215			if (!tmp_buf)
216				return -ENOMEM;
217			p->virtual_mem = 1;
218		}
219		memcpy(tmp_buf, p->buf, p->buf_len);
220		p->buf = tmp_buf;
221		p->buf_len = len;
222	} else {
223		if (p->virtual_mem) {
224			tmp_buf = vmalloc(len);
225			if (!tmp_buf)
226				return -ENOMEM;
227			memcpy(tmp_buf, p->buf, p->buf_len);
228			vfree(p->buf);
229		} else {
230			tmp_buf = krealloc(p->buf, len, GFP_NOFS);
231			if (!tmp_buf) {
232				tmp_buf = vmalloc(len);
233				if (!tmp_buf)
234					return -ENOMEM;
235				memcpy(tmp_buf, p->buf, p->buf_len);
236				kfree(p->buf);
237				p->virtual_mem = 1;
238			}
239		}
240		p->buf = tmp_buf;
241		p->buf_len = len;
242	}
243	if (p->reversed) {
244		tmp_buf = p->buf + old_buf_len - path_len - 1;
245		p->end = p->buf + p->buf_len - 1;
246		p->start = p->end - path_len;
247		memmove(p->start, tmp_buf, path_len + 1);
248	} else {
249		p->start = p->buf;
250		p->end = p->start + path_len;
251	}
252	return 0;
253}
254
255static int fs_path_prepare_for_add(struct fs_path *p, int name_len)
256{
257	int ret;
258	int new_len;
259
260	new_len = p->end - p->start + name_len;
261	if (p->start != p->end)
262		new_len++;
263	ret = fs_path_ensure_buf(p, new_len);
264	if (ret < 0)
265		goto out;
266
267	if (p->reversed) {
268		if (p->start != p->end)
269			*--p->start = '/';
270		p->start -= name_len;
271		p->prepared = p->start;
272	} else {
273		if (p->start != p->end)
274			*p->end++ = '/';
275		p->prepared = p->end;
276		p->end += name_len;
277		*p->end = 0;
278	}
279
280out:
281	return ret;
282}
283
284static int fs_path_add(struct fs_path *p, const char *name, int name_len)
285{
286	int ret;
287
288	ret = fs_path_prepare_for_add(p, name_len);
289	if (ret < 0)
290		goto out;
291	memcpy(p->prepared, name, name_len);
292	p->prepared = NULL;
293
294out:
295	return ret;
296}
297
298static int fs_path_add_path(struct fs_path *p, struct fs_path *p2)
299{
300	int ret;
301
302	ret = fs_path_prepare_for_add(p, p2->end - p2->start);
303	if (ret < 0)
304		goto out;
305	memcpy(p->prepared, p2->start, p2->end - p2->start);
306	p->prepared = NULL;
307
308out:
309	return ret;
310}
311
312static int fs_path_add_from_extent_buffer(struct fs_path *p,
313					  struct extent_buffer *eb,
314					  unsigned long off, int len)
315{
316	int ret;
317
318	ret = fs_path_prepare_for_add(p, len);
319	if (ret < 0)
320		goto out;
321
322	read_extent_buffer(eb, p->prepared, off, len);
323	p->prepared = NULL;
324
325out:
326	return ret;
327}
328
#if 0
/*
 * Currently compiled out.
 * Moves end backwards until it reaches a '/' or the start of the path and
 * terminates the string there. Only valid for non-reversed paths.
 */
static void fs_path_remove(struct fs_path *p)
{
	BUG_ON(p->reversed);
	for (; p->start != p->end && *p->end != '/'; p->end--)
		;
	*p->end = 0;
}
#endif
338
339static int fs_path_copy(struct fs_path *p, struct fs_path *from)
340{
341	int ret;
342
343	p->reversed = from->reversed;
344	fs_path_reset(p);
345
346	ret = fs_path_add_path(p, from);
347
348	return ret;
349}
350
351
352static void fs_path_unreverse(struct fs_path *p)
353{
354	char *tmp;
355	int len;
356
357	if (!p->reversed)
358		return;
359
360	tmp = p->start;
361	len = p->end - p->start;
362	p->start = p->buf;
363	p->end = p->start + len;
364	memmove(p->start, tmp, len + 1);
365	p->reversed = 0;
366}
367
368static struct btrfs_path *alloc_path_for_send(void)
369{
370	struct btrfs_path *path;
371
372	path = btrfs_alloc_path();
373	if (!path)
374		return NULL;
375	path->search_commit_root = 1;
376	path->skip_locking = 1;
377	return path;
378}
379
380static int write_buf(struct send_ctx *sctx, const void *buf, u32 len)
381{
382	int ret;
383	mm_segment_t old_fs;
384	u32 pos = 0;
385
386	old_fs = get_fs();
387	set_fs(KERNEL_DS);
388
389	while (pos < len) {
390		ret = vfs_write(sctx->send_filp, (char *)buf + pos, len - pos,
391				&sctx->send_off);
392		/* TODO handle that correctly */
393		/*if (ret == -ERESTARTSYS) {
394			continue;
395		}*/
396		if (ret < 0)
397			goto out;
398		if (ret == 0) {
399			ret = -EIO;
400			goto out;
401		}
402		pos += ret;
403	}
404
405	ret = 0;
406
407out:
408	set_fs(old_fs);
409	return ret;
410}
411
412static int tlv_put(struct send_ctx *sctx, u16 attr, const void *data, int len)
413{
414	struct btrfs_tlv_header *hdr;
415	int total_len = sizeof(*hdr) + len;
416	int left = sctx->send_max_size - sctx->send_size;
417
418	if (unlikely(left < total_len))
419		return -EOVERFLOW;
420
421	hdr = (struct btrfs_tlv_header *) (sctx->send_buf + sctx->send_size);
422	hdr->tlv_type = cpu_to_le16(attr);
423	hdr->tlv_len = cpu_to_le16(len);
424	memcpy(hdr + 1, data, len);
425	sctx->send_size += total_len;
426
427	return 0;
428}
429
#if 0
/*
 * Currently compiled out: fixed width integer attributes smaller than
 * 64 bit. Multi-byte values are stored little endian.
 */
static int tlv_put_u8(struct send_ctx *sctx, u16 attr, u8 value)
{
	return tlv_put(sctx, attr, &value, sizeof(value));
}

static int tlv_put_u16(struct send_ctx *sctx, u16 attr, u16 value)
{
	__le16 le = cpu_to_le16(value);

	return tlv_put(sctx, attr, &le, sizeof(le));
}

static int tlv_put_u32(struct send_ctx *sctx, u16 attr, u32 value)
{
	__le32 le = cpu_to_le32(value);

	return tlv_put(sctx, attr, &le, sizeof(le));
}
#endif
448
449static int tlv_put_u64(struct send_ctx *sctx, u16 attr, u64 value)
450{
451	__le64 tmp = cpu_to_le64(value);
452	return tlv_put(sctx, attr, &tmp, sizeof(tmp));
453}
454
455static int tlv_put_string(struct send_ctx *sctx, u16 attr,
456			  const char *str, int len)
457{
458	if (len == -1)
459		len = strlen(str);
460	return tlv_put(sctx, attr, str, len);
461}
462
463static int tlv_put_uuid(struct send_ctx *sctx, u16 attr,
464			const u8 *uuid)
465{
466	return tlv_put(sctx, attr, uuid, BTRFS_UUID_SIZE);
467}
468
#if 0
/*
 * Currently compiled out: append a struct timespec as attribute, converted
 * to the little endian struct btrfs_timespec.
 */
static int tlv_put_timespec(struct send_ctx *sctx, u16 attr,
			    struct timespec *ts)
{
	struct btrfs_timespec bts;

	bts.nsec = cpu_to_le32(ts->tv_nsec);
	bts.sec = cpu_to_le64(ts->tv_sec);
	return tlv_put(sctx, attr, &bts, sizeof(bts));
}
#endif
479
480static int tlv_put_btrfs_timespec(struct send_ctx *sctx, u16 attr,
481				  struct extent_buffer *eb,
482				  struct btrfs_timespec *ts)
483{
484	struct btrfs_timespec bts;
485	read_extent_buffer(eb, &bts, (unsigned long)ts, sizeof(bts));
486	return tlv_put(sctx, attr, &bts, sizeof(bts));
487}
488
489
/*
 * Convenience wrappers around the tlv_put*() helpers.
 *
 * Every macro stores the result of the helper in a variable named "ret" and
 * jumps to the label "tlv_put_failure" if that result is negative, so both
 * have to exist in the function that uses the macro.
 */

/* Generic attribute: data points to attrlen bytes, as taken by tlv_put(). */
#define TLV_PUT(sctx, attrtype, data, attrlen) \
	do { \
		ret = tlv_put(sctx, attrtype, data, attrlen); \
		if (ret < 0) \
			goto tlv_put_failure; \
	} while (0)

#define TLV_PUT_INT(sctx, attrtype, bits, value) \
	do { \
		ret = tlv_put_u##bits(sctx, attrtype, value); \
		if (ret < 0) \
			goto tlv_put_failure; \
	} while (0)

#define TLV_PUT_U8(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 8, data)
#define TLV_PUT_U16(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 16, data)
#define TLV_PUT_U32(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 32, data)
#define TLV_PUT_U64(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 64, data)
#define TLV_PUT_STRING(sctx, attrtype, str, len) \
	do { \
		ret = tlv_put_string(sctx, attrtype, str, len); \
		if (ret < 0) \
			goto tlv_put_failure; \
	} while (0)
/* p is a struct fs_path pointer; it is evaluated more than once. */
#define TLV_PUT_PATH(sctx, attrtype, p) \
	do { \
		ret = tlv_put_string(sctx, attrtype, (p)->start, \
			(p)->end - (p)->start); \
		if (ret < 0) \
			goto tlv_put_failure; \
	} while (0)
#define TLV_PUT_UUID(sctx, attrtype, uuid) \
	do { \
		ret = tlv_put_uuid(sctx, attrtype, uuid); \
		if (ret < 0) \
			goto tlv_put_failure; \
	} while (0)
/* Relies on tlv_put_timespec(), which has to be available where used. */
#define TLV_PUT_TIMESPEC(sctx, attrtype, ts) \
	do { \
		ret = tlv_put_timespec(sctx, attrtype, ts); \
		if (ret < 0) \
			goto tlv_put_failure; \
	} while (0)
#define TLV_PUT_BTRFS_TIMESPEC(sctx, attrtype, eb, ts) \
	do { \
		ret = tlv_put_btrfs_timespec(sctx, attrtype, eb, ts); \
		if (ret < 0) \
			goto tlv_put_failure; \
	} while (0)
539
540static int send_header(struct send_ctx *sctx)
541{
542	struct btrfs_stream_header hdr;
543
544	strcpy(hdr.magic, BTRFS_SEND_STREAM_MAGIC);
545	hdr.version = cpu_to_le32(BTRFS_SEND_STREAM_VERSION);
546
547	return write_buf(sctx, &hdr, sizeof(hdr));
548}
549
550/*
551 * For each command/item we want to send to userspace, we call this function.
552 */
553static int begin_cmd(struct send_ctx *sctx, int cmd)
554{
555	struct btrfs_cmd_header *hdr;
556
557	if (!sctx->send_buf) {
558		WARN_ON(1);
559		return -EINVAL;
560	}
561
562	BUG_ON(sctx->send_size);
563
564	sctx->send_size += sizeof(*hdr);
565	hdr = (struct btrfs_cmd_header *)sctx->send_buf;
566	hdr->cmd = cpu_to_le16(cmd);
567
568	return 0;
569}
570
571static int send_cmd(struct send_ctx *sctx)
572{
573	int ret;
574	struct btrfs_cmd_header *hdr;
575	u32 crc;
576
577	hdr = (struct btrfs_cmd_header *)sctx->send_buf;
578	hdr->len = cpu_to_le32(sctx->send_size - sizeof(*hdr));
579	hdr->crc = 0;
580
581	crc = crc32c(0, (unsigned char *)sctx->send_buf, sctx->send_size);
582	hdr->crc = cpu_to_le32(crc);
583
584	ret = write_buf(sctx, sctx->send_buf, sctx->send_size);
585
586	sctx->total_send_size += sctx->send_size;
587	sctx->cmd_send_size[le16_to_cpu(hdr->cmd)] += sctx->send_size;
588	sctx->send_size = 0;
589
590	return ret;
591}
592
593/*
594 * Sends a move instruction to user space
595 */
596static int send_rename(struct send_ctx *sctx,
597		     struct fs_path *from, struct fs_path *to)
598{
599	int ret;
600
601verbose_printk("btrfs: send_rename %s -> %s\n", from->start, to->start);
602
603	ret = begin_cmd(sctx, BTRFS_SEND_C_RENAME);
604	if (ret < 0)
605		goto out;
606
607	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, from);
608	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_TO, to);
609
610	ret = send_cmd(sctx);
611
612tlv_put_failure:
613out:
614	return ret;
615}
616
617/*
618 * Sends a link instruction to user space
619 */
620static int send_link(struct send_ctx *sctx,
621		     struct fs_path *path, struct fs_path *lnk)
622{
623	int ret;
624
625verbose_printk("btrfs: send_link %s -> %s\n", path->start, lnk->start);
626
627	ret = begin_cmd(sctx, BTRFS_SEND_C_LINK);
628	if (ret < 0)
629		goto out;
630
631	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
632	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, lnk);
633
634	ret = send_cmd(sctx);
635
636tlv_put_failure:
637out:
638	return ret;
639}
640
641/*
642 * Sends an unlink instruction to user space
643 */
644static int send_unlink(struct send_ctx *sctx, struct fs_path *path)
645{
646	int ret;
647
648verbose_printk("btrfs: send_unlink %s\n", path->start);
649
650	ret = begin_cmd(sctx, BTRFS_SEND_C_UNLINK);
651	if (ret < 0)
652		goto out;
653
654	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
655
656	ret = send_cmd(sctx);
657
658tlv_put_failure:
659out:
660	return ret;
661}
662
663/*
664 * Sends a rmdir instruction to user space
665 */
666static int send_rmdir(struct send_ctx *sctx, struct fs_path *path)
667{
668	int ret;
669
670verbose_printk("btrfs: send_rmdir %s\n", path->start);
671
672	ret = begin_cmd(sctx, BTRFS_SEND_C_RMDIR);
673	if (ret < 0)
674		goto out;
675
676	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
677
678	ret = send_cmd(sctx);
679
680tlv_put_failure:
681out:
682	return ret;
683}
684
685/*
686 * Helper function to retrieve some fields from an inode item.
687 */
688static int get_inode_info(struct btrfs_root *root,
689			  u64 ino, u64 *size, u64 *gen,
690			  u64 *mode, u64 *uid, u64 *gid,
691			  u64 *rdev)
692{
693	int ret;
694	struct btrfs_inode_item *ii;
695	struct btrfs_key key;
696	struct btrfs_path *path;
697
698	path = alloc_path_for_send();
699	if (!path)
700		return -ENOMEM;
701
702	key.objectid = ino;
703	key.type = BTRFS_INODE_ITEM_KEY;
704	key.offset = 0;
705	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
706	if (ret < 0)
707		goto out;
708	if (ret) {
709		ret = -ENOENT;
710		goto out;
711	}
712
713	ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
714			struct btrfs_inode_item);
715	if (size)
716		*size = btrfs_inode_size(path->nodes[0], ii);
717	if (gen)
718		*gen = btrfs_inode_generation(path->nodes[0], ii);
719	if (mode)
720		*mode = btrfs_inode_mode(path->nodes[0], ii);
721	if (uid)
722		*uid = btrfs_inode_uid(path->nodes[0], ii);
723	if (gid)
724		*gid = btrfs_inode_gid(path->nodes[0], ii);
725	if (rdev)
726		*rdev = btrfs_inode_rdev(path->nodes[0], ii);
727
728out:
729	btrfs_free_path(path);
730	return ret;
731}
732
733typedef int (*iterate_inode_ref_t)(int num, u64 dir, int index,
734				   struct fs_path *p,
735				   void *ctx);
736
737/*
738 * Helper function to iterate the entries in ONE btrfs_inode_ref.
739 * The iterate callback may return a non zero value to stop iteration. This can
740 * be a negative value for error codes or 1 to simply stop it.
741 *
742 * path must point to the INODE_REF when called.
743 */
744static int iterate_inode_ref(struct send_ctx *sctx,
745			     struct btrfs_root *root, struct btrfs_path *path,
746			     struct btrfs_key *found_key, int resolve,
747			     iterate_inode_ref_t iterate, void *ctx)
748{
749	struct extent_buffer *eb;
750	struct btrfs_item *item;
751	struct btrfs_inode_ref *iref;
752	struct btrfs_path *tmp_path;
753	struct fs_path *p;
754	u32 cur;
755	u32 len;
756	u32 total;
757	int slot;
758	u32 name_len;
759	char *start;
760	int ret = 0;
761	int num;
762	int index;
763
764	p = fs_path_alloc_reversed(sctx);
765	if (!p)
766		return -ENOMEM;
767
768	tmp_path = alloc_path_for_send();
769	if (!tmp_path) {
770		fs_path_free(sctx, p);
771		return -ENOMEM;
772	}
773
774	eb = path->nodes[0];
775	slot = path->slots[0];
776	item = btrfs_item_nr(eb, slot);
777	iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
778	cur = 0;
779	len = 0;
780	total = btrfs_item_size(eb, item);
781
782	num = 0;
783	while (cur < total) {
784		fs_path_reset(p);
785
786		name_len = btrfs_inode_ref_name_len(eb, iref);
787		index = btrfs_inode_ref_index(eb, iref);
788		if (resolve) {
789			start = btrfs_iref_to_path(root, tmp_path, iref, eb,
790						found_key->offset, p->buf,
791						p->buf_len);
792			if (IS_ERR(start)) {
793				ret = PTR_ERR(start);
794				goto out;
795			}
796			if (start < p->buf) {
797				/* overflow , try again with larger buffer */
798				ret = fs_path_ensure_buf(p,
799						p->buf_len + p->buf - start);
800				if (ret < 0)
801					goto out;
802				start = btrfs_iref_to_path(root, tmp_path, iref,
803						eb, found_key->offset, p->buf,
804						p->buf_len);
805				if (IS_ERR(start)) {
806					ret = PTR_ERR(start);
807					goto out;
808				}
809				BUG_ON(start < p->buf);
810			}
811			p->start = start;
812		} else {
813			ret = fs_path_add_from_extent_buffer(p, eb,
814					(unsigned long)(iref + 1), name_len);
815			if (ret < 0)
816				goto out;
817		}
818
819
820		len = sizeof(*iref) + name_len;
821		iref = (struct btrfs_inode_ref *)((char *)iref + len);
822		cur += len;
823
824		ret = iterate(num, found_key->offset, index, p, ctx);
825		if (ret)
826			goto out;
827
828		num++;
829	}
830
831out:
832	btrfs_free_path(tmp_path);
833	fs_path_free(sctx, p);
834	return ret;
835}
836
837typedef int (*iterate_dir_item_t)(int num, struct btrfs_key *di_key,
838				  const char *name, int name_len,
839				  const char *data, int data_len,
840				  u8 type, void *ctx);
841
842/*
843 * Helper function to iterate the entries in ONE btrfs_dir_item.
844 * The iterate callback may return a non zero value to stop iteration. This can
845 * be a negative value for error codes or 1 to simply stop it.
846 *
847 * path must point to the dir item when called.
848 */
849static int iterate_dir_item(struct send_ctx *sctx,
850			    struct btrfs_root *root, struct btrfs_path *path,
851			    struct btrfs_key *found_key,
852			    iterate_dir_item_t iterate, void *ctx)
853{
854	int ret = 0;
855	struct extent_buffer *eb;
856	struct btrfs_item *item;
857	struct btrfs_dir_item *di;
858	struct btrfs_path *tmp_path = NULL;
859	struct btrfs_key di_key;
860	char *buf = NULL;
861	char *buf2 = NULL;
862	int buf_len;
863	int buf_virtual = 0;
864	u32 name_len;
865	u32 data_len;
866	u32 cur;
867	u32 len;
868	u32 total;
869	int slot;
870	int num;
871	u8 type;
872
873	buf_len = PAGE_SIZE;
874	buf = kmalloc(buf_len, GFP_NOFS);
875	if (!buf) {
876		ret = -ENOMEM;
877		goto out;
878	}
879
880	tmp_path = alloc_path_for_send();
881	if (!tmp_path) {
882		ret = -ENOMEM;
883		goto out;
884	}
885
886	eb = path->nodes[0];
887	slot = path->slots[0];
888	item = btrfs_item_nr(eb, slot);
889	di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
890	cur = 0;
891	len = 0;
892	total = btrfs_item_size(eb, item);
893
894	num = 0;
895	while (cur < total) {
896		name_len = btrfs_dir_name_len(eb, di);
897		data_len = btrfs_dir_data_len(eb, di);
898		type = btrfs_dir_type(eb, di);
899		btrfs_dir_item_key_to_cpu(eb, di, &di_key);
900
901		if (name_len + data_len > buf_len) {
902			buf_len = PAGE_ALIGN(name_len + data_len);
903			if (buf_virtual) {
904				buf2 = vmalloc(buf_len);
905				if (!buf2) {
906					ret = -ENOMEM;
907					goto out;
908				}
909				vfree(buf);
910			} else {
911				buf2 = krealloc(buf, buf_len, GFP_NOFS);
912				if (!buf2) {
913					buf2 = vmalloc(buf_len);
914					if (!buf2) {
915						ret = -ENOMEM;
916						goto out;
917					}
918					kfree(buf);
919					buf_virtual = 1;
920				}
921			}
922
923			buf = buf2;
924			buf2 = NULL;
925		}
926
927		read_extent_buffer(eb, buf, (unsigned long)(di + 1),
928				name_len + data_len);
929
930		len = sizeof(*di) + name_len + data_len;
931		di = (struct btrfs_dir_item *)((char *)di + len);
932		cur += len;
933
934		ret = iterate(num, &di_key, buf, name_len, buf + name_len,
935				data_len, type, ctx);
936		if (ret < 0)
937			goto out;
938		if (ret) {
939			ret = 0;
940			goto out;
941		}
942
943		num++;
944	}
945
946out:
947	btrfs_free_path(tmp_path);
948	if (buf_virtual)
949		vfree(buf);
950	else
951		kfree(buf);
952	return ret;
953}
954
955static int __copy_first_ref(int num, u64 dir, int index,
956			    struct fs_path *p, void *ctx)
957{
958	int ret;
959	struct fs_path *pt = ctx;
960
961	ret = fs_path_copy(pt, p);
962	if (ret < 0)
963		return ret;
964
965	/* we want the first only */
966	return 1;
967}
968
969/*
970 * Retrieve the first path of an inode. If an inode has more then one
971 * ref/hardlink, this is ignored.
972 */
973static int get_inode_path(struct send_ctx *sctx, struct btrfs_root *root,
974			  u64 ino, struct fs_path *path)
975{
976	int ret;
977	struct btrfs_key key, found_key;
978	struct btrfs_path *p;
979
980	p = alloc_path_for_send();
981	if (!p)
982		return -ENOMEM;
983
984	fs_path_reset(path);
985
986	key.objectid = ino;
987	key.type = BTRFS_INODE_REF_KEY;
988	key.offset = 0;
989
990	ret = btrfs_search_slot_for_read(root, &key, p, 1, 0);
991	if (ret < 0)
992		goto out;
993	if (ret) {
994		ret = 1;
995		goto out;
996	}
997	btrfs_item_key_to_cpu(p->nodes[0], &found_key, p->slots[0]);
998	if (found_key.objectid != ino ||
999		found_key.type != BTRFS_INODE_REF_KEY) {
1000		ret = -ENOENT;
1001		goto out;
1002	}
1003
1004	ret = iterate_inode_ref(sctx, root, p, &found_key, 1,
1005			__copy_first_ref, path);
1006	if (ret < 0)
1007		goto out;
1008	ret = 0;
1009
1010out:
1011	btrfs_free_path(p);
1012	return ret;
1013}
1014
1015struct backref_ctx {
1016	struct send_ctx *sctx;
1017
1018	/* number of total found references */
1019	u64 found;
1020
1021	/*
1022	 * used for clones found in send_root. clones found behind cur_objectid
1023	 * and cur_offset are not considered as allowed clones.
1024	 */
1025	u64 cur_objectid;
1026	u64 cur_offset;
1027
1028	/* may be truncated in case it's the last extent in a file */
1029	u64 extent_len;
1030
1031	/* Just to check for bugs in backref resolving */
1032	int found_itself;
1033};
1034
1035static int __clone_root_cmp_bsearch(const void *key, const void *elt)
1036{
1037	u64 root = (u64)key;
1038	struct clone_root *cr = (struct clone_root *)elt;
1039
1040	if (root < cr->root->objectid)
1041		return -1;
1042	if (root > cr->root->objectid)
1043		return 1;
1044	return 0;
1045}
1046
1047static int __clone_root_cmp_sort(const void *e1, const void *e2)
1048{
1049	struct clone_root *cr1 = (struct clone_root *)e1;
1050	struct clone_root *cr2 = (struct clone_root *)e2;
1051
1052	if (cr1->root->objectid < cr2->root->objectid)
1053		return -1;
1054	if (cr1->root->objectid > cr2->root->objectid)
1055		return 1;
1056	return 0;
1057}
1058
1059/*
1060 * Called for every backref that is found for the current extent.
1061 */
1062static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_)
1063{
1064	struct backref_ctx *bctx = ctx_;
1065	struct clone_root *found;
1066	int ret;
1067	u64 i_size;
1068
1069	/* First check if the root is in the list of accepted clone sources */
1070	found = bsearch((void *)root, bctx->sctx->clone_roots,
1071			bctx->sctx->clone_roots_cnt,
1072			sizeof(struct clone_root),
1073			__clone_root_cmp_bsearch);
1074	if (!found)
1075		return 0;
1076
1077	if (found->root == bctx->sctx->send_root &&
1078	    ino == bctx->cur_objectid &&
1079	    offset == bctx->cur_offset) {
1080		bctx->found_itself = 1;
1081	}
1082
1083	/*
1084	 * There are inodes that have extents that lie behind it's i_size. Don't
1085	 * accept clones from these extents.
1086	 */
1087	ret = get_inode_info(found->root, ino, &i_size, NULL, NULL, NULL, NULL,
1088			NULL);
1089	if (ret < 0)
1090		return ret;
1091
1092	if (offset + bctx->extent_len > i_size)
1093		return 0;
1094
1095	/*
1096	 * Make sure we don't consider clones from send_root that are
1097	 * behind the current inode/offset.
1098	 */
1099	if (found->root == bctx->sctx->send_root) {
1100		/*
1101		 * TODO for the moment we don't accept clones from the inode
1102		 * that is currently send. We may change this when
1103		 * BTRFS_IOC_CLONE_RANGE supports cloning from and to the same
1104		 * file.
1105		 */
1106		if (ino >= bctx->cur_objectid)
1107			return 0;
1108		/*if (ino > ctx->cur_objectid)
1109			return 0;
1110		if (offset + ctx->extent_len > ctx->cur_offset)
1111			return 0;*/
1112
1113		bctx->found++;
1114		found->found_refs++;
1115		found->ino = ino;
1116		found->offset = offset;
1117		return 0;
1118	}
1119
1120	bctx->found++;
1121	found->found_refs++;
1122	if (ino < found->ino) {
1123		found->ino = ino;
1124		found->offset = offset;
1125	} else if (found->ino == ino) {
1126		/*
1127		 * same extent found more then once in the same file.
1128		 */
1129		if (found->offset > offset + bctx->extent_len)
1130			found->offset = offset;
1131	}
1132
1133	return 0;
1134}
1135
1136/*
1137 * path must point to the extent item when called.
1138 */
1139static int find_extent_clone(struct send_ctx *sctx,
1140			     struct btrfs_path *path,
1141			     u64 ino, u64 data_offset,
1142			     u64 ino_size,
1143			     struct clone_root **found)
1144{
1145	int ret;
1146	int extent_type;
1147	u64 logical;
1148	u64 num_bytes;
1149	u64 extent_item_pos;
1150	struct btrfs_file_extent_item *fi;
1151	struct extent_buffer *eb = path->nodes[0];
1152	struct backref_ctx *backref_ctx = NULL;
1153	struct clone_root *cur_clone_root;
1154	struct btrfs_key found_key;
1155	struct btrfs_path *tmp_path;
1156	u32 i;
1157
1158	tmp_path = alloc_path_for_send();
1159	if (!tmp_path)
1160		return -ENOMEM;
1161
1162	backref_ctx = kmalloc(sizeof(*backref_ctx), GFP_NOFS);
1163	if (!backref_ctx) {
1164		ret = -ENOMEM;
1165		goto out;
1166	}
1167
1168	if (data_offset >= ino_size) {
1169		/*
1170		 * There may be extents that lie behind the file's size.
1171		 * I at least had this in combination with snapshotting while
1172		 * writing large files.
1173		 */
1174		ret = 0;
1175		goto out;
1176	}
1177
1178	fi = btrfs_item_ptr(eb, path->slots[0],
1179			struct btrfs_file_extent_item);
1180	extent_type = btrfs_file_extent_type(eb, fi);
1181	if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1182		ret = -ENOENT;
1183		goto out;
1184	}
1185
1186	num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1187	logical = btrfs_file_extent_disk_bytenr(eb, fi);
1188	if (logical == 0) {
1189		ret = -ENOENT;
1190		goto out;
1191	}
1192	logical += btrfs_file_extent_offset(eb, fi);
1193
1194	ret = extent_from_logical(sctx->send_root->fs_info,
1195			logical, tmp_path, &found_key);
1196	btrfs_release_path(tmp_path);
1197
1198	if (ret < 0)
1199		goto out;
1200	if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
1201		ret = -EIO;
1202		goto out;
1203	}
1204
1205	/*
1206	 * Setup the clone roots.
1207	 */
1208	for (i = 0; i < sctx->clone_roots_cnt; i++) {
1209		cur_clone_root = sctx->clone_roots + i;
1210		cur_clone_root->ino = (u64)-1;
1211		cur_clone_root->offset = 0;
1212		cur_clone_root->found_refs = 0;
1213	}
1214
1215	backref_ctx->sctx = sctx;
1216	backref_ctx->found = 0;
1217	backref_ctx->cur_objectid = ino;
1218	backref_ctx->cur_offset = data_offset;
1219	backref_ctx->found_itself = 0;
1220	backref_ctx->extent_len = num_bytes;
1221
1222	/*
1223	 * The last extent of a file may be too large due to page alignment.
1224	 * We need to adjust extent_len in this case so that the checks in
1225	 * __iterate_backrefs work.
1226	 */
1227	if (data_offset + num_bytes >= ino_size)
1228		backref_ctx->extent_len = ino_size - data_offset;
1229
1230	/*
1231	 * Now collect all backrefs.
1232	 */
1233	extent_item_pos = logical - found_key.objectid;
1234	ret = iterate_extent_inodes(sctx->send_root->fs_info,
1235					found_key.objectid, extent_item_pos, 1,
1236					__iterate_backrefs, backref_ctx);
1237	if (ret < 0)
1238		goto out;
1239
1240	if (!backref_ctx->found_itself) {
1241		/* found a bug in backref code? */
1242		ret = -EIO;
1243		printk(KERN_ERR "btrfs: ERROR did not find backref in "
1244				"send_root. inode=%llu, offset=%llu, "
1245				"logical=%llu\n",
1246				ino, data_offset, logical);
1247		goto out;
1248	}
1249
1250verbose_printk(KERN_DEBUG "btrfs: find_extent_clone: data_offset=%llu, "
1251		"ino=%llu, "
1252		"num_bytes=%llu, logical=%llu\n",
1253		data_offset, ino, num_bytes, logical);
1254
1255	if (!backref_ctx->found)
1256		verbose_printk("btrfs:    no clones found\n");
1257
1258	cur_clone_root = NULL;
1259	for (i = 0; i < sctx->clone_roots_cnt; i++) {
1260		if (sctx->clone_roots[i].found_refs) {
1261			if (!cur_clone_root)
1262				cur_clone_root = sctx->clone_roots + i;
1263			else if (sctx->clone_roots[i].root == sctx->send_root)
1264				/* prefer clones from send_root over others */
1265				cur_clone_root = sctx->clone_roots + i;
1266			break;
1267		}
1268
1269	}
1270
1271	if (cur_clone_root) {
1272		*found = cur_clone_root;
1273		ret = 0;
1274	} else {
1275		ret = -ENOENT;
1276	}
1277
1278out:
1279	btrfs_free_path(tmp_path);
1280	kfree(backref_ctx);
1281	return ret;
1282}
1283
1284static int read_symlink(struct send_ctx *sctx,
1285			struct btrfs_root *root,
1286			u64 ino,
1287			struct fs_path *dest)
1288{
1289	int ret;
1290	struct btrfs_path *path;
1291	struct btrfs_key key;
1292	struct btrfs_file_extent_item *ei;
1293	u8 type;
1294	u8 compression;
1295	unsigned long off;
1296	int len;
1297
1298	path = alloc_path_for_send();
1299	if (!path)
1300		return -ENOMEM;
1301
1302	key.objectid = ino;
1303	key.type = BTRFS_EXTENT_DATA_KEY;
1304	key.offset = 0;
1305	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1306	if (ret < 0)
1307		goto out;
1308	BUG_ON(ret);
1309
1310	ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
1311			struct btrfs_file_extent_item);
1312	type = btrfs_file_extent_type(path->nodes[0], ei);
1313	compression = btrfs_file_extent_compression(path->nodes[0], ei);
1314	BUG_ON(type != BTRFS_FILE_EXTENT_INLINE);
1315	BUG_ON(compression);
1316
1317	off = btrfs_file_extent_inline_start(ei);
1318	len = btrfs_file_extent_inline_len(path->nodes[0], ei);
1319
1320	ret = fs_path_add_from_extent_buffer(dest, path->nodes[0], off, len);
1321	if (ret < 0)
1322		goto out;
1323
1324out:
1325	btrfs_free_path(path);
1326	return ret;
1327}
1328
1329/*
1330 * Helper function to generate a file name that is unique in the root of
1331 * send_root and parent_root. This is used to generate names for orphan inodes.
1332 */
1333static int gen_unique_name(struct send_ctx *sctx,
1334			   u64 ino, u64 gen,
1335			   struct fs_path *dest)
1336{
1337	int ret = 0;
1338	struct btrfs_path *path;
1339	struct btrfs_dir_item *di;
1340	char tmp[64];
1341	int len;
1342	u64 idx = 0;
1343
1344	path = alloc_path_for_send();
1345	if (!path)
1346		return -ENOMEM;
1347
1348	while (1) {
1349		len = snprintf(tmp, sizeof(tmp) - 1, "o%llu-%llu-%llu",
1350				ino, gen, idx);
1351		if (len >= sizeof(tmp)) {
1352			/* should really not happen */
1353			ret = -EOVERFLOW;
1354			goto out;
1355		}
1356
1357		di = btrfs_lookup_dir_item(NULL, sctx->send_root,
1358				path, BTRFS_FIRST_FREE_OBJECTID,
1359				tmp, strlen(tmp), 0);
1360		btrfs_release_path(path);
1361		if (IS_ERR(di)) {
1362			ret = PTR_ERR(di);
1363			goto out;
1364		}
1365		if (di) {
1366			/* not unique, try again */
1367			idx++;
1368			continue;
1369		}
1370
1371		if (!sctx->parent_root) {
1372			/* unique */
1373			ret = 0;
1374			break;
1375		}
1376
1377		di = btrfs_lookup_dir_item(NULL, sctx->parent_root,
1378				path, BTRFS_FIRST_FREE_OBJECTID,
1379				tmp, strlen(tmp), 0);
1380		btrfs_release_path(path);
1381		if (IS_ERR(di)) {
1382			ret = PTR_ERR(di);
1383			goto out;
1384		}
1385		if (di) {
1386			/* not unique, try again */
1387			idx++;
1388			continue;
1389		}
1390		/* unique */
1391		break;
1392	}
1393
1394	ret = fs_path_add(dest, tmp, strlen(tmp));
1395
1396out:
1397	btrfs_free_path(path);
1398	return ret;
1399}
1400
/*
 * State of an (ino, gen) pair as computed by get_cur_inode_state(). The
 * will/did distinction depends on whether ino is below sctx->send_progress.
 */
enum inode_state {
	/* inode with this generation is present in send_root and parent_root */
	inode_state_no_change = 0,
	/* only present in send_root, ino >= send_progress */
	inode_state_will_create = 1,
	/* only present in send_root, ino < send_progress */
	inode_state_did_create = 2,
	/* only present in parent_root, ino >= send_progress */
	inode_state_will_delete = 3,
	/* only present in parent_root, ino < send_progress */
	inode_state_did_delete = 4,
};
1408
1409static int get_cur_inode_state(struct send_ctx *sctx, u64 ino, u64 gen)
1410{
1411	int ret;
1412	int left_ret;
1413	int right_ret;
1414	u64 left_gen;
1415	u64 right_gen;
1416
1417	ret = get_inode_info(sctx->send_root, ino, NULL, &left_gen, NULL, NULL,
1418			NULL, NULL);
1419	if (ret < 0 && ret != -ENOENT)
1420		goto out;
1421	left_ret = ret;
1422
1423	if (!sctx->parent_root) {
1424		right_ret = -ENOENT;
1425	} else {
1426		ret = get_inode_info(sctx->parent_root, ino, NULL, &right_gen,
1427				NULL, NULL, NULL, NULL);
1428		if (ret < 0 && ret != -ENOENT)
1429			goto out;
1430		right_ret = ret;
1431	}
1432
1433	if (!left_ret && !right_ret) {
1434		if (left_gen == gen && right_gen == gen)
1435			ret = inode_state_no_change;
1436		else if (left_gen == gen) {
1437			if (ino < sctx->send_progress)
1438				ret = inode_state_did_create;
1439			else
1440				ret = inode_state_will_create;
1441		} else if (right_gen == gen) {
1442			if (ino < sctx->send_progress)
1443				ret = inode_state_did_delete;
1444			else
1445				ret = inode_state_will_delete;
1446		} else  {
1447			ret = -ENOENT;
1448		}
1449	} else if (!left_ret) {
1450		if (left_gen == gen) {
1451			if (ino < sctx->send_progress)
1452				ret = inode_state_did_create;
1453			else
1454				ret = inode_state_will_create;
1455		} else {
1456			ret = -ENOENT;
1457		}
1458	} else if (!right_ret) {
1459		if (right_gen == gen) {
1460			if (ino < sctx->send_progress)
1461				ret = inode_state_did_delete;
1462			else
1463				ret = inode_state_will_delete;
1464		} else {
1465			ret = -ENOENT;
1466		}
1467	} else {
1468		ret = -ENOENT;
1469	}
1470
1471out:
1472	return ret;
1473}
1474
1475static int is_inode_existent(struct send_ctx *sctx, u64 ino, u64 gen)
1476{
1477	int ret;
1478
1479	ret = get_cur_inode_state(sctx, ino, gen);
1480	if (ret < 0)
1481		goto out;
1482
1483	if (ret == inode_state_no_change ||
1484	    ret == inode_state_did_create ||
1485	    ret == inode_state_will_delete)
1486		ret = 1;
1487	else
1488		ret = 0;
1489
1490out:
1491	return ret;
1492}
1493
1494/*
1495 * Helper function to lookup a dir item in a dir.
1496 */
1497static int lookup_dir_item_inode(struct btrfs_root *root,
1498				 u64 dir, const char *name, int name_len,
1499				 u64 *found_inode,
1500				 u8 *found_type)
1501{
1502	int ret = 0;
1503	struct btrfs_dir_item *di;
1504	struct btrfs_key key;
1505	struct btrfs_path *path;
1506
1507	path = alloc_path_for_send();
1508	if (!path)
1509		return -ENOMEM;
1510
1511	di = btrfs_lookup_dir_item(NULL, root, path,
1512			dir, name, name_len, 0);
1513	if (!di) {
1514		ret = -ENOENT;
1515		goto out;
1516	}
1517	if (IS_ERR(di)) {
1518		ret = PTR_ERR(di);
1519		goto out;
1520	}
1521	btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key);
1522	*found_inode = key.objectid;
1523	*found_type = btrfs_dir_type(path->nodes[0], di);
1524
1525out:
1526	btrfs_free_path(path);
1527	return ret;
1528}
1529
1530static int get_first_ref(struct send_ctx *sctx,
1531			 struct btrfs_root *root, u64 ino,
1532			 u64 *dir, u64 *dir_gen, struct fs_path *name)
1533{
1534	int ret;
1535	struct btrfs_key key;
1536	struct btrfs_key found_key;
1537	struct btrfs_path *path;
1538	struct btrfs_inode_ref *iref;
1539	int len;
1540
1541	path = alloc_path_for_send();
1542	if (!path)
1543		return -ENOMEM;
1544
1545	key.objectid = ino;
1546	key.type = BTRFS_INODE_REF_KEY;
1547	key.offset = 0;
1548
1549	ret = btrfs_search_slot_for_read(root, &key, path, 1, 0);
1550	if (ret < 0)
1551		goto out;
1552	if (!ret)
1553		btrfs_item_key_to_cpu(path->nodes[0], &found_key,
1554				path->slots[0]);
1555	if (ret || found_key.objectid != key.objectid ||
1556	    found_key.type != key.type) {
1557		ret = -ENOENT;
1558		goto out;
1559	}
1560
1561	iref = btrfs_item_ptr(path->nodes[0], path->slots[0],
1562			struct btrfs_inode_ref);
1563	len = btrfs_inode_ref_name_len(path->nodes[0], iref);
1564	ret = fs_path_add_from_extent_buffer(name, path->nodes[0],
1565			(unsigned long)(iref + 1), len);
1566	if (ret < 0)
1567		goto out;
1568	btrfs_release_path(path);
1569
1570	ret = get_inode_info(root, found_key.offset, NULL, dir_gen, NULL, NULL,
1571			NULL, NULL);
1572	if (ret < 0)
1573		goto out;
1574
1575	*dir = found_key.offset;
1576
1577out:
1578	btrfs_free_path(path);
1579	return ret;
1580}
1581
1582static int is_first_ref(struct send_ctx *sctx,
1583			struct btrfs_root *root,
1584			u64 ino, u64 dir,
1585			const char *name, int name_len)
1586{
1587	int ret;
1588	struct fs_path *tmp_name;
1589	u64 tmp_dir;
1590	u64 tmp_dir_gen;
1591
1592	tmp_name = fs_path_alloc(sctx);
1593	if (!tmp_name)
1594		return -ENOMEM;
1595
1596	ret = get_first_ref(sctx, root, ino, &tmp_dir, &tmp_dir_gen, tmp_name);
1597	if (ret < 0)
1598		goto out;
1599
1600	if (dir != tmp_dir || name_len != fs_path_len(tmp_name)) {
1601		ret = 0;
1602		goto out;
1603	}
1604
1605	ret = memcmp(tmp_name->start, name, name_len);
1606	if (ret)
1607		ret = 0;
1608	else
1609		ret = 1;
1610
1611out:
1612	fs_path_free(sctx, tmp_name);
1613	return ret;
1614}
1615
1616static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen,
1617			      const char *name, int name_len,
1618			      u64 *who_ino, u64 *who_gen)
1619{
1620	int ret = 0;
1621	u64 other_inode = 0;
1622	u8 other_type = 0;
1623
1624	if (!sctx->parent_root)
1625		goto out;
1626
1627	ret = is_inode_existent(sctx, dir, dir_gen);
1628	if (ret <= 0)
1629		goto out;
1630
1631	ret = lookup_dir_item_inode(sctx->parent_root, dir, name, name_len,
1632			&other_inode, &other_type);
1633	if (ret < 0 && ret != -ENOENT)
1634		goto out;
1635	if (ret) {
1636		ret = 0;
1637		goto out;
1638	}
1639
1640	if (other_inode > sctx->send_progress) {
1641		ret = get_inode_info(sctx->parent_root, other_inode, NULL,
1642				who_gen, NULL, NULL, NULL, NULL);
1643		if (ret < 0)
1644			goto out;
1645
1646		ret = 1;
1647		*who_ino = other_inode;
1648	} else {
1649		ret = 0;
1650	}
1651
1652out:
1653	return ret;
1654}
1655
1656static int did_overwrite_ref(struct send_ctx *sctx,
1657			    u64 dir, u64 dir_gen,
1658			    u64 ino, u64 ino_gen,
1659			    const char *name, int name_len)
1660{
1661	int ret = 0;
1662	u64 gen;
1663	u64 ow_inode;
1664	u8 other_type;
1665
1666	if (!sctx->parent_root)
1667		goto out;
1668
1669	ret = is_inode_existent(sctx, dir, dir_gen);
1670	if (ret <= 0)
1671		goto out;
1672
1673	/* check if the ref was overwritten by another ref */
1674	ret = lookup_dir_item_inode(sctx->send_root, dir, name, name_len,
1675			&ow_inode, &other_type);
1676	if (ret < 0 && ret != -ENOENT)
1677		goto out;
1678	if (ret) {
1679		/* was never and will never be overwritten */
1680		ret = 0;
1681		goto out;
1682	}
1683
1684	ret = get_inode_info(sctx->send_root, ow_inode, NULL, &gen, NULL, NULL,
1685			NULL, NULL);
1686	if (ret < 0)
1687		goto out;
1688
1689	if (ow_inode == ino && gen == ino_gen) {
1690		ret = 0;
1691		goto out;
1692	}
1693
1694	/* we know that it is or will be overwritten. check this now */
1695	if (ow_inode < sctx->send_progress)
1696		ret = 1;
1697	else
1698		ret = 0;
1699
1700out:
1701	return ret;
1702}
1703
1704static int did_overwrite_first_ref(struct send_ctx *sctx, u64 ino, u64 gen)
1705{
1706	int ret = 0;
1707	struct fs_path *name = NULL;
1708	u64 dir;
1709	u64 dir_gen;
1710
1711	if (!sctx->parent_root)
1712		goto out;
1713
1714	name = fs_path_alloc(sctx);
1715	if (!name)
1716		return -ENOMEM;
1717
1718	ret = get_first_ref(sctx, sctx->parent_root, ino, &dir, &dir_gen, name);
1719	if (ret < 0)
1720		goto out;
1721
1722	ret = did_overwrite_ref(sctx, dir, dir_gen, ino, gen,
1723			name->start, fs_path_len(name));
1724	if (ret < 0)
1725		goto out;
1726
1727out:
1728	fs_path_free(sctx, name);
1729	return ret;
1730}
1731
1732static int name_cache_insert(struct send_ctx *sctx,
1733			     struct name_cache_entry *nce)
1734{
1735	int ret = 0;
1736	struct name_cache_entry **ncea;
1737
1738	ncea = radix_tree_lookup(&sctx->name_cache, nce->ino);
1739	if (ncea) {
1740		if (!ncea[0])
1741			ncea[0] = nce;
1742		else if (!ncea[1])
1743			ncea[1] = nce;
1744		else
1745			BUG();
1746	} else {
1747		ncea = kmalloc(sizeof(void *) * 2, GFP_NOFS);
1748		if (!ncea)
1749			return -ENOMEM;
1750
1751		ncea[0] = nce;
1752		ncea[1] = NULL;
1753		ret = radix_tree_insert(&sctx->name_cache, nce->ino, ncea);
1754		if (ret < 0)
1755			return ret;
1756	}
1757	list_add_tail(&nce->list, &sctx->name_cache_list);
1758	sctx->name_cache_size++;
1759
1760	return ret;
1761}
1762
1763static void name_cache_delete(struct send_ctx *sctx,
1764			      struct name_cache_entry *nce)
1765{
1766	struct name_cache_entry **ncea;
1767
1768	ncea = radix_tree_lookup(&sctx->name_cache, nce->ino);
1769	BUG_ON(!ncea);
1770
1771	if (ncea[0] == nce)
1772		ncea[0] = NULL;
1773	else if (ncea[1] == nce)
1774		ncea[1] = NULL;
1775	else
1776		BUG();
1777
1778	if (!ncea[0] && !ncea[1]) {
1779		radix_tree_delete(&sctx->name_cache, nce->ino);
1780		kfree(ncea);
1781	}
1782
1783	list_del(&nce->list);
1784
1785	sctx->name_cache_size--;
1786}
1787
1788static struct name_cache_entry *name_cache_search(struct send_ctx *sctx,
1789						    u64 ino, u64 gen)
1790{
1791	struct name_cache_entry **ncea;
1792
1793	ncea = radix_tree_lookup(&sctx->name_cache, ino);
1794	if (!ncea)
1795		return NULL;
1796
1797	if (ncea[0] && ncea[0]->gen == gen)
1798		return ncea[0];
1799	else if (ncea[1] && ncea[1]->gen == gen)
1800		return ncea[1];
1801	return NULL;
1802}
1803
1804static void name_cache_used(struct send_ctx *sctx, struct name_cache_entry *nce)
1805{
1806	list_del(&nce->list);
1807	list_add_tail(&nce->list, &sctx->name_cache_list);
1808}
1809
1810static void name_cache_clean_unused(struct send_ctx *sctx)
1811{
1812	struct name_cache_entry *nce;
1813
1814	if (sctx->name_cache_size < SEND_CTX_NAME_CACHE_CLEAN_SIZE)
1815		return;
1816
1817	while (sctx->name_cache_size > SEND_CTX_MAX_NAME_CACHE_SIZE) {
1818		nce = list_entry(sctx->name_cache_list.next,
1819				struct name_cache_entry, list);
1820		name_cache_delete(sctx, nce);
1821		kfree(nce);
1822	}
1823}
1824
1825static void name_cache_free(struct send_ctx *sctx)
1826{
1827	struct name_cache_entry *nce;
1828	struct name_cache_entry *tmp;
1829
1830	list_for_each_entry_safe(nce, tmp, &sctx->name_cache_list, list) {
1831		name_cache_delete(sctx, nce);
1832	}
1833}
1834
1835static int __get_cur_name_and_parent(struct send_ctx *sctx,
1836				     u64 ino, u64 gen,
1837				     u64 *parent_ino,
1838				     u64 *parent_gen,
1839				     struct fs_path *dest)
1840{
1841	int ret;
1842	int nce_ret;
1843	struct btrfs_path *path = NULL;
1844	struct name_cache_entry *nce = NULL;
1845
1846	nce = name_cache_search(sctx, ino, gen);
1847	if (nce) {
1848		if (ino < sctx->send_progress && nce->need_later_update) {
1849			name_cache_delete(sctx, nce);
1850			kfree(nce);
1851			nce = NULL;
1852		} else {
1853			name_cache_used(sctx, nce);
1854			*parent_ino = nce->parent_ino;
1855			*parent_gen = nce->parent_gen;
1856			ret = fs_path_add(dest, nce->name, nce->name_len);
1857			if (ret < 0)
1858				goto out;
1859			ret = nce->ret;
1860			goto out;
1861		}
1862	}
1863
1864	path = alloc_path_for_send();
1865	if (!path)
1866		return -ENOMEM;
1867
1868	ret = is_inode_existent(sctx, ino, gen);
1869	if (ret < 0)
1870		goto out;
1871
1872	if (!ret) {
1873		ret = gen_unique_name(sctx, ino, gen, dest);
1874		if (ret < 0)
1875			goto out;
1876		ret = 1;
1877		goto out_cache;
1878	}
1879
1880	if (ino < sctx->send_progress)
1881		ret = get_first_ref(sctx, sctx->send_root, ino,
1882				parent_ino, parent_gen, dest);
1883	else
1884		ret = get_first_ref(sctx, sctx->parent_root, ino,
1885				parent_ino, parent_gen, dest);
1886	if (ret < 0)
1887		goto out;
1888
1889	ret = did_overwrite_ref(sctx, *parent_ino, *parent_gen, ino, gen,
1890			dest->start, dest->end - dest->start);
1891	if (ret < 0)
1892		goto out;
1893	if (ret) {
1894		fs_path_reset(dest);
1895		ret = gen_unique_name(sctx, ino, gen, dest);
1896		if (ret < 0)
1897			goto out;
1898		ret = 1;
1899	}
1900
1901out_cache:
1902	nce = kmalloc(sizeof(*nce) + fs_path_len(dest) + 1, GFP_NOFS);
1903	if (!nce) {
1904		ret = -ENOMEM;
1905		goto out;
1906	}
1907
1908	nce->ino = ino;
1909	nce->gen = gen;
1910	nce->parent_ino = *parent_ino;
1911	nce->parent_gen = *parent_gen;
1912	nce->name_len = fs_path_len(dest);
1913	nce->ret = ret;
1914	strcpy(nce->name, dest->start);
1915
1916	if (ino < sctx->send_progress)
1917		nce->need_later_update = 0;
1918	else
1919		nce->need_later_update = 1;
1920
1921	nce_ret = name_cache_insert(sctx, nce);
1922	if (nce_ret < 0)
1923		ret = nce_ret;
1924	name_cache_clean_unused(sctx);
1925
1926out:
1927	btrfs_free_path(path);
1928	return ret;
1929}
1930
1931/*
1932 * Magic happens here. This function returns the first ref to an inode as it
1933 * would look like while receiving the stream at this point in time.
1934 * We walk the path up to the root. For every inode in between, we check if it
1935 * was already processed/sent. If yes, we continue with the parent as found
1936 * in send_root. If not, we continue with the parent as found in parent_root.
1937 * If we encounter an inode that was deleted at this point in time, we use the
1938 * inodes "orphan" name instead of the real name and stop. Same with new inodes
1939 * that were not created yet and overwritten inodes/refs.
1940 *
1941 * When do we have have orphan inodes:
1942 * 1. When an inode is freshly created and thus no valid refs are available yet
1943 * 2. When a directory lost all it's refs (deleted) but still has dir items
1944 *    inside which were not processed yet (pending for move/delete). If anyone
1945 *    tried to get the path to the dir items, it would get a path inside that
1946 *    orphan directory.
1947 * 3. When an inode is moved around or gets new links, it may overwrite the ref
1948 *    of an unprocessed inode. If in that case the first ref would be
1949 *    overwritten, the overwritten inode gets "orphanized". Later when we
1950 *    process this overwritten inode, it is restored at a new place by moving
1951 *    the orphan inode.
1952 *
1953 * sctx->send_progress tells this function at which point in time receiving
1954 * would be.
1955 */
1956static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen,
1957			struct fs_path *dest)
1958{
1959	int ret = 0;
1960	struct fs_path *name = NULL;
1961	u64 parent_inode = 0;
1962	u64 parent_gen = 0;
1963	int stop = 0;
1964
1965	name = fs_path_alloc(sctx);
1966	if (!name) {
1967		ret = -ENOMEM;
1968		goto out;
1969	}
1970
1971	dest->reversed = 1;
1972	fs_path_reset(dest);
1973
1974	while (!stop && ino != BTRFS_FIRST_FREE_OBJECTID) {
1975		fs_path_reset(name);
1976
1977		ret = __get_cur_name_and_parent(sctx, ino, gen,
1978				&parent_inode, &parent_gen, name);
1979		if (ret < 0)
1980			goto out;
1981		if (ret)
1982			stop = 1;
1983
1984		ret = fs_path_add_path(dest, name);
1985		if (ret < 0)
1986			goto out;
1987
1988		ino = parent_inode;
1989		gen = parent_gen;
1990	}
1991
1992out:
1993	fs_path_free(sctx, name);
1994	if (!ret)
1995		fs_path_unreverse(dest);
1996	return ret;
1997}
1998
1999/*
2000 * Called for regular files when sending extents data. Opens a struct file
2001 * to read from the file.
2002 */
2003static int open_cur_inode_file(struct send_ctx *sctx)
2004{
2005	int ret = 0;
2006	struct btrfs_key key;
2007	struct path path;
2008	struct inode *inode;
2009	struct dentry *dentry;
2010	struct file *filp;
2011	int new = 0;
2012
2013	if (sctx->cur_inode_filp)
2014		goto out;
2015
2016	key.objectid = sctx->cur_ino;
2017	key.type = BTRFS_INODE_ITEM_KEY;
2018	key.offset = 0;
2019
2020	inode = btrfs_iget(sctx->send_root->fs_info->sb, &key, sctx->send_root,
2021			&new);
2022	if (IS_ERR(inode)) {
2023		ret = PTR_ERR(inode);
2024		goto out;
2025	}
2026
2027	dentry = d_obtain_alias(inode);
2028	inode = NULL;
2029	if (IS_ERR(dentry)) {
2030		ret = PTR_ERR(dentry);
2031		goto out;
2032	}
2033
2034	path.mnt = sctx->mnt;
2035	path.dentry = dentry;
2036	filp = dentry_open(&path, O_RDONLY | O_LARGEFILE, current_cred());
2037	dput(dentry);
2038	dentry = NULL;
2039	if (IS_ERR(filp)) {
2040		ret = PTR_ERR(filp);
2041		goto out;
2042	}
2043	sctx->cur_inode_filp = filp;
2044
2045out:
2046	/*
2047	 * no xxxput required here as every vfs op
2048	 * does it by itself on failure
2049	 */
2050	return ret;
2051}
2052
2053/*
2054 * Closes the struct file that was created in open_cur_inode_file
2055 */
2056static int close_cur_inode_file(struct send_ctx *sctx)
2057{
2058	int ret = 0;
2059
2060	if (!sctx->cur_inode_filp)
2061		goto out;
2062
2063	ret = filp_close(sctx->cur_inode_filp, NULL);
2064	sctx->cur_inode_filp = NULL;
2065
2066out:
2067	return ret;
2068}
2069
2070/*
2071 * Sends a BTRFS_SEND_C_SUBVOL command/item to userspace
2072 */
2073static int send_subvol_begin(struct send_ctx *sctx)
2074{
2075	int ret;
2076	struct btrfs_root *send_root = sctx->send_root;
2077	struct btrfs_root *parent_root = sctx->parent_root;
2078	struct btrfs_path *path;
2079	struct btrfs_key key;
2080	struct btrfs_root_ref *ref;
2081	struct extent_buffer *leaf;
2082	char *name = NULL;
2083	int namelen;
2084
2085	path = alloc_path_for_send();
2086	if (!path)
2087		return -ENOMEM;
2088
2089	name = kmalloc(BTRFS_PATH_NAME_MAX, GFP_NOFS);
2090	if (!name) {
2091		btrfs_free_path(path);
2092		return -ENOMEM;
2093	}
2094
2095	key.objectid = send_root->objectid;
2096	key.type = BTRFS_ROOT_BACKREF_KEY;
2097	key.offset = 0;
2098
2099	ret = btrfs_search_slot_for_read(send_root->fs_info->tree_root,
2100				&key, path, 1, 0);
2101	if (ret < 0)
2102		goto out;
2103	if (ret) {
2104		ret = -ENOENT;
2105		goto out;
2106	}
2107
2108	leaf = path->nodes[0];
2109	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
2110	if (key.type != BTRFS_ROOT_BACKREF_KEY ||
2111	    key.objectid != send_root->objectid) {
2112		ret = -ENOENT;
2113		goto out;
2114	}
2115	ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref);
2116	namelen = btrfs_root_ref_name_len(leaf, ref);
2117	read_extent_buffer(leaf, name, (unsigned long)(ref + 1), namelen);
2118	btrfs_release_path(path);
2119
2120	if (ret < 0)
2121		goto out;
2122
2123	if (parent_root) {
2124		ret = begin_cmd(sctx, BTRFS_SEND_C_SNAPSHOT);
2125		if (ret < 0)
2126			goto out;
2127	} else {
2128		ret = begin_cmd(sctx, BTRFS_SEND_C_SUBVOL);
2129		if (ret < 0)
2130			goto out;
2131	}
2132
2133	TLV_PUT_STRING(sctx, BTRFS_SEND_A_PATH, name, namelen);
2134	TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID,
2135			sctx->send_root->root_item.uuid);
2136	TLV_PUT_U64(sctx, BTRFS_SEND_A_CTRANSID,
2137			sctx->send_root->root_item.ctransid);
2138	if (parent_root) {
2139		TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
2140				sctx->parent_root->root_item.uuid);
2141		TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID,
2142				sctx->parent_root->root_item.ctransid);
2143	}
2144
2145	ret = send_cmd(sctx);
2146
2147tlv_put_failure:
2148out:
2149	btrfs_free_path(path);
2150	kfree(name);
2151	return ret;
2152}
2153
2154static int send_truncate(struct send_ctx *sctx, u64 ino, u64 gen, u64 size)
2155{
2156	int ret = 0;
2157	struct fs_path *p;
2158
2159verbose_printk("btrfs: send_truncate %llu size=%llu\n", ino, size);
2160
2161	p = fs_path_alloc(sctx);
2162	if (!p)
2163		return -ENOMEM;
2164
2165	ret = begin_cmd(sctx, BTRFS_SEND_C_TRUNCATE);
2166	if (ret < 0)
2167		goto out;
2168
2169	ret = get_cur_path(sctx, ino, gen, p);
2170	if (ret < 0)
2171		goto out;
2172	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
2173	TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, size);
2174
2175	ret = send_cmd(sctx);
2176
2177tlv_put_failure:
2178out:
2179	fs_path_free(sctx, p);
2180	return ret;
2181}
2182
2183static int send_chmod(struct send_ctx *sctx, u64 ino, u64 gen, u64 mode)
2184{
2185	int ret = 0;
2186	struct fs_path *p;
2187
2188verbose_printk("btrfs: send_chmod %llu mode=%llu\n", ino, mode);
2189
2190	p = fs_path_alloc(sctx);
2191	if (!p)
2192		return -ENOMEM;
2193
2194	ret = begin_cmd(sctx, BTRFS_SEND_C_CHMOD);
2195	if (ret < 0)
2196		goto out;
2197
2198	ret = get_cur_path(sctx, ino, gen, p);
2199	if (ret < 0)
2200		goto out;
2201	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
2202	TLV_PUT_U64(sctx, BTRFS_SEND_A_MODE, mode & 07777);
2203
2204	ret = send_cmd(sctx);
2205
2206tlv_put_failure:
2207out:
2208	fs_path_free(sctx, p);
2209	return ret;
2210}
2211
2212static int send_chown(struct send_ctx *sctx, u64 ino, u64 gen, u64 uid, u64 gid)
2213{
2214	int ret = 0;
2215	struct fs_path *p;
2216
2217verbose_printk("btrfs: send_chown %llu uid=%llu, gid=%llu\n", ino, uid, gid);
2218
2219	p = fs_path_alloc(sctx);
2220	if (!p)
2221		return -ENOMEM;
2222
2223	ret = begin_cmd(sctx, BTRFS_SEND_C_CHOWN);
2224	if (ret < 0)
2225		goto out;
2226
2227	ret = get_cur_path(sctx, ino, gen, p);
2228	if (ret < 0)
2229		goto out;
2230	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
2231	TLV_PUT_U64(sctx, BTRFS_SEND_A_UID, uid);
2232	TLV_PUT_U64(sctx, BTRFS_SEND_A_GID, gid);
2233
2234	ret = send_cmd(sctx);
2235
2236tlv_put_failure:
2237out:
2238	fs_path_free(sctx, p);
2239	return ret;
2240}
2241
2242static int send_utimes(struct send_ctx *sctx, u64 ino, u64 gen)
2243{
2244	int ret = 0;
2245	struct fs_path *p = NULL;
2246	struct btrfs_inode_item *ii;
2247	struct btrfs_path *path = NULL;
2248	struct extent_buffer *eb;
2249	struct btrfs_key key;
2250	int slot;
2251
2252verbose_printk("btrfs: send_utimes %llu\n", ino);
2253
2254	p = fs_path_alloc(sctx);
2255	if (!p)
2256		return -ENOMEM;
2257
2258	path = alloc_path_for_send();
2259	if (!path) {
2260		ret = -ENOMEM;
2261		goto out;
2262	}
2263
2264	key.objectid = ino;
2265	key.type = BTRFS_INODE_ITEM_KEY;
2266	key.offset = 0;
2267	ret = btrfs_search_slot(NULL, sctx->send_root, &key, path, 0, 0);
2268	if (ret < 0)
2269		goto out;
2270
2271	eb = path->nodes[0];
2272	slot = path->slots[0];
2273	ii = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
2274
2275	ret = begin_cmd(sctx, BTRFS_SEND_C_UTIMES);
2276	if (ret < 0)
2277		goto out;
2278
2279	ret = get_cur_path(sctx, ino, gen, p);
2280	if (ret < 0)
2281		goto out;
2282	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
2283	TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_ATIME, eb,
2284			btrfs_inode_atime(ii));
2285	TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_MTIME, eb,
2286			btrfs_inode_mtime(ii));
2287	TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_CTIME, eb,
2288			btrfs_inode_ctime(ii));
2289	/* TODO otime? */
2290
2291	ret = send_cmd(sctx);
2292
2293tlv_put_failure:
2294out:
2295	fs_path_free(sctx, p);
2296	btrfs_free_path(path);
2297	return ret;
2298}
2299
2300/*
2301 * Sends a BTRFS_SEND_C_MKXXX or SYMLINK command to user space. We don't have
2302 * a valid path yet because we did not process the refs yet. So, the inode
2303 * is created as orphan.
2304 */
2305static int send_create_inode(struct send_ctx *sctx, u64 ino)
2306{
2307	int ret = 0;
2308	struct fs_path *p;
2309	int cmd;
2310	u64 gen;
2311	u64 mode;
2312	u64 rdev;
2313
2314verbose_printk("btrfs: send_create_inode %llu\n", ino);
2315
2316	p = fs_path_alloc(sctx);
2317	if (!p)
2318		return -ENOMEM;
2319
2320	ret = get_inode_info(sctx->send_root, ino, NULL, &gen, &mode, NULL,
2321			NULL, &rdev);
2322	if (ret < 0)
2323		goto out;
2324
2325	if (S_ISREG(mode))
2326		cmd = BTRFS_SEND_C_MKFILE;
2327	else if (S_ISDIR(mode))
2328		cmd = BTRFS_SEND_C_MKDIR;
2329	else if (S_ISLNK(mode))
2330		cmd = BTRFS_SEND_C_SYMLINK;
2331	else if (S_ISCHR(mode) || S_ISBLK(mode))
2332		cmd = BTRFS_SEND_C_MKNOD;
2333	else if (S_ISFIFO(mode))
2334		cmd = BTRFS_SEND_C_MKFIFO;
2335	else if (S_ISSOCK(mode))
2336		cmd = BTRFS_SEND_C_MKSOCK;
2337	else {
2338		printk(KERN_WARNING "btrfs: unexpected inode type %o",
2339				(int)(mode & S_IFMT));
2340		ret = -ENOTSUPP;
2341		goto out;
2342	}
2343
2344	ret = begin_cmd(sctx, cmd);
2345	if (ret < 0)
2346		goto out;
2347
2348	ret = gen_unique_name(sctx, ino, gen, p);
2349	if (ret < 0)
2350		goto out;
2351
2352	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
2353	TLV_PUT_U64(sctx, BTRFS_SEND_A_INO, ino);
2354
2355	if (S_ISLNK(mode)) {
2356		fs_path_reset(p);
2357		ret = read_symlink(sctx, sctx->send_root, ino, p);
2358		if (ret < 0)
2359			goto out;
2360		TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, p);
2361	} else if (S_ISCHR(mode) || S_ISBLK(mode) ||
2362		   S_ISFIFO(mode) || S_ISSOCK(mode)) {
2363		TLV_PUT_U64(sctx, BTRFS_SEND_A_RDEV, rdev);
2364	}
2365
2366	ret = send_cmd(sctx);
2367	if (ret < 0)
2368		goto out;
2369
2370
2371tlv_put_failure:
2372out:
2373	fs_path_free(sctx, p);
2374	return ret;
2375}
2376
2377/*
2378 * We need some special handling for inodes that get processed before the parent
2379 * directory got created. See process_recorded_refs for details.
2380 * This function does the check if we already created the dir out of order.
2381 */
2382static int did_create_dir(struct send_ctx *sctx, u64 dir)
2383{
2384	int ret = 0;
2385	struct btrfs_path *path = NULL;
2386	struct btrfs_key key;
2387	struct btrfs_key found_key;
2388	struct btrfs_key di_key;
2389	struct extent_buffer *eb;
2390	struct btrfs_dir_item *di;
2391	int slot;
2392
2393	path = alloc_path_for_send();
2394	if (!path) {
2395		ret = -ENOMEM;
2396		goto out;
2397	}
2398
2399	key.objectid = dir;
2400	key.type = BTRFS_DIR_INDEX_KEY;
2401	key.offset = 0;
2402	while (1) {
2403		ret = btrfs_search_slot_for_read(sctx->send_root, &key, path,
2404				1, 0);
2405		if (ret < 0)
2406			goto out;
2407		if (!ret) {
2408			eb = path->nodes[0];
2409			slot = path->slots[0];
2410			btrfs_item_key_to_cpu(eb, &found_key, slot);
2411		}
2412		if (ret || found_key.objectid != key.objectid ||
2413		    found_key.type != key.type) {
2414			ret = 0;
2415			goto out;
2416		}
2417
2418		di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
2419		btrfs_dir_item_key_to_cpu(eb, di, &di_key);
2420
2421		if (di_key.objectid < sctx->send_progress) {
2422			ret = 1;
2423			goto out;
2424		}
2425
2426		key.offset = found_key.offset + 1;
2427		btrfs_release_path(path);
2428	}
2429
2430out:
2431	btrfs_free_path(path);
2432	return ret;
2433}
2434
2435/*
2436 * Only creates the inode if it is:
2437 * 1. Not a directory
2438 * 2. Or a directory which was not created already due to out of order
2439 *    directories. See did_create_dir and process_recorded_refs for details.
2440 */
2441static int send_create_inode_if_needed(struct send_ctx *sctx)
2442{
2443	int ret;
2444
2445	if (S_ISDIR(sctx->cur_inode_mode)) {
2446		ret = did_create_dir(sctx, sctx->cur_ino);
2447		if (ret < 0)
2448			goto out;
2449		if (ret) {
2450			ret = 0;
2451			goto out;
2452		}
2453	}
2454
2455	ret = send_create_inode(sctx, sctx->cur_ino);
2456	if (ret < 0)
2457		goto out;
2458
2459out:
2460	return ret;
2461}
2462
2463struct recorded_ref {
2464	struct list_head list;
2465	char *dir_path;
2466	char *name;
2467	struct fs_path *full_path;
2468	u64 dir;
2469	u64 dir_gen;
2470	int dir_path_len;
2471	int name_len;
2472};
2473
2474/*
2475 * We need to process new refs before deleted refs, but compare_tree gives us
2476 * everything mixed. So we first record all refs and later process them.
2477 * This function is a helper to record one ref.
2478 */
2479static int record_ref(struct list_head *head, u64 dir,
2480		      u64 dir_gen, struct fs_path *path)
2481{
2482	struct recorded_ref *ref;
2483	char *tmp;
2484
2485	ref = kmalloc(sizeof(*ref), GFP_NOFS);
2486	if (!ref)
2487		return -ENOMEM;
2488
2489	ref->dir = dir;
2490	ref->dir_gen = dir_gen;
2491	ref->full_path = path;
2492
2493	tmp = strrchr(ref->full_path->start, '/');
2494	if (!tmp) {
2495		ref->name_len = ref->full_path->end - ref->full_path->start;
2496		ref->name = ref->full_path->start;
2497		ref->dir_path_len = 0;
2498		ref->dir_path = ref->full_path->start;
2499	} else {
2500		tmp++;
2501		ref->name_len = ref->full_path->end - tmp;
2502		ref->name = tmp;
2503		ref->dir_path = ref->full_path->start;
2504		ref->dir_path_len = ref->full_path->end -
2505				ref->full_path->start - 1 - ref->name_len;
2506	}
2507
2508	list_add_tail(&ref->list, head);
2509	return 0;
2510}
2511
2512static void __free_recorded_refs(struct send_ctx *sctx, struct list_head *head)
2513{
2514	struct recorded_ref *cur;
2515	struct recorded_ref *tmp;
2516
2517	list_for_each_entry_safe(cur, tmp, head, list) {
2518		fs_path_free(sctx, cur->full_path);
2519		kfree(cur);
2520	}
2521	INIT_LIST_HEAD(head);
2522}
2523
2524static void free_recorded_refs(struct send_ctx *sctx)
2525{
2526	__free_recorded_refs(sctx, &sctx->new_refs);
2527	__free_recorded_refs(sctx, &sctx->deleted_refs);
2528}
2529
2530/*
2531 * Renames/moves a file/dir to it's orphan name. Used when the first
2532 * ref of an unprocessed inode gets overwritten and for all non empty
2533 * directories.
2534 */
2535static int orphanize_inode(struct send_ctx *sctx, u64 ino, u64 gen,
2536			  struct fs_path *path)
2537{
2538	int ret;
2539	struct fs_path *orphan;
2540
2541	orphan = fs_path_alloc(sctx);
2542	if (!orphan)
2543		return -ENOMEM;
2544
2545	ret = gen_unique_name(sctx, ino, gen, orphan);
2546	if (ret < 0)
2547		goto out;
2548
2549	ret = send_rename(sctx, path, orphan);
2550
2551out:
2552	fs_path_free(sctx, orphan);
2553	return ret;
2554}
2555
2556/*
2557 * Returns 1 if a directory can be removed at this point in time.
2558 * We check this by iterating all dir items and checking if the inode behind
2559 * the dir item was already processed.
2560 */
2561static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 send_progress)
2562{
2563	int ret = 0;
2564	struct btrfs_root *root = sctx->parent_root;
2565	struct btrfs_path *path;
2566	struct btrfs_key key;
2567	struct btrfs_key found_key;
2568	struct btrfs_key loc;
2569	struct btrfs_dir_item *di;
2570
2571	path = alloc_path_for_send();
2572	if (!path)
2573		return -ENOMEM;
2574
2575	key.objectid = dir;
2576	key.type = BTRFS_DIR_INDEX_KEY;
2577	key.offset = 0;
2578
2579	while (1) {
2580		ret = btrfs_search_slot_for_read(root, &key, path, 1, 0);
2581		if (ret < 0)
2582			goto out;
2583		if (!ret) {
2584			btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2585					path->slots[0]);
2586		}
2587		if (ret || found_key.objectid != key.objectid ||
2588		    found_key.type != key.type) {
2589			break;
2590		}
2591
2592		di = btrfs_item_ptr(path->nodes[0], path->slots[0],
2593				struct btrfs_dir_item);
2594		btrfs_dir_item_key_to_cpu(path->nodes[0], di, &loc);
2595
2596		if (loc.objectid > send_progress) {
2597			ret = 0;
2598			goto out;
2599		}
2600
2601		btrfs_release_path(path);
2602		key.offset = found_key.offset + 1;
2603	}
2604
2605	ret = 1;
2606
2607out:
2608	btrfs_free_path(path);
2609	return ret;
2610}
2611
2612/*
2613 * This does all the move/link/unlink/rmdir magic.
2614 */
2615static int process_recorded_refs(struct send_ctx *sctx)
2616{
2617	int ret = 0;
2618	struct recorded_ref *cur;
2619	struct recorded_ref *cur2;
2620	struct ulist *check_dirs = NULL;
2621	struct ulist_iterator uit;
2622	struct ulist_node *un;
2623	struct fs_path *valid_path = NULL;
2624	u64 ow_inode = 0;
2625	u64 ow_gen;
2626	int did_overwrite = 0;
2627	int is_orphan = 0;
2628
2629verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
2630
2631	valid_path = fs_path_alloc(sctx);
2632	if (!valid_path) {
2633		ret = -ENOMEM;
2634		goto out;
2635	}
2636
2637	check_dirs = ulist_alloc(GFP_NOFS);
2638	if (!check_dirs) {
2639		ret = -ENOMEM;
2640		goto out;
2641	}
2642
2643	/*
2644	 * First, check if the first ref of the current inode was overwritten
2645	 * before. If yes, we know that the current inode was already orphanized
2646	 * and thus use the orphan name. If not, we can use get_cur_path to
2647	 * get the path of the first ref as it would like while receiving at
2648	 * this point in time.
2649	 * New inodes are always orphan at the beginning, so force to use the
2650	 * orphan name in this case.
2651	 * The first ref is stored in valid_path and will be updated if it
2652	 * gets moved around.
2653	 */
2654	if (!sctx->cur_inode_new) {
2655		ret = did_overwrite_first_ref(sctx, sctx->cur_ino,
2656				sctx->cur_inode_gen);
2657		if (ret < 0)
2658			goto out;
2659		if (ret)
2660			did_overwrite = 1;
2661	}
2662	if (sctx->cur_inode_new || did_overwrite) {
2663		ret = gen_unique_name(sctx, sctx->cur_ino,
2664				sctx->cur_inode_gen, valid_path);
2665		if (ret < 0)
2666			goto out;
2667		is_orphan = 1;
2668	} else {
2669		ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen,
2670				valid_path);
2671		if (ret < 0)
2672			goto out;
2673	}
2674
2675	list_for_each_entry(cur, &sctx->new_refs, list) {
2676		/*
2677		 * We may have refs where the parent directory does not exist
2678		 * yet. This happens if the parent directories inum is higher
2679		 * the the current inum. To handle this case, we create the
2680		 * parent directory out of order. But we need to check if this
2681		 * did already happen before due to other refs in the same dir.
2682		 */
2683		ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen);
2684		if (ret < 0)
2685			goto out;
2686		if (ret == inode_state_will_create) {
2687			ret = 0;
2688			/*
2689			 * First check if any of the current inodes refs did
2690			 * already create the dir.
2691			 */
2692			list_for_each_entry(cur2, &sctx->new_refs, list) {
2693				if (cur == cur2)
2694					break;
2695				if (cur2->dir == cur->dir) {
2696					ret = 1;
2697					break;
2698				}
2699			}
2700
2701			/*
2702			 * If that did not happen, check if a previous inode
2703			 * did already create the dir.
2704			 */
2705			if (!ret)
2706				ret = did_create_dir(sctx, cur->dir);
2707			if (ret < 0)
2708				goto out;
2709			if (!ret) {
2710				ret = send_create_inode(sctx, cur->dir);
2711				if (ret < 0)
2712					goto out;
2713			}
2714		}
2715
2716		/*
2717		 * Check if this new ref would overwrite the first ref of
2718		 * another unprocessed inode. If yes, orphanize the
2719		 * overwritten inode. If we find an overwritten ref that is
2720		 * not the first ref, simply unlink it.
2721		 */
2722		ret = will_overwrite_ref(sctx, cur->dir, cur->dir_gen,
2723				cur->name, cur->name_len,
2724				&ow_inode, &ow_gen);
2725		if (ret < 0)
2726			goto out;
2727		if (ret) {
2728			ret = is_first_ref(sctx, sctx->parent_root,
2729					ow_inode, cur->dir, cur->name,
2730					cur->name_len);
2731			if (ret < 0)
2732				goto out;
2733			if (ret) {
2734				ret = orphanize_inode(sctx, ow_inode, ow_gen,
2735						cur->full_path);
2736				if (ret < 0)
2737					goto out;
2738			} else {
2739				ret = send_unlink(sctx, cur->full_path);
2740				if (ret < 0)
2741					goto out;
2742			}
2743		}
2744
2745		/*
2746		 * link/move the ref to the new place. If we have an orphan
2747		 * inode, move it and update valid_path. If not, link or move
2748		 * it depending on the inode mode.
2749		 */
2750		if (is_orphan) {
2751			ret = send_rename(sctx, valid_path, cur->full_path);
2752			if (ret < 0)
2753				goto out;
2754			is_orphan = 0;
2755			ret = fs_path_copy(valid_path, cur->full_path);
2756			if (ret < 0)
2757				goto out;
2758		} else {
2759			if (S_ISDIR(sctx->cur_inode_mode)) {
2760				/*
2761				 * Dirs can't be linked, so move it. For moved
2762				 * dirs, we always have one new and one deleted
2763				 * ref. The deleted ref is ignored later.
2764				 */
2765				ret = send_rename(sctx, valid_path,
2766						cur->full_path);
2767				if (ret < 0)
2768					goto out;
2769				ret = fs_path_copy(valid_path, cur->full_path);
2770				if (ret < 0)
2771					goto out;
2772			} else {
2773				ret = send_link(sctx, cur->full_path,
2774						valid_path);
2775				if (ret < 0)
2776					goto out;
2777			}
2778		}
2779		ret = ulist_add(check_dirs, cur->dir, cur->dir_gen,
2780				GFP_NOFS);
2781		if (ret < 0)
2782			goto out;
2783	}
2784
2785	if (S_ISDIR(sctx->cur_inode_mode) && sctx->cur_inode_deleted) {
2786		/*
2787		 * Check if we can already rmdir the directory. If not,
2788		 * orphanize it. For every dir item inside that gets deleted
2789		 * later, we do this check again and rmdir it then if possible.
2790		 * See the use of check_dirs for more details.
2791		 */
2792		ret = can_rmdir(sctx, sctx->cur_ino, sctx->cur_ino);
2793		if (ret < 0)
2794			goto out;
2795		if (ret) {
2796			ret = send_rmdir(sctx, valid_path);
2797			if (ret < 0)
2798				goto out;
2799		} else if (!is_orphan) {
2800			ret = orphanize_inode(sctx, sctx->cur_ino,
2801					sctx->cur_inode_gen, valid_path);
2802			if (ret < 0)
2803				goto out;
2804			is_orphan = 1;
2805		}
2806
2807		list_for_each_entry(cur, &sctx->deleted_refs, list) {
2808			ret = ulist_add(check_dirs, cur->dir, cur->dir_gen,
2809					GFP_NOFS);
2810			if (ret < 0)
2811				goto out;
2812		}
2813	} else if (S_ISDIR(sctx->cur_inode_mode) &&
2814		   !list_empty(&sctx->deleted_refs)) {
2815		/*
2816		 * We have a moved dir. Add the old parent to check_dirs
2817		 */
2818		cur = list_entry(sctx->deleted_refs.next, struct recorded_ref,
2819				list);
2820		ret = ulist_add(check_dirs, cur->dir, cur->dir_gen,
2821				GFP_NOFS);
2822		if (ret < 0)
2823			goto out;
2824	} else if (!S_ISDIR(sctx->cur_inode_mode)) {
2825		/*
2826		 * We have a non dir inode. Go through all deleted refs and
2827		 * unlink them if they were not already overwritten by other
2828		 * inodes.
2829		 */
2830		list_for_each_entry(cur, &sctx->deleted_refs, list) {
2831			ret = did_overwrite_ref(sctx, cur->dir, cur->dir_gen,
2832					sctx->cur_ino, sctx->cur_inode_gen,
2833					cur->name, cur->name_len);
2834			if (ret < 0)
2835				goto out;
2836			if (!ret) {
2837				ret = send_unlink(sctx, cur->full_path);
2838				if (ret < 0)
2839					goto out;
2840			}
2841			ret = ulist_add(check_dirs, cur->dir, cur->dir_gen,
2842					GFP_NOFS);
2843			if (ret < 0)
2844				goto out;
2845		}
2846
2847		/*
2848		 * If the inode is still orphan, unlink the orphan. This may
2849		 * happen when a previous inode did overwrite the first ref
2850		 * of this inode and no new refs were added for the current
2851		 * inode.
2852		 */
2853		if (is_orphan) {
2854			ret = send_unlink(sctx, valid_path);
2855			if (ret < 0)
2856				goto out;
2857		}
2858	}
2859
2860	/*
2861	 * We did collect all parent dirs where cur_inode was once located. We
2862	 * now go through all these dirs and check if they are pending for
2863	 * deletion and if it's finally possible to perform the rmdir now.
2864	 * We also update the inode stats of the parent dirs here.
2865	 */
2866	ULIST_ITER_INIT(&uit);
2867	while ((un = ulist_next(check_dirs, &uit))) {
2868		if (un->val > sctx->cur_ino)
2869			continue;
2870
2871		ret = get_cur_inode_state(sctx, un->val, un->aux);
2872		if (ret < 0)
2873			goto out;
2874
2875		if (ret == inode_state_did_create ||
2876		    ret == inode_state_no_change) {
2877			/* TODO delayed utimes */
2878			ret = send_utimes(sctx, un->val, un->aux);
2879			if (ret < 0)
2880				goto out;
2881		} else if (ret == inode_state_did_delete) {
2882			ret = can_rmdir(sctx, un->val, sctx->cur_ino);
2883			if (ret < 0)
2884				goto out;
2885			if (ret) {
2886				ret = get_cur_path(sctx, un->val, un->aux,
2887						valid_path);
2888				if (ret < 0)
2889					goto out;
2890				ret = send_rmdir(sctx, valid_path);
2891				if (ret < 0)
2892					goto out;
2893			}
2894		}
2895	}
2896
2897	/*
2898	 * Current inode is now at it's new position, so we must increase
2899	 * send_progress
2900	 */
2901	sctx->send_progress = sctx->cur_ino + 1;
2902
2903	ret = 0;
2904
2905out:
2906	free_recorded_refs(sctx);
2907	ulist_free(check_dirs);
2908	fs_path_free(sctx, valid_path);
2909	return ret;
2910}
2911
2912static int __record_new_ref(int num, u64 dir, int index,
2913			    struct fs_path *name,
2914			    void *ctx)
2915{
2916	int ret = 0;
2917	struct send_ctx *sctx = ctx;
2918	struct fs_path *p;
2919	u64 gen;
2920
2921	p = fs_path_alloc(sctx);
2922	if (!p)
2923		return -ENOMEM;
2924
2925	ret = get_inode_info(sctx->send_root, dir, NULL, &gen, NULL, NULL,
2926			NULL, NULL);
2927	if (ret < 0)
2928		goto out;
2929
2930	ret = get_cur_path(sctx, dir, gen, p);
2931	if (ret < 0)
2932		goto out;
2933	ret = fs_path_add_path(p, name);
2934	if (ret < 0)
2935		goto out;
2936
2937	ret = record_ref(&sctx->new_refs, dir, gen, p);
2938
2939out:
2940	if (ret)
2941		fs_path_free(sctx, p);
2942	return ret;
2943}
2944
2945static int __record_deleted_ref(int num, u64 dir, int index,
2946				struct fs_path *name,
2947				void *ctx)
2948{
2949	int ret = 0;
2950	struct send_ctx *sctx = ctx;
2951	struct fs_path *p;
2952	u64 gen;
2953
2954	p = fs_path_alloc(sctx);
2955	if (!p)
2956		return -ENOMEM;
2957
2958	ret = get_inode_info(sctx->parent_root, dir, NULL, &gen, NULL, NULL,
2959			NULL, NULL);
2960	if (ret < 0)
2961		goto out;
2962
2963	ret = get_cur_path(sctx, dir, gen, p);
2964	if (ret < 0)
2965		goto out;
2966	ret = fs_path_add_path(p, name);
2967	if (ret < 0)
2968		goto out;
2969
2970	ret = record_ref(&sctx->deleted_refs, dir, gen, p);
2971
2972out:
2973	if (ret)
2974		fs_path_free(sctx, p);
2975	return ret;
2976}
2977
2978static int record_new_ref(struct send_ctx *sctx)
2979{
2980	int ret;
2981
2982	ret = iterate_inode_ref(sctx, sctx->send_root, sctx->left_path,
2983			sctx->cmp_key, 0, __record_new_ref, sctx);
2984	if (ret < 0)
2985		goto out;
2986	ret = 0;
2987
2988out:
2989	return ret;
2990}
2991
2992static int record_deleted_ref(struct send_ctx *sctx)
2993{
2994	int ret;
2995
2996	ret = iterate_inode_ref(sctx, sctx->parent_root, sctx->right_path,
2997			sctx->cmp_key, 0, __record_deleted_ref, sctx);
2998	if (ret < 0)
2999		goto out;
3000	ret = 0;
3001
3002out:
3003	return ret;
3004}
3005
3006struct find_ref_ctx {
3007	u64 dir;
3008	struct fs_path *name;
3009	int found_idx;
3010};
3011
3012static int __find_iref(int num, u64 dir, int index,
3013		       struct fs_path *name,
3014		       void *ctx_)
3015{
3016	struct find_ref_ctx *ctx = ctx_;
3017
3018	if (dir == ctx->dir && fs_path_len(name) == fs_path_len(ctx->name) &&
3019	    strncmp(name->start, ctx->name->start, fs_path_len(name)) == 0) {
3020		ctx->found_idx = num;
3021		return 1;
3022	}
3023	return 0;
3024}
3025
3026static int find_iref(struct send_ctx *sctx,
3027		     struct btrfs_root *root,
3028		     struct btrfs_path *path,
3029		     struct btrfs_key *key,
3030		     u64 dir, struct fs_path *name)
3031{
3032	int ret;
3033	struct find_ref_ctx ctx;
3034
3035	ctx.dir = dir;
3036	ctx.name = name;
3037	ctx.found_idx = -1;
3038
3039	ret = iterate_inode_ref(sctx, root, path, key, 0, __find_iref, &ctx);
3040	if (ret < 0)
3041		return ret;
3042
3043	if (ctx.found_idx == -1)
3044		return -ENOENT;
3045
3046	return ctx.found_idx;
3047}
3048
3049static int __record_changed_new_ref(int num, u64 dir, int index,
3050				    struct fs_path *name,
3051				    void *ctx)
3052{
3053	int ret;
3054	struct send_ctx *sctx = ctx;
3055
3056	ret = find_iref(sctx, sctx->parent_root, sctx->right_path,
3057			sctx->cmp_key, dir, name);
3058	if (ret == -ENOENT)
3059		ret = __record_new_ref(num, dir, index, name, sctx);
3060	else if (ret > 0)
3061		ret = 0;
3062
3063	return ret;
3064}
3065
3066static int __record_changed_deleted_ref(int num, u64 dir, int index,
3067					struct fs_path *name,
3068					void *ctx)
3069{
3070	int ret;
3071	struct send_ctx *sctx = ctx;
3072
3073	ret = find_iref(sctx, sctx->send_root, sctx->left_path, sctx->cmp_key,
3074			dir, name);
3075	if (ret == -ENOENT)
3076		ret = __record_deleted_ref(num, dir, index, name, sctx);
3077	else if (ret > 0)
3078		ret = 0;
3079
3080	return ret;
3081}
3082
3083static int record_changed_ref(struct send_ctx *sctx)
3084{
3085	int ret = 0;
3086
3087	ret = iterate_inode_ref(sctx, sctx->send_root, sctx->left_path,
3088			sctx->cmp_key, 0, __record_changed_new_ref, sctx);
3089	if (ret < 0)
3090		goto out;
3091	ret = iterate_inode_ref(sctx, sctx->parent_root, sctx->right_path,
3092			sctx->cmp_key, 0, __record_changed_deleted_ref, sctx);
3093	if (ret < 0)
3094		goto out;
3095	ret = 0;
3096
3097out:
3098	return ret;
3099}
3100
3101/*
3102 * Record and process all refs at once. Needed when an inode changes the
3103 * generation number, which means that it was deleted and recreated.
3104 */
3105static int process_all_refs(struct send_ctx *sctx,
3106			    enum btrfs_compare_tree_result cmd)
3107{
3108	int ret;
3109	struct btrfs_root *root;
3110	struct btrfs_path *path;
3111	struct btrfs_key key;
3112	struct btrfs_key found_key;
3113	struct extent_buffer *eb;
3114	int slot;
3115	iterate_inode_ref_t cb;
3116
3117	path = alloc_path_for_send();
3118	if (!path)
3119		return -ENOMEM;
3120
3121	if (cmd == BTRFS_COMPARE_TREE_NEW) {
3122		root = sctx->send_root;
3123		cb = __record_new_ref;
3124	} else if (cmd == BTRFS_COMPARE_TREE_DELETED) {
3125		root = sctx->parent_root;
3126		cb = __record_deleted_ref;
3127	} else {
3128		BUG();
3129	}
3130
3131	key.objectid = sctx->cmp_key->objectid;
3132	key.type = BTRFS_INODE_REF_KEY;
3133	key.offset = 0;
3134	while (1) {
3135		ret = btrfs_search_slot_for_read(root, &key, path, 1, 0);
3136		if (ret < 0) {
3137			btrfs_release_path(path);
3138			goto out;
3139		}
3140		if (ret) {
3141			btrfs_release_path(path);
3142			break;
3143		}
3144
3145		eb = path->nodes[0];
3146		slot = path->slots[0];
3147		btrfs_item_key_to_cpu(eb, &found_key, slot);
3148
3149		if (found_key.objectid != key.objectid ||
3150		    found_key.type != key.type) {
3151			btrfs_release_path(path);
3152			break;
3153		}
3154
3155		ret = iterate_inode_ref(sctx, sctx->parent_root, path,
3156				&found_key, 0, cb, sctx);
3157		btrfs_release_path(path);
3158		if (ret < 0)
3159			goto out;
3160
3161		key.offset = found_key.offset + 1;
3162	}
3163
3164	ret = process_recorded_refs(sctx);
3165
3166out:
3167	btrfs_free_path(path);
3168	return ret;
3169}
3170
3171static int send_set_xattr(struct send_ctx *sctx,
3172			  struct fs_path *path,
3173			  const char *name, int name_len,
3174			  const char *data, int data_len)
3175{
3176	int ret = 0;
3177
3178	ret = begin_cmd(sctx, BTRFS_SEND_C_SET_XATTR);
3179	if (ret < 0)
3180		goto out;
3181
3182	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
3183	TLV_PUT_STRING(sctx, BTRFS_SEND_A_XATTR_NAME, name, name_len);
3184	TLV_PUT(sctx, BTRFS_SEND_A_XATTR_DATA, data, data_len);
3185
3186	ret = send_cmd(sctx);
3187
3188tlv_put_failure:
3189out:
3190	return ret;
3191}
3192
3193static int send_remove_xattr(struct send_ctx *sctx,
3194			  struct fs_path *path,
3195			  const char *name, int name_len)
3196{
3197	int ret = 0;
3198
3199	ret = begin_cmd(sctx, BTRFS_SEND_C_REMOVE_XATTR);
3200	if (ret < 0)
3201		goto out;
3202
3203	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
3204	TLV_PUT_STRING(sctx, BTRFS_SEND_A_XATTR_NAME, name, name_len);
3205
3206	ret = send_cmd(sctx);
3207
3208tlv_put_failure:
3209out:
3210	return ret;
3211}
3212
3213static int __process_new_xattr(int num, struct btrfs_key *di_key,
3214			       const char *name, int name_len,
3215			       const char *data, int data_len,
3216			       u8 type, void *ctx)
3217{
3218	int ret;
3219	struct send_ctx *sctx = ctx;
3220	struct fs_path *p;
3221	posix_acl_xattr_header dummy_acl;
3222
3223	p = fs_path_alloc(sctx);
3224	if (!p)
3225		return -ENOMEM;
3226
3227	/*
3228	 * This hack is needed because empty acl's are stored as zero byte
3229	 * data in xattrs. Problem with that is, that receiving these zero byte
3230	 * acl's will fail later. To fix this, we send a dummy acl list that
3231	 * only contains the version number and no entries.
3232	 */
3233	if (!strncmp(name, XATTR_NAME_POSIX_ACL_ACCESS, name_len) ||
3234	    !strncmp(name, XATTR_NAME_POSIX_ACL_DEFAULT, name_len)) {
3235		if (data_len == 0) {
3236			dummy_acl.a_version =
3237					cpu_to_le32(POSIX_ACL_XATTR_VERSION);
3238			data = (char *)&dummy_acl;
3239			data_len = sizeof(dummy_acl);
3240		}
3241	}
3242
3243	ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
3244	if (ret < 0)
3245		goto out;
3246
3247	ret = send_set_xattr(sctx, p, name, name_len, data, data_len);
3248
3249out:
3250	fs_path_free(sctx, p);
3251	return ret;
3252}
3253
3254static int __process_deleted_xattr(int num, struct btrfs_key *di_key,
3255				   const char *name, int name_len,
3256				   const char *data, int data_len,
3257				   u8 type, void *ctx)
3258{
3259	int ret;
3260	struct send_ctx *sctx = ctx;
3261	struct fs_path *p;
3262
3263	p = fs_path_alloc(sctx);
3264	if (!p)
3265		return -ENOMEM;
3266
3267	ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
3268	if (ret < 0)
3269		goto out;
3270
3271	ret = send_remove_xattr(sctx, p, name, name_len);
3272
3273out:
3274	fs_path_free(sctx, p);
3275	return ret;
3276}
3277
3278static int process_new_xattr(struct send_ctx *sctx)
3279{
3280	int ret = 0;
3281
3282	ret = iterate_dir_item(sctx, sctx->send_root, sctx->left_path,
3283			sctx->cmp_key, __process_new_xattr, sctx);
3284
3285	return ret;
3286}
3287
3288static int process_deleted_xattr(struct send_ctx *sctx)
3289{
3290	int ret;
3291
3292	ret = iterate_dir_item(sctx, sctx->parent_root, sctx->right_path,
3293			sctx->cmp_key, __process_deleted_xattr, sctx);
3294
3295	return ret;
3296}
3297
/*
 * Search state handed to __find_xattr() through iterate_dir_item().
 */
struct find_xattr_ctx {
	/* name of the wanted xattr, compared over name_len bytes */
	const char *name;
	int name_len;
	/* "num" of the matching xattr, -1 as long as nothing was found */
	int found_idx;
	/* kmalloc'ed copy of the data of the matching xattr */
	char *found_data;
	int found_data_len;
};
3305
3306static int __find_xattr(int num, struct btrfs_key *di_key,
3307			const char *name, int name_len,
3308			const char *data, int data_len,
3309			u8 type, void *vctx)
3310{
3311	struct find_xattr_ctx *ctx = vctx;
3312
3313	if (name_len == ctx->name_len &&
3314	    strncmp(name, ctx->name, name_len) == 0) {
3315		ctx->found_idx = num;
3316		ctx->found_data_len = data_len;
3317		ctx->found_data = kmalloc(data_len, GFP_NOFS);
3318		if (!ctx->found_data)
3319			return -ENOMEM;
3320		memcpy(ctx->found_data, data, data_len);
3321		return 1;
3322	}
3323	return 0;
3324}
3325
3326static int find_xattr(struct send_ctx *sctx,
3327		      struct btrfs_root *root,
3328		      struct btrfs_path *path,
3329		      struct btrfs_key *key,
3330		      const char *name, int name_len,
3331		      char **data, int *data_len)
3332{
3333	int ret;
3334	struct find_xattr_ctx ctx;
3335
3336	ctx.name = name;
3337	ctx.name_len = name_len;
3338	ctx.found_idx = -1;
3339	ctx.found_data = NULL;
3340	ctx.found_data_len = 0;
3341
3342	ret = iterate_dir_item(sctx, root, path, key, __find_xattr, &ctx);
3343	if (ret < 0)
3344		return ret;
3345
3346	if (ctx.found_idx == -1)
3347		return -ENOENT;
3348	if (data) {
3349		*data = ctx.found_data;
3350		*data_len = ctx.found_data_len;
3351	} else {
3352		kfree(ctx.found_data);
3353	}
3354	return ctx.found_idx;
3355}
3356
3357
3358static int __process_changed_new_xattr(int num, struct btrfs_key *di_key,
3359				       const char *name, int name_len,
3360				       const char *data, int data_len,
3361				       u8 type, void *ctx)
3362{
3363	int ret;
3364	struct send_ctx *sctx = ctx;
3365	char *found_data = NULL;
3366	int found_data_len  = 0;
3367	struct fs_path *p = NULL;
3368
3369	ret = find_xattr(sctx, sctx->parent_root, sctx->right_path,
3370			sctx->cmp_key, name, name_len, &found_data,
3371			&found_data_len);
3372	if (ret == -ENOENT) {
3373		ret = __process_new_xattr(num, di_key, name, name_len, data,
3374				data_len, type, ctx);
3375	} else if (ret >= 0) {
3376		if (data_len != found_data_len ||
3377		    memcmp(data, found_data, data_len)) {
3378			ret = __process_new_xattr(num, di_key, name, name_len,
3379					data, data_len, type, ctx);
3380		} else {
3381			ret = 0;
3382		}
3383	}
3384
3385	kfree(found_data);
3386	fs_path_free(sctx, p);
3387	return ret;
3388}
3389
3390static int __process_changed_deleted_xattr(int num, struct btrfs_key *di_key,
3391					   const char *name, int name_len,
3392					   const char *data, int data_len,
3393					   u8 type, void *ctx)
3394{
3395	int ret;
3396	struct send_ctx *sctx = ctx;
3397
3398	ret = find_xattr(sctx, sctx->send_root, sctx->left_path, sctx->cmp_key,
3399			name, name_len, NULL, NULL);
3400	if (ret == -ENOENT)
3401		ret = __process_deleted_xattr(num, di_key, name, name_len, data,
3402				data_len, type, ctx);
3403	else if (ret >= 0)
3404		ret = 0;
3405
3406	return ret;
3407}
3408
3409static int process_changed_xattr(struct send_ctx *sctx)
3410{
3411	int ret = 0;
3412
3413	ret = iterate_dir_item(sctx, sctx->send_root, sctx->left_path,
3414			sctx->cmp_key, __process_changed_new_xattr, sctx);
3415	if (ret < 0)
3416		goto out;
3417	ret = iterate_dir_item(sctx, sctx->parent_root, sctx->right_path,
3418			sctx->cmp_key, __process_changed_deleted_xattr, sctx);
3419
3420out:
3421	return ret;
3422}
3423
3424static int process_all_new_xattrs(struct send_ctx *sctx)
3425{
3426	int ret;
3427	struct btrfs_root *root;
3428	struct btrfs_path *path;
3429	struct btrfs_key key;
3430	struct btrfs_key found_key;
3431	struct extent_buffer *eb;
3432	int slot;
3433
3434	path = alloc_path_for_send();
3435	if (!path)
3436		return -ENOMEM;
3437
3438	root = sctx->send_root;
3439
3440	key.objectid = sctx->cmp_key->objectid;
3441	key.type = BTRFS_XATTR_ITEM_KEY;
3442	key.offset = 0;
3443	while (1) {
3444		ret = btrfs_search_slot_for_read(root, &key, path, 1, 0);
3445		if (ret < 0)
3446			goto out;
3447		if (ret) {
3448			ret = 0;
3449			goto out;
3450		}
3451
3452		eb = path->nodes[0];
3453		slot = path->slots[0];
3454		btrfs_item_key_to_cpu(eb, &found_key, slot);
3455
3456		if (found_key.objectid != key.objectid ||
3457		    found_key.type != key.type) {
3458			ret = 0;
3459			goto out;
3460		}
3461
3462		ret = iterate_dir_item(sctx, root, path, &found_key,
3463				__process_new_xattr, sctx);
3464		if (ret < 0)
3465			goto out;
3466
3467		btrfs_release_path(path);
3468		key.offset = found_key.offset + 1;
3469	}
3470
3471out:
3472	btrfs_free_path(path);
3473	return ret;
3474}
3475
3476/*
3477 * Read some bytes from the current inode/file and send a write command to
3478 * user space.
3479 */
3480static int send_write(struct send_ctx *sctx, u64 offset, u32 len)
3481{
3482	int ret = 0;
3483	struct fs_path *p;
3484	loff_t pos = offset;
3485	int readed = 0;
3486	mm_segment_t old_fs;
3487
3488	p = fs_path_alloc(sctx);
3489	if (!p)
3490		return -ENOMEM;
3491
3492	/*
3493	 * vfs normally only accepts user space buffers for security reasons.
3494	 * we only read from the file and also only provide the read_buf buffer
3495	 * to vfs. As this buffer does not come from a user space call, it's
3496	 * ok to temporary allow kernel space buffers.
3497	 */
3498	old_fs = get_fs();
3499	set_fs(KERNEL_DS);
3500
3501verbose_printk("btrfs: send_write offset=%llu, len=%d\n", offset, len);
3502
3503	ret = open_cur_inode_file(sctx);
3504	if (ret < 0)
3505		goto out;
3506
3507	ret = vfs_read(sctx->cur_inode_filp, sctx->read_buf, len, &pos);
3508	if (ret < 0)
3509		goto out;
3510	readed = ret;
3511	if (!readed)
3512		goto out;
3513
3514	ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE);
3515	if (ret < 0)
3516		goto out;
3517
3518	ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
3519	if (ret < 0)
3520		goto out;
3521
3522	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
3523	TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
3524	TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, readed);
3525
3526	ret = send_cmd(sctx);
3527
3528tlv_put_failure:
3529out:
3530	fs_path_free(sctx, p);
3531	set_fs(old_fs);
3532	if (ret < 0)
3533		return ret;
3534	return readed;
3535}
3536
3537/*
3538 * Send a clone command to user space.
3539 */
3540static int send_clone(struct send_ctx *sctx,
3541		      u64 offset, u32 len,
3542		      struct clone_root *clone_root)
3543{
3544	int ret = 0;
3545	struct btrfs_root *clone_root2 = clone_root->root;
3546	struct fs_path *p;
3547	u64 gen;
3548
3549verbose_printk("btrfs: send_clone offset=%llu, len=%d, clone_root=%llu, "
3550	       "clone_inode=%llu, clone_offset=%llu\n", offset, len,
3551		clone_root->root->objectid, clone_root->ino,
3552		clone_root->offset);
3553
3554	p = fs_path_alloc(sctx);
3555	if (!p)
3556		return -ENOMEM;
3557
3558	ret = begin_cmd(sctx, BTRFS_SEND_C_CLONE);
3559	if (ret < 0)
3560		goto out;
3561
3562	ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
3563	if (ret < 0)
3564		goto out;
3565
3566	TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
3567	TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_LEN, len);
3568	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
3569
3570	if (clone_root2 == sctx->send_root) {
3571		ret = get_inode_info(sctx->send_root, clone_root->ino, NULL,
3572				&gen, NULL, NULL, NULL, NULL);
3573		if (ret < 0)
3574			goto out;
3575		ret = get_cur_path(sctx, clone_root->ino, gen, p);
3576	} else {
3577		ret = get_inode_path(sctx, clone_root2, clone_root->ino, p);
3578	}
3579	if (ret < 0)
3580		goto out;
3581
3582	TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
3583			clone_root2->root_item.uuid);
3584	TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID,
3585			clone_root2->root_item.ctransid);
3586	TLV_PUT_PATH(sctx, BTRFS_SEND_A_CLONE_PATH, p);
3587	TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_OFFSET,
3588			clone_root->offset);
3589
3590	ret = send_cmd(sctx);
3591
3592tlv_put_failure:
3593out:
3594	fs_path_free(sctx, p);
3595	return ret;
3596}
3597
3598static int send_write_or_clone(struct send_ctx *sctx,
3599			       struct btrfs_path *path,
3600			       struct btrfs_key *key,
3601			       struct clone_root *clone_root)
3602{
3603	int ret = 0;
3604	struct btrfs_file_extent_item *ei;
3605	u64 offset = key->offset;
3606	u64 pos = 0;
3607	u64 len;
3608	u32 l;
3609	u8 type;
3610
3611	ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
3612			struct btrfs_file_extent_item);
3613	type = btrfs_file_extent_type(path->nodes[0], ei);
3614	if (type == BTRFS_FILE_EXTENT_INLINE)
3615		len = btrfs_file_extent_inline_len(path->nodes[0], ei);
3616	else
3617		len = btrfs_file_extent_num_bytes(path->nodes[0], ei);
3618
3619	if (offset + len > sctx->cur_inode_size)
3620		len = sctx->cur_inode_size - offset;
3621	if (len == 0) {
3622		ret = 0;
3623		goto out;
3624	}
3625
3626	if (!clone_root) {
3627		while (pos < len) {
3628			l = len - pos;
3629			if (l > BTRFS_SEND_READ_SIZE)
3630				l = BTRFS_SEND_READ_SIZE;
3631			ret = send_write(sctx, pos + offset, l);
3632			if (ret < 0)
3633				goto out;
3634			if (!ret)
3635				break;
3636			pos += ret;
3637		}
3638		ret = 0;
3639	} else {
3640		ret = send_clone(sctx, offset, len, clone_root);
3641	}
3642
3643out:
3644	return ret;
3645}
3646
3647static int is_extent_unchanged(struct send_ctx *sctx,
3648			       struct btrfs_path *left_path,
3649			       struct btrfs_key *ekey)
3650{
3651	int ret = 0;
3652	struct btrfs_key key;
3653	struct btrfs_path *path = NULL;
3654	struct extent_buffer *eb;
3655	int slot;
3656	struct btrfs_key found_key;
3657	struct btrfs_file_extent_item *ei;
3658	u64 left_disknr;
3659	u64 right_disknr;
3660	u64 left_offset;
3661	u64 right_offset;
3662	u64 left_offset_fixed;
3663	u64 left_len;
3664	u64 right_len;
3665	u8 left_type;
3666	u8 right_type;
3667
3668	path = alloc_path_for_send();
3669	if (!path)
3670		return -ENOMEM;
3671
3672	eb = left_path->nodes[0];
3673	slot = left_path->slots[0];
3674
3675	ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
3676	left_type = btrfs_file_extent_type(eb, ei);
3677	left_disknr = btrfs_file_extent_disk_bytenr(eb, ei);
3678	left_len = btrfs_file_extent_num_bytes(eb, ei);
3679	left_offset = btrfs_file_extent_offset(eb, ei);
3680
3681	if (left_type != BTRFS_FILE_EXTENT_REG) {
3682		ret = 0;
3683		goto out;
3684	}
3685
3686	/*
3687	 * Following comments will refer to these graphics. L is the left
3688	 * extents which we are checking at the moment. 1-8 are the right
3689	 * extents that we iterate.
3690	 *
3691	 *       |-----L-----|
3692	 * |-1-|-2a-|-3-|-4-|-5-|-6-|
3693	 *
3694	 *       |-----L-----|
3695	 * |--1--|-2b-|...(same as above)
3696	 *
3697	 * Alternative situation. Happens on files where extents got split.
3698	 *       |-----L-----|
3699	 * |-----------7-----------|-6-|
3700	 *
3701	 * Alternative situation. Happens on files which got larger.
3702	 *       |-----L-----|
3703	 * |-8-|
3704	 * Nothing follows after 8.
3705	 */
3706
3707	key.objectid = ekey->objectid;
3708	key.type = BTRFS_EXTENT_DATA_KEY;
3709	key.offset = ekey->offset;
3710	ret = btrfs_search_slot_for_read(sctx->parent_root, &key, path, 0, 0);
3711	if (ret < 0)
3712		goto out;
3713	if (ret) {
3714		ret = 0;
3715		goto out;
3716	}
3717
3718	/*
3719	 * Handle special case where the right side has no extents at all.
3720	 */
3721	eb = path->nodes[0];
3722	slot = path->slots[0];
3723	btrfs_item_key_to_cpu(eb, &found_key, slot);
3724	if (found_key.objectid != key.objectid ||
3725	    found_key.type != key.type) {
3726		ret = 0;
3727		goto out;
3728	}
3729
3730	/*
3731	 * We're now on 2a, 2b or 7.
3732	 */
3733	key = found_key;
3734	while (key.offset < ekey->offset + left_len) {
3735		ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
3736		right_type = btrfs_file_extent_type(eb, ei);
3737		right_disknr = btrfs_file_extent_disk_bytenr(eb, ei);
3738		right_len = btrfs_file_extent_num_bytes(eb, ei);
3739		right_offset = btrfs_file_extent_offset(eb, ei);
3740
3741		if (right_type != BTRFS_FILE_EXTENT_REG) {
3742			ret = 0;
3743			goto out;
3744		}
3745
3746		/*
3747		 * Are we at extent 8? If yes, we know the extent is changed.
3748		 * This may only happen on the first iteration.
3749		 */
3750		if (found_key.offset + right_len < ekey->offset) {
3751			ret = 0;
3752			goto out;
3753		}
3754
3755		left_offset_fixed = left_offset;
3756		if (key.offset < ekey->offset) {
3757			/* Fix the right offset for 2a and 7. */
3758			right_offset += ekey->offset - key.offset;
3759		} else {
3760			/* Fix the left offset for all behind 2a and 2b */
3761			left_offset_fixed += key.offset - ekey->offset;
3762		}
3763
3764		/*
3765		 * Check if we have the same extent.
3766		 */
3767		if (left_disknr + left_offset_fixed !=
3768				right_disknr + right_offset) {
3769			ret = 0;
3770			goto out;
3771		}
3772
3773		/*
3774		 * Go to the next extent.
3775		 */
3776		ret = btrfs_next_item(sctx->parent_root, path);
3777		if (ret < 0)
3778			goto out;
3779		if (!ret) {
3780			eb = path->nodes[0];
3781			slot = path->slots[0];
3782			btrfs_item_key_to_cpu(eb, &found_key, slot);
3783		}
3784		if (ret || found_key.objectid != key.objectid ||
3785		    found_key.type != key.type) {
3786			key.offset += right_len;
3787			break;
3788		} else {
3789			if (found_key.offset != key.offset + right_len) {
3790				/* Should really not happen */
3791				ret = -EIO;
3792				goto out;
3793			}
3794		}
3795		key = found_key;
3796	}
3797
3798	/*
3799	 * We're now behind the left extent (treat as unchanged) or at the end
3800	 * of the right side (treat as changed).
3801	 */
3802	if (key.offset >= ekey->offset + left_len)
3803		ret = 1;
3804	else
3805		ret = 0;
3806
3807
3808out:
3809	btrfs_free_path(path);
3810	return ret;
3811}
3812
3813static int process_extent(struct send_ctx *sctx,
3814			  struct btrfs_path *path,
3815			  struct btrfs_key *key)
3816{
3817	int ret = 0;
3818	struct clone_root *found_clone = NULL;
3819
3820	if (S_ISLNK(sctx->cur_inode_mode))
3821		return 0;
3822
3823	if (sctx->parent_root && !sctx->cur_inode_new) {
3824		ret = is_extent_unchanged(sctx, path, key);
3825		if (ret < 0)
3826			goto out;
3827		if (ret) {
3828			ret = 0;
3829			goto out;
3830		}
3831	}
3832
3833	ret = find_extent_clone(sctx, path, key->objectid, key->offset,
3834			sctx->cur_inode_size, &found_clone);
3835	if (ret != -ENOENT && ret < 0)
3836		goto out;
3837
3838	ret = send_write_or_clone(sctx, path, key, found_clone);
3839
3840out:
3841	return ret;
3842}
3843
3844static int process_all_extents(struct send_ctx *sctx)
3845{
3846	int ret;
3847	struct btrfs_root *root;
3848	struct btrfs_path *path;
3849	struct btrfs_key key;
3850	struct btrfs_key found_key;
3851	struct extent_buffer *eb;
3852	int slot;
3853
3854	root = sctx->send_root;
3855	path = alloc_path_for_send();
3856	if (!path)
3857		return -ENOMEM;
3858
3859	key.objectid = sctx->cmp_key->objectid;
3860	key.type = BTRFS_EXTENT_DATA_KEY;
3861	key.offset = 0;
3862	while (1) {
3863		ret = btrfs_search_slot_for_read(root, &key, path, 1, 0);
3864		if (ret < 0)
3865			goto out;
3866		if (ret) {
3867			ret = 0;
3868			goto out;
3869		}
3870
3871		eb = path->nodes[0];
3872		slot = path->slots[0];
3873		btrfs_item_key_to_cpu(eb, &found_key, slot);
3874
3875		if (found_key.objectid != key.objectid ||
3876		    found_key.type != key.type) {
3877			ret = 0;
3878			goto out;
3879		}
3880
3881		ret = process_extent(sctx, path, &found_key);
3882		if (ret < 0)
3883			goto out;
3884
3885		btrfs_release_path(path);
3886		key.offset = found_key.offset + 1;
3887	}
3888
3889out:
3890	btrfs_free_path(path);
3891	return ret;
3892}
3893
3894static int process_recorded_refs_if_needed(struct send_ctx *sctx, int at_end)
3895{
3896	int ret = 0;
3897
3898	if (sctx->cur_ino == 0)
3899		goto out;
3900	if (!at_end && sctx->cur_ino == sctx->cmp_key->objectid &&
3901	    sctx->cmp_key->type <= BTRFS_INODE_REF_KEY)
3902		goto out;
3903	if (list_empty(&sctx->new_refs) && list_empty(&sctx->deleted_refs))
3904		goto out;
3905
3906	ret = process_recorded_refs(sctx);
3907
3908out:
3909	return ret;
3910}
3911
3912static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
3913{
3914	int ret = 0;
3915	u64 left_mode;
3916	u64 left_uid;
3917	u64 left_gid;
3918	u64 right_mode;
3919	u64 right_uid;
3920	u64 right_gid;
3921	int need_chmod = 0;
3922	int need_chown = 0;
3923
3924	ret = process_recorded_refs_if_needed(sctx, at_end);
3925	if (ret < 0)
3926		goto out;
3927
3928	if (sctx->cur_ino == 0 || sctx->cur_inode_deleted)
3929		goto out;
3930	if (!at_end && sctx->cmp_key->objectid == sctx->cur_ino)
3931		goto out;
3932
3933	ret = get_inode_info(sctx->send_root, sctx->cur_ino, NULL, NULL,
3934			&left_mode, &left_uid, &left_gid, NULL);
3935	if (ret < 0)
3936		goto out;
3937
3938	if (!S_ISLNK(sctx->cur_inode_mode)) {
3939		if (!sctx->parent_root || sctx->cur_inode_new) {
3940			need_chmod = 1;
3941			need_chown = 1;
3942		} else {
3943			ret = get_inode_info(sctx->parent_root, sctx->cur_ino,
3944					NULL, NULL, &right_mode, &right_uid,
3945					&right_gid, NULL);
3946			if (ret < 0)
3947				goto out;
3948
3949			if (left_uid != right_uid || left_gid != right_gid)
3950				need_chown = 1;
3951			if (left_mode != right_mode)
3952				need_chmod = 1;
3953		}
3954	}
3955
3956	if (S_ISREG(sctx->cur_inode_mode)) {
3957		ret = send_truncate(sctx, sctx->cur_ino, sctx->cur_inode_gen,
3958				sctx->cur_inode_size);
3959		if (ret < 0)
3960			goto out;
3961	}
3962
3963	if (need_chown) {
3964		ret = send_chown(sctx, sctx->cur_ino, sctx->cur_inode_gen,
3965				left_uid, left_gid);
3966		if (ret < 0)
3967			goto out;
3968	}
3969	if (need_chmod) {
3970		ret = send_chmod(sctx, sctx->cur_ino, sctx->cur_inode_gen,
3971				left_mode);
3972		if (ret < 0)
3973			goto out;
3974	}
3975
3976	/*
3977	 * Need to send that every time, no matter if it actually changed
3978	 * between the two trees as we have done changes to the inode before.
3979	 */
3980	ret = send_utimes(sctx, sctx->cur_ino, sctx->cur_inode_gen);
3981	if (ret < 0)
3982		goto out;
3983
3984out:
3985	return ret;
3986}
3987
3988static int changed_inode(struct send_ctx *sctx,
3989			 enum btrfs_compare_tree_result result)
3990{
3991	int ret = 0;
3992	struct btrfs_key *key = sctx->cmp_key;
3993	struct btrfs_inode_item *left_ii = NULL;
3994	struct btrfs_inode_item *right_ii = NULL;
3995	u64 left_gen = 0;
3996	u64 right_gen = 0;
3997
3998	ret = close_cur_inode_file(sctx);
3999	if (ret < 0)
4000		goto out;
4001
4002	sctx->cur_ino = key->objectid;
4003	sctx->cur_inode_new_gen = 0;
4004	sctx->send_progress = sctx->cur_ino;
4005
4006	if (result == BTRFS_COMPARE_TREE_NEW ||
4007	    result == BTRFS_COMPARE_TREE_CHANGED) {
4008		left_ii = btrfs_item_ptr(sctx->left_path->nodes[0],
4009				sctx->left_path->slots[0],
4010				struct btrfs_inode_item);
4011		left_gen = btrfs_inode_generation(sctx->left_path->nodes[0],
4012				left_ii);
4013	} else {
4014		right_ii = btrfs_item_ptr(sctx->right_path->nodes[0],
4015				sctx->right_path->slots[0],
4016				struct btrfs_inode_item);
4017		right_gen = btrfs_inode_generation(sctx->right_path->nodes[0],
4018				right_ii);
4019	}
4020	if (result == BTRFS_COMPARE_TREE_CHANGED) {
4021		right_ii = btrfs_item_ptr(sctx->right_path->nodes[0],
4022				sctx->right_path->slots[0],
4023				struct btrfs_inode_item);
4024
4025		right_gen = btrfs_inode_generation(sctx->right_path->nodes[0],
4026				right_ii);
4027		if (left_gen != right_gen)
4028			sctx->cur_inode_new_gen = 1;
4029	}
4030
4031	if (result == BTRFS_COMPARE_TREE_NEW) {
4032		sctx->cur_inode_gen = left_gen;
4033		sctx->cur_inode_new = 1;
4034		sctx->cur_inode_deleted = 0;
4035		sctx->cur_inode_size = btrfs_inode_size(
4036				sctx->left_path->nodes[0], left_ii);
4037		sctx->cur_inode_mode = btrfs_inode_mode(
4038				sctx->left_path->nodes[0], left_ii);
4039		if (sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID)
4040			ret = send_create_inode_if_needed(sctx);
4041	} else if (result == BTRFS_COMPARE_TREE_DELETED) {
4042		sctx->cur_inode_gen = right_gen;
4043		sctx->cur_inode_new = 0;
4044		sctx->cur_inode_deleted = 1;
4045		sctx->cur_inode_size = btrfs_inode_size(
4046				sctx->right_path->nodes[0], right_ii);
4047		sctx->cur_inode_mode = btrfs_inode_mode(
4048				sctx->right_path->nodes[0], right_ii);
4049	} else if (result == BTRFS_COMPARE_TREE_CHANGED) {
4050		if (sctx->cur_inode_new_gen) {
4051			sctx->cur_inode_gen = right_gen;
4052			sctx->cur_inode_new = 0;
4053			sctx->cur_inode_deleted = 1;
4054			sctx->cur_inode_size = btrfs_inode_size(
4055					sctx->right_path->nodes[0], right_ii);
4056			sctx->cur_inode_mode = btrfs_inode_mode(
4057					sctx->right_path->nodes[0], right_ii);
4058			ret = process_all_refs(sctx,
4059					BTRFS_COMPARE_TREE_DELETED);
4060			if (ret < 0)
4061				goto out;
4062
4063			sctx->cur_inode_gen = left_gen;
4064			sctx->cur_inode_new = 1;
4065			sctx->cur_inode_deleted = 0;
4066			sctx->cur_inode_size = btrfs_inode_size(
4067					sctx->left_path->nodes[0], left_ii);
4068			sctx->cur_inode_mode = btrfs_inode_mode(
4069					sctx->left_path->nodes[0], left_ii);
4070			ret = send_create_inode_if_needed(sctx);
4071			if (ret < 0)
4072				goto out;
4073
4074			ret = process_all_refs(sctx, BTRFS_COMPARE_TREE_NEW);
4075			if (ret < 0)
4076				goto out;
4077			ret = process_all_extents(sctx);
4078			if (ret < 0)
4079				goto out;
4080			ret = process_all_new_xattrs(sctx);
4081			if (ret < 0)
4082				goto out;
4083		} else {
4084			sctx->cur_inode_gen = left_gen;
4085			sctx->cur_inode_new = 0;
4086			sctx->cur_inode_new_gen = 0;
4087			sctx->cur_inode_deleted = 0;
4088			sctx->cur_inode_size = btrfs_inode_size(
4089					sctx->left_path->nodes[0], left_ii);
4090			sctx->cur_inode_mode = btrfs_inode_mode(
4091					sctx->left_path->nodes[0], left_ii);
4092		}
4093	}
4094
4095out:
4096	return ret;
4097}
4098
4099static int changed_ref(struct send_ctx *sctx,
4100		       enum btrfs_compare_tree_result result)
4101{
4102	int ret = 0;
4103
4104	BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid);
4105
4106	if (!sctx->cur_inode_new_gen &&
4107	    sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) {
4108		if (result == BTRFS_COMPARE_TREE_NEW)
4109			ret = record_new_ref(sctx);
4110		else if (result == BTRFS_COMPARE_TREE_DELETED)
4111			ret = record_deleted_ref(sctx);
4112		else if (result == BTRFS_COMPARE_TREE_CHANGED)
4113			ret = record_changed_ref(sctx);
4114	}
4115
4116	return ret;
4117}
4118
4119static int changed_xattr(struct send_ctx *sctx,
4120			 enum btrfs_compare_tree_result result)
4121{
4122	int ret = 0;
4123
4124	BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid);
4125
4126	if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
4127		if (result == BTRFS_COMPARE_TREE_NEW)
4128			ret = process_new_xattr(sctx);
4129		else if (result == BTRFS_COMPARE_TREE_DELETED)
4130			ret = process_deleted_xattr(sctx);
4131		else if (result == BTRFS_COMPARE_TREE_CHANGED)
4132			ret = process_changed_xattr(sctx);
4133	}
4134
4135	return ret;
4136}
4137
4138static int changed_extent(struct send_ctx *sctx,
4139			  enum btrfs_compare_tree_result result)
4140{
4141	int ret = 0;
4142
4143	BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid);
4144
4145	if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
4146		if (result != BTRFS_COMPARE_TREE_DELETED)
4147			ret = process_extent(sctx, sctx->left_path,
4148					sctx->cmp_key);
4149	}
4150
4151	return ret;
4152}
4153
4154
4155static int changed_cb(struct btrfs_root *left_root,
4156		      struct btrfs_root *right_root,
4157		      struct btrfs_path *left_path,
4158		      struct btrfs_path *right_path,
4159		      struct btrfs_key *key,
4160		      enum btrfs_compare_tree_result result,
4161		      void *ctx)
4162{
4163	int ret = 0;
4164	struct send_ctx *sctx = ctx;
4165
4166	sctx->left_path = left_path;
4167	sctx->right_path = right_path;
4168	sctx->cmp_key = key;
4169
4170	ret = finish_inode_if_needed(sctx, 0);
4171	if (ret < 0)
4172		goto out;
4173
4174	if (key->type == BTRFS_INODE_ITEM_KEY)
4175		ret = changed_inode(sctx, result);
4176	else if (key->type == BTRFS_INODE_REF_KEY)
4177		ret = changed_ref(sctx, result);
4178	else if (key->type == BTRFS_XATTR_ITEM_KEY)
4179		ret = changed_xattr(sctx, result);
4180	else if (key->type == BTRFS_EXTENT_DATA_KEY)
4181		ret = changed_extent(sctx, result);
4182
4183out:
4184	return ret;
4185}
4186
4187static int full_send_tree(struct send_ctx *sctx)
4188{
4189	int ret;
4190	struct btrfs_trans_handle *trans = NULL;
4191	struct btrfs_root *send_root = sctx->send_root;
4192	struct btrfs_key key;
4193	struct btrfs_key found_key;
4194	struct btrfs_path *path;
4195	struct extent_buffer *eb;
4196	int slot;
4197	u64 start_ctransid;
4198	u64 ctransid;
4199
4200	path = alloc_path_for_send();
4201	if (!path)
4202		return -ENOMEM;
4203
4204	spin_lock(&send_root->root_times_lock);
4205	start_ctransid = btrfs_root_ctransid(&send_root->root_item);
4206	spin_unlock(&send_root->root_times_lock);
4207
4208	key.objectid = BTRFS_FIRST_FREE_OBJECTID;
4209	key.type = BTRFS_INODE_ITEM_KEY;
4210	key.offset = 0;
4211
4212join_trans:
4213	/*
4214	 * We need to make sure the transaction does not get committed
4215	 * while we do anything on commit roots. Join a transaction to prevent
4216	 * this.
4217	 */
4218	trans = btrfs_join_transaction(send_root);
4219	if (IS_ERR(trans)) {
4220		ret = PTR_ERR(trans);
4221		trans = NULL;
4222		goto out;
4223	}
4224
4225	/*
4226	 * Make sure the tree has not changed
4227	 */
4228	spin_lock(&send_root->root_times_lock);
4229	ctransid = btrfs_root_ctransid(&send_root->root_item);
4230	spin_unlock(&send_root->root_times_lock);
4231
4232	if (ctransid != start_ctransid) {
4233		WARN(1, KERN_WARNING "btrfs: the root that you're trying to "
4234				     "send was modified in between. This is "
4235				     "probably a bug.\n");
4236		ret = -EIO;
4237		goto out;
4238	}
4239
4240	ret = btrfs_search_slot_for_read(send_root, &key, path, 1, 0);
4241	if (ret < 0)
4242		goto out;
4243	if (ret)
4244		goto out_finish;
4245
4246	while (1) {
4247		/*
4248		 * When someone want to commit while we iterate, end the
4249		 * joined transaction and rejoin.
4250		 */
4251		if (btrfs_should_end_transaction(trans, send_root)) {
4252			ret = btrfs_end_transaction(trans, send_root);
4253			trans = NULL;
4254			if (ret < 0)
4255				goto out;
4256			btrfs_release_path(path);
4257			goto join_trans;
4258		}
4259
4260		eb = path->nodes[0];
4261		slot = path->slots[0];
4262		btrfs_item_key_to_cpu(eb, &found_key, slot);
4263
4264		ret = changed_cb(send_root, NULL, path, NULL,
4265				&found_key, BTRFS_COMPARE_TREE_NEW, sctx);
4266		if (ret < 0)
4267			goto out;
4268
4269		key.objectid = found_key.objectid;
4270		key.type = found_key.type;
4271		key.offset = found_key.offset + 1;
4272
4273		ret = btrfs_next_item(send_root, path);
4274		if (ret < 0)
4275			goto out;
4276		if (ret) {
4277			ret  = 0;
4278			break;
4279		}
4280	}
4281
4282out_finish:
4283	ret = finish_inode_if_needed(sctx, 1);
4284
4285out:
4286	btrfs_free_path(path);
4287	if (trans) {
4288		if (!ret)
4289			ret = btrfs_end_transaction(trans, send_root);
4290		else
4291			btrfs_end_transaction(trans, send_root);
4292	}
4293	return ret;
4294}
4295
4296static int send_subvol(struct send_ctx *sctx)
4297{
4298	int ret;
4299
4300	ret = send_header(sctx);
4301	if (ret < 0)
4302		goto out;
4303
4304	ret = send_subvol_begin(sctx);
4305	if (ret < 0)
4306		goto out;
4307
4308	if (sctx->parent_root) {
4309		ret = btrfs_compare_trees(sctx->send_root, sctx->parent_root,
4310				changed_cb, sctx);
4311		if (ret < 0)
4312			goto out;
4313		ret = finish_inode_if_needed(sctx, 1);
4314		if (ret < 0)
4315			goto out;
4316	} else {
4317		ret = full_send_tree(sctx);
4318		if (ret < 0)
4319			goto out;
4320	}
4321
4322out:
4323	if (!ret)
4324		ret = close_cur_inode_file(sctx);
4325	else
4326		close_cur_inode_file(sctx);
4327
4328	free_recorded_refs(sctx);
4329	return ret;
4330}
4331
4332long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
4333{
4334	int ret = 0;
4335	struct btrfs_root *send_root;
4336	struct btrfs_root *clone_root;
4337	struct btrfs_fs_info *fs_info;
4338	struct btrfs_ioctl_send_args *arg = NULL;
4339	struct btrfs_key key;
4340	struct file *filp = NULL;
4341	struct send_ctx *sctx = NULL;
4342	u32 i;
4343	u64 *clone_sources_tmp = NULL;
4344
4345	if (!capable(CAP_SYS_ADMIN))
4346		return -EPERM;
4347
4348	send_root = BTRFS_I(fdentry(mnt_file)->d_inode)->root;
4349	fs_info = send_root->fs_info;
4350
4351	arg = memdup_user(arg_, sizeof(*arg));
4352	if (IS_ERR(arg)) {
4353		ret = PTR_ERR(arg);
4354		arg = NULL;
4355		goto out;
4356	}
4357
4358	if (!access_ok(VERIFY_READ, arg->clone_sources,
4359			sizeof(*arg->clone_sources *
4360			arg->clone_sources_count))) {
4361		ret = -EFAULT;
4362		goto out;
4363	}
4364
4365	sctx = kzalloc(sizeof(struct send_ctx), GFP_NOFS);
4366	if (!sctx) {
4367		ret = -ENOMEM;
4368		goto out;
4369	}
4370
4371	INIT_LIST_HEAD(&sctx->new_refs);
4372	INIT_LIST_HEAD(&sctx->deleted_refs);
4373	INIT_RADIX_TREE(&sctx->name_cache, GFP_NOFS);
4374	INIT_LIST_HEAD(&sctx->name_cache_list);
4375
4376	sctx->send_filp = fget(arg->send_fd);
4377	if (IS_ERR(sctx->send_filp)) {
4378		ret = PTR_ERR(sctx->send_filp);
4379		goto out;
4380	}
4381
4382	sctx->mnt = mnt_file->f_path.mnt;
4383
4384	sctx->send_root = send_root;
4385	sctx->clone_roots_cnt = arg->clone_sources_count;
4386
4387	sctx->send_max_size = BTRFS_SEND_BUF_SIZE;
4388	sctx->send_buf = vmalloc(sctx->send_max_size);
4389	if (!sctx->send_buf) {
4390		ret = -ENOMEM;
4391		goto out;
4392	}
4393
4394	sctx->read_buf = vmalloc(BTRFS_SEND_READ_SIZE);
4395	if (!sctx->read_buf) {
4396		ret = -ENOMEM;
4397		goto out;
4398	}
4399
4400	sctx->clone_roots = vzalloc(sizeof(struct clone_root) *
4401			(arg->clone_sources_count + 1));
4402	if (!sctx->clone_roots) {
4403		ret = -ENOMEM;
4404		goto out;
4405	}
4406
4407	if (arg->clone_sources_count) {
4408		clone_sources_tmp = vmalloc(arg->clone_sources_count *
4409				sizeof(*arg->clone_sources));
4410		if (!clone_sources_tmp) {
4411			ret = -ENOMEM;
4412			goto out;
4413		}
4414
4415		ret = copy_from_user(clone_sources_tmp, arg->clone_sources,
4416				arg->clone_sources_count *
4417				sizeof(*arg->clone_sources));
4418		if (ret) {
4419			ret = -EFAULT;
4420			goto out;
4421		}
4422
4423		for (i = 0; i < arg->clone_sources_count; i++) {
4424			key.objectid = clone_sources_tmp[i];
4425			key.type = BTRFS_ROOT_ITEM_KEY;
4426			key.offset = (u64)-1;
4427			clone_root = btrfs_read_fs_root_no_name(fs_info, &key);
4428			if (!clone_root) {
4429				ret = -EINVAL;
4430				goto out;
4431			}
4432			if (IS_ERR(clone_root)) {
4433				ret = PTR_ERR(clone_root);
4434				goto out;
4435			}
4436			sctx->clone_roots[i].root = clone_root;
4437		}
4438		vfree(clone_sources_tmp);
4439		clone_sources_tmp = NULL;
4440	}
4441
4442	if (arg->parent_root) {
4443		key.objectid = arg->parent_root;
4444		key.type = BTRFS_ROOT_ITEM_KEY;
4445		key.offset = (u64)-1;
4446		sctx->parent_root = btrfs_read_fs_root_no_name(fs_info, &key);
4447		if (!sctx->parent_root) {
4448			ret = -EINVAL;
4449			goto out;
4450		}
4451	}
4452
4453	/*
4454	 * Clones from send_root are allowed, but only if the clone source
4455	 * is behind the current send position. This is checked while searching
4456	 * for possible clone sources.
4457	 */
4458	sctx->clone_roots[sctx->clone_roots_cnt++].root = sctx->send_root;
4459
4460	/* We do a bsearch later */
4461	sort(sctx->clone_roots, sctx->clone_roots_cnt,
4462			sizeof(*sctx->clone_roots), __clone_root_cmp_sort,
4463			NULL);
4464
4465	ret = send_subvol(sctx);
4466	if (ret < 0)
4467		goto out;
4468
4469	ret = begin_cmd(sctx, BTRFS_SEND_C_END);
4470	if (ret < 0)
4471		goto out;
4472	ret = send_cmd(sctx);
4473	if (ret < 0)
4474		goto out;
4475
4476out:
4477	if (filp)
4478		fput(filp);
4479	kfree(arg);
4480	vfree(clone_sources_tmp);
4481
4482	if (sctx) {
4483		if (sctx->send_filp)
4484			fput(sctx->send_filp);
4485
4486		vfree(sctx->clone_roots);
4487		vfree(sctx->send_buf);
4488		vfree(sctx->read_buf);
4489
4490		name_cache_free(sctx);
4491
4492		kfree(sctx);
4493	}
4494
4495	return ret;
4496}
4497