send.c revision d79e50433b2bea09eb680ed5fae15e8a12356353
1/*
2 * Copyright (C) 2012 Alexander Block.  All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#include <linux/bsearch.h>
20#include <linux/fs.h>
21#include <linux/file.h>
22#include <linux/sort.h>
23#include <linux/mount.h>
24#include <linux/xattr.h>
25#include <linux/posix_acl_xattr.h>
26#include <linux/radix-tree.h>
27#include <linux/crc32c.h>
28#include <linux/vmalloc.h>
29
30#include "send.h"
31#include "backref.h"
32#include "locking.h"
33#include "disk-io.h"
34#include "btrfs_inode.h"
35#include "transaction.h"
36
37static int g_verbose = 0;
38
39#define verbose_printk(...) if (g_verbose) printk(__VA_ARGS__)
40
41/*
42 * A fs_path is a helper to dynamically build path names with unknown size.
43 * It reallocates the internal buffer on demand.
44 * It allows fast adding of path elements on the right side (normal path) and
45 * fast adding to the left side (reversed path). A reversed path can also be
46 * unreversed if needed.
47 */
48struct fs_path {
49	union {
50		struct {
51			char *start;
52			char *end;
53			char *prepared;
54
55			char *buf;
56			int buf_len;
57			int reversed:1;
58			int virtual_mem:1;
59			char inline_buf[];
60		};
61		char pad[PAGE_SIZE];
62	};
63};
64#define FS_PATH_INLINE_SIZE \
65	(sizeof(struct fs_path) - offsetof(struct fs_path, inline_buf))
66
67
68/* reused for each extent */
69struct clone_root {
70	struct btrfs_root *root;
71	u64 ino;
72	u64 offset;
73
74	u64 found_refs;
75};
76
77#define SEND_CTX_MAX_NAME_CACHE_SIZE 128
78#define SEND_CTX_NAME_CACHE_CLEAN_SIZE (SEND_CTX_MAX_NAME_CACHE_SIZE * 2)
79
80struct send_ctx {
81	struct file *send_filp;
82	loff_t send_off;
83	char *send_buf;
84	u32 send_size;
85	u32 send_max_size;
86	u64 total_send_size;
87	u64 cmd_send_size[BTRFS_SEND_C_MAX + 1];
88
89	struct vfsmount *mnt;
90
91	struct btrfs_root *send_root;
92	struct btrfs_root *parent_root;
93	struct clone_root *clone_roots;
94	int clone_roots_cnt;
95
96	/* current state of the compare_tree call */
97	struct btrfs_path *left_path;
98	struct btrfs_path *right_path;
99	struct btrfs_key *cmp_key;
100
101	/*
102	 * infos of the currently processed inode. In case of deleted inodes,
103	 * these are the values from the deleted inode.
104	 */
105	u64 cur_ino;
106	u64 cur_inode_gen;
107	int cur_inode_new;
108	int cur_inode_new_gen;
109	int cur_inode_deleted;
110	u64 cur_inode_size;
111	u64 cur_inode_mode;
112
113	u64 send_progress;
114
115	struct list_head new_refs;
116	struct list_head deleted_refs;
117
118	struct radix_tree_root name_cache;
119	struct list_head name_cache_list;
120	int name_cache_size;
121
122	struct file *cur_inode_filp;
123	char *read_buf;
124};
125
126struct name_cache_entry {
127	struct list_head list;
128	/*
129	 * radix_tree has only 32bit entries but we need to handle 64bit inums.
130	 * We use the lower 32bit of the 64bit inum to store it in the tree. If
131	 * more then one inum would fall into the same entry, we use radix_list
132	 * to store the additional entries. radix_list is also used to store
133	 * entries where two entries have the same inum but different
134	 * generations.
135	 */
136	struct list_head radix_list;
137	u64 ino;
138	u64 gen;
139	u64 parent_ino;
140	u64 parent_gen;
141	int ret;
142	int need_later_update;
143	int name_len;
144	char name[];
145};
146
147static void fs_path_reset(struct fs_path *p)
148{
149	if (p->reversed) {
150		p->start = p->buf + p->buf_len - 1;
151		p->end = p->start;
152		*p->start = 0;
153	} else {
154		p->start = p->buf;
155		p->end = p->start;
156		*p->start = 0;
157	}
158}
159
160static struct fs_path *fs_path_alloc(struct send_ctx *sctx)
161{
162	struct fs_path *p;
163
164	p = kmalloc(sizeof(*p), GFP_NOFS);
165	if (!p)
166		return NULL;
167	p->reversed = 0;
168	p->virtual_mem = 0;
169	p->buf = p->inline_buf;
170	p->buf_len = FS_PATH_INLINE_SIZE;
171	fs_path_reset(p);
172	return p;
173}
174
175static struct fs_path *fs_path_alloc_reversed(struct send_ctx *sctx)
176{
177	struct fs_path *p;
178
179	p = fs_path_alloc(sctx);
180	if (!p)
181		return NULL;
182	p->reversed = 1;
183	fs_path_reset(p);
184	return p;
185}
186
187static void fs_path_free(struct send_ctx *sctx, struct fs_path *p)
188{
189	if (!p)
190		return;
191	if (p->buf != p->inline_buf) {
192		if (p->virtual_mem)
193			vfree(p->buf);
194		else
195			kfree(p->buf);
196	}
197	kfree(p);
198}
199
200static int fs_path_len(struct fs_path *p)
201{
202	return p->end - p->start;
203}
204
205static int fs_path_ensure_buf(struct fs_path *p, int len)
206{
207	char *tmp_buf;
208	int path_len;
209	int old_buf_len;
210
211	len++;
212
213	if (p->buf_len >= len)
214		return 0;
215
216	path_len = p->end - p->start;
217	old_buf_len = p->buf_len;
218	len = PAGE_ALIGN(len);
219
220	if (p->buf == p->inline_buf) {
221		tmp_buf = kmalloc(len, GFP_NOFS);
222		if (!tmp_buf) {
223			tmp_buf = vmalloc(len);
224			if (!tmp_buf)
225				return -ENOMEM;
226			p->virtual_mem = 1;
227		}
228		memcpy(tmp_buf, p->buf, p->buf_len);
229		p->buf = tmp_buf;
230		p->buf_len = len;
231	} else {
232		if (p->virtual_mem) {
233			tmp_buf = vmalloc(len);
234			if (!tmp_buf)
235				return -ENOMEM;
236			memcpy(tmp_buf, p->buf, p->buf_len);
237			vfree(p->buf);
238		} else {
239			tmp_buf = krealloc(p->buf, len, GFP_NOFS);
240			if (!tmp_buf) {
241				tmp_buf = vmalloc(len);
242				if (!tmp_buf)
243					return -ENOMEM;
244				memcpy(tmp_buf, p->buf, p->buf_len);
245				kfree(p->buf);
246				p->virtual_mem = 1;
247			}
248		}
249		p->buf = tmp_buf;
250		p->buf_len = len;
251	}
252	if (p->reversed) {
253		tmp_buf = p->buf + old_buf_len - path_len - 1;
254		p->end = p->buf + p->buf_len - 1;
255		p->start = p->end - path_len;
256		memmove(p->start, tmp_buf, path_len + 1);
257	} else {
258		p->start = p->buf;
259		p->end = p->start + path_len;
260	}
261	return 0;
262}
263
264static int fs_path_prepare_for_add(struct fs_path *p, int name_len)
265{
266	int ret;
267	int new_len;
268
269	new_len = p->end - p->start + name_len;
270	if (p->start != p->end)
271		new_len++;
272	ret = fs_path_ensure_buf(p, new_len);
273	if (ret < 0)
274		goto out;
275
276	if (p->reversed) {
277		if (p->start != p->end)
278			*--p->start = '/';
279		p->start -= name_len;
280		p->prepared = p->start;
281	} else {
282		if (p->start != p->end)
283			*p->end++ = '/';
284		p->prepared = p->end;
285		p->end += name_len;
286		*p->end = 0;
287	}
288
289out:
290	return ret;
291}
292
293static int fs_path_add(struct fs_path *p, const char *name, int name_len)
294{
295	int ret;
296
297	ret = fs_path_prepare_for_add(p, name_len);
298	if (ret < 0)
299		goto out;
300	memcpy(p->prepared, name, name_len);
301	p->prepared = NULL;
302
303out:
304	return ret;
305}
306
307static int fs_path_add_path(struct fs_path *p, struct fs_path *p2)
308{
309	int ret;
310
311	ret = fs_path_prepare_for_add(p, p2->end - p2->start);
312	if (ret < 0)
313		goto out;
314	memcpy(p->prepared, p2->start, p2->end - p2->start);
315	p->prepared = NULL;
316
317out:
318	return ret;
319}
320
321static int fs_path_add_from_extent_buffer(struct fs_path *p,
322					  struct extent_buffer *eb,
323					  unsigned long off, int len)
324{
325	int ret;
326
327	ret = fs_path_prepare_for_add(p, len);
328	if (ret < 0)
329		goto out;
330
331	read_extent_buffer(eb, p->prepared, off, len);
332	p->prepared = NULL;
333
334out:
335	return ret;
336}
337
338#if 0
339static void fs_path_remove(struct fs_path *p)
340{
341	BUG_ON(p->reversed);
342	while (p->start != p->end && *p->end != '/')
343		p->end--;
344	*p->end = 0;
345}
346#endif
347
348static int fs_path_copy(struct fs_path *p, struct fs_path *from)
349{
350	int ret;
351
352	p->reversed = from->reversed;
353	fs_path_reset(p);
354
355	ret = fs_path_add_path(p, from);
356
357	return ret;
358}
359
360
361static void fs_path_unreverse(struct fs_path *p)
362{
363	char *tmp;
364	int len;
365
366	if (!p->reversed)
367		return;
368
369	tmp = p->start;
370	len = p->end - p->start;
371	p->start = p->buf;
372	p->end = p->start + len;
373	memmove(p->start, tmp, len + 1);
374	p->reversed = 0;
375}
376
377static struct btrfs_path *alloc_path_for_send(void)
378{
379	struct btrfs_path *path;
380
381	path = btrfs_alloc_path();
382	if (!path)
383		return NULL;
384	path->search_commit_root = 1;
385	path->skip_locking = 1;
386	return path;
387}
388
389int write_buf(struct file *filp, const void *buf, u32 len, loff_t *off)
390{
391	int ret;
392	mm_segment_t old_fs;
393	u32 pos = 0;
394
395	old_fs = get_fs();
396	set_fs(KERNEL_DS);
397
398	while (pos < len) {
399		ret = vfs_write(filp, (char *)buf + pos, len - pos, off);
400		/* TODO handle that correctly */
401		/*if (ret == -ERESTARTSYS) {
402			continue;
403		}*/
404		if (ret < 0)
405			goto out;
406		if (ret == 0) {
407			ret = -EIO;
408			goto out;
409		}
410		pos += ret;
411	}
412
413	ret = 0;
414
415out:
416	set_fs(old_fs);
417	return ret;
418}
419
420static int tlv_put(struct send_ctx *sctx, u16 attr, const void *data, int len)
421{
422	struct btrfs_tlv_header *hdr;
423	int total_len = sizeof(*hdr) + len;
424	int left = sctx->send_max_size - sctx->send_size;
425
426	if (unlikely(left < total_len))
427		return -EOVERFLOW;
428
429	hdr = (struct btrfs_tlv_header *) (sctx->send_buf + sctx->send_size);
430	hdr->tlv_type = cpu_to_le16(attr);
431	hdr->tlv_len = cpu_to_le16(len);
432	memcpy(hdr + 1, data, len);
433	sctx->send_size += total_len;
434
435	return 0;
436}
437
438#if 0
439static int tlv_put_u8(struct send_ctx *sctx, u16 attr, u8 value)
440{
441	return tlv_put(sctx, attr, &value, sizeof(value));
442}
443
444static int tlv_put_u16(struct send_ctx *sctx, u16 attr, u16 value)
445{
446	__le16 tmp = cpu_to_le16(value);
447	return tlv_put(sctx, attr, &tmp, sizeof(tmp));
448}
449
450static int tlv_put_u32(struct send_ctx *sctx, u16 attr, u32 value)
451{
452	__le32 tmp = cpu_to_le32(value);
453	return tlv_put(sctx, attr, &tmp, sizeof(tmp));
454}
455#endif
456
457static int tlv_put_u64(struct send_ctx *sctx, u16 attr, u64 value)
458{
459	__le64 tmp = cpu_to_le64(value);
460	return tlv_put(sctx, attr, &tmp, sizeof(tmp));
461}
462
463static int tlv_put_string(struct send_ctx *sctx, u16 attr,
464			  const char *str, int len)
465{
466	if (len == -1)
467		len = strlen(str);
468	return tlv_put(sctx, attr, str, len);
469}
470
471static int tlv_put_uuid(struct send_ctx *sctx, u16 attr,
472			const u8 *uuid)
473{
474	return tlv_put(sctx, attr, uuid, BTRFS_UUID_SIZE);
475}
476
477#if 0
478static int tlv_put_timespec(struct send_ctx *sctx, u16 attr,
479			    struct timespec *ts)
480{
481	struct btrfs_timespec bts;
482	bts.sec = cpu_to_le64(ts->tv_sec);
483	bts.nsec = cpu_to_le32(ts->tv_nsec);
484	return tlv_put(sctx, attr, &bts, sizeof(bts));
485}
486#endif
487
488static int tlv_put_btrfs_timespec(struct send_ctx *sctx, u16 attr,
489				  struct extent_buffer *eb,
490				  struct btrfs_timespec *ts)
491{
492	struct btrfs_timespec bts;
493	read_extent_buffer(eb, &bts, (unsigned long)ts, sizeof(bts));
494	return tlv_put(sctx, attr, &bts, sizeof(bts));
495}
496
497
498#define TLV_PUT(sctx, attrtype, attrlen, data) \
499	do { \
500		ret = tlv_put(sctx, attrtype, attrlen, data); \
501		if (ret < 0) \
502			goto tlv_put_failure; \
503	} while (0)
504
505#define TLV_PUT_INT(sctx, attrtype, bits, value) \
506	do { \
507		ret = tlv_put_u##bits(sctx, attrtype, value); \
508		if (ret < 0) \
509			goto tlv_put_failure; \
510	} while (0)
511
512#define TLV_PUT_U8(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 8, data)
513#define TLV_PUT_U16(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 16, data)
514#define TLV_PUT_U32(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 32, data)
515#define TLV_PUT_U64(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 64, data)
516#define TLV_PUT_STRING(sctx, attrtype, str, len) \
517	do { \
518		ret = tlv_put_string(sctx, attrtype, str, len); \
519		if (ret < 0) \
520			goto tlv_put_failure; \
521	} while (0)
522#define TLV_PUT_PATH(sctx, attrtype, p) \
523	do { \
524		ret = tlv_put_string(sctx, attrtype, p->start, \
525			p->end - p->start); \
526		if (ret < 0) \
527			goto tlv_put_failure; \
528	} while(0)
529#define TLV_PUT_UUID(sctx, attrtype, uuid) \
530	do { \
531		ret = tlv_put_uuid(sctx, attrtype, uuid); \
532		if (ret < 0) \
533			goto tlv_put_failure; \
534	} while (0)
535#define TLV_PUT_TIMESPEC(sctx, attrtype, ts) \
536	do { \
537		ret = tlv_put_timespec(sctx, attrtype, ts); \
538		if (ret < 0) \
539			goto tlv_put_failure; \
540	} while (0)
541#define TLV_PUT_BTRFS_TIMESPEC(sctx, attrtype, eb, ts) \
542	do { \
543		ret = tlv_put_btrfs_timespec(sctx, attrtype, eb, ts); \
544		if (ret < 0) \
545			goto tlv_put_failure; \
546	} while (0)
547
548static int send_header(struct send_ctx *sctx)
549{
550	struct btrfs_stream_header hdr;
551
552	strcpy(hdr.magic, BTRFS_SEND_STREAM_MAGIC);
553	hdr.version = cpu_to_le32(BTRFS_SEND_STREAM_VERSION);
554
555	return write_buf(sctx->send_filp, &hdr, sizeof(hdr),
556					&sctx->send_off);
557}
558
559/*
560 * For each command/item we want to send to userspace, we call this function.
561 */
562static int begin_cmd(struct send_ctx *sctx, int cmd)
563{
564	struct btrfs_cmd_header *hdr;
565
566	if (!sctx->send_buf) {
567		WARN_ON(1);
568		return -EINVAL;
569	}
570
571	BUG_ON(sctx->send_size);
572
573	sctx->send_size += sizeof(*hdr);
574	hdr = (struct btrfs_cmd_header *)sctx->send_buf;
575	hdr->cmd = cpu_to_le16(cmd);
576
577	return 0;
578}
579
580static int send_cmd(struct send_ctx *sctx)
581{
582	int ret;
583	struct btrfs_cmd_header *hdr;
584	u32 crc;
585
586	hdr = (struct btrfs_cmd_header *)sctx->send_buf;
587	hdr->len = cpu_to_le32(sctx->send_size - sizeof(*hdr));
588	hdr->crc = 0;
589
590	crc = crc32c(0, (unsigned char *)sctx->send_buf, sctx->send_size);
591	hdr->crc = cpu_to_le32(crc);
592
593	ret = write_buf(sctx->send_filp, sctx->send_buf, sctx->send_size,
594					&sctx->send_off);
595
596	sctx->total_send_size += sctx->send_size;
597	sctx->cmd_send_size[le16_to_cpu(hdr->cmd)] += sctx->send_size;
598	sctx->send_size = 0;
599
600	return ret;
601}
602
603/*
604 * Sends a move instruction to user space
605 */
606static int send_rename(struct send_ctx *sctx,
607		     struct fs_path *from, struct fs_path *to)
608{
609	int ret;
610
611verbose_printk("btrfs: send_rename %s -> %s\n", from->start, to->start);
612
613	ret = begin_cmd(sctx, BTRFS_SEND_C_RENAME);
614	if (ret < 0)
615		goto out;
616
617	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, from);
618	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_TO, to);
619
620	ret = send_cmd(sctx);
621
622tlv_put_failure:
623out:
624	return ret;
625}
626
627/*
628 * Sends a link instruction to user space
629 */
630static int send_link(struct send_ctx *sctx,
631		     struct fs_path *path, struct fs_path *lnk)
632{
633	int ret;
634
635verbose_printk("btrfs: send_link %s -> %s\n", path->start, lnk->start);
636
637	ret = begin_cmd(sctx, BTRFS_SEND_C_LINK);
638	if (ret < 0)
639		goto out;
640
641	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
642	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, lnk);
643
644	ret = send_cmd(sctx);
645
646tlv_put_failure:
647out:
648	return ret;
649}
650
651/*
652 * Sends an unlink instruction to user space
653 */
654static int send_unlink(struct send_ctx *sctx, struct fs_path *path)
655{
656	int ret;
657
658verbose_printk("btrfs: send_unlink %s\n", path->start);
659
660	ret = begin_cmd(sctx, BTRFS_SEND_C_UNLINK);
661	if (ret < 0)
662		goto out;
663
664	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
665
666	ret = send_cmd(sctx);
667
668tlv_put_failure:
669out:
670	return ret;
671}
672
673/*
674 * Sends a rmdir instruction to user space
675 */
676static int send_rmdir(struct send_ctx *sctx, struct fs_path *path)
677{
678	int ret;
679
680verbose_printk("btrfs: send_rmdir %s\n", path->start);
681
682	ret = begin_cmd(sctx, BTRFS_SEND_C_RMDIR);
683	if (ret < 0)
684		goto out;
685
686	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
687
688	ret = send_cmd(sctx);
689
690tlv_put_failure:
691out:
692	return ret;
693}
694
695/*
696 * Helper function to retrieve some fields from an inode item.
697 */
698static int get_inode_info(struct btrfs_root *root,
699			  u64 ino, u64 *size, u64 *gen,
700			  u64 *mode, u64 *uid, u64 *gid,
701			  u64 *rdev)
702{
703	int ret;
704	struct btrfs_inode_item *ii;
705	struct btrfs_key key;
706	struct btrfs_path *path;
707
708	path = alloc_path_for_send();
709	if (!path)
710		return -ENOMEM;
711
712	key.objectid = ino;
713	key.type = BTRFS_INODE_ITEM_KEY;
714	key.offset = 0;
715	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
716	if (ret < 0)
717		goto out;
718	if (ret) {
719		ret = -ENOENT;
720		goto out;
721	}
722
723	ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
724			struct btrfs_inode_item);
725	if (size)
726		*size = btrfs_inode_size(path->nodes[0], ii);
727	if (gen)
728		*gen = btrfs_inode_generation(path->nodes[0], ii);
729	if (mode)
730		*mode = btrfs_inode_mode(path->nodes[0], ii);
731	if (uid)
732		*uid = btrfs_inode_uid(path->nodes[0], ii);
733	if (gid)
734		*gid = btrfs_inode_gid(path->nodes[0], ii);
735	if (rdev)
736		*rdev = btrfs_inode_rdev(path->nodes[0], ii);
737
738out:
739	btrfs_free_path(path);
740	return ret;
741}
742
743typedef int (*iterate_inode_ref_t)(int num, u64 dir, int index,
744				   struct fs_path *p,
745				   void *ctx);
746
747/*
748 * Helper function to iterate the entries in ONE btrfs_inode_ref or
749 * btrfs_inode_extref.
750 * The iterate callback may return a non zero value to stop iteration. This can
751 * be a negative value for error codes or 1 to simply stop it.
752 *
753 * path must point to the INODE_REF or INODE_EXTREF when called.
754 */
755static int iterate_inode_ref(struct send_ctx *sctx,
756			     struct btrfs_root *root, struct btrfs_path *path,
757			     struct btrfs_key *found_key, int resolve,
758			     iterate_inode_ref_t iterate, void *ctx)
759{
760	struct extent_buffer *eb = path->nodes[0];
761	struct btrfs_item *item;
762	struct btrfs_inode_ref *iref;
763	struct btrfs_inode_extref *extref;
764	struct btrfs_path *tmp_path;
765	struct fs_path *p;
766	u32 cur = 0;
767	u32 total;
768	int slot = path->slots[0];
769	u32 name_len;
770	char *start;
771	int ret = 0;
772	int num = 0;
773	int index;
774	u64 dir;
775	unsigned long name_off;
776	unsigned long elem_size;
777	unsigned long ptr;
778
779	p = fs_path_alloc_reversed(sctx);
780	if (!p)
781		return -ENOMEM;
782
783	tmp_path = alloc_path_for_send();
784	if (!tmp_path) {
785		fs_path_free(sctx, p);
786		return -ENOMEM;
787	}
788
789
790	if (found_key->type == BTRFS_INODE_REF_KEY) {
791		ptr = (unsigned long)btrfs_item_ptr(eb, slot,
792						    struct btrfs_inode_ref);
793		item = btrfs_item_nr(eb, slot);
794		total = btrfs_item_size(eb, item);
795		elem_size = sizeof(*iref);
796	} else {
797		ptr = btrfs_item_ptr_offset(eb, slot);
798		total = btrfs_item_size_nr(eb, slot);
799		elem_size = sizeof(*extref);
800	}
801
802	while (cur < total) {
803		fs_path_reset(p);
804
805		if (found_key->type == BTRFS_INODE_REF_KEY) {
806			iref = (struct btrfs_inode_ref *)(ptr + cur);
807			name_len = btrfs_inode_ref_name_len(eb, iref);
808			name_off = (unsigned long)(iref + 1);
809			index = btrfs_inode_ref_index(eb, iref);
810			dir = found_key->offset;
811		} else {
812			extref = (struct btrfs_inode_extref *)(ptr + cur);
813			name_len = btrfs_inode_extref_name_len(eb, extref);
814			name_off = (unsigned long)&extref->name;
815			index = btrfs_inode_extref_index(eb, extref);
816			dir = btrfs_inode_extref_parent(eb, extref);
817		}
818
819		if (resolve) {
820			start = btrfs_ref_to_path(root, tmp_path, name_len,
821						  name_off, eb, dir,
822						  p->buf, p->buf_len);
823			if (IS_ERR(start)) {
824				ret = PTR_ERR(start);
825				goto out;
826			}
827			if (start < p->buf) {
828				/* overflow , try again with larger buffer */
829				ret = fs_path_ensure_buf(p,
830						p->buf_len + p->buf - start);
831				if (ret < 0)
832					goto out;
833				start = btrfs_ref_to_path(root, tmp_path,
834							  name_len, name_off,
835							  eb, dir,
836							  p->buf, p->buf_len);
837				if (IS_ERR(start)) {
838					ret = PTR_ERR(start);
839					goto out;
840				}
841				BUG_ON(start < p->buf);
842			}
843			p->start = start;
844		} else {
845			ret = fs_path_add_from_extent_buffer(p, eb, name_off,
846							     name_len);
847			if (ret < 0)
848				goto out;
849		}
850
851		cur += elem_size + name_len;
852		ret = iterate(num, dir, index, p, ctx);
853		if (ret)
854			goto out;
855		num++;
856	}
857
858out:
859	btrfs_free_path(tmp_path);
860	fs_path_free(sctx, p);
861	return ret;
862}
863
864typedef int (*iterate_dir_item_t)(int num, struct btrfs_key *di_key,
865				  const char *name, int name_len,
866				  const char *data, int data_len,
867				  u8 type, void *ctx);
868
869/*
870 * Helper function to iterate the entries in ONE btrfs_dir_item.
871 * The iterate callback may return a non zero value to stop iteration. This can
872 * be a negative value for error codes or 1 to simply stop it.
873 *
874 * path must point to the dir item when called.
875 */
876static int iterate_dir_item(struct send_ctx *sctx,
877			    struct btrfs_root *root, struct btrfs_path *path,
878			    struct btrfs_key *found_key,
879			    iterate_dir_item_t iterate, void *ctx)
880{
881	int ret = 0;
882	struct extent_buffer *eb;
883	struct btrfs_item *item;
884	struct btrfs_dir_item *di;
885	struct btrfs_key di_key;
886	char *buf = NULL;
887	char *buf2 = NULL;
888	int buf_len;
889	int buf_virtual = 0;
890	u32 name_len;
891	u32 data_len;
892	u32 cur;
893	u32 len;
894	u32 total;
895	int slot;
896	int num;
897	u8 type;
898
899	buf_len = PAGE_SIZE;
900	buf = kmalloc(buf_len, GFP_NOFS);
901	if (!buf) {
902		ret = -ENOMEM;
903		goto out;
904	}
905
906	eb = path->nodes[0];
907	slot = path->slots[0];
908	item = btrfs_item_nr(eb, slot);
909	di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
910	cur = 0;
911	len = 0;
912	total = btrfs_item_size(eb, item);
913
914	num = 0;
915	while (cur < total) {
916		name_len = btrfs_dir_name_len(eb, di);
917		data_len = btrfs_dir_data_len(eb, di);
918		type = btrfs_dir_type(eb, di);
919		btrfs_dir_item_key_to_cpu(eb, di, &di_key);
920
921		if (name_len + data_len > buf_len) {
922			buf_len = PAGE_ALIGN(name_len + data_len);
923			if (buf_virtual) {
924				buf2 = vmalloc(buf_len);
925				if (!buf2) {
926					ret = -ENOMEM;
927					goto out;
928				}
929				vfree(buf);
930			} else {
931				buf2 = krealloc(buf, buf_len, GFP_NOFS);
932				if (!buf2) {
933					buf2 = vmalloc(buf_len);
934					if (!buf2) {
935						ret = -ENOMEM;
936						goto out;
937					}
938					kfree(buf);
939					buf_virtual = 1;
940				}
941			}
942
943			buf = buf2;
944			buf2 = NULL;
945		}
946
947		read_extent_buffer(eb, buf, (unsigned long)(di + 1),
948				name_len + data_len);
949
950		len = sizeof(*di) + name_len + data_len;
951		di = (struct btrfs_dir_item *)((char *)di + len);
952		cur += len;
953
954		ret = iterate(num, &di_key, buf, name_len, buf + name_len,
955				data_len, type, ctx);
956		if (ret < 0)
957			goto out;
958		if (ret) {
959			ret = 0;
960			goto out;
961		}
962
963		num++;
964	}
965
966out:
967	if (buf_virtual)
968		vfree(buf);
969	else
970		kfree(buf);
971	return ret;
972}
973
974static int __copy_first_ref(int num, u64 dir, int index,
975			    struct fs_path *p, void *ctx)
976{
977	int ret;
978	struct fs_path *pt = ctx;
979
980	ret = fs_path_copy(pt, p);
981	if (ret < 0)
982		return ret;
983
984	/* we want the first only */
985	return 1;
986}
987
988/*
989 * Retrieve the first path of an inode. If an inode has more then one
990 * ref/hardlink, this is ignored.
991 */
992static int get_inode_path(struct send_ctx *sctx, struct btrfs_root *root,
993			  u64 ino, struct fs_path *path)
994{
995	int ret;
996	struct btrfs_key key, found_key;
997	struct btrfs_path *p;
998
999	p = alloc_path_for_send();
1000	if (!p)
1001		return -ENOMEM;
1002
1003	fs_path_reset(path);
1004
1005	key.objectid = ino;
1006	key.type = BTRFS_INODE_REF_KEY;
1007	key.offset = 0;
1008
1009	ret = btrfs_search_slot_for_read(root, &key, p, 1, 0);
1010	if (ret < 0)
1011		goto out;
1012	if (ret) {
1013		ret = 1;
1014		goto out;
1015	}
1016	btrfs_item_key_to_cpu(p->nodes[0], &found_key, p->slots[0]);
1017	if (found_key.objectid != ino ||
1018	    (found_key.type != BTRFS_INODE_REF_KEY &&
1019	     found_key.type != BTRFS_INODE_EXTREF_KEY)) {
1020		ret = -ENOENT;
1021		goto out;
1022	}
1023
1024	ret = iterate_inode_ref(sctx, root, p, &found_key, 1,
1025			__copy_first_ref, path);
1026	if (ret < 0)
1027		goto out;
1028	ret = 0;
1029
1030out:
1031	btrfs_free_path(p);
1032	return ret;
1033}
1034
1035struct backref_ctx {
1036	struct send_ctx *sctx;
1037
1038	/* number of total found references */
1039	u64 found;
1040
1041	/*
1042	 * used for clones found in send_root. clones found behind cur_objectid
1043	 * and cur_offset are not considered as allowed clones.
1044	 */
1045	u64 cur_objectid;
1046	u64 cur_offset;
1047
1048	/* may be truncated in case it's the last extent in a file */
1049	u64 extent_len;
1050
1051	/* Just to check for bugs in backref resolving */
1052	int found_itself;
1053};
1054
1055static int __clone_root_cmp_bsearch(const void *key, const void *elt)
1056{
1057	u64 root = (u64)(uintptr_t)key;
1058	struct clone_root *cr = (struct clone_root *)elt;
1059
1060	if (root < cr->root->objectid)
1061		return -1;
1062	if (root > cr->root->objectid)
1063		return 1;
1064	return 0;
1065}
1066
1067static int __clone_root_cmp_sort(const void *e1, const void *e2)
1068{
1069	struct clone_root *cr1 = (struct clone_root *)e1;
1070	struct clone_root *cr2 = (struct clone_root *)e2;
1071
1072	if (cr1->root->objectid < cr2->root->objectid)
1073		return -1;
1074	if (cr1->root->objectid > cr2->root->objectid)
1075		return 1;
1076	return 0;
1077}
1078
1079/*
1080 * Called for every backref that is found for the current extent.
1081 * Results are collected in sctx->clone_roots->ino/offset/found_refs
1082 */
1083static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_)
1084{
1085	struct backref_ctx *bctx = ctx_;
1086	struct clone_root *found;
1087	int ret;
1088	u64 i_size;
1089
1090	/* First check if the root is in the list of accepted clone sources */
1091	found = bsearch((void *)(uintptr_t)root, bctx->sctx->clone_roots,
1092			bctx->sctx->clone_roots_cnt,
1093			sizeof(struct clone_root),
1094			__clone_root_cmp_bsearch);
1095	if (!found)
1096		return 0;
1097
1098	if (found->root == bctx->sctx->send_root &&
1099	    ino == bctx->cur_objectid &&
1100	    offset == bctx->cur_offset) {
1101		bctx->found_itself = 1;
1102	}
1103
1104	/*
1105	 * There are inodes that have extents that lie behind its i_size. Don't
1106	 * accept clones from these extents.
1107	 */
1108	ret = get_inode_info(found->root, ino, &i_size, NULL, NULL, NULL, NULL,
1109			NULL);
1110	if (ret < 0)
1111		return ret;
1112
1113	if (offset + bctx->extent_len > i_size)
1114		return 0;
1115
1116	/*
1117	 * Make sure we don't consider clones from send_root that are
1118	 * behind the current inode/offset.
1119	 */
1120	if (found->root == bctx->sctx->send_root) {
1121		/*
1122		 * TODO for the moment we don't accept clones from the inode
1123		 * that is currently send. We may change this when
1124		 * BTRFS_IOC_CLONE_RANGE supports cloning from and to the same
1125		 * file.
1126		 */
1127		if (ino >= bctx->cur_objectid)
1128			return 0;
1129#if 0
1130		if (ino > bctx->cur_objectid)
1131			return 0;
1132		if (offset + bctx->extent_len > bctx->cur_offset)
1133			return 0;
1134#endif
1135	}
1136
1137	bctx->found++;
1138	found->found_refs++;
1139	if (ino < found->ino) {
1140		found->ino = ino;
1141		found->offset = offset;
1142	} else if (found->ino == ino) {
1143		/*
1144		 * same extent found more then once in the same file.
1145		 */
1146		if (found->offset > offset + bctx->extent_len)
1147			found->offset = offset;
1148	}
1149
1150	return 0;
1151}
1152
1153/*
1154 * Given an inode, offset and extent item, it finds a good clone for a clone
1155 * instruction. Returns -ENOENT when none could be found. The function makes
1156 * sure that the returned clone is usable at the point where sending is at the
1157 * moment. This means, that no clones are accepted which lie behind the current
1158 * inode+offset.
1159 *
1160 * path must point to the extent item when called.
1161 */
1162static int find_extent_clone(struct send_ctx *sctx,
1163			     struct btrfs_path *path,
1164			     u64 ino, u64 data_offset,
1165			     u64 ino_size,
1166			     struct clone_root **found)
1167{
1168	int ret;
1169	int extent_type;
1170	u64 logical;
1171	u64 disk_byte;
1172	u64 num_bytes;
1173	u64 extent_item_pos;
1174	u64 flags = 0;
1175	struct btrfs_file_extent_item *fi;
1176	struct extent_buffer *eb = path->nodes[0];
1177	struct backref_ctx *backref_ctx = NULL;
1178	struct clone_root *cur_clone_root;
1179	struct btrfs_key found_key;
1180	struct btrfs_path *tmp_path;
1181	int compressed;
1182	u32 i;
1183
1184	tmp_path = alloc_path_for_send();
1185	if (!tmp_path)
1186		return -ENOMEM;
1187
1188	backref_ctx = kmalloc(sizeof(*backref_ctx), GFP_NOFS);
1189	if (!backref_ctx) {
1190		ret = -ENOMEM;
1191		goto out;
1192	}
1193
1194	if (data_offset >= ino_size) {
1195		/*
1196		 * There may be extents that lie behind the file's size.
1197		 * I at least had this in combination with snapshotting while
1198		 * writing large files.
1199		 */
1200		ret = 0;
1201		goto out;
1202	}
1203
1204	fi = btrfs_item_ptr(eb, path->slots[0],
1205			struct btrfs_file_extent_item);
1206	extent_type = btrfs_file_extent_type(eb, fi);
1207	if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1208		ret = -ENOENT;
1209		goto out;
1210	}
1211	compressed = btrfs_file_extent_compression(eb, fi);
1212
1213	num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1214	disk_byte = btrfs_file_extent_disk_bytenr(eb, fi);
1215	if (disk_byte == 0) {
1216		ret = -ENOENT;
1217		goto out;
1218	}
1219	logical = disk_byte + btrfs_file_extent_offset(eb, fi);
1220
1221	ret = extent_from_logical(sctx->send_root->fs_info, disk_byte, tmp_path,
1222				  &found_key, &flags);
1223	btrfs_release_path(tmp_path);
1224
1225	if (ret < 0)
1226		goto out;
1227	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
1228		ret = -EIO;
1229		goto out;
1230	}
1231
1232	/*
1233	 * Setup the clone roots.
1234	 */
1235	for (i = 0; i < sctx->clone_roots_cnt; i++) {
1236		cur_clone_root = sctx->clone_roots + i;
1237		cur_clone_root->ino = (u64)-1;
1238		cur_clone_root->offset = 0;
1239		cur_clone_root->found_refs = 0;
1240	}
1241
1242	backref_ctx->sctx = sctx;
1243	backref_ctx->found = 0;
1244	backref_ctx->cur_objectid = ino;
1245	backref_ctx->cur_offset = data_offset;
1246	backref_ctx->found_itself = 0;
1247	backref_ctx->extent_len = num_bytes;
1248
1249	/*
1250	 * The last extent of a file may be too large due to page alignment.
1251	 * We need to adjust extent_len in this case so that the checks in
1252	 * __iterate_backrefs work.
1253	 */
1254	if (data_offset + num_bytes >= ino_size)
1255		backref_ctx->extent_len = ino_size - data_offset;
1256
1257	/*
1258	 * Now collect all backrefs.
1259	 */
1260	if (compressed == BTRFS_COMPRESS_NONE)
1261		extent_item_pos = logical - found_key.objectid;
1262	else
1263		extent_item_pos = 0;
1264
1265	extent_item_pos = logical - found_key.objectid;
1266	ret = iterate_extent_inodes(sctx->send_root->fs_info,
1267					found_key.objectid, extent_item_pos, 1,
1268					__iterate_backrefs, backref_ctx);
1269
1270	if (ret < 0)
1271		goto out;
1272
1273	if (!backref_ctx->found_itself) {
1274		/* found a bug in backref code? */
1275		ret = -EIO;
1276		printk(KERN_ERR "btrfs: ERROR did not find backref in "
1277				"send_root. inode=%llu, offset=%llu, "
1278				"disk_byte=%llu found extent=%llu\n",
1279				ino, data_offset, disk_byte, found_key.objectid);
1280		goto out;
1281	}
1282
1283verbose_printk(KERN_DEBUG "btrfs: find_extent_clone: data_offset=%llu, "
1284		"ino=%llu, "
1285		"num_bytes=%llu, logical=%llu\n",
1286		data_offset, ino, num_bytes, logical);
1287
1288	if (!backref_ctx->found)
1289		verbose_printk("btrfs:    no clones found\n");
1290
1291	cur_clone_root = NULL;
1292	for (i = 0; i < sctx->clone_roots_cnt; i++) {
1293		if (sctx->clone_roots[i].found_refs) {
1294			if (!cur_clone_root)
1295				cur_clone_root = sctx->clone_roots + i;
1296			else if (sctx->clone_roots[i].root == sctx->send_root)
1297				/* prefer clones from send_root over others */
1298				cur_clone_root = sctx->clone_roots + i;
1299		}
1300
1301	}
1302
1303	if (cur_clone_root) {
1304		*found = cur_clone_root;
1305		ret = 0;
1306	} else {
1307		ret = -ENOENT;
1308	}
1309
1310out:
1311	btrfs_free_path(tmp_path);
1312	kfree(backref_ctx);
1313	return ret;
1314}
1315
1316static int read_symlink(struct send_ctx *sctx,
1317			struct btrfs_root *root,
1318			u64 ino,
1319			struct fs_path *dest)
1320{
1321	int ret;
1322	struct btrfs_path *path;
1323	struct btrfs_key key;
1324	struct btrfs_file_extent_item *ei;
1325	u8 type;
1326	u8 compression;
1327	unsigned long off;
1328	int len;
1329
1330	path = alloc_path_for_send();
1331	if (!path)
1332		return -ENOMEM;
1333
1334	key.objectid = ino;
1335	key.type = BTRFS_EXTENT_DATA_KEY;
1336	key.offset = 0;
1337	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1338	if (ret < 0)
1339		goto out;
1340	BUG_ON(ret);
1341
1342	ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
1343			struct btrfs_file_extent_item);
1344	type = btrfs_file_extent_type(path->nodes[0], ei);
1345	compression = btrfs_file_extent_compression(path->nodes[0], ei);
1346	BUG_ON(type != BTRFS_FILE_EXTENT_INLINE);
1347	BUG_ON(compression);
1348
1349	off = btrfs_file_extent_inline_start(ei);
1350	len = btrfs_file_extent_inline_len(path->nodes[0], ei);
1351
1352	ret = fs_path_add_from_extent_buffer(dest, path->nodes[0], off, len);
1353
1354out:
1355	btrfs_free_path(path);
1356	return ret;
1357}
1358
1359/*
1360 * Helper function to generate a file name that is unique in the root of
1361 * send_root and parent_root. This is used to generate names for orphan inodes.
1362 */
1363static int gen_unique_name(struct send_ctx *sctx,
1364			   u64 ino, u64 gen,
1365			   struct fs_path *dest)
1366{
1367	int ret = 0;
1368	struct btrfs_path *path;
1369	struct btrfs_dir_item *di;
1370	char tmp[64];
1371	int len;
1372	u64 idx = 0;
1373
1374	path = alloc_path_for_send();
1375	if (!path)
1376		return -ENOMEM;
1377
1378	while (1) {
1379		len = snprintf(tmp, sizeof(tmp) - 1, "o%llu-%llu-%llu",
1380				ino, gen, idx);
1381		if (len >= sizeof(tmp)) {
1382			/* should really not happen */
1383			ret = -EOVERFLOW;
1384			goto out;
1385		}
1386
1387		di = btrfs_lookup_dir_item(NULL, sctx->send_root,
1388				path, BTRFS_FIRST_FREE_OBJECTID,
1389				tmp, strlen(tmp), 0);
1390		btrfs_release_path(path);
1391		if (IS_ERR(di)) {
1392			ret = PTR_ERR(di);
1393			goto out;
1394		}
1395		if (di) {
1396			/* not unique, try again */
1397			idx++;
1398			continue;
1399		}
1400
1401		if (!sctx->parent_root) {
1402			/* unique */
1403			ret = 0;
1404			break;
1405		}
1406
1407		di = btrfs_lookup_dir_item(NULL, sctx->parent_root,
1408				path, BTRFS_FIRST_FREE_OBJECTID,
1409				tmp, strlen(tmp), 0);
1410		btrfs_release_path(path);
1411		if (IS_ERR(di)) {
1412			ret = PTR_ERR(di);
1413			goto out;
1414		}
1415		if (di) {
1416			/* not unique, try again */
1417			idx++;
1418			continue;
1419		}
1420		/* unique */
1421		break;
1422	}
1423
1424	ret = fs_path_add(dest, tmp, strlen(tmp));
1425
1426out:
1427	btrfs_free_path(path);
1428	return ret;
1429}
1430
1431enum inode_state {
1432	inode_state_no_change,
1433	inode_state_will_create,
1434	inode_state_did_create,
1435	inode_state_will_delete,
1436	inode_state_did_delete,
1437};
1438
1439static int get_cur_inode_state(struct send_ctx *sctx, u64 ino, u64 gen)
1440{
1441	int ret;
1442	int left_ret;
1443	int right_ret;
1444	u64 left_gen;
1445	u64 right_gen;
1446
1447	ret = get_inode_info(sctx->send_root, ino, NULL, &left_gen, NULL, NULL,
1448			NULL, NULL);
1449	if (ret < 0 && ret != -ENOENT)
1450		goto out;
1451	left_ret = ret;
1452
1453	if (!sctx->parent_root) {
1454		right_ret = -ENOENT;
1455	} else {
1456		ret = get_inode_info(sctx->parent_root, ino, NULL, &right_gen,
1457				NULL, NULL, NULL, NULL);
1458		if (ret < 0 && ret != -ENOENT)
1459			goto out;
1460		right_ret = ret;
1461	}
1462
1463	if (!left_ret && !right_ret) {
1464		if (left_gen == gen && right_gen == gen) {
1465			ret = inode_state_no_change;
1466		} else if (left_gen == gen) {
1467			if (ino < sctx->send_progress)
1468				ret = inode_state_did_create;
1469			else
1470				ret = inode_state_will_create;
1471		} else if (right_gen == gen) {
1472			if (ino < sctx->send_progress)
1473				ret = inode_state_did_delete;
1474			else
1475				ret = inode_state_will_delete;
1476		} else  {
1477			ret = -ENOENT;
1478		}
1479	} else if (!left_ret) {
1480		if (left_gen == gen) {
1481			if (ino < sctx->send_progress)
1482				ret = inode_state_did_create;
1483			else
1484				ret = inode_state_will_create;
1485		} else {
1486			ret = -ENOENT;
1487		}
1488	} else if (!right_ret) {
1489		if (right_gen == gen) {
1490			if (ino < sctx->send_progress)
1491				ret = inode_state_did_delete;
1492			else
1493				ret = inode_state_will_delete;
1494		} else {
1495			ret = -ENOENT;
1496		}
1497	} else {
1498		ret = -ENOENT;
1499	}
1500
1501out:
1502	return ret;
1503}
1504
1505static int is_inode_existent(struct send_ctx *sctx, u64 ino, u64 gen)
1506{
1507	int ret;
1508
1509	ret = get_cur_inode_state(sctx, ino, gen);
1510	if (ret < 0)
1511		goto out;
1512
1513	if (ret == inode_state_no_change ||
1514	    ret == inode_state_did_create ||
1515	    ret == inode_state_will_delete)
1516		ret = 1;
1517	else
1518		ret = 0;
1519
1520out:
1521	return ret;
1522}
1523
1524/*
1525 * Helper function to lookup a dir item in a dir.
1526 */
1527static int lookup_dir_item_inode(struct btrfs_root *root,
1528				 u64 dir, const char *name, int name_len,
1529				 u64 *found_inode,
1530				 u8 *found_type)
1531{
1532	int ret = 0;
1533	struct btrfs_dir_item *di;
1534	struct btrfs_key key;
1535	struct btrfs_path *path;
1536
1537	path = alloc_path_for_send();
1538	if (!path)
1539		return -ENOMEM;
1540
1541	di = btrfs_lookup_dir_item(NULL, root, path,
1542			dir, name, name_len, 0);
1543	if (!di) {
1544		ret = -ENOENT;
1545		goto out;
1546	}
1547	if (IS_ERR(di)) {
1548		ret = PTR_ERR(di);
1549		goto out;
1550	}
1551	btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key);
1552	*found_inode = key.objectid;
1553	*found_type = btrfs_dir_type(path->nodes[0], di);
1554
1555out:
1556	btrfs_free_path(path);
1557	return ret;
1558}
1559
1560/*
1561 * Looks up the first btrfs_inode_ref of a given ino. It returns the parent dir,
1562 * generation of the parent dir and the name of the dir entry.
1563 */
1564static int get_first_ref(struct send_ctx *sctx,
1565			 struct btrfs_root *root, u64 ino,
1566			 u64 *dir, u64 *dir_gen, struct fs_path *name)
1567{
1568	int ret;
1569	struct btrfs_key key;
1570	struct btrfs_key found_key;
1571	struct btrfs_path *path;
1572	int len;
1573	u64 parent_dir;
1574
1575	path = alloc_path_for_send();
1576	if (!path)
1577		return -ENOMEM;
1578
1579	key.objectid = ino;
1580	key.type = BTRFS_INODE_REF_KEY;
1581	key.offset = 0;
1582
1583	ret = btrfs_search_slot_for_read(root, &key, path, 1, 0);
1584	if (ret < 0)
1585		goto out;
1586	if (!ret)
1587		btrfs_item_key_to_cpu(path->nodes[0], &found_key,
1588				path->slots[0]);
1589	if (ret || found_key.objectid != ino ||
1590	    (found_key.type != BTRFS_INODE_REF_KEY &&
1591	     found_key.type != BTRFS_INODE_EXTREF_KEY)) {
1592		ret = -ENOENT;
1593		goto out;
1594	}
1595
1596	if (key.type == BTRFS_INODE_REF_KEY) {
1597		struct btrfs_inode_ref *iref;
1598		iref = btrfs_item_ptr(path->nodes[0], path->slots[0],
1599				      struct btrfs_inode_ref);
1600		len = btrfs_inode_ref_name_len(path->nodes[0], iref);
1601		ret = fs_path_add_from_extent_buffer(name, path->nodes[0],
1602						     (unsigned long)(iref + 1),
1603						     len);
1604		parent_dir = found_key.offset;
1605	} else {
1606		struct btrfs_inode_extref *extref;
1607		extref = btrfs_item_ptr(path->nodes[0], path->slots[0],
1608					struct btrfs_inode_extref);
1609		len = btrfs_inode_extref_name_len(path->nodes[0], extref);
1610		ret = fs_path_add_from_extent_buffer(name, path->nodes[0],
1611					(unsigned long)&extref->name, len);
1612		parent_dir = btrfs_inode_extref_parent(path->nodes[0], extref);
1613	}
1614	if (ret < 0)
1615		goto out;
1616	btrfs_release_path(path);
1617
1618	ret = get_inode_info(root, parent_dir, NULL, dir_gen, NULL, NULL,
1619			NULL, NULL);
1620	if (ret < 0)
1621		goto out;
1622
1623	*dir = parent_dir;
1624
1625out:
1626	btrfs_free_path(path);
1627	return ret;
1628}
1629
1630static int is_first_ref(struct send_ctx *sctx,
1631			struct btrfs_root *root,
1632			u64 ino, u64 dir,
1633			const char *name, int name_len)
1634{
1635	int ret;
1636	struct fs_path *tmp_name;
1637	u64 tmp_dir;
1638	u64 tmp_dir_gen;
1639
1640	tmp_name = fs_path_alloc(sctx);
1641	if (!tmp_name)
1642		return -ENOMEM;
1643
1644	ret = get_first_ref(sctx, root, ino, &tmp_dir, &tmp_dir_gen, tmp_name);
1645	if (ret < 0)
1646		goto out;
1647
1648	if (dir != tmp_dir || name_len != fs_path_len(tmp_name)) {
1649		ret = 0;
1650		goto out;
1651	}
1652
1653	ret = !memcmp(tmp_name->start, name, name_len);
1654
1655out:
1656	fs_path_free(sctx, tmp_name);
1657	return ret;
1658}
1659
1660/*
1661 * Used by process_recorded_refs to determine if a new ref would overwrite an
1662 * already existing ref. In case it detects an overwrite, it returns the
1663 * inode/gen in who_ino/who_gen.
1664 * When an overwrite is detected, process_recorded_refs does proper orphanizing
1665 * to make sure later references to the overwritten inode are possible.
1666 * Orphanizing is however only required for the first ref of an inode.
1667 * process_recorded_refs does an additional is_first_ref check to see if
1668 * orphanizing is really required.
1669 */
1670static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen,
1671			      const char *name, int name_len,
1672			      u64 *who_ino, u64 *who_gen)
1673{
1674	int ret = 0;
1675	u64 other_inode = 0;
1676	u8 other_type = 0;
1677
1678	if (!sctx->parent_root)
1679		goto out;
1680
1681	ret = is_inode_existent(sctx, dir, dir_gen);
1682	if (ret <= 0)
1683		goto out;
1684
1685	ret = lookup_dir_item_inode(sctx->parent_root, dir, name, name_len,
1686			&other_inode, &other_type);
1687	if (ret < 0 && ret != -ENOENT)
1688		goto out;
1689	if (ret) {
1690		ret = 0;
1691		goto out;
1692	}
1693
1694	/*
1695	 * Check if the overwritten ref was already processed. If yes, the ref
1696	 * was already unlinked/moved, so we can safely assume that we will not
1697	 * overwrite anything at this point in time.
1698	 */
1699	if (other_inode > sctx->send_progress) {
1700		ret = get_inode_info(sctx->parent_root, other_inode, NULL,
1701				who_gen, NULL, NULL, NULL, NULL);
1702		if (ret < 0)
1703			goto out;
1704
1705		ret = 1;
1706		*who_ino = other_inode;
1707	} else {
1708		ret = 0;
1709	}
1710
1711out:
1712	return ret;
1713}
1714
1715/*
1716 * Checks if the ref was overwritten by an already processed inode. This is
1717 * used by __get_cur_name_and_parent to find out if the ref was orphanized and
1718 * thus the orphan name needs be used.
1719 * process_recorded_refs also uses it to avoid unlinking of refs that were
1720 * overwritten.
1721 */
1722static int did_overwrite_ref(struct send_ctx *sctx,
1723			    u64 dir, u64 dir_gen,
1724			    u64 ino, u64 ino_gen,
1725			    const char *name, int name_len)
1726{
1727	int ret = 0;
1728	u64 gen;
1729	u64 ow_inode;
1730	u8 other_type;
1731
1732	if (!sctx->parent_root)
1733		goto out;
1734
1735	ret = is_inode_existent(sctx, dir, dir_gen);
1736	if (ret <= 0)
1737		goto out;
1738
1739	/* check if the ref was overwritten by another ref */
1740	ret = lookup_dir_item_inode(sctx->send_root, dir, name, name_len,
1741			&ow_inode, &other_type);
1742	if (ret < 0 && ret != -ENOENT)
1743		goto out;
1744	if (ret) {
1745		/* was never and will never be overwritten */
1746		ret = 0;
1747		goto out;
1748	}
1749
1750	ret = get_inode_info(sctx->send_root, ow_inode, NULL, &gen, NULL, NULL,
1751			NULL, NULL);
1752	if (ret < 0)
1753		goto out;
1754
1755	if (ow_inode == ino && gen == ino_gen) {
1756		ret = 0;
1757		goto out;
1758	}
1759
1760	/* we know that it is or will be overwritten. check this now */
1761	if (ow_inode < sctx->send_progress)
1762		ret = 1;
1763	else
1764		ret = 0;
1765
1766out:
1767	return ret;
1768}
1769
1770/*
1771 * Same as did_overwrite_ref, but also checks if it is the first ref of an inode
1772 * that got overwritten. This is used by process_recorded_refs to determine
1773 * if it has to use the path as returned by get_cur_path or the orphan name.
1774 */
1775static int did_overwrite_first_ref(struct send_ctx *sctx, u64 ino, u64 gen)
1776{
1777	int ret = 0;
1778	struct fs_path *name = NULL;
1779	u64 dir;
1780	u64 dir_gen;
1781
1782	if (!sctx->parent_root)
1783		goto out;
1784
1785	name = fs_path_alloc(sctx);
1786	if (!name)
1787		return -ENOMEM;
1788
1789	ret = get_first_ref(sctx, sctx->parent_root, ino, &dir, &dir_gen, name);
1790	if (ret < 0)
1791		goto out;
1792
1793	ret = did_overwrite_ref(sctx, dir, dir_gen, ino, gen,
1794			name->start, fs_path_len(name));
1795
1796out:
1797	fs_path_free(sctx, name);
1798	return ret;
1799}
1800
1801/*
1802 * Insert a name cache entry. On 32bit kernels the radix tree index is 32bit,
1803 * so we need to do some special handling in case we have clashes. This function
1804 * takes care of this with the help of name_cache_entry::radix_list.
1805 * In case of error, nce is kfreed.
1806 */
1807static int name_cache_insert(struct send_ctx *sctx,
1808			     struct name_cache_entry *nce)
1809{
1810	int ret = 0;
1811	struct list_head *nce_head;
1812
1813	nce_head = radix_tree_lookup(&sctx->name_cache,
1814			(unsigned long)nce->ino);
1815	if (!nce_head) {
1816		nce_head = kmalloc(sizeof(*nce_head), GFP_NOFS);
1817		if (!nce_head)
1818			return -ENOMEM;
1819		INIT_LIST_HEAD(nce_head);
1820
1821		ret = radix_tree_insert(&sctx->name_cache, nce->ino, nce_head);
1822		if (ret < 0) {
1823			kfree(nce_head);
1824			kfree(nce);
1825			return ret;
1826		}
1827	}
1828	list_add_tail(&nce->radix_list, nce_head);
1829	list_add_tail(&nce->list, &sctx->name_cache_list);
1830	sctx->name_cache_size++;
1831
1832	return ret;
1833}
1834
1835static void name_cache_delete(struct send_ctx *sctx,
1836			      struct name_cache_entry *nce)
1837{
1838	struct list_head *nce_head;
1839
1840	nce_head = radix_tree_lookup(&sctx->name_cache,
1841			(unsigned long)nce->ino);
1842	BUG_ON(!nce_head);
1843
1844	list_del(&nce->radix_list);
1845	list_del(&nce->list);
1846	sctx->name_cache_size--;
1847
1848	if (list_empty(nce_head)) {
1849		radix_tree_delete(&sctx->name_cache, (unsigned long)nce->ino);
1850		kfree(nce_head);
1851	}
1852}
1853
1854static struct name_cache_entry *name_cache_search(struct send_ctx *sctx,
1855						    u64 ino, u64 gen)
1856{
1857	struct list_head *nce_head;
1858	struct name_cache_entry *cur;
1859
1860	nce_head = radix_tree_lookup(&sctx->name_cache, (unsigned long)ino);
1861	if (!nce_head)
1862		return NULL;
1863
1864	list_for_each_entry(cur, nce_head, radix_list) {
1865		if (cur->ino == ino && cur->gen == gen)
1866			return cur;
1867	}
1868	return NULL;
1869}
1870
1871/*
1872 * Removes the entry from the list and adds it back to the end. This marks the
1873 * entry as recently used so that name_cache_clean_unused does not remove it.
1874 */
1875static void name_cache_used(struct send_ctx *sctx, struct name_cache_entry *nce)
1876{
1877	list_del(&nce->list);
1878	list_add_tail(&nce->list, &sctx->name_cache_list);
1879}
1880
1881/*
1882 * Remove some entries from the beginning of name_cache_list.
1883 */
1884static void name_cache_clean_unused(struct send_ctx *sctx)
1885{
1886	struct name_cache_entry *nce;
1887
1888	if (sctx->name_cache_size < SEND_CTX_NAME_CACHE_CLEAN_SIZE)
1889		return;
1890
1891	while (sctx->name_cache_size > SEND_CTX_MAX_NAME_CACHE_SIZE) {
1892		nce = list_entry(sctx->name_cache_list.next,
1893				struct name_cache_entry, list);
1894		name_cache_delete(sctx, nce);
1895		kfree(nce);
1896	}
1897}
1898
1899static void name_cache_free(struct send_ctx *sctx)
1900{
1901	struct name_cache_entry *nce;
1902
1903	while (!list_empty(&sctx->name_cache_list)) {
1904		nce = list_entry(sctx->name_cache_list.next,
1905				struct name_cache_entry, list);
1906		name_cache_delete(sctx, nce);
1907		kfree(nce);
1908	}
1909}
1910
1911/*
1912 * Used by get_cur_path for each ref up to the root.
1913 * Returns 0 if it succeeded.
1914 * Returns 1 if the inode is not existent or got overwritten. In that case, the
1915 * name is an orphan name. This instructs get_cur_path to stop iterating. If 1
1916 * is returned, parent_ino/parent_gen are not guaranteed to be valid.
1917 * Returns <0 in case of error.
1918 */
1919static int __get_cur_name_and_parent(struct send_ctx *sctx,
1920				     u64 ino, u64 gen,
1921				     u64 *parent_ino,
1922				     u64 *parent_gen,
1923				     struct fs_path *dest)
1924{
1925	int ret;
1926	int nce_ret;
1927	struct btrfs_path *path = NULL;
1928	struct name_cache_entry *nce = NULL;
1929
1930	/*
1931	 * First check if we already did a call to this function with the same
1932	 * ino/gen. If yes, check if the cache entry is still up-to-date. If yes
1933	 * return the cached result.
1934	 */
1935	nce = name_cache_search(sctx, ino, gen);
1936	if (nce) {
1937		if (ino < sctx->send_progress && nce->need_later_update) {
1938			name_cache_delete(sctx, nce);
1939			kfree(nce);
1940			nce = NULL;
1941		} else {
1942			name_cache_used(sctx, nce);
1943			*parent_ino = nce->parent_ino;
1944			*parent_gen = nce->parent_gen;
1945			ret = fs_path_add(dest, nce->name, nce->name_len);
1946			if (ret < 0)
1947				goto out;
1948			ret = nce->ret;
1949			goto out;
1950		}
1951	}
1952
1953	path = alloc_path_for_send();
1954	if (!path)
1955		return -ENOMEM;
1956
1957	/*
1958	 * If the inode is not existent yet, add the orphan name and return 1.
1959	 * This should only happen for the parent dir that we determine in
1960	 * __record_new_ref
1961	 */
1962	ret = is_inode_existent(sctx, ino, gen);
1963	if (ret < 0)
1964		goto out;
1965
1966	if (!ret) {
1967		ret = gen_unique_name(sctx, ino, gen, dest);
1968		if (ret < 0)
1969			goto out;
1970		ret = 1;
1971		goto out_cache;
1972	}
1973
1974	/*
1975	 * Depending on whether the inode was already processed or not, use
1976	 * send_root or parent_root for ref lookup.
1977	 */
1978	if (ino < sctx->send_progress)
1979		ret = get_first_ref(sctx, sctx->send_root, ino,
1980				parent_ino, parent_gen, dest);
1981	else
1982		ret = get_first_ref(sctx, sctx->parent_root, ino,
1983				parent_ino, parent_gen, dest);
1984	if (ret < 0)
1985		goto out;
1986
1987	/*
1988	 * Check if the ref was overwritten by an inode's ref that was processed
1989	 * earlier. If yes, treat as orphan and return 1.
1990	 */
1991	ret = did_overwrite_ref(sctx, *parent_ino, *parent_gen, ino, gen,
1992			dest->start, dest->end - dest->start);
1993	if (ret < 0)
1994		goto out;
1995	if (ret) {
1996		fs_path_reset(dest);
1997		ret = gen_unique_name(sctx, ino, gen, dest);
1998		if (ret < 0)
1999			goto out;
2000		ret = 1;
2001	}
2002
2003out_cache:
2004	/*
2005	 * Store the result of the lookup in the name cache.
2006	 */
2007	nce = kmalloc(sizeof(*nce) + fs_path_len(dest) + 1, GFP_NOFS);
2008	if (!nce) {
2009		ret = -ENOMEM;
2010		goto out;
2011	}
2012
2013	nce->ino = ino;
2014	nce->gen = gen;
2015	nce->parent_ino = *parent_ino;
2016	nce->parent_gen = *parent_gen;
2017	nce->name_len = fs_path_len(dest);
2018	nce->ret = ret;
2019	strcpy(nce->name, dest->start);
2020
2021	if (ino < sctx->send_progress)
2022		nce->need_later_update = 0;
2023	else
2024		nce->need_later_update = 1;
2025
2026	nce_ret = name_cache_insert(sctx, nce);
2027	if (nce_ret < 0)
2028		ret = nce_ret;
2029	name_cache_clean_unused(sctx);
2030
2031out:
2032	btrfs_free_path(path);
2033	return ret;
2034}
2035
2036/*
2037 * Magic happens here. This function returns the first ref to an inode as it
2038 * would look like while receiving the stream at this point in time.
2039 * We walk the path up to the root. For every inode in between, we check if it
2040 * was already processed/sent. If yes, we continue with the parent as found
2041 * in send_root. If not, we continue with the parent as found in parent_root.
2042 * If we encounter an inode that was deleted at this point in time, we use the
2043 * inodes "orphan" name instead of the real name and stop. Same with new inodes
2044 * that were not created yet and overwritten inodes/refs.
2045 *
2046 * When do we have have orphan inodes:
2047 * 1. When an inode is freshly created and thus no valid refs are available yet
2048 * 2. When a directory lost all it's refs (deleted) but still has dir items
2049 *    inside which were not processed yet (pending for move/delete). If anyone
2050 *    tried to get the path to the dir items, it would get a path inside that
2051 *    orphan directory.
2052 * 3. When an inode is moved around or gets new links, it may overwrite the ref
2053 *    of an unprocessed inode. If in that case the first ref would be
2054 *    overwritten, the overwritten inode gets "orphanized". Later when we
2055 *    process this overwritten inode, it is restored at a new place by moving
2056 *    the orphan inode.
2057 *
2058 * sctx->send_progress tells this function at which point in time receiving
2059 * would be.
2060 */
2061static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen,
2062			struct fs_path *dest)
2063{
2064	int ret = 0;
2065	struct fs_path *name = NULL;
2066	u64 parent_inode = 0;
2067	u64 parent_gen = 0;
2068	int stop = 0;
2069
2070	name = fs_path_alloc(sctx);
2071	if (!name) {
2072		ret = -ENOMEM;
2073		goto out;
2074	}
2075
2076	dest->reversed = 1;
2077	fs_path_reset(dest);
2078
2079	while (!stop && ino != BTRFS_FIRST_FREE_OBJECTID) {
2080		fs_path_reset(name);
2081
2082		ret = __get_cur_name_and_parent(sctx, ino, gen,
2083				&parent_inode, &parent_gen, name);
2084		if (ret < 0)
2085			goto out;
2086		if (ret)
2087			stop = 1;
2088
2089		ret = fs_path_add_path(dest, name);
2090		if (ret < 0)
2091			goto out;
2092
2093		ino = parent_inode;
2094		gen = parent_gen;
2095	}
2096
2097out:
2098	fs_path_free(sctx, name);
2099	if (!ret)
2100		fs_path_unreverse(dest);
2101	return ret;
2102}
2103
2104/*
2105 * Called for regular files when sending extents data. Opens a struct file
2106 * to read from the file.
2107 */
2108static int open_cur_inode_file(struct send_ctx *sctx)
2109{
2110	int ret = 0;
2111	struct btrfs_key key;
2112	struct path path;
2113	struct inode *inode;
2114	struct dentry *dentry;
2115	struct file *filp;
2116	int new = 0;
2117
2118	if (sctx->cur_inode_filp)
2119		goto out;
2120
2121	key.objectid = sctx->cur_ino;
2122	key.type = BTRFS_INODE_ITEM_KEY;
2123	key.offset = 0;
2124
2125	inode = btrfs_iget(sctx->send_root->fs_info->sb, &key, sctx->send_root,
2126			&new);
2127	if (IS_ERR(inode)) {
2128		ret = PTR_ERR(inode);
2129		goto out;
2130	}
2131
2132	dentry = d_obtain_alias(inode);
2133	inode = NULL;
2134	if (IS_ERR(dentry)) {
2135		ret = PTR_ERR(dentry);
2136		goto out;
2137	}
2138
2139	path.mnt = sctx->mnt;
2140	path.dentry = dentry;
2141	filp = dentry_open(&path, O_RDONLY | O_LARGEFILE, current_cred());
2142	dput(dentry);
2143	dentry = NULL;
2144	if (IS_ERR(filp)) {
2145		ret = PTR_ERR(filp);
2146		goto out;
2147	}
2148	sctx->cur_inode_filp = filp;
2149
2150out:
2151	/*
2152	 * no xxxput required here as every vfs op
2153	 * does it by itself on failure
2154	 */
2155	return ret;
2156}
2157
2158/*
2159 * Closes the struct file that was created in open_cur_inode_file
2160 */
2161static int close_cur_inode_file(struct send_ctx *sctx)
2162{
2163	int ret = 0;
2164
2165	if (!sctx->cur_inode_filp)
2166		goto out;
2167
2168	ret = filp_close(sctx->cur_inode_filp, NULL);
2169	sctx->cur_inode_filp = NULL;
2170
2171out:
2172	return ret;
2173}
2174
2175/*
2176 * Sends a BTRFS_SEND_C_SUBVOL command/item to userspace
2177 */
2178static int send_subvol_begin(struct send_ctx *sctx)
2179{
2180	int ret;
2181	struct btrfs_root *send_root = sctx->send_root;
2182	struct btrfs_root *parent_root = sctx->parent_root;
2183	struct btrfs_path *path;
2184	struct btrfs_key key;
2185	struct btrfs_root_ref *ref;
2186	struct extent_buffer *leaf;
2187	char *name = NULL;
2188	int namelen;
2189
2190	path = alloc_path_for_send();
2191	if (!path)
2192		return -ENOMEM;
2193
2194	name = kmalloc(BTRFS_PATH_NAME_MAX, GFP_NOFS);
2195	if (!name) {
2196		btrfs_free_path(path);
2197		return -ENOMEM;
2198	}
2199
2200	key.objectid = send_root->objectid;
2201	key.type = BTRFS_ROOT_BACKREF_KEY;
2202	key.offset = 0;
2203
2204	ret = btrfs_search_slot_for_read(send_root->fs_info->tree_root,
2205				&key, path, 1, 0);
2206	if (ret < 0)
2207		goto out;
2208	if (ret) {
2209		ret = -ENOENT;
2210		goto out;
2211	}
2212
2213	leaf = path->nodes[0];
2214	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
2215	if (key.type != BTRFS_ROOT_BACKREF_KEY ||
2216	    key.objectid != send_root->objectid) {
2217		ret = -ENOENT;
2218		goto out;
2219	}
2220	ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref);
2221	namelen = btrfs_root_ref_name_len(leaf, ref);
2222	read_extent_buffer(leaf, name, (unsigned long)(ref + 1), namelen);
2223	btrfs_release_path(path);
2224
2225	if (parent_root) {
2226		ret = begin_cmd(sctx, BTRFS_SEND_C_SNAPSHOT);
2227		if (ret < 0)
2228			goto out;
2229	} else {
2230		ret = begin_cmd(sctx, BTRFS_SEND_C_SUBVOL);
2231		if (ret < 0)
2232			goto out;
2233	}
2234
2235	TLV_PUT_STRING(sctx, BTRFS_SEND_A_PATH, name, namelen);
2236	TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID,
2237			sctx->send_root->root_item.uuid);
2238	TLV_PUT_U64(sctx, BTRFS_SEND_A_CTRANSID,
2239			sctx->send_root->root_item.ctransid);
2240	if (parent_root) {
2241		TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
2242				sctx->parent_root->root_item.uuid);
2243		TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID,
2244				sctx->parent_root->root_item.ctransid);
2245	}
2246
2247	ret = send_cmd(sctx);
2248
2249tlv_put_failure:
2250out:
2251	btrfs_free_path(path);
2252	kfree(name);
2253	return ret;
2254}
2255
2256static int send_truncate(struct send_ctx *sctx, u64 ino, u64 gen, u64 size)
2257{
2258	int ret = 0;
2259	struct fs_path *p;
2260
2261verbose_printk("btrfs: send_truncate %llu size=%llu\n", ino, size);
2262
2263	p = fs_path_alloc(sctx);
2264	if (!p)
2265		return -ENOMEM;
2266
2267	ret = begin_cmd(sctx, BTRFS_SEND_C_TRUNCATE);
2268	if (ret < 0)
2269		goto out;
2270
2271	ret = get_cur_path(sctx, ino, gen, p);
2272	if (ret < 0)
2273		goto out;
2274	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
2275	TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, size);
2276
2277	ret = send_cmd(sctx);
2278
2279tlv_put_failure:
2280out:
2281	fs_path_free(sctx, p);
2282	return ret;
2283}
2284
2285static int send_chmod(struct send_ctx *sctx, u64 ino, u64 gen, u64 mode)
2286{
2287	int ret = 0;
2288	struct fs_path *p;
2289
2290verbose_printk("btrfs: send_chmod %llu mode=%llu\n", ino, mode);
2291
2292	p = fs_path_alloc(sctx);
2293	if (!p)
2294		return -ENOMEM;
2295
2296	ret = begin_cmd(sctx, BTRFS_SEND_C_CHMOD);
2297	if (ret < 0)
2298		goto out;
2299
2300	ret = get_cur_path(sctx, ino, gen, p);
2301	if (ret < 0)
2302		goto out;
2303	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
2304	TLV_PUT_U64(sctx, BTRFS_SEND_A_MODE, mode & 07777);
2305
2306	ret = send_cmd(sctx);
2307
2308tlv_put_failure:
2309out:
2310	fs_path_free(sctx, p);
2311	return ret;
2312}
2313
2314static int send_chown(struct send_ctx *sctx, u64 ino, u64 gen, u64 uid, u64 gid)
2315{
2316	int ret = 0;
2317	struct fs_path *p;
2318
2319verbose_printk("btrfs: send_chown %llu uid=%llu, gid=%llu\n", ino, uid, gid);
2320
2321	p = fs_path_alloc(sctx);
2322	if (!p)
2323		return -ENOMEM;
2324
2325	ret = begin_cmd(sctx, BTRFS_SEND_C_CHOWN);
2326	if (ret < 0)
2327		goto out;
2328
2329	ret = get_cur_path(sctx, ino, gen, p);
2330	if (ret < 0)
2331		goto out;
2332	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
2333	TLV_PUT_U64(sctx, BTRFS_SEND_A_UID, uid);
2334	TLV_PUT_U64(sctx, BTRFS_SEND_A_GID, gid);
2335
2336	ret = send_cmd(sctx);
2337
2338tlv_put_failure:
2339out:
2340	fs_path_free(sctx, p);
2341	return ret;
2342}
2343
2344static int send_utimes(struct send_ctx *sctx, u64 ino, u64 gen)
2345{
2346	int ret = 0;
2347	struct fs_path *p = NULL;
2348	struct btrfs_inode_item *ii;
2349	struct btrfs_path *path = NULL;
2350	struct extent_buffer *eb;
2351	struct btrfs_key key;
2352	int slot;
2353
2354verbose_printk("btrfs: send_utimes %llu\n", ino);
2355
2356	p = fs_path_alloc(sctx);
2357	if (!p)
2358		return -ENOMEM;
2359
2360	path = alloc_path_for_send();
2361	if (!path) {
2362		ret = -ENOMEM;
2363		goto out;
2364	}
2365
2366	key.objectid = ino;
2367	key.type = BTRFS_INODE_ITEM_KEY;
2368	key.offset = 0;
2369	ret = btrfs_search_slot(NULL, sctx->send_root, &key, path, 0, 0);
2370	if (ret < 0)
2371		goto out;
2372
2373	eb = path->nodes[0];
2374	slot = path->slots[0];
2375	ii = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
2376
2377	ret = begin_cmd(sctx, BTRFS_SEND_C_UTIMES);
2378	if (ret < 0)
2379		goto out;
2380
2381	ret = get_cur_path(sctx, ino, gen, p);
2382	if (ret < 0)
2383		goto out;
2384	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
2385	TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_ATIME, eb,
2386			btrfs_inode_atime(ii));
2387	TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_MTIME, eb,
2388			btrfs_inode_mtime(ii));
2389	TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_CTIME, eb,
2390			btrfs_inode_ctime(ii));
2391	/* TODO Add otime support when the otime patches get into upstream */
2392
2393	ret = send_cmd(sctx);
2394
2395tlv_put_failure:
2396out:
2397	fs_path_free(sctx, p);
2398	btrfs_free_path(path);
2399	return ret;
2400}
2401
2402/*
2403 * Sends a BTRFS_SEND_C_MKXXX or SYMLINK command to user space. We don't have
2404 * a valid path yet because we did not process the refs yet. So, the inode
2405 * is created as orphan.
2406 */
2407static int send_create_inode(struct send_ctx *sctx, u64 ino)
2408{
2409	int ret = 0;
2410	struct fs_path *p;
2411	int cmd;
2412	u64 gen;
2413	u64 mode;
2414	u64 rdev;
2415
2416verbose_printk("btrfs: send_create_inode %llu\n", ino);
2417
2418	p = fs_path_alloc(sctx);
2419	if (!p)
2420		return -ENOMEM;
2421
2422	ret = get_inode_info(sctx->send_root, ino, NULL, &gen, &mode, NULL,
2423			NULL, &rdev);
2424	if (ret < 0)
2425		goto out;
2426
2427	if (S_ISREG(mode)) {
2428		cmd = BTRFS_SEND_C_MKFILE;
2429	} else if (S_ISDIR(mode)) {
2430		cmd = BTRFS_SEND_C_MKDIR;
2431	} else if (S_ISLNK(mode)) {
2432		cmd = BTRFS_SEND_C_SYMLINK;
2433	} else if (S_ISCHR(mode) || S_ISBLK(mode)) {
2434		cmd = BTRFS_SEND_C_MKNOD;
2435	} else if (S_ISFIFO(mode)) {
2436		cmd = BTRFS_SEND_C_MKFIFO;
2437	} else if (S_ISSOCK(mode)) {
2438		cmd = BTRFS_SEND_C_MKSOCK;
2439	} else {
2440		printk(KERN_WARNING "btrfs: unexpected inode type %o",
2441				(int)(mode & S_IFMT));
2442		ret = -ENOTSUPP;
2443		goto out;
2444	}
2445
2446	ret = begin_cmd(sctx, cmd);
2447	if (ret < 0)
2448		goto out;
2449
2450	ret = gen_unique_name(sctx, ino, gen, p);
2451	if (ret < 0)
2452		goto out;
2453
2454	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
2455	TLV_PUT_U64(sctx, BTRFS_SEND_A_INO, ino);
2456
2457	if (S_ISLNK(mode)) {
2458		fs_path_reset(p);
2459		ret = read_symlink(sctx, sctx->send_root, ino, p);
2460		if (ret < 0)
2461			goto out;
2462		TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, p);
2463	} else if (S_ISCHR(mode) || S_ISBLK(mode) ||
2464		   S_ISFIFO(mode) || S_ISSOCK(mode)) {
2465		TLV_PUT_U64(sctx, BTRFS_SEND_A_RDEV, new_encode_dev(rdev));
2466		TLV_PUT_U64(sctx, BTRFS_SEND_A_MODE, mode);
2467	}
2468
2469	ret = send_cmd(sctx);
2470	if (ret < 0)
2471		goto out;
2472
2473
2474tlv_put_failure:
2475out:
2476	fs_path_free(sctx, p);
2477	return ret;
2478}
2479
2480/*
2481 * We need some special handling for inodes that get processed before the parent
2482 * directory got created. See process_recorded_refs for details.
2483 * This function does the check if we already created the dir out of order.
2484 */
2485static int did_create_dir(struct send_ctx *sctx, u64 dir)
2486{
2487	int ret = 0;
2488	struct btrfs_path *path = NULL;
2489	struct btrfs_key key;
2490	struct btrfs_key found_key;
2491	struct btrfs_key di_key;
2492	struct extent_buffer *eb;
2493	struct btrfs_dir_item *di;
2494	int slot;
2495
2496	path = alloc_path_for_send();
2497	if (!path) {
2498		ret = -ENOMEM;
2499		goto out;
2500	}
2501
2502	key.objectid = dir;
2503	key.type = BTRFS_DIR_INDEX_KEY;
2504	key.offset = 0;
2505	while (1) {
2506		ret = btrfs_search_slot_for_read(sctx->send_root, &key, path,
2507				1, 0);
2508		if (ret < 0)
2509			goto out;
2510		if (!ret) {
2511			eb = path->nodes[0];
2512			slot = path->slots[0];
2513			btrfs_item_key_to_cpu(eb, &found_key, slot);
2514		}
2515		if (ret || found_key.objectid != key.objectid ||
2516		    found_key.type != key.type) {
2517			ret = 0;
2518			goto out;
2519		}
2520
2521		di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
2522		btrfs_dir_item_key_to_cpu(eb, di, &di_key);
2523
2524		if (di_key.objectid < sctx->send_progress) {
2525			ret = 1;
2526			goto out;
2527		}
2528
2529		key.offset = found_key.offset + 1;
2530		btrfs_release_path(path);
2531	}
2532
2533out:
2534	btrfs_free_path(path);
2535	return ret;
2536}
2537
2538/*
2539 * Only creates the inode if it is:
2540 * 1. Not a directory
2541 * 2. Or a directory which was not created already due to out of order
2542 *    directories. See did_create_dir and process_recorded_refs for details.
2543 */
2544static int send_create_inode_if_needed(struct send_ctx *sctx)
2545{
2546	int ret;
2547
2548	if (S_ISDIR(sctx->cur_inode_mode)) {
2549		ret = did_create_dir(sctx, sctx->cur_ino);
2550		if (ret < 0)
2551			goto out;
2552		if (ret) {
2553			ret = 0;
2554			goto out;
2555		}
2556	}
2557
2558	ret = send_create_inode(sctx, sctx->cur_ino);
2559	if (ret < 0)
2560		goto out;
2561
2562out:
2563	return ret;
2564}
2565
2566struct recorded_ref {
2567	struct list_head list;
2568	char *dir_path;
2569	char *name;
2570	struct fs_path *full_path;
2571	u64 dir;
2572	u64 dir_gen;
2573	int dir_path_len;
2574	int name_len;
2575};
2576
2577/*
2578 * We need to process new refs before deleted refs, but compare_tree gives us
2579 * everything mixed. So we first record all refs and later process them.
2580 * This function is a helper to record one ref.
2581 */
2582static int record_ref(struct list_head *head, u64 dir,
2583		      u64 dir_gen, struct fs_path *path)
2584{
2585	struct recorded_ref *ref;
2586	char *tmp;
2587
2588	ref = kmalloc(sizeof(*ref), GFP_NOFS);
2589	if (!ref)
2590		return -ENOMEM;
2591
2592	ref->dir = dir;
2593	ref->dir_gen = dir_gen;
2594	ref->full_path = path;
2595
2596	tmp = strrchr(ref->full_path->start, '/');
2597	if (!tmp) {
2598		ref->name_len = ref->full_path->end - ref->full_path->start;
2599		ref->name = ref->full_path->start;
2600		ref->dir_path_len = 0;
2601		ref->dir_path = ref->full_path->start;
2602	} else {
2603		tmp++;
2604		ref->name_len = ref->full_path->end - tmp;
2605		ref->name = tmp;
2606		ref->dir_path = ref->full_path->start;
2607		ref->dir_path_len = ref->full_path->end -
2608				ref->full_path->start - 1 - ref->name_len;
2609	}
2610
2611	list_add_tail(&ref->list, head);
2612	return 0;
2613}
2614
2615static void __free_recorded_refs(struct send_ctx *sctx, struct list_head *head)
2616{
2617	struct recorded_ref *cur;
2618
2619	while (!list_empty(head)) {
2620		cur = list_entry(head->next, struct recorded_ref, list);
2621		fs_path_free(sctx, cur->full_path);
2622		list_del(&cur->list);
2623		kfree(cur);
2624	}
2625}
2626
2627static void free_recorded_refs(struct send_ctx *sctx)
2628{
2629	__free_recorded_refs(sctx, &sctx->new_refs);
2630	__free_recorded_refs(sctx, &sctx->deleted_refs);
2631}
2632
2633/*
2634 * Renames/moves a file/dir to its orphan name. Used when the first
2635 * ref of an unprocessed inode gets overwritten and for all non empty
2636 * directories.
2637 */
2638static int orphanize_inode(struct send_ctx *sctx, u64 ino, u64 gen,
2639			  struct fs_path *path)
2640{
2641	int ret;
2642	struct fs_path *orphan;
2643
2644	orphan = fs_path_alloc(sctx);
2645	if (!orphan)
2646		return -ENOMEM;
2647
2648	ret = gen_unique_name(sctx, ino, gen, orphan);
2649	if (ret < 0)
2650		goto out;
2651
2652	ret = send_rename(sctx, path, orphan);
2653
2654out:
2655	fs_path_free(sctx, orphan);
2656	return ret;
2657}
2658
2659/*
2660 * Returns 1 if a directory can be removed at this point in time.
2661 * We check this by iterating all dir items and checking if the inode behind
2662 * the dir item was already processed.
2663 */
2664static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 send_progress)
2665{
2666	int ret = 0;
2667	struct btrfs_root *root = sctx->parent_root;
2668	struct btrfs_path *path;
2669	struct btrfs_key key;
2670	struct btrfs_key found_key;
2671	struct btrfs_key loc;
2672	struct btrfs_dir_item *di;
2673
2674	/*
2675	 * Don't try to rmdir the top/root subvolume dir.
2676	 */
2677	if (dir == BTRFS_FIRST_FREE_OBJECTID)
2678		return 0;
2679
2680	path = alloc_path_for_send();
2681	if (!path)
2682		return -ENOMEM;
2683
2684	key.objectid = dir;
2685	key.type = BTRFS_DIR_INDEX_KEY;
2686	key.offset = 0;
2687
2688	while (1) {
2689		ret = btrfs_search_slot_for_read(root, &key, path, 1, 0);
2690		if (ret < 0)
2691			goto out;
2692		if (!ret) {
2693			btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2694					path->slots[0]);
2695		}
2696		if (ret || found_key.objectid != key.objectid ||
2697		    found_key.type != key.type) {
2698			break;
2699		}
2700
2701		di = btrfs_item_ptr(path->nodes[0], path->slots[0],
2702				struct btrfs_dir_item);
2703		btrfs_dir_item_key_to_cpu(path->nodes[0], di, &loc);
2704
2705		if (loc.objectid > send_progress) {
2706			ret = 0;
2707			goto out;
2708		}
2709
2710		btrfs_release_path(path);
2711		key.offset = found_key.offset + 1;
2712	}
2713
2714	ret = 1;
2715
2716out:
2717	btrfs_free_path(path);
2718	return ret;
2719}
2720
2721/*
2722 * This does all the move/link/unlink/rmdir magic.
2723 */
2724static int process_recorded_refs(struct send_ctx *sctx)
2725{
2726	int ret = 0;
2727	struct recorded_ref *cur;
2728	struct recorded_ref *cur2;
2729	struct ulist *check_dirs = NULL;
2730	struct ulist_iterator uit;
2731	struct ulist_node *un;
2732	struct fs_path *valid_path = NULL;
2733	u64 ow_inode = 0;
2734	u64 ow_gen;
2735	int did_overwrite = 0;
2736	int is_orphan = 0;
2737
2738verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
2739
2740	/*
2741	 * This should never happen as the root dir always has the same ref
2742	 * which is always '..'
2743	 */
2744	BUG_ON(sctx->cur_ino <= BTRFS_FIRST_FREE_OBJECTID);
2745
2746	valid_path = fs_path_alloc(sctx);
2747	if (!valid_path) {
2748		ret = -ENOMEM;
2749		goto out;
2750	}
2751
2752	check_dirs = ulist_alloc(GFP_NOFS);
2753	if (!check_dirs) {
2754		ret = -ENOMEM;
2755		goto out;
2756	}
2757
2758	/*
2759	 * First, check if the first ref of the current inode was overwritten
2760	 * before. If yes, we know that the current inode was already orphanized
2761	 * and thus use the orphan name. If not, we can use get_cur_path to
2762	 * get the path of the first ref as it would like while receiving at
2763	 * this point in time.
2764	 * New inodes are always orphan at the beginning, so force to use the
2765	 * orphan name in this case.
2766	 * The first ref is stored in valid_path and will be updated if it
2767	 * gets moved around.
2768	 */
2769	if (!sctx->cur_inode_new) {
2770		ret = did_overwrite_first_ref(sctx, sctx->cur_ino,
2771				sctx->cur_inode_gen);
2772		if (ret < 0)
2773			goto out;
2774		if (ret)
2775			did_overwrite = 1;
2776	}
2777	if (sctx->cur_inode_new || did_overwrite) {
2778		ret = gen_unique_name(sctx, sctx->cur_ino,
2779				sctx->cur_inode_gen, valid_path);
2780		if (ret < 0)
2781			goto out;
2782		is_orphan = 1;
2783	} else {
2784		ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen,
2785				valid_path);
2786		if (ret < 0)
2787			goto out;
2788	}
2789
2790	list_for_each_entry(cur, &sctx->new_refs, list) {
2791		/*
2792		 * We may have refs where the parent directory does not exist
2793		 * yet. This happens if the parent directories inum is higher
2794		 * the the current inum. To handle this case, we create the
2795		 * parent directory out of order. But we need to check if this
2796		 * did already happen before due to other refs in the same dir.
2797		 */
2798		ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen);
2799		if (ret < 0)
2800			goto out;
2801		if (ret == inode_state_will_create) {
2802			ret = 0;
2803			/*
2804			 * First check if any of the current inodes refs did
2805			 * already create the dir.
2806			 */
2807			list_for_each_entry(cur2, &sctx->new_refs, list) {
2808				if (cur == cur2)
2809					break;
2810				if (cur2->dir == cur->dir) {
2811					ret = 1;
2812					break;
2813				}
2814			}
2815
2816			/*
2817			 * If that did not happen, check if a previous inode
2818			 * did already create the dir.
2819			 */
2820			if (!ret)
2821				ret = did_create_dir(sctx, cur->dir);
2822			if (ret < 0)
2823				goto out;
2824			if (!ret) {
2825				ret = send_create_inode(sctx, cur->dir);
2826				if (ret < 0)
2827					goto out;
2828			}
2829		}
2830
2831		/*
2832		 * Check if this new ref would overwrite the first ref of
2833		 * another unprocessed inode. If yes, orphanize the
2834		 * overwritten inode. If we find an overwritten ref that is
2835		 * not the first ref, simply unlink it.
2836		 */
2837		ret = will_overwrite_ref(sctx, cur->dir, cur->dir_gen,
2838				cur->name, cur->name_len,
2839				&ow_inode, &ow_gen);
2840		if (ret < 0)
2841			goto out;
2842		if (ret) {
2843			ret = is_first_ref(sctx, sctx->parent_root,
2844					ow_inode, cur->dir, cur->name,
2845					cur->name_len);
2846			if (ret < 0)
2847				goto out;
2848			if (ret) {
2849				ret = orphanize_inode(sctx, ow_inode, ow_gen,
2850						cur->full_path);
2851				if (ret < 0)
2852					goto out;
2853			} else {
2854				ret = send_unlink(sctx, cur->full_path);
2855				if (ret < 0)
2856					goto out;
2857			}
2858		}
2859
2860		/*
2861		 * link/move the ref to the new place. If we have an orphan
2862		 * inode, move it and update valid_path. If not, link or move
2863		 * it depending on the inode mode.
2864		 */
2865		if (is_orphan) {
2866			ret = send_rename(sctx, valid_path, cur->full_path);
2867			if (ret < 0)
2868				goto out;
2869			is_orphan = 0;
2870			ret = fs_path_copy(valid_path, cur->full_path);
2871			if (ret < 0)
2872				goto out;
2873		} else {
2874			if (S_ISDIR(sctx->cur_inode_mode)) {
2875				/*
2876				 * Dirs can't be linked, so move it. For moved
2877				 * dirs, we always have one new and one deleted
2878				 * ref. The deleted ref is ignored later.
2879				 */
2880				ret = send_rename(sctx, valid_path,
2881						cur->full_path);
2882				if (ret < 0)
2883					goto out;
2884				ret = fs_path_copy(valid_path, cur->full_path);
2885				if (ret < 0)
2886					goto out;
2887			} else {
2888				ret = send_link(sctx, cur->full_path,
2889						valid_path);
2890				if (ret < 0)
2891					goto out;
2892			}
2893		}
2894		ret = ulist_add(check_dirs, cur->dir, cur->dir_gen,
2895				GFP_NOFS);
2896		if (ret < 0)
2897			goto out;
2898	}
2899
2900	if (S_ISDIR(sctx->cur_inode_mode) && sctx->cur_inode_deleted) {
2901		/*
2902		 * Check if we can already rmdir the directory. If not,
2903		 * orphanize it. For every dir item inside that gets deleted
2904		 * later, we do this check again and rmdir it then if possible.
2905		 * See the use of check_dirs for more details.
2906		 */
2907		ret = can_rmdir(sctx, sctx->cur_ino, sctx->cur_ino);
2908		if (ret < 0)
2909			goto out;
2910		if (ret) {
2911			ret = send_rmdir(sctx, valid_path);
2912			if (ret < 0)
2913				goto out;
2914		} else if (!is_orphan) {
2915			ret = orphanize_inode(sctx, sctx->cur_ino,
2916					sctx->cur_inode_gen, valid_path);
2917			if (ret < 0)
2918				goto out;
2919			is_orphan = 1;
2920		}
2921
2922		list_for_each_entry(cur, &sctx->deleted_refs, list) {
2923			ret = ulist_add(check_dirs, cur->dir, cur->dir_gen,
2924					GFP_NOFS);
2925			if (ret < 0)
2926				goto out;
2927		}
2928	} else if (S_ISDIR(sctx->cur_inode_mode) &&
2929		   !list_empty(&sctx->deleted_refs)) {
2930		/*
2931		 * We have a moved dir. Add the old parent to check_dirs
2932		 */
2933		cur = list_entry(sctx->deleted_refs.next, struct recorded_ref,
2934				list);
2935		ret = ulist_add(check_dirs, cur->dir, cur->dir_gen,
2936				GFP_NOFS);
2937		if (ret < 0)
2938			goto out;
2939	} else if (!S_ISDIR(sctx->cur_inode_mode)) {
2940		/*
2941		 * We have a non dir inode. Go through all deleted refs and
2942		 * unlink them if they were not already overwritten by other
2943		 * inodes.
2944		 */
2945		list_for_each_entry(cur, &sctx->deleted_refs, list) {
2946			ret = did_overwrite_ref(sctx, cur->dir, cur->dir_gen,
2947					sctx->cur_ino, sctx->cur_inode_gen,
2948					cur->name, cur->name_len);
2949			if (ret < 0)
2950				goto out;
2951			if (!ret) {
2952				ret = send_unlink(sctx, cur->full_path);
2953				if (ret < 0)
2954					goto out;
2955			}
2956			ret = ulist_add(check_dirs, cur->dir, cur->dir_gen,
2957					GFP_NOFS);
2958			if (ret < 0)
2959				goto out;
2960		}
2961
2962		/*
2963		 * If the inode is still orphan, unlink the orphan. This may
2964		 * happen when a previous inode did overwrite the first ref
2965		 * of this inode and no new refs were added for the current
2966		 * inode. Unlinking does not mean that the inode is deleted in
2967		 * all cases. There may still be links to this inode in other
2968		 * places.
2969		 */
2970		if (is_orphan) {
2971			ret = send_unlink(sctx, valid_path);
2972			if (ret < 0)
2973				goto out;
2974		}
2975	}
2976
2977	/*
2978	 * We did collect all parent dirs where cur_inode was once located. We
2979	 * now go through all these dirs and check if they are pending for
2980	 * deletion and if it's finally possible to perform the rmdir now.
2981	 * We also update the inode stats of the parent dirs here.
2982	 */
2983	ULIST_ITER_INIT(&uit);
2984	while ((un = ulist_next(check_dirs, &uit))) {
2985		/*
2986		 * In case we had refs into dirs that were not processed yet,
2987		 * we don't need to do the utime and rmdir logic for these dirs.
2988		 * The dir will be processed later.
2989		 */
2990		if (un->val > sctx->cur_ino)
2991			continue;
2992
2993		ret = get_cur_inode_state(sctx, un->val, un->aux);
2994		if (ret < 0)
2995			goto out;
2996
2997		if (ret == inode_state_did_create ||
2998		    ret == inode_state_no_change) {
2999			/* TODO delayed utimes */
3000			ret = send_utimes(sctx, un->val, un->aux);
3001			if (ret < 0)
3002				goto out;
3003		} else if (ret == inode_state_did_delete) {
3004			ret = can_rmdir(sctx, un->val, sctx->cur_ino);
3005			if (ret < 0)
3006				goto out;
3007			if (ret) {
3008				ret = get_cur_path(sctx, un->val, un->aux,
3009						valid_path);
3010				if (ret < 0)
3011					goto out;
3012				ret = send_rmdir(sctx, valid_path);
3013				if (ret < 0)
3014					goto out;
3015			}
3016		}
3017	}
3018
3019	ret = 0;
3020
3021out:
3022	free_recorded_refs(sctx);
3023	ulist_free(check_dirs);
3024	fs_path_free(sctx, valid_path);
3025	return ret;
3026}
3027
3028static int __record_new_ref(int num, u64 dir, int index,
3029			    struct fs_path *name,
3030			    void *ctx)
3031{
3032	int ret = 0;
3033	struct send_ctx *sctx = ctx;
3034	struct fs_path *p;
3035	u64 gen;
3036
3037	p = fs_path_alloc(sctx);
3038	if (!p)
3039		return -ENOMEM;
3040
3041	ret = get_inode_info(sctx->send_root, dir, NULL, &gen, NULL, NULL,
3042			NULL, NULL);
3043	if (ret < 0)
3044		goto out;
3045
3046	ret = get_cur_path(sctx, dir, gen, p);
3047	if (ret < 0)
3048		goto out;
3049	ret = fs_path_add_path(p, name);
3050	if (ret < 0)
3051		goto out;
3052
3053	ret = record_ref(&sctx->new_refs, dir, gen, p);
3054
3055out:
3056	if (ret)
3057		fs_path_free(sctx, p);
3058	return ret;
3059}
3060
3061static int __record_deleted_ref(int num, u64 dir, int index,
3062				struct fs_path *name,
3063				void *ctx)
3064{
3065	int ret = 0;
3066	struct send_ctx *sctx = ctx;
3067	struct fs_path *p;
3068	u64 gen;
3069
3070	p = fs_path_alloc(sctx);
3071	if (!p)
3072		return -ENOMEM;
3073
3074	ret = get_inode_info(sctx->parent_root, dir, NULL, &gen, NULL, NULL,
3075			NULL, NULL);
3076	if (ret < 0)
3077		goto out;
3078
3079	ret = get_cur_path(sctx, dir, gen, p);
3080	if (ret < 0)
3081		goto out;
3082	ret = fs_path_add_path(p, name);
3083	if (ret < 0)
3084		goto out;
3085
3086	ret = record_ref(&sctx->deleted_refs, dir, gen, p);
3087
3088out:
3089	if (ret)
3090		fs_path_free(sctx, p);
3091	return ret;
3092}
3093
3094static int record_new_ref(struct send_ctx *sctx)
3095{
3096	int ret;
3097
3098	ret = iterate_inode_ref(sctx, sctx->send_root, sctx->left_path,
3099			sctx->cmp_key, 0, __record_new_ref, sctx);
3100	if (ret < 0)
3101		goto out;
3102	ret = 0;
3103
3104out:
3105	return ret;
3106}
3107
3108static int record_deleted_ref(struct send_ctx *sctx)
3109{
3110	int ret;
3111
3112	ret = iterate_inode_ref(sctx, sctx->parent_root, sctx->right_path,
3113			sctx->cmp_key, 0, __record_deleted_ref, sctx);
3114	if (ret < 0)
3115		goto out;
3116	ret = 0;
3117
3118out:
3119	return ret;
3120}
3121
3122struct find_ref_ctx {
3123	u64 dir;
3124	struct fs_path *name;
3125	int found_idx;
3126};
3127
3128static int __find_iref(int num, u64 dir, int index,
3129		       struct fs_path *name,
3130		       void *ctx_)
3131{
3132	struct find_ref_ctx *ctx = ctx_;
3133
3134	if (dir == ctx->dir && fs_path_len(name) == fs_path_len(ctx->name) &&
3135	    strncmp(name->start, ctx->name->start, fs_path_len(name)) == 0) {
3136		ctx->found_idx = num;
3137		return 1;
3138	}
3139	return 0;
3140}
3141
3142static int find_iref(struct send_ctx *sctx,
3143		     struct btrfs_root *root,
3144		     struct btrfs_path *path,
3145		     struct btrfs_key *key,
3146		     u64 dir, struct fs_path *name)
3147{
3148	int ret;
3149	struct find_ref_ctx ctx;
3150
3151	ctx.dir = dir;
3152	ctx.name = name;
3153	ctx.found_idx = -1;
3154
3155	ret = iterate_inode_ref(sctx, root, path, key, 0, __find_iref, &ctx);
3156	if (ret < 0)
3157		return ret;
3158
3159	if (ctx.found_idx == -1)
3160		return -ENOENT;
3161
3162	return ctx.found_idx;
3163}
3164
3165static int __record_changed_new_ref(int num, u64 dir, int index,
3166				    struct fs_path *name,
3167				    void *ctx)
3168{
3169	int ret;
3170	struct send_ctx *sctx = ctx;
3171
3172	ret = find_iref(sctx, sctx->parent_root, sctx->right_path,
3173			sctx->cmp_key, dir, name);
3174	if (ret == -ENOENT)
3175		ret = __record_new_ref(num, dir, index, name, sctx);
3176	else if (ret > 0)
3177		ret = 0;
3178
3179	return ret;
3180}
3181
3182static int __record_changed_deleted_ref(int num, u64 dir, int index,
3183					struct fs_path *name,
3184					void *ctx)
3185{
3186	int ret;
3187	struct send_ctx *sctx = ctx;
3188
3189	ret = find_iref(sctx, sctx->send_root, sctx->left_path, sctx->cmp_key,
3190			dir, name);
3191	if (ret == -ENOENT)
3192		ret = __record_deleted_ref(num, dir, index, name, sctx);
3193	else if (ret > 0)
3194		ret = 0;
3195
3196	return ret;
3197}
3198
3199static int record_changed_ref(struct send_ctx *sctx)
3200{
3201	int ret = 0;
3202
3203	ret = iterate_inode_ref(sctx, sctx->send_root, sctx->left_path,
3204			sctx->cmp_key, 0, __record_changed_new_ref, sctx);
3205	if (ret < 0)
3206		goto out;
3207	ret = iterate_inode_ref(sctx, sctx->parent_root, sctx->right_path,
3208			sctx->cmp_key, 0, __record_changed_deleted_ref, sctx);
3209	if (ret < 0)
3210		goto out;
3211	ret = 0;
3212
3213out:
3214	return ret;
3215}
3216
3217/*
3218 * Record and process all refs at once. Needed when an inode changes the
3219 * generation number, which means that it was deleted and recreated.
3220 */
3221static int process_all_refs(struct send_ctx *sctx,
3222			    enum btrfs_compare_tree_result cmd)
3223{
3224	int ret;
3225	struct btrfs_root *root;
3226	struct btrfs_path *path;
3227	struct btrfs_key key;
3228	struct btrfs_key found_key;
3229	struct extent_buffer *eb;
3230	int slot;
3231	iterate_inode_ref_t cb;
3232
3233	path = alloc_path_for_send();
3234	if (!path)
3235		return -ENOMEM;
3236
3237	if (cmd == BTRFS_COMPARE_TREE_NEW) {
3238		root = sctx->send_root;
3239		cb = __record_new_ref;
3240	} else if (cmd == BTRFS_COMPARE_TREE_DELETED) {
3241		root = sctx->parent_root;
3242		cb = __record_deleted_ref;
3243	} else {
3244		BUG();
3245	}
3246
3247	key.objectid = sctx->cmp_key->objectid;
3248	key.type = BTRFS_INODE_REF_KEY;
3249	key.offset = 0;
3250	while (1) {
3251		ret = btrfs_search_slot_for_read(root, &key, path, 1, 0);
3252		if (ret < 0)
3253			goto out;
3254		if (ret)
3255			break;
3256
3257		eb = path->nodes[0];
3258		slot = path->slots[0];
3259		btrfs_item_key_to_cpu(eb, &found_key, slot);
3260
3261		if (found_key.objectid != key.objectid ||
3262		    (found_key.type != BTRFS_INODE_REF_KEY &&
3263		     found_key.type != BTRFS_INODE_EXTREF_KEY))
3264			break;
3265
3266		ret = iterate_inode_ref(sctx, root, path, &found_key, 0, cb,
3267				sctx);
3268		btrfs_release_path(path);
3269		if (ret < 0)
3270			goto out;
3271
3272		key.offset = found_key.offset + 1;
3273	}
3274	btrfs_release_path(path);
3275
3276	ret = process_recorded_refs(sctx);
3277
3278out:
3279	btrfs_free_path(path);
3280	return ret;
3281}
3282
3283static int send_set_xattr(struct send_ctx *sctx,
3284			  struct fs_path *path,
3285			  const char *name, int name_len,
3286			  const char *data, int data_len)
3287{
3288	int ret = 0;
3289
3290	ret = begin_cmd(sctx, BTRFS_SEND_C_SET_XATTR);
3291	if (ret < 0)
3292		goto out;
3293
3294	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
3295	TLV_PUT_STRING(sctx, BTRFS_SEND_A_XATTR_NAME, name, name_len);
3296	TLV_PUT(sctx, BTRFS_SEND_A_XATTR_DATA, data, data_len);
3297
3298	ret = send_cmd(sctx);
3299
3300tlv_put_failure:
3301out:
3302	return ret;
3303}
3304
3305static int send_remove_xattr(struct send_ctx *sctx,
3306			  struct fs_path *path,
3307			  const char *name, int name_len)
3308{
3309	int ret = 0;
3310
3311	ret = begin_cmd(sctx, BTRFS_SEND_C_REMOVE_XATTR);
3312	if (ret < 0)
3313		goto out;
3314
3315	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
3316	TLV_PUT_STRING(sctx, BTRFS_SEND_A_XATTR_NAME, name, name_len);
3317
3318	ret = send_cmd(sctx);
3319
3320tlv_put_failure:
3321out:
3322	return ret;
3323}
3324
3325static int __process_new_xattr(int num, struct btrfs_key *di_key,
3326			       const char *name, int name_len,
3327			       const char *data, int data_len,
3328			       u8 type, void *ctx)
3329{
3330	int ret;
3331	struct send_ctx *sctx = ctx;
3332	struct fs_path *p;
3333	posix_acl_xattr_header dummy_acl;
3334
3335	p = fs_path_alloc(sctx);
3336	if (!p)
3337		return -ENOMEM;
3338
3339	/*
3340	 * This hack is needed because empty acl's are stored as zero byte
3341	 * data in xattrs. Problem with that is, that receiving these zero byte
3342	 * acl's will fail later. To fix this, we send a dummy acl list that
3343	 * only contains the version number and no entries.
3344	 */
3345	if (!strncmp(name, XATTR_NAME_POSIX_ACL_ACCESS, name_len) ||
3346	    !strncmp(name, XATTR_NAME_POSIX_ACL_DEFAULT, name_len)) {
3347		if (data_len == 0) {
3348			dummy_acl.a_version =
3349					cpu_to_le32(POSIX_ACL_XATTR_VERSION);
3350			data = (char *)&dummy_acl;
3351			data_len = sizeof(dummy_acl);
3352		}
3353	}
3354
3355	ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
3356	if (ret < 0)
3357		goto out;
3358
3359	ret = send_set_xattr(sctx, p, name, name_len, data, data_len);
3360
3361out:
3362	fs_path_free(sctx, p);
3363	return ret;
3364}
3365
3366static int __process_deleted_xattr(int num, struct btrfs_key *di_key,
3367				   const char *name, int name_len,
3368				   const char *data, int data_len,
3369				   u8 type, void *ctx)
3370{
3371	int ret;
3372	struct send_ctx *sctx = ctx;
3373	struct fs_path *p;
3374
3375	p = fs_path_alloc(sctx);
3376	if (!p)
3377		return -ENOMEM;
3378
3379	ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
3380	if (ret < 0)
3381		goto out;
3382
3383	ret = send_remove_xattr(sctx, p, name, name_len);
3384
3385out:
3386	fs_path_free(sctx, p);
3387	return ret;
3388}
3389
3390static int process_new_xattr(struct send_ctx *sctx)
3391{
3392	int ret = 0;
3393
3394	ret = iterate_dir_item(sctx, sctx->send_root, sctx->left_path,
3395			sctx->cmp_key, __process_new_xattr, sctx);
3396
3397	return ret;
3398}
3399
3400static int process_deleted_xattr(struct send_ctx *sctx)
3401{
3402	int ret;
3403
3404	ret = iterate_dir_item(sctx, sctx->parent_root, sctx->right_path,
3405			sctx->cmp_key, __process_deleted_xattr, sctx);
3406
3407	return ret;
3408}
3409
3410struct find_xattr_ctx {
3411	const char *name;
3412	int name_len;
3413	int found_idx;
3414	char *found_data;
3415	int found_data_len;
3416};
3417
3418static int __find_xattr(int num, struct btrfs_key *di_key,
3419			const char *name, int name_len,
3420			const char *data, int data_len,
3421			u8 type, void *vctx)
3422{
3423	struct find_xattr_ctx *ctx = vctx;
3424
3425	if (name_len == ctx->name_len &&
3426	    strncmp(name, ctx->name, name_len) == 0) {
3427		ctx->found_idx = num;
3428		ctx->found_data_len = data_len;
3429		ctx->found_data = kmalloc(data_len, GFP_NOFS);
3430		if (!ctx->found_data)
3431			return -ENOMEM;
3432		memcpy(ctx->found_data, data, data_len);
3433		return 1;
3434	}
3435	return 0;
3436}
3437
3438static int find_xattr(struct send_ctx *sctx,
3439		      struct btrfs_root *root,
3440		      struct btrfs_path *path,
3441		      struct btrfs_key *key,
3442		      const char *name, int name_len,
3443		      char **data, int *data_len)
3444{
3445	int ret;
3446	struct find_xattr_ctx ctx;
3447
3448	ctx.name = name;
3449	ctx.name_len = name_len;
3450	ctx.found_idx = -1;
3451	ctx.found_data = NULL;
3452	ctx.found_data_len = 0;
3453
3454	ret = iterate_dir_item(sctx, root, path, key, __find_xattr, &ctx);
3455	if (ret < 0)
3456		return ret;
3457
3458	if (ctx.found_idx == -1)
3459		return -ENOENT;
3460	if (data) {
3461		*data = ctx.found_data;
3462		*data_len = ctx.found_data_len;
3463	} else {
3464		kfree(ctx.found_data);
3465	}
3466	return ctx.found_idx;
3467}
3468
3469
3470static int __process_changed_new_xattr(int num, struct btrfs_key *di_key,
3471				       const char *name, int name_len,
3472				       const char *data, int data_len,
3473				       u8 type, void *ctx)
3474{
3475	int ret;
3476	struct send_ctx *sctx = ctx;
3477	char *found_data = NULL;
3478	int found_data_len  = 0;
3479	struct fs_path *p = NULL;
3480
3481	ret = find_xattr(sctx, sctx->parent_root, sctx->right_path,
3482			sctx->cmp_key, name, name_len, &found_data,
3483			&found_data_len);
3484	if (ret == -ENOENT) {
3485		ret = __process_new_xattr(num, di_key, name, name_len, data,
3486				data_len, type, ctx);
3487	} else if (ret >= 0) {
3488		if (data_len != found_data_len ||
3489		    memcmp(data, found_data, data_len)) {
3490			ret = __process_new_xattr(num, di_key, name, name_len,
3491					data, data_len, type, ctx);
3492		} else {
3493			ret = 0;
3494		}
3495	}
3496
3497	kfree(found_data);
3498	fs_path_free(sctx, p);
3499	return ret;
3500}
3501
3502static int __process_changed_deleted_xattr(int num, struct btrfs_key *di_key,
3503					   const char *name, int name_len,
3504					   const char *data, int data_len,
3505					   u8 type, void *ctx)
3506{
3507	int ret;
3508	struct send_ctx *sctx = ctx;
3509
3510	ret = find_xattr(sctx, sctx->send_root, sctx->left_path, sctx->cmp_key,
3511			name, name_len, NULL, NULL);
3512	if (ret == -ENOENT)
3513		ret = __process_deleted_xattr(num, di_key, name, name_len, data,
3514				data_len, type, ctx);
3515	else if (ret >= 0)
3516		ret = 0;
3517
3518	return ret;
3519}
3520
3521static int process_changed_xattr(struct send_ctx *sctx)
3522{
3523	int ret = 0;
3524
3525	ret = iterate_dir_item(sctx, sctx->send_root, sctx->left_path,
3526			sctx->cmp_key, __process_changed_new_xattr, sctx);
3527	if (ret < 0)
3528		goto out;
3529	ret = iterate_dir_item(sctx, sctx->parent_root, sctx->right_path,
3530			sctx->cmp_key, __process_changed_deleted_xattr, sctx);
3531
3532out:
3533	return ret;
3534}
3535
3536static int process_all_new_xattrs(struct send_ctx *sctx)
3537{
3538	int ret;
3539	struct btrfs_root *root;
3540	struct btrfs_path *path;
3541	struct btrfs_key key;
3542	struct btrfs_key found_key;
3543	struct extent_buffer *eb;
3544	int slot;
3545
3546	path = alloc_path_for_send();
3547	if (!path)
3548		return -ENOMEM;
3549
3550	root = sctx->send_root;
3551
3552	key.objectid = sctx->cmp_key->objectid;
3553	key.type = BTRFS_XATTR_ITEM_KEY;
3554	key.offset = 0;
3555	while (1) {
3556		ret = btrfs_search_slot_for_read(root, &key, path, 1, 0);
3557		if (ret < 0)
3558			goto out;
3559		if (ret) {
3560			ret = 0;
3561			goto out;
3562		}
3563
3564		eb = path->nodes[0];
3565		slot = path->slots[0];
3566		btrfs_item_key_to_cpu(eb, &found_key, slot);
3567
3568		if (found_key.objectid != key.objectid ||
3569		    found_key.type != key.type) {
3570			ret = 0;
3571			goto out;
3572		}
3573
3574		ret = iterate_dir_item(sctx, root, path, &found_key,
3575				__process_new_xattr, sctx);
3576		if (ret < 0)
3577			goto out;
3578
3579		btrfs_release_path(path);
3580		key.offset = found_key.offset + 1;
3581	}
3582
3583out:
3584	btrfs_free_path(path);
3585	return ret;
3586}
3587
3588/*
3589 * Read some bytes from the current inode/file and send a write command to
3590 * user space.
3591 */
3592static int send_write(struct send_ctx *sctx, u64 offset, u32 len)
3593{
3594	int ret = 0;
3595	struct fs_path *p;
3596	loff_t pos = offset;
3597	int num_read = 0;
3598	mm_segment_t old_fs;
3599
3600	p = fs_path_alloc(sctx);
3601	if (!p)
3602		return -ENOMEM;
3603
3604	/*
3605	 * vfs normally only accepts user space buffers for security reasons.
3606	 * we only read from the file and also only provide the read_buf buffer
3607	 * to vfs. As this buffer does not come from a user space call, it's
3608	 * ok to temporary allow kernel space buffers.
3609	 */
3610	old_fs = get_fs();
3611	set_fs(KERNEL_DS);
3612
3613verbose_printk("btrfs: send_write offset=%llu, len=%d\n", offset, len);
3614
3615	ret = open_cur_inode_file(sctx);
3616	if (ret < 0)
3617		goto out;
3618
3619	ret = vfs_read(sctx->cur_inode_filp, sctx->read_buf, len, &pos);
3620	if (ret < 0)
3621		goto out;
3622	num_read = ret;
3623	if (!num_read)
3624		goto out;
3625
3626	ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE);
3627	if (ret < 0)
3628		goto out;
3629
3630	ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
3631	if (ret < 0)
3632		goto out;
3633
3634	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
3635	TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
3636	TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, num_read);
3637
3638	ret = send_cmd(sctx);
3639
3640tlv_put_failure:
3641out:
3642	fs_path_free(sctx, p);
3643	set_fs(old_fs);
3644	if (ret < 0)
3645		return ret;
3646	return num_read;
3647}
3648
3649/*
3650 * Send a clone command to user space.
3651 */
3652static int send_clone(struct send_ctx *sctx,
3653		      u64 offset, u32 len,
3654		      struct clone_root *clone_root)
3655{
3656	int ret = 0;
3657	struct fs_path *p;
3658	u64 gen;
3659
3660verbose_printk("btrfs: send_clone offset=%llu, len=%d, clone_root=%llu, "
3661	       "clone_inode=%llu, clone_offset=%llu\n", offset, len,
3662		clone_root->root->objectid, clone_root->ino,
3663		clone_root->offset);
3664
3665	p = fs_path_alloc(sctx);
3666	if (!p)
3667		return -ENOMEM;
3668
3669	ret = begin_cmd(sctx, BTRFS_SEND_C_CLONE);
3670	if (ret < 0)
3671		goto out;
3672
3673	ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
3674	if (ret < 0)
3675		goto out;
3676
3677	TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
3678	TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_LEN, len);
3679	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
3680
3681	if (clone_root->root == sctx->send_root) {
3682		ret = get_inode_info(sctx->send_root, clone_root->ino, NULL,
3683				&gen, NULL, NULL, NULL, NULL);
3684		if (ret < 0)
3685			goto out;
3686		ret = get_cur_path(sctx, clone_root->ino, gen, p);
3687	} else {
3688		ret = get_inode_path(sctx, clone_root->root,
3689				clone_root->ino, p);
3690	}
3691	if (ret < 0)
3692		goto out;
3693
3694	TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
3695			clone_root->root->root_item.uuid);
3696	TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID,
3697			clone_root->root->root_item.ctransid);
3698	TLV_PUT_PATH(sctx, BTRFS_SEND_A_CLONE_PATH, p);
3699	TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_OFFSET,
3700			clone_root->offset);
3701
3702	ret = send_cmd(sctx);
3703
3704tlv_put_failure:
3705out:
3706	fs_path_free(sctx, p);
3707	return ret;
3708}
3709
3710static int send_write_or_clone(struct send_ctx *sctx,
3711			       struct btrfs_path *path,
3712			       struct btrfs_key *key,
3713			       struct clone_root *clone_root)
3714{
3715	int ret = 0;
3716	struct btrfs_file_extent_item *ei;
3717	u64 offset = key->offset;
3718	u64 pos = 0;
3719	u64 len;
3720	u32 l;
3721	u8 type;
3722
3723	ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
3724			struct btrfs_file_extent_item);
3725	type = btrfs_file_extent_type(path->nodes[0], ei);
3726	if (type == BTRFS_FILE_EXTENT_INLINE) {
3727		len = btrfs_file_extent_inline_len(path->nodes[0], ei);
3728		/*
3729		 * it is possible the inline item won't cover the whole page,
3730		 * but there may be items after this page.  Make
3731		 * sure to send the whole thing
3732		 */
3733		len = PAGE_CACHE_ALIGN(len);
3734	} else {
3735		len = btrfs_file_extent_num_bytes(path->nodes[0], ei);
3736	}
3737
3738	if (offset + len > sctx->cur_inode_size)
3739		len = sctx->cur_inode_size - offset;
3740	if (len == 0) {
3741		ret = 0;
3742		goto out;
3743	}
3744
3745	if (!clone_root) {
3746		while (pos < len) {
3747			l = len - pos;
3748			if (l > BTRFS_SEND_READ_SIZE)
3749				l = BTRFS_SEND_READ_SIZE;
3750			ret = send_write(sctx, pos + offset, l);
3751			if (ret < 0)
3752				goto out;
3753			if (!ret)
3754				break;
3755			pos += ret;
3756		}
3757		ret = 0;
3758	} else {
3759		ret = send_clone(sctx, offset, len, clone_root);
3760	}
3761
3762out:
3763	return ret;
3764}
3765
3766static int is_extent_unchanged(struct send_ctx *sctx,
3767			       struct btrfs_path *left_path,
3768			       struct btrfs_key *ekey)
3769{
3770	int ret = 0;
3771	struct btrfs_key key;
3772	struct btrfs_path *path = NULL;
3773	struct extent_buffer *eb;
3774	int slot;
3775	struct btrfs_key found_key;
3776	struct btrfs_file_extent_item *ei;
3777	u64 left_disknr;
3778	u64 right_disknr;
3779	u64 left_offset;
3780	u64 right_offset;
3781	u64 left_offset_fixed;
3782	u64 left_len;
3783	u64 right_len;
3784	u64 left_gen;
3785	u64 right_gen;
3786	u8 left_type;
3787	u8 right_type;
3788
3789	path = alloc_path_for_send();
3790	if (!path)
3791		return -ENOMEM;
3792
3793	eb = left_path->nodes[0];
3794	slot = left_path->slots[0];
3795	ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
3796	left_type = btrfs_file_extent_type(eb, ei);
3797
3798	if (left_type != BTRFS_FILE_EXTENT_REG) {
3799		ret = 0;
3800		goto out;
3801	}
3802	left_disknr = btrfs_file_extent_disk_bytenr(eb, ei);
3803	left_len = btrfs_file_extent_num_bytes(eb, ei);
3804	left_offset = btrfs_file_extent_offset(eb, ei);
3805	left_gen = btrfs_file_extent_generation(eb, ei);
3806
3807	/*
3808	 * Following comments will refer to these graphics. L is the left
3809	 * extents which we are checking at the moment. 1-8 are the right
3810	 * extents that we iterate.
3811	 *
3812	 *       |-----L-----|
3813	 * |-1-|-2a-|-3-|-4-|-5-|-6-|
3814	 *
3815	 *       |-----L-----|
3816	 * |--1--|-2b-|...(same as above)
3817	 *
3818	 * Alternative situation. Happens on files where extents got split.
3819	 *       |-----L-----|
3820	 * |-----------7-----------|-6-|
3821	 *
3822	 * Alternative situation. Happens on files which got larger.
3823	 *       |-----L-----|
3824	 * |-8-|
3825	 * Nothing follows after 8.
3826	 */
3827
3828	key.objectid = ekey->objectid;
3829	key.type = BTRFS_EXTENT_DATA_KEY;
3830	key.offset = ekey->offset;
3831	ret = btrfs_search_slot_for_read(sctx->parent_root, &key, path, 0, 0);
3832	if (ret < 0)
3833		goto out;
3834	if (ret) {
3835		ret = 0;
3836		goto out;
3837	}
3838
3839	/*
3840	 * Handle special case where the right side has no extents at all.
3841	 */
3842	eb = path->nodes[0];
3843	slot = path->slots[0];
3844	btrfs_item_key_to_cpu(eb, &found_key, slot);
3845	if (found_key.objectid != key.objectid ||
3846	    found_key.type != key.type) {
3847		ret = 0;
3848		goto out;
3849	}
3850
3851	/*
3852	 * We're now on 2a, 2b or 7.
3853	 */
3854	key = found_key;
3855	while (key.offset < ekey->offset + left_len) {
3856		ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
3857		right_type = btrfs_file_extent_type(eb, ei);
3858		right_disknr = btrfs_file_extent_disk_bytenr(eb, ei);
3859		right_len = btrfs_file_extent_num_bytes(eb, ei);
3860		right_offset = btrfs_file_extent_offset(eb, ei);
3861		right_gen = btrfs_file_extent_generation(eb, ei);
3862
3863		if (right_type != BTRFS_FILE_EXTENT_REG) {
3864			ret = 0;
3865			goto out;
3866		}
3867
3868		/*
3869		 * Are we at extent 8? If yes, we know the extent is changed.
3870		 * This may only happen on the first iteration.
3871		 */
3872		if (found_key.offset + right_len <= ekey->offset) {
3873			ret = 0;
3874			goto out;
3875		}
3876
3877		left_offset_fixed = left_offset;
3878		if (key.offset < ekey->offset) {
3879			/* Fix the right offset for 2a and 7. */
3880			right_offset += ekey->offset - key.offset;
3881		} else {
3882			/* Fix the left offset for all behind 2a and 2b */
3883			left_offset_fixed += key.offset - ekey->offset;
3884		}
3885
3886		/*
3887		 * Check if we have the same extent.
3888		 */
3889		if (left_disknr != right_disknr ||
3890		    left_offset_fixed != right_offset ||
3891		    left_gen != right_gen) {
3892			ret = 0;
3893			goto out;
3894		}
3895
3896		/*
3897		 * Go to the next extent.
3898		 */
3899		ret = btrfs_next_item(sctx->parent_root, path);
3900		if (ret < 0)
3901			goto out;
3902		if (!ret) {
3903			eb = path->nodes[0];
3904			slot = path->slots[0];
3905			btrfs_item_key_to_cpu(eb, &found_key, slot);
3906		}
3907		if (ret || found_key.objectid != key.objectid ||
3908		    found_key.type != key.type) {
3909			key.offset += right_len;
3910			break;
3911		} else {
3912			if (found_key.offset != key.offset + right_len) {
3913				/* Should really not happen */
3914				ret = -EIO;
3915				goto out;
3916			}
3917		}
3918		key = found_key;
3919	}
3920
3921	/*
3922	 * We're now behind the left extent (treat as unchanged) or at the end
3923	 * of the right side (treat as changed).
3924	 */
3925	if (key.offset >= ekey->offset + left_len)
3926		ret = 1;
3927	else
3928		ret = 0;
3929
3930
3931out:
3932	btrfs_free_path(path);
3933	return ret;
3934}
3935
3936static int process_extent(struct send_ctx *sctx,
3937			  struct btrfs_path *path,
3938			  struct btrfs_key *key)
3939{
3940	int ret = 0;
3941	struct clone_root *found_clone = NULL;
3942
3943	if (S_ISLNK(sctx->cur_inode_mode))
3944		return 0;
3945
3946	if (sctx->parent_root && !sctx->cur_inode_new) {
3947		ret = is_extent_unchanged(sctx, path, key);
3948		if (ret < 0)
3949			goto out;
3950		if (ret) {
3951			ret = 0;
3952			goto out;
3953		}
3954	}
3955
3956	ret = find_extent_clone(sctx, path, key->objectid, key->offset,
3957			sctx->cur_inode_size, &found_clone);
3958	if (ret != -ENOENT && ret < 0)
3959		goto out;
3960
3961	ret = send_write_or_clone(sctx, path, key, found_clone);
3962
3963out:
3964	return ret;
3965}
3966
3967static int process_all_extents(struct send_ctx *sctx)
3968{
3969	int ret;
3970	struct btrfs_root *root;
3971	struct btrfs_path *path;
3972	struct btrfs_key key;
3973	struct btrfs_key found_key;
3974	struct extent_buffer *eb;
3975	int slot;
3976
3977	root = sctx->send_root;
3978	path = alloc_path_for_send();
3979	if (!path)
3980		return -ENOMEM;
3981
3982	key.objectid = sctx->cmp_key->objectid;
3983	key.type = BTRFS_EXTENT_DATA_KEY;
3984	key.offset = 0;
3985	while (1) {
3986		ret = btrfs_search_slot_for_read(root, &key, path, 1, 0);
3987		if (ret < 0)
3988			goto out;
3989		if (ret) {
3990			ret = 0;
3991			goto out;
3992		}
3993
3994		eb = path->nodes[0];
3995		slot = path->slots[0];
3996		btrfs_item_key_to_cpu(eb, &found_key, slot);
3997
3998		if (found_key.objectid != key.objectid ||
3999		    found_key.type != key.type) {
4000			ret = 0;
4001			goto out;
4002		}
4003
4004		ret = process_extent(sctx, path, &found_key);
4005		if (ret < 0)
4006			goto out;
4007
4008		btrfs_release_path(path);
4009		key.offset = found_key.offset + 1;
4010	}
4011
4012out:
4013	btrfs_free_path(path);
4014	return ret;
4015}
4016
4017static int process_recorded_refs_if_needed(struct send_ctx *sctx, int at_end)
4018{
4019	int ret = 0;
4020
4021	if (sctx->cur_ino == 0)
4022		goto out;
4023	if (!at_end && sctx->cur_ino == sctx->cmp_key->objectid &&
4024	    sctx->cmp_key->type <= BTRFS_INODE_EXTREF_KEY)
4025		goto out;
4026	if (list_empty(&sctx->new_refs) && list_empty(&sctx->deleted_refs))
4027		goto out;
4028
4029	ret = process_recorded_refs(sctx);
4030	if (ret < 0)
4031		goto out;
4032
4033	/*
4034	 * We have processed the refs and thus need to advance send_progress.
4035	 * Now, calls to get_cur_xxx will take the updated refs of the current
4036	 * inode into account.
4037	 */
4038	sctx->send_progress = sctx->cur_ino + 1;
4039
4040out:
4041	return ret;
4042}
4043
4044static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
4045{
4046	int ret = 0;
4047	u64 left_mode;
4048	u64 left_uid;
4049	u64 left_gid;
4050	u64 right_mode;
4051	u64 right_uid;
4052	u64 right_gid;
4053	int need_chmod = 0;
4054	int need_chown = 0;
4055
4056	ret = process_recorded_refs_if_needed(sctx, at_end);
4057	if (ret < 0)
4058		goto out;
4059
4060	if (sctx->cur_ino == 0 || sctx->cur_inode_deleted)
4061		goto out;
4062	if (!at_end && sctx->cmp_key->objectid == sctx->cur_ino)
4063		goto out;
4064
4065	ret = get_inode_info(sctx->send_root, sctx->cur_ino, NULL, NULL,
4066			&left_mode, &left_uid, &left_gid, NULL);
4067	if (ret < 0)
4068		goto out;
4069
4070	if (!S_ISLNK(sctx->cur_inode_mode)) {
4071		if (!sctx->parent_root || sctx->cur_inode_new) {
4072			need_chmod = 1;
4073			need_chown = 1;
4074		} else {
4075			ret = get_inode_info(sctx->parent_root, sctx->cur_ino,
4076					NULL, NULL, &right_mode, &right_uid,
4077					&right_gid, NULL);
4078			if (ret < 0)
4079				goto out;
4080
4081			if (left_uid != right_uid || left_gid != right_gid)
4082				need_chown = 1;
4083			if (left_mode != right_mode)
4084				need_chmod = 1;
4085		}
4086	}
4087
4088	if (S_ISREG(sctx->cur_inode_mode)) {
4089		ret = send_truncate(sctx, sctx->cur_ino, sctx->cur_inode_gen,
4090				sctx->cur_inode_size);
4091		if (ret < 0)
4092			goto out;
4093	}
4094
4095	if (need_chown) {
4096		ret = send_chown(sctx, sctx->cur_ino, sctx->cur_inode_gen,
4097				left_uid, left_gid);
4098		if (ret < 0)
4099			goto out;
4100	}
4101	if (need_chmod) {
4102		ret = send_chmod(sctx, sctx->cur_ino, sctx->cur_inode_gen,
4103				left_mode);
4104		if (ret < 0)
4105			goto out;
4106	}
4107
4108	/*
4109	 * Need to send that every time, no matter if it actually changed
4110	 * between the two trees as we have done changes to the inode before.
4111	 */
4112	ret = send_utimes(sctx, sctx->cur_ino, sctx->cur_inode_gen);
4113	if (ret < 0)
4114		goto out;
4115
4116out:
4117	return ret;
4118}
4119
4120static int changed_inode(struct send_ctx *sctx,
4121			 enum btrfs_compare_tree_result result)
4122{
4123	int ret = 0;
4124	struct btrfs_key *key = sctx->cmp_key;
4125	struct btrfs_inode_item *left_ii = NULL;
4126	struct btrfs_inode_item *right_ii = NULL;
4127	u64 left_gen = 0;
4128	u64 right_gen = 0;
4129
4130	ret = close_cur_inode_file(sctx);
4131	if (ret < 0)
4132		goto out;
4133
4134	sctx->cur_ino = key->objectid;
4135	sctx->cur_inode_new_gen = 0;
4136
4137	/*
4138	 * Set send_progress to current inode. This will tell all get_cur_xxx
4139	 * functions that the current inode's refs are not updated yet. Later,
4140	 * when process_recorded_refs is finished, it is set to cur_ino + 1.
4141	 */
4142	sctx->send_progress = sctx->cur_ino;
4143
4144	if (result == BTRFS_COMPARE_TREE_NEW ||
4145	    result == BTRFS_COMPARE_TREE_CHANGED) {
4146		left_ii = btrfs_item_ptr(sctx->left_path->nodes[0],
4147				sctx->left_path->slots[0],
4148				struct btrfs_inode_item);
4149		left_gen = btrfs_inode_generation(sctx->left_path->nodes[0],
4150				left_ii);
4151	} else {
4152		right_ii = btrfs_item_ptr(sctx->right_path->nodes[0],
4153				sctx->right_path->slots[0],
4154				struct btrfs_inode_item);
4155		right_gen = btrfs_inode_generation(sctx->right_path->nodes[0],
4156				right_ii);
4157	}
4158	if (result == BTRFS_COMPARE_TREE_CHANGED) {
4159		right_ii = btrfs_item_ptr(sctx->right_path->nodes[0],
4160				sctx->right_path->slots[0],
4161				struct btrfs_inode_item);
4162
4163		right_gen = btrfs_inode_generation(sctx->right_path->nodes[0],
4164				right_ii);
4165
4166		/*
4167		 * The cur_ino = root dir case is special here. We can't treat
4168		 * the inode as deleted+reused because it would generate a
4169		 * stream that tries to delete/mkdir the root dir.
4170		 */
4171		if (left_gen != right_gen &&
4172		    sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID)
4173			sctx->cur_inode_new_gen = 1;
4174	}
4175
4176	if (result == BTRFS_COMPARE_TREE_NEW) {
4177		sctx->cur_inode_gen = left_gen;
4178		sctx->cur_inode_new = 1;
4179		sctx->cur_inode_deleted = 0;
4180		sctx->cur_inode_size = btrfs_inode_size(
4181				sctx->left_path->nodes[0], left_ii);
4182		sctx->cur_inode_mode = btrfs_inode_mode(
4183				sctx->left_path->nodes[0], left_ii);
4184		if (sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID)
4185			ret = send_create_inode_if_needed(sctx);
4186	} else if (result == BTRFS_COMPARE_TREE_DELETED) {
4187		sctx->cur_inode_gen = right_gen;
4188		sctx->cur_inode_new = 0;
4189		sctx->cur_inode_deleted = 1;
4190		sctx->cur_inode_size = btrfs_inode_size(
4191				sctx->right_path->nodes[0], right_ii);
4192		sctx->cur_inode_mode = btrfs_inode_mode(
4193				sctx->right_path->nodes[0], right_ii);
4194	} else if (result == BTRFS_COMPARE_TREE_CHANGED) {
4195		/*
4196		 * We need to do some special handling in case the inode was
4197		 * reported as changed with a changed generation number. This
4198		 * means that the original inode was deleted and new inode
4199		 * reused the same inum. So we have to treat the old inode as
4200		 * deleted and the new one as new.
4201		 */
4202		if (sctx->cur_inode_new_gen) {
4203			/*
4204			 * First, process the inode as if it was deleted.
4205			 */
4206			sctx->cur_inode_gen = right_gen;
4207			sctx->cur_inode_new = 0;
4208			sctx->cur_inode_deleted = 1;
4209			sctx->cur_inode_size = btrfs_inode_size(
4210					sctx->right_path->nodes[0], right_ii);
4211			sctx->cur_inode_mode = btrfs_inode_mode(
4212					sctx->right_path->nodes[0], right_ii);
4213			ret = process_all_refs(sctx,
4214					BTRFS_COMPARE_TREE_DELETED);
4215			if (ret < 0)
4216				goto out;
4217
4218			/*
4219			 * Now process the inode as if it was new.
4220			 */
4221			sctx->cur_inode_gen = left_gen;
4222			sctx->cur_inode_new = 1;
4223			sctx->cur_inode_deleted = 0;
4224			sctx->cur_inode_size = btrfs_inode_size(
4225					sctx->left_path->nodes[0], left_ii);
4226			sctx->cur_inode_mode = btrfs_inode_mode(
4227					sctx->left_path->nodes[0], left_ii);
4228			ret = send_create_inode_if_needed(sctx);
4229			if (ret < 0)
4230				goto out;
4231
4232			ret = process_all_refs(sctx, BTRFS_COMPARE_TREE_NEW);
4233			if (ret < 0)
4234				goto out;
4235			/*
4236			 * Advance send_progress now as we did not get into
4237			 * process_recorded_refs_if_needed in the new_gen case.
4238			 */
4239			sctx->send_progress = sctx->cur_ino + 1;
4240
4241			/*
4242			 * Now process all extents and xattrs of the inode as if
4243			 * they were all new.
4244			 */
4245			ret = process_all_extents(sctx);
4246			if (ret < 0)
4247				goto out;
4248			ret = process_all_new_xattrs(sctx);
4249			if (ret < 0)
4250				goto out;
4251		} else {
4252			sctx->cur_inode_gen = left_gen;
4253			sctx->cur_inode_new = 0;
4254			sctx->cur_inode_new_gen = 0;
4255			sctx->cur_inode_deleted = 0;
4256			sctx->cur_inode_size = btrfs_inode_size(
4257					sctx->left_path->nodes[0], left_ii);
4258			sctx->cur_inode_mode = btrfs_inode_mode(
4259					sctx->left_path->nodes[0], left_ii);
4260		}
4261	}
4262
4263out:
4264	return ret;
4265}
4266
4267/*
4268 * We have to process new refs before deleted refs, but compare_trees gives us
4269 * the new and deleted refs mixed. To fix this, we record the new/deleted refs
4270 * first and later process them in process_recorded_refs.
4271 * For the cur_inode_new_gen case, we skip recording completely because
4272 * changed_inode did already initiate processing of refs. The reason for this is
4273 * that in this case, compare_tree actually compares the refs of 2 different
4274 * inodes. To fix this, process_all_refs is used in changed_inode to handle all
4275 * refs of the right tree as deleted and all refs of the left tree as new.
4276 */
4277static int changed_ref(struct send_ctx *sctx,
4278		       enum btrfs_compare_tree_result result)
4279{
4280	int ret = 0;
4281
4282	BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid);
4283
4284	if (!sctx->cur_inode_new_gen &&
4285	    sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) {
4286		if (result == BTRFS_COMPARE_TREE_NEW)
4287			ret = record_new_ref(sctx);
4288		else if (result == BTRFS_COMPARE_TREE_DELETED)
4289			ret = record_deleted_ref(sctx);
4290		else if (result == BTRFS_COMPARE_TREE_CHANGED)
4291			ret = record_changed_ref(sctx);
4292	}
4293
4294	return ret;
4295}
4296
4297/*
4298 * Process new/deleted/changed xattrs. We skip processing in the
4299 * cur_inode_new_gen case because changed_inode did already initiate processing
4300 * of xattrs. The reason is the same as in changed_ref
4301 */
4302static int changed_xattr(struct send_ctx *sctx,
4303			 enum btrfs_compare_tree_result result)
4304{
4305	int ret = 0;
4306
4307	BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid);
4308
4309	if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
4310		if (result == BTRFS_COMPARE_TREE_NEW)
4311			ret = process_new_xattr(sctx);
4312		else if (result == BTRFS_COMPARE_TREE_DELETED)
4313			ret = process_deleted_xattr(sctx);
4314		else if (result == BTRFS_COMPARE_TREE_CHANGED)
4315			ret = process_changed_xattr(sctx);
4316	}
4317
4318	return ret;
4319}
4320
4321/*
4322 * Process new/deleted/changed extents. We skip processing in the
4323 * cur_inode_new_gen case because changed_inode did already initiate processing
4324 * of extents. The reason is the same as in changed_ref
4325 */
4326static int changed_extent(struct send_ctx *sctx,
4327			  enum btrfs_compare_tree_result result)
4328{
4329	int ret = 0;
4330
4331	BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid);
4332
4333	if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
4334		if (result != BTRFS_COMPARE_TREE_DELETED)
4335			ret = process_extent(sctx, sctx->left_path,
4336					sctx->cmp_key);
4337	}
4338
4339	return ret;
4340}
4341
4342/*
4343 * Updates compare related fields in sctx and simply forwards to the actual
4344 * changed_xxx functions.
4345 */
4346static int changed_cb(struct btrfs_root *left_root,
4347		      struct btrfs_root *right_root,
4348		      struct btrfs_path *left_path,
4349		      struct btrfs_path *right_path,
4350		      struct btrfs_key *key,
4351		      enum btrfs_compare_tree_result result,
4352		      void *ctx)
4353{
4354	int ret = 0;
4355	struct send_ctx *sctx = ctx;
4356
4357	sctx->left_path = left_path;
4358	sctx->right_path = right_path;
4359	sctx->cmp_key = key;
4360
4361	ret = finish_inode_if_needed(sctx, 0);
4362	if (ret < 0)
4363		goto out;
4364
4365	/* Ignore non-FS objects */
4366	if (key->objectid == BTRFS_FREE_INO_OBJECTID ||
4367	    key->objectid == BTRFS_FREE_SPACE_OBJECTID)
4368		goto out;
4369
4370	if (key->type == BTRFS_INODE_ITEM_KEY)
4371		ret = changed_inode(sctx, result);
4372	else if (key->type == BTRFS_INODE_REF_KEY ||
4373		 key->type == BTRFS_INODE_EXTREF_KEY)
4374		ret = changed_ref(sctx, result);
4375	else if (key->type == BTRFS_XATTR_ITEM_KEY)
4376		ret = changed_xattr(sctx, result);
4377	else if (key->type == BTRFS_EXTENT_DATA_KEY)
4378		ret = changed_extent(sctx, result);
4379
4380out:
4381	return ret;
4382}
4383
4384static int full_send_tree(struct send_ctx *sctx)
4385{
4386	int ret;
4387	struct btrfs_trans_handle *trans = NULL;
4388	struct btrfs_root *send_root = sctx->send_root;
4389	struct btrfs_key key;
4390	struct btrfs_key found_key;
4391	struct btrfs_path *path;
4392	struct extent_buffer *eb;
4393	int slot;
4394	u64 start_ctransid;
4395	u64 ctransid;
4396
4397	path = alloc_path_for_send();
4398	if (!path)
4399		return -ENOMEM;
4400
4401	spin_lock(&send_root->root_times_lock);
4402	start_ctransid = btrfs_root_ctransid(&send_root->root_item);
4403	spin_unlock(&send_root->root_times_lock);
4404
4405	key.objectid = BTRFS_FIRST_FREE_OBJECTID;
4406	key.type = BTRFS_INODE_ITEM_KEY;
4407	key.offset = 0;
4408
4409join_trans:
4410	/*
4411	 * We need to make sure the transaction does not get committed
4412	 * while we do anything on commit roots. Join a transaction to prevent
4413	 * this.
4414	 */
4415	trans = btrfs_join_transaction(send_root);
4416	if (IS_ERR(trans)) {
4417		ret = PTR_ERR(trans);
4418		trans = NULL;
4419		goto out;
4420	}
4421
4422	/*
4423	 * Make sure the tree has not changed after re-joining. We detect this
4424	 * by comparing start_ctransid and ctransid. They should always match.
4425	 */
4426	spin_lock(&send_root->root_times_lock);
4427	ctransid = btrfs_root_ctransid(&send_root->root_item);
4428	spin_unlock(&send_root->root_times_lock);
4429
4430	if (ctransid != start_ctransid) {
4431		WARN(1, KERN_WARNING "btrfs: the root that you're trying to "
4432				     "send was modified in between. This is "
4433				     "probably a bug.\n");
4434		ret = -EIO;
4435		goto out;
4436	}
4437
4438	ret = btrfs_search_slot_for_read(send_root, &key, path, 1, 0);
4439	if (ret < 0)
4440		goto out;
4441	if (ret)
4442		goto out_finish;
4443
4444	while (1) {
4445		/*
4446		 * When someone want to commit while we iterate, end the
4447		 * joined transaction and rejoin.
4448		 */
4449		if (btrfs_should_end_transaction(trans, send_root)) {
4450			ret = btrfs_end_transaction(trans, send_root);
4451			trans = NULL;
4452			if (ret < 0)
4453				goto out;
4454			btrfs_release_path(path);
4455			goto join_trans;
4456		}
4457
4458		eb = path->nodes[0];
4459		slot = path->slots[0];
4460		btrfs_item_key_to_cpu(eb, &found_key, slot);
4461
4462		ret = changed_cb(send_root, NULL, path, NULL,
4463				&found_key, BTRFS_COMPARE_TREE_NEW, sctx);
4464		if (ret < 0)
4465			goto out;
4466
4467		key.objectid = found_key.objectid;
4468		key.type = found_key.type;
4469		key.offset = found_key.offset + 1;
4470
4471		ret = btrfs_next_item(send_root, path);
4472		if (ret < 0)
4473			goto out;
4474		if (ret) {
4475			ret  = 0;
4476			break;
4477		}
4478	}
4479
4480out_finish:
4481	ret = finish_inode_if_needed(sctx, 1);
4482
4483out:
4484	btrfs_free_path(path);
4485	if (trans) {
4486		if (!ret)
4487			ret = btrfs_end_transaction(trans, send_root);
4488		else
4489			btrfs_end_transaction(trans, send_root);
4490	}
4491	return ret;
4492}
4493
4494static int send_subvol(struct send_ctx *sctx)
4495{
4496	int ret;
4497
4498	ret = send_header(sctx);
4499	if (ret < 0)
4500		goto out;
4501
4502	ret = send_subvol_begin(sctx);
4503	if (ret < 0)
4504		goto out;
4505
4506	if (sctx->parent_root) {
4507		ret = btrfs_compare_trees(sctx->send_root, sctx->parent_root,
4508				changed_cb, sctx);
4509		if (ret < 0)
4510			goto out;
4511		ret = finish_inode_if_needed(sctx, 1);
4512		if (ret < 0)
4513			goto out;
4514	} else {
4515		ret = full_send_tree(sctx);
4516		if (ret < 0)
4517			goto out;
4518	}
4519
4520out:
4521	if (!ret)
4522		ret = close_cur_inode_file(sctx);
4523	else
4524		close_cur_inode_file(sctx);
4525
4526	free_recorded_refs(sctx);
4527	return ret;
4528}
4529
4530long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
4531{
4532	int ret = 0;
4533	struct btrfs_root *send_root;
4534	struct btrfs_root *clone_root;
4535	struct btrfs_fs_info *fs_info;
4536	struct btrfs_ioctl_send_args *arg = NULL;
4537	struct btrfs_key key;
4538	struct file *filp = NULL;
4539	struct send_ctx *sctx = NULL;
4540	u32 i;
4541	u64 *clone_sources_tmp = NULL;
4542
4543	if (!capable(CAP_SYS_ADMIN))
4544		return -EPERM;
4545
4546	send_root = BTRFS_I(fdentry(mnt_file)->d_inode)->root;
4547	fs_info = send_root->fs_info;
4548
4549	arg = memdup_user(arg_, sizeof(*arg));
4550	if (IS_ERR(arg)) {
4551		ret = PTR_ERR(arg);
4552		arg = NULL;
4553		goto out;
4554	}
4555
4556	if (!access_ok(VERIFY_READ, arg->clone_sources,
4557			sizeof(*arg->clone_sources *
4558			arg->clone_sources_count))) {
4559		ret = -EFAULT;
4560		goto out;
4561	}
4562
4563	sctx = kzalloc(sizeof(struct send_ctx), GFP_NOFS);
4564	if (!sctx) {
4565		ret = -ENOMEM;
4566		goto out;
4567	}
4568
4569	INIT_LIST_HEAD(&sctx->new_refs);
4570	INIT_LIST_HEAD(&sctx->deleted_refs);
4571	INIT_RADIX_TREE(&sctx->name_cache, GFP_NOFS);
4572	INIT_LIST_HEAD(&sctx->name_cache_list);
4573
4574	sctx->send_filp = fget(arg->send_fd);
4575	if (IS_ERR(sctx->send_filp)) {
4576		ret = PTR_ERR(sctx->send_filp);
4577		goto out;
4578	}
4579
4580	sctx->mnt = mnt_file->f_path.mnt;
4581
4582	sctx->send_root = send_root;
4583	sctx->clone_roots_cnt = arg->clone_sources_count;
4584
4585	sctx->send_max_size = BTRFS_SEND_BUF_SIZE;
4586	sctx->send_buf = vmalloc(sctx->send_max_size);
4587	if (!sctx->send_buf) {
4588		ret = -ENOMEM;
4589		goto out;
4590	}
4591
4592	sctx->read_buf = vmalloc(BTRFS_SEND_READ_SIZE);
4593	if (!sctx->read_buf) {
4594		ret = -ENOMEM;
4595		goto out;
4596	}
4597
4598	sctx->clone_roots = vzalloc(sizeof(struct clone_root) *
4599			(arg->clone_sources_count + 1));
4600	if (!sctx->clone_roots) {
4601		ret = -ENOMEM;
4602		goto out;
4603	}
4604
4605	if (arg->clone_sources_count) {
4606		clone_sources_tmp = vmalloc(arg->clone_sources_count *
4607				sizeof(*arg->clone_sources));
4608		if (!clone_sources_tmp) {
4609			ret = -ENOMEM;
4610			goto out;
4611		}
4612
4613		ret = copy_from_user(clone_sources_tmp, arg->clone_sources,
4614				arg->clone_sources_count *
4615				sizeof(*arg->clone_sources));
4616		if (ret) {
4617			ret = -EFAULT;
4618			goto out;
4619		}
4620
4621		for (i = 0; i < arg->clone_sources_count; i++) {
4622			key.objectid = clone_sources_tmp[i];
4623			key.type = BTRFS_ROOT_ITEM_KEY;
4624			key.offset = (u64)-1;
4625			clone_root = btrfs_read_fs_root_no_name(fs_info, &key);
4626			if (!clone_root) {
4627				ret = -EINVAL;
4628				goto out;
4629			}
4630			if (IS_ERR(clone_root)) {
4631				ret = PTR_ERR(clone_root);
4632				goto out;
4633			}
4634			sctx->clone_roots[i].root = clone_root;
4635		}
4636		vfree(clone_sources_tmp);
4637		clone_sources_tmp = NULL;
4638	}
4639
4640	if (arg->parent_root) {
4641		key.objectid = arg->parent_root;
4642		key.type = BTRFS_ROOT_ITEM_KEY;
4643		key.offset = (u64)-1;
4644		sctx->parent_root = btrfs_read_fs_root_no_name(fs_info, &key);
4645		if (!sctx->parent_root) {
4646			ret = -EINVAL;
4647			goto out;
4648		}
4649	}
4650
4651	/*
4652	 * Clones from send_root are allowed, but only if the clone source
4653	 * is behind the current send position. This is checked while searching
4654	 * for possible clone sources.
4655	 */
4656	sctx->clone_roots[sctx->clone_roots_cnt++].root = sctx->send_root;
4657
4658	/* We do a bsearch later */
4659	sort(sctx->clone_roots, sctx->clone_roots_cnt,
4660			sizeof(*sctx->clone_roots), __clone_root_cmp_sort,
4661			NULL);
4662
4663	ret = send_subvol(sctx);
4664	if (ret < 0)
4665		goto out;
4666
4667	ret = begin_cmd(sctx, BTRFS_SEND_C_END);
4668	if (ret < 0)
4669		goto out;
4670	ret = send_cmd(sctx);
4671	if (ret < 0)
4672		goto out;
4673
4674out:
4675	if (filp)
4676		fput(filp);
4677	kfree(arg);
4678	vfree(clone_sources_tmp);
4679
4680	if (sctx) {
4681		if (sctx->send_filp)
4682			fput(sctx->send_filp);
4683
4684		vfree(sctx->clone_roots);
4685		vfree(sctx->send_buf);
4686		vfree(sctx->read_buf);
4687
4688		name_cache_free(sctx);
4689
4690		kfree(sctx);
4691	}
4692
4693	return ret;
4694}
4695