send.c revision 9dc442143b9874ba677fc83bf8c60744ec642998
1/*
2 * Copyright (C) 2012 Alexander Block.  All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#include <linux/bsearch.h>
20#include <linux/fs.h>
21#include <linux/file.h>
22#include <linux/sort.h>
23#include <linux/mount.h>
24#include <linux/xattr.h>
25#include <linux/posix_acl_xattr.h>
26#include <linux/radix-tree.h>
27#include <linux/vmalloc.h>
28#include <linux/string.h>
29
30#include "send.h"
31#include "backref.h"
32#include "hash.h"
33#include "locking.h"
34#include "disk-io.h"
35#include "btrfs_inode.h"
36#include "transaction.h"
37
38static int g_verbose = 0;
39
40#define verbose_printk(...) if (g_verbose) printk(__VA_ARGS__)
41
42/*
43 * A fs_path is a helper to dynamically build path names with unknown size.
44 * It reallocates the internal buffer on demand.
45 * It allows fast adding of path elements on the right side (normal path) and
46 * fast adding to the left side (reversed path). A reversed path can also be
47 * unreversed if needed.
48 */
49struct fs_path {
50	union {
51		struct {
52			char *start;
53			char *end;
54
55			char *buf;
56			unsigned short buf_len:15;
57			unsigned short reversed:1;
58			char inline_buf[];
59		};
60		/*
61		 * Average path length does not exceed 200 bytes, we'll have
62		 * better packing in the slab and higher chance to satisfy
63		 * a allocation later during send.
64		 */
65		char pad[256];
66	};
67};
68#define FS_PATH_INLINE_SIZE \
69	(sizeof(struct fs_path) - offsetof(struct fs_path, inline_buf))
70
71
72/* reused for each extent */
73struct clone_root {
74	struct btrfs_root *root;
75	u64 ino;
76	u64 offset;
77
78	u64 found_refs;
79};
80
81#define SEND_CTX_MAX_NAME_CACHE_SIZE 128
82#define SEND_CTX_NAME_CACHE_CLEAN_SIZE (SEND_CTX_MAX_NAME_CACHE_SIZE * 2)
83
84struct send_ctx {
85	struct file *send_filp;
86	loff_t send_off;
87	char *send_buf;
88	u32 send_size;
89	u32 send_max_size;
90	u64 total_send_size;
91	u64 cmd_send_size[BTRFS_SEND_C_MAX + 1];
92	u64 flags;	/* 'flags' member of btrfs_ioctl_send_args is u64 */
93
94	struct btrfs_root *send_root;
95	struct btrfs_root *parent_root;
96	struct clone_root *clone_roots;
97	int clone_roots_cnt;
98
99	/* current state of the compare_tree call */
100	struct btrfs_path *left_path;
101	struct btrfs_path *right_path;
102	struct btrfs_key *cmp_key;
103
104	/*
105	 * infos of the currently processed inode. In case of deleted inodes,
106	 * these are the values from the deleted inode.
107	 */
108	u64 cur_ino;
109	u64 cur_inode_gen;
110	int cur_inode_new;
111	int cur_inode_new_gen;
112	int cur_inode_deleted;
113	u64 cur_inode_size;
114	u64 cur_inode_mode;
115	u64 cur_inode_last_extent;
116
117	u64 send_progress;
118
119	struct list_head new_refs;
120	struct list_head deleted_refs;
121
122	struct radix_tree_root name_cache;
123	struct list_head name_cache_list;
124	int name_cache_size;
125
126	char *read_buf;
127
128	/*
129	 * We process inodes by their increasing order, so if before an
130	 * incremental send we reverse the parent/child relationship of
131	 * directories such that a directory with a lower inode number was
132	 * the parent of a directory with a higher inode number, and the one
133	 * becoming the new parent got renamed too, we can't rename/move the
134	 * directory with lower inode number when we finish processing it - we
135	 * must process the directory with higher inode number first, then
136	 * rename/move it and then rename/move the directory with lower inode
137	 * number. Example follows.
138	 *
139	 * Tree state when the first send was performed:
140	 *
141	 * .
142	 * |-- a                   (ino 257)
143	 *     |-- b               (ino 258)
144	 *         |
145	 *         |
146	 *         |-- c           (ino 259)
147	 *         |   |-- d       (ino 260)
148	 *         |
149	 *         |-- c2          (ino 261)
150	 *
151	 * Tree state when the second (incremental) send is performed:
152	 *
153	 * .
154	 * |-- a                   (ino 257)
155	 *     |-- b               (ino 258)
156	 *         |-- c2          (ino 261)
157	 *             |-- d2      (ino 260)
158	 *                 |-- cc  (ino 259)
159	 *
160	 * The sequence of steps that lead to the second state was:
161	 *
162	 * mv /a/b/c/d /a/b/c2/d2
163	 * mv /a/b/c /a/b/c2/d2/cc
164	 *
165	 * "c" has lower inode number, but we can't move it (2nd mv operation)
166	 * before we move "d", which has higher inode number.
167	 *
168	 * So we just memorize which move/rename operations must be performed
169	 * later when their respective parent is processed and moved/renamed.
170	 */
171
172	/* Indexed by parent directory inode number. */
173	struct rb_root pending_dir_moves;
174
175	/*
176	 * Reverse index, indexed by the inode number of a directory that
177	 * is waiting for the move/rename of its immediate parent before its
178	 * own move/rename can be performed.
179	 */
180	struct rb_root waiting_dir_moves;
181
182	/*
183	 * A directory that is going to be rm'ed might have a child directory
184	 * which is in the pending directory moves index above. In this case,
185	 * the directory can only be removed after the move/rename of its child
186	 * is performed. Example:
187	 *
188	 * Parent snapshot:
189	 *
190	 * .                        (ino 256)
191	 * |-- a/                   (ino 257)
192	 *     |-- b/               (ino 258)
193	 *         |-- c/           (ino 259)
194	 *         |   |-- x/       (ino 260)
195	 *         |
196	 *         |-- y/           (ino 261)
197	 *
198	 * Send snapshot:
199	 *
200	 * .                        (ino 256)
201	 * |-- a/                   (ino 257)
202	 *     |-- b/               (ino 258)
203	 *         |-- YY/          (ino 261)
204	 *              |-- x/      (ino 260)
205	 *
206	 * Sequence of steps that lead to the send snapshot:
207	 * rm -f /a/b/c/foo.txt
208	 * mv /a/b/y /a/b/YY
209	 * mv /a/b/c/x /a/b/YY
210	 * rmdir /a/b/c
211	 *
212	 * When the child is processed, its move/rename is delayed until its
213	 * parent is processed (as explained above), but all other operations
214	 * like update utimes, chown, chgrp, etc, are performed and the paths
215	 * that it uses for those operations must use the orphanized name of
216	 * its parent (the directory we're going to rm later), so we need to
217	 * memorize that name.
218	 *
219	 * Indexed by the inode number of the directory to be deleted.
220	 */
221	struct rb_root orphan_dirs;
222};
223
224struct pending_dir_move {
225	struct rb_node node;
226	struct list_head list;
227	u64 parent_ino;
228	u64 ino;
229	u64 gen;
230	struct list_head update_refs;
231};
232
233struct waiting_dir_move {
234	struct rb_node node;
235	u64 ino;
236	/*
237	 * There might be some directory that could not be removed because it
238	 * was waiting for this directory inode to be moved first. Therefore
239	 * after this directory is moved, we can try to rmdir the ino rmdir_ino.
240	 */
241	u64 rmdir_ino;
242};
243
244struct orphan_dir_info {
245	struct rb_node node;
246	u64 ino;
247	u64 gen;
248};
249
250struct name_cache_entry {
251	struct list_head list;
252	/*
253	 * radix_tree has only 32bit entries but we need to handle 64bit inums.
254	 * We use the lower 32bit of the 64bit inum to store it in the tree. If
255	 * more then one inum would fall into the same entry, we use radix_list
256	 * to store the additional entries. radix_list is also used to store
257	 * entries where two entries have the same inum but different
258	 * generations.
259	 */
260	struct list_head radix_list;
261	u64 ino;
262	u64 gen;
263	u64 parent_ino;
264	u64 parent_gen;
265	int ret;
266	int need_later_update;
267	int name_len;
268	char name[];
269};
270
271static int is_waiting_for_move(struct send_ctx *sctx, u64 ino);
272
273static struct waiting_dir_move *
274get_waiting_dir_move(struct send_ctx *sctx, u64 ino);
275
276static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino);
277
278static int need_send_hole(struct send_ctx *sctx)
279{
280	return (sctx->parent_root && !sctx->cur_inode_new &&
281		!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted &&
282		S_ISREG(sctx->cur_inode_mode));
283}
284
285static void fs_path_reset(struct fs_path *p)
286{
287	if (p->reversed) {
288		p->start = p->buf + p->buf_len - 1;
289		p->end = p->start;
290		*p->start = 0;
291	} else {
292		p->start = p->buf;
293		p->end = p->start;
294		*p->start = 0;
295	}
296}
297
298static struct fs_path *fs_path_alloc(void)
299{
300	struct fs_path *p;
301
302	p = kmalloc(sizeof(*p), GFP_NOFS);
303	if (!p)
304		return NULL;
305	p->reversed = 0;
306	p->buf = p->inline_buf;
307	p->buf_len = FS_PATH_INLINE_SIZE;
308	fs_path_reset(p);
309	return p;
310}
311
312static struct fs_path *fs_path_alloc_reversed(void)
313{
314	struct fs_path *p;
315
316	p = fs_path_alloc();
317	if (!p)
318		return NULL;
319	p->reversed = 1;
320	fs_path_reset(p);
321	return p;
322}
323
324static void fs_path_free(struct fs_path *p)
325{
326	if (!p)
327		return;
328	if (p->buf != p->inline_buf)
329		kfree(p->buf);
330	kfree(p);
331}
332
333static int fs_path_len(struct fs_path *p)
334{
335	return p->end - p->start;
336}
337
338static int fs_path_ensure_buf(struct fs_path *p, int len)
339{
340	char *tmp_buf;
341	int path_len;
342	int old_buf_len;
343
344	len++;
345
346	if (p->buf_len >= len)
347		return 0;
348
349	/*
350	 * First time the inline_buf does not suffice
351	 */
352	if (p->buf == p->inline_buf) {
353		p->buf = kmalloc(len, GFP_NOFS);
354		if (!p->buf)
355			return -ENOMEM;
356		/*
357		 * The real size of the buffer is bigger, this will let the
358		 * fast path happen most of the time
359		 */
360		p->buf_len = ksize(p->buf);
361	} else {
362		char *tmp;
363
364		tmp = krealloc(p->buf, len, GFP_NOFS);
365		if (!tmp)
366			return -ENOMEM;
367		p->buf = tmp;
368		p->buf_len = ksize(p->buf);
369	}
370
371	path_len = p->end - p->start;
372	old_buf_len = p->buf_len;
373
374	if (p->reversed) {
375		tmp_buf = p->buf + old_buf_len - path_len - 1;
376		p->end = p->buf + p->buf_len - 1;
377		p->start = p->end - path_len;
378		memmove(p->start, tmp_buf, path_len + 1);
379	} else {
380		p->start = p->buf;
381		p->end = p->start + path_len;
382	}
383	return 0;
384}
385
386static int fs_path_prepare_for_add(struct fs_path *p, int name_len,
387				   char **prepared)
388{
389	int ret;
390	int new_len;
391
392	new_len = p->end - p->start + name_len;
393	if (p->start != p->end)
394		new_len++;
395	ret = fs_path_ensure_buf(p, new_len);
396	if (ret < 0)
397		goto out;
398
399	if (p->reversed) {
400		if (p->start != p->end)
401			*--p->start = '/';
402		p->start -= name_len;
403		*prepared = p->start;
404	} else {
405		if (p->start != p->end)
406			*p->end++ = '/';
407		*prepared = p->end;
408		p->end += name_len;
409		*p->end = 0;
410	}
411
412out:
413	return ret;
414}
415
416static int fs_path_add(struct fs_path *p, const char *name, int name_len)
417{
418	int ret;
419	char *prepared;
420
421	ret = fs_path_prepare_for_add(p, name_len, &prepared);
422	if (ret < 0)
423		goto out;
424	memcpy(prepared, name, name_len);
425
426out:
427	return ret;
428}
429
430static int fs_path_add_path(struct fs_path *p, struct fs_path *p2)
431{
432	int ret;
433	char *prepared;
434
435	ret = fs_path_prepare_for_add(p, p2->end - p2->start, &prepared);
436	if (ret < 0)
437		goto out;
438	memcpy(prepared, p2->start, p2->end - p2->start);
439
440out:
441	return ret;
442}
443
444static int fs_path_add_from_extent_buffer(struct fs_path *p,
445					  struct extent_buffer *eb,
446					  unsigned long off, int len)
447{
448	int ret;
449	char *prepared;
450
451	ret = fs_path_prepare_for_add(p, len, &prepared);
452	if (ret < 0)
453		goto out;
454
455	read_extent_buffer(eb, prepared, off, len);
456
457out:
458	return ret;
459}
460
461static int fs_path_copy(struct fs_path *p, struct fs_path *from)
462{
463	int ret;
464
465	p->reversed = from->reversed;
466	fs_path_reset(p);
467
468	ret = fs_path_add_path(p, from);
469
470	return ret;
471}
472
473
474static void fs_path_unreverse(struct fs_path *p)
475{
476	char *tmp;
477	int len;
478
479	if (!p->reversed)
480		return;
481
482	tmp = p->start;
483	len = p->end - p->start;
484	p->start = p->buf;
485	p->end = p->start + len;
486	memmove(p->start, tmp, len + 1);
487	p->reversed = 0;
488}
489
490static struct btrfs_path *alloc_path_for_send(void)
491{
492	struct btrfs_path *path;
493
494	path = btrfs_alloc_path();
495	if (!path)
496		return NULL;
497	path->search_commit_root = 1;
498	path->skip_locking = 1;
499	return path;
500}
501
502static int write_buf(struct file *filp, const void *buf, u32 len, loff_t *off)
503{
504	int ret;
505	mm_segment_t old_fs;
506	u32 pos = 0;
507
508	old_fs = get_fs();
509	set_fs(KERNEL_DS);
510
511	while (pos < len) {
512		ret = vfs_write(filp, (char *)buf + pos, len - pos, off);
513		/* TODO handle that correctly */
514		/*if (ret == -ERESTARTSYS) {
515			continue;
516		}*/
517		if (ret < 0)
518			goto out;
519		if (ret == 0) {
520			ret = -EIO;
521			goto out;
522		}
523		pos += ret;
524	}
525
526	ret = 0;
527
528out:
529	set_fs(old_fs);
530	return ret;
531}
532
533static int tlv_put(struct send_ctx *sctx, u16 attr, const void *data, int len)
534{
535	struct btrfs_tlv_header *hdr;
536	int total_len = sizeof(*hdr) + len;
537	int left = sctx->send_max_size - sctx->send_size;
538
539	if (unlikely(left < total_len))
540		return -EOVERFLOW;
541
542	hdr = (struct btrfs_tlv_header *) (sctx->send_buf + sctx->send_size);
543	hdr->tlv_type = cpu_to_le16(attr);
544	hdr->tlv_len = cpu_to_le16(len);
545	memcpy(hdr + 1, data, len);
546	sctx->send_size += total_len;
547
548	return 0;
549}
550
551#define TLV_PUT_DEFINE_INT(bits) \
552	static int tlv_put_u##bits(struct send_ctx *sctx,	 	\
553			u##bits attr, u##bits value)			\
554	{								\
555		__le##bits __tmp = cpu_to_le##bits(value);		\
556		return tlv_put(sctx, attr, &__tmp, sizeof(__tmp));	\
557	}
558
559TLV_PUT_DEFINE_INT(64)
560
561static int tlv_put_string(struct send_ctx *sctx, u16 attr,
562			  const char *str, int len)
563{
564	if (len == -1)
565		len = strlen(str);
566	return tlv_put(sctx, attr, str, len);
567}
568
569static int tlv_put_uuid(struct send_ctx *sctx, u16 attr,
570			const u8 *uuid)
571{
572	return tlv_put(sctx, attr, uuid, BTRFS_UUID_SIZE);
573}
574
575static int tlv_put_btrfs_timespec(struct send_ctx *sctx, u16 attr,
576				  struct extent_buffer *eb,
577				  struct btrfs_timespec *ts)
578{
579	struct btrfs_timespec bts;
580	read_extent_buffer(eb, &bts, (unsigned long)ts, sizeof(bts));
581	return tlv_put(sctx, attr, &bts, sizeof(bts));
582}
583
584
585#define TLV_PUT(sctx, attrtype, attrlen, data) \
586	do { \
587		ret = tlv_put(sctx, attrtype, attrlen, data); \
588		if (ret < 0) \
589			goto tlv_put_failure; \
590	} while (0)
591
592#define TLV_PUT_INT(sctx, attrtype, bits, value) \
593	do { \
594		ret = tlv_put_u##bits(sctx, attrtype, value); \
595		if (ret < 0) \
596			goto tlv_put_failure; \
597	} while (0)
598
599#define TLV_PUT_U8(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 8, data)
600#define TLV_PUT_U16(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 16, data)
601#define TLV_PUT_U32(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 32, data)
602#define TLV_PUT_U64(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 64, data)
603#define TLV_PUT_STRING(sctx, attrtype, str, len) \
604	do { \
605		ret = tlv_put_string(sctx, attrtype, str, len); \
606		if (ret < 0) \
607			goto tlv_put_failure; \
608	} while (0)
609#define TLV_PUT_PATH(sctx, attrtype, p) \
610	do { \
611		ret = tlv_put_string(sctx, attrtype, p->start, \
612			p->end - p->start); \
613		if (ret < 0) \
614			goto tlv_put_failure; \
615	} while(0)
616#define TLV_PUT_UUID(sctx, attrtype, uuid) \
617	do { \
618		ret = tlv_put_uuid(sctx, attrtype, uuid); \
619		if (ret < 0) \
620			goto tlv_put_failure; \
621	} while (0)
622#define TLV_PUT_BTRFS_TIMESPEC(sctx, attrtype, eb, ts) \
623	do { \
624		ret = tlv_put_btrfs_timespec(sctx, attrtype, eb, ts); \
625		if (ret < 0) \
626			goto tlv_put_failure; \
627	} while (0)
628
629static int send_header(struct send_ctx *sctx)
630{
631	struct btrfs_stream_header hdr;
632
633	strcpy(hdr.magic, BTRFS_SEND_STREAM_MAGIC);
634	hdr.version = cpu_to_le32(BTRFS_SEND_STREAM_VERSION);
635
636	return write_buf(sctx->send_filp, &hdr, sizeof(hdr),
637					&sctx->send_off);
638}
639
640/*
641 * For each command/item we want to send to userspace, we call this function.
642 */
643static int begin_cmd(struct send_ctx *sctx, int cmd)
644{
645	struct btrfs_cmd_header *hdr;
646
647	if (WARN_ON(!sctx->send_buf))
648		return -EINVAL;
649
650	BUG_ON(sctx->send_size);
651
652	sctx->send_size += sizeof(*hdr);
653	hdr = (struct btrfs_cmd_header *)sctx->send_buf;
654	hdr->cmd = cpu_to_le16(cmd);
655
656	return 0;
657}
658
659static int send_cmd(struct send_ctx *sctx)
660{
661	int ret;
662	struct btrfs_cmd_header *hdr;
663	u32 crc;
664
665	hdr = (struct btrfs_cmd_header *)sctx->send_buf;
666	hdr->len = cpu_to_le32(sctx->send_size - sizeof(*hdr));
667	hdr->crc = 0;
668
669	crc = btrfs_crc32c(0, (unsigned char *)sctx->send_buf, sctx->send_size);
670	hdr->crc = cpu_to_le32(crc);
671
672	ret = write_buf(sctx->send_filp, sctx->send_buf, sctx->send_size,
673					&sctx->send_off);
674
675	sctx->total_send_size += sctx->send_size;
676	sctx->cmd_send_size[le16_to_cpu(hdr->cmd)] += sctx->send_size;
677	sctx->send_size = 0;
678
679	return ret;
680}
681
682/*
683 * Sends a move instruction to user space
684 */
685static int send_rename(struct send_ctx *sctx,
686		     struct fs_path *from, struct fs_path *to)
687{
688	int ret;
689
690verbose_printk("btrfs: send_rename %s -> %s\n", from->start, to->start);
691
692	ret = begin_cmd(sctx, BTRFS_SEND_C_RENAME);
693	if (ret < 0)
694		goto out;
695
696	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, from);
697	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_TO, to);
698
699	ret = send_cmd(sctx);
700
701tlv_put_failure:
702out:
703	return ret;
704}
705
706/*
707 * Sends a link instruction to user space
708 */
709static int send_link(struct send_ctx *sctx,
710		     struct fs_path *path, struct fs_path *lnk)
711{
712	int ret;
713
714verbose_printk("btrfs: send_link %s -> %s\n", path->start, lnk->start);
715
716	ret = begin_cmd(sctx, BTRFS_SEND_C_LINK);
717	if (ret < 0)
718		goto out;
719
720	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
721	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, lnk);
722
723	ret = send_cmd(sctx);
724
725tlv_put_failure:
726out:
727	return ret;
728}
729
730/*
731 * Sends an unlink instruction to user space
732 */
733static int send_unlink(struct send_ctx *sctx, struct fs_path *path)
734{
735	int ret;
736
737verbose_printk("btrfs: send_unlink %s\n", path->start);
738
739	ret = begin_cmd(sctx, BTRFS_SEND_C_UNLINK);
740	if (ret < 0)
741		goto out;
742
743	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
744
745	ret = send_cmd(sctx);
746
747tlv_put_failure:
748out:
749	return ret;
750}
751
752/*
753 * Sends a rmdir instruction to user space
754 */
755static int send_rmdir(struct send_ctx *sctx, struct fs_path *path)
756{
757	int ret;
758
759verbose_printk("btrfs: send_rmdir %s\n", path->start);
760
761	ret = begin_cmd(sctx, BTRFS_SEND_C_RMDIR);
762	if (ret < 0)
763		goto out;
764
765	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
766
767	ret = send_cmd(sctx);
768
769tlv_put_failure:
770out:
771	return ret;
772}
773
774/*
775 * Helper function to retrieve some fields from an inode item.
776 */
777static int get_inode_info(struct btrfs_root *root,
778			  u64 ino, u64 *size, u64 *gen,
779			  u64 *mode, u64 *uid, u64 *gid,
780			  u64 *rdev)
781{
782	int ret;
783	struct btrfs_inode_item *ii;
784	struct btrfs_key key;
785	struct btrfs_path *path;
786
787	path = alloc_path_for_send();
788	if (!path)
789		return -ENOMEM;
790
791	key.objectid = ino;
792	key.type = BTRFS_INODE_ITEM_KEY;
793	key.offset = 0;
794	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
795	if (ret < 0)
796		goto out;
797	if (ret) {
798		ret = -ENOENT;
799		goto out;
800	}
801
802	ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
803			struct btrfs_inode_item);
804	if (size)
805		*size = btrfs_inode_size(path->nodes[0], ii);
806	if (gen)
807		*gen = btrfs_inode_generation(path->nodes[0], ii);
808	if (mode)
809		*mode = btrfs_inode_mode(path->nodes[0], ii);
810	if (uid)
811		*uid = btrfs_inode_uid(path->nodes[0], ii);
812	if (gid)
813		*gid = btrfs_inode_gid(path->nodes[0], ii);
814	if (rdev)
815		*rdev = btrfs_inode_rdev(path->nodes[0], ii);
816
817out:
818	btrfs_free_path(path);
819	return ret;
820}
821
822typedef int (*iterate_inode_ref_t)(int num, u64 dir, int index,
823				   struct fs_path *p,
824				   void *ctx);
825
826/*
827 * Helper function to iterate the entries in ONE btrfs_inode_ref or
828 * btrfs_inode_extref.
829 * The iterate callback may return a non zero value to stop iteration. This can
830 * be a negative value for error codes or 1 to simply stop it.
831 *
832 * path must point to the INODE_REF or INODE_EXTREF when called.
833 */
834static int iterate_inode_ref(struct btrfs_root *root, struct btrfs_path *path,
835			     struct btrfs_key *found_key, int resolve,
836			     iterate_inode_ref_t iterate, void *ctx)
837{
838	struct extent_buffer *eb = path->nodes[0];
839	struct btrfs_item *item;
840	struct btrfs_inode_ref *iref;
841	struct btrfs_inode_extref *extref;
842	struct btrfs_path *tmp_path;
843	struct fs_path *p;
844	u32 cur = 0;
845	u32 total;
846	int slot = path->slots[0];
847	u32 name_len;
848	char *start;
849	int ret = 0;
850	int num = 0;
851	int index;
852	u64 dir;
853	unsigned long name_off;
854	unsigned long elem_size;
855	unsigned long ptr;
856
857	p = fs_path_alloc_reversed();
858	if (!p)
859		return -ENOMEM;
860
861	tmp_path = alloc_path_for_send();
862	if (!tmp_path) {
863		fs_path_free(p);
864		return -ENOMEM;
865	}
866
867
868	if (found_key->type == BTRFS_INODE_REF_KEY) {
869		ptr = (unsigned long)btrfs_item_ptr(eb, slot,
870						    struct btrfs_inode_ref);
871		item = btrfs_item_nr(slot);
872		total = btrfs_item_size(eb, item);
873		elem_size = sizeof(*iref);
874	} else {
875		ptr = btrfs_item_ptr_offset(eb, slot);
876		total = btrfs_item_size_nr(eb, slot);
877		elem_size = sizeof(*extref);
878	}
879
880	while (cur < total) {
881		fs_path_reset(p);
882
883		if (found_key->type == BTRFS_INODE_REF_KEY) {
884			iref = (struct btrfs_inode_ref *)(ptr + cur);
885			name_len = btrfs_inode_ref_name_len(eb, iref);
886			name_off = (unsigned long)(iref + 1);
887			index = btrfs_inode_ref_index(eb, iref);
888			dir = found_key->offset;
889		} else {
890			extref = (struct btrfs_inode_extref *)(ptr + cur);
891			name_len = btrfs_inode_extref_name_len(eb, extref);
892			name_off = (unsigned long)&extref->name;
893			index = btrfs_inode_extref_index(eb, extref);
894			dir = btrfs_inode_extref_parent(eb, extref);
895		}
896
897		if (resolve) {
898			start = btrfs_ref_to_path(root, tmp_path, name_len,
899						  name_off, eb, dir,
900						  p->buf, p->buf_len);
901			if (IS_ERR(start)) {
902				ret = PTR_ERR(start);
903				goto out;
904			}
905			if (start < p->buf) {
906				/* overflow , try again with larger buffer */
907				ret = fs_path_ensure_buf(p,
908						p->buf_len + p->buf - start);
909				if (ret < 0)
910					goto out;
911				start = btrfs_ref_to_path(root, tmp_path,
912							  name_len, name_off,
913							  eb, dir,
914							  p->buf, p->buf_len);
915				if (IS_ERR(start)) {
916					ret = PTR_ERR(start);
917					goto out;
918				}
919				BUG_ON(start < p->buf);
920			}
921			p->start = start;
922		} else {
923			ret = fs_path_add_from_extent_buffer(p, eb, name_off,
924							     name_len);
925			if (ret < 0)
926				goto out;
927		}
928
929		cur += elem_size + name_len;
930		ret = iterate(num, dir, index, p, ctx);
931		if (ret)
932			goto out;
933		num++;
934	}
935
936out:
937	btrfs_free_path(tmp_path);
938	fs_path_free(p);
939	return ret;
940}
941
942typedef int (*iterate_dir_item_t)(int num, struct btrfs_key *di_key,
943				  const char *name, int name_len,
944				  const char *data, int data_len,
945				  u8 type, void *ctx);
946
947/*
948 * Helper function to iterate the entries in ONE btrfs_dir_item.
949 * The iterate callback may return a non zero value to stop iteration. This can
950 * be a negative value for error codes or 1 to simply stop it.
951 *
952 * path must point to the dir item when called.
953 */
954static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path,
955			    struct btrfs_key *found_key,
956			    iterate_dir_item_t iterate, void *ctx)
957{
958	int ret = 0;
959	struct extent_buffer *eb;
960	struct btrfs_item *item;
961	struct btrfs_dir_item *di;
962	struct btrfs_key di_key;
963	char *buf = NULL;
964	const int buf_len = PATH_MAX;
965	u32 name_len;
966	u32 data_len;
967	u32 cur;
968	u32 len;
969	u32 total;
970	int slot;
971	int num;
972	u8 type;
973
974	buf = kmalloc(buf_len, GFP_NOFS);
975	if (!buf) {
976		ret = -ENOMEM;
977		goto out;
978	}
979
980	eb = path->nodes[0];
981	slot = path->slots[0];
982	item = btrfs_item_nr(slot);
983	di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
984	cur = 0;
985	len = 0;
986	total = btrfs_item_size(eb, item);
987
988	num = 0;
989	while (cur < total) {
990		name_len = btrfs_dir_name_len(eb, di);
991		data_len = btrfs_dir_data_len(eb, di);
992		type = btrfs_dir_type(eb, di);
993		btrfs_dir_item_key_to_cpu(eb, di, &di_key);
994
995		/*
996		 * Path too long
997		 */
998		if (name_len + data_len > buf_len) {
999			ret = -ENAMETOOLONG;
1000			goto out;
1001		}
1002
1003		read_extent_buffer(eb, buf, (unsigned long)(di + 1),
1004				name_len + data_len);
1005
1006		len = sizeof(*di) + name_len + data_len;
1007		di = (struct btrfs_dir_item *)((char *)di + len);
1008		cur += len;
1009
1010		ret = iterate(num, &di_key, buf, name_len, buf + name_len,
1011				data_len, type, ctx);
1012		if (ret < 0)
1013			goto out;
1014		if (ret) {
1015			ret = 0;
1016			goto out;
1017		}
1018
1019		num++;
1020	}
1021
1022out:
1023	kfree(buf);
1024	return ret;
1025}
1026
1027static int __copy_first_ref(int num, u64 dir, int index,
1028			    struct fs_path *p, void *ctx)
1029{
1030	int ret;
1031	struct fs_path *pt = ctx;
1032
1033	ret = fs_path_copy(pt, p);
1034	if (ret < 0)
1035		return ret;
1036
1037	/* we want the first only */
1038	return 1;
1039}
1040
1041/*
1042 * Retrieve the first path of an inode. If an inode has more then one
1043 * ref/hardlink, this is ignored.
1044 */
1045static int get_inode_path(struct btrfs_root *root,
1046			  u64 ino, struct fs_path *path)
1047{
1048	int ret;
1049	struct btrfs_key key, found_key;
1050	struct btrfs_path *p;
1051
1052	p = alloc_path_for_send();
1053	if (!p)
1054		return -ENOMEM;
1055
1056	fs_path_reset(path);
1057
1058	key.objectid = ino;
1059	key.type = BTRFS_INODE_REF_KEY;
1060	key.offset = 0;
1061
1062	ret = btrfs_search_slot_for_read(root, &key, p, 1, 0);
1063	if (ret < 0)
1064		goto out;
1065	if (ret) {
1066		ret = 1;
1067		goto out;
1068	}
1069	btrfs_item_key_to_cpu(p->nodes[0], &found_key, p->slots[0]);
1070	if (found_key.objectid != ino ||
1071	    (found_key.type != BTRFS_INODE_REF_KEY &&
1072	     found_key.type != BTRFS_INODE_EXTREF_KEY)) {
1073		ret = -ENOENT;
1074		goto out;
1075	}
1076
1077	ret = iterate_inode_ref(root, p, &found_key, 1,
1078				__copy_first_ref, path);
1079	if (ret < 0)
1080		goto out;
1081	ret = 0;
1082
1083out:
1084	btrfs_free_path(p);
1085	return ret;
1086}
1087
1088struct backref_ctx {
1089	struct send_ctx *sctx;
1090
1091	/* number of total found references */
1092	u64 found;
1093
1094	/*
1095	 * used for clones found in send_root. clones found behind cur_objectid
1096	 * and cur_offset are not considered as allowed clones.
1097	 */
1098	u64 cur_objectid;
1099	u64 cur_offset;
1100
1101	/* may be truncated in case it's the last extent in a file */
1102	u64 extent_len;
1103
1104	/* Just to check for bugs in backref resolving */
1105	int found_itself;
1106};
1107
1108static int __clone_root_cmp_bsearch(const void *key, const void *elt)
1109{
1110	u64 root = (u64)(uintptr_t)key;
1111	struct clone_root *cr = (struct clone_root *)elt;
1112
1113	if (root < cr->root->objectid)
1114		return -1;
1115	if (root > cr->root->objectid)
1116		return 1;
1117	return 0;
1118}
1119
1120static int __clone_root_cmp_sort(const void *e1, const void *e2)
1121{
1122	struct clone_root *cr1 = (struct clone_root *)e1;
1123	struct clone_root *cr2 = (struct clone_root *)e2;
1124
1125	if (cr1->root->objectid < cr2->root->objectid)
1126		return -1;
1127	if (cr1->root->objectid > cr2->root->objectid)
1128		return 1;
1129	return 0;
1130}
1131
1132/*
1133 * Called for every backref that is found for the current extent.
1134 * Results are collected in sctx->clone_roots->ino/offset/found_refs
1135 */
1136static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_)
1137{
1138	struct backref_ctx *bctx = ctx_;
1139	struct clone_root *found;
1140	int ret;
1141	u64 i_size;
1142
1143	/* First check if the root is in the list of accepted clone sources */
1144	found = bsearch((void *)(uintptr_t)root, bctx->sctx->clone_roots,
1145			bctx->sctx->clone_roots_cnt,
1146			sizeof(struct clone_root),
1147			__clone_root_cmp_bsearch);
1148	if (!found)
1149		return 0;
1150
1151	if (found->root == bctx->sctx->send_root &&
1152	    ino == bctx->cur_objectid &&
1153	    offset == bctx->cur_offset) {
1154		bctx->found_itself = 1;
1155	}
1156
1157	/*
1158	 * There are inodes that have extents that lie behind its i_size. Don't
1159	 * accept clones from these extents.
1160	 */
1161	ret = get_inode_info(found->root, ino, &i_size, NULL, NULL, NULL, NULL,
1162			NULL);
1163	if (ret < 0)
1164		return ret;
1165
1166	if (offset + bctx->extent_len > i_size)
1167		return 0;
1168
1169	/*
1170	 * Make sure we don't consider clones from send_root that are
1171	 * behind the current inode/offset.
1172	 */
1173	if (found->root == bctx->sctx->send_root) {
1174		/*
1175		 * TODO for the moment we don't accept clones from the inode
1176		 * that is currently send. We may change this when
1177		 * BTRFS_IOC_CLONE_RANGE supports cloning from and to the same
1178		 * file.
1179		 */
1180		if (ino >= bctx->cur_objectid)
1181			return 0;
1182#if 0
1183		if (ino > bctx->cur_objectid)
1184			return 0;
1185		if (offset + bctx->extent_len > bctx->cur_offset)
1186			return 0;
1187#endif
1188	}
1189
1190	bctx->found++;
1191	found->found_refs++;
1192	if (ino < found->ino) {
1193		found->ino = ino;
1194		found->offset = offset;
1195	} else if (found->ino == ino) {
1196		/*
1197		 * same extent found more then once in the same file.
1198		 */
1199		if (found->offset > offset + bctx->extent_len)
1200			found->offset = offset;
1201	}
1202
1203	return 0;
1204}
1205
1206/*
1207 * Given an inode, offset and extent item, it finds a good clone for a clone
1208 * instruction. Returns -ENOENT when none could be found. The function makes
1209 * sure that the returned clone is usable at the point where sending is at the
1210 * moment. This means, that no clones are accepted which lie behind the current
1211 * inode+offset.
1212 *
1213 * path must point to the extent item when called.
1214 */
1215static int find_extent_clone(struct send_ctx *sctx,
1216			     struct btrfs_path *path,
1217			     u64 ino, u64 data_offset,
1218			     u64 ino_size,
1219			     struct clone_root **found)
1220{
1221	int ret;
1222	int extent_type;
1223	u64 logical;
1224	u64 disk_byte;
1225	u64 num_bytes;
1226	u64 extent_item_pos;
1227	u64 flags = 0;
1228	struct btrfs_file_extent_item *fi;
1229	struct extent_buffer *eb = path->nodes[0];
1230	struct backref_ctx *backref_ctx = NULL;
1231	struct clone_root *cur_clone_root;
1232	struct btrfs_key found_key;
1233	struct btrfs_path *tmp_path;
1234	int compressed;
1235	u32 i;
1236
1237	tmp_path = alloc_path_for_send();
1238	if (!tmp_path)
1239		return -ENOMEM;
1240
1241	backref_ctx = kmalloc(sizeof(*backref_ctx), GFP_NOFS);
1242	if (!backref_ctx) {
1243		ret = -ENOMEM;
1244		goto out;
1245	}
1246
1247	if (data_offset >= ino_size) {
1248		/*
1249		 * There may be extents that lie behind the file's size.
1250		 * I at least had this in combination with snapshotting while
1251		 * writing large files.
1252		 */
1253		ret = 0;
1254		goto out;
1255	}
1256
1257	fi = btrfs_item_ptr(eb, path->slots[0],
1258			struct btrfs_file_extent_item);
1259	extent_type = btrfs_file_extent_type(eb, fi);
1260	if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1261		ret = -ENOENT;
1262		goto out;
1263	}
1264	compressed = btrfs_file_extent_compression(eb, fi);
1265
1266	num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1267	disk_byte = btrfs_file_extent_disk_bytenr(eb, fi);
1268	if (disk_byte == 0) {
1269		ret = -ENOENT;
1270		goto out;
1271	}
1272	logical = disk_byte + btrfs_file_extent_offset(eb, fi);
1273
1274	ret = extent_from_logical(sctx->send_root->fs_info, disk_byte, tmp_path,
1275				  &found_key, &flags);
1276	btrfs_release_path(tmp_path);
1277
1278	if (ret < 0)
1279		goto out;
1280	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
1281		ret = -EIO;
1282		goto out;
1283	}
1284
1285	/*
1286	 * Setup the clone roots.
1287	 */
1288	for (i = 0; i < sctx->clone_roots_cnt; i++) {
1289		cur_clone_root = sctx->clone_roots + i;
1290		cur_clone_root->ino = (u64)-1;
1291		cur_clone_root->offset = 0;
1292		cur_clone_root->found_refs = 0;
1293	}
1294
1295	backref_ctx->sctx = sctx;
1296	backref_ctx->found = 0;
1297	backref_ctx->cur_objectid = ino;
1298	backref_ctx->cur_offset = data_offset;
1299	backref_ctx->found_itself = 0;
1300	backref_ctx->extent_len = num_bytes;
1301
1302	/*
1303	 * The last extent of a file may be too large due to page alignment.
1304	 * We need to adjust extent_len in this case so that the checks in
1305	 * __iterate_backrefs work.
1306	 */
1307	if (data_offset + num_bytes >= ino_size)
1308		backref_ctx->extent_len = ino_size - data_offset;
1309
1310	/*
1311	 * Now collect all backrefs.
1312	 */
1313	if (compressed == BTRFS_COMPRESS_NONE)
1314		extent_item_pos = logical - found_key.objectid;
1315	else
1316		extent_item_pos = 0;
1317	ret = iterate_extent_inodes(sctx->send_root->fs_info,
1318					found_key.objectid, extent_item_pos, 1,
1319					__iterate_backrefs, backref_ctx);
1320
1321	if (ret < 0)
1322		goto out;
1323
1324	if (!backref_ctx->found_itself) {
1325		/* found a bug in backref code? */
1326		ret = -EIO;
1327		btrfs_err(sctx->send_root->fs_info, "did not find backref in "
1328				"send_root. inode=%llu, offset=%llu, "
1329				"disk_byte=%llu found extent=%llu\n",
1330				ino, data_offset, disk_byte, found_key.objectid);
1331		goto out;
1332	}
1333
1334verbose_printk(KERN_DEBUG "btrfs: find_extent_clone: data_offset=%llu, "
1335		"ino=%llu, "
1336		"num_bytes=%llu, logical=%llu\n",
1337		data_offset, ino, num_bytes, logical);
1338
1339	if (!backref_ctx->found)
1340		verbose_printk("btrfs:    no clones found\n");
1341
1342	cur_clone_root = NULL;
1343	for (i = 0; i < sctx->clone_roots_cnt; i++) {
1344		if (sctx->clone_roots[i].found_refs) {
1345			if (!cur_clone_root)
1346				cur_clone_root = sctx->clone_roots + i;
1347			else if (sctx->clone_roots[i].root == sctx->send_root)
1348				/* prefer clones from send_root over others */
1349				cur_clone_root = sctx->clone_roots + i;
1350		}
1351
1352	}
1353
1354	if (cur_clone_root) {
1355		if (compressed != BTRFS_COMPRESS_NONE) {
1356			/*
1357			 * Offsets given by iterate_extent_inodes() are relative
1358			 * to the start of the extent, we need to add logical
1359			 * offset from the file extent item.
1360			 * (See why at backref.c:check_extent_in_eb())
1361			 */
1362			cur_clone_root->offset += btrfs_file_extent_offset(eb,
1363									   fi);
1364		}
1365		*found = cur_clone_root;
1366		ret = 0;
1367	} else {
1368		ret = -ENOENT;
1369	}
1370
1371out:
1372	btrfs_free_path(tmp_path);
1373	kfree(backref_ctx);
1374	return ret;
1375}
1376
1377static int read_symlink(struct btrfs_root *root,
1378			u64 ino,
1379			struct fs_path *dest)
1380{
1381	int ret;
1382	struct btrfs_path *path;
1383	struct btrfs_key key;
1384	struct btrfs_file_extent_item *ei;
1385	u8 type;
1386	u8 compression;
1387	unsigned long off;
1388	int len;
1389
1390	path = alloc_path_for_send();
1391	if (!path)
1392		return -ENOMEM;
1393
1394	key.objectid = ino;
1395	key.type = BTRFS_EXTENT_DATA_KEY;
1396	key.offset = 0;
1397	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1398	if (ret < 0)
1399		goto out;
1400	BUG_ON(ret);
1401
1402	ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
1403			struct btrfs_file_extent_item);
1404	type = btrfs_file_extent_type(path->nodes[0], ei);
1405	compression = btrfs_file_extent_compression(path->nodes[0], ei);
1406	BUG_ON(type != BTRFS_FILE_EXTENT_INLINE);
1407	BUG_ON(compression);
1408
1409	off = btrfs_file_extent_inline_start(ei);
1410	len = btrfs_file_extent_inline_len(path->nodes[0], path->slots[0], ei);
1411
1412	ret = fs_path_add_from_extent_buffer(dest, path->nodes[0], off, len);
1413
1414out:
1415	btrfs_free_path(path);
1416	return ret;
1417}
1418
1419/*
1420 * Helper function to generate a file name that is unique in the root of
1421 * send_root and parent_root. This is used to generate names for orphan inodes.
1422 */
1423static int gen_unique_name(struct send_ctx *sctx,
1424			   u64 ino, u64 gen,
1425			   struct fs_path *dest)
1426{
1427	int ret = 0;
1428	struct btrfs_path *path;
1429	struct btrfs_dir_item *di;
1430	char tmp[64];
1431	int len;
1432	u64 idx = 0;
1433
1434	path = alloc_path_for_send();
1435	if (!path)
1436		return -ENOMEM;
1437
1438	while (1) {
1439		len = snprintf(tmp, sizeof(tmp), "o%llu-%llu-%llu",
1440				ino, gen, idx);
1441		ASSERT(len < sizeof(tmp));
1442
1443		di = btrfs_lookup_dir_item(NULL, sctx->send_root,
1444				path, BTRFS_FIRST_FREE_OBJECTID,
1445				tmp, strlen(tmp), 0);
1446		btrfs_release_path(path);
1447		if (IS_ERR(di)) {
1448			ret = PTR_ERR(di);
1449			goto out;
1450		}
1451		if (di) {
1452			/* not unique, try again */
1453			idx++;
1454			continue;
1455		}
1456
1457		if (!sctx->parent_root) {
1458			/* unique */
1459			ret = 0;
1460			break;
1461		}
1462
1463		di = btrfs_lookup_dir_item(NULL, sctx->parent_root,
1464				path, BTRFS_FIRST_FREE_OBJECTID,
1465				tmp, strlen(tmp), 0);
1466		btrfs_release_path(path);
1467		if (IS_ERR(di)) {
1468			ret = PTR_ERR(di);
1469			goto out;
1470		}
1471		if (di) {
1472			/* not unique, try again */
1473			idx++;
1474			continue;
1475		}
1476		/* unique */
1477		break;
1478	}
1479
1480	ret = fs_path_add(dest, tmp, strlen(tmp));
1481
1482out:
1483	btrfs_free_path(path);
1484	return ret;
1485}
1486
1487enum inode_state {
1488	inode_state_no_change,
1489	inode_state_will_create,
1490	inode_state_did_create,
1491	inode_state_will_delete,
1492	inode_state_did_delete,
1493};
1494
1495static int get_cur_inode_state(struct send_ctx *sctx, u64 ino, u64 gen)
1496{
1497	int ret;
1498	int left_ret;
1499	int right_ret;
1500	u64 left_gen;
1501	u64 right_gen;
1502
1503	ret = get_inode_info(sctx->send_root, ino, NULL, &left_gen, NULL, NULL,
1504			NULL, NULL);
1505	if (ret < 0 && ret != -ENOENT)
1506		goto out;
1507	left_ret = ret;
1508
1509	if (!sctx->parent_root) {
1510		right_ret = -ENOENT;
1511	} else {
1512		ret = get_inode_info(sctx->parent_root, ino, NULL, &right_gen,
1513				NULL, NULL, NULL, NULL);
1514		if (ret < 0 && ret != -ENOENT)
1515			goto out;
1516		right_ret = ret;
1517	}
1518
1519	if (!left_ret && !right_ret) {
1520		if (left_gen == gen && right_gen == gen) {
1521			ret = inode_state_no_change;
1522		} else if (left_gen == gen) {
1523			if (ino < sctx->send_progress)
1524				ret = inode_state_did_create;
1525			else
1526				ret = inode_state_will_create;
1527		} else if (right_gen == gen) {
1528			if (ino < sctx->send_progress)
1529				ret = inode_state_did_delete;
1530			else
1531				ret = inode_state_will_delete;
1532		} else  {
1533			ret = -ENOENT;
1534		}
1535	} else if (!left_ret) {
1536		if (left_gen == gen) {
1537			if (ino < sctx->send_progress)
1538				ret = inode_state_did_create;
1539			else
1540				ret = inode_state_will_create;
1541		} else {
1542			ret = -ENOENT;
1543		}
1544	} else if (!right_ret) {
1545		if (right_gen == gen) {
1546			if (ino < sctx->send_progress)
1547				ret = inode_state_did_delete;
1548			else
1549				ret = inode_state_will_delete;
1550		} else {
1551			ret = -ENOENT;
1552		}
1553	} else {
1554		ret = -ENOENT;
1555	}
1556
1557out:
1558	return ret;
1559}
1560
1561static int is_inode_existent(struct send_ctx *sctx, u64 ino, u64 gen)
1562{
1563	int ret;
1564
1565	ret = get_cur_inode_state(sctx, ino, gen);
1566	if (ret < 0)
1567		goto out;
1568
1569	if (ret == inode_state_no_change ||
1570	    ret == inode_state_did_create ||
1571	    ret == inode_state_will_delete)
1572		ret = 1;
1573	else
1574		ret = 0;
1575
1576out:
1577	return ret;
1578}
1579
1580/*
1581 * Helper function to lookup a dir item in a dir.
1582 */
1583static int lookup_dir_item_inode(struct btrfs_root *root,
1584				 u64 dir, const char *name, int name_len,
1585				 u64 *found_inode,
1586				 u8 *found_type)
1587{
1588	int ret = 0;
1589	struct btrfs_dir_item *di;
1590	struct btrfs_key key;
1591	struct btrfs_path *path;
1592
1593	path = alloc_path_for_send();
1594	if (!path)
1595		return -ENOMEM;
1596
1597	di = btrfs_lookup_dir_item(NULL, root, path,
1598			dir, name, name_len, 0);
1599	if (!di) {
1600		ret = -ENOENT;
1601		goto out;
1602	}
1603	if (IS_ERR(di)) {
1604		ret = PTR_ERR(di);
1605		goto out;
1606	}
1607	btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key);
1608	*found_inode = key.objectid;
1609	*found_type = btrfs_dir_type(path->nodes[0], di);
1610
1611out:
1612	btrfs_free_path(path);
1613	return ret;
1614}
1615
1616/*
1617 * Looks up the first btrfs_inode_ref of a given ino. It returns the parent dir,
1618 * generation of the parent dir and the name of the dir entry.
1619 */
1620static int get_first_ref(struct btrfs_root *root, u64 ino,
1621			 u64 *dir, u64 *dir_gen, struct fs_path *name)
1622{
1623	int ret;
1624	struct btrfs_key key;
1625	struct btrfs_key found_key;
1626	struct btrfs_path *path;
1627	int len;
1628	u64 parent_dir;
1629
1630	path = alloc_path_for_send();
1631	if (!path)
1632		return -ENOMEM;
1633
1634	key.objectid = ino;
1635	key.type = BTRFS_INODE_REF_KEY;
1636	key.offset = 0;
1637
1638	ret = btrfs_search_slot_for_read(root, &key, path, 1, 0);
1639	if (ret < 0)
1640		goto out;
1641	if (!ret)
1642		btrfs_item_key_to_cpu(path->nodes[0], &found_key,
1643				path->slots[0]);
1644	if (ret || found_key.objectid != ino ||
1645	    (found_key.type != BTRFS_INODE_REF_KEY &&
1646	     found_key.type != BTRFS_INODE_EXTREF_KEY)) {
1647		ret = -ENOENT;
1648		goto out;
1649	}
1650
1651	if (key.type == BTRFS_INODE_REF_KEY) {
1652		struct btrfs_inode_ref *iref;
1653		iref = btrfs_item_ptr(path->nodes[0], path->slots[0],
1654				      struct btrfs_inode_ref);
1655		len = btrfs_inode_ref_name_len(path->nodes[0], iref);
1656		ret = fs_path_add_from_extent_buffer(name, path->nodes[0],
1657						     (unsigned long)(iref + 1),
1658						     len);
1659		parent_dir = found_key.offset;
1660	} else {
1661		struct btrfs_inode_extref *extref;
1662		extref = btrfs_item_ptr(path->nodes[0], path->slots[0],
1663					struct btrfs_inode_extref);
1664		len = btrfs_inode_extref_name_len(path->nodes[0], extref);
1665		ret = fs_path_add_from_extent_buffer(name, path->nodes[0],
1666					(unsigned long)&extref->name, len);
1667		parent_dir = btrfs_inode_extref_parent(path->nodes[0], extref);
1668	}
1669	if (ret < 0)
1670		goto out;
1671	btrfs_release_path(path);
1672
1673	ret = get_inode_info(root, parent_dir, NULL, dir_gen, NULL, NULL,
1674			NULL, NULL);
1675	if (ret < 0)
1676		goto out;
1677
1678	*dir = parent_dir;
1679
1680out:
1681	btrfs_free_path(path);
1682	return ret;
1683}
1684
1685static int is_first_ref(struct btrfs_root *root,
1686			u64 ino, u64 dir,
1687			const char *name, int name_len)
1688{
1689	int ret;
1690	struct fs_path *tmp_name;
1691	u64 tmp_dir;
1692	u64 tmp_dir_gen;
1693
1694	tmp_name = fs_path_alloc();
1695	if (!tmp_name)
1696		return -ENOMEM;
1697
1698	ret = get_first_ref(root, ino, &tmp_dir, &tmp_dir_gen, tmp_name);
1699	if (ret < 0)
1700		goto out;
1701
1702	if (dir != tmp_dir || name_len != fs_path_len(tmp_name)) {
1703		ret = 0;
1704		goto out;
1705	}
1706
1707	ret = !memcmp(tmp_name->start, name, name_len);
1708
1709out:
1710	fs_path_free(tmp_name);
1711	return ret;
1712}
1713
1714/*
1715 * Used by process_recorded_refs to determine if a new ref would overwrite an
1716 * already existing ref. In case it detects an overwrite, it returns the
1717 * inode/gen in who_ino/who_gen.
1718 * When an overwrite is detected, process_recorded_refs does proper orphanizing
1719 * to make sure later references to the overwritten inode are possible.
1720 * Orphanizing is however only required for the first ref of an inode.
1721 * process_recorded_refs does an additional is_first_ref check to see if
1722 * orphanizing is really required.
1723 */
1724static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen,
1725			      const char *name, int name_len,
1726			      u64 *who_ino, u64 *who_gen)
1727{
1728	int ret = 0;
1729	u64 gen;
1730	u64 other_inode = 0;
1731	u8 other_type = 0;
1732
1733	if (!sctx->parent_root)
1734		goto out;
1735
1736	ret = is_inode_existent(sctx, dir, dir_gen);
1737	if (ret <= 0)
1738		goto out;
1739
1740	/*
1741	 * If we have a parent root we need to verify that the parent dir was
1742	 * not delted and then re-created, if it was then we have no overwrite
1743	 * and we can just unlink this entry.
1744	 */
1745	if (sctx->parent_root) {
1746		ret = get_inode_info(sctx->parent_root, dir, NULL, &gen, NULL,
1747				     NULL, NULL, NULL);
1748		if (ret < 0 && ret != -ENOENT)
1749			goto out;
1750		if (ret) {
1751			ret = 0;
1752			goto out;
1753		}
1754		if (gen != dir_gen)
1755			goto out;
1756	}
1757
1758	ret = lookup_dir_item_inode(sctx->parent_root, dir, name, name_len,
1759			&other_inode, &other_type);
1760	if (ret < 0 && ret != -ENOENT)
1761		goto out;
1762	if (ret) {
1763		ret = 0;
1764		goto out;
1765	}
1766
1767	/*
1768	 * Check if the overwritten ref was already processed. If yes, the ref
1769	 * was already unlinked/moved, so we can safely assume that we will not
1770	 * overwrite anything at this point in time.
1771	 */
1772	if (other_inode > sctx->send_progress) {
1773		ret = get_inode_info(sctx->parent_root, other_inode, NULL,
1774				who_gen, NULL, NULL, NULL, NULL);
1775		if (ret < 0)
1776			goto out;
1777
1778		ret = 1;
1779		*who_ino = other_inode;
1780	} else {
1781		ret = 0;
1782	}
1783
1784out:
1785	return ret;
1786}
1787
1788/*
1789 * Checks if the ref was overwritten by an already processed inode. This is
1790 * used by __get_cur_name_and_parent to find out if the ref was orphanized and
1791 * thus the orphan name needs be used.
1792 * process_recorded_refs also uses it to avoid unlinking of refs that were
1793 * overwritten.
1794 */
1795static int did_overwrite_ref(struct send_ctx *sctx,
1796			    u64 dir, u64 dir_gen,
1797			    u64 ino, u64 ino_gen,
1798			    const char *name, int name_len)
1799{
1800	int ret = 0;
1801	u64 gen;
1802	u64 ow_inode;
1803	u8 other_type;
1804
1805	if (!sctx->parent_root)
1806		goto out;
1807
1808	ret = is_inode_existent(sctx, dir, dir_gen);
1809	if (ret <= 0)
1810		goto out;
1811
1812	/* check if the ref was overwritten by another ref */
1813	ret = lookup_dir_item_inode(sctx->send_root, dir, name, name_len,
1814			&ow_inode, &other_type);
1815	if (ret < 0 && ret != -ENOENT)
1816		goto out;
1817	if (ret) {
1818		/* was never and will never be overwritten */
1819		ret = 0;
1820		goto out;
1821	}
1822
1823	ret = get_inode_info(sctx->send_root, ow_inode, NULL, &gen, NULL, NULL,
1824			NULL, NULL);
1825	if (ret < 0)
1826		goto out;
1827
1828	if (ow_inode == ino && gen == ino_gen) {
1829		ret = 0;
1830		goto out;
1831	}
1832
1833	/* we know that it is or will be overwritten. check this now */
1834	if (ow_inode < sctx->send_progress)
1835		ret = 1;
1836	else
1837		ret = 0;
1838
1839out:
1840	return ret;
1841}
1842
1843/*
1844 * Same as did_overwrite_ref, but also checks if it is the first ref of an inode
1845 * that got overwritten. This is used by process_recorded_refs to determine
1846 * if it has to use the path as returned by get_cur_path or the orphan name.
1847 */
1848static int did_overwrite_first_ref(struct send_ctx *sctx, u64 ino, u64 gen)
1849{
1850	int ret = 0;
1851	struct fs_path *name = NULL;
1852	u64 dir;
1853	u64 dir_gen;
1854
1855	if (!sctx->parent_root)
1856		goto out;
1857
1858	name = fs_path_alloc();
1859	if (!name)
1860		return -ENOMEM;
1861
1862	ret = get_first_ref(sctx->parent_root, ino, &dir, &dir_gen, name);
1863	if (ret < 0)
1864		goto out;
1865
1866	ret = did_overwrite_ref(sctx, dir, dir_gen, ino, gen,
1867			name->start, fs_path_len(name));
1868
1869out:
1870	fs_path_free(name);
1871	return ret;
1872}
1873
1874/*
1875 * Insert a name cache entry. On 32bit kernels the radix tree index is 32bit,
1876 * so we need to do some special handling in case we have clashes. This function
1877 * takes care of this with the help of name_cache_entry::radix_list.
1878 * In case of error, nce is kfreed.
1879 */
1880static int name_cache_insert(struct send_ctx *sctx,
1881			     struct name_cache_entry *nce)
1882{
1883	int ret = 0;
1884	struct list_head *nce_head;
1885
1886	nce_head = radix_tree_lookup(&sctx->name_cache,
1887			(unsigned long)nce->ino);
1888	if (!nce_head) {
1889		nce_head = kmalloc(sizeof(*nce_head), GFP_NOFS);
1890		if (!nce_head) {
1891			kfree(nce);
1892			return -ENOMEM;
1893		}
1894		INIT_LIST_HEAD(nce_head);
1895
1896		ret = radix_tree_insert(&sctx->name_cache, nce->ino, nce_head);
1897		if (ret < 0) {
1898			kfree(nce_head);
1899			kfree(nce);
1900			return ret;
1901		}
1902	}
1903	list_add_tail(&nce->radix_list, nce_head);
1904	list_add_tail(&nce->list, &sctx->name_cache_list);
1905	sctx->name_cache_size++;
1906
1907	return ret;
1908}
1909
1910static void name_cache_delete(struct send_ctx *sctx,
1911			      struct name_cache_entry *nce)
1912{
1913	struct list_head *nce_head;
1914
1915	nce_head = radix_tree_lookup(&sctx->name_cache,
1916			(unsigned long)nce->ino);
1917	if (!nce_head) {
1918		btrfs_err(sctx->send_root->fs_info,
1919	      "name_cache_delete lookup failed ino %llu cache size %d, leaking memory",
1920			nce->ino, sctx->name_cache_size);
1921	}
1922
1923	list_del(&nce->radix_list);
1924	list_del(&nce->list);
1925	sctx->name_cache_size--;
1926
1927	/*
1928	 * We may not get to the final release of nce_head if the lookup fails
1929	 */
1930	if (nce_head && list_empty(nce_head)) {
1931		radix_tree_delete(&sctx->name_cache, (unsigned long)nce->ino);
1932		kfree(nce_head);
1933	}
1934}
1935
1936static struct name_cache_entry *name_cache_search(struct send_ctx *sctx,
1937						    u64 ino, u64 gen)
1938{
1939	struct list_head *nce_head;
1940	struct name_cache_entry *cur;
1941
1942	nce_head = radix_tree_lookup(&sctx->name_cache, (unsigned long)ino);
1943	if (!nce_head)
1944		return NULL;
1945
1946	list_for_each_entry(cur, nce_head, radix_list) {
1947		if (cur->ino == ino && cur->gen == gen)
1948			return cur;
1949	}
1950	return NULL;
1951}
1952
1953/*
1954 * Removes the entry from the list and adds it back to the end. This marks the
1955 * entry as recently used so that name_cache_clean_unused does not remove it.
1956 */
1957static void name_cache_used(struct send_ctx *sctx, struct name_cache_entry *nce)
1958{
1959	list_del(&nce->list);
1960	list_add_tail(&nce->list, &sctx->name_cache_list);
1961}
1962
1963/*
1964 * Remove some entries from the beginning of name_cache_list.
1965 */
1966static void name_cache_clean_unused(struct send_ctx *sctx)
1967{
1968	struct name_cache_entry *nce;
1969
1970	if (sctx->name_cache_size < SEND_CTX_NAME_CACHE_CLEAN_SIZE)
1971		return;
1972
1973	while (sctx->name_cache_size > SEND_CTX_MAX_NAME_CACHE_SIZE) {
1974		nce = list_entry(sctx->name_cache_list.next,
1975				struct name_cache_entry, list);
1976		name_cache_delete(sctx, nce);
1977		kfree(nce);
1978	}
1979}
1980
1981static void name_cache_free(struct send_ctx *sctx)
1982{
1983	struct name_cache_entry *nce;
1984
1985	while (!list_empty(&sctx->name_cache_list)) {
1986		nce = list_entry(sctx->name_cache_list.next,
1987				struct name_cache_entry, list);
1988		name_cache_delete(sctx, nce);
1989		kfree(nce);
1990	}
1991}
1992
1993/*
1994 * Used by get_cur_path for each ref up to the root.
1995 * Returns 0 if it succeeded.
1996 * Returns 1 if the inode is not existent or got overwritten. In that case, the
1997 * name is an orphan name. This instructs get_cur_path to stop iterating. If 1
1998 * is returned, parent_ino/parent_gen are not guaranteed to be valid.
1999 * Returns <0 in case of error.
2000 */
2001static int __get_cur_name_and_parent(struct send_ctx *sctx,
2002				     u64 ino, u64 gen,
2003				     int skip_name_cache,
2004				     u64 *parent_ino,
2005				     u64 *parent_gen,
2006				     struct fs_path *dest)
2007{
2008	int ret;
2009	int nce_ret;
2010	struct btrfs_path *path = NULL;
2011	struct name_cache_entry *nce = NULL;
2012
2013	if (skip_name_cache)
2014		goto get_ref;
2015	/*
2016	 * First check if we already did a call to this function with the same
2017	 * ino/gen. If yes, check if the cache entry is still up-to-date. If yes
2018	 * return the cached result.
2019	 */
2020	nce = name_cache_search(sctx, ino, gen);
2021	if (nce) {
2022		if (ino < sctx->send_progress && nce->need_later_update) {
2023			name_cache_delete(sctx, nce);
2024			kfree(nce);
2025			nce = NULL;
2026		} else {
2027			name_cache_used(sctx, nce);
2028			*parent_ino = nce->parent_ino;
2029			*parent_gen = nce->parent_gen;
2030			ret = fs_path_add(dest, nce->name, nce->name_len);
2031			if (ret < 0)
2032				goto out;
2033			ret = nce->ret;
2034			goto out;
2035		}
2036	}
2037
2038	path = alloc_path_for_send();
2039	if (!path)
2040		return -ENOMEM;
2041
2042	/*
2043	 * If the inode is not existent yet, add the orphan name and return 1.
2044	 * This should only happen for the parent dir that we determine in
2045	 * __record_new_ref
2046	 */
2047	ret = is_inode_existent(sctx, ino, gen);
2048	if (ret < 0)
2049		goto out;
2050
2051	if (!ret) {
2052		ret = gen_unique_name(sctx, ino, gen, dest);
2053		if (ret < 0)
2054			goto out;
2055		ret = 1;
2056		goto out_cache;
2057	}
2058
2059get_ref:
2060	/*
2061	 * Depending on whether the inode was already processed or not, use
2062	 * send_root or parent_root for ref lookup.
2063	 */
2064	if (ino < sctx->send_progress && !skip_name_cache)
2065		ret = get_first_ref(sctx->send_root, ino,
2066				    parent_ino, parent_gen, dest);
2067	else
2068		ret = get_first_ref(sctx->parent_root, ino,
2069				    parent_ino, parent_gen, dest);
2070	if (ret < 0)
2071		goto out;
2072
2073	/*
2074	 * Check if the ref was overwritten by an inode's ref that was processed
2075	 * earlier. If yes, treat as orphan and return 1.
2076	 */
2077	ret = did_overwrite_ref(sctx, *parent_ino, *parent_gen, ino, gen,
2078			dest->start, dest->end - dest->start);
2079	if (ret < 0)
2080		goto out;
2081	if (ret) {
2082		fs_path_reset(dest);
2083		ret = gen_unique_name(sctx, ino, gen, dest);
2084		if (ret < 0)
2085			goto out;
2086		ret = 1;
2087	}
2088	if (skip_name_cache)
2089		goto out;
2090
2091out_cache:
2092	/*
2093	 * Store the result of the lookup in the name cache.
2094	 */
2095	nce = kmalloc(sizeof(*nce) + fs_path_len(dest) + 1, GFP_NOFS);
2096	if (!nce) {
2097		ret = -ENOMEM;
2098		goto out;
2099	}
2100
2101	nce->ino = ino;
2102	nce->gen = gen;
2103	nce->parent_ino = *parent_ino;
2104	nce->parent_gen = *parent_gen;
2105	nce->name_len = fs_path_len(dest);
2106	nce->ret = ret;
2107	strcpy(nce->name, dest->start);
2108
2109	if (ino < sctx->send_progress)
2110		nce->need_later_update = 0;
2111	else
2112		nce->need_later_update = 1;
2113
2114	nce_ret = name_cache_insert(sctx, nce);
2115	if (nce_ret < 0)
2116		ret = nce_ret;
2117	name_cache_clean_unused(sctx);
2118
2119out:
2120	btrfs_free_path(path);
2121	return ret;
2122}
2123
2124/*
2125 * Magic happens here. This function returns the first ref to an inode as it
2126 * would look like while receiving the stream at this point in time.
2127 * We walk the path up to the root. For every inode in between, we check if it
2128 * was already processed/sent. If yes, we continue with the parent as found
2129 * in send_root. If not, we continue with the parent as found in parent_root.
2130 * If we encounter an inode that was deleted at this point in time, we use the
2131 * inodes "orphan" name instead of the real name and stop. Same with new inodes
2132 * that were not created yet and overwritten inodes/refs.
2133 *
2134 * When do we have have orphan inodes:
2135 * 1. When an inode is freshly created and thus no valid refs are available yet
2136 * 2. When a directory lost all it's refs (deleted) but still has dir items
2137 *    inside which were not processed yet (pending for move/delete). If anyone
2138 *    tried to get the path to the dir items, it would get a path inside that
2139 *    orphan directory.
2140 * 3. When an inode is moved around or gets new links, it may overwrite the ref
2141 *    of an unprocessed inode. If in that case the first ref would be
2142 *    overwritten, the overwritten inode gets "orphanized". Later when we
2143 *    process this overwritten inode, it is restored at a new place by moving
2144 *    the orphan inode.
2145 *
2146 * sctx->send_progress tells this function at which point in time receiving
2147 * would be.
2148 */
2149static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen,
2150			struct fs_path *dest)
2151{
2152	int ret = 0;
2153	struct fs_path *name = NULL;
2154	u64 parent_inode = 0;
2155	u64 parent_gen = 0;
2156	int stop = 0;
2157	int skip_name_cache = 0;
2158
2159	name = fs_path_alloc();
2160	if (!name) {
2161		ret = -ENOMEM;
2162		goto out;
2163	}
2164
2165	if (is_waiting_for_move(sctx, ino))
2166		skip_name_cache = 1;
2167
2168	dest->reversed = 1;
2169	fs_path_reset(dest);
2170
2171	while (!stop && ino != BTRFS_FIRST_FREE_OBJECTID) {
2172		fs_path_reset(name);
2173
2174		if (is_waiting_for_rm(sctx, ino)) {
2175			ret = gen_unique_name(sctx, ino, gen, name);
2176			if (ret < 0)
2177				goto out;
2178			ret = fs_path_add_path(dest, name);
2179			break;
2180		}
2181
2182		ret = __get_cur_name_and_parent(sctx, ino, gen, skip_name_cache,
2183				&parent_inode, &parent_gen, name);
2184		if (ret < 0)
2185			goto out;
2186		if (ret)
2187			stop = 1;
2188
2189		if (!skip_name_cache &&
2190		    is_waiting_for_move(sctx, parent_inode))
2191			skip_name_cache = 1;
2192
2193		ret = fs_path_add_path(dest, name);
2194		if (ret < 0)
2195			goto out;
2196
2197		ino = parent_inode;
2198		gen = parent_gen;
2199	}
2200
2201out:
2202	fs_path_free(name);
2203	if (!ret)
2204		fs_path_unreverse(dest);
2205	return ret;
2206}
2207
2208/*
2209 * Sends a BTRFS_SEND_C_SUBVOL command/item to userspace
2210 */
2211static int send_subvol_begin(struct send_ctx *sctx)
2212{
2213	int ret;
2214	struct btrfs_root *send_root = sctx->send_root;
2215	struct btrfs_root *parent_root = sctx->parent_root;
2216	struct btrfs_path *path;
2217	struct btrfs_key key;
2218	struct btrfs_root_ref *ref;
2219	struct extent_buffer *leaf;
2220	char *name = NULL;
2221	int namelen;
2222
2223	path = btrfs_alloc_path();
2224	if (!path)
2225		return -ENOMEM;
2226
2227	name = kmalloc(BTRFS_PATH_NAME_MAX, GFP_NOFS);
2228	if (!name) {
2229		btrfs_free_path(path);
2230		return -ENOMEM;
2231	}
2232
2233	key.objectid = send_root->objectid;
2234	key.type = BTRFS_ROOT_BACKREF_KEY;
2235	key.offset = 0;
2236
2237	ret = btrfs_search_slot_for_read(send_root->fs_info->tree_root,
2238				&key, path, 1, 0);
2239	if (ret < 0)
2240		goto out;
2241	if (ret) {
2242		ret = -ENOENT;
2243		goto out;
2244	}
2245
2246	leaf = path->nodes[0];
2247	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
2248	if (key.type != BTRFS_ROOT_BACKREF_KEY ||
2249	    key.objectid != send_root->objectid) {
2250		ret = -ENOENT;
2251		goto out;
2252	}
2253	ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref);
2254	namelen = btrfs_root_ref_name_len(leaf, ref);
2255	read_extent_buffer(leaf, name, (unsigned long)(ref + 1), namelen);
2256	btrfs_release_path(path);
2257
2258	if (parent_root) {
2259		ret = begin_cmd(sctx, BTRFS_SEND_C_SNAPSHOT);
2260		if (ret < 0)
2261			goto out;
2262	} else {
2263		ret = begin_cmd(sctx, BTRFS_SEND_C_SUBVOL);
2264		if (ret < 0)
2265			goto out;
2266	}
2267
2268	TLV_PUT_STRING(sctx, BTRFS_SEND_A_PATH, name, namelen);
2269	TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID,
2270			sctx->send_root->root_item.uuid);
2271	TLV_PUT_U64(sctx, BTRFS_SEND_A_CTRANSID,
2272		    le64_to_cpu(sctx->send_root->root_item.ctransid));
2273	if (parent_root) {
2274		TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
2275				sctx->parent_root->root_item.uuid);
2276		TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID,
2277			    le64_to_cpu(sctx->parent_root->root_item.ctransid));
2278	}
2279
2280	ret = send_cmd(sctx);
2281
2282tlv_put_failure:
2283out:
2284	btrfs_free_path(path);
2285	kfree(name);
2286	return ret;
2287}
2288
2289static int send_truncate(struct send_ctx *sctx, u64 ino, u64 gen, u64 size)
2290{
2291	int ret = 0;
2292	struct fs_path *p;
2293
2294verbose_printk("btrfs: send_truncate %llu size=%llu\n", ino, size);
2295
2296	p = fs_path_alloc();
2297	if (!p)
2298		return -ENOMEM;
2299
2300	ret = begin_cmd(sctx, BTRFS_SEND_C_TRUNCATE);
2301	if (ret < 0)
2302		goto out;
2303
2304	ret = get_cur_path(sctx, ino, gen, p);
2305	if (ret < 0)
2306		goto out;
2307	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
2308	TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, size);
2309
2310	ret = send_cmd(sctx);
2311
2312tlv_put_failure:
2313out:
2314	fs_path_free(p);
2315	return ret;
2316}
2317
2318static int send_chmod(struct send_ctx *sctx, u64 ino, u64 gen, u64 mode)
2319{
2320	int ret = 0;
2321	struct fs_path *p;
2322
2323verbose_printk("btrfs: send_chmod %llu mode=%llu\n", ino, mode);
2324
2325	p = fs_path_alloc();
2326	if (!p)
2327		return -ENOMEM;
2328
2329	ret = begin_cmd(sctx, BTRFS_SEND_C_CHMOD);
2330	if (ret < 0)
2331		goto out;
2332
2333	ret = get_cur_path(sctx, ino, gen, p);
2334	if (ret < 0)
2335		goto out;
2336	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
2337	TLV_PUT_U64(sctx, BTRFS_SEND_A_MODE, mode & 07777);
2338
2339	ret = send_cmd(sctx);
2340
2341tlv_put_failure:
2342out:
2343	fs_path_free(p);
2344	return ret;
2345}
2346
2347static int send_chown(struct send_ctx *sctx, u64 ino, u64 gen, u64 uid, u64 gid)
2348{
2349	int ret = 0;
2350	struct fs_path *p;
2351
2352verbose_printk("btrfs: send_chown %llu uid=%llu, gid=%llu\n", ino, uid, gid);
2353
2354	p = fs_path_alloc();
2355	if (!p)
2356		return -ENOMEM;
2357
2358	ret = begin_cmd(sctx, BTRFS_SEND_C_CHOWN);
2359	if (ret < 0)
2360		goto out;
2361
2362	ret = get_cur_path(sctx, ino, gen, p);
2363	if (ret < 0)
2364		goto out;
2365	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
2366	TLV_PUT_U64(sctx, BTRFS_SEND_A_UID, uid);
2367	TLV_PUT_U64(sctx, BTRFS_SEND_A_GID, gid);
2368
2369	ret = send_cmd(sctx);
2370
2371tlv_put_failure:
2372out:
2373	fs_path_free(p);
2374	return ret;
2375}
2376
2377static int send_utimes(struct send_ctx *sctx, u64 ino, u64 gen)
2378{
2379	int ret = 0;
2380	struct fs_path *p = NULL;
2381	struct btrfs_inode_item *ii;
2382	struct btrfs_path *path = NULL;
2383	struct extent_buffer *eb;
2384	struct btrfs_key key;
2385	int slot;
2386
2387verbose_printk("btrfs: send_utimes %llu\n", ino);
2388
2389	p = fs_path_alloc();
2390	if (!p)
2391		return -ENOMEM;
2392
2393	path = alloc_path_for_send();
2394	if (!path) {
2395		ret = -ENOMEM;
2396		goto out;
2397	}
2398
2399	key.objectid = ino;
2400	key.type = BTRFS_INODE_ITEM_KEY;
2401	key.offset = 0;
2402	ret = btrfs_search_slot(NULL, sctx->send_root, &key, path, 0, 0);
2403	if (ret < 0)
2404		goto out;
2405
2406	eb = path->nodes[0];
2407	slot = path->slots[0];
2408	ii = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
2409
2410	ret = begin_cmd(sctx, BTRFS_SEND_C_UTIMES);
2411	if (ret < 0)
2412		goto out;
2413
2414	ret = get_cur_path(sctx, ino, gen, p);
2415	if (ret < 0)
2416		goto out;
2417	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
2418	TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_ATIME, eb,
2419			btrfs_inode_atime(ii));
2420	TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_MTIME, eb,
2421			btrfs_inode_mtime(ii));
2422	TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_CTIME, eb,
2423			btrfs_inode_ctime(ii));
2424	/* TODO Add otime support when the otime patches get into upstream */
2425
2426	ret = send_cmd(sctx);
2427
2428tlv_put_failure:
2429out:
2430	fs_path_free(p);
2431	btrfs_free_path(path);
2432	return ret;
2433}
2434
2435/*
2436 * Sends a BTRFS_SEND_C_MKXXX or SYMLINK command to user space. We don't have
2437 * a valid path yet because we did not process the refs yet. So, the inode
2438 * is created as orphan.
2439 */
2440static int send_create_inode(struct send_ctx *sctx, u64 ino)
2441{
2442	int ret = 0;
2443	struct fs_path *p;
2444	int cmd;
2445	u64 gen;
2446	u64 mode;
2447	u64 rdev;
2448
2449verbose_printk("btrfs: send_create_inode %llu\n", ino);
2450
2451	p = fs_path_alloc();
2452	if (!p)
2453		return -ENOMEM;
2454
2455	ret = get_inode_info(sctx->send_root, ino, NULL, &gen, &mode, NULL,
2456			NULL, &rdev);
2457	if (ret < 0)
2458		goto out;
2459
2460	if (S_ISREG(mode)) {
2461		cmd = BTRFS_SEND_C_MKFILE;
2462	} else if (S_ISDIR(mode)) {
2463		cmd = BTRFS_SEND_C_MKDIR;
2464	} else if (S_ISLNK(mode)) {
2465		cmd = BTRFS_SEND_C_SYMLINK;
2466	} else if (S_ISCHR(mode) || S_ISBLK(mode)) {
2467		cmd = BTRFS_SEND_C_MKNOD;
2468	} else if (S_ISFIFO(mode)) {
2469		cmd = BTRFS_SEND_C_MKFIFO;
2470	} else if (S_ISSOCK(mode)) {
2471		cmd = BTRFS_SEND_C_MKSOCK;
2472	} else {
2473		printk(KERN_WARNING "btrfs: unexpected inode type %o",
2474				(int)(mode & S_IFMT));
2475		ret = -ENOTSUPP;
2476		goto out;
2477	}
2478
2479	ret = begin_cmd(sctx, cmd);
2480	if (ret < 0)
2481		goto out;
2482
2483	ret = gen_unique_name(sctx, ino, gen, p);
2484	if (ret < 0)
2485		goto out;
2486
2487	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
2488	TLV_PUT_U64(sctx, BTRFS_SEND_A_INO, ino);
2489
2490	if (S_ISLNK(mode)) {
2491		fs_path_reset(p);
2492		ret = read_symlink(sctx->send_root, ino, p);
2493		if (ret < 0)
2494			goto out;
2495		TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, p);
2496	} else if (S_ISCHR(mode) || S_ISBLK(mode) ||
2497		   S_ISFIFO(mode) || S_ISSOCK(mode)) {
2498		TLV_PUT_U64(sctx, BTRFS_SEND_A_RDEV, new_encode_dev(rdev));
2499		TLV_PUT_U64(sctx, BTRFS_SEND_A_MODE, mode);
2500	}
2501
2502	ret = send_cmd(sctx);
2503	if (ret < 0)
2504		goto out;
2505
2506
2507tlv_put_failure:
2508out:
2509	fs_path_free(p);
2510	return ret;
2511}
2512
2513/*
2514 * We need some special handling for inodes that get processed before the parent
2515 * directory got created. See process_recorded_refs for details.
2516 * This function does the check if we already created the dir out of order.
2517 */
2518static int did_create_dir(struct send_ctx *sctx, u64 dir)
2519{
2520	int ret = 0;
2521	struct btrfs_path *path = NULL;
2522	struct btrfs_key key;
2523	struct btrfs_key found_key;
2524	struct btrfs_key di_key;
2525	struct extent_buffer *eb;
2526	struct btrfs_dir_item *di;
2527	int slot;
2528
2529	path = alloc_path_for_send();
2530	if (!path) {
2531		ret = -ENOMEM;
2532		goto out;
2533	}
2534
2535	key.objectid = dir;
2536	key.type = BTRFS_DIR_INDEX_KEY;
2537	key.offset = 0;
2538	ret = btrfs_search_slot(NULL, sctx->send_root, &key, path, 0, 0);
2539	if (ret < 0)
2540		goto out;
2541
2542	while (1) {
2543		eb = path->nodes[0];
2544		slot = path->slots[0];
2545		if (slot >= btrfs_header_nritems(eb)) {
2546			ret = btrfs_next_leaf(sctx->send_root, path);
2547			if (ret < 0) {
2548				goto out;
2549			} else if (ret > 0) {
2550				ret = 0;
2551				break;
2552			}
2553			continue;
2554		}
2555
2556		btrfs_item_key_to_cpu(eb, &found_key, slot);
2557		if (found_key.objectid != key.objectid ||
2558		    found_key.type != key.type) {
2559			ret = 0;
2560			goto out;
2561		}
2562
2563		di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
2564		btrfs_dir_item_key_to_cpu(eb, di, &di_key);
2565
2566		if (di_key.type != BTRFS_ROOT_ITEM_KEY &&
2567		    di_key.objectid < sctx->send_progress) {
2568			ret = 1;
2569			goto out;
2570		}
2571
2572		path->slots[0]++;
2573	}
2574
2575out:
2576	btrfs_free_path(path);
2577	return ret;
2578}
2579
2580/*
2581 * Only creates the inode if it is:
2582 * 1. Not a directory
2583 * 2. Or a directory which was not created already due to out of order
2584 *    directories. See did_create_dir and process_recorded_refs for details.
2585 */
2586static int send_create_inode_if_needed(struct send_ctx *sctx)
2587{
2588	int ret;
2589
2590	if (S_ISDIR(sctx->cur_inode_mode)) {
2591		ret = did_create_dir(sctx, sctx->cur_ino);
2592		if (ret < 0)
2593			goto out;
2594		if (ret) {
2595			ret = 0;
2596			goto out;
2597		}
2598	}
2599
2600	ret = send_create_inode(sctx, sctx->cur_ino);
2601	if (ret < 0)
2602		goto out;
2603
2604out:
2605	return ret;
2606}
2607
2608struct recorded_ref {
2609	struct list_head list;
2610	char *dir_path;
2611	char *name;
2612	struct fs_path *full_path;
2613	u64 dir;
2614	u64 dir_gen;
2615	int dir_path_len;
2616	int name_len;
2617};
2618
2619/*
2620 * We need to process new refs before deleted refs, but compare_tree gives us
2621 * everything mixed. So we first record all refs and later process them.
2622 * This function is a helper to record one ref.
2623 */
2624static int record_ref(struct list_head *head, u64 dir,
2625		      u64 dir_gen, struct fs_path *path)
2626{
2627	struct recorded_ref *ref;
2628
2629	ref = kmalloc(sizeof(*ref), GFP_NOFS);
2630	if (!ref)
2631		return -ENOMEM;
2632
2633	ref->dir = dir;
2634	ref->dir_gen = dir_gen;
2635	ref->full_path = path;
2636
2637	ref->name = (char *)kbasename(ref->full_path->start);
2638	ref->name_len = ref->full_path->end - ref->name;
2639	ref->dir_path = ref->full_path->start;
2640	if (ref->name == ref->full_path->start)
2641		ref->dir_path_len = 0;
2642	else
2643		ref->dir_path_len = ref->full_path->end -
2644				ref->full_path->start - 1 - ref->name_len;
2645
2646	list_add_tail(&ref->list, head);
2647	return 0;
2648}
2649
2650static int dup_ref(struct recorded_ref *ref, struct list_head *list)
2651{
2652	struct recorded_ref *new;
2653
2654	new = kmalloc(sizeof(*ref), GFP_NOFS);
2655	if (!new)
2656		return -ENOMEM;
2657
2658	new->dir = ref->dir;
2659	new->dir_gen = ref->dir_gen;
2660	new->full_path = NULL;
2661	INIT_LIST_HEAD(&new->list);
2662	list_add_tail(&new->list, list);
2663	return 0;
2664}
2665
2666static void __free_recorded_refs(struct list_head *head)
2667{
2668	struct recorded_ref *cur;
2669
2670	while (!list_empty(head)) {
2671		cur = list_entry(head->next, struct recorded_ref, list);
2672		fs_path_free(cur->full_path);
2673		list_del(&cur->list);
2674		kfree(cur);
2675	}
2676}
2677
2678static void free_recorded_refs(struct send_ctx *sctx)
2679{
2680	__free_recorded_refs(&sctx->new_refs);
2681	__free_recorded_refs(&sctx->deleted_refs);
2682}
2683
2684/*
2685 * Renames/moves a file/dir to its orphan name. Used when the first
2686 * ref of an unprocessed inode gets overwritten and for all non empty
2687 * directories.
2688 */
2689static int orphanize_inode(struct send_ctx *sctx, u64 ino, u64 gen,
2690			  struct fs_path *path)
2691{
2692	int ret;
2693	struct fs_path *orphan;
2694
2695	orphan = fs_path_alloc();
2696	if (!orphan)
2697		return -ENOMEM;
2698
2699	ret = gen_unique_name(sctx, ino, gen, orphan);
2700	if (ret < 0)
2701		goto out;
2702
2703	ret = send_rename(sctx, path, orphan);
2704
2705out:
2706	fs_path_free(orphan);
2707	return ret;
2708}
2709
2710static struct orphan_dir_info *
2711add_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino)
2712{
2713	struct rb_node **p = &sctx->orphan_dirs.rb_node;
2714	struct rb_node *parent = NULL;
2715	struct orphan_dir_info *entry, *odi;
2716
2717	odi = kmalloc(sizeof(*odi), GFP_NOFS);
2718	if (!odi)
2719		return ERR_PTR(-ENOMEM);
2720	odi->ino = dir_ino;
2721	odi->gen = 0;
2722
2723	while (*p) {
2724		parent = *p;
2725		entry = rb_entry(parent, struct orphan_dir_info, node);
2726		if (dir_ino < entry->ino) {
2727			p = &(*p)->rb_left;
2728		} else if (dir_ino > entry->ino) {
2729			p = &(*p)->rb_right;
2730		} else {
2731			kfree(odi);
2732			return entry;
2733		}
2734	}
2735
2736	rb_link_node(&odi->node, parent, p);
2737	rb_insert_color(&odi->node, &sctx->orphan_dirs);
2738	return odi;
2739}
2740
2741static struct orphan_dir_info *
2742get_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino)
2743{
2744	struct rb_node *n = sctx->orphan_dirs.rb_node;
2745	struct orphan_dir_info *entry;
2746
2747	while (n) {
2748		entry = rb_entry(n, struct orphan_dir_info, node);
2749		if (dir_ino < entry->ino)
2750			n = n->rb_left;
2751		else if (dir_ino > entry->ino)
2752			n = n->rb_right;
2753		else
2754			return entry;
2755	}
2756	return NULL;
2757}
2758
2759static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino)
2760{
2761	struct orphan_dir_info *odi = get_orphan_dir_info(sctx, dir_ino);
2762
2763	return odi != NULL;
2764}
2765
2766static void free_orphan_dir_info(struct send_ctx *sctx,
2767				 struct orphan_dir_info *odi)
2768{
2769	if (!odi)
2770		return;
2771	rb_erase(&odi->node, &sctx->orphan_dirs);
2772	kfree(odi);
2773}
2774
2775/*
2776 * Returns 1 if a directory can be removed at this point in time.
2777 * We check this by iterating all dir items and checking if the inode behind
2778 * the dir item was already processed.
2779 */
2780static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen,
2781		     u64 send_progress)
2782{
2783	int ret = 0;
2784	struct btrfs_root *root = sctx->parent_root;
2785	struct btrfs_path *path;
2786	struct btrfs_key key;
2787	struct btrfs_key found_key;
2788	struct btrfs_key loc;
2789	struct btrfs_dir_item *di;
2790
2791	/*
2792	 * Don't try to rmdir the top/root subvolume dir.
2793	 */
2794	if (dir == BTRFS_FIRST_FREE_OBJECTID)
2795		return 0;
2796
2797	path = alloc_path_for_send();
2798	if (!path)
2799		return -ENOMEM;
2800
2801	key.objectid = dir;
2802	key.type = BTRFS_DIR_INDEX_KEY;
2803	key.offset = 0;
2804	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2805	if (ret < 0)
2806		goto out;
2807
2808	while (1) {
2809		struct waiting_dir_move *dm;
2810
2811		if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2812			ret = btrfs_next_leaf(root, path);
2813			if (ret < 0)
2814				goto out;
2815			else if (ret > 0)
2816				break;
2817			continue;
2818		}
2819		btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2820				      path->slots[0]);
2821		if (found_key.objectid != key.objectid ||
2822		    found_key.type != key.type)
2823			break;
2824
2825		di = btrfs_item_ptr(path->nodes[0], path->slots[0],
2826				struct btrfs_dir_item);
2827		btrfs_dir_item_key_to_cpu(path->nodes[0], di, &loc);
2828
2829		dm = get_waiting_dir_move(sctx, loc.objectid);
2830		if (dm) {
2831			struct orphan_dir_info *odi;
2832
2833			odi = add_orphan_dir_info(sctx, dir);
2834			if (IS_ERR(odi)) {
2835				ret = PTR_ERR(odi);
2836				goto out;
2837			}
2838			odi->gen = dir_gen;
2839			dm->rmdir_ino = dir;
2840			ret = 0;
2841			goto out;
2842		}
2843
2844		if (loc.objectid > send_progress) {
2845			ret = 0;
2846			goto out;
2847		}
2848
2849		path->slots[0]++;
2850	}
2851
2852	ret = 1;
2853
2854out:
2855	btrfs_free_path(path);
2856	return ret;
2857}
2858
2859static int is_waiting_for_move(struct send_ctx *sctx, u64 ino)
2860{
2861	struct waiting_dir_move *entry = get_waiting_dir_move(sctx, ino);
2862
2863	return entry != NULL;
2864}
2865
2866static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino)
2867{
2868	struct rb_node **p = &sctx->waiting_dir_moves.rb_node;
2869	struct rb_node *parent = NULL;
2870	struct waiting_dir_move *entry, *dm;
2871
2872	dm = kmalloc(sizeof(*dm), GFP_NOFS);
2873	if (!dm)
2874		return -ENOMEM;
2875	dm->ino = ino;
2876	dm->rmdir_ino = 0;
2877
2878	while (*p) {
2879		parent = *p;
2880		entry = rb_entry(parent, struct waiting_dir_move, node);
2881		if (ino < entry->ino) {
2882			p = &(*p)->rb_left;
2883		} else if (ino > entry->ino) {
2884			p = &(*p)->rb_right;
2885		} else {
2886			kfree(dm);
2887			return -EEXIST;
2888		}
2889	}
2890
2891	rb_link_node(&dm->node, parent, p);
2892	rb_insert_color(&dm->node, &sctx->waiting_dir_moves);
2893	return 0;
2894}
2895
2896static struct waiting_dir_move *
2897get_waiting_dir_move(struct send_ctx *sctx, u64 ino)
2898{
2899	struct rb_node *n = sctx->waiting_dir_moves.rb_node;
2900	struct waiting_dir_move *entry;
2901
2902	while (n) {
2903		entry = rb_entry(n, struct waiting_dir_move, node);
2904		if (ino < entry->ino)
2905			n = n->rb_left;
2906		else if (ino > entry->ino)
2907			n = n->rb_right;
2908		else
2909			return entry;
2910	}
2911	return NULL;
2912}
2913
2914static void free_waiting_dir_move(struct send_ctx *sctx,
2915				  struct waiting_dir_move *dm)
2916{
2917	if (!dm)
2918		return;
2919	rb_erase(&dm->node, &sctx->waiting_dir_moves);
2920	kfree(dm);
2921}
2922
2923static int add_pending_dir_move(struct send_ctx *sctx, u64 parent_ino)
2924{
2925	struct rb_node **p = &sctx->pending_dir_moves.rb_node;
2926	struct rb_node *parent = NULL;
2927	struct pending_dir_move *entry, *pm;
2928	struct recorded_ref *cur;
2929	int exists = 0;
2930	int ret;
2931
2932	pm = kmalloc(sizeof(*pm), GFP_NOFS);
2933	if (!pm)
2934		return -ENOMEM;
2935	pm->parent_ino = parent_ino;
2936	pm->ino = sctx->cur_ino;
2937	pm->gen = sctx->cur_inode_gen;
2938	INIT_LIST_HEAD(&pm->list);
2939	INIT_LIST_HEAD(&pm->update_refs);
2940	RB_CLEAR_NODE(&pm->node);
2941
2942	while (*p) {
2943		parent = *p;
2944		entry = rb_entry(parent, struct pending_dir_move, node);
2945		if (parent_ino < entry->parent_ino) {
2946			p = &(*p)->rb_left;
2947		} else if (parent_ino > entry->parent_ino) {
2948			p = &(*p)->rb_right;
2949		} else {
2950			exists = 1;
2951			break;
2952		}
2953	}
2954
2955	list_for_each_entry(cur, &sctx->deleted_refs, list) {
2956		ret = dup_ref(cur, &pm->update_refs);
2957		if (ret < 0)
2958			goto out;
2959	}
2960	list_for_each_entry(cur, &sctx->new_refs, list) {
2961		ret = dup_ref(cur, &pm->update_refs);
2962		if (ret < 0)
2963			goto out;
2964	}
2965
2966	ret = add_waiting_dir_move(sctx, pm->ino);
2967	if (ret)
2968		goto out;
2969
2970	if (exists) {
2971		list_add_tail(&pm->list, &entry->list);
2972	} else {
2973		rb_link_node(&pm->node, parent, p);
2974		rb_insert_color(&pm->node, &sctx->pending_dir_moves);
2975	}
2976	ret = 0;
2977out:
2978	if (ret) {
2979		__free_recorded_refs(&pm->update_refs);
2980		kfree(pm);
2981	}
2982	return ret;
2983}
2984
2985static struct pending_dir_move *get_pending_dir_moves(struct send_ctx *sctx,
2986						      u64 parent_ino)
2987{
2988	struct rb_node *n = sctx->pending_dir_moves.rb_node;
2989	struct pending_dir_move *entry;
2990
2991	while (n) {
2992		entry = rb_entry(n, struct pending_dir_move, node);
2993		if (parent_ino < entry->parent_ino)
2994			n = n->rb_left;
2995		else if (parent_ino > entry->parent_ino)
2996			n = n->rb_right;
2997		else
2998			return entry;
2999	}
3000	return NULL;
3001}
3002
3003static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
3004{
3005	struct fs_path *from_path = NULL;
3006	struct fs_path *to_path = NULL;
3007	struct fs_path *name = NULL;
3008	u64 orig_progress = sctx->send_progress;
3009	struct recorded_ref *cur;
3010	u64 parent_ino, parent_gen;
3011	struct waiting_dir_move *dm = NULL;
3012	u64 rmdir_ino = 0;
3013	int ret;
3014
3015	name = fs_path_alloc();
3016	from_path = fs_path_alloc();
3017	if (!name || !from_path) {
3018		ret = -ENOMEM;
3019		goto out;
3020	}
3021
3022	dm = get_waiting_dir_move(sctx, pm->ino);
3023	ASSERT(dm);
3024	rmdir_ino = dm->rmdir_ino;
3025	free_waiting_dir_move(sctx, dm);
3026
3027	ret = get_first_ref(sctx->parent_root, pm->ino,
3028			    &parent_ino, &parent_gen, name);
3029	if (ret < 0)
3030		goto out;
3031
3032	if (parent_ino == sctx->cur_ino) {
3033		/* child only renamed, not moved */
3034		ASSERT(parent_gen == sctx->cur_inode_gen);
3035		ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen,
3036				   from_path);
3037		if (ret < 0)
3038			goto out;
3039		ret = fs_path_add_path(from_path, name);
3040		if (ret < 0)
3041			goto out;
3042	} else {
3043		/* child moved and maybe renamed too */
3044		sctx->send_progress = pm->ino;
3045		ret = get_cur_path(sctx, pm->ino, pm->gen, from_path);
3046		if (ret < 0)
3047			goto out;
3048	}
3049
3050	fs_path_free(name);
3051	name = NULL;
3052
3053	to_path = fs_path_alloc();
3054	if (!to_path) {
3055		ret = -ENOMEM;
3056		goto out;
3057	}
3058
3059	sctx->send_progress = sctx->cur_ino + 1;
3060	ret = get_cur_path(sctx, pm->ino, pm->gen, to_path);
3061	if (ret < 0)
3062		goto out;
3063
3064	ret = send_rename(sctx, from_path, to_path);
3065	if (ret < 0)
3066		goto out;
3067
3068	if (rmdir_ino) {
3069		struct orphan_dir_info *odi;
3070
3071		odi = get_orphan_dir_info(sctx, rmdir_ino);
3072		if (!odi) {
3073			/* already deleted */
3074			goto finish;
3075		}
3076		ret = can_rmdir(sctx, rmdir_ino, odi->gen, sctx->cur_ino + 1);
3077		if (ret < 0)
3078			goto out;
3079		if (!ret)
3080			goto finish;
3081
3082		name = fs_path_alloc();
3083		if (!name) {
3084			ret = -ENOMEM;
3085			goto out;
3086		}
3087		ret = get_cur_path(sctx, rmdir_ino, odi->gen, name);
3088		if (ret < 0)
3089			goto out;
3090		ret = send_rmdir(sctx, name);
3091		if (ret < 0)
3092			goto out;
3093		free_orphan_dir_info(sctx, odi);
3094	}
3095
3096finish:
3097	ret = send_utimes(sctx, pm->ino, pm->gen);
3098	if (ret < 0)
3099		goto out;
3100
3101	/*
3102	 * After rename/move, need to update the utimes of both new parent(s)
3103	 * and old parent(s).
3104	 */
3105	list_for_each_entry(cur, &pm->update_refs, list) {
3106		if (cur->dir == rmdir_ino)
3107			continue;
3108		ret = send_utimes(sctx, cur->dir, cur->dir_gen);
3109		if (ret < 0)
3110			goto out;
3111	}
3112
3113out:
3114	fs_path_free(name);
3115	fs_path_free(from_path);
3116	fs_path_free(to_path);
3117	sctx->send_progress = orig_progress;
3118
3119	return ret;
3120}
3121
3122static void free_pending_move(struct send_ctx *sctx, struct pending_dir_move *m)
3123{
3124	if (!list_empty(&m->list))
3125		list_del(&m->list);
3126	if (!RB_EMPTY_NODE(&m->node))
3127		rb_erase(&m->node, &sctx->pending_dir_moves);
3128	__free_recorded_refs(&m->update_refs);
3129	kfree(m);
3130}
3131
3132static void tail_append_pending_moves(struct pending_dir_move *moves,
3133				      struct list_head *stack)
3134{
3135	if (list_empty(&moves->list)) {
3136		list_add_tail(&moves->list, stack);
3137	} else {
3138		LIST_HEAD(list);
3139		list_splice_init(&moves->list, &list);
3140		list_add_tail(&moves->list, stack);
3141		list_splice_tail(&list, stack);
3142	}
3143}
3144
3145static int apply_children_dir_moves(struct send_ctx *sctx)
3146{
3147	struct pending_dir_move *pm;
3148	struct list_head stack;
3149	u64 parent_ino = sctx->cur_ino;
3150	int ret = 0;
3151
3152	pm = get_pending_dir_moves(sctx, parent_ino);
3153	if (!pm)
3154		return 0;
3155
3156	INIT_LIST_HEAD(&stack);
3157	tail_append_pending_moves(pm, &stack);
3158
3159	while (!list_empty(&stack)) {
3160		pm = list_first_entry(&stack, struct pending_dir_move, list);
3161		parent_ino = pm->ino;
3162		ret = apply_dir_move(sctx, pm);
3163		free_pending_move(sctx, pm);
3164		if (ret)
3165			goto out;
3166		pm = get_pending_dir_moves(sctx, parent_ino);
3167		if (pm)
3168			tail_append_pending_moves(pm, &stack);
3169	}
3170	return 0;
3171
3172out:
3173	while (!list_empty(&stack)) {
3174		pm = list_first_entry(&stack, struct pending_dir_move, list);
3175		free_pending_move(sctx, pm);
3176	}
3177	return ret;
3178}
3179
3180static int wait_for_parent_move(struct send_ctx *sctx,
3181				struct recorded_ref *parent_ref)
3182{
3183	int ret;
3184	u64 ino = parent_ref->dir;
3185	u64 parent_ino_before, parent_ino_after;
3186	u64 new_gen, old_gen;
3187	struct fs_path *path_before = NULL;
3188	struct fs_path *path_after = NULL;
3189	int len1, len2;
3190
3191	if (parent_ref->dir <= sctx->cur_ino)
3192		return 0;
3193
3194	if (is_waiting_for_move(sctx, ino))
3195		return 1;
3196
3197	ret = get_inode_info(sctx->parent_root, ino, NULL, &old_gen,
3198			     NULL, NULL, NULL, NULL);
3199	if (ret == -ENOENT)
3200		return 0;
3201	else if (ret < 0)
3202		return ret;
3203
3204	ret = get_inode_info(sctx->send_root, ino, NULL, &new_gen,
3205			     NULL, NULL, NULL, NULL);
3206	if (ret < 0)
3207		return ret;
3208
3209	if (new_gen != old_gen)
3210		return 0;
3211
3212	path_before = fs_path_alloc();
3213	if (!path_before)
3214		return -ENOMEM;
3215
3216	ret = get_first_ref(sctx->parent_root, ino, &parent_ino_before,
3217			    NULL, path_before);
3218	if (ret == -ENOENT) {
3219		ret = 0;
3220		goto out;
3221	} else if (ret < 0) {
3222		goto out;
3223	}
3224
3225	path_after = fs_path_alloc();
3226	if (!path_after) {
3227		ret = -ENOMEM;
3228		goto out;
3229	}
3230
3231	ret = get_first_ref(sctx->send_root, ino, &parent_ino_after,
3232			    NULL, path_after);
3233	if (ret == -ENOENT) {
3234		ret = 0;
3235		goto out;
3236	} else if (ret < 0) {
3237		goto out;
3238	}
3239
3240	len1 = fs_path_len(path_before);
3241	len2 = fs_path_len(path_after);
3242	if (parent_ino_before != parent_ino_after || len1 != len2 ||
3243	     memcmp(path_before->start, path_after->start, len1)) {
3244		ret = 1;
3245		goto out;
3246	}
3247	ret = 0;
3248
3249out:
3250	fs_path_free(path_before);
3251	fs_path_free(path_after);
3252
3253	return ret;
3254}
3255
3256/*
3257 * This does all the move/link/unlink/rmdir magic.
3258 */
3259static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
3260{
3261	int ret = 0;
3262	struct recorded_ref *cur;
3263	struct recorded_ref *cur2;
3264	struct list_head check_dirs;
3265	struct fs_path *valid_path = NULL;
3266	u64 ow_inode = 0;
3267	u64 ow_gen;
3268	int did_overwrite = 0;
3269	int is_orphan = 0;
3270	u64 last_dir_ino_rm = 0;
3271
3272verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
3273
3274	/*
3275	 * This should never happen as the root dir always has the same ref
3276	 * which is always '..'
3277	 */
3278	BUG_ON(sctx->cur_ino <= BTRFS_FIRST_FREE_OBJECTID);
3279	INIT_LIST_HEAD(&check_dirs);
3280
3281	valid_path = fs_path_alloc();
3282	if (!valid_path) {
3283		ret = -ENOMEM;
3284		goto out;
3285	}
3286
3287	/*
3288	 * First, check if the first ref of the current inode was overwritten
3289	 * before. If yes, we know that the current inode was already orphanized
3290	 * and thus use the orphan name. If not, we can use get_cur_path to
3291	 * get the path of the first ref as it would like while receiving at
3292	 * this point in time.
3293	 * New inodes are always orphan at the beginning, so force to use the
3294	 * orphan name in this case.
3295	 * The first ref is stored in valid_path and will be updated if it
3296	 * gets moved around.
3297	 */
3298	if (!sctx->cur_inode_new) {
3299		ret = did_overwrite_first_ref(sctx, sctx->cur_ino,
3300				sctx->cur_inode_gen);
3301		if (ret < 0)
3302			goto out;
3303		if (ret)
3304			did_overwrite = 1;
3305	}
3306	if (sctx->cur_inode_new || did_overwrite) {
3307		ret = gen_unique_name(sctx, sctx->cur_ino,
3308				sctx->cur_inode_gen, valid_path);
3309		if (ret < 0)
3310			goto out;
3311		is_orphan = 1;
3312	} else {
3313		ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen,
3314				valid_path);
3315		if (ret < 0)
3316			goto out;
3317	}
3318
3319	list_for_each_entry(cur, &sctx->new_refs, list) {
3320		/*
3321		 * We may have refs where the parent directory does not exist
3322		 * yet. This happens if the parent directories inum is higher
3323		 * the the current inum. To handle this case, we create the
3324		 * parent directory out of order. But we need to check if this
3325		 * did already happen before due to other refs in the same dir.
3326		 */
3327		ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen);
3328		if (ret < 0)
3329			goto out;
3330		if (ret == inode_state_will_create) {
3331			ret = 0;
3332			/*
3333			 * First check if any of the current inodes refs did
3334			 * already create the dir.
3335			 */
3336			list_for_each_entry(cur2, &sctx->new_refs, list) {
3337				if (cur == cur2)
3338					break;
3339				if (cur2->dir == cur->dir) {
3340					ret = 1;
3341					break;
3342				}
3343			}
3344
3345			/*
3346			 * If that did not happen, check if a previous inode
3347			 * did already create the dir.
3348			 */
3349			if (!ret)
3350				ret = did_create_dir(sctx, cur->dir);
3351			if (ret < 0)
3352				goto out;
3353			if (!ret) {
3354				ret = send_create_inode(sctx, cur->dir);
3355				if (ret < 0)
3356					goto out;
3357			}
3358		}
3359
3360		/*
3361		 * Check if this new ref would overwrite the first ref of
3362		 * another unprocessed inode. If yes, orphanize the
3363		 * overwritten inode. If we find an overwritten ref that is
3364		 * not the first ref, simply unlink it.
3365		 */
3366		ret = will_overwrite_ref(sctx, cur->dir, cur->dir_gen,
3367				cur->name, cur->name_len,
3368				&ow_inode, &ow_gen);
3369		if (ret < 0)
3370			goto out;
3371		if (ret) {
3372			ret = is_first_ref(sctx->parent_root,
3373					   ow_inode, cur->dir, cur->name,
3374					   cur->name_len);
3375			if (ret < 0)
3376				goto out;
3377			if (ret) {
3378				ret = orphanize_inode(sctx, ow_inode, ow_gen,
3379						cur->full_path);
3380				if (ret < 0)
3381					goto out;
3382			} else {
3383				ret = send_unlink(sctx, cur->full_path);
3384				if (ret < 0)
3385					goto out;
3386			}
3387		}
3388
3389		/*
3390		 * link/move the ref to the new place. If we have an orphan
3391		 * inode, move it and update valid_path. If not, link or move
3392		 * it depending on the inode mode.
3393		 */
3394		if (is_orphan) {
3395			ret = send_rename(sctx, valid_path, cur->full_path);
3396			if (ret < 0)
3397				goto out;
3398			is_orphan = 0;
3399			ret = fs_path_copy(valid_path, cur->full_path);
3400			if (ret < 0)
3401				goto out;
3402		} else {
3403			if (S_ISDIR(sctx->cur_inode_mode)) {
3404				/*
3405				 * Dirs can't be linked, so move it. For moved
3406				 * dirs, we always have one new and one deleted
3407				 * ref. The deleted ref is ignored later.
3408				 */
3409				ret = wait_for_parent_move(sctx, cur);
3410				if (ret < 0)
3411					goto out;
3412				if (ret) {
3413					ret = add_pending_dir_move(sctx,
3414								   cur->dir);
3415					*pending_move = 1;
3416				} else {
3417					ret = send_rename(sctx, valid_path,
3418							  cur->full_path);
3419					if (!ret)
3420						ret = fs_path_copy(valid_path,
3421							       cur->full_path);
3422				}
3423				if (ret < 0)
3424					goto out;
3425			} else {
3426				ret = send_link(sctx, cur->full_path,
3427						valid_path);
3428				if (ret < 0)
3429					goto out;
3430			}
3431		}
3432		ret = dup_ref(cur, &check_dirs);
3433		if (ret < 0)
3434			goto out;
3435	}
3436
3437	if (S_ISDIR(sctx->cur_inode_mode) && sctx->cur_inode_deleted) {
3438		/*
3439		 * Check if we can already rmdir the directory. If not,
3440		 * orphanize it. For every dir item inside that gets deleted
3441		 * later, we do this check again and rmdir it then if possible.
3442		 * See the use of check_dirs for more details.
3443		 */
3444		ret = can_rmdir(sctx, sctx->cur_ino, sctx->cur_inode_gen,
3445				sctx->cur_ino);
3446		if (ret < 0)
3447			goto out;
3448		if (ret) {
3449			ret = send_rmdir(sctx, valid_path);
3450			if (ret < 0)
3451				goto out;
3452		} else if (!is_orphan) {
3453			ret = orphanize_inode(sctx, sctx->cur_ino,
3454					sctx->cur_inode_gen, valid_path);
3455			if (ret < 0)
3456				goto out;
3457			is_orphan = 1;
3458		}
3459
3460		list_for_each_entry(cur, &sctx->deleted_refs, list) {
3461			ret = dup_ref(cur, &check_dirs);
3462			if (ret < 0)
3463				goto out;
3464		}
3465	} else if (S_ISDIR(sctx->cur_inode_mode) &&
3466		   !list_empty(&sctx->deleted_refs)) {
3467		/*
3468		 * We have a moved dir. Add the old parent to check_dirs
3469		 */
3470		cur = list_entry(sctx->deleted_refs.next, struct recorded_ref,
3471				list);
3472		ret = dup_ref(cur, &check_dirs);
3473		if (ret < 0)
3474			goto out;
3475	} else if (!S_ISDIR(sctx->cur_inode_mode)) {
3476		/*
3477		 * We have a non dir inode. Go through all deleted refs and
3478		 * unlink them if they were not already overwritten by other
3479		 * inodes.
3480		 */
3481		list_for_each_entry(cur, &sctx->deleted_refs, list) {
3482			ret = did_overwrite_ref(sctx, cur->dir, cur->dir_gen,
3483					sctx->cur_ino, sctx->cur_inode_gen,
3484					cur->name, cur->name_len);
3485			if (ret < 0)
3486				goto out;
3487			if (!ret) {
3488				ret = send_unlink(sctx, cur->full_path);
3489				if (ret < 0)
3490					goto out;
3491			}
3492			ret = dup_ref(cur, &check_dirs);
3493			if (ret < 0)
3494				goto out;
3495		}
3496		/*
3497		 * If the inode is still orphan, unlink the orphan. This may
3498		 * happen when a previous inode did overwrite the first ref
3499		 * of this inode and no new refs were added for the current
3500		 * inode. Unlinking does not mean that the inode is deleted in
3501		 * all cases. There may still be links to this inode in other
3502		 * places.
3503		 */
3504		if (is_orphan) {
3505			ret = send_unlink(sctx, valid_path);
3506			if (ret < 0)
3507				goto out;
3508		}
3509	}
3510
3511	/*
3512	 * We did collect all parent dirs where cur_inode was once located. We
3513	 * now go through all these dirs and check if they are pending for
3514	 * deletion and if it's finally possible to perform the rmdir now.
3515	 * We also update the inode stats of the parent dirs here.
3516	 */
3517	list_for_each_entry(cur, &check_dirs, list) {
3518		/*
3519		 * In case we had refs into dirs that were not processed yet,
3520		 * we don't need to do the utime and rmdir logic for these dirs.
3521		 * The dir will be processed later.
3522		 */
3523		if (cur->dir > sctx->cur_ino)
3524			continue;
3525
3526		ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen);
3527		if (ret < 0)
3528			goto out;
3529
3530		if (ret == inode_state_did_create ||
3531		    ret == inode_state_no_change) {
3532			/* TODO delayed utimes */
3533			ret = send_utimes(sctx, cur->dir, cur->dir_gen);
3534			if (ret < 0)
3535				goto out;
3536		} else if (ret == inode_state_did_delete &&
3537			   cur->dir != last_dir_ino_rm) {
3538			ret = can_rmdir(sctx, cur->dir, cur->dir_gen,
3539					sctx->cur_ino);
3540			if (ret < 0)
3541				goto out;
3542			if (ret) {
3543				ret = get_cur_path(sctx, cur->dir,
3544						   cur->dir_gen, valid_path);
3545				if (ret < 0)
3546					goto out;
3547				ret = send_rmdir(sctx, valid_path);
3548				if (ret < 0)
3549					goto out;
3550				last_dir_ino_rm = cur->dir;
3551			}
3552		}
3553	}
3554
3555	ret = 0;
3556
3557out:
3558	__free_recorded_refs(&check_dirs);
3559	free_recorded_refs(sctx);
3560	fs_path_free(valid_path);
3561	return ret;
3562}
3563
3564static int __record_new_ref(int num, u64 dir, int index,
3565			    struct fs_path *name,
3566			    void *ctx)
3567{
3568	int ret = 0;
3569	struct send_ctx *sctx = ctx;
3570	struct fs_path *p;
3571	u64 gen;
3572
3573	p = fs_path_alloc();
3574	if (!p)
3575		return -ENOMEM;
3576
3577	ret = get_inode_info(sctx->send_root, dir, NULL, &gen, NULL, NULL,
3578			NULL, NULL);
3579	if (ret < 0)
3580		goto out;
3581
3582	ret = get_cur_path(sctx, dir, gen, p);
3583	if (ret < 0)
3584		goto out;
3585	ret = fs_path_add_path(p, name);
3586	if (ret < 0)
3587		goto out;
3588
3589	ret = record_ref(&sctx->new_refs, dir, gen, p);
3590
3591out:
3592	if (ret)
3593		fs_path_free(p);
3594	return ret;
3595}
3596
3597static int __record_deleted_ref(int num, u64 dir, int index,
3598				struct fs_path *name,
3599				void *ctx)
3600{
3601	int ret = 0;
3602	struct send_ctx *sctx = ctx;
3603	struct fs_path *p;
3604	u64 gen;
3605
3606	p = fs_path_alloc();
3607	if (!p)
3608		return -ENOMEM;
3609
3610	ret = get_inode_info(sctx->parent_root, dir, NULL, &gen, NULL, NULL,
3611			NULL, NULL);
3612	if (ret < 0)
3613		goto out;
3614
3615	ret = get_cur_path(sctx, dir, gen, p);
3616	if (ret < 0)
3617		goto out;
3618	ret = fs_path_add_path(p, name);
3619	if (ret < 0)
3620		goto out;
3621
3622	ret = record_ref(&sctx->deleted_refs, dir, gen, p);
3623
3624out:
3625	if (ret)
3626		fs_path_free(p);
3627	return ret;
3628}
3629
3630static int record_new_ref(struct send_ctx *sctx)
3631{
3632	int ret;
3633
3634	ret = iterate_inode_ref(sctx->send_root, sctx->left_path,
3635				sctx->cmp_key, 0, __record_new_ref, sctx);
3636	if (ret < 0)
3637		goto out;
3638	ret = 0;
3639
3640out:
3641	return ret;
3642}
3643
3644static int record_deleted_ref(struct send_ctx *sctx)
3645{
3646	int ret;
3647
3648	ret = iterate_inode_ref(sctx->parent_root, sctx->right_path,
3649				sctx->cmp_key, 0, __record_deleted_ref, sctx);
3650	if (ret < 0)
3651		goto out;
3652	ret = 0;
3653
3654out:
3655	return ret;
3656}
3657
3658struct find_ref_ctx {
3659	u64 dir;
3660	u64 dir_gen;
3661	struct btrfs_root *root;
3662	struct fs_path *name;
3663	int found_idx;
3664};
3665
3666static int __find_iref(int num, u64 dir, int index,
3667		       struct fs_path *name,
3668		       void *ctx_)
3669{
3670	struct find_ref_ctx *ctx = ctx_;
3671	u64 dir_gen;
3672	int ret;
3673
3674	if (dir == ctx->dir && fs_path_len(name) == fs_path_len(ctx->name) &&
3675	    strncmp(name->start, ctx->name->start, fs_path_len(name)) == 0) {
3676		/*
3677		 * To avoid doing extra lookups we'll only do this if everything
3678		 * else matches.
3679		 */
3680		ret = get_inode_info(ctx->root, dir, NULL, &dir_gen, NULL,
3681				     NULL, NULL, NULL);
3682		if (ret)
3683			return ret;
3684		if (dir_gen != ctx->dir_gen)
3685			return 0;
3686		ctx->found_idx = num;
3687		return 1;
3688	}
3689	return 0;
3690}
3691
3692static int find_iref(struct btrfs_root *root,
3693		     struct btrfs_path *path,
3694		     struct btrfs_key *key,
3695		     u64 dir, u64 dir_gen, struct fs_path *name)
3696{
3697	int ret;
3698	struct find_ref_ctx ctx;
3699
3700	ctx.dir = dir;
3701	ctx.name = name;
3702	ctx.dir_gen = dir_gen;
3703	ctx.found_idx = -1;
3704	ctx.root = root;
3705
3706	ret = iterate_inode_ref(root, path, key, 0, __find_iref, &ctx);
3707	if (ret < 0)
3708		return ret;
3709
3710	if (ctx.found_idx == -1)
3711		return -ENOENT;
3712
3713	return ctx.found_idx;
3714}
3715
3716static int __record_changed_new_ref(int num, u64 dir, int index,
3717				    struct fs_path *name,
3718				    void *ctx)
3719{
3720	u64 dir_gen;
3721	int ret;
3722	struct send_ctx *sctx = ctx;
3723
3724	ret = get_inode_info(sctx->send_root, dir, NULL, &dir_gen, NULL,
3725			     NULL, NULL, NULL);
3726	if (ret)
3727		return ret;
3728
3729	ret = find_iref(sctx->parent_root, sctx->right_path,
3730			sctx->cmp_key, dir, dir_gen, name);
3731	if (ret == -ENOENT)
3732		ret = __record_new_ref(num, dir, index, name, sctx);
3733	else if (ret > 0)
3734		ret = 0;
3735
3736	return ret;
3737}
3738
3739static int __record_changed_deleted_ref(int num, u64 dir, int index,
3740					struct fs_path *name,
3741					void *ctx)
3742{
3743	u64 dir_gen;
3744	int ret;
3745	struct send_ctx *sctx = ctx;
3746
3747	ret = get_inode_info(sctx->parent_root, dir, NULL, &dir_gen, NULL,
3748			     NULL, NULL, NULL);
3749	if (ret)
3750		return ret;
3751
3752	ret = find_iref(sctx->send_root, sctx->left_path, sctx->cmp_key,
3753			dir, dir_gen, name);
3754	if (ret == -ENOENT)
3755		ret = __record_deleted_ref(num, dir, index, name, sctx);
3756	else if (ret > 0)
3757		ret = 0;
3758
3759	return ret;
3760}
3761
3762static int record_changed_ref(struct send_ctx *sctx)
3763{
3764	int ret = 0;
3765
3766	ret = iterate_inode_ref(sctx->send_root, sctx->left_path,
3767			sctx->cmp_key, 0, __record_changed_new_ref, sctx);
3768	if (ret < 0)
3769		goto out;
3770	ret = iterate_inode_ref(sctx->parent_root, sctx->right_path,
3771			sctx->cmp_key, 0, __record_changed_deleted_ref, sctx);
3772	if (ret < 0)
3773		goto out;
3774	ret = 0;
3775
3776out:
3777	return ret;
3778}
3779
3780/*
3781 * Record and process all refs at once. Needed when an inode changes the
3782 * generation number, which means that it was deleted and recreated.
3783 */
3784static int process_all_refs(struct send_ctx *sctx,
3785			    enum btrfs_compare_tree_result cmd)
3786{
3787	int ret;
3788	struct btrfs_root *root;
3789	struct btrfs_path *path;
3790	struct btrfs_key key;
3791	struct btrfs_key found_key;
3792	struct extent_buffer *eb;
3793	int slot;
3794	iterate_inode_ref_t cb;
3795	int pending_move = 0;
3796
3797	path = alloc_path_for_send();
3798	if (!path)
3799		return -ENOMEM;
3800
3801	if (cmd == BTRFS_COMPARE_TREE_NEW) {
3802		root = sctx->send_root;
3803		cb = __record_new_ref;
3804	} else if (cmd == BTRFS_COMPARE_TREE_DELETED) {
3805		root = sctx->parent_root;
3806		cb = __record_deleted_ref;
3807	} else {
3808		btrfs_err(sctx->send_root->fs_info,
3809				"Wrong command %d in process_all_refs", cmd);
3810		ret = -EINVAL;
3811		goto out;
3812	}
3813
3814	key.objectid = sctx->cmp_key->objectid;
3815	key.type = BTRFS_INODE_REF_KEY;
3816	key.offset = 0;
3817	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3818	if (ret < 0)
3819		goto out;
3820
3821	while (1) {
3822		eb = path->nodes[0];
3823		slot = path->slots[0];
3824		if (slot >= btrfs_header_nritems(eb)) {
3825			ret = btrfs_next_leaf(root, path);
3826			if (ret < 0)
3827				goto out;
3828			else if (ret > 0)
3829				break;
3830			continue;
3831		}
3832
3833		btrfs_item_key_to_cpu(eb, &found_key, slot);
3834
3835		if (found_key.objectid != key.objectid ||
3836		    (found_key.type != BTRFS_INODE_REF_KEY &&
3837		     found_key.type != BTRFS_INODE_EXTREF_KEY))
3838			break;
3839
3840		ret = iterate_inode_ref(root, path, &found_key, 0, cb, sctx);
3841		if (ret < 0)
3842			goto out;
3843
3844		path->slots[0]++;
3845	}
3846	btrfs_release_path(path);
3847
3848	ret = process_recorded_refs(sctx, &pending_move);
3849	/* Only applicable to an incremental send. */
3850	ASSERT(pending_move == 0);
3851
3852out:
3853	btrfs_free_path(path);
3854	return ret;
3855}
3856
3857static int send_set_xattr(struct send_ctx *sctx,
3858			  struct fs_path *path,
3859			  const char *name, int name_len,
3860			  const char *data, int data_len)
3861{
3862	int ret = 0;
3863
3864	ret = begin_cmd(sctx, BTRFS_SEND_C_SET_XATTR);
3865	if (ret < 0)
3866		goto out;
3867
3868	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
3869	TLV_PUT_STRING(sctx, BTRFS_SEND_A_XATTR_NAME, name, name_len);
3870	TLV_PUT(sctx, BTRFS_SEND_A_XATTR_DATA, data, data_len);
3871
3872	ret = send_cmd(sctx);
3873
3874tlv_put_failure:
3875out:
3876	return ret;
3877}
3878
3879static int send_remove_xattr(struct send_ctx *sctx,
3880			  struct fs_path *path,
3881			  const char *name, int name_len)
3882{
3883	int ret = 0;
3884
3885	ret = begin_cmd(sctx, BTRFS_SEND_C_REMOVE_XATTR);
3886	if (ret < 0)
3887		goto out;
3888
3889	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
3890	TLV_PUT_STRING(sctx, BTRFS_SEND_A_XATTR_NAME, name, name_len);
3891
3892	ret = send_cmd(sctx);
3893
3894tlv_put_failure:
3895out:
3896	return ret;
3897}
3898
3899static int __process_new_xattr(int num, struct btrfs_key *di_key,
3900			       const char *name, int name_len,
3901			       const char *data, int data_len,
3902			       u8 type, void *ctx)
3903{
3904	int ret;
3905	struct send_ctx *sctx = ctx;
3906	struct fs_path *p;
3907	posix_acl_xattr_header dummy_acl;
3908
3909	p = fs_path_alloc();
3910	if (!p)
3911		return -ENOMEM;
3912
3913	/*
3914	 * This hack is needed because empty acl's are stored as zero byte
3915	 * data in xattrs. Problem with that is, that receiving these zero byte
3916	 * acl's will fail later. To fix this, we send a dummy acl list that
3917	 * only contains the version number and no entries.
3918	 */
3919	if (!strncmp(name, XATTR_NAME_POSIX_ACL_ACCESS, name_len) ||
3920	    !strncmp(name, XATTR_NAME_POSIX_ACL_DEFAULT, name_len)) {
3921		if (data_len == 0) {
3922			dummy_acl.a_version =
3923					cpu_to_le32(POSIX_ACL_XATTR_VERSION);
3924			data = (char *)&dummy_acl;
3925			data_len = sizeof(dummy_acl);
3926		}
3927	}
3928
3929	ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
3930	if (ret < 0)
3931		goto out;
3932
3933	ret = send_set_xattr(sctx, p, name, name_len, data, data_len);
3934
3935out:
3936	fs_path_free(p);
3937	return ret;
3938}
3939
3940static int __process_deleted_xattr(int num, struct btrfs_key *di_key,
3941				   const char *name, int name_len,
3942				   const char *data, int data_len,
3943				   u8 type, void *ctx)
3944{
3945	int ret;
3946	struct send_ctx *sctx = ctx;
3947	struct fs_path *p;
3948
3949	p = fs_path_alloc();
3950	if (!p)
3951		return -ENOMEM;
3952
3953	ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
3954	if (ret < 0)
3955		goto out;
3956
3957	ret = send_remove_xattr(sctx, p, name, name_len);
3958
3959out:
3960	fs_path_free(p);
3961	return ret;
3962}
3963
3964static int process_new_xattr(struct send_ctx *sctx)
3965{
3966	int ret = 0;
3967
3968	ret = iterate_dir_item(sctx->send_root, sctx->left_path,
3969			       sctx->cmp_key, __process_new_xattr, sctx);
3970
3971	return ret;
3972}
3973
3974static int process_deleted_xattr(struct send_ctx *sctx)
3975{
3976	int ret;
3977
3978	ret = iterate_dir_item(sctx->parent_root, sctx->right_path,
3979			       sctx->cmp_key, __process_deleted_xattr, sctx);
3980
3981	return ret;
3982}
3983
3984struct find_xattr_ctx {
3985	const char *name;
3986	int name_len;
3987	int found_idx;
3988	char *found_data;
3989	int found_data_len;
3990};
3991
3992static int __find_xattr(int num, struct btrfs_key *di_key,
3993			const char *name, int name_len,
3994			const char *data, int data_len,
3995			u8 type, void *vctx)
3996{
3997	struct find_xattr_ctx *ctx = vctx;
3998
3999	if (name_len == ctx->name_len &&
4000	    strncmp(name, ctx->name, name_len) == 0) {
4001		ctx->found_idx = num;
4002		ctx->found_data_len = data_len;
4003		ctx->found_data = kmemdup(data, data_len, GFP_NOFS);
4004		if (!ctx->found_data)
4005			return -ENOMEM;
4006		return 1;
4007	}
4008	return 0;
4009}
4010
4011static int find_xattr(struct btrfs_root *root,
4012		      struct btrfs_path *path,
4013		      struct btrfs_key *key,
4014		      const char *name, int name_len,
4015		      char **data, int *data_len)
4016{
4017	int ret;
4018	struct find_xattr_ctx ctx;
4019
4020	ctx.name = name;
4021	ctx.name_len = name_len;
4022	ctx.found_idx = -1;
4023	ctx.found_data = NULL;
4024	ctx.found_data_len = 0;
4025
4026	ret = iterate_dir_item(root, path, key, __find_xattr, &ctx);
4027	if (ret < 0)
4028		return ret;
4029
4030	if (ctx.found_idx == -1)
4031		return -ENOENT;
4032	if (data) {
4033		*data = ctx.found_data;
4034		*data_len = ctx.found_data_len;
4035	} else {
4036		kfree(ctx.found_data);
4037	}
4038	return ctx.found_idx;
4039}
4040
4041
4042static int __process_changed_new_xattr(int num, struct btrfs_key *di_key,
4043				       const char *name, int name_len,
4044				       const char *data, int data_len,
4045				       u8 type, void *ctx)
4046{
4047	int ret;
4048	struct send_ctx *sctx = ctx;
4049	char *found_data = NULL;
4050	int found_data_len  = 0;
4051
4052	ret = find_xattr(sctx->parent_root, sctx->right_path,
4053			 sctx->cmp_key, name, name_len, &found_data,
4054			 &found_data_len);
4055	if (ret == -ENOENT) {
4056		ret = __process_new_xattr(num, di_key, name, name_len, data,
4057				data_len, type, ctx);
4058	} else if (ret >= 0) {
4059		if (data_len != found_data_len ||
4060		    memcmp(data, found_data, data_len)) {
4061			ret = __process_new_xattr(num, di_key, name, name_len,
4062					data, data_len, type, ctx);
4063		} else {
4064			ret = 0;
4065		}
4066	}
4067
4068	kfree(found_data);
4069	return ret;
4070}
4071
4072static int __process_changed_deleted_xattr(int num, struct btrfs_key *di_key,
4073					   const char *name, int name_len,
4074					   const char *data, int data_len,
4075					   u8 type, void *ctx)
4076{
4077	int ret;
4078	struct send_ctx *sctx = ctx;
4079
4080	ret = find_xattr(sctx->send_root, sctx->left_path, sctx->cmp_key,
4081			 name, name_len, NULL, NULL);
4082	if (ret == -ENOENT)
4083		ret = __process_deleted_xattr(num, di_key, name, name_len, data,
4084				data_len, type, ctx);
4085	else if (ret >= 0)
4086		ret = 0;
4087
4088	return ret;
4089}
4090
4091static int process_changed_xattr(struct send_ctx *sctx)
4092{
4093	int ret = 0;
4094
4095	ret = iterate_dir_item(sctx->send_root, sctx->left_path,
4096			sctx->cmp_key, __process_changed_new_xattr, sctx);
4097	if (ret < 0)
4098		goto out;
4099	ret = iterate_dir_item(sctx->parent_root, sctx->right_path,
4100			sctx->cmp_key, __process_changed_deleted_xattr, sctx);
4101
4102out:
4103	return ret;
4104}
4105
4106static int process_all_new_xattrs(struct send_ctx *sctx)
4107{
4108	int ret;
4109	struct btrfs_root *root;
4110	struct btrfs_path *path;
4111	struct btrfs_key key;
4112	struct btrfs_key found_key;
4113	struct extent_buffer *eb;
4114	int slot;
4115
4116	path = alloc_path_for_send();
4117	if (!path)
4118		return -ENOMEM;
4119
4120	root = sctx->send_root;
4121
4122	key.objectid = sctx->cmp_key->objectid;
4123	key.type = BTRFS_XATTR_ITEM_KEY;
4124	key.offset = 0;
4125	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4126	if (ret < 0)
4127		goto out;
4128
4129	while (1) {
4130		eb = path->nodes[0];
4131		slot = path->slots[0];
4132		if (slot >= btrfs_header_nritems(eb)) {
4133			ret = btrfs_next_leaf(root, path);
4134			if (ret < 0) {
4135				goto out;
4136			} else if (ret > 0) {
4137				ret = 0;
4138				break;
4139			}
4140			continue;
4141		}
4142
4143		btrfs_item_key_to_cpu(eb, &found_key, slot);
4144		if (found_key.objectid != key.objectid ||
4145		    found_key.type != key.type) {
4146			ret = 0;
4147			goto out;
4148		}
4149
4150		ret = iterate_dir_item(root, path, &found_key,
4151				       __process_new_xattr, sctx);
4152		if (ret < 0)
4153			goto out;
4154
4155		path->slots[0]++;
4156	}
4157
4158out:
4159	btrfs_free_path(path);
4160	return ret;
4161}
4162
4163static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len)
4164{
4165	struct btrfs_root *root = sctx->send_root;
4166	struct btrfs_fs_info *fs_info = root->fs_info;
4167	struct inode *inode;
4168	struct page *page;
4169	char *addr;
4170	struct btrfs_key key;
4171	pgoff_t index = offset >> PAGE_CACHE_SHIFT;
4172	pgoff_t last_index;
4173	unsigned pg_offset = offset & ~PAGE_CACHE_MASK;
4174	ssize_t ret = 0;
4175
4176	key.objectid = sctx->cur_ino;
4177	key.type = BTRFS_INODE_ITEM_KEY;
4178	key.offset = 0;
4179
4180	inode = btrfs_iget(fs_info->sb, &key, root, NULL);
4181	if (IS_ERR(inode))
4182		return PTR_ERR(inode);
4183
4184	if (offset + len > i_size_read(inode)) {
4185		if (offset > i_size_read(inode))
4186			len = 0;
4187		else
4188			len = offset - i_size_read(inode);
4189	}
4190	if (len == 0)
4191		goto out;
4192
4193	last_index = (offset + len - 1) >> PAGE_CACHE_SHIFT;
4194	while (index <= last_index) {
4195		unsigned cur_len = min_t(unsigned, len,
4196					 PAGE_CACHE_SIZE - pg_offset);
4197		page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
4198		if (!page) {
4199			ret = -ENOMEM;
4200			break;
4201		}
4202
4203		if (!PageUptodate(page)) {
4204			btrfs_readpage(NULL, page);
4205			lock_page(page);
4206			if (!PageUptodate(page)) {
4207				unlock_page(page);
4208				page_cache_release(page);
4209				ret = -EIO;
4210				break;
4211			}
4212		}
4213
4214		addr = kmap(page);
4215		memcpy(sctx->read_buf + ret, addr + pg_offset, cur_len);
4216		kunmap(page);
4217		unlock_page(page);
4218		page_cache_release(page);
4219		index++;
4220		pg_offset = 0;
4221		len -= cur_len;
4222		ret += cur_len;
4223	}
4224out:
4225	iput(inode);
4226	return ret;
4227}
4228
4229/*
4230 * Read some bytes from the current inode/file and send a write command to
4231 * user space.
4232 */
4233static int send_write(struct send_ctx *sctx, u64 offset, u32 len)
4234{
4235	int ret = 0;
4236	struct fs_path *p;
4237	ssize_t num_read = 0;
4238
4239	p = fs_path_alloc();
4240	if (!p)
4241		return -ENOMEM;
4242
4243verbose_printk("btrfs: send_write offset=%llu, len=%d\n", offset, len);
4244
4245	num_read = fill_read_buf(sctx, offset, len);
4246	if (num_read <= 0) {
4247		if (num_read < 0)
4248			ret = num_read;
4249		goto out;
4250	}
4251
4252	ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE);
4253	if (ret < 0)
4254		goto out;
4255
4256	ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
4257	if (ret < 0)
4258		goto out;
4259
4260	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
4261	TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
4262	TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, num_read);
4263
4264	ret = send_cmd(sctx);
4265
4266tlv_put_failure:
4267out:
4268	fs_path_free(p);
4269	if (ret < 0)
4270		return ret;
4271	return num_read;
4272}
4273
4274/*
4275 * Send a clone command to user space.
4276 */
4277static int send_clone(struct send_ctx *sctx,
4278		      u64 offset, u32 len,
4279		      struct clone_root *clone_root)
4280{
4281	int ret = 0;
4282	struct fs_path *p;
4283	u64 gen;
4284
4285verbose_printk("btrfs: send_clone offset=%llu, len=%d, clone_root=%llu, "
4286	       "clone_inode=%llu, clone_offset=%llu\n", offset, len,
4287		clone_root->root->objectid, clone_root->ino,
4288		clone_root->offset);
4289
4290	p = fs_path_alloc();
4291	if (!p)
4292		return -ENOMEM;
4293
4294	ret = begin_cmd(sctx, BTRFS_SEND_C_CLONE);
4295	if (ret < 0)
4296		goto out;
4297
4298	ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
4299	if (ret < 0)
4300		goto out;
4301
4302	TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
4303	TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_LEN, len);
4304	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
4305
4306	if (clone_root->root == sctx->send_root) {
4307		ret = get_inode_info(sctx->send_root, clone_root->ino, NULL,
4308				&gen, NULL, NULL, NULL, NULL);
4309		if (ret < 0)
4310			goto out;
4311		ret = get_cur_path(sctx, clone_root->ino, gen, p);
4312	} else {
4313		ret = get_inode_path(clone_root->root, clone_root->ino, p);
4314	}
4315	if (ret < 0)
4316		goto out;
4317
4318	TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
4319			clone_root->root->root_item.uuid);
4320	TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID,
4321		    le64_to_cpu(clone_root->root->root_item.ctransid));
4322	TLV_PUT_PATH(sctx, BTRFS_SEND_A_CLONE_PATH, p);
4323	TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_OFFSET,
4324			clone_root->offset);
4325
4326	ret = send_cmd(sctx);
4327
4328tlv_put_failure:
4329out:
4330	fs_path_free(p);
4331	return ret;
4332}
4333
4334/*
4335 * Send an update extent command to user space.
4336 */
4337static int send_update_extent(struct send_ctx *sctx,
4338			      u64 offset, u32 len)
4339{
4340	int ret = 0;
4341	struct fs_path *p;
4342
4343	p = fs_path_alloc();
4344	if (!p)
4345		return -ENOMEM;
4346
4347	ret = begin_cmd(sctx, BTRFS_SEND_C_UPDATE_EXTENT);
4348	if (ret < 0)
4349		goto out;
4350
4351	ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
4352	if (ret < 0)
4353		goto out;
4354
4355	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
4356	TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
4357	TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, len);
4358
4359	ret = send_cmd(sctx);
4360
4361tlv_put_failure:
4362out:
4363	fs_path_free(p);
4364	return ret;
4365}
4366
4367static int send_hole(struct send_ctx *sctx, u64 end)
4368{
4369	struct fs_path *p = NULL;
4370	u64 offset = sctx->cur_inode_last_extent;
4371	u64 len;
4372	int ret = 0;
4373
4374	p = fs_path_alloc();
4375	if (!p)
4376		return -ENOMEM;
4377	memset(sctx->read_buf, 0, BTRFS_SEND_READ_SIZE);
4378	while (offset < end) {
4379		len = min_t(u64, end - offset, BTRFS_SEND_READ_SIZE);
4380
4381		ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE);
4382		if (ret < 0)
4383			break;
4384		ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
4385		if (ret < 0)
4386			break;
4387		TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
4388		TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
4389		TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, len);
4390		ret = send_cmd(sctx);
4391		if (ret < 0)
4392			break;
4393		offset += len;
4394	}
4395tlv_put_failure:
4396	fs_path_free(p);
4397	return ret;
4398}
4399
4400static int send_write_or_clone(struct send_ctx *sctx,
4401			       struct btrfs_path *path,
4402			       struct btrfs_key *key,
4403			       struct clone_root *clone_root)
4404{
4405	int ret = 0;
4406	struct btrfs_file_extent_item *ei;
4407	u64 offset = key->offset;
4408	u64 pos = 0;
4409	u64 len;
4410	u32 l;
4411	u8 type;
4412	u64 bs = sctx->send_root->fs_info->sb->s_blocksize;
4413
4414	ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
4415			struct btrfs_file_extent_item);
4416	type = btrfs_file_extent_type(path->nodes[0], ei);
4417	if (type == BTRFS_FILE_EXTENT_INLINE) {
4418		len = btrfs_file_extent_inline_len(path->nodes[0],
4419						   path->slots[0], ei);
4420		/*
4421		 * it is possible the inline item won't cover the whole page,
4422		 * but there may be items after this page.  Make
4423		 * sure to send the whole thing
4424		 */
4425		len = PAGE_CACHE_ALIGN(len);
4426	} else {
4427		len = btrfs_file_extent_num_bytes(path->nodes[0], ei);
4428	}
4429
4430	if (offset + len > sctx->cur_inode_size)
4431		len = sctx->cur_inode_size - offset;
4432	if (len == 0) {
4433		ret = 0;
4434		goto out;
4435	}
4436
4437	if (clone_root && IS_ALIGNED(offset + len, bs)) {
4438		ret = send_clone(sctx, offset, len, clone_root);
4439	} else if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA) {
4440		ret = send_update_extent(sctx, offset, len);
4441	} else {
4442		while (pos < len) {
4443			l = len - pos;
4444			if (l > BTRFS_SEND_READ_SIZE)
4445				l = BTRFS_SEND_READ_SIZE;
4446			ret = send_write(sctx, pos + offset, l);
4447			if (ret < 0)
4448				goto out;
4449			if (!ret)
4450				break;
4451			pos += ret;
4452		}
4453		ret = 0;
4454	}
4455out:
4456	return ret;
4457}
4458
4459static int is_extent_unchanged(struct send_ctx *sctx,
4460			       struct btrfs_path *left_path,
4461			       struct btrfs_key *ekey)
4462{
4463	int ret = 0;
4464	struct btrfs_key key;
4465	struct btrfs_path *path = NULL;
4466	struct extent_buffer *eb;
4467	int slot;
4468	struct btrfs_key found_key;
4469	struct btrfs_file_extent_item *ei;
4470	u64 left_disknr;
4471	u64 right_disknr;
4472	u64 left_offset;
4473	u64 right_offset;
4474	u64 left_offset_fixed;
4475	u64 left_len;
4476	u64 right_len;
4477	u64 left_gen;
4478	u64 right_gen;
4479	u8 left_type;
4480	u8 right_type;
4481
4482	path = alloc_path_for_send();
4483	if (!path)
4484		return -ENOMEM;
4485
4486	eb = left_path->nodes[0];
4487	slot = left_path->slots[0];
4488	ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
4489	left_type = btrfs_file_extent_type(eb, ei);
4490
4491	if (left_type != BTRFS_FILE_EXTENT_REG) {
4492		ret = 0;
4493		goto out;
4494	}
4495	left_disknr = btrfs_file_extent_disk_bytenr(eb, ei);
4496	left_len = btrfs_file_extent_num_bytes(eb, ei);
4497	left_offset = btrfs_file_extent_offset(eb, ei);
4498	left_gen = btrfs_file_extent_generation(eb, ei);
4499
4500	/*
4501	 * Following comments will refer to these graphics. L is the left
4502	 * extents which we are checking at the moment. 1-8 are the right
4503	 * extents that we iterate.
4504	 *
4505	 *       |-----L-----|
4506	 * |-1-|-2a-|-3-|-4-|-5-|-6-|
4507	 *
4508	 *       |-----L-----|
4509	 * |--1--|-2b-|...(same as above)
4510	 *
4511	 * Alternative situation. Happens on files where extents got split.
4512	 *       |-----L-----|
4513	 * |-----------7-----------|-6-|
4514	 *
4515	 * Alternative situation. Happens on files which got larger.
4516	 *       |-----L-----|
4517	 * |-8-|
4518	 * Nothing follows after 8.
4519	 */
4520
4521	key.objectid = ekey->objectid;
4522	key.type = BTRFS_EXTENT_DATA_KEY;
4523	key.offset = ekey->offset;
4524	ret = btrfs_search_slot_for_read(sctx->parent_root, &key, path, 0, 0);
4525	if (ret < 0)
4526		goto out;
4527	if (ret) {
4528		ret = 0;
4529		goto out;
4530	}
4531
4532	/*
4533	 * Handle special case where the right side has no extents at all.
4534	 */
4535	eb = path->nodes[0];
4536	slot = path->slots[0];
4537	btrfs_item_key_to_cpu(eb, &found_key, slot);
4538	if (found_key.objectid != key.objectid ||
4539	    found_key.type != key.type) {
4540		/* If we're a hole then just pretend nothing changed */
4541		ret = (left_disknr) ? 0 : 1;
4542		goto out;
4543	}
4544
4545	/*
4546	 * We're now on 2a, 2b or 7.
4547	 */
4548	key = found_key;
4549	while (key.offset < ekey->offset + left_len) {
4550		ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
4551		right_type = btrfs_file_extent_type(eb, ei);
4552		if (right_type != BTRFS_FILE_EXTENT_REG) {
4553			ret = 0;
4554			goto out;
4555		}
4556
4557		right_disknr = btrfs_file_extent_disk_bytenr(eb, ei);
4558		right_len = btrfs_file_extent_num_bytes(eb, ei);
4559		right_offset = btrfs_file_extent_offset(eb, ei);
4560		right_gen = btrfs_file_extent_generation(eb, ei);
4561
4562		/*
4563		 * Are we at extent 8? If yes, we know the extent is changed.
4564		 * This may only happen on the first iteration.
4565		 */
4566		if (found_key.offset + right_len <= ekey->offset) {
4567			/* If we're a hole just pretend nothing changed */
4568			ret = (left_disknr) ? 0 : 1;
4569			goto out;
4570		}
4571
4572		left_offset_fixed = left_offset;
4573		if (key.offset < ekey->offset) {
4574			/* Fix the right offset for 2a and 7. */
4575			right_offset += ekey->offset - key.offset;
4576		} else {
4577			/* Fix the left offset for all behind 2a and 2b */
4578			left_offset_fixed += key.offset - ekey->offset;
4579		}
4580
4581		/*
4582		 * Check if we have the same extent.
4583		 */
4584		if (left_disknr != right_disknr ||
4585		    left_offset_fixed != right_offset ||
4586		    left_gen != right_gen) {
4587			ret = 0;
4588			goto out;
4589		}
4590
4591		/*
4592		 * Go to the next extent.
4593		 */
4594		ret = btrfs_next_item(sctx->parent_root, path);
4595		if (ret < 0)
4596			goto out;
4597		if (!ret) {
4598			eb = path->nodes[0];
4599			slot = path->slots[0];
4600			btrfs_item_key_to_cpu(eb, &found_key, slot);
4601		}
4602		if (ret || found_key.objectid != key.objectid ||
4603		    found_key.type != key.type) {
4604			key.offset += right_len;
4605			break;
4606		}
4607		if (found_key.offset != key.offset + right_len) {
4608			ret = 0;
4609			goto out;
4610		}
4611		key = found_key;
4612	}
4613
4614	/*
4615	 * We're now behind the left extent (treat as unchanged) or at the end
4616	 * of the right side (treat as changed).
4617	 */
4618	if (key.offset >= ekey->offset + left_len)
4619		ret = 1;
4620	else
4621		ret = 0;
4622
4623
4624out:
4625	btrfs_free_path(path);
4626	return ret;
4627}
4628
4629static int get_last_extent(struct send_ctx *sctx, u64 offset)
4630{
4631	struct btrfs_path *path;
4632	struct btrfs_root *root = sctx->send_root;
4633	struct btrfs_file_extent_item *fi;
4634	struct btrfs_key key;
4635	u64 extent_end;
4636	u8 type;
4637	int ret;
4638
4639	path = alloc_path_for_send();
4640	if (!path)
4641		return -ENOMEM;
4642
4643	sctx->cur_inode_last_extent = 0;
4644
4645	key.objectid = sctx->cur_ino;
4646	key.type = BTRFS_EXTENT_DATA_KEY;
4647	key.offset = offset;
4648	ret = btrfs_search_slot_for_read(root, &key, path, 0, 1);
4649	if (ret < 0)
4650		goto out;
4651	ret = 0;
4652	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
4653	if (key.objectid != sctx->cur_ino || key.type != BTRFS_EXTENT_DATA_KEY)
4654		goto out;
4655
4656	fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
4657			    struct btrfs_file_extent_item);
4658	type = btrfs_file_extent_type(path->nodes[0], fi);
4659	if (type == BTRFS_FILE_EXTENT_INLINE) {
4660		u64 size = btrfs_file_extent_inline_len(path->nodes[0],
4661							path->slots[0], fi);
4662		extent_end = ALIGN(key.offset + size,
4663				   sctx->send_root->sectorsize);
4664	} else {
4665		extent_end = key.offset +
4666			btrfs_file_extent_num_bytes(path->nodes[0], fi);
4667	}
4668	sctx->cur_inode_last_extent = extent_end;
4669out:
4670	btrfs_free_path(path);
4671	return ret;
4672}
4673
4674static int maybe_send_hole(struct send_ctx *sctx, struct btrfs_path *path,
4675			   struct btrfs_key *key)
4676{
4677	struct btrfs_file_extent_item *fi;
4678	u64 extent_end;
4679	u8 type;
4680	int ret = 0;
4681
4682	if (sctx->cur_ino != key->objectid || !need_send_hole(sctx))
4683		return 0;
4684
4685	if (sctx->cur_inode_last_extent == (u64)-1) {
4686		ret = get_last_extent(sctx, key->offset - 1);
4687		if (ret)
4688			return ret;
4689	}
4690
4691	fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
4692			    struct btrfs_file_extent_item);
4693	type = btrfs_file_extent_type(path->nodes[0], fi);
4694	if (type == BTRFS_FILE_EXTENT_INLINE) {
4695		u64 size = btrfs_file_extent_inline_len(path->nodes[0],
4696							path->slots[0], fi);
4697		extent_end = ALIGN(key->offset + size,
4698				   sctx->send_root->sectorsize);
4699	} else {
4700		extent_end = key->offset +
4701			btrfs_file_extent_num_bytes(path->nodes[0], fi);
4702	}
4703
4704	if (path->slots[0] == 0 &&
4705	    sctx->cur_inode_last_extent < key->offset) {
4706		/*
4707		 * We might have skipped entire leafs that contained only
4708		 * file extent items for our current inode. These leafs have
4709		 * a generation number smaller (older) than the one in the
4710		 * current leaf and the leaf our last extent came from, and
4711		 * are located between these 2 leafs.
4712		 */
4713		ret = get_last_extent(sctx, key->offset - 1);
4714		if (ret)
4715			return ret;
4716	}
4717
4718	if (sctx->cur_inode_last_extent < key->offset)
4719		ret = send_hole(sctx, key->offset);
4720	sctx->cur_inode_last_extent = extent_end;
4721	return ret;
4722}
4723
4724static int process_extent(struct send_ctx *sctx,
4725			  struct btrfs_path *path,
4726			  struct btrfs_key *key)
4727{
4728	struct clone_root *found_clone = NULL;
4729	int ret = 0;
4730
4731	if (S_ISLNK(sctx->cur_inode_mode))
4732		return 0;
4733
4734	if (sctx->parent_root && !sctx->cur_inode_new) {
4735		ret = is_extent_unchanged(sctx, path, key);
4736		if (ret < 0)
4737			goto out;
4738		if (ret) {
4739			ret = 0;
4740			goto out_hole;
4741		}
4742	} else {
4743		struct btrfs_file_extent_item *ei;
4744		u8 type;
4745
4746		ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
4747				    struct btrfs_file_extent_item);
4748		type = btrfs_file_extent_type(path->nodes[0], ei);
4749		if (type == BTRFS_FILE_EXTENT_PREALLOC ||
4750		    type == BTRFS_FILE_EXTENT_REG) {
4751			/*
4752			 * The send spec does not have a prealloc command yet,
4753			 * so just leave a hole for prealloc'ed extents until
4754			 * we have enough commands queued up to justify rev'ing
4755			 * the send spec.
4756			 */
4757			if (type == BTRFS_FILE_EXTENT_PREALLOC) {
4758				ret = 0;
4759				goto out;
4760			}
4761
4762			/* Have a hole, just skip it. */
4763			if (btrfs_file_extent_disk_bytenr(path->nodes[0], ei) == 0) {
4764				ret = 0;
4765				goto out;
4766			}
4767		}
4768	}
4769
4770	ret = find_extent_clone(sctx, path, key->objectid, key->offset,
4771			sctx->cur_inode_size, &found_clone);
4772	if (ret != -ENOENT && ret < 0)
4773		goto out;
4774
4775	ret = send_write_or_clone(sctx, path, key, found_clone);
4776	if (ret)
4777		goto out;
4778out_hole:
4779	ret = maybe_send_hole(sctx, path, key);
4780out:
4781	return ret;
4782}
4783
4784static int process_all_extents(struct send_ctx *sctx)
4785{
4786	int ret;
4787	struct btrfs_root *root;
4788	struct btrfs_path *path;
4789	struct btrfs_key key;
4790	struct btrfs_key found_key;
4791	struct extent_buffer *eb;
4792	int slot;
4793
4794	root = sctx->send_root;
4795	path = alloc_path_for_send();
4796	if (!path)
4797		return -ENOMEM;
4798
4799	key.objectid = sctx->cmp_key->objectid;
4800	key.type = BTRFS_EXTENT_DATA_KEY;
4801	key.offset = 0;
4802	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4803	if (ret < 0)
4804		goto out;
4805
4806	while (1) {
4807		eb = path->nodes[0];
4808		slot = path->slots[0];
4809
4810		if (slot >= btrfs_header_nritems(eb)) {
4811			ret = btrfs_next_leaf(root, path);
4812			if (ret < 0) {
4813				goto out;
4814			} else if (ret > 0) {
4815				ret = 0;
4816				break;
4817			}
4818			continue;
4819		}
4820
4821		btrfs_item_key_to_cpu(eb, &found_key, slot);
4822
4823		if (found_key.objectid != key.objectid ||
4824		    found_key.type != key.type) {
4825			ret = 0;
4826			goto out;
4827		}
4828
4829		ret = process_extent(sctx, path, &found_key);
4830		if (ret < 0)
4831			goto out;
4832
4833		path->slots[0]++;
4834	}
4835
4836out:
4837	btrfs_free_path(path);
4838	return ret;
4839}
4840
4841static int process_recorded_refs_if_needed(struct send_ctx *sctx, int at_end,
4842					   int *pending_move,
4843					   int *refs_processed)
4844{
4845	int ret = 0;
4846
4847	if (sctx->cur_ino == 0)
4848		goto out;
4849	if (!at_end && sctx->cur_ino == sctx->cmp_key->objectid &&
4850	    sctx->cmp_key->type <= BTRFS_INODE_EXTREF_KEY)
4851		goto out;
4852	if (list_empty(&sctx->new_refs) && list_empty(&sctx->deleted_refs))
4853		goto out;
4854
4855	ret = process_recorded_refs(sctx, pending_move);
4856	if (ret < 0)
4857		goto out;
4858
4859	*refs_processed = 1;
4860out:
4861	return ret;
4862}
4863
4864static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
4865{
4866	int ret = 0;
4867	u64 left_mode;
4868	u64 left_uid;
4869	u64 left_gid;
4870	u64 right_mode;
4871	u64 right_uid;
4872	u64 right_gid;
4873	int need_chmod = 0;
4874	int need_chown = 0;
4875	int pending_move = 0;
4876	int refs_processed = 0;
4877
4878	ret = process_recorded_refs_if_needed(sctx, at_end, &pending_move,
4879					      &refs_processed);
4880	if (ret < 0)
4881		goto out;
4882
4883	/*
4884	 * We have processed the refs and thus need to advance send_progress.
4885	 * Now, calls to get_cur_xxx will take the updated refs of the current
4886	 * inode into account.
4887	 *
4888	 * On the other hand, if our current inode is a directory and couldn't
4889	 * be moved/renamed because its parent was renamed/moved too and it has
4890	 * a higher inode number, we can only move/rename our current inode
4891	 * after we moved/renamed its parent. Therefore in this case operate on
4892	 * the old path (pre move/rename) of our current inode, and the
4893	 * move/rename will be performed later.
4894	 */
4895	if (refs_processed && !pending_move)
4896		sctx->send_progress = sctx->cur_ino + 1;
4897
4898	if (sctx->cur_ino == 0 || sctx->cur_inode_deleted)
4899		goto out;
4900	if (!at_end && sctx->cmp_key->objectid == sctx->cur_ino)
4901		goto out;
4902
4903	ret = get_inode_info(sctx->send_root, sctx->cur_ino, NULL, NULL,
4904			&left_mode, &left_uid, &left_gid, NULL);
4905	if (ret < 0)
4906		goto out;
4907
4908	if (!sctx->parent_root || sctx->cur_inode_new) {
4909		need_chown = 1;
4910		if (!S_ISLNK(sctx->cur_inode_mode))
4911			need_chmod = 1;
4912	} else {
4913		ret = get_inode_info(sctx->parent_root, sctx->cur_ino,
4914				NULL, NULL, &right_mode, &right_uid,
4915				&right_gid, NULL);
4916		if (ret < 0)
4917			goto out;
4918
4919		if (left_uid != right_uid || left_gid != right_gid)
4920			need_chown = 1;
4921		if (!S_ISLNK(sctx->cur_inode_mode) && left_mode != right_mode)
4922			need_chmod = 1;
4923	}
4924
4925	if (S_ISREG(sctx->cur_inode_mode)) {
4926		if (need_send_hole(sctx)) {
4927			if (sctx->cur_inode_last_extent == (u64)-1) {
4928				ret = get_last_extent(sctx, (u64)-1);
4929				if (ret)
4930					goto out;
4931			}
4932			if (sctx->cur_inode_last_extent <
4933			    sctx->cur_inode_size) {
4934				ret = send_hole(sctx, sctx->cur_inode_size);
4935				if (ret)
4936					goto out;
4937			}
4938		}
4939		ret = send_truncate(sctx, sctx->cur_ino, sctx->cur_inode_gen,
4940				sctx->cur_inode_size);
4941		if (ret < 0)
4942			goto out;
4943	}
4944
4945	if (need_chown) {
4946		ret = send_chown(sctx, sctx->cur_ino, sctx->cur_inode_gen,
4947				left_uid, left_gid);
4948		if (ret < 0)
4949			goto out;
4950	}
4951	if (need_chmod) {
4952		ret = send_chmod(sctx, sctx->cur_ino, sctx->cur_inode_gen,
4953				left_mode);
4954		if (ret < 0)
4955			goto out;
4956	}
4957
4958	/*
4959	 * If other directory inodes depended on our current directory
4960	 * inode's move/rename, now do their move/rename operations.
4961	 */
4962	if (!is_waiting_for_move(sctx, sctx->cur_ino)) {
4963		ret = apply_children_dir_moves(sctx);
4964		if (ret)
4965			goto out;
4966	}
4967
4968	/*
4969	 * Need to send that every time, no matter if it actually
4970	 * changed between the two trees as we have done changes to
4971	 * the inode before.
4972	 */
4973	sctx->send_progress = sctx->cur_ino + 1;
4974	ret = send_utimes(sctx, sctx->cur_ino, sctx->cur_inode_gen);
4975	if (ret < 0)
4976		goto out;
4977
4978out:
4979	return ret;
4980}
4981
4982static int changed_inode(struct send_ctx *sctx,
4983			 enum btrfs_compare_tree_result result)
4984{
4985	int ret = 0;
4986	struct btrfs_key *key = sctx->cmp_key;
4987	struct btrfs_inode_item *left_ii = NULL;
4988	struct btrfs_inode_item *right_ii = NULL;
4989	u64 left_gen = 0;
4990	u64 right_gen = 0;
4991
4992	sctx->cur_ino = key->objectid;
4993	sctx->cur_inode_new_gen = 0;
4994	sctx->cur_inode_last_extent = (u64)-1;
4995
4996	/*
4997	 * Set send_progress to current inode. This will tell all get_cur_xxx
4998	 * functions that the current inode's refs are not updated yet. Later,
4999	 * when process_recorded_refs is finished, it is set to cur_ino + 1.
5000	 */
5001	sctx->send_progress = sctx->cur_ino;
5002
5003	if (result == BTRFS_COMPARE_TREE_NEW ||
5004	    result == BTRFS_COMPARE_TREE_CHANGED) {
5005		left_ii = btrfs_item_ptr(sctx->left_path->nodes[0],
5006				sctx->left_path->slots[0],
5007				struct btrfs_inode_item);
5008		left_gen = btrfs_inode_generation(sctx->left_path->nodes[0],
5009				left_ii);
5010	} else {
5011		right_ii = btrfs_item_ptr(sctx->right_path->nodes[0],
5012				sctx->right_path->slots[0],
5013				struct btrfs_inode_item);
5014		right_gen = btrfs_inode_generation(sctx->right_path->nodes[0],
5015				right_ii);
5016	}
5017	if (result == BTRFS_COMPARE_TREE_CHANGED) {
5018		right_ii = btrfs_item_ptr(sctx->right_path->nodes[0],
5019				sctx->right_path->slots[0],
5020				struct btrfs_inode_item);
5021
5022		right_gen = btrfs_inode_generation(sctx->right_path->nodes[0],
5023				right_ii);
5024
5025		/*
5026		 * The cur_ino = root dir case is special here. We can't treat
5027		 * the inode as deleted+reused because it would generate a
5028		 * stream that tries to delete/mkdir the root dir.
5029		 */
5030		if (left_gen != right_gen &&
5031		    sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID)
5032			sctx->cur_inode_new_gen = 1;
5033	}
5034
5035	if (result == BTRFS_COMPARE_TREE_NEW) {
5036		sctx->cur_inode_gen = left_gen;
5037		sctx->cur_inode_new = 1;
5038		sctx->cur_inode_deleted = 0;
5039		sctx->cur_inode_size = btrfs_inode_size(
5040				sctx->left_path->nodes[0], left_ii);
5041		sctx->cur_inode_mode = btrfs_inode_mode(
5042				sctx->left_path->nodes[0], left_ii);
5043		if (sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID)
5044			ret = send_create_inode_if_needed(sctx);
5045	} else if (result == BTRFS_COMPARE_TREE_DELETED) {
5046		sctx->cur_inode_gen = right_gen;
5047		sctx->cur_inode_new = 0;
5048		sctx->cur_inode_deleted = 1;
5049		sctx->cur_inode_size = btrfs_inode_size(
5050				sctx->right_path->nodes[0], right_ii);
5051		sctx->cur_inode_mode = btrfs_inode_mode(
5052				sctx->right_path->nodes[0], right_ii);
5053	} else if (result == BTRFS_COMPARE_TREE_CHANGED) {
5054		/*
5055		 * We need to do some special handling in case the inode was
5056		 * reported as changed with a changed generation number. This
5057		 * means that the original inode was deleted and new inode
5058		 * reused the same inum. So we have to treat the old inode as
5059		 * deleted and the new one as new.
5060		 */
5061		if (sctx->cur_inode_new_gen) {
5062			/*
5063			 * First, process the inode as if it was deleted.
5064			 */
5065			sctx->cur_inode_gen = right_gen;
5066			sctx->cur_inode_new = 0;
5067			sctx->cur_inode_deleted = 1;
5068			sctx->cur_inode_size = btrfs_inode_size(
5069					sctx->right_path->nodes[0], right_ii);
5070			sctx->cur_inode_mode = btrfs_inode_mode(
5071					sctx->right_path->nodes[0], right_ii);
5072			ret = process_all_refs(sctx,
5073					BTRFS_COMPARE_TREE_DELETED);
5074			if (ret < 0)
5075				goto out;
5076
5077			/*
5078			 * Now process the inode as if it was new.
5079			 */
5080			sctx->cur_inode_gen = left_gen;
5081			sctx->cur_inode_new = 1;
5082			sctx->cur_inode_deleted = 0;
5083			sctx->cur_inode_size = btrfs_inode_size(
5084					sctx->left_path->nodes[0], left_ii);
5085			sctx->cur_inode_mode = btrfs_inode_mode(
5086					sctx->left_path->nodes[0], left_ii);
5087			ret = send_create_inode_if_needed(sctx);
5088			if (ret < 0)
5089				goto out;
5090
5091			ret = process_all_refs(sctx, BTRFS_COMPARE_TREE_NEW);
5092			if (ret < 0)
5093				goto out;
5094			/*
5095			 * Advance send_progress now as we did not get into
5096			 * process_recorded_refs_if_needed in the new_gen case.
5097			 */
5098			sctx->send_progress = sctx->cur_ino + 1;
5099
5100			/*
5101			 * Now process all extents and xattrs of the inode as if
5102			 * they were all new.
5103			 */
5104			ret = process_all_extents(sctx);
5105			if (ret < 0)
5106				goto out;
5107			ret = process_all_new_xattrs(sctx);
5108			if (ret < 0)
5109				goto out;
5110		} else {
5111			sctx->cur_inode_gen = left_gen;
5112			sctx->cur_inode_new = 0;
5113			sctx->cur_inode_new_gen = 0;
5114			sctx->cur_inode_deleted = 0;
5115			sctx->cur_inode_size = btrfs_inode_size(
5116					sctx->left_path->nodes[0], left_ii);
5117			sctx->cur_inode_mode = btrfs_inode_mode(
5118					sctx->left_path->nodes[0], left_ii);
5119		}
5120	}
5121
5122out:
5123	return ret;
5124}
5125
5126/*
5127 * We have to process new refs before deleted refs, but compare_trees gives us
5128 * the new and deleted refs mixed. To fix this, we record the new/deleted refs
5129 * first and later process them in process_recorded_refs.
5130 * For the cur_inode_new_gen case, we skip recording completely because
5131 * changed_inode did already initiate processing of refs. The reason for this is
5132 * that in this case, compare_tree actually compares the refs of 2 different
5133 * inodes. To fix this, process_all_refs is used in changed_inode to handle all
5134 * refs of the right tree as deleted and all refs of the left tree as new.
5135 */
5136static int changed_ref(struct send_ctx *sctx,
5137		       enum btrfs_compare_tree_result result)
5138{
5139	int ret = 0;
5140
5141	BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid);
5142
5143	if (!sctx->cur_inode_new_gen &&
5144	    sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) {
5145		if (result == BTRFS_COMPARE_TREE_NEW)
5146			ret = record_new_ref(sctx);
5147		else if (result == BTRFS_COMPARE_TREE_DELETED)
5148			ret = record_deleted_ref(sctx);
5149		else if (result == BTRFS_COMPARE_TREE_CHANGED)
5150			ret = record_changed_ref(sctx);
5151	}
5152
5153	return ret;
5154}
5155
5156/*
5157 * Process new/deleted/changed xattrs. We skip processing in the
5158 * cur_inode_new_gen case because changed_inode did already initiate processing
5159 * of xattrs. The reason is the same as in changed_ref
5160 */
5161static int changed_xattr(struct send_ctx *sctx,
5162			 enum btrfs_compare_tree_result result)
5163{
5164	int ret = 0;
5165
5166	BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid);
5167
5168	if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
5169		if (result == BTRFS_COMPARE_TREE_NEW)
5170			ret = process_new_xattr(sctx);
5171		else if (result == BTRFS_COMPARE_TREE_DELETED)
5172			ret = process_deleted_xattr(sctx);
5173		else if (result == BTRFS_COMPARE_TREE_CHANGED)
5174			ret = process_changed_xattr(sctx);
5175	}
5176
5177	return ret;
5178}
5179
5180/*
5181 * Process new/deleted/changed extents. We skip processing in the
5182 * cur_inode_new_gen case because changed_inode did already initiate processing
5183 * of extents. The reason is the same as in changed_ref
5184 */
5185static int changed_extent(struct send_ctx *sctx,
5186			  enum btrfs_compare_tree_result result)
5187{
5188	int ret = 0;
5189
5190	BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid);
5191
5192	if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
5193		if (result != BTRFS_COMPARE_TREE_DELETED)
5194			ret = process_extent(sctx, sctx->left_path,
5195					sctx->cmp_key);
5196	}
5197
5198	return ret;
5199}
5200
5201static int dir_changed(struct send_ctx *sctx, u64 dir)
5202{
5203	u64 orig_gen, new_gen;
5204	int ret;
5205
5206	ret = get_inode_info(sctx->send_root, dir, NULL, &new_gen, NULL, NULL,
5207			     NULL, NULL);
5208	if (ret)
5209		return ret;
5210
5211	ret = get_inode_info(sctx->parent_root, dir, NULL, &orig_gen, NULL,
5212			     NULL, NULL, NULL);
5213	if (ret)
5214		return ret;
5215
5216	return (orig_gen != new_gen) ? 1 : 0;
5217}
5218
5219static int compare_refs(struct send_ctx *sctx, struct btrfs_path *path,
5220			struct btrfs_key *key)
5221{
5222	struct btrfs_inode_extref *extref;
5223	struct extent_buffer *leaf;
5224	u64 dirid = 0, last_dirid = 0;
5225	unsigned long ptr;
5226	u32 item_size;
5227	u32 cur_offset = 0;
5228	int ref_name_len;
5229	int ret = 0;
5230
5231	/* Easy case, just check this one dirid */
5232	if (key->type == BTRFS_INODE_REF_KEY) {
5233		dirid = key->offset;
5234
5235		ret = dir_changed(sctx, dirid);
5236		goto out;
5237	}
5238
5239	leaf = path->nodes[0];
5240	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
5241	ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
5242	while (cur_offset < item_size) {
5243		extref = (struct btrfs_inode_extref *)(ptr +
5244						       cur_offset);
5245		dirid = btrfs_inode_extref_parent(leaf, extref);
5246		ref_name_len = btrfs_inode_extref_name_len(leaf, extref);
5247		cur_offset += ref_name_len + sizeof(*extref);
5248		if (dirid == last_dirid)
5249			continue;
5250		ret = dir_changed(sctx, dirid);
5251		if (ret)
5252			break;
5253		last_dirid = dirid;
5254	}
5255out:
5256	return ret;
5257}
5258
5259/*
5260 * Updates compare related fields in sctx and simply forwards to the actual
5261 * changed_xxx functions.
5262 */
5263static int changed_cb(struct btrfs_root *left_root,
5264		      struct btrfs_root *right_root,
5265		      struct btrfs_path *left_path,
5266		      struct btrfs_path *right_path,
5267		      struct btrfs_key *key,
5268		      enum btrfs_compare_tree_result result,
5269		      void *ctx)
5270{
5271	int ret = 0;
5272	struct send_ctx *sctx = ctx;
5273
5274	if (result == BTRFS_COMPARE_TREE_SAME) {
5275		if (key->type == BTRFS_INODE_REF_KEY ||
5276		    key->type == BTRFS_INODE_EXTREF_KEY) {
5277			ret = compare_refs(sctx, left_path, key);
5278			if (!ret)
5279				return 0;
5280			if (ret < 0)
5281				return ret;
5282		} else if (key->type == BTRFS_EXTENT_DATA_KEY) {
5283			return maybe_send_hole(sctx, left_path, key);
5284		} else {
5285			return 0;
5286		}
5287		result = BTRFS_COMPARE_TREE_CHANGED;
5288		ret = 0;
5289	}
5290
5291	sctx->left_path = left_path;
5292	sctx->right_path = right_path;
5293	sctx->cmp_key = key;
5294
5295	ret = finish_inode_if_needed(sctx, 0);
5296	if (ret < 0)
5297		goto out;
5298
5299	/* Ignore non-FS objects */
5300	if (key->objectid == BTRFS_FREE_INO_OBJECTID ||
5301	    key->objectid == BTRFS_FREE_SPACE_OBJECTID)
5302		goto out;
5303
5304	if (key->type == BTRFS_INODE_ITEM_KEY)
5305		ret = changed_inode(sctx, result);
5306	else if (key->type == BTRFS_INODE_REF_KEY ||
5307		 key->type == BTRFS_INODE_EXTREF_KEY)
5308		ret = changed_ref(sctx, result);
5309	else if (key->type == BTRFS_XATTR_ITEM_KEY)
5310		ret = changed_xattr(sctx, result);
5311	else if (key->type == BTRFS_EXTENT_DATA_KEY)
5312		ret = changed_extent(sctx, result);
5313
5314out:
5315	return ret;
5316}
5317
5318static int full_send_tree(struct send_ctx *sctx)
5319{
5320	int ret;
5321	struct btrfs_trans_handle *trans = NULL;
5322	struct btrfs_root *send_root = sctx->send_root;
5323	struct btrfs_key key;
5324	struct btrfs_key found_key;
5325	struct btrfs_path *path;
5326	struct extent_buffer *eb;
5327	int slot;
5328	u64 start_ctransid;
5329	u64 ctransid;
5330
5331	path = alloc_path_for_send();
5332	if (!path)
5333		return -ENOMEM;
5334
5335	spin_lock(&send_root->root_item_lock);
5336	start_ctransid = btrfs_root_ctransid(&send_root->root_item);
5337	spin_unlock(&send_root->root_item_lock);
5338
5339	key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5340	key.type = BTRFS_INODE_ITEM_KEY;
5341	key.offset = 0;
5342
5343join_trans:
5344	/*
5345	 * We need to make sure the transaction does not get committed
5346	 * while we do anything on commit roots. Join a transaction to prevent
5347	 * this.
5348	 */
5349	trans = btrfs_join_transaction(send_root);
5350	if (IS_ERR(trans)) {
5351		ret = PTR_ERR(trans);
5352		trans = NULL;
5353		goto out;
5354	}
5355
5356	/*
5357	 * Make sure the tree has not changed after re-joining. We detect this
5358	 * by comparing start_ctransid and ctransid. They should always match.
5359	 */
5360	spin_lock(&send_root->root_item_lock);
5361	ctransid = btrfs_root_ctransid(&send_root->root_item);
5362	spin_unlock(&send_root->root_item_lock);
5363
5364	if (ctransid != start_ctransid) {
5365		WARN(1, KERN_WARNING "BTRFS: the root that you're trying to "
5366				     "send was modified in between. This is "
5367				     "probably a bug.\n");
5368		ret = -EIO;
5369		goto out;
5370	}
5371
5372	ret = btrfs_search_slot_for_read(send_root, &key, path, 1, 0);
5373	if (ret < 0)
5374		goto out;
5375	if (ret)
5376		goto out_finish;
5377
5378	while (1) {
5379		/*
5380		 * When someone want to commit while we iterate, end the
5381		 * joined transaction and rejoin.
5382		 */
5383		if (btrfs_should_end_transaction(trans, send_root)) {
5384			ret = btrfs_end_transaction(trans, send_root);
5385			trans = NULL;
5386			if (ret < 0)
5387				goto out;
5388			btrfs_release_path(path);
5389			goto join_trans;
5390		}
5391
5392		eb = path->nodes[0];
5393		slot = path->slots[0];
5394		btrfs_item_key_to_cpu(eb, &found_key, slot);
5395
5396		ret = changed_cb(send_root, NULL, path, NULL,
5397				&found_key, BTRFS_COMPARE_TREE_NEW, sctx);
5398		if (ret < 0)
5399			goto out;
5400
5401		key.objectid = found_key.objectid;
5402		key.type = found_key.type;
5403		key.offset = found_key.offset + 1;
5404
5405		ret = btrfs_next_item(send_root, path);
5406		if (ret < 0)
5407			goto out;
5408		if (ret) {
5409			ret  = 0;
5410			break;
5411		}
5412	}
5413
5414out_finish:
5415	ret = finish_inode_if_needed(sctx, 1);
5416
5417out:
5418	btrfs_free_path(path);
5419	if (trans) {
5420		if (!ret)
5421			ret = btrfs_end_transaction(trans, send_root);
5422		else
5423			btrfs_end_transaction(trans, send_root);
5424	}
5425	return ret;
5426}
5427
5428static int send_subvol(struct send_ctx *sctx)
5429{
5430	int ret;
5431
5432	if (!(sctx->flags & BTRFS_SEND_FLAG_OMIT_STREAM_HEADER)) {
5433		ret = send_header(sctx);
5434		if (ret < 0)
5435			goto out;
5436	}
5437
5438	ret = send_subvol_begin(sctx);
5439	if (ret < 0)
5440		goto out;
5441
5442	if (sctx->parent_root) {
5443		ret = btrfs_compare_trees(sctx->send_root, sctx->parent_root,
5444				changed_cb, sctx);
5445		if (ret < 0)
5446			goto out;
5447		ret = finish_inode_if_needed(sctx, 1);
5448		if (ret < 0)
5449			goto out;
5450	} else {
5451		ret = full_send_tree(sctx);
5452		if (ret < 0)
5453			goto out;
5454	}
5455
5456out:
5457	free_recorded_refs(sctx);
5458	return ret;
5459}
5460
5461static void btrfs_root_dec_send_in_progress(struct btrfs_root* root)
5462{
5463	spin_lock(&root->root_item_lock);
5464	root->send_in_progress--;
5465	/*
5466	 * Not much left to do, we don't know why it's unbalanced and
5467	 * can't blindly reset it to 0.
5468	 */
5469	if (root->send_in_progress < 0)
5470		btrfs_err(root->fs_info,
5471			"send_in_progres unbalanced %d root %llu\n",
5472			root->send_in_progress, root->root_key.objectid);
5473	spin_unlock(&root->root_item_lock);
5474}
5475
5476long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
5477{
5478	int ret = 0;
5479	struct btrfs_root *send_root;
5480	struct btrfs_root *clone_root;
5481	struct btrfs_fs_info *fs_info;
5482	struct btrfs_ioctl_send_args *arg = NULL;
5483	struct btrfs_key key;
5484	struct send_ctx *sctx = NULL;
5485	u32 i;
5486	u64 *clone_sources_tmp = NULL;
5487	int clone_sources_to_rollback = 0;
5488	int sort_clone_roots = 0;
5489	int index;
5490
5491	if (!capable(CAP_SYS_ADMIN))
5492		return -EPERM;
5493
5494	send_root = BTRFS_I(file_inode(mnt_file))->root;
5495	fs_info = send_root->fs_info;
5496
5497	/*
5498	 * The subvolume must remain read-only during send, protect against
5499	 * making it RW.
5500	 */
5501	spin_lock(&send_root->root_item_lock);
5502	send_root->send_in_progress++;
5503	spin_unlock(&send_root->root_item_lock);
5504
5505	/*
5506	 * This is done when we lookup the root, it should already be complete
5507	 * by the time we get here.
5508	 */
5509	WARN_ON(send_root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE);
5510
5511	/*
5512	 * Userspace tools do the checks and warn the user if it's
5513	 * not RO.
5514	 */
5515	if (!btrfs_root_readonly(send_root)) {
5516		ret = -EPERM;
5517		goto out;
5518	}
5519
5520	arg = memdup_user(arg_, sizeof(*arg));
5521	if (IS_ERR(arg)) {
5522		ret = PTR_ERR(arg);
5523		arg = NULL;
5524		goto out;
5525	}
5526
5527	if (!access_ok(VERIFY_READ, arg->clone_sources,
5528			sizeof(*arg->clone_sources) *
5529			arg->clone_sources_count)) {
5530		ret = -EFAULT;
5531		goto out;
5532	}
5533
5534	if (arg->flags & ~BTRFS_SEND_FLAG_MASK) {
5535		ret = -EINVAL;
5536		goto out;
5537	}
5538
5539	sctx = kzalloc(sizeof(struct send_ctx), GFP_NOFS);
5540	if (!sctx) {
5541		ret = -ENOMEM;
5542		goto out;
5543	}
5544
5545	INIT_LIST_HEAD(&sctx->new_refs);
5546	INIT_LIST_HEAD(&sctx->deleted_refs);
5547	INIT_RADIX_TREE(&sctx->name_cache, GFP_NOFS);
5548	INIT_LIST_HEAD(&sctx->name_cache_list);
5549
5550	sctx->flags = arg->flags;
5551
5552	sctx->send_filp = fget(arg->send_fd);
5553	if (!sctx->send_filp) {
5554		ret = -EBADF;
5555		goto out;
5556	}
5557
5558	sctx->send_root = send_root;
5559	sctx->clone_roots_cnt = arg->clone_sources_count;
5560
5561	sctx->send_max_size = BTRFS_SEND_BUF_SIZE;
5562	sctx->send_buf = vmalloc(sctx->send_max_size);
5563	if (!sctx->send_buf) {
5564		ret = -ENOMEM;
5565		goto out;
5566	}
5567
5568	sctx->read_buf = vmalloc(BTRFS_SEND_READ_SIZE);
5569	if (!sctx->read_buf) {
5570		ret = -ENOMEM;
5571		goto out;
5572	}
5573
5574	sctx->pending_dir_moves = RB_ROOT;
5575	sctx->waiting_dir_moves = RB_ROOT;
5576	sctx->orphan_dirs = RB_ROOT;
5577
5578	sctx->clone_roots = vzalloc(sizeof(struct clone_root) *
5579			(arg->clone_sources_count + 1));
5580	if (!sctx->clone_roots) {
5581		ret = -ENOMEM;
5582		goto out;
5583	}
5584
5585	if (arg->clone_sources_count) {
5586		clone_sources_tmp = vmalloc(arg->clone_sources_count *
5587				sizeof(*arg->clone_sources));
5588		if (!clone_sources_tmp) {
5589			ret = -ENOMEM;
5590			goto out;
5591		}
5592
5593		ret = copy_from_user(clone_sources_tmp, arg->clone_sources,
5594				arg->clone_sources_count *
5595				sizeof(*arg->clone_sources));
5596		if (ret) {
5597			ret = -EFAULT;
5598			goto out;
5599		}
5600
5601		for (i = 0; i < arg->clone_sources_count; i++) {
5602			key.objectid = clone_sources_tmp[i];
5603			key.type = BTRFS_ROOT_ITEM_KEY;
5604			key.offset = (u64)-1;
5605
5606			index = srcu_read_lock(&fs_info->subvol_srcu);
5607
5608			clone_root = btrfs_read_fs_root_no_name(fs_info, &key);
5609			if (IS_ERR(clone_root)) {
5610				srcu_read_unlock(&fs_info->subvol_srcu, index);
5611				ret = PTR_ERR(clone_root);
5612				goto out;
5613			}
5614			clone_sources_to_rollback = i + 1;
5615			spin_lock(&clone_root->root_item_lock);
5616			clone_root->send_in_progress++;
5617			if (!btrfs_root_readonly(clone_root)) {
5618				spin_unlock(&clone_root->root_item_lock);
5619				srcu_read_unlock(&fs_info->subvol_srcu, index);
5620				ret = -EPERM;
5621				goto out;
5622			}
5623			spin_unlock(&clone_root->root_item_lock);
5624			srcu_read_unlock(&fs_info->subvol_srcu, index);
5625
5626			sctx->clone_roots[i].root = clone_root;
5627		}
5628		vfree(clone_sources_tmp);
5629		clone_sources_tmp = NULL;
5630	}
5631
5632	if (arg->parent_root) {
5633		key.objectid = arg->parent_root;
5634		key.type = BTRFS_ROOT_ITEM_KEY;
5635		key.offset = (u64)-1;
5636
5637		index = srcu_read_lock(&fs_info->subvol_srcu);
5638
5639		sctx->parent_root = btrfs_read_fs_root_no_name(fs_info, &key);
5640		if (IS_ERR(sctx->parent_root)) {
5641			srcu_read_unlock(&fs_info->subvol_srcu, index);
5642			ret = PTR_ERR(sctx->parent_root);
5643			goto out;
5644		}
5645
5646		spin_lock(&sctx->parent_root->root_item_lock);
5647		sctx->parent_root->send_in_progress++;
5648		if (!btrfs_root_readonly(sctx->parent_root)) {
5649			spin_unlock(&sctx->parent_root->root_item_lock);
5650			srcu_read_unlock(&fs_info->subvol_srcu, index);
5651			ret = -EPERM;
5652			goto out;
5653		}
5654		spin_unlock(&sctx->parent_root->root_item_lock);
5655
5656		srcu_read_unlock(&fs_info->subvol_srcu, index);
5657	}
5658
5659	/*
5660	 * Clones from send_root are allowed, but only if the clone source
5661	 * is behind the current send position. This is checked while searching
5662	 * for possible clone sources.
5663	 */
5664	sctx->clone_roots[sctx->clone_roots_cnt++].root = sctx->send_root;
5665
5666	/* We do a bsearch later */
5667	sort(sctx->clone_roots, sctx->clone_roots_cnt,
5668			sizeof(*sctx->clone_roots), __clone_root_cmp_sort,
5669			NULL);
5670	sort_clone_roots = 1;
5671
5672	ret = send_subvol(sctx);
5673	if (ret < 0)
5674		goto out;
5675
5676	if (!(sctx->flags & BTRFS_SEND_FLAG_OMIT_END_CMD)) {
5677		ret = begin_cmd(sctx, BTRFS_SEND_C_END);
5678		if (ret < 0)
5679			goto out;
5680		ret = send_cmd(sctx);
5681		if (ret < 0)
5682			goto out;
5683	}
5684
5685out:
5686	WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->pending_dir_moves));
5687	while (sctx && !RB_EMPTY_ROOT(&sctx->pending_dir_moves)) {
5688		struct rb_node *n;
5689		struct pending_dir_move *pm;
5690
5691		n = rb_first(&sctx->pending_dir_moves);
5692		pm = rb_entry(n, struct pending_dir_move, node);
5693		while (!list_empty(&pm->list)) {
5694			struct pending_dir_move *pm2;
5695
5696			pm2 = list_first_entry(&pm->list,
5697					       struct pending_dir_move, list);
5698			free_pending_move(sctx, pm2);
5699		}
5700		free_pending_move(sctx, pm);
5701	}
5702
5703	WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->waiting_dir_moves));
5704	while (sctx && !RB_EMPTY_ROOT(&sctx->waiting_dir_moves)) {
5705		struct rb_node *n;
5706		struct waiting_dir_move *dm;
5707
5708		n = rb_first(&sctx->waiting_dir_moves);
5709		dm = rb_entry(n, struct waiting_dir_move, node);
5710		rb_erase(&dm->node, &sctx->waiting_dir_moves);
5711		kfree(dm);
5712	}
5713
5714	WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->orphan_dirs));
5715	while (sctx && !RB_EMPTY_ROOT(&sctx->orphan_dirs)) {
5716		struct rb_node *n;
5717		struct orphan_dir_info *odi;
5718
5719		n = rb_first(&sctx->orphan_dirs);
5720		odi = rb_entry(n, struct orphan_dir_info, node);
5721		free_orphan_dir_info(sctx, odi);
5722	}
5723
5724	if (sort_clone_roots) {
5725		for (i = 0; i < sctx->clone_roots_cnt; i++)
5726			btrfs_root_dec_send_in_progress(
5727					sctx->clone_roots[i].root);
5728	} else {
5729		for (i = 0; sctx && i < clone_sources_to_rollback; i++)
5730			btrfs_root_dec_send_in_progress(
5731					sctx->clone_roots[i].root);
5732
5733		btrfs_root_dec_send_in_progress(send_root);
5734	}
5735	if (sctx && !IS_ERR_OR_NULL(sctx->parent_root))
5736		btrfs_root_dec_send_in_progress(sctx->parent_root);
5737
5738	kfree(arg);
5739	vfree(clone_sources_tmp);
5740
5741	if (sctx) {
5742		if (sctx->send_filp)
5743			fput(sctx->send_filp);
5744
5745		vfree(sctx->clone_roots);
5746		vfree(sctx->send_buf);
5747		vfree(sctx->read_buf);
5748
5749		name_cache_free(sctx);
5750
5751		kfree(sctx);
5752	}
5753
5754	return ret;
5755}
5756