send.c revision 9c9ca00bd31989f1a3dcbf54e97c979024e44409
1/*
2 * Copyright (C) 2012 Alexander Block.  All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#include <linux/bsearch.h>
20#include <linux/fs.h>
21#include <linux/file.h>
22#include <linux/sort.h>
23#include <linux/mount.h>
24#include <linux/xattr.h>
25#include <linux/posix_acl_xattr.h>
26#include <linux/radix-tree.h>
27#include <linux/vmalloc.h>
28#include <linux/string.h>
29
30#include "send.h"
31#include "backref.h"
32#include "hash.h"
33#include "locking.h"
34#include "disk-io.h"
35#include "btrfs_inode.h"
36#include "transaction.h"
37
38static int g_verbose = 0;
39
40#define verbose_printk(...) if (g_verbose) printk(__VA_ARGS__)
41
42/*
43 * A fs_path is a helper to dynamically build path names with unknown size.
44 * It reallocates the internal buffer on demand.
45 * It allows fast adding of path elements on the right side (normal path) and
46 * fast adding to the left side (reversed path). A reversed path can also be
47 * unreversed if needed.
48 */
49struct fs_path {
50	union {
51		struct {
52			char *start;
53			char *end;
54
55			char *buf;
56			unsigned short buf_len:15;
57			unsigned short reversed:1;
58			char inline_buf[];
59		};
60		/*
61		 * Average path length does not exceed 200 bytes, we'll have
62		 * better packing in the slab and higher chance to satisfy
63		 * a allocation later during send.
64		 */
65		char pad[256];
66	};
67};
68#define FS_PATH_INLINE_SIZE \
69	(sizeof(struct fs_path) - offsetof(struct fs_path, inline_buf))
70
71
72/* reused for each extent */
73struct clone_root {
74	struct btrfs_root *root;
75	u64 ino;
76	u64 offset;
77
78	u64 found_refs;
79};
80
81#define SEND_CTX_MAX_NAME_CACHE_SIZE 128
82#define SEND_CTX_NAME_CACHE_CLEAN_SIZE (SEND_CTX_MAX_NAME_CACHE_SIZE * 2)
83
84struct send_ctx {
85	struct file *send_filp;
86	loff_t send_off;
87	char *send_buf;
88	u32 send_size;
89	u32 send_max_size;
90	u64 total_send_size;
91	u64 cmd_send_size[BTRFS_SEND_C_MAX + 1];
92	u64 flags;	/* 'flags' member of btrfs_ioctl_send_args is u64 */
93
94	struct btrfs_root *send_root;
95	struct btrfs_root *parent_root;
96	struct clone_root *clone_roots;
97	int clone_roots_cnt;
98
99	/* current state of the compare_tree call */
100	struct btrfs_path *left_path;
101	struct btrfs_path *right_path;
102	struct btrfs_key *cmp_key;
103
104	/*
105	 * infos of the currently processed inode. In case of deleted inodes,
106	 * these are the values from the deleted inode.
107	 */
108	u64 cur_ino;
109	u64 cur_inode_gen;
110	int cur_inode_new;
111	int cur_inode_new_gen;
112	int cur_inode_deleted;
113	u64 cur_inode_size;
114	u64 cur_inode_mode;
115	u64 cur_inode_last_extent;
116
117	u64 send_progress;
118
119	struct list_head new_refs;
120	struct list_head deleted_refs;
121
122	struct radix_tree_root name_cache;
123	struct list_head name_cache_list;
124	int name_cache_size;
125
126	char *read_buf;
127
128	/*
129	 * We process inodes by their increasing order, so if before an
130	 * incremental send we reverse the parent/child relationship of
131	 * directories such that a directory with a lower inode number was
132	 * the parent of a directory with a higher inode number, and the one
133	 * becoming the new parent got renamed too, we can't rename/move the
134	 * directory with lower inode number when we finish processing it - we
135	 * must process the directory with higher inode number first, then
136	 * rename/move it and then rename/move the directory with lower inode
137	 * number. Example follows.
138	 *
139	 * Tree state when the first send was performed:
140	 *
141	 * .
142	 * |-- a                   (ino 257)
143	 *     |-- b               (ino 258)
144	 *         |
145	 *         |
146	 *         |-- c           (ino 259)
147	 *         |   |-- d       (ino 260)
148	 *         |
149	 *         |-- c2          (ino 261)
150	 *
151	 * Tree state when the second (incremental) send is performed:
152	 *
153	 * .
154	 * |-- a                   (ino 257)
155	 *     |-- b               (ino 258)
156	 *         |-- c2          (ino 261)
157	 *             |-- d2      (ino 260)
158	 *                 |-- cc  (ino 259)
159	 *
160	 * The sequence of steps that lead to the second state was:
161	 *
162	 * mv /a/b/c/d /a/b/c2/d2
163	 * mv /a/b/c /a/b/c2/d2/cc
164	 *
165	 * "c" has lower inode number, but we can't move it (2nd mv operation)
166	 * before we move "d", which has higher inode number.
167	 *
168	 * So we just memorize which move/rename operations must be performed
169	 * later when their respective parent is processed and moved/renamed.
170	 */
171
172	/* Indexed by parent directory inode number. */
173	struct rb_root pending_dir_moves;
174
175	/*
176	 * Reverse index, indexed by the inode number of a directory that
177	 * is waiting for the move/rename of its immediate parent before its
178	 * own move/rename can be performed.
179	 */
180	struct rb_root waiting_dir_moves;
181
182	/*
183	 * A directory that is going to be rm'ed might have a child directory
184	 * which is in the pending directory moves index above. In this case,
185	 * the directory can only be removed after the move/rename of its child
186	 * is performed. Example:
187	 *
188	 * Parent snapshot:
189	 *
190	 * .                        (ino 256)
191	 * |-- a/                   (ino 257)
192	 *     |-- b/               (ino 258)
193	 *         |-- c/           (ino 259)
194	 *         |   |-- x/       (ino 260)
195	 *         |
196	 *         |-- y/           (ino 261)
197	 *
198	 * Send snapshot:
199	 *
200	 * .                        (ino 256)
201	 * |-- a/                   (ino 257)
202	 *     |-- b/               (ino 258)
203	 *         |-- YY/          (ino 261)
204	 *              |-- x/      (ino 260)
205	 *
206	 * Sequence of steps that lead to the send snapshot:
207	 * rm -f /a/b/c/foo.txt
208	 * mv /a/b/y /a/b/YY
209	 * mv /a/b/c/x /a/b/YY
210	 * rmdir /a/b/c
211	 *
212	 * When the child is processed, its move/rename is delayed until its
213	 * parent is processed (as explained above), but all other operations
214	 * like update utimes, chown, chgrp, etc, are performed and the paths
215	 * that it uses for those operations must use the orphanized name of
216	 * its parent (the directory we're going to rm later), so we need to
217	 * memorize that name.
218	 *
219	 * Indexed by the inode number of the directory to be deleted.
220	 */
221	struct rb_root orphan_dirs;
222};
223
224struct pending_dir_move {
225	struct rb_node node;
226	struct list_head list;
227	u64 parent_ino;
228	u64 ino;
229	u64 gen;
230	struct list_head update_refs;
231};
232
233struct waiting_dir_move {
234	struct rb_node node;
235	u64 ino;
236	/*
237	 * There might be some directory that could not be removed because it
238	 * was waiting for this directory inode to be moved first. Therefore
239	 * after this directory is moved, we can try to rmdir the ino rmdir_ino.
240	 */
241	u64 rmdir_ino;
242};
243
244struct orphan_dir_info {
245	struct rb_node node;
246	u64 ino;
247	u64 gen;
248};
249
250struct name_cache_entry {
251	struct list_head list;
252	/*
253	 * radix_tree has only 32bit entries but we need to handle 64bit inums.
254	 * We use the lower 32bit of the 64bit inum to store it in the tree. If
255	 * more then one inum would fall into the same entry, we use radix_list
256	 * to store the additional entries. radix_list is also used to store
257	 * entries where two entries have the same inum but different
258	 * generations.
259	 */
260	struct list_head radix_list;
261	u64 ino;
262	u64 gen;
263	u64 parent_ino;
264	u64 parent_gen;
265	int ret;
266	int need_later_update;
267	int name_len;
268	char name[];
269};
270
271static int is_waiting_for_move(struct send_ctx *sctx, u64 ino);
272
273static struct waiting_dir_move *
274get_waiting_dir_move(struct send_ctx *sctx, u64 ino);
275
276static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino);
277
278static int need_send_hole(struct send_ctx *sctx)
279{
280	return (sctx->parent_root && !sctx->cur_inode_new &&
281		!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted &&
282		S_ISREG(sctx->cur_inode_mode));
283}
284
285static void fs_path_reset(struct fs_path *p)
286{
287	if (p->reversed) {
288		p->start = p->buf + p->buf_len - 1;
289		p->end = p->start;
290		*p->start = 0;
291	} else {
292		p->start = p->buf;
293		p->end = p->start;
294		*p->start = 0;
295	}
296}
297
298static struct fs_path *fs_path_alloc(void)
299{
300	struct fs_path *p;
301
302	p = kmalloc(sizeof(*p), GFP_NOFS);
303	if (!p)
304		return NULL;
305	p->reversed = 0;
306	p->buf = p->inline_buf;
307	p->buf_len = FS_PATH_INLINE_SIZE;
308	fs_path_reset(p);
309	return p;
310}
311
312static struct fs_path *fs_path_alloc_reversed(void)
313{
314	struct fs_path *p;
315
316	p = fs_path_alloc();
317	if (!p)
318		return NULL;
319	p->reversed = 1;
320	fs_path_reset(p);
321	return p;
322}
323
324static void fs_path_free(struct fs_path *p)
325{
326	if (!p)
327		return;
328	if (p->buf != p->inline_buf)
329		kfree(p->buf);
330	kfree(p);
331}
332
333static int fs_path_len(struct fs_path *p)
334{
335	return p->end - p->start;
336}
337
338static int fs_path_ensure_buf(struct fs_path *p, int len)
339{
340	char *tmp_buf;
341	int path_len;
342	int old_buf_len;
343
344	len++;
345
346	if (p->buf_len >= len)
347		return 0;
348
349	path_len = p->end - p->start;
350	old_buf_len = p->buf_len;
351
352	/*
353	 * First time the inline_buf does not suffice
354	 */
355	if (p->buf == p->inline_buf)
356		tmp_buf = kmalloc(len, GFP_NOFS);
357	else
358		tmp_buf = krealloc(p->buf, len, GFP_NOFS);
359	if (!tmp_buf)
360		return -ENOMEM;
361	p->buf = tmp_buf;
362	/*
363	 * The real size of the buffer is bigger, this will let the fast path
364	 * happen most of the time
365	 */
366	p->buf_len = ksize(p->buf);
367
368	if (p->reversed) {
369		tmp_buf = p->buf + old_buf_len - path_len - 1;
370		p->end = p->buf + p->buf_len - 1;
371		p->start = p->end - path_len;
372		memmove(p->start, tmp_buf, path_len + 1);
373	} else {
374		p->start = p->buf;
375		p->end = p->start + path_len;
376	}
377	return 0;
378}
379
380static int fs_path_prepare_for_add(struct fs_path *p, int name_len,
381				   char **prepared)
382{
383	int ret;
384	int new_len;
385
386	new_len = p->end - p->start + name_len;
387	if (p->start != p->end)
388		new_len++;
389	ret = fs_path_ensure_buf(p, new_len);
390	if (ret < 0)
391		goto out;
392
393	if (p->reversed) {
394		if (p->start != p->end)
395			*--p->start = '/';
396		p->start -= name_len;
397		*prepared = p->start;
398	} else {
399		if (p->start != p->end)
400			*p->end++ = '/';
401		*prepared = p->end;
402		p->end += name_len;
403		*p->end = 0;
404	}
405
406out:
407	return ret;
408}
409
410static int fs_path_add(struct fs_path *p, const char *name, int name_len)
411{
412	int ret;
413	char *prepared;
414
415	ret = fs_path_prepare_for_add(p, name_len, &prepared);
416	if (ret < 0)
417		goto out;
418	memcpy(prepared, name, name_len);
419
420out:
421	return ret;
422}
423
424static int fs_path_add_path(struct fs_path *p, struct fs_path *p2)
425{
426	int ret;
427	char *prepared;
428
429	ret = fs_path_prepare_for_add(p, p2->end - p2->start, &prepared);
430	if (ret < 0)
431		goto out;
432	memcpy(prepared, p2->start, p2->end - p2->start);
433
434out:
435	return ret;
436}
437
438static int fs_path_add_from_extent_buffer(struct fs_path *p,
439					  struct extent_buffer *eb,
440					  unsigned long off, int len)
441{
442	int ret;
443	char *prepared;
444
445	ret = fs_path_prepare_for_add(p, len, &prepared);
446	if (ret < 0)
447		goto out;
448
449	read_extent_buffer(eb, prepared, off, len);
450
451out:
452	return ret;
453}
454
455static int fs_path_copy(struct fs_path *p, struct fs_path *from)
456{
457	int ret;
458
459	p->reversed = from->reversed;
460	fs_path_reset(p);
461
462	ret = fs_path_add_path(p, from);
463
464	return ret;
465}
466
467
468static void fs_path_unreverse(struct fs_path *p)
469{
470	char *tmp;
471	int len;
472
473	if (!p->reversed)
474		return;
475
476	tmp = p->start;
477	len = p->end - p->start;
478	p->start = p->buf;
479	p->end = p->start + len;
480	memmove(p->start, tmp, len + 1);
481	p->reversed = 0;
482}
483
484static struct btrfs_path *alloc_path_for_send(void)
485{
486	struct btrfs_path *path;
487
488	path = btrfs_alloc_path();
489	if (!path)
490		return NULL;
491	path->search_commit_root = 1;
492	path->skip_locking = 1;
493	return path;
494}
495
496static int write_buf(struct file *filp, const void *buf, u32 len, loff_t *off)
497{
498	int ret;
499	mm_segment_t old_fs;
500	u32 pos = 0;
501
502	old_fs = get_fs();
503	set_fs(KERNEL_DS);
504
505	while (pos < len) {
506		ret = vfs_write(filp, (char *)buf + pos, len - pos, off);
507		/* TODO handle that correctly */
508		/*if (ret == -ERESTARTSYS) {
509			continue;
510		}*/
511		if (ret < 0)
512			goto out;
513		if (ret == 0) {
514			ret = -EIO;
515			goto out;
516		}
517		pos += ret;
518	}
519
520	ret = 0;
521
522out:
523	set_fs(old_fs);
524	return ret;
525}
526
527static int tlv_put(struct send_ctx *sctx, u16 attr, const void *data, int len)
528{
529	struct btrfs_tlv_header *hdr;
530	int total_len = sizeof(*hdr) + len;
531	int left = sctx->send_max_size - sctx->send_size;
532
533	if (unlikely(left < total_len))
534		return -EOVERFLOW;
535
536	hdr = (struct btrfs_tlv_header *) (sctx->send_buf + sctx->send_size);
537	hdr->tlv_type = cpu_to_le16(attr);
538	hdr->tlv_len = cpu_to_le16(len);
539	memcpy(hdr + 1, data, len);
540	sctx->send_size += total_len;
541
542	return 0;
543}
544
545#define TLV_PUT_DEFINE_INT(bits) \
546	static int tlv_put_u##bits(struct send_ctx *sctx,	 	\
547			u##bits attr, u##bits value)			\
548	{								\
549		__le##bits __tmp = cpu_to_le##bits(value);		\
550		return tlv_put(sctx, attr, &__tmp, sizeof(__tmp));	\
551	}
552
553TLV_PUT_DEFINE_INT(64)
554
555static int tlv_put_string(struct send_ctx *sctx, u16 attr,
556			  const char *str, int len)
557{
558	if (len == -1)
559		len = strlen(str);
560	return tlv_put(sctx, attr, str, len);
561}
562
563static int tlv_put_uuid(struct send_ctx *sctx, u16 attr,
564			const u8 *uuid)
565{
566	return tlv_put(sctx, attr, uuid, BTRFS_UUID_SIZE);
567}
568
569static int tlv_put_btrfs_timespec(struct send_ctx *sctx, u16 attr,
570				  struct extent_buffer *eb,
571				  struct btrfs_timespec *ts)
572{
573	struct btrfs_timespec bts;
574	read_extent_buffer(eb, &bts, (unsigned long)ts, sizeof(bts));
575	return tlv_put(sctx, attr, &bts, sizeof(bts));
576}
577
578
579#define TLV_PUT(sctx, attrtype, attrlen, data) \
580	do { \
581		ret = tlv_put(sctx, attrtype, attrlen, data); \
582		if (ret < 0) \
583			goto tlv_put_failure; \
584	} while (0)
585
586#define TLV_PUT_INT(sctx, attrtype, bits, value) \
587	do { \
588		ret = tlv_put_u##bits(sctx, attrtype, value); \
589		if (ret < 0) \
590			goto tlv_put_failure; \
591	} while (0)
592
593#define TLV_PUT_U8(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 8, data)
594#define TLV_PUT_U16(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 16, data)
595#define TLV_PUT_U32(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 32, data)
596#define TLV_PUT_U64(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 64, data)
597#define TLV_PUT_STRING(sctx, attrtype, str, len) \
598	do { \
599		ret = tlv_put_string(sctx, attrtype, str, len); \
600		if (ret < 0) \
601			goto tlv_put_failure; \
602	} while (0)
603#define TLV_PUT_PATH(sctx, attrtype, p) \
604	do { \
605		ret = tlv_put_string(sctx, attrtype, p->start, \
606			p->end - p->start); \
607		if (ret < 0) \
608			goto tlv_put_failure; \
609	} while(0)
610#define TLV_PUT_UUID(sctx, attrtype, uuid) \
611	do { \
612		ret = tlv_put_uuid(sctx, attrtype, uuid); \
613		if (ret < 0) \
614			goto tlv_put_failure; \
615	} while (0)
616#define TLV_PUT_BTRFS_TIMESPEC(sctx, attrtype, eb, ts) \
617	do { \
618		ret = tlv_put_btrfs_timespec(sctx, attrtype, eb, ts); \
619		if (ret < 0) \
620			goto tlv_put_failure; \
621	} while (0)
622
623static int send_header(struct send_ctx *sctx)
624{
625	struct btrfs_stream_header hdr;
626
627	strcpy(hdr.magic, BTRFS_SEND_STREAM_MAGIC);
628	hdr.version = cpu_to_le32(BTRFS_SEND_STREAM_VERSION);
629
630	return write_buf(sctx->send_filp, &hdr, sizeof(hdr),
631					&sctx->send_off);
632}
633
634/*
635 * For each command/item we want to send to userspace, we call this function.
636 */
637static int begin_cmd(struct send_ctx *sctx, int cmd)
638{
639	struct btrfs_cmd_header *hdr;
640
641	if (WARN_ON(!sctx->send_buf))
642		return -EINVAL;
643
644	BUG_ON(sctx->send_size);
645
646	sctx->send_size += sizeof(*hdr);
647	hdr = (struct btrfs_cmd_header *)sctx->send_buf;
648	hdr->cmd = cpu_to_le16(cmd);
649
650	return 0;
651}
652
653static int send_cmd(struct send_ctx *sctx)
654{
655	int ret;
656	struct btrfs_cmd_header *hdr;
657	u32 crc;
658
659	hdr = (struct btrfs_cmd_header *)sctx->send_buf;
660	hdr->len = cpu_to_le32(sctx->send_size - sizeof(*hdr));
661	hdr->crc = 0;
662
663	crc = btrfs_crc32c(0, (unsigned char *)sctx->send_buf, sctx->send_size);
664	hdr->crc = cpu_to_le32(crc);
665
666	ret = write_buf(sctx->send_filp, sctx->send_buf, sctx->send_size,
667					&sctx->send_off);
668
669	sctx->total_send_size += sctx->send_size;
670	sctx->cmd_send_size[le16_to_cpu(hdr->cmd)] += sctx->send_size;
671	sctx->send_size = 0;
672
673	return ret;
674}
675
676/*
677 * Sends a move instruction to user space
678 */
679static int send_rename(struct send_ctx *sctx,
680		     struct fs_path *from, struct fs_path *to)
681{
682	int ret;
683
684verbose_printk("btrfs: send_rename %s -> %s\n", from->start, to->start);
685
686	ret = begin_cmd(sctx, BTRFS_SEND_C_RENAME);
687	if (ret < 0)
688		goto out;
689
690	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, from);
691	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_TO, to);
692
693	ret = send_cmd(sctx);
694
695tlv_put_failure:
696out:
697	return ret;
698}
699
700/*
701 * Sends a link instruction to user space
702 */
703static int send_link(struct send_ctx *sctx,
704		     struct fs_path *path, struct fs_path *lnk)
705{
706	int ret;
707
708verbose_printk("btrfs: send_link %s -> %s\n", path->start, lnk->start);
709
710	ret = begin_cmd(sctx, BTRFS_SEND_C_LINK);
711	if (ret < 0)
712		goto out;
713
714	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
715	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, lnk);
716
717	ret = send_cmd(sctx);
718
719tlv_put_failure:
720out:
721	return ret;
722}
723
724/*
725 * Sends an unlink instruction to user space
726 */
727static int send_unlink(struct send_ctx *sctx, struct fs_path *path)
728{
729	int ret;
730
731verbose_printk("btrfs: send_unlink %s\n", path->start);
732
733	ret = begin_cmd(sctx, BTRFS_SEND_C_UNLINK);
734	if (ret < 0)
735		goto out;
736
737	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
738
739	ret = send_cmd(sctx);
740
741tlv_put_failure:
742out:
743	return ret;
744}
745
746/*
747 * Sends a rmdir instruction to user space
748 */
749static int send_rmdir(struct send_ctx *sctx, struct fs_path *path)
750{
751	int ret;
752
753verbose_printk("btrfs: send_rmdir %s\n", path->start);
754
755	ret = begin_cmd(sctx, BTRFS_SEND_C_RMDIR);
756	if (ret < 0)
757		goto out;
758
759	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
760
761	ret = send_cmd(sctx);
762
763tlv_put_failure:
764out:
765	return ret;
766}
767
768/*
769 * Helper function to retrieve some fields from an inode item.
770 */
771static int get_inode_info(struct btrfs_root *root,
772			  u64 ino, u64 *size, u64 *gen,
773			  u64 *mode, u64 *uid, u64 *gid,
774			  u64 *rdev)
775{
776	int ret;
777	struct btrfs_inode_item *ii;
778	struct btrfs_key key;
779	struct btrfs_path *path;
780
781	path = alloc_path_for_send();
782	if (!path)
783		return -ENOMEM;
784
785	key.objectid = ino;
786	key.type = BTRFS_INODE_ITEM_KEY;
787	key.offset = 0;
788	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
789	if (ret < 0)
790		goto out;
791	if (ret) {
792		ret = -ENOENT;
793		goto out;
794	}
795
796	ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
797			struct btrfs_inode_item);
798	if (size)
799		*size = btrfs_inode_size(path->nodes[0], ii);
800	if (gen)
801		*gen = btrfs_inode_generation(path->nodes[0], ii);
802	if (mode)
803		*mode = btrfs_inode_mode(path->nodes[0], ii);
804	if (uid)
805		*uid = btrfs_inode_uid(path->nodes[0], ii);
806	if (gid)
807		*gid = btrfs_inode_gid(path->nodes[0], ii);
808	if (rdev)
809		*rdev = btrfs_inode_rdev(path->nodes[0], ii);
810
811out:
812	btrfs_free_path(path);
813	return ret;
814}
815
816typedef int (*iterate_inode_ref_t)(int num, u64 dir, int index,
817				   struct fs_path *p,
818				   void *ctx);
819
820/*
821 * Helper function to iterate the entries in ONE btrfs_inode_ref or
822 * btrfs_inode_extref.
823 * The iterate callback may return a non zero value to stop iteration. This can
824 * be a negative value for error codes or 1 to simply stop it.
825 *
826 * path must point to the INODE_REF or INODE_EXTREF when called.
827 */
828static int iterate_inode_ref(struct btrfs_root *root, struct btrfs_path *path,
829			     struct btrfs_key *found_key, int resolve,
830			     iterate_inode_ref_t iterate, void *ctx)
831{
832	struct extent_buffer *eb = path->nodes[0];
833	struct btrfs_item *item;
834	struct btrfs_inode_ref *iref;
835	struct btrfs_inode_extref *extref;
836	struct btrfs_path *tmp_path;
837	struct fs_path *p;
838	u32 cur = 0;
839	u32 total;
840	int slot = path->slots[0];
841	u32 name_len;
842	char *start;
843	int ret = 0;
844	int num = 0;
845	int index;
846	u64 dir;
847	unsigned long name_off;
848	unsigned long elem_size;
849	unsigned long ptr;
850
851	p = fs_path_alloc_reversed();
852	if (!p)
853		return -ENOMEM;
854
855	tmp_path = alloc_path_for_send();
856	if (!tmp_path) {
857		fs_path_free(p);
858		return -ENOMEM;
859	}
860
861
862	if (found_key->type == BTRFS_INODE_REF_KEY) {
863		ptr = (unsigned long)btrfs_item_ptr(eb, slot,
864						    struct btrfs_inode_ref);
865		item = btrfs_item_nr(slot);
866		total = btrfs_item_size(eb, item);
867		elem_size = sizeof(*iref);
868	} else {
869		ptr = btrfs_item_ptr_offset(eb, slot);
870		total = btrfs_item_size_nr(eb, slot);
871		elem_size = sizeof(*extref);
872	}
873
874	while (cur < total) {
875		fs_path_reset(p);
876
877		if (found_key->type == BTRFS_INODE_REF_KEY) {
878			iref = (struct btrfs_inode_ref *)(ptr + cur);
879			name_len = btrfs_inode_ref_name_len(eb, iref);
880			name_off = (unsigned long)(iref + 1);
881			index = btrfs_inode_ref_index(eb, iref);
882			dir = found_key->offset;
883		} else {
884			extref = (struct btrfs_inode_extref *)(ptr + cur);
885			name_len = btrfs_inode_extref_name_len(eb, extref);
886			name_off = (unsigned long)&extref->name;
887			index = btrfs_inode_extref_index(eb, extref);
888			dir = btrfs_inode_extref_parent(eb, extref);
889		}
890
891		if (resolve) {
892			start = btrfs_ref_to_path(root, tmp_path, name_len,
893						  name_off, eb, dir,
894						  p->buf, p->buf_len);
895			if (IS_ERR(start)) {
896				ret = PTR_ERR(start);
897				goto out;
898			}
899			if (start < p->buf) {
900				/* overflow , try again with larger buffer */
901				ret = fs_path_ensure_buf(p,
902						p->buf_len + p->buf - start);
903				if (ret < 0)
904					goto out;
905				start = btrfs_ref_to_path(root, tmp_path,
906							  name_len, name_off,
907							  eb, dir,
908							  p->buf, p->buf_len);
909				if (IS_ERR(start)) {
910					ret = PTR_ERR(start);
911					goto out;
912				}
913				BUG_ON(start < p->buf);
914			}
915			p->start = start;
916		} else {
917			ret = fs_path_add_from_extent_buffer(p, eb, name_off,
918							     name_len);
919			if (ret < 0)
920				goto out;
921		}
922
923		cur += elem_size + name_len;
924		ret = iterate(num, dir, index, p, ctx);
925		if (ret)
926			goto out;
927		num++;
928	}
929
930out:
931	btrfs_free_path(tmp_path);
932	fs_path_free(p);
933	return ret;
934}
935
936typedef int (*iterate_dir_item_t)(int num, struct btrfs_key *di_key,
937				  const char *name, int name_len,
938				  const char *data, int data_len,
939				  u8 type, void *ctx);
940
941/*
942 * Helper function to iterate the entries in ONE btrfs_dir_item.
943 * The iterate callback may return a non zero value to stop iteration. This can
944 * be a negative value for error codes or 1 to simply stop it.
945 *
946 * path must point to the dir item when called.
947 */
948static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path,
949			    struct btrfs_key *found_key,
950			    iterate_dir_item_t iterate, void *ctx)
951{
952	int ret = 0;
953	struct extent_buffer *eb;
954	struct btrfs_item *item;
955	struct btrfs_dir_item *di;
956	struct btrfs_key di_key;
957	char *buf = NULL;
958	const int buf_len = PATH_MAX;
959	u32 name_len;
960	u32 data_len;
961	u32 cur;
962	u32 len;
963	u32 total;
964	int slot;
965	int num;
966	u8 type;
967
968	buf = kmalloc(buf_len, GFP_NOFS);
969	if (!buf) {
970		ret = -ENOMEM;
971		goto out;
972	}
973
974	eb = path->nodes[0];
975	slot = path->slots[0];
976	item = btrfs_item_nr(slot);
977	di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
978	cur = 0;
979	len = 0;
980	total = btrfs_item_size(eb, item);
981
982	num = 0;
983	while (cur < total) {
984		name_len = btrfs_dir_name_len(eb, di);
985		data_len = btrfs_dir_data_len(eb, di);
986		type = btrfs_dir_type(eb, di);
987		btrfs_dir_item_key_to_cpu(eb, di, &di_key);
988
989		/*
990		 * Path too long
991		 */
992		if (name_len + data_len > buf_len) {
993			ret = -ENAMETOOLONG;
994			goto out;
995		}
996
997		read_extent_buffer(eb, buf, (unsigned long)(di + 1),
998				name_len + data_len);
999
1000		len = sizeof(*di) + name_len + data_len;
1001		di = (struct btrfs_dir_item *)((char *)di + len);
1002		cur += len;
1003
1004		ret = iterate(num, &di_key, buf, name_len, buf + name_len,
1005				data_len, type, ctx);
1006		if (ret < 0)
1007			goto out;
1008		if (ret) {
1009			ret = 0;
1010			goto out;
1011		}
1012
1013		num++;
1014	}
1015
1016out:
1017	kfree(buf);
1018	return ret;
1019}
1020
1021static int __copy_first_ref(int num, u64 dir, int index,
1022			    struct fs_path *p, void *ctx)
1023{
1024	int ret;
1025	struct fs_path *pt = ctx;
1026
1027	ret = fs_path_copy(pt, p);
1028	if (ret < 0)
1029		return ret;
1030
1031	/* we want the first only */
1032	return 1;
1033}
1034
1035/*
1036 * Retrieve the first path of an inode. If an inode has more then one
1037 * ref/hardlink, this is ignored.
1038 */
1039static int get_inode_path(struct btrfs_root *root,
1040			  u64 ino, struct fs_path *path)
1041{
1042	int ret;
1043	struct btrfs_key key, found_key;
1044	struct btrfs_path *p;
1045
1046	p = alloc_path_for_send();
1047	if (!p)
1048		return -ENOMEM;
1049
1050	fs_path_reset(path);
1051
1052	key.objectid = ino;
1053	key.type = BTRFS_INODE_REF_KEY;
1054	key.offset = 0;
1055
1056	ret = btrfs_search_slot_for_read(root, &key, p, 1, 0);
1057	if (ret < 0)
1058		goto out;
1059	if (ret) {
1060		ret = 1;
1061		goto out;
1062	}
1063	btrfs_item_key_to_cpu(p->nodes[0], &found_key, p->slots[0]);
1064	if (found_key.objectid != ino ||
1065	    (found_key.type != BTRFS_INODE_REF_KEY &&
1066	     found_key.type != BTRFS_INODE_EXTREF_KEY)) {
1067		ret = -ENOENT;
1068		goto out;
1069	}
1070
1071	ret = iterate_inode_ref(root, p, &found_key, 1,
1072				__copy_first_ref, path);
1073	if (ret < 0)
1074		goto out;
1075	ret = 0;
1076
1077out:
1078	btrfs_free_path(p);
1079	return ret;
1080}
1081
1082struct backref_ctx {
1083	struct send_ctx *sctx;
1084
1085	/* number of total found references */
1086	u64 found;
1087
1088	/*
1089	 * used for clones found in send_root. clones found behind cur_objectid
1090	 * and cur_offset are not considered as allowed clones.
1091	 */
1092	u64 cur_objectid;
1093	u64 cur_offset;
1094
1095	/* may be truncated in case it's the last extent in a file */
1096	u64 extent_len;
1097
1098	/* Just to check for bugs in backref resolving */
1099	int found_itself;
1100};
1101
1102static int __clone_root_cmp_bsearch(const void *key, const void *elt)
1103{
1104	u64 root = (u64)(uintptr_t)key;
1105	struct clone_root *cr = (struct clone_root *)elt;
1106
1107	if (root < cr->root->objectid)
1108		return -1;
1109	if (root > cr->root->objectid)
1110		return 1;
1111	return 0;
1112}
1113
1114static int __clone_root_cmp_sort(const void *e1, const void *e2)
1115{
1116	struct clone_root *cr1 = (struct clone_root *)e1;
1117	struct clone_root *cr2 = (struct clone_root *)e2;
1118
1119	if (cr1->root->objectid < cr2->root->objectid)
1120		return -1;
1121	if (cr1->root->objectid > cr2->root->objectid)
1122		return 1;
1123	return 0;
1124}
1125
1126/*
1127 * Called for every backref that is found for the current extent.
1128 * Results are collected in sctx->clone_roots->ino/offset/found_refs
1129 */
1130static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_)
1131{
1132	struct backref_ctx *bctx = ctx_;
1133	struct clone_root *found;
1134	int ret;
1135	u64 i_size;
1136
1137	/* First check if the root is in the list of accepted clone sources */
1138	found = bsearch((void *)(uintptr_t)root, bctx->sctx->clone_roots,
1139			bctx->sctx->clone_roots_cnt,
1140			sizeof(struct clone_root),
1141			__clone_root_cmp_bsearch);
1142	if (!found)
1143		return 0;
1144
1145	if (found->root == bctx->sctx->send_root &&
1146	    ino == bctx->cur_objectid &&
1147	    offset == bctx->cur_offset) {
1148		bctx->found_itself = 1;
1149	}
1150
1151	/*
1152	 * There are inodes that have extents that lie behind its i_size. Don't
1153	 * accept clones from these extents.
1154	 */
1155	ret = get_inode_info(found->root, ino, &i_size, NULL, NULL, NULL, NULL,
1156			NULL);
1157	if (ret < 0)
1158		return ret;
1159
1160	if (offset + bctx->extent_len > i_size)
1161		return 0;
1162
1163	/*
1164	 * Make sure we don't consider clones from send_root that are
1165	 * behind the current inode/offset.
1166	 */
1167	if (found->root == bctx->sctx->send_root) {
1168		/*
1169		 * TODO for the moment we don't accept clones from the inode
1170		 * that is currently send. We may change this when
1171		 * BTRFS_IOC_CLONE_RANGE supports cloning from and to the same
1172		 * file.
1173		 */
1174		if (ino >= bctx->cur_objectid)
1175			return 0;
1176#if 0
1177		if (ino > bctx->cur_objectid)
1178			return 0;
1179		if (offset + bctx->extent_len > bctx->cur_offset)
1180			return 0;
1181#endif
1182	}
1183
1184	bctx->found++;
1185	found->found_refs++;
1186	if (ino < found->ino) {
1187		found->ino = ino;
1188		found->offset = offset;
1189	} else if (found->ino == ino) {
1190		/*
1191		 * same extent found more then once in the same file.
1192		 */
1193		if (found->offset > offset + bctx->extent_len)
1194			found->offset = offset;
1195	}
1196
1197	return 0;
1198}
1199
1200/*
1201 * Given an inode, offset and extent item, it finds a good clone for a clone
1202 * instruction. Returns -ENOENT when none could be found. The function makes
1203 * sure that the returned clone is usable at the point where sending is at the
1204 * moment. This means, that no clones are accepted which lie behind the current
1205 * inode+offset.
1206 *
1207 * path must point to the extent item when called.
1208 */
1209static int find_extent_clone(struct send_ctx *sctx,
1210			     struct btrfs_path *path,
1211			     u64 ino, u64 data_offset,
1212			     u64 ino_size,
1213			     struct clone_root **found)
1214{
1215	int ret;
1216	int extent_type;
1217	u64 logical;
1218	u64 disk_byte;
1219	u64 num_bytes;
1220	u64 extent_item_pos;
1221	u64 flags = 0;
1222	struct btrfs_file_extent_item *fi;
1223	struct extent_buffer *eb = path->nodes[0];
1224	struct backref_ctx *backref_ctx = NULL;
1225	struct clone_root *cur_clone_root;
1226	struct btrfs_key found_key;
1227	struct btrfs_path *tmp_path;
1228	int compressed;
1229	u32 i;
1230
1231	tmp_path = alloc_path_for_send();
1232	if (!tmp_path)
1233		return -ENOMEM;
1234
1235	backref_ctx = kmalloc(sizeof(*backref_ctx), GFP_NOFS);
1236	if (!backref_ctx) {
1237		ret = -ENOMEM;
1238		goto out;
1239	}
1240
1241	if (data_offset >= ino_size) {
1242		/*
1243		 * There may be extents that lie behind the file's size.
1244		 * I at least had this in combination with snapshotting while
1245		 * writing large files.
1246		 */
1247		ret = 0;
1248		goto out;
1249	}
1250
1251	fi = btrfs_item_ptr(eb, path->slots[0],
1252			struct btrfs_file_extent_item);
1253	extent_type = btrfs_file_extent_type(eb, fi);
1254	if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1255		ret = -ENOENT;
1256		goto out;
1257	}
1258	compressed = btrfs_file_extent_compression(eb, fi);
1259
1260	num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1261	disk_byte = btrfs_file_extent_disk_bytenr(eb, fi);
1262	if (disk_byte == 0) {
1263		ret = -ENOENT;
1264		goto out;
1265	}
1266	logical = disk_byte + btrfs_file_extent_offset(eb, fi);
1267
1268	ret = extent_from_logical(sctx->send_root->fs_info, disk_byte, tmp_path,
1269				  &found_key, &flags);
1270	btrfs_release_path(tmp_path);
1271
1272	if (ret < 0)
1273		goto out;
1274	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
1275		ret = -EIO;
1276		goto out;
1277	}
1278
1279	/*
1280	 * Setup the clone roots.
1281	 */
1282	for (i = 0; i < sctx->clone_roots_cnt; i++) {
1283		cur_clone_root = sctx->clone_roots + i;
1284		cur_clone_root->ino = (u64)-1;
1285		cur_clone_root->offset = 0;
1286		cur_clone_root->found_refs = 0;
1287	}
1288
1289	backref_ctx->sctx = sctx;
1290	backref_ctx->found = 0;
1291	backref_ctx->cur_objectid = ino;
1292	backref_ctx->cur_offset = data_offset;
1293	backref_ctx->found_itself = 0;
1294	backref_ctx->extent_len = num_bytes;
1295
1296	/*
1297	 * The last extent of a file may be too large due to page alignment.
1298	 * We need to adjust extent_len in this case so that the checks in
1299	 * __iterate_backrefs work.
1300	 */
1301	if (data_offset + num_bytes >= ino_size)
1302		backref_ctx->extent_len = ino_size - data_offset;
1303
1304	/*
1305	 * Now collect all backrefs.
1306	 */
1307	if (compressed == BTRFS_COMPRESS_NONE)
1308		extent_item_pos = logical - found_key.objectid;
1309	else
1310		extent_item_pos = 0;
1311	ret = iterate_extent_inodes(sctx->send_root->fs_info,
1312					found_key.objectid, extent_item_pos, 1,
1313					__iterate_backrefs, backref_ctx);
1314
1315	if (ret < 0)
1316		goto out;
1317
1318	if (!backref_ctx->found_itself) {
1319		/* found a bug in backref code? */
1320		ret = -EIO;
1321		btrfs_err(sctx->send_root->fs_info, "did not find backref in "
1322				"send_root. inode=%llu, offset=%llu, "
1323				"disk_byte=%llu found extent=%llu\n",
1324				ino, data_offset, disk_byte, found_key.objectid);
1325		goto out;
1326	}
1327
1328verbose_printk(KERN_DEBUG "btrfs: find_extent_clone: data_offset=%llu, "
1329		"ino=%llu, "
1330		"num_bytes=%llu, logical=%llu\n",
1331		data_offset, ino, num_bytes, logical);
1332
1333	if (!backref_ctx->found)
1334		verbose_printk("btrfs:    no clones found\n");
1335
1336	cur_clone_root = NULL;
1337	for (i = 0; i < sctx->clone_roots_cnt; i++) {
1338		if (sctx->clone_roots[i].found_refs) {
1339			if (!cur_clone_root)
1340				cur_clone_root = sctx->clone_roots + i;
1341			else if (sctx->clone_roots[i].root == sctx->send_root)
1342				/* prefer clones from send_root over others */
1343				cur_clone_root = sctx->clone_roots + i;
1344		}
1345
1346	}
1347
1348	if (cur_clone_root) {
1349		if (compressed != BTRFS_COMPRESS_NONE) {
1350			/*
1351			 * Offsets given by iterate_extent_inodes() are relative
1352			 * to the start of the extent, we need to add logical
1353			 * offset from the file extent item.
1354			 * (See why at backref.c:check_extent_in_eb())
1355			 */
1356			cur_clone_root->offset += btrfs_file_extent_offset(eb,
1357									   fi);
1358		}
1359		*found = cur_clone_root;
1360		ret = 0;
1361	} else {
1362		ret = -ENOENT;
1363	}
1364
1365out:
1366	btrfs_free_path(tmp_path);
1367	kfree(backref_ctx);
1368	return ret;
1369}
1370
1371static int read_symlink(struct btrfs_root *root,
1372			u64 ino,
1373			struct fs_path *dest)
1374{
1375	int ret;
1376	struct btrfs_path *path;
1377	struct btrfs_key key;
1378	struct btrfs_file_extent_item *ei;
1379	u8 type;
1380	u8 compression;
1381	unsigned long off;
1382	int len;
1383
1384	path = alloc_path_for_send();
1385	if (!path)
1386		return -ENOMEM;
1387
1388	key.objectid = ino;
1389	key.type = BTRFS_EXTENT_DATA_KEY;
1390	key.offset = 0;
1391	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1392	if (ret < 0)
1393		goto out;
1394	BUG_ON(ret);
1395
1396	ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
1397			struct btrfs_file_extent_item);
1398	type = btrfs_file_extent_type(path->nodes[0], ei);
1399	compression = btrfs_file_extent_compression(path->nodes[0], ei);
1400	BUG_ON(type != BTRFS_FILE_EXTENT_INLINE);
1401	BUG_ON(compression);
1402
1403	off = btrfs_file_extent_inline_start(ei);
1404	len = btrfs_file_extent_inline_len(path->nodes[0], path->slots[0], ei);
1405
1406	ret = fs_path_add_from_extent_buffer(dest, path->nodes[0], off, len);
1407
1408out:
1409	btrfs_free_path(path);
1410	return ret;
1411}
1412
1413/*
1414 * Helper function to generate a file name that is unique in the root of
1415 * send_root and parent_root. This is used to generate names for orphan inodes.
1416 */
1417static int gen_unique_name(struct send_ctx *sctx,
1418			   u64 ino, u64 gen,
1419			   struct fs_path *dest)
1420{
1421	int ret = 0;
1422	struct btrfs_path *path;
1423	struct btrfs_dir_item *di;
1424	char tmp[64];
1425	int len;
1426	u64 idx = 0;
1427
1428	path = alloc_path_for_send();
1429	if (!path)
1430		return -ENOMEM;
1431
1432	while (1) {
1433		len = snprintf(tmp, sizeof(tmp), "o%llu-%llu-%llu",
1434				ino, gen, idx);
1435		ASSERT(len < sizeof(tmp));
1436
1437		di = btrfs_lookup_dir_item(NULL, sctx->send_root,
1438				path, BTRFS_FIRST_FREE_OBJECTID,
1439				tmp, strlen(tmp), 0);
1440		btrfs_release_path(path);
1441		if (IS_ERR(di)) {
1442			ret = PTR_ERR(di);
1443			goto out;
1444		}
1445		if (di) {
1446			/* not unique, try again */
1447			idx++;
1448			continue;
1449		}
1450
1451		if (!sctx->parent_root) {
1452			/* unique */
1453			ret = 0;
1454			break;
1455		}
1456
1457		di = btrfs_lookup_dir_item(NULL, sctx->parent_root,
1458				path, BTRFS_FIRST_FREE_OBJECTID,
1459				tmp, strlen(tmp), 0);
1460		btrfs_release_path(path);
1461		if (IS_ERR(di)) {
1462			ret = PTR_ERR(di);
1463			goto out;
1464		}
1465		if (di) {
1466			/* not unique, try again */
1467			idx++;
1468			continue;
1469		}
1470		/* unique */
1471		break;
1472	}
1473
1474	ret = fs_path_add(dest, tmp, strlen(tmp));
1475
1476out:
1477	btrfs_free_path(path);
1478	return ret;
1479}
1480
1481enum inode_state {
1482	inode_state_no_change,
1483	inode_state_will_create,
1484	inode_state_did_create,
1485	inode_state_will_delete,
1486	inode_state_did_delete,
1487};
1488
1489static int get_cur_inode_state(struct send_ctx *sctx, u64 ino, u64 gen)
1490{
1491	int ret;
1492	int left_ret;
1493	int right_ret;
1494	u64 left_gen;
1495	u64 right_gen;
1496
1497	ret = get_inode_info(sctx->send_root, ino, NULL, &left_gen, NULL, NULL,
1498			NULL, NULL);
1499	if (ret < 0 && ret != -ENOENT)
1500		goto out;
1501	left_ret = ret;
1502
1503	if (!sctx->parent_root) {
1504		right_ret = -ENOENT;
1505	} else {
1506		ret = get_inode_info(sctx->parent_root, ino, NULL, &right_gen,
1507				NULL, NULL, NULL, NULL);
1508		if (ret < 0 && ret != -ENOENT)
1509			goto out;
1510		right_ret = ret;
1511	}
1512
1513	if (!left_ret && !right_ret) {
1514		if (left_gen == gen && right_gen == gen) {
1515			ret = inode_state_no_change;
1516		} else if (left_gen == gen) {
1517			if (ino < sctx->send_progress)
1518				ret = inode_state_did_create;
1519			else
1520				ret = inode_state_will_create;
1521		} else if (right_gen == gen) {
1522			if (ino < sctx->send_progress)
1523				ret = inode_state_did_delete;
1524			else
1525				ret = inode_state_will_delete;
1526		} else  {
1527			ret = -ENOENT;
1528		}
1529	} else if (!left_ret) {
1530		if (left_gen == gen) {
1531			if (ino < sctx->send_progress)
1532				ret = inode_state_did_create;
1533			else
1534				ret = inode_state_will_create;
1535		} else {
1536			ret = -ENOENT;
1537		}
1538	} else if (!right_ret) {
1539		if (right_gen == gen) {
1540			if (ino < sctx->send_progress)
1541				ret = inode_state_did_delete;
1542			else
1543				ret = inode_state_will_delete;
1544		} else {
1545			ret = -ENOENT;
1546		}
1547	} else {
1548		ret = -ENOENT;
1549	}
1550
1551out:
1552	return ret;
1553}
1554
1555static int is_inode_existent(struct send_ctx *sctx, u64 ino, u64 gen)
1556{
1557	int ret;
1558
1559	ret = get_cur_inode_state(sctx, ino, gen);
1560	if (ret < 0)
1561		goto out;
1562
1563	if (ret == inode_state_no_change ||
1564	    ret == inode_state_did_create ||
1565	    ret == inode_state_will_delete)
1566		ret = 1;
1567	else
1568		ret = 0;
1569
1570out:
1571	return ret;
1572}
1573
1574/*
1575 * Helper function to lookup a dir item in a dir.
1576 */
1577static int lookup_dir_item_inode(struct btrfs_root *root,
1578				 u64 dir, const char *name, int name_len,
1579				 u64 *found_inode,
1580				 u8 *found_type)
1581{
1582	int ret = 0;
1583	struct btrfs_dir_item *di;
1584	struct btrfs_key key;
1585	struct btrfs_path *path;
1586
1587	path = alloc_path_for_send();
1588	if (!path)
1589		return -ENOMEM;
1590
1591	di = btrfs_lookup_dir_item(NULL, root, path,
1592			dir, name, name_len, 0);
1593	if (!di) {
1594		ret = -ENOENT;
1595		goto out;
1596	}
1597	if (IS_ERR(di)) {
1598		ret = PTR_ERR(di);
1599		goto out;
1600	}
1601	btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key);
1602	*found_inode = key.objectid;
1603	*found_type = btrfs_dir_type(path->nodes[0], di);
1604
1605out:
1606	btrfs_free_path(path);
1607	return ret;
1608}
1609
1610/*
1611 * Looks up the first btrfs_inode_ref of a given ino. It returns the parent dir,
1612 * generation of the parent dir and the name of the dir entry.
1613 */
1614static int get_first_ref(struct btrfs_root *root, u64 ino,
1615			 u64 *dir, u64 *dir_gen, struct fs_path *name)
1616{
1617	int ret;
1618	struct btrfs_key key;
1619	struct btrfs_key found_key;
1620	struct btrfs_path *path;
1621	int len;
1622	u64 parent_dir;
1623
1624	path = alloc_path_for_send();
1625	if (!path)
1626		return -ENOMEM;
1627
1628	key.objectid = ino;
1629	key.type = BTRFS_INODE_REF_KEY;
1630	key.offset = 0;
1631
1632	ret = btrfs_search_slot_for_read(root, &key, path, 1, 0);
1633	if (ret < 0)
1634		goto out;
1635	if (!ret)
1636		btrfs_item_key_to_cpu(path->nodes[0], &found_key,
1637				path->slots[0]);
1638	if (ret || found_key.objectid != ino ||
1639	    (found_key.type != BTRFS_INODE_REF_KEY &&
1640	     found_key.type != BTRFS_INODE_EXTREF_KEY)) {
1641		ret = -ENOENT;
1642		goto out;
1643	}
1644
1645	if (key.type == BTRFS_INODE_REF_KEY) {
1646		struct btrfs_inode_ref *iref;
1647		iref = btrfs_item_ptr(path->nodes[0], path->slots[0],
1648				      struct btrfs_inode_ref);
1649		len = btrfs_inode_ref_name_len(path->nodes[0], iref);
1650		ret = fs_path_add_from_extent_buffer(name, path->nodes[0],
1651						     (unsigned long)(iref + 1),
1652						     len);
1653		parent_dir = found_key.offset;
1654	} else {
1655		struct btrfs_inode_extref *extref;
1656		extref = btrfs_item_ptr(path->nodes[0], path->slots[0],
1657					struct btrfs_inode_extref);
1658		len = btrfs_inode_extref_name_len(path->nodes[0], extref);
1659		ret = fs_path_add_from_extent_buffer(name, path->nodes[0],
1660					(unsigned long)&extref->name, len);
1661		parent_dir = btrfs_inode_extref_parent(path->nodes[0], extref);
1662	}
1663	if (ret < 0)
1664		goto out;
1665	btrfs_release_path(path);
1666
1667	ret = get_inode_info(root, parent_dir, NULL, dir_gen, NULL, NULL,
1668			NULL, NULL);
1669	if (ret < 0)
1670		goto out;
1671
1672	*dir = parent_dir;
1673
1674out:
1675	btrfs_free_path(path);
1676	return ret;
1677}
1678
1679static int is_first_ref(struct btrfs_root *root,
1680			u64 ino, u64 dir,
1681			const char *name, int name_len)
1682{
1683	int ret;
1684	struct fs_path *tmp_name;
1685	u64 tmp_dir;
1686	u64 tmp_dir_gen;
1687
1688	tmp_name = fs_path_alloc();
1689	if (!tmp_name)
1690		return -ENOMEM;
1691
1692	ret = get_first_ref(root, ino, &tmp_dir, &tmp_dir_gen, tmp_name);
1693	if (ret < 0)
1694		goto out;
1695
1696	if (dir != tmp_dir || name_len != fs_path_len(tmp_name)) {
1697		ret = 0;
1698		goto out;
1699	}
1700
1701	ret = !memcmp(tmp_name->start, name, name_len);
1702
1703out:
1704	fs_path_free(tmp_name);
1705	return ret;
1706}
1707
1708/*
1709 * Used by process_recorded_refs to determine if a new ref would overwrite an
1710 * already existing ref. In case it detects an overwrite, it returns the
1711 * inode/gen in who_ino/who_gen.
1712 * When an overwrite is detected, process_recorded_refs does proper orphanizing
1713 * to make sure later references to the overwritten inode are possible.
1714 * Orphanizing is however only required for the first ref of an inode.
1715 * process_recorded_refs does an additional is_first_ref check to see if
1716 * orphanizing is really required.
1717 */
1718static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen,
1719			      const char *name, int name_len,
1720			      u64 *who_ino, u64 *who_gen)
1721{
1722	int ret = 0;
1723	u64 gen;
1724	u64 other_inode = 0;
1725	u8 other_type = 0;
1726
1727	if (!sctx->parent_root)
1728		goto out;
1729
1730	ret = is_inode_existent(sctx, dir, dir_gen);
1731	if (ret <= 0)
1732		goto out;
1733
1734	/*
1735	 * If we have a parent root we need to verify that the parent dir was
1736	 * not delted and then re-created, if it was then we have no overwrite
1737	 * and we can just unlink this entry.
1738	 */
1739	if (sctx->parent_root) {
1740		ret = get_inode_info(sctx->parent_root, dir, NULL, &gen, NULL,
1741				     NULL, NULL, NULL);
1742		if (ret < 0 && ret != -ENOENT)
1743			goto out;
1744		if (ret) {
1745			ret = 0;
1746			goto out;
1747		}
1748		if (gen != dir_gen)
1749			goto out;
1750	}
1751
1752	ret = lookup_dir_item_inode(sctx->parent_root, dir, name, name_len,
1753			&other_inode, &other_type);
1754	if (ret < 0 && ret != -ENOENT)
1755		goto out;
1756	if (ret) {
1757		ret = 0;
1758		goto out;
1759	}
1760
1761	/*
1762	 * Check if the overwritten ref was already processed. If yes, the ref
1763	 * was already unlinked/moved, so we can safely assume that we will not
1764	 * overwrite anything at this point in time.
1765	 */
1766	if (other_inode > sctx->send_progress) {
1767		ret = get_inode_info(sctx->parent_root, other_inode, NULL,
1768				who_gen, NULL, NULL, NULL, NULL);
1769		if (ret < 0)
1770			goto out;
1771
1772		ret = 1;
1773		*who_ino = other_inode;
1774	} else {
1775		ret = 0;
1776	}
1777
1778out:
1779	return ret;
1780}
1781
1782/*
1783 * Checks if the ref was overwritten by an already processed inode. This is
1784 * used by __get_cur_name_and_parent to find out if the ref was orphanized and
1785 * thus the orphan name needs be used.
1786 * process_recorded_refs also uses it to avoid unlinking of refs that were
1787 * overwritten.
1788 */
1789static int did_overwrite_ref(struct send_ctx *sctx,
1790			    u64 dir, u64 dir_gen,
1791			    u64 ino, u64 ino_gen,
1792			    const char *name, int name_len)
1793{
1794	int ret = 0;
1795	u64 gen;
1796	u64 ow_inode;
1797	u8 other_type;
1798
1799	if (!sctx->parent_root)
1800		goto out;
1801
1802	ret = is_inode_existent(sctx, dir, dir_gen);
1803	if (ret <= 0)
1804		goto out;
1805
1806	/* check if the ref was overwritten by another ref */
1807	ret = lookup_dir_item_inode(sctx->send_root, dir, name, name_len,
1808			&ow_inode, &other_type);
1809	if (ret < 0 && ret != -ENOENT)
1810		goto out;
1811	if (ret) {
1812		/* was never and will never be overwritten */
1813		ret = 0;
1814		goto out;
1815	}
1816
1817	ret = get_inode_info(sctx->send_root, ow_inode, NULL, &gen, NULL, NULL,
1818			NULL, NULL);
1819	if (ret < 0)
1820		goto out;
1821
1822	if (ow_inode == ino && gen == ino_gen) {
1823		ret = 0;
1824		goto out;
1825	}
1826
1827	/* we know that it is or will be overwritten. check this now */
1828	if (ow_inode < sctx->send_progress)
1829		ret = 1;
1830	else
1831		ret = 0;
1832
1833out:
1834	return ret;
1835}
1836
1837/*
1838 * Same as did_overwrite_ref, but also checks if it is the first ref of an inode
1839 * that got overwritten. This is used by process_recorded_refs to determine
1840 * if it has to use the path as returned by get_cur_path or the orphan name.
1841 */
1842static int did_overwrite_first_ref(struct send_ctx *sctx, u64 ino, u64 gen)
1843{
1844	int ret = 0;
1845	struct fs_path *name = NULL;
1846	u64 dir;
1847	u64 dir_gen;
1848
1849	if (!sctx->parent_root)
1850		goto out;
1851
1852	name = fs_path_alloc();
1853	if (!name)
1854		return -ENOMEM;
1855
1856	ret = get_first_ref(sctx->parent_root, ino, &dir, &dir_gen, name);
1857	if (ret < 0)
1858		goto out;
1859
1860	ret = did_overwrite_ref(sctx, dir, dir_gen, ino, gen,
1861			name->start, fs_path_len(name));
1862
1863out:
1864	fs_path_free(name);
1865	return ret;
1866}
1867
1868/*
1869 * Insert a name cache entry. On 32bit kernels the radix tree index is 32bit,
1870 * so we need to do some special handling in case we have clashes. This function
1871 * takes care of this with the help of name_cache_entry::radix_list.
1872 * In case of error, nce is kfreed.
1873 */
1874static int name_cache_insert(struct send_ctx *sctx,
1875			     struct name_cache_entry *nce)
1876{
1877	int ret = 0;
1878	struct list_head *nce_head;
1879
1880	nce_head = radix_tree_lookup(&sctx->name_cache,
1881			(unsigned long)nce->ino);
1882	if (!nce_head) {
1883		nce_head = kmalloc(sizeof(*nce_head), GFP_NOFS);
1884		if (!nce_head) {
1885			kfree(nce);
1886			return -ENOMEM;
1887		}
1888		INIT_LIST_HEAD(nce_head);
1889
1890		ret = radix_tree_insert(&sctx->name_cache, nce->ino, nce_head);
1891		if (ret < 0) {
1892			kfree(nce_head);
1893			kfree(nce);
1894			return ret;
1895		}
1896	}
1897	list_add_tail(&nce->radix_list, nce_head);
1898	list_add_tail(&nce->list, &sctx->name_cache_list);
1899	sctx->name_cache_size++;
1900
1901	return ret;
1902}
1903
1904static void name_cache_delete(struct send_ctx *sctx,
1905			      struct name_cache_entry *nce)
1906{
1907	struct list_head *nce_head;
1908
1909	nce_head = radix_tree_lookup(&sctx->name_cache,
1910			(unsigned long)nce->ino);
1911	if (!nce_head) {
1912		btrfs_err(sctx->send_root->fs_info,
1913	      "name_cache_delete lookup failed ino %llu cache size %d, leaking memory",
1914			nce->ino, sctx->name_cache_size);
1915	}
1916
1917	list_del(&nce->radix_list);
1918	list_del(&nce->list);
1919	sctx->name_cache_size--;
1920
1921	/*
1922	 * We may not get to the final release of nce_head if the lookup fails
1923	 */
1924	if (nce_head && list_empty(nce_head)) {
1925		radix_tree_delete(&sctx->name_cache, (unsigned long)nce->ino);
1926		kfree(nce_head);
1927	}
1928}
1929
1930static struct name_cache_entry *name_cache_search(struct send_ctx *sctx,
1931						    u64 ino, u64 gen)
1932{
1933	struct list_head *nce_head;
1934	struct name_cache_entry *cur;
1935
1936	nce_head = radix_tree_lookup(&sctx->name_cache, (unsigned long)ino);
1937	if (!nce_head)
1938		return NULL;
1939
1940	list_for_each_entry(cur, nce_head, radix_list) {
1941		if (cur->ino == ino && cur->gen == gen)
1942			return cur;
1943	}
1944	return NULL;
1945}
1946
1947/*
1948 * Removes the entry from the list and adds it back to the end. This marks the
1949 * entry as recently used so that name_cache_clean_unused does not remove it.
1950 */
1951static void name_cache_used(struct send_ctx *sctx, struct name_cache_entry *nce)
1952{
1953	list_del(&nce->list);
1954	list_add_tail(&nce->list, &sctx->name_cache_list);
1955}
1956
1957/*
1958 * Remove some entries from the beginning of name_cache_list.
1959 */
1960static void name_cache_clean_unused(struct send_ctx *sctx)
1961{
1962	struct name_cache_entry *nce;
1963
1964	if (sctx->name_cache_size < SEND_CTX_NAME_CACHE_CLEAN_SIZE)
1965		return;
1966
1967	while (sctx->name_cache_size > SEND_CTX_MAX_NAME_CACHE_SIZE) {
1968		nce = list_entry(sctx->name_cache_list.next,
1969				struct name_cache_entry, list);
1970		name_cache_delete(sctx, nce);
1971		kfree(nce);
1972	}
1973}
1974
1975static void name_cache_free(struct send_ctx *sctx)
1976{
1977	struct name_cache_entry *nce;
1978
1979	while (!list_empty(&sctx->name_cache_list)) {
1980		nce = list_entry(sctx->name_cache_list.next,
1981				struct name_cache_entry, list);
1982		name_cache_delete(sctx, nce);
1983		kfree(nce);
1984	}
1985}
1986
1987/*
1988 * Used by get_cur_path for each ref up to the root.
1989 * Returns 0 if it succeeded.
1990 * Returns 1 if the inode is not existent or got overwritten. In that case, the
1991 * name is an orphan name. This instructs get_cur_path to stop iterating. If 1
1992 * is returned, parent_ino/parent_gen are not guaranteed to be valid.
1993 * Returns <0 in case of error.
1994 */
1995static int __get_cur_name_and_parent(struct send_ctx *sctx,
1996				     u64 ino, u64 gen,
1997				     u64 *parent_ino,
1998				     u64 *parent_gen,
1999				     struct fs_path *dest)
2000{
2001	int ret;
2002	int nce_ret;
2003	struct btrfs_path *path = NULL;
2004	struct name_cache_entry *nce = NULL;
2005
2006	/*
2007	 * First check if we already did a call to this function with the same
2008	 * ino/gen. If yes, check if the cache entry is still up-to-date. If yes
2009	 * return the cached result.
2010	 */
2011	nce = name_cache_search(sctx, ino, gen);
2012	if (nce) {
2013		if (ino < sctx->send_progress && nce->need_later_update) {
2014			name_cache_delete(sctx, nce);
2015			kfree(nce);
2016			nce = NULL;
2017		} else {
2018			name_cache_used(sctx, nce);
2019			*parent_ino = nce->parent_ino;
2020			*parent_gen = nce->parent_gen;
2021			ret = fs_path_add(dest, nce->name, nce->name_len);
2022			if (ret < 0)
2023				goto out;
2024			ret = nce->ret;
2025			goto out;
2026		}
2027	}
2028
2029	path = alloc_path_for_send();
2030	if (!path)
2031		return -ENOMEM;
2032
2033	/*
2034	 * If the inode is not existent yet, add the orphan name and return 1.
2035	 * This should only happen for the parent dir that we determine in
2036	 * __record_new_ref
2037	 */
2038	ret = is_inode_existent(sctx, ino, gen);
2039	if (ret < 0)
2040		goto out;
2041
2042	if (!ret) {
2043		ret = gen_unique_name(sctx, ino, gen, dest);
2044		if (ret < 0)
2045			goto out;
2046		ret = 1;
2047		goto out_cache;
2048	}
2049
2050	/*
2051	 * Depending on whether the inode was already processed or not, use
2052	 * send_root or parent_root for ref lookup.
2053	 */
2054	if (ino < sctx->send_progress)
2055		ret = get_first_ref(sctx->send_root, ino,
2056				    parent_ino, parent_gen, dest);
2057	else
2058		ret = get_first_ref(sctx->parent_root, ino,
2059				    parent_ino, parent_gen, dest);
2060	if (ret < 0)
2061		goto out;
2062
2063	/*
2064	 * Check if the ref was overwritten by an inode's ref that was processed
2065	 * earlier. If yes, treat as orphan and return 1.
2066	 */
2067	ret = did_overwrite_ref(sctx, *parent_ino, *parent_gen, ino, gen,
2068			dest->start, dest->end - dest->start);
2069	if (ret < 0)
2070		goto out;
2071	if (ret) {
2072		fs_path_reset(dest);
2073		ret = gen_unique_name(sctx, ino, gen, dest);
2074		if (ret < 0)
2075			goto out;
2076		ret = 1;
2077	}
2078
2079out_cache:
2080	/*
2081	 * Store the result of the lookup in the name cache.
2082	 */
2083	nce = kmalloc(sizeof(*nce) + fs_path_len(dest) + 1, GFP_NOFS);
2084	if (!nce) {
2085		ret = -ENOMEM;
2086		goto out;
2087	}
2088
2089	nce->ino = ino;
2090	nce->gen = gen;
2091	nce->parent_ino = *parent_ino;
2092	nce->parent_gen = *parent_gen;
2093	nce->name_len = fs_path_len(dest);
2094	nce->ret = ret;
2095	strcpy(nce->name, dest->start);
2096
2097	if (ino < sctx->send_progress)
2098		nce->need_later_update = 0;
2099	else
2100		nce->need_later_update = 1;
2101
2102	nce_ret = name_cache_insert(sctx, nce);
2103	if (nce_ret < 0)
2104		ret = nce_ret;
2105	name_cache_clean_unused(sctx);
2106
2107out:
2108	btrfs_free_path(path);
2109	return ret;
2110}
2111
2112/*
2113 * Magic happens here. This function returns the first ref to an inode as it
2114 * would look like while receiving the stream at this point in time.
2115 * We walk the path up to the root. For every inode in between, we check if it
2116 * was already processed/sent. If yes, we continue with the parent as found
2117 * in send_root. If not, we continue with the parent as found in parent_root.
2118 * If we encounter an inode that was deleted at this point in time, we use the
2119 * inodes "orphan" name instead of the real name and stop. Same with new inodes
2120 * that were not created yet and overwritten inodes/refs.
2121 *
2122 * When do we have have orphan inodes:
2123 * 1. When an inode is freshly created and thus no valid refs are available yet
2124 * 2. When a directory lost all it's refs (deleted) but still has dir items
2125 *    inside which were not processed yet (pending for move/delete). If anyone
2126 *    tried to get the path to the dir items, it would get a path inside that
2127 *    orphan directory.
2128 * 3. When an inode is moved around or gets new links, it may overwrite the ref
2129 *    of an unprocessed inode. If in that case the first ref would be
2130 *    overwritten, the overwritten inode gets "orphanized". Later when we
2131 *    process this overwritten inode, it is restored at a new place by moving
2132 *    the orphan inode.
2133 *
2134 * sctx->send_progress tells this function at which point in time receiving
2135 * would be.
2136 */
2137static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen,
2138			struct fs_path *dest)
2139{
2140	int ret = 0;
2141	struct fs_path *name = NULL;
2142	u64 parent_inode = 0;
2143	u64 parent_gen = 0;
2144	int stop = 0;
2145
2146	name = fs_path_alloc();
2147	if (!name) {
2148		ret = -ENOMEM;
2149		goto out;
2150	}
2151
2152	dest->reversed = 1;
2153	fs_path_reset(dest);
2154
2155	while (!stop && ino != BTRFS_FIRST_FREE_OBJECTID) {
2156		fs_path_reset(name);
2157
2158		if (is_waiting_for_rm(sctx, ino)) {
2159			ret = gen_unique_name(sctx, ino, gen, name);
2160			if (ret < 0)
2161				goto out;
2162			ret = fs_path_add_path(dest, name);
2163			break;
2164		}
2165
2166		if (is_waiting_for_move(sctx, ino)) {
2167			ret = get_first_ref(sctx->parent_root, ino,
2168					    &parent_inode, &parent_gen, name);
2169		} else {
2170			ret = __get_cur_name_and_parent(sctx, ino, gen,
2171							&parent_inode,
2172							&parent_gen, name);
2173			if (ret)
2174				stop = 1;
2175		}
2176
2177		if (ret < 0)
2178			goto out;
2179
2180		ret = fs_path_add_path(dest, name);
2181		if (ret < 0)
2182			goto out;
2183
2184		ino = parent_inode;
2185		gen = parent_gen;
2186	}
2187
2188out:
2189	fs_path_free(name);
2190	if (!ret)
2191		fs_path_unreverse(dest);
2192	return ret;
2193}
2194
2195/*
2196 * Sends a BTRFS_SEND_C_SUBVOL command/item to userspace
2197 */
2198static int send_subvol_begin(struct send_ctx *sctx)
2199{
2200	int ret;
2201	struct btrfs_root *send_root = sctx->send_root;
2202	struct btrfs_root *parent_root = sctx->parent_root;
2203	struct btrfs_path *path;
2204	struct btrfs_key key;
2205	struct btrfs_root_ref *ref;
2206	struct extent_buffer *leaf;
2207	char *name = NULL;
2208	int namelen;
2209
2210	path = btrfs_alloc_path();
2211	if (!path)
2212		return -ENOMEM;
2213
2214	name = kmalloc(BTRFS_PATH_NAME_MAX, GFP_NOFS);
2215	if (!name) {
2216		btrfs_free_path(path);
2217		return -ENOMEM;
2218	}
2219
2220	key.objectid = send_root->objectid;
2221	key.type = BTRFS_ROOT_BACKREF_KEY;
2222	key.offset = 0;
2223
2224	ret = btrfs_search_slot_for_read(send_root->fs_info->tree_root,
2225				&key, path, 1, 0);
2226	if (ret < 0)
2227		goto out;
2228	if (ret) {
2229		ret = -ENOENT;
2230		goto out;
2231	}
2232
2233	leaf = path->nodes[0];
2234	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
2235	if (key.type != BTRFS_ROOT_BACKREF_KEY ||
2236	    key.objectid != send_root->objectid) {
2237		ret = -ENOENT;
2238		goto out;
2239	}
2240	ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref);
2241	namelen = btrfs_root_ref_name_len(leaf, ref);
2242	read_extent_buffer(leaf, name, (unsigned long)(ref + 1), namelen);
2243	btrfs_release_path(path);
2244
2245	if (parent_root) {
2246		ret = begin_cmd(sctx, BTRFS_SEND_C_SNAPSHOT);
2247		if (ret < 0)
2248			goto out;
2249	} else {
2250		ret = begin_cmd(sctx, BTRFS_SEND_C_SUBVOL);
2251		if (ret < 0)
2252			goto out;
2253	}
2254
2255	TLV_PUT_STRING(sctx, BTRFS_SEND_A_PATH, name, namelen);
2256	TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID,
2257			sctx->send_root->root_item.uuid);
2258	TLV_PUT_U64(sctx, BTRFS_SEND_A_CTRANSID,
2259		    le64_to_cpu(sctx->send_root->root_item.ctransid));
2260	if (parent_root) {
2261		TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
2262				sctx->parent_root->root_item.uuid);
2263		TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID,
2264			    le64_to_cpu(sctx->parent_root->root_item.ctransid));
2265	}
2266
2267	ret = send_cmd(sctx);
2268
2269tlv_put_failure:
2270out:
2271	btrfs_free_path(path);
2272	kfree(name);
2273	return ret;
2274}
2275
2276static int send_truncate(struct send_ctx *sctx, u64 ino, u64 gen, u64 size)
2277{
2278	int ret = 0;
2279	struct fs_path *p;
2280
2281verbose_printk("btrfs: send_truncate %llu size=%llu\n", ino, size);
2282
2283	p = fs_path_alloc();
2284	if (!p)
2285		return -ENOMEM;
2286
2287	ret = begin_cmd(sctx, BTRFS_SEND_C_TRUNCATE);
2288	if (ret < 0)
2289		goto out;
2290
2291	ret = get_cur_path(sctx, ino, gen, p);
2292	if (ret < 0)
2293		goto out;
2294	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
2295	TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, size);
2296
2297	ret = send_cmd(sctx);
2298
2299tlv_put_failure:
2300out:
2301	fs_path_free(p);
2302	return ret;
2303}
2304
2305static int send_chmod(struct send_ctx *sctx, u64 ino, u64 gen, u64 mode)
2306{
2307	int ret = 0;
2308	struct fs_path *p;
2309
2310verbose_printk("btrfs: send_chmod %llu mode=%llu\n", ino, mode);
2311
2312	p = fs_path_alloc();
2313	if (!p)
2314		return -ENOMEM;
2315
2316	ret = begin_cmd(sctx, BTRFS_SEND_C_CHMOD);
2317	if (ret < 0)
2318		goto out;
2319
2320	ret = get_cur_path(sctx, ino, gen, p);
2321	if (ret < 0)
2322		goto out;
2323	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
2324	TLV_PUT_U64(sctx, BTRFS_SEND_A_MODE, mode & 07777);
2325
2326	ret = send_cmd(sctx);
2327
2328tlv_put_failure:
2329out:
2330	fs_path_free(p);
2331	return ret;
2332}
2333
2334static int send_chown(struct send_ctx *sctx, u64 ino, u64 gen, u64 uid, u64 gid)
2335{
2336	int ret = 0;
2337	struct fs_path *p;
2338
2339verbose_printk("btrfs: send_chown %llu uid=%llu, gid=%llu\n", ino, uid, gid);
2340
2341	p = fs_path_alloc();
2342	if (!p)
2343		return -ENOMEM;
2344
2345	ret = begin_cmd(sctx, BTRFS_SEND_C_CHOWN);
2346	if (ret < 0)
2347		goto out;
2348
2349	ret = get_cur_path(sctx, ino, gen, p);
2350	if (ret < 0)
2351		goto out;
2352	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
2353	TLV_PUT_U64(sctx, BTRFS_SEND_A_UID, uid);
2354	TLV_PUT_U64(sctx, BTRFS_SEND_A_GID, gid);
2355
2356	ret = send_cmd(sctx);
2357
2358tlv_put_failure:
2359out:
2360	fs_path_free(p);
2361	return ret;
2362}
2363
2364static int send_utimes(struct send_ctx *sctx, u64 ino, u64 gen)
2365{
2366	int ret = 0;
2367	struct fs_path *p = NULL;
2368	struct btrfs_inode_item *ii;
2369	struct btrfs_path *path = NULL;
2370	struct extent_buffer *eb;
2371	struct btrfs_key key;
2372	int slot;
2373
2374verbose_printk("btrfs: send_utimes %llu\n", ino);
2375
2376	p = fs_path_alloc();
2377	if (!p)
2378		return -ENOMEM;
2379
2380	path = alloc_path_for_send();
2381	if (!path) {
2382		ret = -ENOMEM;
2383		goto out;
2384	}
2385
2386	key.objectid = ino;
2387	key.type = BTRFS_INODE_ITEM_KEY;
2388	key.offset = 0;
2389	ret = btrfs_search_slot(NULL, sctx->send_root, &key, path, 0, 0);
2390	if (ret < 0)
2391		goto out;
2392
2393	eb = path->nodes[0];
2394	slot = path->slots[0];
2395	ii = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
2396
2397	ret = begin_cmd(sctx, BTRFS_SEND_C_UTIMES);
2398	if (ret < 0)
2399		goto out;
2400
2401	ret = get_cur_path(sctx, ino, gen, p);
2402	if (ret < 0)
2403		goto out;
2404	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
2405	TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_ATIME, eb,
2406			btrfs_inode_atime(ii));
2407	TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_MTIME, eb,
2408			btrfs_inode_mtime(ii));
2409	TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_CTIME, eb,
2410			btrfs_inode_ctime(ii));
2411	/* TODO Add otime support when the otime patches get into upstream */
2412
2413	ret = send_cmd(sctx);
2414
2415tlv_put_failure:
2416out:
2417	fs_path_free(p);
2418	btrfs_free_path(path);
2419	return ret;
2420}
2421
2422/*
2423 * Sends a BTRFS_SEND_C_MKXXX or SYMLINK command to user space. We don't have
2424 * a valid path yet because we did not process the refs yet. So, the inode
2425 * is created as orphan.
2426 */
2427static int send_create_inode(struct send_ctx *sctx, u64 ino)
2428{
2429	int ret = 0;
2430	struct fs_path *p;
2431	int cmd;
2432	u64 gen;
2433	u64 mode;
2434	u64 rdev;
2435
2436verbose_printk("btrfs: send_create_inode %llu\n", ino);
2437
2438	p = fs_path_alloc();
2439	if (!p)
2440		return -ENOMEM;
2441
2442	ret = get_inode_info(sctx->send_root, ino, NULL, &gen, &mode, NULL,
2443			NULL, &rdev);
2444	if (ret < 0)
2445		goto out;
2446
2447	if (S_ISREG(mode)) {
2448		cmd = BTRFS_SEND_C_MKFILE;
2449	} else if (S_ISDIR(mode)) {
2450		cmd = BTRFS_SEND_C_MKDIR;
2451	} else if (S_ISLNK(mode)) {
2452		cmd = BTRFS_SEND_C_SYMLINK;
2453	} else if (S_ISCHR(mode) || S_ISBLK(mode)) {
2454		cmd = BTRFS_SEND_C_MKNOD;
2455	} else if (S_ISFIFO(mode)) {
2456		cmd = BTRFS_SEND_C_MKFIFO;
2457	} else if (S_ISSOCK(mode)) {
2458		cmd = BTRFS_SEND_C_MKSOCK;
2459	} else {
2460		printk(KERN_WARNING "btrfs: unexpected inode type %o",
2461				(int)(mode & S_IFMT));
2462		ret = -ENOTSUPP;
2463		goto out;
2464	}
2465
2466	ret = begin_cmd(sctx, cmd);
2467	if (ret < 0)
2468		goto out;
2469
2470	ret = gen_unique_name(sctx, ino, gen, p);
2471	if (ret < 0)
2472		goto out;
2473
2474	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
2475	TLV_PUT_U64(sctx, BTRFS_SEND_A_INO, ino);
2476
2477	if (S_ISLNK(mode)) {
2478		fs_path_reset(p);
2479		ret = read_symlink(sctx->send_root, ino, p);
2480		if (ret < 0)
2481			goto out;
2482		TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, p);
2483	} else if (S_ISCHR(mode) || S_ISBLK(mode) ||
2484		   S_ISFIFO(mode) || S_ISSOCK(mode)) {
2485		TLV_PUT_U64(sctx, BTRFS_SEND_A_RDEV, new_encode_dev(rdev));
2486		TLV_PUT_U64(sctx, BTRFS_SEND_A_MODE, mode);
2487	}
2488
2489	ret = send_cmd(sctx);
2490	if (ret < 0)
2491		goto out;
2492
2493
2494tlv_put_failure:
2495out:
2496	fs_path_free(p);
2497	return ret;
2498}
2499
2500/*
2501 * We need some special handling for inodes that get processed before the parent
2502 * directory got created. See process_recorded_refs for details.
2503 * This function does the check if we already created the dir out of order.
2504 */
2505static int did_create_dir(struct send_ctx *sctx, u64 dir)
2506{
2507	int ret = 0;
2508	struct btrfs_path *path = NULL;
2509	struct btrfs_key key;
2510	struct btrfs_key found_key;
2511	struct btrfs_key di_key;
2512	struct extent_buffer *eb;
2513	struct btrfs_dir_item *di;
2514	int slot;
2515
2516	path = alloc_path_for_send();
2517	if (!path) {
2518		ret = -ENOMEM;
2519		goto out;
2520	}
2521
2522	key.objectid = dir;
2523	key.type = BTRFS_DIR_INDEX_KEY;
2524	key.offset = 0;
2525	ret = btrfs_search_slot(NULL, sctx->send_root, &key, path, 0, 0);
2526	if (ret < 0)
2527		goto out;
2528
2529	while (1) {
2530		eb = path->nodes[0];
2531		slot = path->slots[0];
2532		if (slot >= btrfs_header_nritems(eb)) {
2533			ret = btrfs_next_leaf(sctx->send_root, path);
2534			if (ret < 0) {
2535				goto out;
2536			} else if (ret > 0) {
2537				ret = 0;
2538				break;
2539			}
2540			continue;
2541		}
2542
2543		btrfs_item_key_to_cpu(eb, &found_key, slot);
2544		if (found_key.objectid != key.objectid ||
2545		    found_key.type != key.type) {
2546			ret = 0;
2547			goto out;
2548		}
2549
2550		di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
2551		btrfs_dir_item_key_to_cpu(eb, di, &di_key);
2552
2553		if (di_key.type != BTRFS_ROOT_ITEM_KEY &&
2554		    di_key.objectid < sctx->send_progress) {
2555			ret = 1;
2556			goto out;
2557		}
2558
2559		path->slots[0]++;
2560	}
2561
2562out:
2563	btrfs_free_path(path);
2564	return ret;
2565}
2566
2567/*
2568 * Only creates the inode if it is:
2569 * 1. Not a directory
2570 * 2. Or a directory which was not created already due to out of order
2571 *    directories. See did_create_dir and process_recorded_refs for details.
2572 */
2573static int send_create_inode_if_needed(struct send_ctx *sctx)
2574{
2575	int ret;
2576
2577	if (S_ISDIR(sctx->cur_inode_mode)) {
2578		ret = did_create_dir(sctx, sctx->cur_ino);
2579		if (ret < 0)
2580			goto out;
2581		if (ret) {
2582			ret = 0;
2583			goto out;
2584		}
2585	}
2586
2587	ret = send_create_inode(sctx, sctx->cur_ino);
2588	if (ret < 0)
2589		goto out;
2590
2591out:
2592	return ret;
2593}
2594
2595struct recorded_ref {
2596	struct list_head list;
2597	char *dir_path;
2598	char *name;
2599	struct fs_path *full_path;
2600	u64 dir;
2601	u64 dir_gen;
2602	int dir_path_len;
2603	int name_len;
2604};
2605
2606/*
2607 * We need to process new refs before deleted refs, but compare_tree gives us
2608 * everything mixed. So we first record all refs and later process them.
2609 * This function is a helper to record one ref.
2610 */
2611static int record_ref(struct list_head *head, u64 dir,
2612		      u64 dir_gen, struct fs_path *path)
2613{
2614	struct recorded_ref *ref;
2615
2616	ref = kmalloc(sizeof(*ref), GFP_NOFS);
2617	if (!ref)
2618		return -ENOMEM;
2619
2620	ref->dir = dir;
2621	ref->dir_gen = dir_gen;
2622	ref->full_path = path;
2623
2624	ref->name = (char *)kbasename(ref->full_path->start);
2625	ref->name_len = ref->full_path->end - ref->name;
2626	ref->dir_path = ref->full_path->start;
2627	if (ref->name == ref->full_path->start)
2628		ref->dir_path_len = 0;
2629	else
2630		ref->dir_path_len = ref->full_path->end -
2631				ref->full_path->start - 1 - ref->name_len;
2632
2633	list_add_tail(&ref->list, head);
2634	return 0;
2635}
2636
2637static int dup_ref(struct recorded_ref *ref, struct list_head *list)
2638{
2639	struct recorded_ref *new;
2640
2641	new = kmalloc(sizeof(*ref), GFP_NOFS);
2642	if (!new)
2643		return -ENOMEM;
2644
2645	new->dir = ref->dir;
2646	new->dir_gen = ref->dir_gen;
2647	new->full_path = NULL;
2648	INIT_LIST_HEAD(&new->list);
2649	list_add_tail(&new->list, list);
2650	return 0;
2651}
2652
2653static void __free_recorded_refs(struct list_head *head)
2654{
2655	struct recorded_ref *cur;
2656
2657	while (!list_empty(head)) {
2658		cur = list_entry(head->next, struct recorded_ref, list);
2659		fs_path_free(cur->full_path);
2660		list_del(&cur->list);
2661		kfree(cur);
2662	}
2663}
2664
2665static void free_recorded_refs(struct send_ctx *sctx)
2666{
2667	__free_recorded_refs(&sctx->new_refs);
2668	__free_recorded_refs(&sctx->deleted_refs);
2669}
2670
2671/*
2672 * Renames/moves a file/dir to its orphan name. Used when the first
2673 * ref of an unprocessed inode gets overwritten and for all non empty
2674 * directories.
2675 */
2676static int orphanize_inode(struct send_ctx *sctx, u64 ino, u64 gen,
2677			  struct fs_path *path)
2678{
2679	int ret;
2680	struct fs_path *orphan;
2681
2682	orphan = fs_path_alloc();
2683	if (!orphan)
2684		return -ENOMEM;
2685
2686	ret = gen_unique_name(sctx, ino, gen, orphan);
2687	if (ret < 0)
2688		goto out;
2689
2690	ret = send_rename(sctx, path, orphan);
2691
2692out:
2693	fs_path_free(orphan);
2694	return ret;
2695}
2696
2697static struct orphan_dir_info *
2698add_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino)
2699{
2700	struct rb_node **p = &sctx->orphan_dirs.rb_node;
2701	struct rb_node *parent = NULL;
2702	struct orphan_dir_info *entry, *odi;
2703
2704	odi = kmalloc(sizeof(*odi), GFP_NOFS);
2705	if (!odi)
2706		return ERR_PTR(-ENOMEM);
2707	odi->ino = dir_ino;
2708	odi->gen = 0;
2709
2710	while (*p) {
2711		parent = *p;
2712		entry = rb_entry(parent, struct orphan_dir_info, node);
2713		if (dir_ino < entry->ino) {
2714			p = &(*p)->rb_left;
2715		} else if (dir_ino > entry->ino) {
2716			p = &(*p)->rb_right;
2717		} else {
2718			kfree(odi);
2719			return entry;
2720		}
2721	}
2722
2723	rb_link_node(&odi->node, parent, p);
2724	rb_insert_color(&odi->node, &sctx->orphan_dirs);
2725	return odi;
2726}
2727
2728static struct orphan_dir_info *
2729get_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino)
2730{
2731	struct rb_node *n = sctx->orphan_dirs.rb_node;
2732	struct orphan_dir_info *entry;
2733
2734	while (n) {
2735		entry = rb_entry(n, struct orphan_dir_info, node);
2736		if (dir_ino < entry->ino)
2737			n = n->rb_left;
2738		else if (dir_ino > entry->ino)
2739			n = n->rb_right;
2740		else
2741			return entry;
2742	}
2743	return NULL;
2744}
2745
2746static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino)
2747{
2748	struct orphan_dir_info *odi = get_orphan_dir_info(sctx, dir_ino);
2749
2750	return odi != NULL;
2751}
2752
2753static void free_orphan_dir_info(struct send_ctx *sctx,
2754				 struct orphan_dir_info *odi)
2755{
2756	if (!odi)
2757		return;
2758	rb_erase(&odi->node, &sctx->orphan_dirs);
2759	kfree(odi);
2760}
2761
2762/*
2763 * Returns 1 if a directory can be removed at this point in time.
2764 * We check this by iterating all dir items and checking if the inode behind
2765 * the dir item was already processed.
2766 */
2767static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen,
2768		     u64 send_progress)
2769{
2770	int ret = 0;
2771	struct btrfs_root *root = sctx->parent_root;
2772	struct btrfs_path *path;
2773	struct btrfs_key key;
2774	struct btrfs_key found_key;
2775	struct btrfs_key loc;
2776	struct btrfs_dir_item *di;
2777
2778	/*
2779	 * Don't try to rmdir the top/root subvolume dir.
2780	 */
2781	if (dir == BTRFS_FIRST_FREE_OBJECTID)
2782		return 0;
2783
2784	path = alloc_path_for_send();
2785	if (!path)
2786		return -ENOMEM;
2787
2788	key.objectid = dir;
2789	key.type = BTRFS_DIR_INDEX_KEY;
2790	key.offset = 0;
2791	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2792	if (ret < 0)
2793		goto out;
2794
2795	while (1) {
2796		struct waiting_dir_move *dm;
2797
2798		if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2799			ret = btrfs_next_leaf(root, path);
2800			if (ret < 0)
2801				goto out;
2802			else if (ret > 0)
2803				break;
2804			continue;
2805		}
2806		btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2807				      path->slots[0]);
2808		if (found_key.objectid != key.objectid ||
2809		    found_key.type != key.type)
2810			break;
2811
2812		di = btrfs_item_ptr(path->nodes[0], path->slots[0],
2813				struct btrfs_dir_item);
2814		btrfs_dir_item_key_to_cpu(path->nodes[0], di, &loc);
2815
2816		dm = get_waiting_dir_move(sctx, loc.objectid);
2817		if (dm) {
2818			struct orphan_dir_info *odi;
2819
2820			odi = add_orphan_dir_info(sctx, dir);
2821			if (IS_ERR(odi)) {
2822				ret = PTR_ERR(odi);
2823				goto out;
2824			}
2825			odi->gen = dir_gen;
2826			dm->rmdir_ino = dir;
2827			ret = 0;
2828			goto out;
2829		}
2830
2831		if (loc.objectid > send_progress) {
2832			ret = 0;
2833			goto out;
2834		}
2835
2836		path->slots[0]++;
2837	}
2838
2839	ret = 1;
2840
2841out:
2842	btrfs_free_path(path);
2843	return ret;
2844}
2845
2846static int is_waiting_for_move(struct send_ctx *sctx, u64 ino)
2847{
2848	struct waiting_dir_move *entry = get_waiting_dir_move(sctx, ino);
2849
2850	return entry != NULL;
2851}
2852
2853static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino)
2854{
2855	struct rb_node **p = &sctx->waiting_dir_moves.rb_node;
2856	struct rb_node *parent = NULL;
2857	struct waiting_dir_move *entry, *dm;
2858
2859	dm = kmalloc(sizeof(*dm), GFP_NOFS);
2860	if (!dm)
2861		return -ENOMEM;
2862	dm->ino = ino;
2863	dm->rmdir_ino = 0;
2864
2865	while (*p) {
2866		parent = *p;
2867		entry = rb_entry(parent, struct waiting_dir_move, node);
2868		if (ino < entry->ino) {
2869			p = &(*p)->rb_left;
2870		} else if (ino > entry->ino) {
2871			p = &(*p)->rb_right;
2872		} else {
2873			kfree(dm);
2874			return -EEXIST;
2875		}
2876	}
2877
2878	rb_link_node(&dm->node, parent, p);
2879	rb_insert_color(&dm->node, &sctx->waiting_dir_moves);
2880	return 0;
2881}
2882
2883static struct waiting_dir_move *
2884get_waiting_dir_move(struct send_ctx *sctx, u64 ino)
2885{
2886	struct rb_node *n = sctx->waiting_dir_moves.rb_node;
2887	struct waiting_dir_move *entry;
2888
2889	while (n) {
2890		entry = rb_entry(n, struct waiting_dir_move, node);
2891		if (ino < entry->ino)
2892			n = n->rb_left;
2893		else if (ino > entry->ino)
2894			n = n->rb_right;
2895		else
2896			return entry;
2897	}
2898	return NULL;
2899}
2900
2901static void free_waiting_dir_move(struct send_ctx *sctx,
2902				  struct waiting_dir_move *dm)
2903{
2904	if (!dm)
2905		return;
2906	rb_erase(&dm->node, &sctx->waiting_dir_moves);
2907	kfree(dm);
2908}
2909
2910static int add_pending_dir_move(struct send_ctx *sctx, u64 parent_ino)
2911{
2912	struct rb_node **p = &sctx->pending_dir_moves.rb_node;
2913	struct rb_node *parent = NULL;
2914	struct pending_dir_move *entry, *pm;
2915	struct recorded_ref *cur;
2916	int exists = 0;
2917	int ret;
2918
2919	pm = kmalloc(sizeof(*pm), GFP_NOFS);
2920	if (!pm)
2921		return -ENOMEM;
2922	pm->parent_ino = parent_ino;
2923	pm->ino = sctx->cur_ino;
2924	pm->gen = sctx->cur_inode_gen;
2925	INIT_LIST_HEAD(&pm->list);
2926	INIT_LIST_HEAD(&pm->update_refs);
2927	RB_CLEAR_NODE(&pm->node);
2928
2929	while (*p) {
2930		parent = *p;
2931		entry = rb_entry(parent, struct pending_dir_move, node);
2932		if (parent_ino < entry->parent_ino) {
2933			p = &(*p)->rb_left;
2934		} else if (parent_ino > entry->parent_ino) {
2935			p = &(*p)->rb_right;
2936		} else {
2937			exists = 1;
2938			break;
2939		}
2940	}
2941
2942	list_for_each_entry(cur, &sctx->deleted_refs, list) {
2943		ret = dup_ref(cur, &pm->update_refs);
2944		if (ret < 0)
2945			goto out;
2946	}
2947	list_for_each_entry(cur, &sctx->new_refs, list) {
2948		ret = dup_ref(cur, &pm->update_refs);
2949		if (ret < 0)
2950			goto out;
2951	}
2952
2953	ret = add_waiting_dir_move(sctx, pm->ino);
2954	if (ret)
2955		goto out;
2956
2957	if (exists) {
2958		list_add_tail(&pm->list, &entry->list);
2959	} else {
2960		rb_link_node(&pm->node, parent, p);
2961		rb_insert_color(&pm->node, &sctx->pending_dir_moves);
2962	}
2963	ret = 0;
2964out:
2965	if (ret) {
2966		__free_recorded_refs(&pm->update_refs);
2967		kfree(pm);
2968	}
2969	return ret;
2970}
2971
2972static struct pending_dir_move *get_pending_dir_moves(struct send_ctx *sctx,
2973						      u64 parent_ino)
2974{
2975	struct rb_node *n = sctx->pending_dir_moves.rb_node;
2976	struct pending_dir_move *entry;
2977
2978	while (n) {
2979		entry = rb_entry(n, struct pending_dir_move, node);
2980		if (parent_ino < entry->parent_ino)
2981			n = n->rb_left;
2982		else if (parent_ino > entry->parent_ino)
2983			n = n->rb_right;
2984		else
2985			return entry;
2986	}
2987	return NULL;
2988}
2989
2990static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
2991{
2992	struct fs_path *from_path = NULL;
2993	struct fs_path *to_path = NULL;
2994	struct fs_path *name = NULL;
2995	u64 orig_progress = sctx->send_progress;
2996	struct recorded_ref *cur;
2997	u64 parent_ino, parent_gen;
2998	struct waiting_dir_move *dm = NULL;
2999	u64 rmdir_ino = 0;
3000	int ret;
3001
3002	name = fs_path_alloc();
3003	from_path = fs_path_alloc();
3004	if (!name || !from_path) {
3005		ret = -ENOMEM;
3006		goto out;
3007	}
3008
3009	dm = get_waiting_dir_move(sctx, pm->ino);
3010	ASSERT(dm);
3011	rmdir_ino = dm->rmdir_ino;
3012	free_waiting_dir_move(sctx, dm);
3013
3014	ret = get_first_ref(sctx->parent_root, pm->ino,
3015			    &parent_ino, &parent_gen, name);
3016	if (ret < 0)
3017		goto out;
3018
3019	if (parent_ino == sctx->cur_ino) {
3020		/* child only renamed, not moved */
3021		ASSERT(parent_gen == sctx->cur_inode_gen);
3022		ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen,
3023				   from_path);
3024		if (ret < 0)
3025			goto out;
3026		ret = fs_path_add_path(from_path, name);
3027		if (ret < 0)
3028			goto out;
3029	} else {
3030		/* child moved and maybe renamed too */
3031		sctx->send_progress = pm->ino;
3032		ret = get_cur_path(sctx, pm->ino, pm->gen, from_path);
3033		if (ret < 0)
3034			goto out;
3035	}
3036
3037	fs_path_free(name);
3038	name = NULL;
3039
3040	to_path = fs_path_alloc();
3041	if (!to_path) {
3042		ret = -ENOMEM;
3043		goto out;
3044	}
3045
3046	sctx->send_progress = sctx->cur_ino + 1;
3047	ret = get_cur_path(sctx, pm->ino, pm->gen, to_path);
3048	if (ret < 0)
3049		goto out;
3050
3051	ret = send_rename(sctx, from_path, to_path);
3052	if (ret < 0)
3053		goto out;
3054
3055	if (rmdir_ino) {
3056		struct orphan_dir_info *odi;
3057
3058		odi = get_orphan_dir_info(sctx, rmdir_ino);
3059		if (!odi) {
3060			/* already deleted */
3061			goto finish;
3062		}
3063		ret = can_rmdir(sctx, rmdir_ino, odi->gen, sctx->cur_ino + 1);
3064		if (ret < 0)
3065			goto out;
3066		if (!ret)
3067			goto finish;
3068
3069		name = fs_path_alloc();
3070		if (!name) {
3071			ret = -ENOMEM;
3072			goto out;
3073		}
3074		ret = get_cur_path(sctx, rmdir_ino, odi->gen, name);
3075		if (ret < 0)
3076			goto out;
3077		ret = send_rmdir(sctx, name);
3078		if (ret < 0)
3079			goto out;
3080		free_orphan_dir_info(sctx, odi);
3081	}
3082
3083finish:
3084	ret = send_utimes(sctx, pm->ino, pm->gen);
3085	if (ret < 0)
3086		goto out;
3087
3088	/*
3089	 * After rename/move, need to update the utimes of both new parent(s)
3090	 * and old parent(s).
3091	 */
3092	list_for_each_entry(cur, &pm->update_refs, list) {
3093		if (cur->dir == rmdir_ino)
3094			continue;
3095		ret = send_utimes(sctx, cur->dir, cur->dir_gen);
3096		if (ret < 0)
3097			goto out;
3098	}
3099
3100out:
3101	fs_path_free(name);
3102	fs_path_free(from_path);
3103	fs_path_free(to_path);
3104	sctx->send_progress = orig_progress;
3105
3106	return ret;
3107}
3108
3109static void free_pending_move(struct send_ctx *sctx, struct pending_dir_move *m)
3110{
3111	if (!list_empty(&m->list))
3112		list_del(&m->list);
3113	if (!RB_EMPTY_NODE(&m->node))
3114		rb_erase(&m->node, &sctx->pending_dir_moves);
3115	__free_recorded_refs(&m->update_refs);
3116	kfree(m);
3117}
3118
3119static void tail_append_pending_moves(struct pending_dir_move *moves,
3120				      struct list_head *stack)
3121{
3122	if (list_empty(&moves->list)) {
3123		list_add_tail(&moves->list, stack);
3124	} else {
3125		LIST_HEAD(list);
3126		list_splice_init(&moves->list, &list);
3127		list_add_tail(&moves->list, stack);
3128		list_splice_tail(&list, stack);
3129	}
3130}
3131
3132static int apply_children_dir_moves(struct send_ctx *sctx)
3133{
3134	struct pending_dir_move *pm;
3135	struct list_head stack;
3136	u64 parent_ino = sctx->cur_ino;
3137	int ret = 0;
3138
3139	pm = get_pending_dir_moves(sctx, parent_ino);
3140	if (!pm)
3141		return 0;
3142
3143	INIT_LIST_HEAD(&stack);
3144	tail_append_pending_moves(pm, &stack);
3145
3146	while (!list_empty(&stack)) {
3147		pm = list_first_entry(&stack, struct pending_dir_move, list);
3148		parent_ino = pm->ino;
3149		ret = apply_dir_move(sctx, pm);
3150		free_pending_move(sctx, pm);
3151		if (ret)
3152			goto out;
3153		pm = get_pending_dir_moves(sctx, parent_ino);
3154		if (pm)
3155			tail_append_pending_moves(pm, &stack);
3156	}
3157	return 0;
3158
3159out:
3160	while (!list_empty(&stack)) {
3161		pm = list_first_entry(&stack, struct pending_dir_move, list);
3162		free_pending_move(sctx, pm);
3163	}
3164	return ret;
3165}
3166
3167static int wait_for_parent_move(struct send_ctx *sctx,
3168				struct recorded_ref *parent_ref)
3169{
3170	int ret;
3171	u64 ino = parent_ref->dir;
3172	u64 parent_ino_before, parent_ino_after;
3173	u64 new_gen, old_gen;
3174	struct fs_path *path_before = NULL;
3175	struct fs_path *path_after = NULL;
3176	int len1, len2;
3177
3178	if (parent_ref->dir <= sctx->cur_ino)
3179		return 0;
3180
3181	if (is_waiting_for_move(sctx, ino))
3182		return 1;
3183
3184	ret = get_inode_info(sctx->parent_root, ino, NULL, &old_gen,
3185			     NULL, NULL, NULL, NULL);
3186	if (ret == -ENOENT)
3187		return 0;
3188	else if (ret < 0)
3189		return ret;
3190
3191	ret = get_inode_info(sctx->send_root, ino, NULL, &new_gen,
3192			     NULL, NULL, NULL, NULL);
3193	if (ret < 0)
3194		return ret;
3195
3196	if (new_gen != old_gen)
3197		return 0;
3198
3199	path_before = fs_path_alloc();
3200	if (!path_before)
3201		return -ENOMEM;
3202
3203	ret = get_first_ref(sctx->parent_root, ino, &parent_ino_before,
3204			    NULL, path_before);
3205	if (ret == -ENOENT) {
3206		ret = 0;
3207		goto out;
3208	} else if (ret < 0) {
3209		goto out;
3210	}
3211
3212	path_after = fs_path_alloc();
3213	if (!path_after) {
3214		ret = -ENOMEM;
3215		goto out;
3216	}
3217
3218	ret = get_first_ref(sctx->send_root, ino, &parent_ino_after,
3219			    NULL, path_after);
3220	if (ret == -ENOENT) {
3221		ret = 0;
3222		goto out;
3223	} else if (ret < 0) {
3224		goto out;
3225	}
3226
3227	len1 = fs_path_len(path_before);
3228	len2 = fs_path_len(path_after);
3229	if (parent_ino_before != parent_ino_after || len1 != len2 ||
3230	     memcmp(path_before->start, path_after->start, len1)) {
3231		ret = 1;
3232		goto out;
3233	}
3234	ret = 0;
3235
3236out:
3237	fs_path_free(path_before);
3238	fs_path_free(path_after);
3239
3240	return ret;
3241}
3242
3243/*
3244 * This does all the move/link/unlink/rmdir magic.
3245 */
3246static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
3247{
3248	int ret = 0;
3249	struct recorded_ref *cur;
3250	struct recorded_ref *cur2;
3251	struct list_head check_dirs;
3252	struct fs_path *valid_path = NULL;
3253	u64 ow_inode = 0;
3254	u64 ow_gen;
3255	int did_overwrite = 0;
3256	int is_orphan = 0;
3257	u64 last_dir_ino_rm = 0;
3258
3259verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
3260
3261	/*
3262	 * This should never happen as the root dir always has the same ref
3263	 * which is always '..'
3264	 */
3265	BUG_ON(sctx->cur_ino <= BTRFS_FIRST_FREE_OBJECTID);
3266	INIT_LIST_HEAD(&check_dirs);
3267
3268	valid_path = fs_path_alloc();
3269	if (!valid_path) {
3270		ret = -ENOMEM;
3271		goto out;
3272	}
3273
3274	/*
3275	 * First, check if the first ref of the current inode was overwritten
3276	 * before. If yes, we know that the current inode was already orphanized
3277	 * and thus use the orphan name. If not, we can use get_cur_path to
3278	 * get the path of the first ref as it would like while receiving at
3279	 * this point in time.
3280	 * New inodes are always orphan at the beginning, so force to use the
3281	 * orphan name in this case.
3282	 * The first ref is stored in valid_path and will be updated if it
3283	 * gets moved around.
3284	 */
3285	if (!sctx->cur_inode_new) {
3286		ret = did_overwrite_first_ref(sctx, sctx->cur_ino,
3287				sctx->cur_inode_gen);
3288		if (ret < 0)
3289			goto out;
3290		if (ret)
3291			did_overwrite = 1;
3292	}
3293	if (sctx->cur_inode_new || did_overwrite) {
3294		ret = gen_unique_name(sctx, sctx->cur_ino,
3295				sctx->cur_inode_gen, valid_path);
3296		if (ret < 0)
3297			goto out;
3298		is_orphan = 1;
3299	} else {
3300		ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen,
3301				valid_path);
3302		if (ret < 0)
3303			goto out;
3304	}
3305
3306	list_for_each_entry(cur, &sctx->new_refs, list) {
3307		/*
3308		 * We may have refs where the parent directory does not exist
3309		 * yet. This happens if the parent directories inum is higher
3310		 * the the current inum. To handle this case, we create the
3311		 * parent directory out of order. But we need to check if this
3312		 * did already happen before due to other refs in the same dir.
3313		 */
3314		ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen);
3315		if (ret < 0)
3316			goto out;
3317		if (ret == inode_state_will_create) {
3318			ret = 0;
3319			/*
3320			 * First check if any of the current inodes refs did
3321			 * already create the dir.
3322			 */
3323			list_for_each_entry(cur2, &sctx->new_refs, list) {
3324				if (cur == cur2)
3325					break;
3326				if (cur2->dir == cur->dir) {
3327					ret = 1;
3328					break;
3329				}
3330			}
3331
3332			/*
3333			 * If that did not happen, check if a previous inode
3334			 * did already create the dir.
3335			 */
3336			if (!ret)
3337				ret = did_create_dir(sctx, cur->dir);
3338			if (ret < 0)
3339				goto out;
3340			if (!ret) {
3341				ret = send_create_inode(sctx, cur->dir);
3342				if (ret < 0)
3343					goto out;
3344			}
3345		}
3346
3347		/*
3348		 * Check if this new ref would overwrite the first ref of
3349		 * another unprocessed inode. If yes, orphanize the
3350		 * overwritten inode. If we find an overwritten ref that is
3351		 * not the first ref, simply unlink it.
3352		 */
3353		ret = will_overwrite_ref(sctx, cur->dir, cur->dir_gen,
3354				cur->name, cur->name_len,
3355				&ow_inode, &ow_gen);
3356		if (ret < 0)
3357			goto out;
3358		if (ret) {
3359			ret = is_first_ref(sctx->parent_root,
3360					   ow_inode, cur->dir, cur->name,
3361					   cur->name_len);
3362			if (ret < 0)
3363				goto out;
3364			if (ret) {
3365				ret = orphanize_inode(sctx, ow_inode, ow_gen,
3366						cur->full_path);
3367				if (ret < 0)
3368					goto out;
3369			} else {
3370				ret = send_unlink(sctx, cur->full_path);
3371				if (ret < 0)
3372					goto out;
3373			}
3374		}
3375
3376		/*
3377		 * link/move the ref to the new place. If we have an orphan
3378		 * inode, move it and update valid_path. If not, link or move
3379		 * it depending on the inode mode.
3380		 */
3381		if (is_orphan) {
3382			ret = send_rename(sctx, valid_path, cur->full_path);
3383			if (ret < 0)
3384				goto out;
3385			is_orphan = 0;
3386			ret = fs_path_copy(valid_path, cur->full_path);
3387			if (ret < 0)
3388				goto out;
3389		} else {
3390			if (S_ISDIR(sctx->cur_inode_mode)) {
3391				/*
3392				 * Dirs can't be linked, so move it. For moved
3393				 * dirs, we always have one new and one deleted
3394				 * ref. The deleted ref is ignored later.
3395				 */
3396				ret = wait_for_parent_move(sctx, cur);
3397				if (ret < 0)
3398					goto out;
3399				if (ret) {
3400					ret = add_pending_dir_move(sctx,
3401								   cur->dir);
3402					*pending_move = 1;
3403				} else {
3404					ret = send_rename(sctx, valid_path,
3405							  cur->full_path);
3406					if (!ret)
3407						ret = fs_path_copy(valid_path,
3408							       cur->full_path);
3409				}
3410				if (ret < 0)
3411					goto out;
3412			} else {
3413				ret = send_link(sctx, cur->full_path,
3414						valid_path);
3415				if (ret < 0)
3416					goto out;
3417			}
3418		}
3419		ret = dup_ref(cur, &check_dirs);
3420		if (ret < 0)
3421			goto out;
3422	}
3423
3424	if (S_ISDIR(sctx->cur_inode_mode) && sctx->cur_inode_deleted) {
3425		/*
3426		 * Check if we can already rmdir the directory. If not,
3427		 * orphanize it. For every dir item inside that gets deleted
3428		 * later, we do this check again and rmdir it then if possible.
3429		 * See the use of check_dirs for more details.
3430		 */
3431		ret = can_rmdir(sctx, sctx->cur_ino, sctx->cur_inode_gen,
3432				sctx->cur_ino);
3433		if (ret < 0)
3434			goto out;
3435		if (ret) {
3436			ret = send_rmdir(sctx, valid_path);
3437			if (ret < 0)
3438				goto out;
3439		} else if (!is_orphan) {
3440			ret = orphanize_inode(sctx, sctx->cur_ino,
3441					sctx->cur_inode_gen, valid_path);
3442			if (ret < 0)
3443				goto out;
3444			is_orphan = 1;
3445		}
3446
3447		list_for_each_entry(cur, &sctx->deleted_refs, list) {
3448			ret = dup_ref(cur, &check_dirs);
3449			if (ret < 0)
3450				goto out;
3451		}
3452	} else if (S_ISDIR(sctx->cur_inode_mode) &&
3453		   !list_empty(&sctx->deleted_refs)) {
3454		/*
3455		 * We have a moved dir. Add the old parent to check_dirs
3456		 */
3457		cur = list_entry(sctx->deleted_refs.next, struct recorded_ref,
3458				list);
3459		ret = dup_ref(cur, &check_dirs);
3460		if (ret < 0)
3461			goto out;
3462	} else if (!S_ISDIR(sctx->cur_inode_mode)) {
3463		/*
3464		 * We have a non dir inode. Go through all deleted refs and
3465		 * unlink them if they were not already overwritten by other
3466		 * inodes.
3467		 */
3468		list_for_each_entry(cur, &sctx->deleted_refs, list) {
3469			ret = did_overwrite_ref(sctx, cur->dir, cur->dir_gen,
3470					sctx->cur_ino, sctx->cur_inode_gen,
3471					cur->name, cur->name_len);
3472			if (ret < 0)
3473				goto out;
3474			if (!ret) {
3475				ret = send_unlink(sctx, cur->full_path);
3476				if (ret < 0)
3477					goto out;
3478			}
3479			ret = dup_ref(cur, &check_dirs);
3480			if (ret < 0)
3481				goto out;
3482		}
3483		/*
3484		 * If the inode is still orphan, unlink the orphan. This may
3485		 * happen when a previous inode did overwrite the first ref
3486		 * of this inode and no new refs were added for the current
3487		 * inode. Unlinking does not mean that the inode is deleted in
3488		 * all cases. There may still be links to this inode in other
3489		 * places.
3490		 */
3491		if (is_orphan) {
3492			ret = send_unlink(sctx, valid_path);
3493			if (ret < 0)
3494				goto out;
3495		}
3496	}
3497
3498	/*
3499	 * We did collect all parent dirs where cur_inode was once located. We
3500	 * now go through all these dirs and check if they are pending for
3501	 * deletion and if it's finally possible to perform the rmdir now.
3502	 * We also update the inode stats of the parent dirs here.
3503	 */
3504	list_for_each_entry(cur, &check_dirs, list) {
3505		/*
3506		 * In case we had refs into dirs that were not processed yet,
3507		 * we don't need to do the utime and rmdir logic for these dirs.
3508		 * The dir will be processed later.
3509		 */
3510		if (cur->dir > sctx->cur_ino)
3511			continue;
3512
3513		ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen);
3514		if (ret < 0)
3515			goto out;
3516
3517		if (ret == inode_state_did_create ||
3518		    ret == inode_state_no_change) {
3519			/* TODO delayed utimes */
3520			ret = send_utimes(sctx, cur->dir, cur->dir_gen);
3521			if (ret < 0)
3522				goto out;
3523		} else if (ret == inode_state_did_delete &&
3524			   cur->dir != last_dir_ino_rm) {
3525			ret = can_rmdir(sctx, cur->dir, cur->dir_gen,
3526					sctx->cur_ino);
3527			if (ret < 0)
3528				goto out;
3529			if (ret) {
3530				ret = get_cur_path(sctx, cur->dir,
3531						   cur->dir_gen, valid_path);
3532				if (ret < 0)
3533					goto out;
3534				ret = send_rmdir(sctx, valid_path);
3535				if (ret < 0)
3536					goto out;
3537				last_dir_ino_rm = cur->dir;
3538			}
3539		}
3540	}
3541
3542	ret = 0;
3543
3544out:
3545	__free_recorded_refs(&check_dirs);
3546	free_recorded_refs(sctx);
3547	fs_path_free(valid_path);
3548	return ret;
3549}
3550
3551static int __record_new_ref(int num, u64 dir, int index,
3552			    struct fs_path *name,
3553			    void *ctx)
3554{
3555	int ret = 0;
3556	struct send_ctx *sctx = ctx;
3557	struct fs_path *p;
3558	u64 gen;
3559
3560	p = fs_path_alloc();
3561	if (!p)
3562		return -ENOMEM;
3563
3564	ret = get_inode_info(sctx->send_root, dir, NULL, &gen, NULL, NULL,
3565			NULL, NULL);
3566	if (ret < 0)
3567		goto out;
3568
3569	ret = get_cur_path(sctx, dir, gen, p);
3570	if (ret < 0)
3571		goto out;
3572	ret = fs_path_add_path(p, name);
3573	if (ret < 0)
3574		goto out;
3575
3576	ret = record_ref(&sctx->new_refs, dir, gen, p);
3577
3578out:
3579	if (ret)
3580		fs_path_free(p);
3581	return ret;
3582}
3583
3584static int __record_deleted_ref(int num, u64 dir, int index,
3585				struct fs_path *name,
3586				void *ctx)
3587{
3588	int ret = 0;
3589	struct send_ctx *sctx = ctx;
3590	struct fs_path *p;
3591	u64 gen;
3592
3593	p = fs_path_alloc();
3594	if (!p)
3595		return -ENOMEM;
3596
3597	ret = get_inode_info(sctx->parent_root, dir, NULL, &gen, NULL, NULL,
3598			NULL, NULL);
3599	if (ret < 0)
3600		goto out;
3601
3602	ret = get_cur_path(sctx, dir, gen, p);
3603	if (ret < 0)
3604		goto out;
3605	ret = fs_path_add_path(p, name);
3606	if (ret < 0)
3607		goto out;
3608
3609	ret = record_ref(&sctx->deleted_refs, dir, gen, p);
3610
3611out:
3612	if (ret)
3613		fs_path_free(p);
3614	return ret;
3615}
3616
3617static int record_new_ref(struct send_ctx *sctx)
3618{
3619	int ret;
3620
3621	ret = iterate_inode_ref(sctx->send_root, sctx->left_path,
3622				sctx->cmp_key, 0, __record_new_ref, sctx);
3623	if (ret < 0)
3624		goto out;
3625	ret = 0;
3626
3627out:
3628	return ret;
3629}
3630
3631static int record_deleted_ref(struct send_ctx *sctx)
3632{
3633	int ret;
3634
3635	ret = iterate_inode_ref(sctx->parent_root, sctx->right_path,
3636				sctx->cmp_key, 0, __record_deleted_ref, sctx);
3637	if (ret < 0)
3638		goto out;
3639	ret = 0;
3640
3641out:
3642	return ret;
3643}
3644
3645struct find_ref_ctx {
3646	u64 dir;
3647	u64 dir_gen;
3648	struct btrfs_root *root;
3649	struct fs_path *name;
3650	int found_idx;
3651};
3652
3653static int __find_iref(int num, u64 dir, int index,
3654		       struct fs_path *name,
3655		       void *ctx_)
3656{
3657	struct find_ref_ctx *ctx = ctx_;
3658	u64 dir_gen;
3659	int ret;
3660
3661	if (dir == ctx->dir && fs_path_len(name) == fs_path_len(ctx->name) &&
3662	    strncmp(name->start, ctx->name->start, fs_path_len(name)) == 0) {
3663		/*
3664		 * To avoid doing extra lookups we'll only do this if everything
3665		 * else matches.
3666		 */
3667		ret = get_inode_info(ctx->root, dir, NULL, &dir_gen, NULL,
3668				     NULL, NULL, NULL);
3669		if (ret)
3670			return ret;
3671		if (dir_gen != ctx->dir_gen)
3672			return 0;
3673		ctx->found_idx = num;
3674		return 1;
3675	}
3676	return 0;
3677}
3678
3679static int find_iref(struct btrfs_root *root,
3680		     struct btrfs_path *path,
3681		     struct btrfs_key *key,
3682		     u64 dir, u64 dir_gen, struct fs_path *name)
3683{
3684	int ret;
3685	struct find_ref_ctx ctx;
3686
3687	ctx.dir = dir;
3688	ctx.name = name;
3689	ctx.dir_gen = dir_gen;
3690	ctx.found_idx = -1;
3691	ctx.root = root;
3692
3693	ret = iterate_inode_ref(root, path, key, 0, __find_iref, &ctx);
3694	if (ret < 0)
3695		return ret;
3696
3697	if (ctx.found_idx == -1)
3698		return -ENOENT;
3699
3700	return ctx.found_idx;
3701}
3702
3703static int __record_changed_new_ref(int num, u64 dir, int index,
3704				    struct fs_path *name,
3705				    void *ctx)
3706{
3707	u64 dir_gen;
3708	int ret;
3709	struct send_ctx *sctx = ctx;
3710
3711	ret = get_inode_info(sctx->send_root, dir, NULL, &dir_gen, NULL,
3712			     NULL, NULL, NULL);
3713	if (ret)
3714		return ret;
3715
3716	ret = find_iref(sctx->parent_root, sctx->right_path,
3717			sctx->cmp_key, dir, dir_gen, name);
3718	if (ret == -ENOENT)
3719		ret = __record_new_ref(num, dir, index, name, sctx);
3720	else if (ret > 0)
3721		ret = 0;
3722
3723	return ret;
3724}
3725
3726static int __record_changed_deleted_ref(int num, u64 dir, int index,
3727					struct fs_path *name,
3728					void *ctx)
3729{
3730	u64 dir_gen;
3731	int ret;
3732	struct send_ctx *sctx = ctx;
3733
3734	ret = get_inode_info(sctx->parent_root, dir, NULL, &dir_gen, NULL,
3735			     NULL, NULL, NULL);
3736	if (ret)
3737		return ret;
3738
3739	ret = find_iref(sctx->send_root, sctx->left_path, sctx->cmp_key,
3740			dir, dir_gen, name);
3741	if (ret == -ENOENT)
3742		ret = __record_deleted_ref(num, dir, index, name, sctx);
3743	else if (ret > 0)
3744		ret = 0;
3745
3746	return ret;
3747}
3748
3749static int record_changed_ref(struct send_ctx *sctx)
3750{
3751	int ret = 0;
3752
3753	ret = iterate_inode_ref(sctx->send_root, sctx->left_path,
3754			sctx->cmp_key, 0, __record_changed_new_ref, sctx);
3755	if (ret < 0)
3756		goto out;
3757	ret = iterate_inode_ref(sctx->parent_root, sctx->right_path,
3758			sctx->cmp_key, 0, __record_changed_deleted_ref, sctx);
3759	if (ret < 0)
3760		goto out;
3761	ret = 0;
3762
3763out:
3764	return ret;
3765}
3766
3767/*
3768 * Record and process all refs at once. Needed when an inode changes the
3769 * generation number, which means that it was deleted and recreated.
3770 */
3771static int process_all_refs(struct send_ctx *sctx,
3772			    enum btrfs_compare_tree_result cmd)
3773{
3774	int ret;
3775	struct btrfs_root *root;
3776	struct btrfs_path *path;
3777	struct btrfs_key key;
3778	struct btrfs_key found_key;
3779	struct extent_buffer *eb;
3780	int slot;
3781	iterate_inode_ref_t cb;
3782	int pending_move = 0;
3783
3784	path = alloc_path_for_send();
3785	if (!path)
3786		return -ENOMEM;
3787
3788	if (cmd == BTRFS_COMPARE_TREE_NEW) {
3789		root = sctx->send_root;
3790		cb = __record_new_ref;
3791	} else if (cmd == BTRFS_COMPARE_TREE_DELETED) {
3792		root = sctx->parent_root;
3793		cb = __record_deleted_ref;
3794	} else {
3795		btrfs_err(sctx->send_root->fs_info,
3796				"Wrong command %d in process_all_refs", cmd);
3797		ret = -EINVAL;
3798		goto out;
3799	}
3800
3801	key.objectid = sctx->cmp_key->objectid;
3802	key.type = BTRFS_INODE_REF_KEY;
3803	key.offset = 0;
3804	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3805	if (ret < 0)
3806		goto out;
3807
3808	while (1) {
3809		eb = path->nodes[0];
3810		slot = path->slots[0];
3811		if (slot >= btrfs_header_nritems(eb)) {
3812			ret = btrfs_next_leaf(root, path);
3813			if (ret < 0)
3814				goto out;
3815			else if (ret > 0)
3816				break;
3817			continue;
3818		}
3819
3820		btrfs_item_key_to_cpu(eb, &found_key, slot);
3821
3822		if (found_key.objectid != key.objectid ||
3823		    (found_key.type != BTRFS_INODE_REF_KEY &&
3824		     found_key.type != BTRFS_INODE_EXTREF_KEY))
3825			break;
3826
3827		ret = iterate_inode_ref(root, path, &found_key, 0, cb, sctx);
3828		if (ret < 0)
3829			goto out;
3830
3831		path->slots[0]++;
3832	}
3833	btrfs_release_path(path);
3834
3835	ret = process_recorded_refs(sctx, &pending_move);
3836	/* Only applicable to an incremental send. */
3837	ASSERT(pending_move == 0);
3838
3839out:
3840	btrfs_free_path(path);
3841	return ret;
3842}
3843
3844static int send_set_xattr(struct send_ctx *sctx,
3845			  struct fs_path *path,
3846			  const char *name, int name_len,
3847			  const char *data, int data_len)
3848{
3849	int ret = 0;
3850
3851	ret = begin_cmd(sctx, BTRFS_SEND_C_SET_XATTR);
3852	if (ret < 0)
3853		goto out;
3854
3855	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
3856	TLV_PUT_STRING(sctx, BTRFS_SEND_A_XATTR_NAME, name, name_len);
3857	TLV_PUT(sctx, BTRFS_SEND_A_XATTR_DATA, data, data_len);
3858
3859	ret = send_cmd(sctx);
3860
3861tlv_put_failure:
3862out:
3863	return ret;
3864}
3865
3866static int send_remove_xattr(struct send_ctx *sctx,
3867			  struct fs_path *path,
3868			  const char *name, int name_len)
3869{
3870	int ret = 0;
3871
3872	ret = begin_cmd(sctx, BTRFS_SEND_C_REMOVE_XATTR);
3873	if (ret < 0)
3874		goto out;
3875
3876	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
3877	TLV_PUT_STRING(sctx, BTRFS_SEND_A_XATTR_NAME, name, name_len);
3878
3879	ret = send_cmd(sctx);
3880
3881tlv_put_failure:
3882out:
3883	return ret;
3884}
3885
3886static int __process_new_xattr(int num, struct btrfs_key *di_key,
3887			       const char *name, int name_len,
3888			       const char *data, int data_len,
3889			       u8 type, void *ctx)
3890{
3891	int ret;
3892	struct send_ctx *sctx = ctx;
3893	struct fs_path *p;
3894	posix_acl_xattr_header dummy_acl;
3895
3896	p = fs_path_alloc();
3897	if (!p)
3898		return -ENOMEM;
3899
3900	/*
3901	 * This hack is needed because empty acl's are stored as zero byte
3902	 * data in xattrs. Problem with that is, that receiving these zero byte
3903	 * acl's will fail later. To fix this, we send a dummy acl list that
3904	 * only contains the version number and no entries.
3905	 */
3906	if (!strncmp(name, XATTR_NAME_POSIX_ACL_ACCESS, name_len) ||
3907	    !strncmp(name, XATTR_NAME_POSIX_ACL_DEFAULT, name_len)) {
3908		if (data_len == 0) {
3909			dummy_acl.a_version =
3910					cpu_to_le32(POSIX_ACL_XATTR_VERSION);
3911			data = (char *)&dummy_acl;
3912			data_len = sizeof(dummy_acl);
3913		}
3914	}
3915
3916	ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
3917	if (ret < 0)
3918		goto out;
3919
3920	ret = send_set_xattr(sctx, p, name, name_len, data, data_len);
3921
3922out:
3923	fs_path_free(p);
3924	return ret;
3925}
3926
3927static int __process_deleted_xattr(int num, struct btrfs_key *di_key,
3928				   const char *name, int name_len,
3929				   const char *data, int data_len,
3930				   u8 type, void *ctx)
3931{
3932	int ret;
3933	struct send_ctx *sctx = ctx;
3934	struct fs_path *p;
3935
3936	p = fs_path_alloc();
3937	if (!p)
3938		return -ENOMEM;
3939
3940	ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
3941	if (ret < 0)
3942		goto out;
3943
3944	ret = send_remove_xattr(sctx, p, name, name_len);
3945
3946out:
3947	fs_path_free(p);
3948	return ret;
3949}
3950
3951static int process_new_xattr(struct send_ctx *sctx)
3952{
3953	int ret = 0;
3954
3955	ret = iterate_dir_item(sctx->send_root, sctx->left_path,
3956			       sctx->cmp_key, __process_new_xattr, sctx);
3957
3958	return ret;
3959}
3960
3961static int process_deleted_xattr(struct send_ctx *sctx)
3962{
3963	int ret;
3964
3965	ret = iterate_dir_item(sctx->parent_root, sctx->right_path,
3966			       sctx->cmp_key, __process_deleted_xattr, sctx);
3967
3968	return ret;
3969}
3970
3971struct find_xattr_ctx {
3972	const char *name;
3973	int name_len;
3974	int found_idx;
3975	char *found_data;
3976	int found_data_len;
3977};
3978
3979static int __find_xattr(int num, struct btrfs_key *di_key,
3980			const char *name, int name_len,
3981			const char *data, int data_len,
3982			u8 type, void *vctx)
3983{
3984	struct find_xattr_ctx *ctx = vctx;
3985
3986	if (name_len == ctx->name_len &&
3987	    strncmp(name, ctx->name, name_len) == 0) {
3988		ctx->found_idx = num;
3989		ctx->found_data_len = data_len;
3990		ctx->found_data = kmemdup(data, data_len, GFP_NOFS);
3991		if (!ctx->found_data)
3992			return -ENOMEM;
3993		return 1;
3994	}
3995	return 0;
3996}
3997
3998static int find_xattr(struct btrfs_root *root,
3999		      struct btrfs_path *path,
4000		      struct btrfs_key *key,
4001		      const char *name, int name_len,
4002		      char **data, int *data_len)
4003{
4004	int ret;
4005	struct find_xattr_ctx ctx;
4006
4007	ctx.name = name;
4008	ctx.name_len = name_len;
4009	ctx.found_idx = -1;
4010	ctx.found_data = NULL;
4011	ctx.found_data_len = 0;
4012
4013	ret = iterate_dir_item(root, path, key, __find_xattr, &ctx);
4014	if (ret < 0)
4015		return ret;
4016
4017	if (ctx.found_idx == -1)
4018		return -ENOENT;
4019	if (data) {
4020		*data = ctx.found_data;
4021		*data_len = ctx.found_data_len;
4022	} else {
4023		kfree(ctx.found_data);
4024	}
4025	return ctx.found_idx;
4026}
4027
4028
4029static int __process_changed_new_xattr(int num, struct btrfs_key *di_key,
4030				       const char *name, int name_len,
4031				       const char *data, int data_len,
4032				       u8 type, void *ctx)
4033{
4034	int ret;
4035	struct send_ctx *sctx = ctx;
4036	char *found_data = NULL;
4037	int found_data_len  = 0;
4038
4039	ret = find_xattr(sctx->parent_root, sctx->right_path,
4040			 sctx->cmp_key, name, name_len, &found_data,
4041			 &found_data_len);
4042	if (ret == -ENOENT) {
4043		ret = __process_new_xattr(num, di_key, name, name_len, data,
4044				data_len, type, ctx);
4045	} else if (ret >= 0) {
4046		if (data_len != found_data_len ||
4047		    memcmp(data, found_data, data_len)) {
4048			ret = __process_new_xattr(num, di_key, name, name_len,
4049					data, data_len, type, ctx);
4050		} else {
4051			ret = 0;
4052		}
4053	}
4054
4055	kfree(found_data);
4056	return ret;
4057}
4058
4059static int __process_changed_deleted_xattr(int num, struct btrfs_key *di_key,
4060					   const char *name, int name_len,
4061					   const char *data, int data_len,
4062					   u8 type, void *ctx)
4063{
4064	int ret;
4065	struct send_ctx *sctx = ctx;
4066
4067	ret = find_xattr(sctx->send_root, sctx->left_path, sctx->cmp_key,
4068			 name, name_len, NULL, NULL);
4069	if (ret == -ENOENT)
4070		ret = __process_deleted_xattr(num, di_key, name, name_len, data,
4071				data_len, type, ctx);
4072	else if (ret >= 0)
4073		ret = 0;
4074
4075	return ret;
4076}
4077
4078static int process_changed_xattr(struct send_ctx *sctx)
4079{
4080	int ret = 0;
4081
4082	ret = iterate_dir_item(sctx->send_root, sctx->left_path,
4083			sctx->cmp_key, __process_changed_new_xattr, sctx);
4084	if (ret < 0)
4085		goto out;
4086	ret = iterate_dir_item(sctx->parent_root, sctx->right_path,
4087			sctx->cmp_key, __process_changed_deleted_xattr, sctx);
4088
4089out:
4090	return ret;
4091}
4092
4093static int process_all_new_xattrs(struct send_ctx *sctx)
4094{
4095	int ret;
4096	struct btrfs_root *root;
4097	struct btrfs_path *path;
4098	struct btrfs_key key;
4099	struct btrfs_key found_key;
4100	struct extent_buffer *eb;
4101	int slot;
4102
4103	path = alloc_path_for_send();
4104	if (!path)
4105		return -ENOMEM;
4106
4107	root = sctx->send_root;
4108
4109	key.objectid = sctx->cmp_key->objectid;
4110	key.type = BTRFS_XATTR_ITEM_KEY;
4111	key.offset = 0;
4112	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4113	if (ret < 0)
4114		goto out;
4115
4116	while (1) {
4117		eb = path->nodes[0];
4118		slot = path->slots[0];
4119		if (slot >= btrfs_header_nritems(eb)) {
4120			ret = btrfs_next_leaf(root, path);
4121			if (ret < 0) {
4122				goto out;
4123			} else if (ret > 0) {
4124				ret = 0;
4125				break;
4126			}
4127			continue;
4128		}
4129
4130		btrfs_item_key_to_cpu(eb, &found_key, slot);
4131		if (found_key.objectid != key.objectid ||
4132		    found_key.type != key.type) {
4133			ret = 0;
4134			goto out;
4135		}
4136
4137		ret = iterate_dir_item(root, path, &found_key,
4138				       __process_new_xattr, sctx);
4139		if (ret < 0)
4140			goto out;
4141
4142		path->slots[0]++;
4143	}
4144
4145out:
4146	btrfs_free_path(path);
4147	return ret;
4148}
4149
4150static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len)
4151{
4152	struct btrfs_root *root = sctx->send_root;
4153	struct btrfs_fs_info *fs_info = root->fs_info;
4154	struct inode *inode;
4155	struct page *page;
4156	char *addr;
4157	struct btrfs_key key;
4158	pgoff_t index = offset >> PAGE_CACHE_SHIFT;
4159	pgoff_t last_index;
4160	unsigned pg_offset = offset & ~PAGE_CACHE_MASK;
4161	ssize_t ret = 0;
4162
4163	key.objectid = sctx->cur_ino;
4164	key.type = BTRFS_INODE_ITEM_KEY;
4165	key.offset = 0;
4166
4167	inode = btrfs_iget(fs_info->sb, &key, root, NULL);
4168	if (IS_ERR(inode))
4169		return PTR_ERR(inode);
4170
4171	if (offset + len > i_size_read(inode)) {
4172		if (offset > i_size_read(inode))
4173			len = 0;
4174		else
4175			len = offset - i_size_read(inode);
4176	}
4177	if (len == 0)
4178		goto out;
4179
4180	last_index = (offset + len - 1) >> PAGE_CACHE_SHIFT;
4181	while (index <= last_index) {
4182		unsigned cur_len = min_t(unsigned, len,
4183					 PAGE_CACHE_SIZE - pg_offset);
4184		page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
4185		if (!page) {
4186			ret = -ENOMEM;
4187			break;
4188		}
4189
4190		if (!PageUptodate(page)) {
4191			btrfs_readpage(NULL, page);
4192			lock_page(page);
4193			if (!PageUptodate(page)) {
4194				unlock_page(page);
4195				page_cache_release(page);
4196				ret = -EIO;
4197				break;
4198			}
4199		}
4200
4201		addr = kmap(page);
4202		memcpy(sctx->read_buf + ret, addr + pg_offset, cur_len);
4203		kunmap(page);
4204		unlock_page(page);
4205		page_cache_release(page);
4206		index++;
4207		pg_offset = 0;
4208		len -= cur_len;
4209		ret += cur_len;
4210	}
4211out:
4212	iput(inode);
4213	return ret;
4214}
4215
4216/*
4217 * Read some bytes from the current inode/file and send a write command to
4218 * user space.
4219 */
4220static int send_write(struct send_ctx *sctx, u64 offset, u32 len)
4221{
4222	int ret = 0;
4223	struct fs_path *p;
4224	ssize_t num_read = 0;
4225
4226	p = fs_path_alloc();
4227	if (!p)
4228		return -ENOMEM;
4229
4230verbose_printk("btrfs: send_write offset=%llu, len=%d\n", offset, len);
4231
4232	num_read = fill_read_buf(sctx, offset, len);
4233	if (num_read <= 0) {
4234		if (num_read < 0)
4235			ret = num_read;
4236		goto out;
4237	}
4238
4239	ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE);
4240	if (ret < 0)
4241		goto out;
4242
4243	ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
4244	if (ret < 0)
4245		goto out;
4246
4247	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
4248	TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
4249	TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, num_read);
4250
4251	ret = send_cmd(sctx);
4252
4253tlv_put_failure:
4254out:
4255	fs_path_free(p);
4256	if (ret < 0)
4257		return ret;
4258	return num_read;
4259}
4260
4261/*
4262 * Send a clone command to user space.
4263 */
4264static int send_clone(struct send_ctx *sctx,
4265		      u64 offset, u32 len,
4266		      struct clone_root *clone_root)
4267{
4268	int ret = 0;
4269	struct fs_path *p;
4270	u64 gen;
4271
4272verbose_printk("btrfs: send_clone offset=%llu, len=%d, clone_root=%llu, "
4273	       "clone_inode=%llu, clone_offset=%llu\n", offset, len,
4274		clone_root->root->objectid, clone_root->ino,
4275		clone_root->offset);
4276
4277	p = fs_path_alloc();
4278	if (!p)
4279		return -ENOMEM;
4280
4281	ret = begin_cmd(sctx, BTRFS_SEND_C_CLONE);
4282	if (ret < 0)
4283		goto out;
4284
4285	ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
4286	if (ret < 0)
4287		goto out;
4288
4289	TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
4290	TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_LEN, len);
4291	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
4292
4293	if (clone_root->root == sctx->send_root) {
4294		ret = get_inode_info(sctx->send_root, clone_root->ino, NULL,
4295				&gen, NULL, NULL, NULL, NULL);
4296		if (ret < 0)
4297			goto out;
4298		ret = get_cur_path(sctx, clone_root->ino, gen, p);
4299	} else {
4300		ret = get_inode_path(clone_root->root, clone_root->ino, p);
4301	}
4302	if (ret < 0)
4303		goto out;
4304
4305	TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
4306			clone_root->root->root_item.uuid);
4307	TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID,
4308		    le64_to_cpu(clone_root->root->root_item.ctransid));
4309	TLV_PUT_PATH(sctx, BTRFS_SEND_A_CLONE_PATH, p);
4310	TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_OFFSET,
4311			clone_root->offset);
4312
4313	ret = send_cmd(sctx);
4314
4315tlv_put_failure:
4316out:
4317	fs_path_free(p);
4318	return ret;
4319}
4320
4321/*
4322 * Send an update extent command to user space.
4323 */
4324static int send_update_extent(struct send_ctx *sctx,
4325			      u64 offset, u32 len)
4326{
4327	int ret = 0;
4328	struct fs_path *p;
4329
4330	p = fs_path_alloc();
4331	if (!p)
4332		return -ENOMEM;
4333
4334	ret = begin_cmd(sctx, BTRFS_SEND_C_UPDATE_EXTENT);
4335	if (ret < 0)
4336		goto out;
4337
4338	ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
4339	if (ret < 0)
4340		goto out;
4341
4342	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
4343	TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
4344	TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, len);
4345
4346	ret = send_cmd(sctx);
4347
4348tlv_put_failure:
4349out:
4350	fs_path_free(p);
4351	return ret;
4352}
4353
4354static int send_hole(struct send_ctx *sctx, u64 end)
4355{
4356	struct fs_path *p = NULL;
4357	u64 offset = sctx->cur_inode_last_extent;
4358	u64 len;
4359	int ret = 0;
4360
4361	p = fs_path_alloc();
4362	if (!p)
4363		return -ENOMEM;
4364	memset(sctx->read_buf, 0, BTRFS_SEND_READ_SIZE);
4365	while (offset < end) {
4366		len = min_t(u64, end - offset, BTRFS_SEND_READ_SIZE);
4367
4368		ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE);
4369		if (ret < 0)
4370			break;
4371		ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
4372		if (ret < 0)
4373			break;
4374		TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
4375		TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
4376		TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, len);
4377		ret = send_cmd(sctx);
4378		if (ret < 0)
4379			break;
4380		offset += len;
4381	}
4382tlv_put_failure:
4383	fs_path_free(p);
4384	return ret;
4385}
4386
4387static int send_write_or_clone(struct send_ctx *sctx,
4388			       struct btrfs_path *path,
4389			       struct btrfs_key *key,
4390			       struct clone_root *clone_root)
4391{
4392	int ret = 0;
4393	struct btrfs_file_extent_item *ei;
4394	u64 offset = key->offset;
4395	u64 pos = 0;
4396	u64 len;
4397	u32 l;
4398	u8 type;
4399	u64 bs = sctx->send_root->fs_info->sb->s_blocksize;
4400
4401	ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
4402			struct btrfs_file_extent_item);
4403	type = btrfs_file_extent_type(path->nodes[0], ei);
4404	if (type == BTRFS_FILE_EXTENT_INLINE) {
4405		len = btrfs_file_extent_inline_len(path->nodes[0],
4406						   path->slots[0], ei);
4407		/*
4408		 * it is possible the inline item won't cover the whole page,
4409		 * but there may be items after this page.  Make
4410		 * sure to send the whole thing
4411		 */
4412		len = PAGE_CACHE_ALIGN(len);
4413	} else {
4414		len = btrfs_file_extent_num_bytes(path->nodes[0], ei);
4415	}
4416
4417	if (offset + len > sctx->cur_inode_size)
4418		len = sctx->cur_inode_size - offset;
4419	if (len == 0) {
4420		ret = 0;
4421		goto out;
4422	}
4423
4424	if (clone_root && IS_ALIGNED(offset + len, bs)) {
4425		ret = send_clone(sctx, offset, len, clone_root);
4426	} else if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA) {
4427		ret = send_update_extent(sctx, offset, len);
4428	} else {
4429		while (pos < len) {
4430			l = len - pos;
4431			if (l > BTRFS_SEND_READ_SIZE)
4432				l = BTRFS_SEND_READ_SIZE;
4433			ret = send_write(sctx, pos + offset, l);
4434			if (ret < 0)
4435				goto out;
4436			if (!ret)
4437				break;
4438			pos += ret;
4439		}
4440		ret = 0;
4441	}
4442out:
4443	return ret;
4444}
4445
4446static int is_extent_unchanged(struct send_ctx *sctx,
4447			       struct btrfs_path *left_path,
4448			       struct btrfs_key *ekey)
4449{
4450	int ret = 0;
4451	struct btrfs_key key;
4452	struct btrfs_path *path = NULL;
4453	struct extent_buffer *eb;
4454	int slot;
4455	struct btrfs_key found_key;
4456	struct btrfs_file_extent_item *ei;
4457	u64 left_disknr;
4458	u64 right_disknr;
4459	u64 left_offset;
4460	u64 right_offset;
4461	u64 left_offset_fixed;
4462	u64 left_len;
4463	u64 right_len;
4464	u64 left_gen;
4465	u64 right_gen;
4466	u8 left_type;
4467	u8 right_type;
4468
4469	path = alloc_path_for_send();
4470	if (!path)
4471		return -ENOMEM;
4472
4473	eb = left_path->nodes[0];
4474	slot = left_path->slots[0];
4475	ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
4476	left_type = btrfs_file_extent_type(eb, ei);
4477
4478	if (left_type != BTRFS_FILE_EXTENT_REG) {
4479		ret = 0;
4480		goto out;
4481	}
4482	left_disknr = btrfs_file_extent_disk_bytenr(eb, ei);
4483	left_len = btrfs_file_extent_num_bytes(eb, ei);
4484	left_offset = btrfs_file_extent_offset(eb, ei);
4485	left_gen = btrfs_file_extent_generation(eb, ei);
4486
4487	/*
4488	 * Following comments will refer to these graphics. L is the left
4489	 * extents which we are checking at the moment. 1-8 are the right
4490	 * extents that we iterate.
4491	 *
4492	 *       |-----L-----|
4493	 * |-1-|-2a-|-3-|-4-|-5-|-6-|
4494	 *
4495	 *       |-----L-----|
4496	 * |--1--|-2b-|...(same as above)
4497	 *
4498	 * Alternative situation. Happens on files where extents got split.
4499	 *       |-----L-----|
4500	 * |-----------7-----------|-6-|
4501	 *
4502	 * Alternative situation. Happens on files which got larger.
4503	 *       |-----L-----|
4504	 * |-8-|
4505	 * Nothing follows after 8.
4506	 */
4507
4508	key.objectid = ekey->objectid;
4509	key.type = BTRFS_EXTENT_DATA_KEY;
4510	key.offset = ekey->offset;
4511	ret = btrfs_search_slot_for_read(sctx->parent_root, &key, path, 0, 0);
4512	if (ret < 0)
4513		goto out;
4514	if (ret) {
4515		ret = 0;
4516		goto out;
4517	}
4518
4519	/*
4520	 * Handle special case where the right side has no extents at all.
4521	 */
4522	eb = path->nodes[0];
4523	slot = path->slots[0];
4524	btrfs_item_key_to_cpu(eb, &found_key, slot);
4525	if (found_key.objectid != key.objectid ||
4526	    found_key.type != key.type) {
4527		/* If we're a hole then just pretend nothing changed */
4528		ret = (left_disknr) ? 0 : 1;
4529		goto out;
4530	}
4531
4532	/*
4533	 * We're now on 2a, 2b or 7.
4534	 */
4535	key = found_key;
4536	while (key.offset < ekey->offset + left_len) {
4537		ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
4538		right_type = btrfs_file_extent_type(eb, ei);
4539		if (right_type != BTRFS_FILE_EXTENT_REG) {
4540			ret = 0;
4541			goto out;
4542		}
4543
4544		right_disknr = btrfs_file_extent_disk_bytenr(eb, ei);
4545		right_len = btrfs_file_extent_num_bytes(eb, ei);
4546		right_offset = btrfs_file_extent_offset(eb, ei);
4547		right_gen = btrfs_file_extent_generation(eb, ei);
4548
4549		/*
4550		 * Are we at extent 8? If yes, we know the extent is changed.
4551		 * This may only happen on the first iteration.
4552		 */
4553		if (found_key.offset + right_len <= ekey->offset) {
4554			/* If we're a hole just pretend nothing changed */
4555			ret = (left_disknr) ? 0 : 1;
4556			goto out;
4557		}
4558
4559		left_offset_fixed = left_offset;
4560		if (key.offset < ekey->offset) {
4561			/* Fix the right offset for 2a and 7. */
4562			right_offset += ekey->offset - key.offset;
4563		} else {
4564			/* Fix the left offset for all behind 2a and 2b */
4565			left_offset_fixed += key.offset - ekey->offset;
4566		}
4567
4568		/*
4569		 * Check if we have the same extent.
4570		 */
4571		if (left_disknr != right_disknr ||
4572		    left_offset_fixed != right_offset ||
4573		    left_gen != right_gen) {
4574			ret = 0;
4575			goto out;
4576		}
4577
4578		/*
4579		 * Go to the next extent.
4580		 */
4581		ret = btrfs_next_item(sctx->parent_root, path);
4582		if (ret < 0)
4583			goto out;
4584		if (!ret) {
4585			eb = path->nodes[0];
4586			slot = path->slots[0];
4587			btrfs_item_key_to_cpu(eb, &found_key, slot);
4588		}
4589		if (ret || found_key.objectid != key.objectid ||
4590		    found_key.type != key.type) {
4591			key.offset += right_len;
4592			break;
4593		}
4594		if (found_key.offset != key.offset + right_len) {
4595			ret = 0;
4596			goto out;
4597		}
4598		key = found_key;
4599	}
4600
4601	/*
4602	 * We're now behind the left extent (treat as unchanged) or at the end
4603	 * of the right side (treat as changed).
4604	 */
4605	if (key.offset >= ekey->offset + left_len)
4606		ret = 1;
4607	else
4608		ret = 0;
4609
4610
4611out:
4612	btrfs_free_path(path);
4613	return ret;
4614}
4615
4616static int get_last_extent(struct send_ctx *sctx, u64 offset)
4617{
4618	struct btrfs_path *path;
4619	struct btrfs_root *root = sctx->send_root;
4620	struct btrfs_file_extent_item *fi;
4621	struct btrfs_key key;
4622	u64 extent_end;
4623	u8 type;
4624	int ret;
4625
4626	path = alloc_path_for_send();
4627	if (!path)
4628		return -ENOMEM;
4629
4630	sctx->cur_inode_last_extent = 0;
4631
4632	key.objectid = sctx->cur_ino;
4633	key.type = BTRFS_EXTENT_DATA_KEY;
4634	key.offset = offset;
4635	ret = btrfs_search_slot_for_read(root, &key, path, 0, 1);
4636	if (ret < 0)
4637		goto out;
4638	ret = 0;
4639	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
4640	if (key.objectid != sctx->cur_ino || key.type != BTRFS_EXTENT_DATA_KEY)
4641		goto out;
4642
4643	fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
4644			    struct btrfs_file_extent_item);
4645	type = btrfs_file_extent_type(path->nodes[0], fi);
4646	if (type == BTRFS_FILE_EXTENT_INLINE) {
4647		u64 size = btrfs_file_extent_inline_len(path->nodes[0],
4648							path->slots[0], fi);
4649		extent_end = ALIGN(key.offset + size,
4650				   sctx->send_root->sectorsize);
4651	} else {
4652		extent_end = key.offset +
4653			btrfs_file_extent_num_bytes(path->nodes[0], fi);
4654	}
4655	sctx->cur_inode_last_extent = extent_end;
4656out:
4657	btrfs_free_path(path);
4658	return ret;
4659}
4660
4661static int maybe_send_hole(struct send_ctx *sctx, struct btrfs_path *path,
4662			   struct btrfs_key *key)
4663{
4664	struct btrfs_file_extent_item *fi;
4665	u64 extent_end;
4666	u8 type;
4667	int ret = 0;
4668
4669	if (sctx->cur_ino != key->objectid || !need_send_hole(sctx))
4670		return 0;
4671
4672	if (sctx->cur_inode_last_extent == (u64)-1) {
4673		ret = get_last_extent(sctx, key->offset - 1);
4674		if (ret)
4675			return ret;
4676	}
4677
4678	fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
4679			    struct btrfs_file_extent_item);
4680	type = btrfs_file_extent_type(path->nodes[0], fi);
4681	if (type == BTRFS_FILE_EXTENT_INLINE) {
4682		u64 size = btrfs_file_extent_inline_len(path->nodes[0],
4683							path->slots[0], fi);
4684		extent_end = ALIGN(key->offset + size,
4685				   sctx->send_root->sectorsize);
4686	} else {
4687		extent_end = key->offset +
4688			btrfs_file_extent_num_bytes(path->nodes[0], fi);
4689	}
4690
4691	if (path->slots[0] == 0 &&
4692	    sctx->cur_inode_last_extent < key->offset) {
4693		/*
4694		 * We might have skipped entire leafs that contained only
4695		 * file extent items for our current inode. These leafs have
4696		 * a generation number smaller (older) than the one in the
4697		 * current leaf and the leaf our last extent came from, and
4698		 * are located between these 2 leafs.
4699		 */
4700		ret = get_last_extent(sctx, key->offset - 1);
4701		if (ret)
4702			return ret;
4703	}
4704
4705	if (sctx->cur_inode_last_extent < key->offset)
4706		ret = send_hole(sctx, key->offset);
4707	sctx->cur_inode_last_extent = extent_end;
4708	return ret;
4709}
4710
4711static int process_extent(struct send_ctx *sctx,
4712			  struct btrfs_path *path,
4713			  struct btrfs_key *key)
4714{
4715	struct clone_root *found_clone = NULL;
4716	int ret = 0;
4717
4718	if (S_ISLNK(sctx->cur_inode_mode))
4719		return 0;
4720
4721	if (sctx->parent_root && !sctx->cur_inode_new) {
4722		ret = is_extent_unchanged(sctx, path, key);
4723		if (ret < 0)
4724			goto out;
4725		if (ret) {
4726			ret = 0;
4727			goto out_hole;
4728		}
4729	} else {
4730		struct btrfs_file_extent_item *ei;
4731		u8 type;
4732
4733		ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
4734				    struct btrfs_file_extent_item);
4735		type = btrfs_file_extent_type(path->nodes[0], ei);
4736		if (type == BTRFS_FILE_EXTENT_PREALLOC ||
4737		    type == BTRFS_FILE_EXTENT_REG) {
4738			/*
4739			 * The send spec does not have a prealloc command yet,
4740			 * so just leave a hole for prealloc'ed extents until
4741			 * we have enough commands queued up to justify rev'ing
4742			 * the send spec.
4743			 */
4744			if (type == BTRFS_FILE_EXTENT_PREALLOC) {
4745				ret = 0;
4746				goto out;
4747			}
4748
4749			/* Have a hole, just skip it. */
4750			if (btrfs_file_extent_disk_bytenr(path->nodes[0], ei) == 0) {
4751				ret = 0;
4752				goto out;
4753			}
4754		}
4755	}
4756
4757	ret = find_extent_clone(sctx, path, key->objectid, key->offset,
4758			sctx->cur_inode_size, &found_clone);
4759	if (ret != -ENOENT && ret < 0)
4760		goto out;
4761
4762	ret = send_write_or_clone(sctx, path, key, found_clone);
4763	if (ret)
4764		goto out;
4765out_hole:
4766	ret = maybe_send_hole(sctx, path, key);
4767out:
4768	return ret;
4769}
4770
4771static int process_all_extents(struct send_ctx *sctx)
4772{
4773	int ret;
4774	struct btrfs_root *root;
4775	struct btrfs_path *path;
4776	struct btrfs_key key;
4777	struct btrfs_key found_key;
4778	struct extent_buffer *eb;
4779	int slot;
4780
4781	root = sctx->send_root;
4782	path = alloc_path_for_send();
4783	if (!path)
4784		return -ENOMEM;
4785
4786	key.objectid = sctx->cmp_key->objectid;
4787	key.type = BTRFS_EXTENT_DATA_KEY;
4788	key.offset = 0;
4789	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4790	if (ret < 0)
4791		goto out;
4792
4793	while (1) {
4794		eb = path->nodes[0];
4795		slot = path->slots[0];
4796
4797		if (slot >= btrfs_header_nritems(eb)) {
4798			ret = btrfs_next_leaf(root, path);
4799			if (ret < 0) {
4800				goto out;
4801			} else if (ret > 0) {
4802				ret = 0;
4803				break;
4804			}
4805			continue;
4806		}
4807
4808		btrfs_item_key_to_cpu(eb, &found_key, slot);
4809
4810		if (found_key.objectid != key.objectid ||
4811		    found_key.type != key.type) {
4812			ret = 0;
4813			goto out;
4814		}
4815
4816		ret = process_extent(sctx, path, &found_key);
4817		if (ret < 0)
4818			goto out;
4819
4820		path->slots[0]++;
4821	}
4822
4823out:
4824	btrfs_free_path(path);
4825	return ret;
4826}
4827
4828static int process_recorded_refs_if_needed(struct send_ctx *sctx, int at_end,
4829					   int *pending_move,
4830					   int *refs_processed)
4831{
4832	int ret = 0;
4833
4834	if (sctx->cur_ino == 0)
4835		goto out;
4836	if (!at_end && sctx->cur_ino == sctx->cmp_key->objectid &&
4837	    sctx->cmp_key->type <= BTRFS_INODE_EXTREF_KEY)
4838		goto out;
4839	if (list_empty(&sctx->new_refs) && list_empty(&sctx->deleted_refs))
4840		goto out;
4841
4842	ret = process_recorded_refs(sctx, pending_move);
4843	if (ret < 0)
4844		goto out;
4845
4846	*refs_processed = 1;
4847out:
4848	return ret;
4849}
4850
4851static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
4852{
4853	int ret = 0;
4854	u64 left_mode;
4855	u64 left_uid;
4856	u64 left_gid;
4857	u64 right_mode;
4858	u64 right_uid;
4859	u64 right_gid;
4860	int need_chmod = 0;
4861	int need_chown = 0;
4862	int pending_move = 0;
4863	int refs_processed = 0;
4864
4865	ret = process_recorded_refs_if_needed(sctx, at_end, &pending_move,
4866					      &refs_processed);
4867	if (ret < 0)
4868		goto out;
4869
4870	/*
4871	 * We have processed the refs and thus need to advance send_progress.
4872	 * Now, calls to get_cur_xxx will take the updated refs of the current
4873	 * inode into account.
4874	 *
4875	 * On the other hand, if our current inode is a directory and couldn't
4876	 * be moved/renamed because its parent was renamed/moved too and it has
4877	 * a higher inode number, we can only move/rename our current inode
4878	 * after we moved/renamed its parent. Therefore in this case operate on
4879	 * the old path (pre move/rename) of our current inode, and the
4880	 * move/rename will be performed later.
4881	 */
4882	if (refs_processed && !pending_move)
4883		sctx->send_progress = sctx->cur_ino + 1;
4884
4885	if (sctx->cur_ino == 0 || sctx->cur_inode_deleted)
4886		goto out;
4887	if (!at_end && sctx->cmp_key->objectid == sctx->cur_ino)
4888		goto out;
4889
4890	ret = get_inode_info(sctx->send_root, sctx->cur_ino, NULL, NULL,
4891			&left_mode, &left_uid, &left_gid, NULL);
4892	if (ret < 0)
4893		goto out;
4894
4895	if (!sctx->parent_root || sctx->cur_inode_new) {
4896		need_chown = 1;
4897		if (!S_ISLNK(sctx->cur_inode_mode))
4898			need_chmod = 1;
4899	} else {
4900		ret = get_inode_info(sctx->parent_root, sctx->cur_ino,
4901				NULL, NULL, &right_mode, &right_uid,
4902				&right_gid, NULL);
4903		if (ret < 0)
4904			goto out;
4905
4906		if (left_uid != right_uid || left_gid != right_gid)
4907			need_chown = 1;
4908		if (!S_ISLNK(sctx->cur_inode_mode) && left_mode != right_mode)
4909			need_chmod = 1;
4910	}
4911
4912	if (S_ISREG(sctx->cur_inode_mode)) {
4913		if (need_send_hole(sctx)) {
4914			if (sctx->cur_inode_last_extent == (u64)-1) {
4915				ret = get_last_extent(sctx, (u64)-1);
4916				if (ret)
4917					goto out;
4918			}
4919			if (sctx->cur_inode_last_extent <
4920			    sctx->cur_inode_size) {
4921				ret = send_hole(sctx, sctx->cur_inode_size);
4922				if (ret)
4923					goto out;
4924			}
4925		}
4926		ret = send_truncate(sctx, sctx->cur_ino, sctx->cur_inode_gen,
4927				sctx->cur_inode_size);
4928		if (ret < 0)
4929			goto out;
4930	}
4931
4932	if (need_chown) {
4933		ret = send_chown(sctx, sctx->cur_ino, sctx->cur_inode_gen,
4934				left_uid, left_gid);
4935		if (ret < 0)
4936			goto out;
4937	}
4938	if (need_chmod) {
4939		ret = send_chmod(sctx, sctx->cur_ino, sctx->cur_inode_gen,
4940				left_mode);
4941		if (ret < 0)
4942			goto out;
4943	}
4944
4945	/*
4946	 * If other directory inodes depended on our current directory
4947	 * inode's move/rename, now do their move/rename operations.
4948	 */
4949	if (!is_waiting_for_move(sctx, sctx->cur_ino)) {
4950		ret = apply_children_dir_moves(sctx);
4951		if (ret)
4952			goto out;
4953	}
4954
4955	/*
4956	 * Need to send that every time, no matter if it actually
4957	 * changed between the two trees as we have done changes to
4958	 * the inode before.
4959	 */
4960	sctx->send_progress = sctx->cur_ino + 1;
4961	ret = send_utimes(sctx, sctx->cur_ino, sctx->cur_inode_gen);
4962	if (ret < 0)
4963		goto out;
4964
4965out:
4966	return ret;
4967}
4968
4969static int changed_inode(struct send_ctx *sctx,
4970			 enum btrfs_compare_tree_result result)
4971{
4972	int ret = 0;
4973	struct btrfs_key *key = sctx->cmp_key;
4974	struct btrfs_inode_item *left_ii = NULL;
4975	struct btrfs_inode_item *right_ii = NULL;
4976	u64 left_gen = 0;
4977	u64 right_gen = 0;
4978
4979	sctx->cur_ino = key->objectid;
4980	sctx->cur_inode_new_gen = 0;
4981	sctx->cur_inode_last_extent = (u64)-1;
4982
4983	/*
4984	 * Set send_progress to current inode. This will tell all get_cur_xxx
4985	 * functions that the current inode's refs are not updated yet. Later,
4986	 * when process_recorded_refs is finished, it is set to cur_ino + 1.
4987	 */
4988	sctx->send_progress = sctx->cur_ino;
4989
4990	if (result == BTRFS_COMPARE_TREE_NEW ||
4991	    result == BTRFS_COMPARE_TREE_CHANGED) {
4992		left_ii = btrfs_item_ptr(sctx->left_path->nodes[0],
4993				sctx->left_path->slots[0],
4994				struct btrfs_inode_item);
4995		left_gen = btrfs_inode_generation(sctx->left_path->nodes[0],
4996				left_ii);
4997	} else {
4998		right_ii = btrfs_item_ptr(sctx->right_path->nodes[0],
4999				sctx->right_path->slots[0],
5000				struct btrfs_inode_item);
5001		right_gen = btrfs_inode_generation(sctx->right_path->nodes[0],
5002				right_ii);
5003	}
5004	if (result == BTRFS_COMPARE_TREE_CHANGED) {
5005		right_ii = btrfs_item_ptr(sctx->right_path->nodes[0],
5006				sctx->right_path->slots[0],
5007				struct btrfs_inode_item);
5008
5009		right_gen = btrfs_inode_generation(sctx->right_path->nodes[0],
5010				right_ii);
5011
5012		/*
5013		 * The cur_ino = root dir case is special here. We can't treat
5014		 * the inode as deleted+reused because it would generate a
5015		 * stream that tries to delete/mkdir the root dir.
5016		 */
5017		if (left_gen != right_gen &&
5018		    sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID)
5019			sctx->cur_inode_new_gen = 1;
5020	}
5021
5022	if (result == BTRFS_COMPARE_TREE_NEW) {
5023		sctx->cur_inode_gen = left_gen;
5024		sctx->cur_inode_new = 1;
5025		sctx->cur_inode_deleted = 0;
5026		sctx->cur_inode_size = btrfs_inode_size(
5027				sctx->left_path->nodes[0], left_ii);
5028		sctx->cur_inode_mode = btrfs_inode_mode(
5029				sctx->left_path->nodes[0], left_ii);
5030		if (sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID)
5031			ret = send_create_inode_if_needed(sctx);
5032	} else if (result == BTRFS_COMPARE_TREE_DELETED) {
5033		sctx->cur_inode_gen = right_gen;
5034		sctx->cur_inode_new = 0;
5035		sctx->cur_inode_deleted = 1;
5036		sctx->cur_inode_size = btrfs_inode_size(
5037				sctx->right_path->nodes[0], right_ii);
5038		sctx->cur_inode_mode = btrfs_inode_mode(
5039				sctx->right_path->nodes[0], right_ii);
5040	} else if (result == BTRFS_COMPARE_TREE_CHANGED) {
5041		/*
5042		 * We need to do some special handling in case the inode was
5043		 * reported as changed with a changed generation number. This
5044		 * means that the original inode was deleted and new inode
5045		 * reused the same inum. So we have to treat the old inode as
5046		 * deleted and the new one as new.
5047		 */
5048		if (sctx->cur_inode_new_gen) {
5049			/*
5050			 * First, process the inode as if it was deleted.
5051			 */
5052			sctx->cur_inode_gen = right_gen;
5053			sctx->cur_inode_new = 0;
5054			sctx->cur_inode_deleted = 1;
5055			sctx->cur_inode_size = btrfs_inode_size(
5056					sctx->right_path->nodes[0], right_ii);
5057			sctx->cur_inode_mode = btrfs_inode_mode(
5058					sctx->right_path->nodes[0], right_ii);
5059			ret = process_all_refs(sctx,
5060					BTRFS_COMPARE_TREE_DELETED);
5061			if (ret < 0)
5062				goto out;
5063
5064			/*
5065			 * Now process the inode as if it was new.
5066			 */
5067			sctx->cur_inode_gen = left_gen;
5068			sctx->cur_inode_new = 1;
5069			sctx->cur_inode_deleted = 0;
5070			sctx->cur_inode_size = btrfs_inode_size(
5071					sctx->left_path->nodes[0], left_ii);
5072			sctx->cur_inode_mode = btrfs_inode_mode(
5073					sctx->left_path->nodes[0], left_ii);
5074			ret = send_create_inode_if_needed(sctx);
5075			if (ret < 0)
5076				goto out;
5077
5078			ret = process_all_refs(sctx, BTRFS_COMPARE_TREE_NEW);
5079			if (ret < 0)
5080				goto out;
5081			/*
5082			 * Advance send_progress now as we did not get into
5083			 * process_recorded_refs_if_needed in the new_gen case.
5084			 */
5085			sctx->send_progress = sctx->cur_ino + 1;
5086
5087			/*
5088			 * Now process all extents and xattrs of the inode as if
5089			 * they were all new.
5090			 */
5091			ret = process_all_extents(sctx);
5092			if (ret < 0)
5093				goto out;
5094			ret = process_all_new_xattrs(sctx);
5095			if (ret < 0)
5096				goto out;
5097		} else {
5098			sctx->cur_inode_gen = left_gen;
5099			sctx->cur_inode_new = 0;
5100			sctx->cur_inode_new_gen = 0;
5101			sctx->cur_inode_deleted = 0;
5102			sctx->cur_inode_size = btrfs_inode_size(
5103					sctx->left_path->nodes[0], left_ii);
5104			sctx->cur_inode_mode = btrfs_inode_mode(
5105					sctx->left_path->nodes[0], left_ii);
5106		}
5107	}
5108
5109out:
5110	return ret;
5111}
5112
5113/*
5114 * We have to process new refs before deleted refs, but compare_trees gives us
5115 * the new and deleted refs mixed. To fix this, we record the new/deleted refs
5116 * first and later process them in process_recorded_refs.
5117 * For the cur_inode_new_gen case, we skip recording completely because
5118 * changed_inode did already initiate processing of refs. The reason for this is
5119 * that in this case, compare_tree actually compares the refs of 2 different
5120 * inodes. To fix this, process_all_refs is used in changed_inode to handle all
5121 * refs of the right tree as deleted and all refs of the left tree as new.
5122 */
5123static int changed_ref(struct send_ctx *sctx,
5124		       enum btrfs_compare_tree_result result)
5125{
5126	int ret = 0;
5127
5128	BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid);
5129
5130	if (!sctx->cur_inode_new_gen &&
5131	    sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) {
5132		if (result == BTRFS_COMPARE_TREE_NEW)
5133			ret = record_new_ref(sctx);
5134		else if (result == BTRFS_COMPARE_TREE_DELETED)
5135			ret = record_deleted_ref(sctx);
5136		else if (result == BTRFS_COMPARE_TREE_CHANGED)
5137			ret = record_changed_ref(sctx);
5138	}
5139
5140	return ret;
5141}
5142
5143/*
5144 * Process new/deleted/changed xattrs. We skip processing in the
5145 * cur_inode_new_gen case because changed_inode did already initiate processing
5146 * of xattrs. The reason is the same as in changed_ref
5147 */
5148static int changed_xattr(struct send_ctx *sctx,
5149			 enum btrfs_compare_tree_result result)
5150{
5151	int ret = 0;
5152
5153	BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid);
5154
5155	if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
5156		if (result == BTRFS_COMPARE_TREE_NEW)
5157			ret = process_new_xattr(sctx);
5158		else if (result == BTRFS_COMPARE_TREE_DELETED)
5159			ret = process_deleted_xattr(sctx);
5160		else if (result == BTRFS_COMPARE_TREE_CHANGED)
5161			ret = process_changed_xattr(sctx);
5162	}
5163
5164	return ret;
5165}
5166
5167/*
5168 * Process new/deleted/changed extents. We skip processing in the
5169 * cur_inode_new_gen case because changed_inode did already initiate processing
5170 * of extents. The reason is the same as in changed_ref
5171 */
5172static int changed_extent(struct send_ctx *sctx,
5173			  enum btrfs_compare_tree_result result)
5174{
5175	int ret = 0;
5176
5177	BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid);
5178
5179	if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
5180		if (result != BTRFS_COMPARE_TREE_DELETED)
5181			ret = process_extent(sctx, sctx->left_path,
5182					sctx->cmp_key);
5183	}
5184
5185	return ret;
5186}
5187
5188static int dir_changed(struct send_ctx *sctx, u64 dir)
5189{
5190	u64 orig_gen, new_gen;
5191	int ret;
5192
5193	ret = get_inode_info(sctx->send_root, dir, NULL, &new_gen, NULL, NULL,
5194			     NULL, NULL);
5195	if (ret)
5196		return ret;
5197
5198	ret = get_inode_info(sctx->parent_root, dir, NULL, &orig_gen, NULL,
5199			     NULL, NULL, NULL);
5200	if (ret)
5201		return ret;
5202
5203	return (orig_gen != new_gen) ? 1 : 0;
5204}
5205
5206static int compare_refs(struct send_ctx *sctx, struct btrfs_path *path,
5207			struct btrfs_key *key)
5208{
5209	struct btrfs_inode_extref *extref;
5210	struct extent_buffer *leaf;
5211	u64 dirid = 0, last_dirid = 0;
5212	unsigned long ptr;
5213	u32 item_size;
5214	u32 cur_offset = 0;
5215	int ref_name_len;
5216	int ret = 0;
5217
5218	/* Easy case, just check this one dirid */
5219	if (key->type == BTRFS_INODE_REF_KEY) {
5220		dirid = key->offset;
5221
5222		ret = dir_changed(sctx, dirid);
5223		goto out;
5224	}
5225
5226	leaf = path->nodes[0];
5227	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
5228	ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
5229	while (cur_offset < item_size) {
5230		extref = (struct btrfs_inode_extref *)(ptr +
5231						       cur_offset);
5232		dirid = btrfs_inode_extref_parent(leaf, extref);
5233		ref_name_len = btrfs_inode_extref_name_len(leaf, extref);
5234		cur_offset += ref_name_len + sizeof(*extref);
5235		if (dirid == last_dirid)
5236			continue;
5237		ret = dir_changed(sctx, dirid);
5238		if (ret)
5239			break;
5240		last_dirid = dirid;
5241	}
5242out:
5243	return ret;
5244}
5245
5246/*
5247 * Updates compare related fields in sctx and simply forwards to the actual
5248 * changed_xxx functions.
5249 */
5250static int changed_cb(struct btrfs_root *left_root,
5251		      struct btrfs_root *right_root,
5252		      struct btrfs_path *left_path,
5253		      struct btrfs_path *right_path,
5254		      struct btrfs_key *key,
5255		      enum btrfs_compare_tree_result result,
5256		      void *ctx)
5257{
5258	int ret = 0;
5259	struct send_ctx *sctx = ctx;
5260
5261	if (result == BTRFS_COMPARE_TREE_SAME) {
5262		if (key->type == BTRFS_INODE_REF_KEY ||
5263		    key->type == BTRFS_INODE_EXTREF_KEY) {
5264			ret = compare_refs(sctx, left_path, key);
5265			if (!ret)
5266				return 0;
5267			if (ret < 0)
5268				return ret;
5269		} else if (key->type == BTRFS_EXTENT_DATA_KEY) {
5270			return maybe_send_hole(sctx, left_path, key);
5271		} else {
5272			return 0;
5273		}
5274		result = BTRFS_COMPARE_TREE_CHANGED;
5275		ret = 0;
5276	}
5277
5278	sctx->left_path = left_path;
5279	sctx->right_path = right_path;
5280	sctx->cmp_key = key;
5281
5282	ret = finish_inode_if_needed(sctx, 0);
5283	if (ret < 0)
5284		goto out;
5285
5286	/* Ignore non-FS objects */
5287	if (key->objectid == BTRFS_FREE_INO_OBJECTID ||
5288	    key->objectid == BTRFS_FREE_SPACE_OBJECTID)
5289		goto out;
5290
5291	if (key->type == BTRFS_INODE_ITEM_KEY)
5292		ret = changed_inode(sctx, result);
5293	else if (key->type == BTRFS_INODE_REF_KEY ||
5294		 key->type == BTRFS_INODE_EXTREF_KEY)
5295		ret = changed_ref(sctx, result);
5296	else if (key->type == BTRFS_XATTR_ITEM_KEY)
5297		ret = changed_xattr(sctx, result);
5298	else if (key->type == BTRFS_EXTENT_DATA_KEY)
5299		ret = changed_extent(sctx, result);
5300
5301out:
5302	return ret;
5303}
5304
5305static int full_send_tree(struct send_ctx *sctx)
5306{
5307	int ret;
5308	struct btrfs_trans_handle *trans = NULL;
5309	struct btrfs_root *send_root = sctx->send_root;
5310	struct btrfs_key key;
5311	struct btrfs_key found_key;
5312	struct btrfs_path *path;
5313	struct extent_buffer *eb;
5314	int slot;
5315	u64 start_ctransid;
5316	u64 ctransid;
5317
5318	path = alloc_path_for_send();
5319	if (!path)
5320		return -ENOMEM;
5321
5322	spin_lock(&send_root->root_item_lock);
5323	start_ctransid = btrfs_root_ctransid(&send_root->root_item);
5324	spin_unlock(&send_root->root_item_lock);
5325
5326	key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5327	key.type = BTRFS_INODE_ITEM_KEY;
5328	key.offset = 0;
5329
5330join_trans:
5331	/*
5332	 * We need to make sure the transaction does not get committed
5333	 * while we do anything on commit roots. Join a transaction to prevent
5334	 * this.
5335	 */
5336	trans = btrfs_join_transaction(send_root);
5337	if (IS_ERR(trans)) {
5338		ret = PTR_ERR(trans);
5339		trans = NULL;
5340		goto out;
5341	}
5342
5343	/*
5344	 * Make sure the tree has not changed after re-joining. We detect this
5345	 * by comparing start_ctransid and ctransid. They should always match.
5346	 */
5347	spin_lock(&send_root->root_item_lock);
5348	ctransid = btrfs_root_ctransid(&send_root->root_item);
5349	spin_unlock(&send_root->root_item_lock);
5350
5351	if (ctransid != start_ctransid) {
5352		WARN(1, KERN_WARNING "BTRFS: the root that you're trying to "
5353				     "send was modified in between. This is "
5354				     "probably a bug.\n");
5355		ret = -EIO;
5356		goto out;
5357	}
5358
5359	ret = btrfs_search_slot_for_read(send_root, &key, path, 1, 0);
5360	if (ret < 0)
5361		goto out;
5362	if (ret)
5363		goto out_finish;
5364
5365	while (1) {
5366		/*
5367		 * When someone want to commit while we iterate, end the
5368		 * joined transaction and rejoin.
5369		 */
5370		if (btrfs_should_end_transaction(trans, send_root)) {
5371			ret = btrfs_end_transaction(trans, send_root);
5372			trans = NULL;
5373			if (ret < 0)
5374				goto out;
5375			btrfs_release_path(path);
5376			goto join_trans;
5377		}
5378
5379		eb = path->nodes[0];
5380		slot = path->slots[0];
5381		btrfs_item_key_to_cpu(eb, &found_key, slot);
5382
5383		ret = changed_cb(send_root, NULL, path, NULL,
5384				&found_key, BTRFS_COMPARE_TREE_NEW, sctx);
5385		if (ret < 0)
5386			goto out;
5387
5388		key.objectid = found_key.objectid;
5389		key.type = found_key.type;
5390		key.offset = found_key.offset + 1;
5391
5392		ret = btrfs_next_item(send_root, path);
5393		if (ret < 0)
5394			goto out;
5395		if (ret) {
5396			ret  = 0;
5397			break;
5398		}
5399	}
5400
5401out_finish:
5402	ret = finish_inode_if_needed(sctx, 1);
5403
5404out:
5405	btrfs_free_path(path);
5406	if (trans) {
5407		if (!ret)
5408			ret = btrfs_end_transaction(trans, send_root);
5409		else
5410			btrfs_end_transaction(trans, send_root);
5411	}
5412	return ret;
5413}
5414
5415static int send_subvol(struct send_ctx *sctx)
5416{
5417	int ret;
5418
5419	if (!(sctx->flags & BTRFS_SEND_FLAG_OMIT_STREAM_HEADER)) {
5420		ret = send_header(sctx);
5421		if (ret < 0)
5422			goto out;
5423	}
5424
5425	ret = send_subvol_begin(sctx);
5426	if (ret < 0)
5427		goto out;
5428
5429	if (sctx->parent_root) {
5430		ret = btrfs_compare_trees(sctx->send_root, sctx->parent_root,
5431				changed_cb, sctx);
5432		if (ret < 0)
5433			goto out;
5434		ret = finish_inode_if_needed(sctx, 1);
5435		if (ret < 0)
5436			goto out;
5437	} else {
5438		ret = full_send_tree(sctx);
5439		if (ret < 0)
5440			goto out;
5441	}
5442
5443out:
5444	free_recorded_refs(sctx);
5445	return ret;
5446}
5447
5448static void btrfs_root_dec_send_in_progress(struct btrfs_root* root)
5449{
5450	spin_lock(&root->root_item_lock);
5451	root->send_in_progress--;
5452	/*
5453	 * Not much left to do, we don't know why it's unbalanced and
5454	 * can't blindly reset it to 0.
5455	 */
5456	if (root->send_in_progress < 0)
5457		btrfs_err(root->fs_info,
5458			"send_in_progres unbalanced %d root %llu\n",
5459			root->send_in_progress, root->root_key.objectid);
5460	spin_unlock(&root->root_item_lock);
5461}
5462
5463long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
5464{
5465	int ret = 0;
5466	struct btrfs_root *send_root;
5467	struct btrfs_root *clone_root;
5468	struct btrfs_fs_info *fs_info;
5469	struct btrfs_ioctl_send_args *arg = NULL;
5470	struct btrfs_key key;
5471	struct send_ctx *sctx = NULL;
5472	u32 i;
5473	u64 *clone_sources_tmp = NULL;
5474	int clone_sources_to_rollback = 0;
5475	int sort_clone_roots = 0;
5476	int index;
5477
5478	if (!capable(CAP_SYS_ADMIN))
5479		return -EPERM;
5480
5481	send_root = BTRFS_I(file_inode(mnt_file))->root;
5482	fs_info = send_root->fs_info;
5483
5484	/*
5485	 * The subvolume must remain read-only during send, protect against
5486	 * making it RW.
5487	 */
5488	spin_lock(&send_root->root_item_lock);
5489	send_root->send_in_progress++;
5490	spin_unlock(&send_root->root_item_lock);
5491
5492	/*
5493	 * This is done when we lookup the root, it should already be complete
5494	 * by the time we get here.
5495	 */
5496	WARN_ON(send_root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE);
5497
5498	/*
5499	 * Userspace tools do the checks and warn the user if it's
5500	 * not RO.
5501	 */
5502	if (!btrfs_root_readonly(send_root)) {
5503		ret = -EPERM;
5504		goto out;
5505	}
5506
5507	arg = memdup_user(arg_, sizeof(*arg));
5508	if (IS_ERR(arg)) {
5509		ret = PTR_ERR(arg);
5510		arg = NULL;
5511		goto out;
5512	}
5513
5514	if (!access_ok(VERIFY_READ, arg->clone_sources,
5515			sizeof(*arg->clone_sources) *
5516			arg->clone_sources_count)) {
5517		ret = -EFAULT;
5518		goto out;
5519	}
5520
5521	if (arg->flags & ~BTRFS_SEND_FLAG_MASK) {
5522		ret = -EINVAL;
5523		goto out;
5524	}
5525
5526	sctx = kzalloc(sizeof(struct send_ctx), GFP_NOFS);
5527	if (!sctx) {
5528		ret = -ENOMEM;
5529		goto out;
5530	}
5531
5532	INIT_LIST_HEAD(&sctx->new_refs);
5533	INIT_LIST_HEAD(&sctx->deleted_refs);
5534	INIT_RADIX_TREE(&sctx->name_cache, GFP_NOFS);
5535	INIT_LIST_HEAD(&sctx->name_cache_list);
5536
5537	sctx->flags = arg->flags;
5538
5539	sctx->send_filp = fget(arg->send_fd);
5540	if (!sctx->send_filp) {
5541		ret = -EBADF;
5542		goto out;
5543	}
5544
5545	sctx->send_root = send_root;
5546	sctx->clone_roots_cnt = arg->clone_sources_count;
5547
5548	sctx->send_max_size = BTRFS_SEND_BUF_SIZE;
5549	sctx->send_buf = vmalloc(sctx->send_max_size);
5550	if (!sctx->send_buf) {
5551		ret = -ENOMEM;
5552		goto out;
5553	}
5554
5555	sctx->read_buf = vmalloc(BTRFS_SEND_READ_SIZE);
5556	if (!sctx->read_buf) {
5557		ret = -ENOMEM;
5558		goto out;
5559	}
5560
5561	sctx->pending_dir_moves = RB_ROOT;
5562	sctx->waiting_dir_moves = RB_ROOT;
5563	sctx->orphan_dirs = RB_ROOT;
5564
5565	sctx->clone_roots = vzalloc(sizeof(struct clone_root) *
5566			(arg->clone_sources_count + 1));
5567	if (!sctx->clone_roots) {
5568		ret = -ENOMEM;
5569		goto out;
5570	}
5571
5572	if (arg->clone_sources_count) {
5573		clone_sources_tmp = vmalloc(arg->clone_sources_count *
5574				sizeof(*arg->clone_sources));
5575		if (!clone_sources_tmp) {
5576			ret = -ENOMEM;
5577			goto out;
5578		}
5579
5580		ret = copy_from_user(clone_sources_tmp, arg->clone_sources,
5581				arg->clone_sources_count *
5582				sizeof(*arg->clone_sources));
5583		if (ret) {
5584			ret = -EFAULT;
5585			goto out;
5586		}
5587
5588		for (i = 0; i < arg->clone_sources_count; i++) {
5589			key.objectid = clone_sources_tmp[i];
5590			key.type = BTRFS_ROOT_ITEM_KEY;
5591			key.offset = (u64)-1;
5592
5593			index = srcu_read_lock(&fs_info->subvol_srcu);
5594
5595			clone_root = btrfs_read_fs_root_no_name(fs_info, &key);
5596			if (IS_ERR(clone_root)) {
5597				srcu_read_unlock(&fs_info->subvol_srcu, index);
5598				ret = PTR_ERR(clone_root);
5599				goto out;
5600			}
5601			clone_sources_to_rollback = i + 1;
5602			spin_lock(&clone_root->root_item_lock);
5603			clone_root->send_in_progress++;
5604			if (!btrfs_root_readonly(clone_root)) {
5605				spin_unlock(&clone_root->root_item_lock);
5606				srcu_read_unlock(&fs_info->subvol_srcu, index);
5607				ret = -EPERM;
5608				goto out;
5609			}
5610			spin_unlock(&clone_root->root_item_lock);
5611			srcu_read_unlock(&fs_info->subvol_srcu, index);
5612
5613			sctx->clone_roots[i].root = clone_root;
5614		}
5615		vfree(clone_sources_tmp);
5616		clone_sources_tmp = NULL;
5617	}
5618
5619	if (arg->parent_root) {
5620		key.objectid = arg->parent_root;
5621		key.type = BTRFS_ROOT_ITEM_KEY;
5622		key.offset = (u64)-1;
5623
5624		index = srcu_read_lock(&fs_info->subvol_srcu);
5625
5626		sctx->parent_root = btrfs_read_fs_root_no_name(fs_info, &key);
5627		if (IS_ERR(sctx->parent_root)) {
5628			srcu_read_unlock(&fs_info->subvol_srcu, index);
5629			ret = PTR_ERR(sctx->parent_root);
5630			goto out;
5631		}
5632
5633		spin_lock(&sctx->parent_root->root_item_lock);
5634		sctx->parent_root->send_in_progress++;
5635		if (!btrfs_root_readonly(sctx->parent_root)) {
5636			spin_unlock(&sctx->parent_root->root_item_lock);
5637			srcu_read_unlock(&fs_info->subvol_srcu, index);
5638			ret = -EPERM;
5639			goto out;
5640		}
5641		spin_unlock(&sctx->parent_root->root_item_lock);
5642
5643		srcu_read_unlock(&fs_info->subvol_srcu, index);
5644	}
5645
5646	/*
5647	 * Clones from send_root are allowed, but only if the clone source
5648	 * is behind the current send position. This is checked while searching
5649	 * for possible clone sources.
5650	 */
5651	sctx->clone_roots[sctx->clone_roots_cnt++].root = sctx->send_root;
5652
5653	/* We do a bsearch later */
5654	sort(sctx->clone_roots, sctx->clone_roots_cnt,
5655			sizeof(*sctx->clone_roots), __clone_root_cmp_sort,
5656			NULL);
5657	sort_clone_roots = 1;
5658
5659	ret = send_subvol(sctx);
5660	if (ret < 0)
5661		goto out;
5662
5663	if (!(sctx->flags & BTRFS_SEND_FLAG_OMIT_END_CMD)) {
5664		ret = begin_cmd(sctx, BTRFS_SEND_C_END);
5665		if (ret < 0)
5666			goto out;
5667		ret = send_cmd(sctx);
5668		if (ret < 0)
5669			goto out;
5670	}
5671
5672out:
5673	WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->pending_dir_moves));
5674	while (sctx && !RB_EMPTY_ROOT(&sctx->pending_dir_moves)) {
5675		struct rb_node *n;
5676		struct pending_dir_move *pm;
5677
5678		n = rb_first(&sctx->pending_dir_moves);
5679		pm = rb_entry(n, struct pending_dir_move, node);
5680		while (!list_empty(&pm->list)) {
5681			struct pending_dir_move *pm2;
5682
5683			pm2 = list_first_entry(&pm->list,
5684					       struct pending_dir_move, list);
5685			free_pending_move(sctx, pm2);
5686		}
5687		free_pending_move(sctx, pm);
5688	}
5689
5690	WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->waiting_dir_moves));
5691	while (sctx && !RB_EMPTY_ROOT(&sctx->waiting_dir_moves)) {
5692		struct rb_node *n;
5693		struct waiting_dir_move *dm;
5694
5695		n = rb_first(&sctx->waiting_dir_moves);
5696		dm = rb_entry(n, struct waiting_dir_move, node);
5697		rb_erase(&dm->node, &sctx->waiting_dir_moves);
5698		kfree(dm);
5699	}
5700
5701	WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->orphan_dirs));
5702	while (sctx && !RB_EMPTY_ROOT(&sctx->orphan_dirs)) {
5703		struct rb_node *n;
5704		struct orphan_dir_info *odi;
5705
5706		n = rb_first(&sctx->orphan_dirs);
5707		odi = rb_entry(n, struct orphan_dir_info, node);
5708		free_orphan_dir_info(sctx, odi);
5709	}
5710
5711	if (sort_clone_roots) {
5712		for (i = 0; i < sctx->clone_roots_cnt; i++)
5713			btrfs_root_dec_send_in_progress(
5714					sctx->clone_roots[i].root);
5715	} else {
5716		for (i = 0; sctx && i < clone_sources_to_rollback; i++)
5717			btrfs_root_dec_send_in_progress(
5718					sctx->clone_roots[i].root);
5719
5720		btrfs_root_dec_send_in_progress(send_root);
5721	}
5722	if (sctx && !IS_ERR_OR_NULL(sctx->parent_root))
5723		btrfs_root_dec_send_in_progress(sctx->parent_root);
5724
5725	kfree(arg);
5726	vfree(clone_sources_tmp);
5727
5728	if (sctx) {
5729		if (sctx->send_filp)
5730			fput(sctx->send_filp);
5731
5732		vfree(sctx->clone_roots);
5733		vfree(sctx->send_buf);
5734		vfree(sctx->read_buf);
5735
5736		name_cache_free(sctx);
5737
5738		kfree(sctx);
5739	}
5740
5741	return ret;
5742}
5743