send.c revision d447d0da44cd7d396277d1d8f46b418c721fbc02
1/* 2 * Copyright (C) 2012 Alexander Block. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public 6 * License v2 as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public 14 * License along with this program; if not, write to the 15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 16 * Boston, MA 021110-1307, USA. 17 */ 18 19#include <linux/bsearch.h> 20#include <linux/fs.h> 21#include <linux/file.h> 22#include <linux/sort.h> 23#include <linux/mount.h> 24#include <linux/xattr.h> 25#include <linux/posix_acl_xattr.h> 26#include <linux/radix-tree.h> 27#include <linux/vmalloc.h> 28#include <linux/string.h> 29 30#include "send.h" 31#include "backref.h" 32#include "hash.h" 33#include "locking.h" 34#include "disk-io.h" 35#include "btrfs_inode.h" 36#include "transaction.h" 37 38static int g_verbose = 0; 39 40#define verbose_printk(...) if (g_verbose) printk(__VA_ARGS__) 41 42/* 43 * A fs_path is a helper to dynamically build path names with unknown size. 44 * It reallocates the internal buffer on demand. 45 * It allows fast adding of path elements on the right side (normal path) and 46 * fast adding to the left side (reversed path). A reversed path can also be 47 * unreversed if needed. 48 */ 49struct fs_path { 50 union { 51 struct { 52 char *start; 53 char *end; 54 55 char *buf; 56 unsigned short buf_len:15; 57 unsigned short reversed:1; 58 char inline_buf[]; 59 }; 60 /* 61 * Average path length does not exceed 200 bytes, we'll have 62 * better packing in the slab and higher chance to satisfy 63 * a allocation later during send. 
64 */ 65 char pad[256]; 66 }; 67}; 68#define FS_PATH_INLINE_SIZE \ 69 (sizeof(struct fs_path) - offsetof(struct fs_path, inline_buf)) 70 71 72/* reused for each extent */ 73struct clone_root { 74 struct btrfs_root *root; 75 u64 ino; 76 u64 offset; 77 78 u64 found_refs; 79}; 80 81#define SEND_CTX_MAX_NAME_CACHE_SIZE 128 82#define SEND_CTX_NAME_CACHE_CLEAN_SIZE (SEND_CTX_MAX_NAME_CACHE_SIZE * 2) 83 84struct send_ctx { 85 struct file *send_filp; 86 loff_t send_off; 87 char *send_buf; 88 u32 send_size; 89 u32 send_max_size; 90 u64 total_send_size; 91 u64 cmd_send_size[BTRFS_SEND_C_MAX + 1]; 92 u64 flags; /* 'flags' member of btrfs_ioctl_send_args is u64 */ 93 94 struct btrfs_root *send_root; 95 struct btrfs_root *parent_root; 96 struct clone_root *clone_roots; 97 int clone_roots_cnt; 98 99 /* current state of the compare_tree call */ 100 struct btrfs_path *left_path; 101 struct btrfs_path *right_path; 102 struct btrfs_key *cmp_key; 103 104 /* 105 * infos of the currently processed inode. In case of deleted inodes, 106 * these are the values from the deleted inode. 
107 */ 108 u64 cur_ino; 109 u64 cur_inode_gen; 110 int cur_inode_new; 111 int cur_inode_new_gen; 112 int cur_inode_deleted; 113 u64 cur_inode_size; 114 u64 cur_inode_mode; 115 u64 cur_inode_rdev; 116 u64 cur_inode_last_extent; 117 118 u64 send_progress; 119 120 struct list_head new_refs; 121 struct list_head deleted_refs; 122 123 struct radix_tree_root name_cache; 124 struct list_head name_cache_list; 125 int name_cache_size; 126 127 struct file_ra_state ra; 128 129 char *read_buf; 130 131 /* 132 * We process inodes by their increasing order, so if before an 133 * incremental send we reverse the parent/child relationship of 134 * directories such that a directory with a lower inode number was 135 * the parent of a directory with a higher inode number, and the one 136 * becoming the new parent got renamed too, we can't rename/move the 137 * directory with lower inode number when we finish processing it - we 138 * must process the directory with higher inode number first, then 139 * rename/move it and then rename/move the directory with lower inode 140 * number. Example follows. 141 * 142 * Tree state when the first send was performed: 143 * 144 * . 145 * |-- a (ino 257) 146 * |-- b (ino 258) 147 * | 148 * | 149 * |-- c (ino 259) 150 * | |-- d (ino 260) 151 * | 152 * |-- c2 (ino 261) 153 * 154 * Tree state when the second (incremental) send is performed: 155 * 156 * . 157 * |-- a (ino 257) 158 * |-- b (ino 258) 159 * |-- c2 (ino 261) 160 * |-- d2 (ino 260) 161 * |-- cc (ino 259) 162 * 163 * The sequence of steps that lead to the second state was: 164 * 165 * mv /a/b/c/d /a/b/c2/d2 166 * mv /a/b/c /a/b/c2/d2/cc 167 * 168 * "c" has lower inode number, but we can't move it (2nd mv operation) 169 * before we move "d", which has higher inode number. 170 * 171 * So we just memorize which move/rename operations must be performed 172 * later when their respective parent is processed and moved/renamed. 173 */ 174 175 /* Indexed by parent directory inode number. 
*/ 176 struct rb_root pending_dir_moves; 177 178 /* 179 * Reverse index, indexed by the inode number of a directory that 180 * is waiting for the move/rename of its immediate parent before its 181 * own move/rename can be performed. 182 */ 183 struct rb_root waiting_dir_moves; 184 185 /* 186 * A directory that is going to be rm'ed might have a child directory 187 * which is in the pending directory moves index above. In this case, 188 * the directory can only be removed after the move/rename of its child 189 * is performed. Example: 190 * 191 * Parent snapshot: 192 * 193 * . (ino 256) 194 * |-- a/ (ino 257) 195 * |-- b/ (ino 258) 196 * |-- c/ (ino 259) 197 * | |-- x/ (ino 260) 198 * | 199 * |-- y/ (ino 261) 200 * 201 * Send snapshot: 202 * 203 * . (ino 256) 204 * |-- a/ (ino 257) 205 * |-- b/ (ino 258) 206 * |-- YY/ (ino 261) 207 * |-- x/ (ino 260) 208 * 209 * Sequence of steps that lead to the send snapshot: 210 * rm -f /a/b/c/foo.txt 211 * mv /a/b/y /a/b/YY 212 * mv /a/b/c/x /a/b/YY 213 * rmdir /a/b/c 214 * 215 * When the child is processed, its move/rename is delayed until its 216 * parent is processed (as explained above), but all other operations 217 * like update utimes, chown, chgrp, etc, are performed and the paths 218 * that it uses for those operations must use the orphanized name of 219 * its parent (the directory we're going to rm later), so we need to 220 * memorize that name. 221 * 222 * Indexed by the inode number of the directory to be deleted. 223 */ 224 struct rb_root orphan_dirs; 225}; 226 227struct pending_dir_move { 228 struct rb_node node; 229 struct list_head list; 230 u64 parent_ino; 231 u64 ino; 232 u64 gen; 233 struct list_head update_refs; 234}; 235 236struct waiting_dir_move { 237 struct rb_node node; 238 u64 ino; 239 /* 240 * There might be some directory that could not be removed because it 241 * was waiting for this directory inode to be moved first. Therefore 242 * after this directory is moved, we can try to rmdir the ino rmdir_ino. 
243 */ 244 u64 rmdir_ino; 245}; 246 247struct orphan_dir_info { 248 struct rb_node node; 249 u64 ino; 250 u64 gen; 251}; 252 253struct name_cache_entry { 254 struct list_head list; 255 /* 256 * radix_tree has only 32bit entries but we need to handle 64bit inums. 257 * We use the lower 32bit of the 64bit inum to store it in the tree. If 258 * more then one inum would fall into the same entry, we use radix_list 259 * to store the additional entries. radix_list is also used to store 260 * entries where two entries have the same inum but different 261 * generations. 262 */ 263 struct list_head radix_list; 264 u64 ino; 265 u64 gen; 266 u64 parent_ino; 267 u64 parent_gen; 268 int ret; 269 int need_later_update; 270 int name_len; 271 char name[]; 272}; 273 274static int is_waiting_for_move(struct send_ctx *sctx, u64 ino); 275 276static struct waiting_dir_move * 277get_waiting_dir_move(struct send_ctx *sctx, u64 ino); 278 279static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino); 280 281static int need_send_hole(struct send_ctx *sctx) 282{ 283 return (sctx->parent_root && !sctx->cur_inode_new && 284 !sctx->cur_inode_new_gen && !sctx->cur_inode_deleted && 285 S_ISREG(sctx->cur_inode_mode)); 286} 287 288static void fs_path_reset(struct fs_path *p) 289{ 290 if (p->reversed) { 291 p->start = p->buf + p->buf_len - 1; 292 p->end = p->start; 293 *p->start = 0; 294 } else { 295 p->start = p->buf; 296 p->end = p->start; 297 *p->start = 0; 298 } 299} 300 301static struct fs_path *fs_path_alloc(void) 302{ 303 struct fs_path *p; 304 305 p = kmalloc(sizeof(*p), GFP_NOFS); 306 if (!p) 307 return NULL; 308 p->reversed = 0; 309 p->buf = p->inline_buf; 310 p->buf_len = FS_PATH_INLINE_SIZE; 311 fs_path_reset(p); 312 return p; 313} 314 315static struct fs_path *fs_path_alloc_reversed(void) 316{ 317 struct fs_path *p; 318 319 p = fs_path_alloc(); 320 if (!p) 321 return NULL; 322 p->reversed = 1; 323 fs_path_reset(p); 324 return p; 325} 326 327static void fs_path_free(struct fs_path 
*p) 328{ 329 if (!p) 330 return; 331 if (p->buf != p->inline_buf) 332 kfree(p->buf); 333 kfree(p); 334} 335 336static int fs_path_len(struct fs_path *p) 337{ 338 return p->end - p->start; 339} 340 341static int fs_path_ensure_buf(struct fs_path *p, int len) 342{ 343 char *tmp_buf; 344 int path_len; 345 int old_buf_len; 346 347 len++; 348 349 if (p->buf_len >= len) 350 return 0; 351 352 if (len > PATH_MAX) { 353 WARN_ON(1); 354 return -ENOMEM; 355 } 356 357 path_len = p->end - p->start; 358 old_buf_len = p->buf_len; 359 360 /* 361 * First time the inline_buf does not suffice 362 */ 363 if (p->buf == p->inline_buf) { 364 tmp_buf = kmalloc(len, GFP_NOFS); 365 if (tmp_buf) 366 memcpy(tmp_buf, p->buf, old_buf_len); 367 } else { 368 tmp_buf = krealloc(p->buf, len, GFP_NOFS); 369 } 370 if (!tmp_buf) 371 return -ENOMEM; 372 p->buf = tmp_buf; 373 /* 374 * The real size of the buffer is bigger, this will let the fast path 375 * happen most of the time 376 */ 377 p->buf_len = ksize(p->buf); 378 379 if (p->reversed) { 380 tmp_buf = p->buf + old_buf_len - path_len - 1; 381 p->end = p->buf + p->buf_len - 1; 382 p->start = p->end - path_len; 383 memmove(p->start, tmp_buf, path_len + 1); 384 } else { 385 p->start = p->buf; 386 p->end = p->start + path_len; 387 } 388 return 0; 389} 390 391static int fs_path_prepare_for_add(struct fs_path *p, int name_len, 392 char **prepared) 393{ 394 int ret; 395 int new_len; 396 397 new_len = p->end - p->start + name_len; 398 if (p->start != p->end) 399 new_len++; 400 ret = fs_path_ensure_buf(p, new_len); 401 if (ret < 0) 402 goto out; 403 404 if (p->reversed) { 405 if (p->start != p->end) 406 *--p->start = '/'; 407 p->start -= name_len; 408 *prepared = p->start; 409 } else { 410 if (p->start != p->end) 411 *p->end++ = '/'; 412 *prepared = p->end; 413 p->end += name_len; 414 *p->end = 0; 415 } 416 417out: 418 return ret; 419} 420 421static int fs_path_add(struct fs_path *p, const char *name, int name_len) 422{ 423 int ret; 424 char *prepared; 425 
426 ret = fs_path_prepare_for_add(p, name_len, &prepared); 427 if (ret < 0) 428 goto out; 429 memcpy(prepared, name, name_len); 430 431out: 432 return ret; 433} 434 435static int fs_path_add_path(struct fs_path *p, struct fs_path *p2) 436{ 437 int ret; 438 char *prepared; 439 440 ret = fs_path_prepare_for_add(p, p2->end - p2->start, &prepared); 441 if (ret < 0) 442 goto out; 443 memcpy(prepared, p2->start, p2->end - p2->start); 444 445out: 446 return ret; 447} 448 449static int fs_path_add_from_extent_buffer(struct fs_path *p, 450 struct extent_buffer *eb, 451 unsigned long off, int len) 452{ 453 int ret; 454 char *prepared; 455 456 ret = fs_path_prepare_for_add(p, len, &prepared); 457 if (ret < 0) 458 goto out; 459 460 read_extent_buffer(eb, prepared, off, len); 461 462out: 463 return ret; 464} 465 466static int fs_path_copy(struct fs_path *p, struct fs_path *from) 467{ 468 int ret; 469 470 p->reversed = from->reversed; 471 fs_path_reset(p); 472 473 ret = fs_path_add_path(p, from); 474 475 return ret; 476} 477 478 479static void fs_path_unreverse(struct fs_path *p) 480{ 481 char *tmp; 482 int len; 483 484 if (!p->reversed) 485 return; 486 487 tmp = p->start; 488 len = p->end - p->start; 489 p->start = p->buf; 490 p->end = p->start + len; 491 memmove(p->start, tmp, len + 1); 492 p->reversed = 0; 493} 494 495static struct btrfs_path *alloc_path_for_send(void) 496{ 497 struct btrfs_path *path; 498 499 path = btrfs_alloc_path(); 500 if (!path) 501 return NULL; 502 path->search_commit_root = 1; 503 path->skip_locking = 1; 504 path->need_commit_sem = 1; 505 return path; 506} 507 508static int write_buf(struct file *filp, const void *buf, u32 len, loff_t *off) 509{ 510 int ret; 511 mm_segment_t old_fs; 512 u32 pos = 0; 513 514 old_fs = get_fs(); 515 set_fs(KERNEL_DS); 516 517 while (pos < len) { 518 ret = vfs_write(filp, (__force const char __user *)buf + pos, 519 len - pos, off); 520 /* TODO handle that correctly */ 521 /*if (ret == -ERESTARTSYS) { 522 continue; 523 }*/ 
524 if (ret < 0) 525 goto out; 526 if (ret == 0) { 527 ret = -EIO; 528 goto out; 529 } 530 pos += ret; 531 } 532 533 ret = 0; 534 535out: 536 set_fs(old_fs); 537 return ret; 538} 539 540static int tlv_put(struct send_ctx *sctx, u16 attr, const void *data, int len) 541{ 542 struct btrfs_tlv_header *hdr; 543 int total_len = sizeof(*hdr) + len; 544 int left = sctx->send_max_size - sctx->send_size; 545 546 if (unlikely(left < total_len)) 547 return -EOVERFLOW; 548 549 hdr = (struct btrfs_tlv_header *) (sctx->send_buf + sctx->send_size); 550 hdr->tlv_type = cpu_to_le16(attr); 551 hdr->tlv_len = cpu_to_le16(len); 552 memcpy(hdr + 1, data, len); 553 sctx->send_size += total_len; 554 555 return 0; 556} 557 558#define TLV_PUT_DEFINE_INT(bits) \ 559 static int tlv_put_u##bits(struct send_ctx *sctx, \ 560 u##bits attr, u##bits value) \ 561 { \ 562 __le##bits __tmp = cpu_to_le##bits(value); \ 563 return tlv_put(sctx, attr, &__tmp, sizeof(__tmp)); \ 564 } 565 566TLV_PUT_DEFINE_INT(64) 567 568static int tlv_put_string(struct send_ctx *sctx, u16 attr, 569 const char *str, int len) 570{ 571 if (len == -1) 572 len = strlen(str); 573 return tlv_put(sctx, attr, str, len); 574} 575 576static int tlv_put_uuid(struct send_ctx *sctx, u16 attr, 577 const u8 *uuid) 578{ 579 return tlv_put(sctx, attr, uuid, BTRFS_UUID_SIZE); 580} 581 582static int tlv_put_btrfs_timespec(struct send_ctx *sctx, u16 attr, 583 struct extent_buffer *eb, 584 struct btrfs_timespec *ts) 585{ 586 struct btrfs_timespec bts; 587 read_extent_buffer(eb, &bts, (unsigned long)ts, sizeof(bts)); 588 return tlv_put(sctx, attr, &bts, sizeof(bts)); 589} 590 591 592#define TLV_PUT(sctx, attrtype, attrlen, data) \ 593 do { \ 594 ret = tlv_put(sctx, attrtype, attrlen, data); \ 595 if (ret < 0) \ 596 goto tlv_put_failure; \ 597 } while (0) 598 599#define TLV_PUT_INT(sctx, attrtype, bits, value) \ 600 do { \ 601 ret = tlv_put_u##bits(sctx, attrtype, value); \ 602 if (ret < 0) \ 603 goto tlv_put_failure; \ 604 } while (0) 605 
606#define TLV_PUT_U8(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 8, data) 607#define TLV_PUT_U16(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 16, data) 608#define TLV_PUT_U32(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 32, data) 609#define TLV_PUT_U64(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 64, data) 610#define TLV_PUT_STRING(sctx, attrtype, str, len) \ 611 do { \ 612 ret = tlv_put_string(sctx, attrtype, str, len); \ 613 if (ret < 0) \ 614 goto tlv_put_failure; \ 615 } while (0) 616#define TLV_PUT_PATH(sctx, attrtype, p) \ 617 do { \ 618 ret = tlv_put_string(sctx, attrtype, p->start, \ 619 p->end - p->start); \ 620 if (ret < 0) \ 621 goto tlv_put_failure; \ 622 } while(0) 623#define TLV_PUT_UUID(sctx, attrtype, uuid) \ 624 do { \ 625 ret = tlv_put_uuid(sctx, attrtype, uuid); \ 626 if (ret < 0) \ 627 goto tlv_put_failure; \ 628 } while (0) 629#define TLV_PUT_BTRFS_TIMESPEC(sctx, attrtype, eb, ts) \ 630 do { \ 631 ret = tlv_put_btrfs_timespec(sctx, attrtype, eb, ts); \ 632 if (ret < 0) \ 633 goto tlv_put_failure; \ 634 } while (0) 635 636static int send_header(struct send_ctx *sctx) 637{ 638 struct btrfs_stream_header hdr; 639 640 strcpy(hdr.magic, BTRFS_SEND_STREAM_MAGIC); 641 hdr.version = cpu_to_le32(BTRFS_SEND_STREAM_VERSION); 642 643 return write_buf(sctx->send_filp, &hdr, sizeof(hdr), 644 &sctx->send_off); 645} 646 647/* 648 * For each command/item we want to send to userspace, we call this function. 
649 */ 650static int begin_cmd(struct send_ctx *sctx, int cmd) 651{ 652 struct btrfs_cmd_header *hdr; 653 654 if (WARN_ON(!sctx->send_buf)) 655 return -EINVAL; 656 657 BUG_ON(sctx->send_size); 658 659 sctx->send_size += sizeof(*hdr); 660 hdr = (struct btrfs_cmd_header *)sctx->send_buf; 661 hdr->cmd = cpu_to_le16(cmd); 662 663 return 0; 664} 665 666static int send_cmd(struct send_ctx *sctx) 667{ 668 int ret; 669 struct btrfs_cmd_header *hdr; 670 u32 crc; 671 672 hdr = (struct btrfs_cmd_header *)sctx->send_buf; 673 hdr->len = cpu_to_le32(sctx->send_size - sizeof(*hdr)); 674 hdr->crc = 0; 675 676 crc = btrfs_crc32c(0, (unsigned char *)sctx->send_buf, sctx->send_size); 677 hdr->crc = cpu_to_le32(crc); 678 679 ret = write_buf(sctx->send_filp, sctx->send_buf, sctx->send_size, 680 &sctx->send_off); 681 682 sctx->total_send_size += sctx->send_size; 683 sctx->cmd_send_size[le16_to_cpu(hdr->cmd)] += sctx->send_size; 684 sctx->send_size = 0; 685 686 return ret; 687} 688 689/* 690 * Sends a move instruction to user space 691 */ 692static int send_rename(struct send_ctx *sctx, 693 struct fs_path *from, struct fs_path *to) 694{ 695 int ret; 696 697verbose_printk("btrfs: send_rename %s -> %s\n", from->start, to->start); 698 699 ret = begin_cmd(sctx, BTRFS_SEND_C_RENAME); 700 if (ret < 0) 701 goto out; 702 703 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, from); 704 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_TO, to); 705 706 ret = send_cmd(sctx); 707 708tlv_put_failure: 709out: 710 return ret; 711} 712 713/* 714 * Sends a link instruction to user space 715 */ 716static int send_link(struct send_ctx *sctx, 717 struct fs_path *path, struct fs_path *lnk) 718{ 719 int ret; 720 721verbose_printk("btrfs: send_link %s -> %s\n", path->start, lnk->start); 722 723 ret = begin_cmd(sctx, BTRFS_SEND_C_LINK); 724 if (ret < 0) 725 goto out; 726 727 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path); 728 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, lnk); 729 730 ret = send_cmd(sctx); 731 732tlv_put_failure: 733out: 734 
return ret; 735} 736 737/* 738 * Sends an unlink instruction to user space 739 */ 740static int send_unlink(struct send_ctx *sctx, struct fs_path *path) 741{ 742 int ret; 743 744verbose_printk("btrfs: send_unlink %s\n", path->start); 745 746 ret = begin_cmd(sctx, BTRFS_SEND_C_UNLINK); 747 if (ret < 0) 748 goto out; 749 750 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path); 751 752 ret = send_cmd(sctx); 753 754tlv_put_failure: 755out: 756 return ret; 757} 758 759/* 760 * Sends a rmdir instruction to user space 761 */ 762static int send_rmdir(struct send_ctx *sctx, struct fs_path *path) 763{ 764 int ret; 765 766verbose_printk("btrfs: send_rmdir %s\n", path->start); 767 768 ret = begin_cmd(sctx, BTRFS_SEND_C_RMDIR); 769 if (ret < 0) 770 goto out; 771 772 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path); 773 774 ret = send_cmd(sctx); 775 776tlv_put_failure: 777out: 778 return ret; 779} 780 781/* 782 * Helper function to retrieve some fields from an inode item. 783 */ 784static int __get_inode_info(struct btrfs_root *root, struct btrfs_path *path, 785 u64 ino, u64 *size, u64 *gen, u64 *mode, u64 *uid, 786 u64 *gid, u64 *rdev) 787{ 788 int ret; 789 struct btrfs_inode_item *ii; 790 struct btrfs_key key; 791 792 key.objectid = ino; 793 key.type = BTRFS_INODE_ITEM_KEY; 794 key.offset = 0; 795 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 796 if (ret) { 797 if (ret > 0) 798 ret = -ENOENT; 799 return ret; 800 } 801 802 ii = btrfs_item_ptr(path->nodes[0], path->slots[0], 803 struct btrfs_inode_item); 804 if (size) 805 *size = btrfs_inode_size(path->nodes[0], ii); 806 if (gen) 807 *gen = btrfs_inode_generation(path->nodes[0], ii); 808 if (mode) 809 *mode = btrfs_inode_mode(path->nodes[0], ii); 810 if (uid) 811 *uid = btrfs_inode_uid(path->nodes[0], ii); 812 if (gid) 813 *gid = btrfs_inode_gid(path->nodes[0], ii); 814 if (rdev) 815 *rdev = btrfs_inode_rdev(path->nodes[0], ii); 816 817 return ret; 818} 819 820static int get_inode_info(struct btrfs_root *root, 821 u64 ino, u64 
*size, u64 *gen, 822 u64 *mode, u64 *uid, u64 *gid, 823 u64 *rdev) 824{ 825 struct btrfs_path *path; 826 int ret; 827 828 path = alloc_path_for_send(); 829 if (!path) 830 return -ENOMEM; 831 ret = __get_inode_info(root, path, ino, size, gen, mode, uid, gid, 832 rdev); 833 btrfs_free_path(path); 834 return ret; 835} 836 837typedef int (*iterate_inode_ref_t)(int num, u64 dir, int index, 838 struct fs_path *p, 839 void *ctx); 840 841/* 842 * Helper function to iterate the entries in ONE btrfs_inode_ref or 843 * btrfs_inode_extref. 844 * The iterate callback may return a non zero value to stop iteration. This can 845 * be a negative value for error codes or 1 to simply stop it. 846 * 847 * path must point to the INODE_REF or INODE_EXTREF when called. 848 */ 849static int iterate_inode_ref(struct btrfs_root *root, struct btrfs_path *path, 850 struct btrfs_key *found_key, int resolve, 851 iterate_inode_ref_t iterate, void *ctx) 852{ 853 struct extent_buffer *eb = path->nodes[0]; 854 struct btrfs_item *item; 855 struct btrfs_inode_ref *iref; 856 struct btrfs_inode_extref *extref; 857 struct btrfs_path *tmp_path; 858 struct fs_path *p; 859 u32 cur = 0; 860 u32 total; 861 int slot = path->slots[0]; 862 u32 name_len; 863 char *start; 864 int ret = 0; 865 int num = 0; 866 int index; 867 u64 dir; 868 unsigned long name_off; 869 unsigned long elem_size; 870 unsigned long ptr; 871 872 p = fs_path_alloc_reversed(); 873 if (!p) 874 return -ENOMEM; 875 876 tmp_path = alloc_path_for_send(); 877 if (!tmp_path) { 878 fs_path_free(p); 879 return -ENOMEM; 880 } 881 882 883 if (found_key->type == BTRFS_INODE_REF_KEY) { 884 ptr = (unsigned long)btrfs_item_ptr(eb, slot, 885 struct btrfs_inode_ref); 886 item = btrfs_item_nr(slot); 887 total = btrfs_item_size(eb, item); 888 elem_size = sizeof(*iref); 889 } else { 890 ptr = btrfs_item_ptr_offset(eb, slot); 891 total = btrfs_item_size_nr(eb, slot); 892 elem_size = sizeof(*extref); 893 } 894 895 while (cur < total) { 896 fs_path_reset(p); 897 
898 if (found_key->type == BTRFS_INODE_REF_KEY) { 899 iref = (struct btrfs_inode_ref *)(ptr + cur); 900 name_len = btrfs_inode_ref_name_len(eb, iref); 901 name_off = (unsigned long)(iref + 1); 902 index = btrfs_inode_ref_index(eb, iref); 903 dir = found_key->offset; 904 } else { 905 extref = (struct btrfs_inode_extref *)(ptr + cur); 906 name_len = btrfs_inode_extref_name_len(eb, extref); 907 name_off = (unsigned long)&extref->name; 908 index = btrfs_inode_extref_index(eb, extref); 909 dir = btrfs_inode_extref_parent(eb, extref); 910 } 911 912 if (resolve) { 913 start = btrfs_ref_to_path(root, tmp_path, name_len, 914 name_off, eb, dir, 915 p->buf, p->buf_len); 916 if (IS_ERR(start)) { 917 ret = PTR_ERR(start); 918 goto out; 919 } 920 if (start < p->buf) { 921 /* overflow , try again with larger buffer */ 922 ret = fs_path_ensure_buf(p, 923 p->buf_len + p->buf - start); 924 if (ret < 0) 925 goto out; 926 start = btrfs_ref_to_path(root, tmp_path, 927 name_len, name_off, 928 eb, dir, 929 p->buf, p->buf_len); 930 if (IS_ERR(start)) { 931 ret = PTR_ERR(start); 932 goto out; 933 } 934 BUG_ON(start < p->buf); 935 } 936 p->start = start; 937 } else { 938 ret = fs_path_add_from_extent_buffer(p, eb, name_off, 939 name_len); 940 if (ret < 0) 941 goto out; 942 } 943 944 cur += elem_size + name_len; 945 ret = iterate(num, dir, index, p, ctx); 946 if (ret) 947 goto out; 948 num++; 949 } 950 951out: 952 btrfs_free_path(tmp_path); 953 fs_path_free(p); 954 return ret; 955} 956 957typedef int (*iterate_dir_item_t)(int num, struct btrfs_key *di_key, 958 const char *name, int name_len, 959 const char *data, int data_len, 960 u8 type, void *ctx); 961 962/* 963 * Helper function to iterate the entries in ONE btrfs_dir_item. 964 * The iterate callback may return a non zero value to stop iteration. This can 965 * be a negative value for error codes or 1 to simply stop it. 966 * 967 * path must point to the dir item when called. 
968 */ 969static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path, 970 struct btrfs_key *found_key, 971 iterate_dir_item_t iterate, void *ctx) 972{ 973 int ret = 0; 974 struct extent_buffer *eb; 975 struct btrfs_item *item; 976 struct btrfs_dir_item *di; 977 struct btrfs_key di_key; 978 char *buf = NULL; 979 int buf_len; 980 u32 name_len; 981 u32 data_len; 982 u32 cur; 983 u32 len; 984 u32 total; 985 int slot; 986 int num; 987 u8 type; 988 989 if (found_key->type == BTRFS_XATTR_ITEM_KEY) 990 buf_len = BTRFS_MAX_XATTR_SIZE(root); 991 else 992 buf_len = PATH_MAX; 993 994 buf = kmalloc(buf_len, GFP_NOFS); 995 if (!buf) { 996 ret = -ENOMEM; 997 goto out; 998 } 999 1000 eb = path->nodes[0]; 1001 slot = path->slots[0]; 1002 item = btrfs_item_nr(slot); 1003 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item); 1004 cur = 0; 1005 len = 0; 1006 total = btrfs_item_size(eb, item); 1007 1008 num = 0; 1009 while (cur < total) { 1010 name_len = btrfs_dir_name_len(eb, di); 1011 data_len = btrfs_dir_data_len(eb, di); 1012 type = btrfs_dir_type(eb, di); 1013 btrfs_dir_item_key_to_cpu(eb, di, &di_key); 1014 1015 if (type == BTRFS_FT_XATTR) { 1016 if (name_len > XATTR_NAME_MAX) { 1017 ret = -ENAMETOOLONG; 1018 goto out; 1019 } 1020 if (name_len + data_len > buf_len) { 1021 ret = -E2BIG; 1022 goto out; 1023 } 1024 } else { 1025 /* 1026 * Path too long 1027 */ 1028 if (name_len + data_len > buf_len) { 1029 ret = -ENAMETOOLONG; 1030 goto out; 1031 } 1032 } 1033 1034 read_extent_buffer(eb, buf, (unsigned long)(di + 1), 1035 name_len + data_len); 1036 1037 len = sizeof(*di) + name_len + data_len; 1038 di = (struct btrfs_dir_item *)((char *)di + len); 1039 cur += len; 1040 1041 ret = iterate(num, &di_key, buf, name_len, buf + name_len, 1042 data_len, type, ctx); 1043 if (ret < 0) 1044 goto out; 1045 if (ret) { 1046 ret = 0; 1047 goto out; 1048 } 1049 1050 num++; 1051 } 1052 1053out: 1054 kfree(buf); 1055 return ret; 1056} 1057 1058static int __copy_first_ref(int num, 
u64 dir, int index, 1059 struct fs_path *p, void *ctx) 1060{ 1061 int ret; 1062 struct fs_path *pt = ctx; 1063 1064 ret = fs_path_copy(pt, p); 1065 if (ret < 0) 1066 return ret; 1067 1068 /* we want the first only */ 1069 return 1; 1070} 1071 1072/* 1073 * Retrieve the first path of an inode. If an inode has more then one 1074 * ref/hardlink, this is ignored. 1075 */ 1076static int get_inode_path(struct btrfs_root *root, 1077 u64 ino, struct fs_path *path) 1078{ 1079 int ret; 1080 struct btrfs_key key, found_key; 1081 struct btrfs_path *p; 1082 1083 p = alloc_path_for_send(); 1084 if (!p) 1085 return -ENOMEM; 1086 1087 fs_path_reset(path); 1088 1089 key.objectid = ino; 1090 key.type = BTRFS_INODE_REF_KEY; 1091 key.offset = 0; 1092 1093 ret = btrfs_search_slot_for_read(root, &key, p, 1, 0); 1094 if (ret < 0) 1095 goto out; 1096 if (ret) { 1097 ret = 1; 1098 goto out; 1099 } 1100 btrfs_item_key_to_cpu(p->nodes[0], &found_key, p->slots[0]); 1101 if (found_key.objectid != ino || 1102 (found_key.type != BTRFS_INODE_REF_KEY && 1103 found_key.type != BTRFS_INODE_EXTREF_KEY)) { 1104 ret = -ENOENT; 1105 goto out; 1106 } 1107 1108 ret = iterate_inode_ref(root, p, &found_key, 1, 1109 __copy_first_ref, path); 1110 if (ret < 0) 1111 goto out; 1112 ret = 0; 1113 1114out: 1115 btrfs_free_path(p); 1116 return ret; 1117} 1118 1119struct backref_ctx { 1120 struct send_ctx *sctx; 1121 1122 struct btrfs_path *path; 1123 /* number of total found references */ 1124 u64 found; 1125 1126 /* 1127 * used for clones found in send_root. clones found behind cur_objectid 1128 * and cur_offset are not considered as allowed clones. 
1129 */ 1130 u64 cur_objectid; 1131 u64 cur_offset; 1132 1133 /* may be truncated in case it's the last extent in a file */ 1134 u64 extent_len; 1135 1136 /* Just to check for bugs in backref resolving */ 1137 int found_itself; 1138}; 1139 1140static int __clone_root_cmp_bsearch(const void *key, const void *elt) 1141{ 1142 u64 root = (u64)(uintptr_t)key; 1143 struct clone_root *cr = (struct clone_root *)elt; 1144 1145 if (root < cr->root->objectid) 1146 return -1; 1147 if (root > cr->root->objectid) 1148 return 1; 1149 return 0; 1150} 1151 1152static int __clone_root_cmp_sort(const void *e1, const void *e2) 1153{ 1154 struct clone_root *cr1 = (struct clone_root *)e1; 1155 struct clone_root *cr2 = (struct clone_root *)e2; 1156 1157 if (cr1->root->objectid < cr2->root->objectid) 1158 return -1; 1159 if (cr1->root->objectid > cr2->root->objectid) 1160 return 1; 1161 return 0; 1162} 1163 1164/* 1165 * Called for every backref that is found for the current extent. 1166 * Results are collected in sctx->clone_roots->ino/offset/found_refs 1167 */ 1168static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_) 1169{ 1170 struct backref_ctx *bctx = ctx_; 1171 struct clone_root *found; 1172 int ret; 1173 u64 i_size; 1174 1175 /* First check if the root is in the list of accepted clone sources */ 1176 found = bsearch((void *)(uintptr_t)root, bctx->sctx->clone_roots, 1177 bctx->sctx->clone_roots_cnt, 1178 sizeof(struct clone_root), 1179 __clone_root_cmp_bsearch); 1180 if (!found) 1181 return 0; 1182 1183 if (found->root == bctx->sctx->send_root && 1184 ino == bctx->cur_objectid && 1185 offset == bctx->cur_offset) { 1186 bctx->found_itself = 1; 1187 } 1188 1189 /* 1190 * There are inodes that have extents that lie behind its i_size. Don't 1191 * accept clones from these extents. 
1192 */ 1193 ret = __get_inode_info(found->root, bctx->path, ino, &i_size, NULL, NULL, 1194 NULL, NULL, NULL); 1195 btrfs_release_path(bctx->path); 1196 if (ret < 0) 1197 return ret; 1198 1199 if (offset + bctx->extent_len > i_size) 1200 return 0; 1201 1202 /* 1203 * Make sure we don't consider clones from send_root that are 1204 * behind the current inode/offset. 1205 */ 1206 if (found->root == bctx->sctx->send_root) { 1207 /* 1208 * TODO for the moment we don't accept clones from the inode 1209 * that is currently send. We may change this when 1210 * BTRFS_IOC_CLONE_RANGE supports cloning from and to the same 1211 * file. 1212 */ 1213 if (ino >= bctx->cur_objectid) 1214 return 0; 1215#if 0 1216 if (ino > bctx->cur_objectid) 1217 return 0; 1218 if (offset + bctx->extent_len > bctx->cur_offset) 1219 return 0; 1220#endif 1221 } 1222 1223 bctx->found++; 1224 found->found_refs++; 1225 if (ino < found->ino) { 1226 found->ino = ino; 1227 found->offset = offset; 1228 } else if (found->ino == ino) { 1229 /* 1230 * same extent found more then once in the same file. 1231 */ 1232 if (found->offset > offset + bctx->extent_len) 1233 found->offset = offset; 1234 } 1235 1236 return 0; 1237} 1238 1239/* 1240 * Given an inode, offset and extent item, it finds a good clone for a clone 1241 * instruction. Returns -ENOENT when none could be found. The function makes 1242 * sure that the returned clone is usable at the point where sending is at the 1243 * moment. This means, that no clones are accepted which lie behind the current 1244 * inode+offset. 1245 * 1246 * path must point to the extent item when called. 
1247 */ 1248static int find_extent_clone(struct send_ctx *sctx, 1249 struct btrfs_path *path, 1250 u64 ino, u64 data_offset, 1251 u64 ino_size, 1252 struct clone_root **found) 1253{ 1254 int ret; 1255 int extent_type; 1256 u64 logical; 1257 u64 disk_byte; 1258 u64 num_bytes; 1259 u64 extent_item_pos; 1260 u64 flags = 0; 1261 struct btrfs_file_extent_item *fi; 1262 struct extent_buffer *eb = path->nodes[0]; 1263 struct backref_ctx *backref_ctx = NULL; 1264 struct clone_root *cur_clone_root; 1265 struct btrfs_key found_key; 1266 struct btrfs_path *tmp_path; 1267 int compressed; 1268 u32 i; 1269 1270 tmp_path = alloc_path_for_send(); 1271 if (!tmp_path) 1272 return -ENOMEM; 1273 1274 /* We only use this path under the commit sem */ 1275 tmp_path->need_commit_sem = 0; 1276 1277 backref_ctx = kmalloc(sizeof(*backref_ctx), GFP_NOFS); 1278 if (!backref_ctx) { 1279 ret = -ENOMEM; 1280 goto out; 1281 } 1282 1283 backref_ctx->path = tmp_path; 1284 1285 if (data_offset >= ino_size) { 1286 /* 1287 * There may be extents that lie behind the file's size. 1288 * I at least had this in combination with snapshotting while 1289 * writing large files. 
1290 */ 1291 ret = 0; 1292 goto out; 1293 } 1294 1295 fi = btrfs_item_ptr(eb, path->slots[0], 1296 struct btrfs_file_extent_item); 1297 extent_type = btrfs_file_extent_type(eb, fi); 1298 if (extent_type == BTRFS_FILE_EXTENT_INLINE) { 1299 ret = -ENOENT; 1300 goto out; 1301 } 1302 compressed = btrfs_file_extent_compression(eb, fi); 1303 1304 num_bytes = btrfs_file_extent_num_bytes(eb, fi); 1305 disk_byte = btrfs_file_extent_disk_bytenr(eb, fi); 1306 if (disk_byte == 0) { 1307 ret = -ENOENT; 1308 goto out; 1309 } 1310 logical = disk_byte + btrfs_file_extent_offset(eb, fi); 1311 1312 down_read(&sctx->send_root->fs_info->commit_root_sem); 1313 ret = extent_from_logical(sctx->send_root->fs_info, disk_byte, tmp_path, 1314 &found_key, &flags); 1315 up_read(&sctx->send_root->fs_info->commit_root_sem); 1316 btrfs_release_path(tmp_path); 1317 1318 if (ret < 0) 1319 goto out; 1320 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { 1321 ret = -EIO; 1322 goto out; 1323 } 1324 1325 /* 1326 * Setup the clone roots. 1327 */ 1328 for (i = 0; i < sctx->clone_roots_cnt; i++) { 1329 cur_clone_root = sctx->clone_roots + i; 1330 cur_clone_root->ino = (u64)-1; 1331 cur_clone_root->offset = 0; 1332 cur_clone_root->found_refs = 0; 1333 } 1334 1335 backref_ctx->sctx = sctx; 1336 backref_ctx->found = 0; 1337 backref_ctx->cur_objectid = ino; 1338 backref_ctx->cur_offset = data_offset; 1339 backref_ctx->found_itself = 0; 1340 backref_ctx->extent_len = num_bytes; 1341 1342 /* 1343 * The last extent of a file may be too large due to page alignment. 1344 * We need to adjust extent_len in this case so that the checks in 1345 * __iterate_backrefs work. 1346 */ 1347 if (data_offset + num_bytes >= ino_size) 1348 backref_ctx->extent_len = ino_size - data_offset; 1349 1350 /* 1351 * Now collect all backrefs. 
1352 */ 1353 if (compressed == BTRFS_COMPRESS_NONE) 1354 extent_item_pos = logical - found_key.objectid; 1355 else 1356 extent_item_pos = 0; 1357 ret = iterate_extent_inodes(sctx->send_root->fs_info, 1358 found_key.objectid, extent_item_pos, 1, 1359 __iterate_backrefs, backref_ctx); 1360 1361 if (ret < 0) 1362 goto out; 1363 1364 if (!backref_ctx->found_itself) { 1365 /* found a bug in backref code? */ 1366 ret = -EIO; 1367 btrfs_err(sctx->send_root->fs_info, "did not find backref in " 1368 "send_root. inode=%llu, offset=%llu, " 1369 "disk_byte=%llu found extent=%llu", 1370 ino, data_offset, disk_byte, found_key.objectid); 1371 goto out; 1372 } 1373 1374verbose_printk(KERN_DEBUG "btrfs: find_extent_clone: data_offset=%llu, " 1375 "ino=%llu, " 1376 "num_bytes=%llu, logical=%llu\n", 1377 data_offset, ino, num_bytes, logical); 1378 1379 if (!backref_ctx->found) 1380 verbose_printk("btrfs: no clones found\n"); 1381 1382 cur_clone_root = NULL; 1383 for (i = 0; i < sctx->clone_roots_cnt; i++) { 1384 if (sctx->clone_roots[i].found_refs) { 1385 if (!cur_clone_root) 1386 cur_clone_root = sctx->clone_roots + i; 1387 else if (sctx->clone_roots[i].root == sctx->send_root) 1388 /* prefer clones from send_root over others */ 1389 cur_clone_root = sctx->clone_roots + i; 1390 } 1391 1392 } 1393 1394 if (cur_clone_root) { 1395 if (compressed != BTRFS_COMPRESS_NONE) { 1396 /* 1397 * Offsets given by iterate_extent_inodes() are relative 1398 * to the start of the extent, we need to add logical 1399 * offset from the file extent item. 
1400 * (See why at backref.c:check_extent_in_eb()) 1401 */ 1402 cur_clone_root->offset += btrfs_file_extent_offset(eb, 1403 fi); 1404 } 1405 *found = cur_clone_root; 1406 ret = 0; 1407 } else { 1408 ret = -ENOENT; 1409 } 1410 1411out: 1412 btrfs_free_path(tmp_path); 1413 kfree(backref_ctx); 1414 return ret; 1415} 1416 1417static int read_symlink(struct btrfs_root *root, 1418 u64 ino, 1419 struct fs_path *dest) 1420{ 1421 int ret; 1422 struct btrfs_path *path; 1423 struct btrfs_key key; 1424 struct btrfs_file_extent_item *ei; 1425 u8 type; 1426 u8 compression; 1427 unsigned long off; 1428 int len; 1429 1430 path = alloc_path_for_send(); 1431 if (!path) 1432 return -ENOMEM; 1433 1434 key.objectid = ino; 1435 key.type = BTRFS_EXTENT_DATA_KEY; 1436 key.offset = 0; 1437 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 1438 if (ret < 0) 1439 goto out; 1440 BUG_ON(ret); 1441 1442 ei = btrfs_item_ptr(path->nodes[0], path->slots[0], 1443 struct btrfs_file_extent_item); 1444 type = btrfs_file_extent_type(path->nodes[0], ei); 1445 compression = btrfs_file_extent_compression(path->nodes[0], ei); 1446 BUG_ON(type != BTRFS_FILE_EXTENT_INLINE); 1447 BUG_ON(compression); 1448 1449 off = btrfs_file_extent_inline_start(ei); 1450 len = btrfs_file_extent_inline_len(path->nodes[0], path->slots[0], ei); 1451 1452 ret = fs_path_add_from_extent_buffer(dest, path->nodes[0], off, len); 1453 1454out: 1455 btrfs_free_path(path); 1456 return ret; 1457} 1458 1459/* 1460 * Helper function to generate a file name that is unique in the root of 1461 * send_root and parent_root. This is used to generate names for orphan inodes. 
1462 */ 1463static int gen_unique_name(struct send_ctx *sctx, 1464 u64 ino, u64 gen, 1465 struct fs_path *dest) 1466{ 1467 int ret = 0; 1468 struct btrfs_path *path; 1469 struct btrfs_dir_item *di; 1470 char tmp[64]; 1471 int len; 1472 u64 idx = 0; 1473 1474 path = alloc_path_for_send(); 1475 if (!path) 1476 return -ENOMEM; 1477 1478 while (1) { 1479 len = snprintf(tmp, sizeof(tmp), "o%llu-%llu-%llu", 1480 ino, gen, idx); 1481 ASSERT(len < sizeof(tmp)); 1482 1483 di = btrfs_lookup_dir_item(NULL, sctx->send_root, 1484 path, BTRFS_FIRST_FREE_OBJECTID, 1485 tmp, strlen(tmp), 0); 1486 btrfs_release_path(path); 1487 if (IS_ERR(di)) { 1488 ret = PTR_ERR(di); 1489 goto out; 1490 } 1491 if (di) { 1492 /* not unique, try again */ 1493 idx++; 1494 continue; 1495 } 1496 1497 if (!sctx->parent_root) { 1498 /* unique */ 1499 ret = 0; 1500 break; 1501 } 1502 1503 di = btrfs_lookup_dir_item(NULL, sctx->parent_root, 1504 path, BTRFS_FIRST_FREE_OBJECTID, 1505 tmp, strlen(tmp), 0); 1506 btrfs_release_path(path); 1507 if (IS_ERR(di)) { 1508 ret = PTR_ERR(di); 1509 goto out; 1510 } 1511 if (di) { 1512 /* not unique, try again */ 1513 idx++; 1514 continue; 1515 } 1516 /* unique */ 1517 break; 1518 } 1519 1520 ret = fs_path_add(dest, tmp, strlen(tmp)); 1521 1522out: 1523 btrfs_free_path(path); 1524 return ret; 1525} 1526 1527enum inode_state { 1528 inode_state_no_change, 1529 inode_state_will_create, 1530 inode_state_did_create, 1531 inode_state_will_delete, 1532 inode_state_did_delete, 1533}; 1534 1535static int get_cur_inode_state(struct send_ctx *sctx, u64 ino, u64 gen) 1536{ 1537 int ret; 1538 int left_ret; 1539 int right_ret; 1540 u64 left_gen; 1541 u64 right_gen; 1542 1543 ret = get_inode_info(sctx->send_root, ino, NULL, &left_gen, NULL, NULL, 1544 NULL, NULL); 1545 if (ret < 0 && ret != -ENOENT) 1546 goto out; 1547 left_ret = ret; 1548 1549 if (!sctx->parent_root) { 1550 right_ret = -ENOENT; 1551 } else { 1552 ret = get_inode_info(sctx->parent_root, ino, NULL, &right_gen, 1553 
NULL, NULL, NULL, NULL); 1554 if (ret < 0 && ret != -ENOENT) 1555 goto out; 1556 right_ret = ret; 1557 } 1558 1559 if (!left_ret && !right_ret) { 1560 if (left_gen == gen && right_gen == gen) { 1561 ret = inode_state_no_change; 1562 } else if (left_gen == gen) { 1563 if (ino < sctx->send_progress) 1564 ret = inode_state_did_create; 1565 else 1566 ret = inode_state_will_create; 1567 } else if (right_gen == gen) { 1568 if (ino < sctx->send_progress) 1569 ret = inode_state_did_delete; 1570 else 1571 ret = inode_state_will_delete; 1572 } else { 1573 ret = -ENOENT; 1574 } 1575 } else if (!left_ret) { 1576 if (left_gen == gen) { 1577 if (ino < sctx->send_progress) 1578 ret = inode_state_did_create; 1579 else 1580 ret = inode_state_will_create; 1581 } else { 1582 ret = -ENOENT; 1583 } 1584 } else if (!right_ret) { 1585 if (right_gen == gen) { 1586 if (ino < sctx->send_progress) 1587 ret = inode_state_did_delete; 1588 else 1589 ret = inode_state_will_delete; 1590 } else { 1591 ret = -ENOENT; 1592 } 1593 } else { 1594 ret = -ENOENT; 1595 } 1596 1597out: 1598 return ret; 1599} 1600 1601static int is_inode_existent(struct send_ctx *sctx, u64 ino, u64 gen) 1602{ 1603 int ret; 1604 1605 ret = get_cur_inode_state(sctx, ino, gen); 1606 if (ret < 0) 1607 goto out; 1608 1609 if (ret == inode_state_no_change || 1610 ret == inode_state_did_create || 1611 ret == inode_state_will_delete) 1612 ret = 1; 1613 else 1614 ret = 0; 1615 1616out: 1617 return ret; 1618} 1619 1620/* 1621 * Helper function to lookup a dir item in a dir. 
1622 */ 1623static int lookup_dir_item_inode(struct btrfs_root *root, 1624 u64 dir, const char *name, int name_len, 1625 u64 *found_inode, 1626 u8 *found_type) 1627{ 1628 int ret = 0; 1629 struct btrfs_dir_item *di; 1630 struct btrfs_key key; 1631 struct btrfs_path *path; 1632 1633 path = alloc_path_for_send(); 1634 if (!path) 1635 return -ENOMEM; 1636 1637 di = btrfs_lookup_dir_item(NULL, root, path, 1638 dir, name, name_len, 0); 1639 if (!di) { 1640 ret = -ENOENT; 1641 goto out; 1642 } 1643 if (IS_ERR(di)) { 1644 ret = PTR_ERR(di); 1645 goto out; 1646 } 1647 btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key); 1648 if (key.type == BTRFS_ROOT_ITEM_KEY) { 1649 ret = -ENOENT; 1650 goto out; 1651 } 1652 *found_inode = key.objectid; 1653 *found_type = btrfs_dir_type(path->nodes[0], di); 1654 1655out: 1656 btrfs_free_path(path); 1657 return ret; 1658} 1659 1660/* 1661 * Looks up the first btrfs_inode_ref of a given ino. It returns the parent dir, 1662 * generation of the parent dir and the name of the dir entry. 
1663 */ 1664static int get_first_ref(struct btrfs_root *root, u64 ino, 1665 u64 *dir, u64 *dir_gen, struct fs_path *name) 1666{ 1667 int ret; 1668 struct btrfs_key key; 1669 struct btrfs_key found_key; 1670 struct btrfs_path *path; 1671 int len; 1672 u64 parent_dir; 1673 1674 path = alloc_path_for_send(); 1675 if (!path) 1676 return -ENOMEM; 1677 1678 key.objectid = ino; 1679 key.type = BTRFS_INODE_REF_KEY; 1680 key.offset = 0; 1681 1682 ret = btrfs_search_slot_for_read(root, &key, path, 1, 0); 1683 if (ret < 0) 1684 goto out; 1685 if (!ret) 1686 btrfs_item_key_to_cpu(path->nodes[0], &found_key, 1687 path->slots[0]); 1688 if (ret || found_key.objectid != ino || 1689 (found_key.type != BTRFS_INODE_REF_KEY && 1690 found_key.type != BTRFS_INODE_EXTREF_KEY)) { 1691 ret = -ENOENT; 1692 goto out; 1693 } 1694 1695 if (found_key.type == BTRFS_INODE_REF_KEY) { 1696 struct btrfs_inode_ref *iref; 1697 iref = btrfs_item_ptr(path->nodes[0], path->slots[0], 1698 struct btrfs_inode_ref); 1699 len = btrfs_inode_ref_name_len(path->nodes[0], iref); 1700 ret = fs_path_add_from_extent_buffer(name, path->nodes[0], 1701 (unsigned long)(iref + 1), 1702 len); 1703 parent_dir = found_key.offset; 1704 } else { 1705 struct btrfs_inode_extref *extref; 1706 extref = btrfs_item_ptr(path->nodes[0], path->slots[0], 1707 struct btrfs_inode_extref); 1708 len = btrfs_inode_extref_name_len(path->nodes[0], extref); 1709 ret = fs_path_add_from_extent_buffer(name, path->nodes[0], 1710 (unsigned long)&extref->name, len); 1711 parent_dir = btrfs_inode_extref_parent(path->nodes[0], extref); 1712 } 1713 if (ret < 0) 1714 goto out; 1715 btrfs_release_path(path); 1716 1717 if (dir_gen) { 1718 ret = get_inode_info(root, parent_dir, NULL, dir_gen, NULL, 1719 NULL, NULL, NULL); 1720 if (ret < 0) 1721 goto out; 1722 } 1723 1724 *dir = parent_dir; 1725 1726out: 1727 btrfs_free_path(path); 1728 return ret; 1729} 1730 1731static int is_first_ref(struct btrfs_root *root, 1732 u64 ino, u64 dir, 1733 const char *name, 
int name_len) 1734{ 1735 int ret; 1736 struct fs_path *tmp_name; 1737 u64 tmp_dir; 1738 1739 tmp_name = fs_path_alloc(); 1740 if (!tmp_name) 1741 return -ENOMEM; 1742 1743 ret = get_first_ref(root, ino, &tmp_dir, NULL, tmp_name); 1744 if (ret < 0) 1745 goto out; 1746 1747 if (dir != tmp_dir || name_len != fs_path_len(tmp_name)) { 1748 ret = 0; 1749 goto out; 1750 } 1751 1752 ret = !memcmp(tmp_name->start, name, name_len); 1753 1754out: 1755 fs_path_free(tmp_name); 1756 return ret; 1757} 1758 1759/* 1760 * Used by process_recorded_refs to determine if a new ref would overwrite an 1761 * already existing ref. In case it detects an overwrite, it returns the 1762 * inode/gen in who_ino/who_gen. 1763 * When an overwrite is detected, process_recorded_refs does proper orphanizing 1764 * to make sure later references to the overwritten inode are possible. 1765 * Orphanizing is however only required for the first ref of an inode. 1766 * process_recorded_refs does an additional is_first_ref check to see if 1767 * orphanizing is really required. 1768 */ 1769static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen, 1770 const char *name, int name_len, 1771 u64 *who_ino, u64 *who_gen) 1772{ 1773 int ret = 0; 1774 u64 gen; 1775 u64 other_inode = 0; 1776 u8 other_type = 0; 1777 1778 if (!sctx->parent_root) 1779 goto out; 1780 1781 ret = is_inode_existent(sctx, dir, dir_gen); 1782 if (ret <= 0) 1783 goto out; 1784 1785 /* 1786 * If we have a parent root we need to verify that the parent dir was 1787 * not delted and then re-created, if it was then we have no overwrite 1788 * and we can just unlink this entry. 
1789 */ 1790 if (sctx->parent_root) { 1791 ret = get_inode_info(sctx->parent_root, dir, NULL, &gen, NULL, 1792 NULL, NULL, NULL); 1793 if (ret < 0 && ret != -ENOENT) 1794 goto out; 1795 if (ret) { 1796 ret = 0; 1797 goto out; 1798 } 1799 if (gen != dir_gen) 1800 goto out; 1801 } 1802 1803 ret = lookup_dir_item_inode(sctx->parent_root, dir, name, name_len, 1804 &other_inode, &other_type); 1805 if (ret < 0 && ret != -ENOENT) 1806 goto out; 1807 if (ret) { 1808 ret = 0; 1809 goto out; 1810 } 1811 1812 /* 1813 * Check if the overwritten ref was already processed. If yes, the ref 1814 * was already unlinked/moved, so we can safely assume that we will not 1815 * overwrite anything at this point in time. 1816 */ 1817 if (other_inode > sctx->send_progress) { 1818 ret = get_inode_info(sctx->parent_root, other_inode, NULL, 1819 who_gen, NULL, NULL, NULL, NULL); 1820 if (ret < 0) 1821 goto out; 1822 1823 ret = 1; 1824 *who_ino = other_inode; 1825 } else { 1826 ret = 0; 1827 } 1828 1829out: 1830 return ret; 1831} 1832 1833/* 1834 * Checks if the ref was overwritten by an already processed inode. This is 1835 * used by __get_cur_name_and_parent to find out if the ref was orphanized and 1836 * thus the orphan name needs be used. 1837 * process_recorded_refs also uses it to avoid unlinking of refs that were 1838 * overwritten. 
1839 */ 1840static int did_overwrite_ref(struct send_ctx *sctx, 1841 u64 dir, u64 dir_gen, 1842 u64 ino, u64 ino_gen, 1843 const char *name, int name_len) 1844{ 1845 int ret = 0; 1846 u64 gen; 1847 u64 ow_inode; 1848 u8 other_type; 1849 1850 if (!sctx->parent_root) 1851 goto out; 1852 1853 ret = is_inode_existent(sctx, dir, dir_gen); 1854 if (ret <= 0) 1855 goto out; 1856 1857 /* check if the ref was overwritten by another ref */ 1858 ret = lookup_dir_item_inode(sctx->send_root, dir, name, name_len, 1859 &ow_inode, &other_type); 1860 if (ret < 0 && ret != -ENOENT) 1861 goto out; 1862 if (ret) { 1863 /* was never and will never be overwritten */ 1864 ret = 0; 1865 goto out; 1866 } 1867 1868 ret = get_inode_info(sctx->send_root, ow_inode, NULL, &gen, NULL, NULL, 1869 NULL, NULL); 1870 if (ret < 0) 1871 goto out; 1872 1873 if (ow_inode == ino && gen == ino_gen) { 1874 ret = 0; 1875 goto out; 1876 } 1877 1878 /* we know that it is or will be overwritten. check this now */ 1879 if (ow_inode < sctx->send_progress) 1880 ret = 1; 1881 else 1882 ret = 0; 1883 1884out: 1885 return ret; 1886} 1887 1888/* 1889 * Same as did_overwrite_ref, but also checks if it is the first ref of an inode 1890 * that got overwritten. This is used by process_recorded_refs to determine 1891 * if it has to use the path as returned by get_cur_path or the orphan name. 1892 */ 1893static int did_overwrite_first_ref(struct send_ctx *sctx, u64 ino, u64 gen) 1894{ 1895 int ret = 0; 1896 struct fs_path *name = NULL; 1897 u64 dir; 1898 u64 dir_gen; 1899 1900 if (!sctx->parent_root) 1901 goto out; 1902 1903 name = fs_path_alloc(); 1904 if (!name) 1905 return -ENOMEM; 1906 1907 ret = get_first_ref(sctx->parent_root, ino, &dir, &dir_gen, name); 1908 if (ret < 0) 1909 goto out; 1910 1911 ret = did_overwrite_ref(sctx, dir, dir_gen, ino, gen, 1912 name->start, fs_path_len(name)); 1913 1914out: 1915 fs_path_free(name); 1916 return ret; 1917} 1918 1919/* 1920 * Insert a name cache entry. 
On 32bit kernels the radix tree index is 32bit, 1921 * so we need to do some special handling in case we have clashes. This function 1922 * takes care of this with the help of name_cache_entry::radix_list. 1923 * In case of error, nce is kfreed. 1924 */ 1925static int name_cache_insert(struct send_ctx *sctx, 1926 struct name_cache_entry *nce) 1927{ 1928 int ret = 0; 1929 struct list_head *nce_head; 1930 1931 nce_head = radix_tree_lookup(&sctx->name_cache, 1932 (unsigned long)nce->ino); 1933 if (!nce_head) { 1934 nce_head = kmalloc(sizeof(*nce_head), GFP_NOFS); 1935 if (!nce_head) { 1936 kfree(nce); 1937 return -ENOMEM; 1938 } 1939 INIT_LIST_HEAD(nce_head); 1940 1941 ret = radix_tree_insert(&sctx->name_cache, nce->ino, nce_head); 1942 if (ret < 0) { 1943 kfree(nce_head); 1944 kfree(nce); 1945 return ret; 1946 } 1947 } 1948 list_add_tail(&nce->radix_list, nce_head); 1949 list_add_tail(&nce->list, &sctx->name_cache_list); 1950 sctx->name_cache_size++; 1951 1952 return ret; 1953} 1954 1955static void name_cache_delete(struct send_ctx *sctx, 1956 struct name_cache_entry *nce) 1957{ 1958 struct list_head *nce_head; 1959 1960 nce_head = radix_tree_lookup(&sctx->name_cache, 1961 (unsigned long)nce->ino); 1962 if (!nce_head) { 1963 btrfs_err(sctx->send_root->fs_info, 1964 "name_cache_delete lookup failed ino %llu cache size %d, leaking memory", 1965 nce->ino, sctx->name_cache_size); 1966 } 1967 1968 list_del(&nce->radix_list); 1969 list_del(&nce->list); 1970 sctx->name_cache_size--; 1971 1972 /* 1973 * We may not get to the final release of nce_head if the lookup fails 1974 */ 1975 if (nce_head && list_empty(nce_head)) { 1976 radix_tree_delete(&sctx->name_cache, (unsigned long)nce->ino); 1977 kfree(nce_head); 1978 } 1979} 1980 1981static struct name_cache_entry *name_cache_search(struct send_ctx *sctx, 1982 u64 ino, u64 gen) 1983{ 1984 struct list_head *nce_head; 1985 struct name_cache_entry *cur; 1986 1987 nce_head = radix_tree_lookup(&sctx->name_cache, (unsigned 
long)ino); 1988 if (!nce_head) 1989 return NULL; 1990 1991 list_for_each_entry(cur, nce_head, radix_list) { 1992 if (cur->ino == ino && cur->gen == gen) 1993 return cur; 1994 } 1995 return NULL; 1996} 1997 1998/* 1999 * Removes the entry from the list and adds it back to the end. This marks the 2000 * entry as recently used so that name_cache_clean_unused does not remove it. 2001 */ 2002static void name_cache_used(struct send_ctx *sctx, struct name_cache_entry *nce) 2003{ 2004 list_del(&nce->list); 2005 list_add_tail(&nce->list, &sctx->name_cache_list); 2006} 2007 2008/* 2009 * Remove some entries from the beginning of name_cache_list. 2010 */ 2011static void name_cache_clean_unused(struct send_ctx *sctx) 2012{ 2013 struct name_cache_entry *nce; 2014 2015 if (sctx->name_cache_size < SEND_CTX_NAME_CACHE_CLEAN_SIZE) 2016 return; 2017 2018 while (sctx->name_cache_size > SEND_CTX_MAX_NAME_CACHE_SIZE) { 2019 nce = list_entry(sctx->name_cache_list.next, 2020 struct name_cache_entry, list); 2021 name_cache_delete(sctx, nce); 2022 kfree(nce); 2023 } 2024} 2025 2026static void name_cache_free(struct send_ctx *sctx) 2027{ 2028 struct name_cache_entry *nce; 2029 2030 while (!list_empty(&sctx->name_cache_list)) { 2031 nce = list_entry(sctx->name_cache_list.next, 2032 struct name_cache_entry, list); 2033 name_cache_delete(sctx, nce); 2034 kfree(nce); 2035 } 2036} 2037 2038/* 2039 * Used by get_cur_path for each ref up to the root. 2040 * Returns 0 if it succeeded. 2041 * Returns 1 if the inode is not existent or got overwritten. In that case, the 2042 * name is an orphan name. This instructs get_cur_path to stop iterating. If 1 2043 * is returned, parent_ino/parent_gen are not guaranteed to be valid. 2044 * Returns <0 in case of error. 
2045 */ 2046static int __get_cur_name_and_parent(struct send_ctx *sctx, 2047 u64 ino, u64 gen, 2048 u64 *parent_ino, 2049 u64 *parent_gen, 2050 struct fs_path *dest) 2051{ 2052 int ret; 2053 int nce_ret; 2054 struct name_cache_entry *nce = NULL; 2055 2056 /* 2057 * First check if we already did a call to this function with the same 2058 * ino/gen. If yes, check if the cache entry is still up-to-date. If yes 2059 * return the cached result. 2060 */ 2061 nce = name_cache_search(sctx, ino, gen); 2062 if (nce) { 2063 if (ino < sctx->send_progress && nce->need_later_update) { 2064 name_cache_delete(sctx, nce); 2065 kfree(nce); 2066 nce = NULL; 2067 } else { 2068 name_cache_used(sctx, nce); 2069 *parent_ino = nce->parent_ino; 2070 *parent_gen = nce->parent_gen; 2071 ret = fs_path_add(dest, nce->name, nce->name_len); 2072 if (ret < 0) 2073 goto out; 2074 ret = nce->ret; 2075 goto out; 2076 } 2077 } 2078 2079 /* 2080 * If the inode is not existent yet, add the orphan name and return 1. 2081 * This should only happen for the parent dir that we determine in 2082 * __record_new_ref 2083 */ 2084 ret = is_inode_existent(sctx, ino, gen); 2085 if (ret < 0) 2086 goto out; 2087 2088 if (!ret) { 2089 ret = gen_unique_name(sctx, ino, gen, dest); 2090 if (ret < 0) 2091 goto out; 2092 ret = 1; 2093 goto out_cache; 2094 } 2095 2096 /* 2097 * Depending on whether the inode was already processed or not, use 2098 * send_root or parent_root for ref lookup. 2099 */ 2100 if (ino < sctx->send_progress) 2101 ret = get_first_ref(sctx->send_root, ino, 2102 parent_ino, parent_gen, dest); 2103 else 2104 ret = get_first_ref(sctx->parent_root, ino, 2105 parent_ino, parent_gen, dest); 2106 if (ret < 0) 2107 goto out; 2108 2109 /* 2110 * Check if the ref was overwritten by an inode's ref that was processed 2111 * earlier. If yes, treat as orphan and return 1. 
2112 */ 2113 ret = did_overwrite_ref(sctx, *parent_ino, *parent_gen, ino, gen, 2114 dest->start, dest->end - dest->start); 2115 if (ret < 0) 2116 goto out; 2117 if (ret) { 2118 fs_path_reset(dest); 2119 ret = gen_unique_name(sctx, ino, gen, dest); 2120 if (ret < 0) 2121 goto out; 2122 ret = 1; 2123 } 2124 2125out_cache: 2126 /* 2127 * Store the result of the lookup in the name cache. 2128 */ 2129 nce = kmalloc(sizeof(*nce) + fs_path_len(dest) + 1, GFP_NOFS); 2130 if (!nce) { 2131 ret = -ENOMEM; 2132 goto out; 2133 } 2134 2135 nce->ino = ino; 2136 nce->gen = gen; 2137 nce->parent_ino = *parent_ino; 2138 nce->parent_gen = *parent_gen; 2139 nce->name_len = fs_path_len(dest); 2140 nce->ret = ret; 2141 strcpy(nce->name, dest->start); 2142 2143 if (ino < sctx->send_progress) 2144 nce->need_later_update = 0; 2145 else 2146 nce->need_later_update = 1; 2147 2148 nce_ret = name_cache_insert(sctx, nce); 2149 if (nce_ret < 0) 2150 ret = nce_ret; 2151 name_cache_clean_unused(sctx); 2152 2153out: 2154 return ret; 2155} 2156 2157/* 2158 * Magic happens here. This function returns the first ref to an inode as it 2159 * would look like while receiving the stream at this point in time. 2160 * We walk the path up to the root. For every inode in between, we check if it 2161 * was already processed/sent. If yes, we continue with the parent as found 2162 * in send_root. If not, we continue with the parent as found in parent_root. 2163 * If we encounter an inode that was deleted at this point in time, we use the 2164 * inodes "orphan" name instead of the real name and stop. Same with new inodes 2165 * that were not created yet and overwritten inodes/refs. 2166 * 2167 * When do we have have orphan inodes: 2168 * 1. When an inode is freshly created and thus no valid refs are available yet 2169 * 2. When a directory lost all it's refs (deleted) but still has dir items 2170 * inside which were not processed yet (pending for move/delete). 
If anyone 2171 * tried to get the path to the dir items, it would get a path inside that 2172 * orphan directory. 2173 * 3. When an inode is moved around or gets new links, it may overwrite the ref 2174 * of an unprocessed inode. If in that case the first ref would be 2175 * overwritten, the overwritten inode gets "orphanized". Later when we 2176 * process this overwritten inode, it is restored at a new place by moving 2177 * the orphan inode. 2178 * 2179 * sctx->send_progress tells this function at which point in time receiving 2180 * would be. 2181 */ 2182static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen, 2183 struct fs_path *dest) 2184{ 2185 int ret = 0; 2186 struct fs_path *name = NULL; 2187 u64 parent_inode = 0; 2188 u64 parent_gen = 0; 2189 int stop = 0; 2190 2191 name = fs_path_alloc(); 2192 if (!name) { 2193 ret = -ENOMEM; 2194 goto out; 2195 } 2196 2197 dest->reversed = 1; 2198 fs_path_reset(dest); 2199 2200 while (!stop && ino != BTRFS_FIRST_FREE_OBJECTID) { 2201 fs_path_reset(name); 2202 2203 if (is_waiting_for_rm(sctx, ino)) { 2204 ret = gen_unique_name(sctx, ino, gen, name); 2205 if (ret < 0) 2206 goto out; 2207 ret = fs_path_add_path(dest, name); 2208 break; 2209 } 2210 2211 if (is_waiting_for_move(sctx, ino)) { 2212 ret = get_first_ref(sctx->parent_root, ino, 2213 &parent_inode, &parent_gen, name); 2214 } else { 2215 ret = __get_cur_name_and_parent(sctx, ino, gen, 2216 &parent_inode, 2217 &parent_gen, name); 2218 if (ret) 2219 stop = 1; 2220 } 2221 2222 if (ret < 0) 2223 goto out; 2224 2225 ret = fs_path_add_path(dest, name); 2226 if (ret < 0) 2227 goto out; 2228 2229 ino = parent_inode; 2230 gen = parent_gen; 2231 } 2232 2233out: 2234 fs_path_free(name); 2235 if (!ret) 2236 fs_path_unreverse(dest); 2237 return ret; 2238} 2239 2240/* 2241 * Sends a BTRFS_SEND_C_SUBVOL command/item to userspace 2242 */ 2243static int send_subvol_begin(struct send_ctx *sctx) 2244{ 2245 int ret; 2246 struct btrfs_root *send_root = sctx->send_root; 2247 
struct btrfs_root *parent_root = sctx->parent_root; 2248 struct btrfs_path *path; 2249 struct btrfs_key key; 2250 struct btrfs_root_ref *ref; 2251 struct extent_buffer *leaf; 2252 char *name = NULL; 2253 int namelen; 2254 2255 path = btrfs_alloc_path(); 2256 if (!path) 2257 return -ENOMEM; 2258 2259 name = kmalloc(BTRFS_PATH_NAME_MAX, GFP_NOFS); 2260 if (!name) { 2261 btrfs_free_path(path); 2262 return -ENOMEM; 2263 } 2264 2265 key.objectid = send_root->objectid; 2266 key.type = BTRFS_ROOT_BACKREF_KEY; 2267 key.offset = 0; 2268 2269 ret = btrfs_search_slot_for_read(send_root->fs_info->tree_root, 2270 &key, path, 1, 0); 2271 if (ret < 0) 2272 goto out; 2273 if (ret) { 2274 ret = -ENOENT; 2275 goto out; 2276 } 2277 2278 leaf = path->nodes[0]; 2279 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 2280 if (key.type != BTRFS_ROOT_BACKREF_KEY || 2281 key.objectid != send_root->objectid) { 2282 ret = -ENOENT; 2283 goto out; 2284 } 2285 ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref); 2286 namelen = btrfs_root_ref_name_len(leaf, ref); 2287 read_extent_buffer(leaf, name, (unsigned long)(ref + 1), namelen); 2288 btrfs_release_path(path); 2289 2290 if (parent_root) { 2291 ret = begin_cmd(sctx, BTRFS_SEND_C_SNAPSHOT); 2292 if (ret < 0) 2293 goto out; 2294 } else { 2295 ret = begin_cmd(sctx, BTRFS_SEND_C_SUBVOL); 2296 if (ret < 0) 2297 goto out; 2298 } 2299 2300 TLV_PUT_STRING(sctx, BTRFS_SEND_A_PATH, name, namelen); 2301 TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID, 2302 sctx->send_root->root_item.uuid); 2303 TLV_PUT_U64(sctx, BTRFS_SEND_A_CTRANSID, 2304 le64_to_cpu(sctx->send_root->root_item.ctransid)); 2305 if (parent_root) { 2306 TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID, 2307 sctx->parent_root->root_item.uuid); 2308 TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID, 2309 le64_to_cpu(sctx->parent_root->root_item.ctransid)); 2310 } 2311 2312 ret = send_cmd(sctx); 2313 2314tlv_put_failure: 2315out: 2316 btrfs_free_path(path); 2317 kfree(name); 2318 return ret; 2319} 
2320 2321static int send_truncate(struct send_ctx *sctx, u64 ino, u64 gen, u64 size) 2322{ 2323 int ret = 0; 2324 struct fs_path *p; 2325 2326verbose_printk("btrfs: send_truncate %llu size=%llu\n", ino, size); 2327 2328 p = fs_path_alloc(); 2329 if (!p) 2330 return -ENOMEM; 2331 2332 ret = begin_cmd(sctx, BTRFS_SEND_C_TRUNCATE); 2333 if (ret < 0) 2334 goto out; 2335 2336 ret = get_cur_path(sctx, ino, gen, p); 2337 if (ret < 0) 2338 goto out; 2339 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 2340 TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, size); 2341 2342 ret = send_cmd(sctx); 2343 2344tlv_put_failure: 2345out: 2346 fs_path_free(p); 2347 return ret; 2348} 2349 2350static int send_chmod(struct send_ctx *sctx, u64 ino, u64 gen, u64 mode) 2351{ 2352 int ret = 0; 2353 struct fs_path *p; 2354 2355verbose_printk("btrfs: send_chmod %llu mode=%llu\n", ino, mode); 2356 2357 p = fs_path_alloc(); 2358 if (!p) 2359 return -ENOMEM; 2360 2361 ret = begin_cmd(sctx, BTRFS_SEND_C_CHMOD); 2362 if (ret < 0) 2363 goto out; 2364 2365 ret = get_cur_path(sctx, ino, gen, p); 2366 if (ret < 0) 2367 goto out; 2368 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 2369 TLV_PUT_U64(sctx, BTRFS_SEND_A_MODE, mode & 07777); 2370 2371 ret = send_cmd(sctx); 2372 2373tlv_put_failure: 2374out: 2375 fs_path_free(p); 2376 return ret; 2377} 2378 2379static int send_chown(struct send_ctx *sctx, u64 ino, u64 gen, u64 uid, u64 gid) 2380{ 2381 int ret = 0; 2382 struct fs_path *p; 2383 2384verbose_printk("btrfs: send_chown %llu uid=%llu, gid=%llu\n", ino, uid, gid); 2385 2386 p = fs_path_alloc(); 2387 if (!p) 2388 return -ENOMEM; 2389 2390 ret = begin_cmd(sctx, BTRFS_SEND_C_CHOWN); 2391 if (ret < 0) 2392 goto out; 2393 2394 ret = get_cur_path(sctx, ino, gen, p); 2395 if (ret < 0) 2396 goto out; 2397 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 2398 TLV_PUT_U64(sctx, BTRFS_SEND_A_UID, uid); 2399 TLV_PUT_U64(sctx, BTRFS_SEND_A_GID, gid); 2400 2401 ret = send_cmd(sctx); 2402 2403tlv_put_failure: 2404out: 2405 fs_path_free(p); 2406 
return ret; 2407} 2408 2409static int send_utimes(struct send_ctx *sctx, u64 ino, u64 gen) 2410{ 2411 int ret = 0; 2412 struct fs_path *p = NULL; 2413 struct btrfs_inode_item *ii; 2414 struct btrfs_path *path = NULL; 2415 struct extent_buffer *eb; 2416 struct btrfs_key key; 2417 int slot; 2418 2419verbose_printk("btrfs: send_utimes %llu\n", ino); 2420 2421 p = fs_path_alloc(); 2422 if (!p) 2423 return -ENOMEM; 2424 2425 path = alloc_path_for_send(); 2426 if (!path) { 2427 ret = -ENOMEM; 2428 goto out; 2429 } 2430 2431 key.objectid = ino; 2432 key.type = BTRFS_INODE_ITEM_KEY; 2433 key.offset = 0; 2434 ret = btrfs_search_slot(NULL, sctx->send_root, &key, path, 0, 0); 2435 if (ret < 0) 2436 goto out; 2437 2438 eb = path->nodes[0]; 2439 slot = path->slots[0]; 2440 ii = btrfs_item_ptr(eb, slot, struct btrfs_inode_item); 2441 2442 ret = begin_cmd(sctx, BTRFS_SEND_C_UTIMES); 2443 if (ret < 0) 2444 goto out; 2445 2446 ret = get_cur_path(sctx, ino, gen, p); 2447 if (ret < 0) 2448 goto out; 2449 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 2450 TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_ATIME, eb, 2451 btrfs_inode_atime(ii)); 2452 TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_MTIME, eb, 2453 btrfs_inode_mtime(ii)); 2454 TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_CTIME, eb, 2455 btrfs_inode_ctime(ii)); 2456 /* TODO Add otime support when the otime patches get into upstream */ 2457 2458 ret = send_cmd(sctx); 2459 2460tlv_put_failure: 2461out: 2462 fs_path_free(p); 2463 btrfs_free_path(path); 2464 return ret; 2465} 2466 2467/* 2468 * Sends a BTRFS_SEND_C_MKXXX or SYMLINK command to user space. We don't have 2469 * a valid path yet because we did not process the refs yet. So, the inode 2470 * is created as orphan. 
2471 */ 2472static int send_create_inode(struct send_ctx *sctx, u64 ino) 2473{ 2474 int ret = 0; 2475 struct fs_path *p; 2476 int cmd; 2477 u64 gen; 2478 u64 mode; 2479 u64 rdev; 2480 2481verbose_printk("btrfs: send_create_inode %llu\n", ino); 2482 2483 p = fs_path_alloc(); 2484 if (!p) 2485 return -ENOMEM; 2486 2487 if (ino != sctx->cur_ino) { 2488 ret = get_inode_info(sctx->send_root, ino, NULL, &gen, &mode, 2489 NULL, NULL, &rdev); 2490 if (ret < 0) 2491 goto out; 2492 } else { 2493 gen = sctx->cur_inode_gen; 2494 mode = sctx->cur_inode_mode; 2495 rdev = sctx->cur_inode_rdev; 2496 } 2497 2498 if (S_ISREG(mode)) { 2499 cmd = BTRFS_SEND_C_MKFILE; 2500 } else if (S_ISDIR(mode)) { 2501 cmd = BTRFS_SEND_C_MKDIR; 2502 } else if (S_ISLNK(mode)) { 2503 cmd = BTRFS_SEND_C_SYMLINK; 2504 } else if (S_ISCHR(mode) || S_ISBLK(mode)) { 2505 cmd = BTRFS_SEND_C_MKNOD; 2506 } else if (S_ISFIFO(mode)) { 2507 cmd = BTRFS_SEND_C_MKFIFO; 2508 } else if (S_ISSOCK(mode)) { 2509 cmd = BTRFS_SEND_C_MKSOCK; 2510 } else { 2511 printk(KERN_WARNING "btrfs: unexpected inode type %o", 2512 (int)(mode & S_IFMT)); 2513 ret = -ENOTSUPP; 2514 goto out; 2515 } 2516 2517 ret = begin_cmd(sctx, cmd); 2518 if (ret < 0) 2519 goto out; 2520 2521 ret = gen_unique_name(sctx, ino, gen, p); 2522 if (ret < 0) 2523 goto out; 2524 2525 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 2526 TLV_PUT_U64(sctx, BTRFS_SEND_A_INO, ino); 2527 2528 if (S_ISLNK(mode)) { 2529 fs_path_reset(p); 2530 ret = read_symlink(sctx->send_root, ino, p); 2531 if (ret < 0) 2532 goto out; 2533 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, p); 2534 } else if (S_ISCHR(mode) || S_ISBLK(mode) || 2535 S_ISFIFO(mode) || S_ISSOCK(mode)) { 2536 TLV_PUT_U64(sctx, BTRFS_SEND_A_RDEV, new_encode_dev(rdev)); 2537 TLV_PUT_U64(sctx, BTRFS_SEND_A_MODE, mode); 2538 } 2539 2540 ret = send_cmd(sctx); 2541 if (ret < 0) 2542 goto out; 2543 2544 2545tlv_put_failure: 2546out: 2547 fs_path_free(p); 2548 return ret; 2549} 2550 2551/* 2552 * We need some special handling 
for inodes that get processed before the parent 2553 * directory got created. See process_recorded_refs for details. 2554 * This function does the check if we already created the dir out of order. 2555 */ 2556static int did_create_dir(struct send_ctx *sctx, u64 dir) 2557{ 2558 int ret = 0; 2559 struct btrfs_path *path = NULL; 2560 struct btrfs_key key; 2561 struct btrfs_key found_key; 2562 struct btrfs_key di_key; 2563 struct extent_buffer *eb; 2564 struct btrfs_dir_item *di; 2565 int slot; 2566 2567 path = alloc_path_for_send(); 2568 if (!path) { 2569 ret = -ENOMEM; 2570 goto out; 2571 } 2572 2573 key.objectid = dir; 2574 key.type = BTRFS_DIR_INDEX_KEY; 2575 key.offset = 0; 2576 ret = btrfs_search_slot(NULL, sctx->send_root, &key, path, 0, 0); 2577 if (ret < 0) 2578 goto out; 2579 2580 while (1) { 2581 eb = path->nodes[0]; 2582 slot = path->slots[0]; 2583 if (slot >= btrfs_header_nritems(eb)) { 2584 ret = btrfs_next_leaf(sctx->send_root, path); 2585 if (ret < 0) { 2586 goto out; 2587 } else if (ret > 0) { 2588 ret = 0; 2589 break; 2590 } 2591 continue; 2592 } 2593 2594 btrfs_item_key_to_cpu(eb, &found_key, slot); 2595 if (found_key.objectid != key.objectid || 2596 found_key.type != key.type) { 2597 ret = 0; 2598 goto out; 2599 } 2600 2601 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item); 2602 btrfs_dir_item_key_to_cpu(eb, di, &di_key); 2603 2604 if (di_key.type != BTRFS_ROOT_ITEM_KEY && 2605 di_key.objectid < sctx->send_progress) { 2606 ret = 1; 2607 goto out; 2608 } 2609 2610 path->slots[0]++; 2611 } 2612 2613out: 2614 btrfs_free_path(path); 2615 return ret; 2616} 2617 2618/* 2619 * Only creates the inode if it is: 2620 * 1. Not a directory 2621 * 2. Or a directory which was not created already due to out of order 2622 * directories. See did_create_dir and process_recorded_refs for details. 
2623 */ 2624static int send_create_inode_if_needed(struct send_ctx *sctx) 2625{ 2626 int ret; 2627 2628 if (S_ISDIR(sctx->cur_inode_mode)) { 2629 ret = did_create_dir(sctx, sctx->cur_ino); 2630 if (ret < 0) 2631 goto out; 2632 if (ret) { 2633 ret = 0; 2634 goto out; 2635 } 2636 } 2637 2638 ret = send_create_inode(sctx, sctx->cur_ino); 2639 if (ret < 0) 2640 goto out; 2641 2642out: 2643 return ret; 2644} 2645 2646struct recorded_ref { 2647 struct list_head list; 2648 char *dir_path; 2649 char *name; 2650 struct fs_path *full_path; 2651 u64 dir; 2652 u64 dir_gen; 2653 int dir_path_len; 2654 int name_len; 2655}; 2656 2657/* 2658 * We need to process new refs before deleted refs, but compare_tree gives us 2659 * everything mixed. So we first record all refs and later process them. 2660 * This function is a helper to record one ref. 2661 */ 2662static int __record_ref(struct list_head *head, u64 dir, 2663 u64 dir_gen, struct fs_path *path) 2664{ 2665 struct recorded_ref *ref; 2666 2667 ref = kmalloc(sizeof(*ref), GFP_NOFS); 2668 if (!ref) 2669 return -ENOMEM; 2670 2671 ref->dir = dir; 2672 ref->dir_gen = dir_gen; 2673 ref->full_path = path; 2674 2675 ref->name = (char *)kbasename(ref->full_path->start); 2676 ref->name_len = ref->full_path->end - ref->name; 2677 ref->dir_path = ref->full_path->start; 2678 if (ref->name == ref->full_path->start) 2679 ref->dir_path_len = 0; 2680 else 2681 ref->dir_path_len = ref->full_path->end - 2682 ref->full_path->start - 1 - ref->name_len; 2683 2684 list_add_tail(&ref->list, head); 2685 return 0; 2686} 2687 2688static int dup_ref(struct recorded_ref *ref, struct list_head *list) 2689{ 2690 struct recorded_ref *new; 2691 2692 new = kmalloc(sizeof(*ref), GFP_NOFS); 2693 if (!new) 2694 return -ENOMEM; 2695 2696 new->dir = ref->dir; 2697 new->dir_gen = ref->dir_gen; 2698 new->full_path = NULL; 2699 INIT_LIST_HEAD(&new->list); 2700 list_add_tail(&new->list, list); 2701 return 0; 2702} 2703 2704static void __free_recorded_refs(struct 
list_head *head) 2705{ 2706 struct recorded_ref *cur; 2707 2708 while (!list_empty(head)) { 2709 cur = list_entry(head->next, struct recorded_ref, list); 2710 fs_path_free(cur->full_path); 2711 list_del(&cur->list); 2712 kfree(cur); 2713 } 2714} 2715 2716static void free_recorded_refs(struct send_ctx *sctx) 2717{ 2718 __free_recorded_refs(&sctx->new_refs); 2719 __free_recorded_refs(&sctx->deleted_refs); 2720} 2721 2722/* 2723 * Renames/moves a file/dir to its orphan name. Used when the first 2724 * ref of an unprocessed inode gets overwritten and for all non empty 2725 * directories. 2726 */ 2727static int orphanize_inode(struct send_ctx *sctx, u64 ino, u64 gen, 2728 struct fs_path *path) 2729{ 2730 int ret; 2731 struct fs_path *orphan; 2732 2733 orphan = fs_path_alloc(); 2734 if (!orphan) 2735 return -ENOMEM; 2736 2737 ret = gen_unique_name(sctx, ino, gen, orphan); 2738 if (ret < 0) 2739 goto out; 2740 2741 ret = send_rename(sctx, path, orphan); 2742 2743out: 2744 fs_path_free(orphan); 2745 return ret; 2746} 2747 2748static struct orphan_dir_info * 2749add_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino) 2750{ 2751 struct rb_node **p = &sctx->orphan_dirs.rb_node; 2752 struct rb_node *parent = NULL; 2753 struct orphan_dir_info *entry, *odi; 2754 2755 odi = kmalloc(sizeof(*odi), GFP_NOFS); 2756 if (!odi) 2757 return ERR_PTR(-ENOMEM); 2758 odi->ino = dir_ino; 2759 odi->gen = 0; 2760 2761 while (*p) { 2762 parent = *p; 2763 entry = rb_entry(parent, struct orphan_dir_info, node); 2764 if (dir_ino < entry->ino) { 2765 p = &(*p)->rb_left; 2766 } else if (dir_ino > entry->ino) { 2767 p = &(*p)->rb_right; 2768 } else { 2769 kfree(odi); 2770 return entry; 2771 } 2772 } 2773 2774 rb_link_node(&odi->node, parent, p); 2775 rb_insert_color(&odi->node, &sctx->orphan_dirs); 2776 return odi; 2777} 2778 2779static struct orphan_dir_info * 2780get_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino) 2781{ 2782 struct rb_node *n = sctx->orphan_dirs.rb_node; 2783 struct 
orphan_dir_info *entry; 2784 2785 while (n) { 2786 entry = rb_entry(n, struct orphan_dir_info, node); 2787 if (dir_ino < entry->ino) 2788 n = n->rb_left; 2789 else if (dir_ino > entry->ino) 2790 n = n->rb_right; 2791 else 2792 return entry; 2793 } 2794 return NULL; 2795} 2796 2797static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino) 2798{ 2799 struct orphan_dir_info *odi = get_orphan_dir_info(sctx, dir_ino); 2800 2801 return odi != NULL; 2802} 2803 2804static void free_orphan_dir_info(struct send_ctx *sctx, 2805 struct orphan_dir_info *odi) 2806{ 2807 if (!odi) 2808 return; 2809 rb_erase(&odi->node, &sctx->orphan_dirs); 2810 kfree(odi); 2811} 2812 2813/* 2814 * Returns 1 if a directory can be removed at this point in time. 2815 * We check this by iterating all dir items and checking if the inode behind 2816 * the dir item was already processed. 2817 */ 2818static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen, 2819 u64 send_progress) 2820{ 2821 int ret = 0; 2822 struct btrfs_root *root = sctx->parent_root; 2823 struct btrfs_path *path; 2824 struct btrfs_key key; 2825 struct btrfs_key found_key; 2826 struct btrfs_key loc; 2827 struct btrfs_dir_item *di; 2828 2829 /* 2830 * Don't try to rmdir the top/root subvolume dir. 
2831 */ 2832 if (dir == BTRFS_FIRST_FREE_OBJECTID) 2833 return 0; 2834 2835 path = alloc_path_for_send(); 2836 if (!path) 2837 return -ENOMEM; 2838 2839 key.objectid = dir; 2840 key.type = BTRFS_DIR_INDEX_KEY; 2841 key.offset = 0; 2842 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 2843 if (ret < 0) 2844 goto out; 2845 2846 while (1) { 2847 struct waiting_dir_move *dm; 2848 2849 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { 2850 ret = btrfs_next_leaf(root, path); 2851 if (ret < 0) 2852 goto out; 2853 else if (ret > 0) 2854 break; 2855 continue; 2856 } 2857 btrfs_item_key_to_cpu(path->nodes[0], &found_key, 2858 path->slots[0]); 2859 if (found_key.objectid != key.objectid || 2860 found_key.type != key.type) 2861 break; 2862 2863 di = btrfs_item_ptr(path->nodes[0], path->slots[0], 2864 struct btrfs_dir_item); 2865 btrfs_dir_item_key_to_cpu(path->nodes[0], di, &loc); 2866 2867 dm = get_waiting_dir_move(sctx, loc.objectid); 2868 if (dm) { 2869 struct orphan_dir_info *odi; 2870 2871 odi = add_orphan_dir_info(sctx, dir); 2872 if (IS_ERR(odi)) { 2873 ret = PTR_ERR(odi); 2874 goto out; 2875 } 2876 odi->gen = dir_gen; 2877 dm->rmdir_ino = dir; 2878 ret = 0; 2879 goto out; 2880 } 2881 2882 if (loc.objectid > send_progress) { 2883 ret = 0; 2884 goto out; 2885 } 2886 2887 path->slots[0]++; 2888 } 2889 2890 ret = 1; 2891 2892out: 2893 btrfs_free_path(path); 2894 return ret; 2895} 2896 2897static int is_waiting_for_move(struct send_ctx *sctx, u64 ino) 2898{ 2899 struct waiting_dir_move *entry = get_waiting_dir_move(sctx, ino); 2900 2901 return entry != NULL; 2902} 2903 2904static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino) 2905{ 2906 struct rb_node **p = &sctx->waiting_dir_moves.rb_node; 2907 struct rb_node *parent = NULL; 2908 struct waiting_dir_move *entry, *dm; 2909 2910 dm = kmalloc(sizeof(*dm), GFP_NOFS); 2911 if (!dm) 2912 return -ENOMEM; 2913 dm->ino = ino; 2914 dm->rmdir_ino = 0; 2915 2916 while (*p) { 2917 parent = *p; 2918 entry = 
rb_entry(parent, struct waiting_dir_move, node); 2919 if (ino < entry->ino) { 2920 p = &(*p)->rb_left; 2921 } else if (ino > entry->ino) { 2922 p = &(*p)->rb_right; 2923 } else { 2924 kfree(dm); 2925 return -EEXIST; 2926 } 2927 } 2928 2929 rb_link_node(&dm->node, parent, p); 2930 rb_insert_color(&dm->node, &sctx->waiting_dir_moves); 2931 return 0; 2932} 2933 2934static struct waiting_dir_move * 2935get_waiting_dir_move(struct send_ctx *sctx, u64 ino) 2936{ 2937 struct rb_node *n = sctx->waiting_dir_moves.rb_node; 2938 struct waiting_dir_move *entry; 2939 2940 while (n) { 2941 entry = rb_entry(n, struct waiting_dir_move, node); 2942 if (ino < entry->ino) 2943 n = n->rb_left; 2944 else if (ino > entry->ino) 2945 n = n->rb_right; 2946 else 2947 return entry; 2948 } 2949 return NULL; 2950} 2951 2952static void free_waiting_dir_move(struct send_ctx *sctx, 2953 struct waiting_dir_move *dm) 2954{ 2955 if (!dm) 2956 return; 2957 rb_erase(&dm->node, &sctx->waiting_dir_moves); 2958 kfree(dm); 2959} 2960 2961static int add_pending_dir_move(struct send_ctx *sctx, 2962 u64 ino, 2963 u64 ino_gen, 2964 u64 parent_ino, 2965 struct list_head *new_refs, 2966 struct list_head *deleted_refs) 2967{ 2968 struct rb_node **p = &sctx->pending_dir_moves.rb_node; 2969 struct rb_node *parent = NULL; 2970 struct pending_dir_move *entry = NULL, *pm; 2971 struct recorded_ref *cur; 2972 int exists = 0; 2973 int ret; 2974 2975 pm = kmalloc(sizeof(*pm), GFP_NOFS); 2976 if (!pm) 2977 return -ENOMEM; 2978 pm->parent_ino = parent_ino; 2979 pm->ino = ino; 2980 pm->gen = ino_gen; 2981 INIT_LIST_HEAD(&pm->list); 2982 INIT_LIST_HEAD(&pm->update_refs); 2983 RB_CLEAR_NODE(&pm->node); 2984 2985 while (*p) { 2986 parent = *p; 2987 entry = rb_entry(parent, struct pending_dir_move, node); 2988 if (parent_ino < entry->parent_ino) { 2989 p = &(*p)->rb_left; 2990 } else if (parent_ino > entry->parent_ino) { 2991 p = &(*p)->rb_right; 2992 } else { 2993 exists = 1; 2994 break; 2995 } 2996 } 2997 2998 
list_for_each_entry(cur, deleted_refs, list) { 2999 ret = dup_ref(cur, &pm->update_refs); 3000 if (ret < 0) 3001 goto out; 3002 } 3003 list_for_each_entry(cur, new_refs, list) { 3004 ret = dup_ref(cur, &pm->update_refs); 3005 if (ret < 0) 3006 goto out; 3007 } 3008 3009 ret = add_waiting_dir_move(sctx, pm->ino); 3010 if (ret) 3011 goto out; 3012 3013 if (exists) { 3014 list_add_tail(&pm->list, &entry->list); 3015 } else { 3016 rb_link_node(&pm->node, parent, p); 3017 rb_insert_color(&pm->node, &sctx->pending_dir_moves); 3018 } 3019 ret = 0; 3020out: 3021 if (ret) { 3022 __free_recorded_refs(&pm->update_refs); 3023 kfree(pm); 3024 } 3025 return ret; 3026} 3027 3028static struct pending_dir_move *get_pending_dir_moves(struct send_ctx *sctx, 3029 u64 parent_ino) 3030{ 3031 struct rb_node *n = sctx->pending_dir_moves.rb_node; 3032 struct pending_dir_move *entry; 3033 3034 while (n) { 3035 entry = rb_entry(n, struct pending_dir_move, node); 3036 if (parent_ino < entry->parent_ino) 3037 n = n->rb_left; 3038 else if (parent_ino > entry->parent_ino) 3039 n = n->rb_right; 3040 else 3041 return entry; 3042 } 3043 return NULL; 3044} 3045 3046static int path_loop(struct send_ctx *sctx, struct fs_path *name, 3047 u64 ino, u64 gen, u64 *ancestor_ino) 3048{ 3049 int ret = 0; 3050 u64 parent_inode = 0; 3051 u64 parent_gen = 0; 3052 u64 start_ino = ino; 3053 3054 *ancestor_ino = 0; 3055 while (ino != BTRFS_FIRST_FREE_OBJECTID) { 3056 fs_path_reset(name); 3057 3058 if (is_waiting_for_rm(sctx, ino)) 3059 break; 3060 if (is_waiting_for_move(sctx, ino)) { 3061 if (*ancestor_ino == 0) 3062 *ancestor_ino = ino; 3063 ret = get_first_ref(sctx->parent_root, ino, 3064 &parent_inode, &parent_gen, name); 3065 } else { 3066 ret = __get_cur_name_and_parent(sctx, ino, gen, 3067 &parent_inode, 3068 &parent_gen, name); 3069 if (ret > 0) { 3070 ret = 0; 3071 break; 3072 } 3073 } 3074 if (ret < 0) 3075 break; 3076 if (parent_inode == start_ino) { 3077 ret = 1; 3078 if (*ancestor_ino == 0) 3079 
*ancestor_ino = ino; 3080 break; 3081 } 3082 ino = parent_inode; 3083 gen = parent_gen; 3084 } 3085 return ret; 3086} 3087 3088static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) 3089{ 3090 struct fs_path *from_path = NULL; 3091 struct fs_path *to_path = NULL; 3092 struct fs_path *name = NULL; 3093 u64 orig_progress = sctx->send_progress; 3094 struct recorded_ref *cur; 3095 u64 parent_ino, parent_gen; 3096 struct waiting_dir_move *dm = NULL; 3097 u64 rmdir_ino = 0; 3098 int ret; 3099 u64 ancestor = 0; 3100 3101 name = fs_path_alloc(); 3102 from_path = fs_path_alloc(); 3103 if (!name || !from_path) { 3104 ret = -ENOMEM; 3105 goto out; 3106 } 3107 3108 dm = get_waiting_dir_move(sctx, pm->ino); 3109 ASSERT(dm); 3110 rmdir_ino = dm->rmdir_ino; 3111 free_waiting_dir_move(sctx, dm); 3112 3113 ret = get_first_ref(sctx->parent_root, pm->ino, 3114 &parent_ino, &parent_gen, name); 3115 if (ret < 0) 3116 goto out; 3117 3118 ret = get_cur_path(sctx, parent_ino, parent_gen, 3119 from_path); 3120 if (ret < 0) 3121 goto out; 3122 ret = fs_path_add_path(from_path, name); 3123 if (ret < 0) 3124 goto out; 3125 3126 sctx->send_progress = sctx->cur_ino + 1; 3127 ret = path_loop(sctx, name, pm->ino, pm->gen, &ancestor); 3128 if (ret) { 3129 LIST_HEAD(deleted_refs); 3130 ASSERT(ancestor > BTRFS_FIRST_FREE_OBJECTID); 3131 ret = add_pending_dir_move(sctx, pm->ino, pm->gen, ancestor, 3132 &pm->update_refs, &deleted_refs); 3133 if (ret < 0) 3134 goto out; 3135 if (rmdir_ino) { 3136 dm = get_waiting_dir_move(sctx, pm->ino); 3137 ASSERT(dm); 3138 dm->rmdir_ino = rmdir_ino; 3139 } 3140 goto out; 3141 } 3142 fs_path_reset(name); 3143 to_path = name; 3144 name = NULL; 3145 ret = get_cur_path(sctx, pm->ino, pm->gen, to_path); 3146 if (ret < 0) 3147 goto out; 3148 3149 ret = send_rename(sctx, from_path, to_path); 3150 if (ret < 0) 3151 goto out; 3152 3153 if (rmdir_ino) { 3154 struct orphan_dir_info *odi; 3155 3156 odi = get_orphan_dir_info(sctx, rmdir_ino); 3157 if 
(!odi) { 3158 /* already deleted */ 3159 goto finish; 3160 } 3161 ret = can_rmdir(sctx, rmdir_ino, odi->gen, sctx->cur_ino + 1); 3162 if (ret < 0) 3163 goto out; 3164 if (!ret) 3165 goto finish; 3166 3167 name = fs_path_alloc(); 3168 if (!name) { 3169 ret = -ENOMEM; 3170 goto out; 3171 } 3172 ret = get_cur_path(sctx, rmdir_ino, odi->gen, name); 3173 if (ret < 0) 3174 goto out; 3175 ret = send_rmdir(sctx, name); 3176 if (ret < 0) 3177 goto out; 3178 free_orphan_dir_info(sctx, odi); 3179 } 3180 3181finish: 3182 ret = send_utimes(sctx, pm->ino, pm->gen); 3183 if (ret < 0) 3184 goto out; 3185 3186 /* 3187 * After rename/move, need to update the utimes of both new parent(s) 3188 * and old parent(s). 3189 */ 3190 list_for_each_entry(cur, &pm->update_refs, list) { 3191 if (cur->dir == rmdir_ino) 3192 continue; 3193 ret = send_utimes(sctx, cur->dir, cur->dir_gen); 3194 if (ret < 0) 3195 goto out; 3196 } 3197 3198out: 3199 fs_path_free(name); 3200 fs_path_free(from_path); 3201 fs_path_free(to_path); 3202 sctx->send_progress = orig_progress; 3203 3204 return ret; 3205} 3206 3207static void free_pending_move(struct send_ctx *sctx, struct pending_dir_move *m) 3208{ 3209 if (!list_empty(&m->list)) 3210 list_del(&m->list); 3211 if (!RB_EMPTY_NODE(&m->node)) 3212 rb_erase(&m->node, &sctx->pending_dir_moves); 3213 __free_recorded_refs(&m->update_refs); 3214 kfree(m); 3215} 3216 3217static void tail_append_pending_moves(struct pending_dir_move *moves, 3218 struct list_head *stack) 3219{ 3220 if (list_empty(&moves->list)) { 3221 list_add_tail(&moves->list, stack); 3222 } else { 3223 LIST_HEAD(list); 3224 list_splice_init(&moves->list, &list); 3225 list_add_tail(&moves->list, stack); 3226 list_splice_tail(&list, stack); 3227 } 3228} 3229 3230static int apply_children_dir_moves(struct send_ctx *sctx) 3231{ 3232 struct pending_dir_move *pm; 3233 struct list_head stack; 3234 u64 parent_ino = sctx->cur_ino; 3235 int ret = 0; 3236 3237 pm = get_pending_dir_moves(sctx, parent_ino); 3238 if 
(!pm) 3239 return 0; 3240 3241 INIT_LIST_HEAD(&stack); 3242 tail_append_pending_moves(pm, &stack); 3243 3244 while (!list_empty(&stack)) { 3245 pm = list_first_entry(&stack, struct pending_dir_move, list); 3246 parent_ino = pm->ino; 3247 ret = apply_dir_move(sctx, pm); 3248 free_pending_move(sctx, pm); 3249 if (ret) 3250 goto out; 3251 pm = get_pending_dir_moves(sctx, parent_ino); 3252 if (pm) 3253 tail_append_pending_moves(pm, &stack); 3254 } 3255 return 0; 3256 3257out: 3258 while (!list_empty(&stack)) { 3259 pm = list_first_entry(&stack, struct pending_dir_move, list); 3260 free_pending_move(sctx, pm); 3261 } 3262 return ret; 3263} 3264 3265static int wait_for_parent_move(struct send_ctx *sctx, 3266 struct recorded_ref *parent_ref) 3267{ 3268 int ret = 0; 3269 u64 ino = parent_ref->dir; 3270 u64 parent_ino_before, parent_ino_after; 3271 struct fs_path *path_before = NULL; 3272 struct fs_path *path_after = NULL; 3273 int len1, len2; 3274 3275 path_after = fs_path_alloc(); 3276 path_before = fs_path_alloc(); 3277 if (!path_after || !path_before) { 3278 ret = -ENOMEM; 3279 goto out; 3280 } 3281 3282 /* 3283 * Our current directory inode may not yet be renamed/moved because some 3284 * ancestor (immediate or not) has to be renamed/moved first. So find if 3285 * such ancestor exists and make sure our own rename/move happens after 3286 * that ancestor is processed. 
3287 */ 3288 while (ino > BTRFS_FIRST_FREE_OBJECTID) { 3289 if (is_waiting_for_move(sctx, ino)) { 3290 ret = 1; 3291 break; 3292 } 3293 3294 fs_path_reset(path_before); 3295 fs_path_reset(path_after); 3296 3297 ret = get_first_ref(sctx->send_root, ino, &parent_ino_after, 3298 NULL, path_after); 3299 if (ret < 0) 3300 goto out; 3301 ret = get_first_ref(sctx->parent_root, ino, &parent_ino_before, 3302 NULL, path_before); 3303 if (ret < 0 && ret != -ENOENT) { 3304 goto out; 3305 } else if (ret == -ENOENT) { 3306 ret = 1; 3307 break; 3308 } 3309 3310 len1 = fs_path_len(path_before); 3311 len2 = fs_path_len(path_after); 3312 if (ino > sctx->cur_ino && 3313 (parent_ino_before != parent_ino_after || len1 != len2 || 3314 memcmp(path_before->start, path_after->start, len1))) { 3315 ret = 1; 3316 break; 3317 } 3318 ino = parent_ino_after; 3319 } 3320 3321out: 3322 fs_path_free(path_before); 3323 fs_path_free(path_after); 3324 3325 if (ret == 1) { 3326 ret = add_pending_dir_move(sctx, 3327 sctx->cur_ino, 3328 sctx->cur_inode_gen, 3329 ino, 3330 &sctx->new_refs, 3331 &sctx->deleted_refs); 3332 if (!ret) 3333 ret = 1; 3334 } 3335 3336 return ret; 3337} 3338 3339/* 3340 * This does all the move/link/unlink/rmdir magic. 3341 */ 3342static int process_recorded_refs(struct send_ctx *sctx, int *pending_move) 3343{ 3344 int ret = 0; 3345 struct recorded_ref *cur; 3346 struct recorded_ref *cur2; 3347 struct list_head check_dirs; 3348 struct fs_path *valid_path = NULL; 3349 u64 ow_inode = 0; 3350 u64 ow_gen; 3351 int did_overwrite = 0; 3352 int is_orphan = 0; 3353 u64 last_dir_ino_rm = 0; 3354 3355verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); 3356 3357 /* 3358 * This should never happen as the root dir always has the same ref 3359 * which is always '..' 
3360 */ 3361 BUG_ON(sctx->cur_ino <= BTRFS_FIRST_FREE_OBJECTID); 3362 INIT_LIST_HEAD(&check_dirs); 3363 3364 valid_path = fs_path_alloc(); 3365 if (!valid_path) { 3366 ret = -ENOMEM; 3367 goto out; 3368 } 3369 3370 /* 3371 * First, check if the first ref of the current inode was overwritten 3372 * before. If yes, we know that the current inode was already orphanized 3373 * and thus use the orphan name. If not, we can use get_cur_path to 3374 * get the path of the first ref as it would like while receiving at 3375 * this point in time. 3376 * New inodes are always orphan at the beginning, so force to use the 3377 * orphan name in this case. 3378 * The first ref is stored in valid_path and will be updated if it 3379 * gets moved around. 3380 */ 3381 if (!sctx->cur_inode_new) { 3382 ret = did_overwrite_first_ref(sctx, sctx->cur_ino, 3383 sctx->cur_inode_gen); 3384 if (ret < 0) 3385 goto out; 3386 if (ret) 3387 did_overwrite = 1; 3388 } 3389 if (sctx->cur_inode_new || did_overwrite) { 3390 ret = gen_unique_name(sctx, sctx->cur_ino, 3391 sctx->cur_inode_gen, valid_path); 3392 if (ret < 0) 3393 goto out; 3394 is_orphan = 1; 3395 } else { 3396 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, 3397 valid_path); 3398 if (ret < 0) 3399 goto out; 3400 } 3401 3402 list_for_each_entry(cur, &sctx->new_refs, list) { 3403 /* 3404 * We may have refs where the parent directory does not exist 3405 * yet. This happens if the parent directories inum is higher 3406 * the the current inum. To handle this case, we create the 3407 * parent directory out of order. But we need to check if this 3408 * did already happen before due to other refs in the same dir. 3409 */ 3410 ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen); 3411 if (ret < 0) 3412 goto out; 3413 if (ret == inode_state_will_create) { 3414 ret = 0; 3415 /* 3416 * First check if any of the current inodes refs did 3417 * already create the dir. 
3418 */ 3419 list_for_each_entry(cur2, &sctx->new_refs, list) { 3420 if (cur == cur2) 3421 break; 3422 if (cur2->dir == cur->dir) { 3423 ret = 1; 3424 break; 3425 } 3426 } 3427 3428 /* 3429 * If that did not happen, check if a previous inode 3430 * did already create the dir. 3431 */ 3432 if (!ret) 3433 ret = did_create_dir(sctx, cur->dir); 3434 if (ret < 0) 3435 goto out; 3436 if (!ret) { 3437 ret = send_create_inode(sctx, cur->dir); 3438 if (ret < 0) 3439 goto out; 3440 } 3441 } 3442 3443 /* 3444 * Check if this new ref would overwrite the first ref of 3445 * another unprocessed inode. If yes, orphanize the 3446 * overwritten inode. If we find an overwritten ref that is 3447 * not the first ref, simply unlink it. 3448 */ 3449 ret = will_overwrite_ref(sctx, cur->dir, cur->dir_gen, 3450 cur->name, cur->name_len, 3451 &ow_inode, &ow_gen); 3452 if (ret < 0) 3453 goto out; 3454 if (ret) { 3455 ret = is_first_ref(sctx->parent_root, 3456 ow_inode, cur->dir, cur->name, 3457 cur->name_len); 3458 if (ret < 0) 3459 goto out; 3460 if (ret) { 3461 ret = orphanize_inode(sctx, ow_inode, ow_gen, 3462 cur->full_path); 3463 if (ret < 0) 3464 goto out; 3465 } else { 3466 ret = send_unlink(sctx, cur->full_path); 3467 if (ret < 0) 3468 goto out; 3469 } 3470 } 3471 3472 /* 3473 * link/move the ref to the new place. If we have an orphan 3474 * inode, move it and update valid_path. If not, link or move 3475 * it depending on the inode mode. 3476 */ 3477 if (is_orphan) { 3478 ret = send_rename(sctx, valid_path, cur->full_path); 3479 if (ret < 0) 3480 goto out; 3481 is_orphan = 0; 3482 ret = fs_path_copy(valid_path, cur->full_path); 3483 if (ret < 0) 3484 goto out; 3485 } else { 3486 if (S_ISDIR(sctx->cur_inode_mode)) { 3487 /* 3488 * Dirs can't be linked, so move it. For moved 3489 * dirs, we always have one new and one deleted 3490 * ref. The deleted ref is ignored later. 
3491 */ 3492 ret = wait_for_parent_move(sctx, cur); 3493 if (ret < 0) 3494 goto out; 3495 if (ret) { 3496 *pending_move = 1; 3497 } else { 3498 ret = send_rename(sctx, valid_path, 3499 cur->full_path); 3500 if (!ret) 3501 ret = fs_path_copy(valid_path, 3502 cur->full_path); 3503 } 3504 if (ret < 0) 3505 goto out; 3506 } else { 3507 ret = send_link(sctx, cur->full_path, 3508 valid_path); 3509 if (ret < 0) 3510 goto out; 3511 } 3512 } 3513 ret = dup_ref(cur, &check_dirs); 3514 if (ret < 0) 3515 goto out; 3516 } 3517 3518 if (S_ISDIR(sctx->cur_inode_mode) && sctx->cur_inode_deleted) { 3519 /* 3520 * Check if we can already rmdir the directory. If not, 3521 * orphanize it. For every dir item inside that gets deleted 3522 * later, we do this check again and rmdir it then if possible. 3523 * See the use of check_dirs for more details. 3524 */ 3525 ret = can_rmdir(sctx, sctx->cur_ino, sctx->cur_inode_gen, 3526 sctx->cur_ino); 3527 if (ret < 0) 3528 goto out; 3529 if (ret) { 3530 ret = send_rmdir(sctx, valid_path); 3531 if (ret < 0) 3532 goto out; 3533 } else if (!is_orphan) { 3534 ret = orphanize_inode(sctx, sctx->cur_ino, 3535 sctx->cur_inode_gen, valid_path); 3536 if (ret < 0) 3537 goto out; 3538 is_orphan = 1; 3539 } 3540 3541 list_for_each_entry(cur, &sctx->deleted_refs, list) { 3542 ret = dup_ref(cur, &check_dirs); 3543 if (ret < 0) 3544 goto out; 3545 } 3546 } else if (S_ISDIR(sctx->cur_inode_mode) && 3547 !list_empty(&sctx->deleted_refs)) { 3548 /* 3549 * We have a moved dir. Add the old parent to check_dirs 3550 */ 3551 cur = list_entry(sctx->deleted_refs.next, struct recorded_ref, 3552 list); 3553 ret = dup_ref(cur, &check_dirs); 3554 if (ret < 0) 3555 goto out; 3556 } else if (!S_ISDIR(sctx->cur_inode_mode)) { 3557 /* 3558 * We have a non dir inode. Go through all deleted refs and 3559 * unlink them if they were not already overwritten by other 3560 * inodes. 
3561 */ 3562 list_for_each_entry(cur, &sctx->deleted_refs, list) { 3563 ret = did_overwrite_ref(sctx, cur->dir, cur->dir_gen, 3564 sctx->cur_ino, sctx->cur_inode_gen, 3565 cur->name, cur->name_len); 3566 if (ret < 0) 3567 goto out; 3568 if (!ret) { 3569 ret = send_unlink(sctx, cur->full_path); 3570 if (ret < 0) 3571 goto out; 3572 } 3573 ret = dup_ref(cur, &check_dirs); 3574 if (ret < 0) 3575 goto out; 3576 } 3577 /* 3578 * If the inode is still orphan, unlink the orphan. This may 3579 * happen when a previous inode did overwrite the first ref 3580 * of this inode and no new refs were added for the current 3581 * inode. Unlinking does not mean that the inode is deleted in 3582 * all cases. There may still be links to this inode in other 3583 * places. 3584 */ 3585 if (is_orphan) { 3586 ret = send_unlink(sctx, valid_path); 3587 if (ret < 0) 3588 goto out; 3589 } 3590 } 3591 3592 /* 3593 * We did collect all parent dirs where cur_inode was once located. We 3594 * now go through all these dirs and check if they are pending for 3595 * deletion and if it's finally possible to perform the rmdir now. 3596 * We also update the inode stats of the parent dirs here. 3597 */ 3598 list_for_each_entry(cur, &check_dirs, list) { 3599 /* 3600 * In case we had refs into dirs that were not processed yet, 3601 * we don't need to do the utime and rmdir logic for these dirs. 3602 * The dir will be processed later. 
3603 */ 3604 if (cur->dir > sctx->cur_ino) 3605 continue; 3606 3607 ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen); 3608 if (ret < 0) 3609 goto out; 3610 3611 if (ret == inode_state_did_create || 3612 ret == inode_state_no_change) { 3613 /* TODO delayed utimes */ 3614 ret = send_utimes(sctx, cur->dir, cur->dir_gen); 3615 if (ret < 0) 3616 goto out; 3617 } else if (ret == inode_state_did_delete && 3618 cur->dir != last_dir_ino_rm) { 3619 ret = can_rmdir(sctx, cur->dir, cur->dir_gen, 3620 sctx->cur_ino); 3621 if (ret < 0) 3622 goto out; 3623 if (ret) { 3624 ret = get_cur_path(sctx, cur->dir, 3625 cur->dir_gen, valid_path); 3626 if (ret < 0) 3627 goto out; 3628 ret = send_rmdir(sctx, valid_path); 3629 if (ret < 0) 3630 goto out; 3631 last_dir_ino_rm = cur->dir; 3632 } 3633 } 3634 } 3635 3636 ret = 0; 3637 3638out: 3639 __free_recorded_refs(&check_dirs); 3640 free_recorded_refs(sctx); 3641 fs_path_free(valid_path); 3642 return ret; 3643} 3644 3645static int record_ref(struct btrfs_root *root, int num, u64 dir, int index, 3646 struct fs_path *name, void *ctx, struct list_head *refs) 3647{ 3648 int ret = 0; 3649 struct send_ctx *sctx = ctx; 3650 struct fs_path *p; 3651 u64 gen; 3652 3653 p = fs_path_alloc(); 3654 if (!p) 3655 return -ENOMEM; 3656 3657 ret = get_inode_info(root, dir, NULL, &gen, NULL, NULL, 3658 NULL, NULL); 3659 if (ret < 0) 3660 goto out; 3661 3662 ret = get_cur_path(sctx, dir, gen, p); 3663 if (ret < 0) 3664 goto out; 3665 ret = fs_path_add_path(p, name); 3666 if (ret < 0) 3667 goto out; 3668 3669 ret = __record_ref(refs, dir, gen, p); 3670 3671out: 3672 if (ret) 3673 fs_path_free(p); 3674 return ret; 3675} 3676 3677static int __record_new_ref(int num, u64 dir, int index, 3678 struct fs_path *name, 3679 void *ctx) 3680{ 3681 struct send_ctx *sctx = ctx; 3682 return record_ref(sctx->send_root, num, dir, index, name, 3683 ctx, &sctx->new_refs); 3684} 3685 3686 3687static int __record_deleted_ref(int num, u64 dir, int index, 3688 struct fs_path 
*name, 3689 void *ctx) 3690{ 3691 struct send_ctx *sctx = ctx; 3692 return record_ref(sctx->parent_root, num, dir, index, name, 3693 ctx, &sctx->deleted_refs); 3694} 3695 3696static int record_new_ref(struct send_ctx *sctx) 3697{ 3698 int ret; 3699 3700 ret = iterate_inode_ref(sctx->send_root, sctx->left_path, 3701 sctx->cmp_key, 0, __record_new_ref, sctx); 3702 if (ret < 0) 3703 goto out; 3704 ret = 0; 3705 3706out: 3707 return ret; 3708} 3709 3710static int record_deleted_ref(struct send_ctx *sctx) 3711{ 3712 int ret; 3713 3714 ret = iterate_inode_ref(sctx->parent_root, sctx->right_path, 3715 sctx->cmp_key, 0, __record_deleted_ref, sctx); 3716 if (ret < 0) 3717 goto out; 3718 ret = 0; 3719 3720out: 3721 return ret; 3722} 3723 3724struct find_ref_ctx { 3725 u64 dir; 3726 u64 dir_gen; 3727 struct btrfs_root *root; 3728 struct fs_path *name; 3729 int found_idx; 3730}; 3731 3732static int __find_iref(int num, u64 dir, int index, 3733 struct fs_path *name, 3734 void *ctx_) 3735{ 3736 struct find_ref_ctx *ctx = ctx_; 3737 u64 dir_gen; 3738 int ret; 3739 3740 if (dir == ctx->dir && fs_path_len(name) == fs_path_len(ctx->name) && 3741 strncmp(name->start, ctx->name->start, fs_path_len(name)) == 0) { 3742 /* 3743 * To avoid doing extra lookups we'll only do this if everything 3744 * else matches. 
3745 */ 3746 ret = get_inode_info(ctx->root, dir, NULL, &dir_gen, NULL, 3747 NULL, NULL, NULL); 3748 if (ret) 3749 return ret; 3750 if (dir_gen != ctx->dir_gen) 3751 return 0; 3752 ctx->found_idx = num; 3753 return 1; 3754 } 3755 return 0; 3756} 3757 3758static int find_iref(struct btrfs_root *root, 3759 struct btrfs_path *path, 3760 struct btrfs_key *key, 3761 u64 dir, u64 dir_gen, struct fs_path *name) 3762{ 3763 int ret; 3764 struct find_ref_ctx ctx; 3765 3766 ctx.dir = dir; 3767 ctx.name = name; 3768 ctx.dir_gen = dir_gen; 3769 ctx.found_idx = -1; 3770 ctx.root = root; 3771 3772 ret = iterate_inode_ref(root, path, key, 0, __find_iref, &ctx); 3773 if (ret < 0) 3774 return ret; 3775 3776 if (ctx.found_idx == -1) 3777 return -ENOENT; 3778 3779 return ctx.found_idx; 3780} 3781 3782static int __record_changed_new_ref(int num, u64 dir, int index, 3783 struct fs_path *name, 3784 void *ctx) 3785{ 3786 u64 dir_gen; 3787 int ret; 3788 struct send_ctx *sctx = ctx; 3789 3790 ret = get_inode_info(sctx->send_root, dir, NULL, &dir_gen, NULL, 3791 NULL, NULL, NULL); 3792 if (ret) 3793 return ret; 3794 3795 ret = find_iref(sctx->parent_root, sctx->right_path, 3796 sctx->cmp_key, dir, dir_gen, name); 3797 if (ret == -ENOENT) 3798 ret = __record_new_ref(num, dir, index, name, sctx); 3799 else if (ret > 0) 3800 ret = 0; 3801 3802 return ret; 3803} 3804 3805static int __record_changed_deleted_ref(int num, u64 dir, int index, 3806 struct fs_path *name, 3807 void *ctx) 3808{ 3809 u64 dir_gen; 3810 int ret; 3811 struct send_ctx *sctx = ctx; 3812 3813 ret = get_inode_info(sctx->parent_root, dir, NULL, &dir_gen, NULL, 3814 NULL, NULL, NULL); 3815 if (ret) 3816 return ret; 3817 3818 ret = find_iref(sctx->send_root, sctx->left_path, sctx->cmp_key, 3819 dir, dir_gen, name); 3820 if (ret == -ENOENT) 3821 ret = __record_deleted_ref(num, dir, index, name, sctx); 3822 else if (ret > 0) 3823 ret = 0; 3824 3825 return ret; 3826} 3827 3828static int record_changed_ref(struct send_ctx *sctx) 3829{ 
3830 int ret = 0; 3831 3832 ret = iterate_inode_ref(sctx->send_root, sctx->left_path, 3833 sctx->cmp_key, 0, __record_changed_new_ref, sctx); 3834 if (ret < 0) 3835 goto out; 3836 ret = iterate_inode_ref(sctx->parent_root, sctx->right_path, 3837 sctx->cmp_key, 0, __record_changed_deleted_ref, sctx); 3838 if (ret < 0) 3839 goto out; 3840 ret = 0; 3841 3842out: 3843 return ret; 3844} 3845 3846/* 3847 * Record and process all refs at once. Needed when an inode changes the 3848 * generation number, which means that it was deleted and recreated. 3849 */ 3850static int process_all_refs(struct send_ctx *sctx, 3851 enum btrfs_compare_tree_result cmd) 3852{ 3853 int ret; 3854 struct btrfs_root *root; 3855 struct btrfs_path *path; 3856 struct btrfs_key key; 3857 struct btrfs_key found_key; 3858 struct extent_buffer *eb; 3859 int slot; 3860 iterate_inode_ref_t cb; 3861 int pending_move = 0; 3862 3863 path = alloc_path_for_send(); 3864 if (!path) 3865 return -ENOMEM; 3866 3867 if (cmd == BTRFS_COMPARE_TREE_NEW) { 3868 root = sctx->send_root; 3869 cb = __record_new_ref; 3870 } else if (cmd == BTRFS_COMPARE_TREE_DELETED) { 3871 root = sctx->parent_root; 3872 cb = __record_deleted_ref; 3873 } else { 3874 btrfs_err(sctx->send_root->fs_info, 3875 "Wrong command %d in process_all_refs", cmd); 3876 ret = -EINVAL; 3877 goto out; 3878 } 3879 3880 key.objectid = sctx->cmp_key->objectid; 3881 key.type = BTRFS_INODE_REF_KEY; 3882 key.offset = 0; 3883 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 3884 if (ret < 0) 3885 goto out; 3886 3887 while (1) { 3888 eb = path->nodes[0]; 3889 slot = path->slots[0]; 3890 if (slot >= btrfs_header_nritems(eb)) { 3891 ret = btrfs_next_leaf(root, path); 3892 if (ret < 0) 3893 goto out; 3894 else if (ret > 0) 3895 break; 3896 continue; 3897 } 3898 3899 btrfs_item_key_to_cpu(eb, &found_key, slot); 3900 3901 if (found_key.objectid != key.objectid || 3902 (found_key.type != BTRFS_INODE_REF_KEY && 3903 found_key.type != BTRFS_INODE_EXTREF_KEY)) 3904 
break; 3905 3906 ret = iterate_inode_ref(root, path, &found_key, 0, cb, sctx); 3907 if (ret < 0) 3908 goto out; 3909 3910 path->slots[0]++; 3911 } 3912 btrfs_release_path(path); 3913 3914 ret = process_recorded_refs(sctx, &pending_move); 3915 /* Only applicable to an incremental send. */ 3916 ASSERT(pending_move == 0); 3917 3918out: 3919 btrfs_free_path(path); 3920 return ret; 3921} 3922 3923static int send_set_xattr(struct send_ctx *sctx, 3924 struct fs_path *path, 3925 const char *name, int name_len, 3926 const char *data, int data_len) 3927{ 3928 int ret = 0; 3929 3930 ret = begin_cmd(sctx, BTRFS_SEND_C_SET_XATTR); 3931 if (ret < 0) 3932 goto out; 3933 3934 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path); 3935 TLV_PUT_STRING(sctx, BTRFS_SEND_A_XATTR_NAME, name, name_len); 3936 TLV_PUT(sctx, BTRFS_SEND_A_XATTR_DATA, data, data_len); 3937 3938 ret = send_cmd(sctx); 3939 3940tlv_put_failure: 3941out: 3942 return ret; 3943} 3944 3945static int send_remove_xattr(struct send_ctx *sctx, 3946 struct fs_path *path, 3947 const char *name, int name_len) 3948{ 3949 int ret = 0; 3950 3951 ret = begin_cmd(sctx, BTRFS_SEND_C_REMOVE_XATTR); 3952 if (ret < 0) 3953 goto out; 3954 3955 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path); 3956 TLV_PUT_STRING(sctx, BTRFS_SEND_A_XATTR_NAME, name, name_len); 3957 3958 ret = send_cmd(sctx); 3959 3960tlv_put_failure: 3961out: 3962 return ret; 3963} 3964 3965static int __process_new_xattr(int num, struct btrfs_key *di_key, 3966 const char *name, int name_len, 3967 const char *data, int data_len, 3968 u8 type, void *ctx) 3969{ 3970 int ret; 3971 struct send_ctx *sctx = ctx; 3972 struct fs_path *p; 3973 posix_acl_xattr_header dummy_acl; 3974 3975 p = fs_path_alloc(); 3976 if (!p) 3977 return -ENOMEM; 3978 3979 /* 3980 * This hack is needed because empty acl's are stored as zero byte 3981 * data in xattrs. Problem with that is, that receiving these zero byte 3982 * acl's will fail later. 
To fix this, we send a dummy acl list that 3983 * only contains the version number and no entries. 3984 */ 3985 if (!strncmp(name, XATTR_NAME_POSIX_ACL_ACCESS, name_len) || 3986 !strncmp(name, XATTR_NAME_POSIX_ACL_DEFAULT, name_len)) { 3987 if (data_len == 0) { 3988 dummy_acl.a_version = 3989 cpu_to_le32(POSIX_ACL_XATTR_VERSION); 3990 data = (char *)&dummy_acl; 3991 data_len = sizeof(dummy_acl); 3992 } 3993 } 3994 3995 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); 3996 if (ret < 0) 3997 goto out; 3998 3999 ret = send_set_xattr(sctx, p, name, name_len, data, data_len); 4000 4001out: 4002 fs_path_free(p); 4003 return ret; 4004} 4005 4006static int __process_deleted_xattr(int num, struct btrfs_key *di_key, 4007 const char *name, int name_len, 4008 const char *data, int data_len, 4009 u8 type, void *ctx) 4010{ 4011 int ret; 4012 struct send_ctx *sctx = ctx; 4013 struct fs_path *p; 4014 4015 p = fs_path_alloc(); 4016 if (!p) 4017 return -ENOMEM; 4018 4019 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); 4020 if (ret < 0) 4021 goto out; 4022 4023 ret = send_remove_xattr(sctx, p, name, name_len); 4024 4025out: 4026 fs_path_free(p); 4027 return ret; 4028} 4029 4030static int process_new_xattr(struct send_ctx *sctx) 4031{ 4032 int ret = 0; 4033 4034 ret = iterate_dir_item(sctx->send_root, sctx->left_path, 4035 sctx->cmp_key, __process_new_xattr, sctx); 4036 4037 return ret; 4038} 4039 4040static int process_deleted_xattr(struct send_ctx *sctx) 4041{ 4042 int ret; 4043 4044 ret = iterate_dir_item(sctx->parent_root, sctx->right_path, 4045 sctx->cmp_key, __process_deleted_xattr, sctx); 4046 4047 return ret; 4048} 4049 4050struct find_xattr_ctx { 4051 const char *name; 4052 int name_len; 4053 int found_idx; 4054 char *found_data; 4055 int found_data_len; 4056}; 4057 4058static int __find_xattr(int num, struct btrfs_key *di_key, 4059 const char *name, int name_len, 4060 const char *data, int data_len, 4061 u8 type, void *vctx) 4062{ 4063 struct 
find_xattr_ctx *ctx = vctx; 4064 4065 if (name_len == ctx->name_len && 4066 strncmp(name, ctx->name, name_len) == 0) { 4067 ctx->found_idx = num; 4068 ctx->found_data_len = data_len; 4069 ctx->found_data = kmemdup(data, data_len, GFP_NOFS); 4070 if (!ctx->found_data) 4071 return -ENOMEM; 4072 return 1; 4073 } 4074 return 0; 4075} 4076 4077static int find_xattr(struct btrfs_root *root, 4078 struct btrfs_path *path, 4079 struct btrfs_key *key, 4080 const char *name, int name_len, 4081 char **data, int *data_len) 4082{ 4083 int ret; 4084 struct find_xattr_ctx ctx; 4085 4086 ctx.name = name; 4087 ctx.name_len = name_len; 4088 ctx.found_idx = -1; 4089 ctx.found_data = NULL; 4090 ctx.found_data_len = 0; 4091 4092 ret = iterate_dir_item(root, path, key, __find_xattr, &ctx); 4093 if (ret < 0) 4094 return ret; 4095 4096 if (ctx.found_idx == -1) 4097 return -ENOENT; 4098 if (data) { 4099 *data = ctx.found_data; 4100 *data_len = ctx.found_data_len; 4101 } else { 4102 kfree(ctx.found_data); 4103 } 4104 return ctx.found_idx; 4105} 4106 4107 4108static int __process_changed_new_xattr(int num, struct btrfs_key *di_key, 4109 const char *name, int name_len, 4110 const char *data, int data_len, 4111 u8 type, void *ctx) 4112{ 4113 int ret; 4114 struct send_ctx *sctx = ctx; 4115 char *found_data = NULL; 4116 int found_data_len = 0; 4117 4118 ret = find_xattr(sctx->parent_root, sctx->right_path, 4119 sctx->cmp_key, name, name_len, &found_data, 4120 &found_data_len); 4121 if (ret == -ENOENT) { 4122 ret = __process_new_xattr(num, di_key, name, name_len, data, 4123 data_len, type, ctx); 4124 } else if (ret >= 0) { 4125 if (data_len != found_data_len || 4126 memcmp(data, found_data, data_len)) { 4127 ret = __process_new_xattr(num, di_key, name, name_len, 4128 data, data_len, type, ctx); 4129 } else { 4130 ret = 0; 4131 } 4132 } 4133 4134 kfree(found_data); 4135 return ret; 4136} 4137 4138static int __process_changed_deleted_xattr(int num, struct btrfs_key *di_key, 4139 const char *name, 
int name_len, 4140 const char *data, int data_len, 4141 u8 type, void *ctx) 4142{ 4143 int ret; 4144 struct send_ctx *sctx = ctx; 4145 4146 ret = find_xattr(sctx->send_root, sctx->left_path, sctx->cmp_key, 4147 name, name_len, NULL, NULL); 4148 if (ret == -ENOENT) 4149 ret = __process_deleted_xattr(num, di_key, name, name_len, data, 4150 data_len, type, ctx); 4151 else if (ret >= 0) 4152 ret = 0; 4153 4154 return ret; 4155} 4156 4157static int process_changed_xattr(struct send_ctx *sctx) 4158{ 4159 int ret = 0; 4160 4161 ret = iterate_dir_item(sctx->send_root, sctx->left_path, 4162 sctx->cmp_key, __process_changed_new_xattr, sctx); 4163 if (ret < 0) 4164 goto out; 4165 ret = iterate_dir_item(sctx->parent_root, sctx->right_path, 4166 sctx->cmp_key, __process_changed_deleted_xattr, sctx); 4167 4168out: 4169 return ret; 4170} 4171 4172static int process_all_new_xattrs(struct send_ctx *sctx) 4173{ 4174 int ret; 4175 struct btrfs_root *root; 4176 struct btrfs_path *path; 4177 struct btrfs_key key; 4178 struct btrfs_key found_key; 4179 struct extent_buffer *eb; 4180 int slot; 4181 4182 path = alloc_path_for_send(); 4183 if (!path) 4184 return -ENOMEM; 4185 4186 root = sctx->send_root; 4187 4188 key.objectid = sctx->cmp_key->objectid; 4189 key.type = BTRFS_XATTR_ITEM_KEY; 4190 key.offset = 0; 4191 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 4192 if (ret < 0) 4193 goto out; 4194 4195 while (1) { 4196 eb = path->nodes[0]; 4197 slot = path->slots[0]; 4198 if (slot >= btrfs_header_nritems(eb)) { 4199 ret = btrfs_next_leaf(root, path); 4200 if (ret < 0) { 4201 goto out; 4202 } else if (ret > 0) { 4203 ret = 0; 4204 break; 4205 } 4206 continue; 4207 } 4208 4209 btrfs_item_key_to_cpu(eb, &found_key, slot); 4210 if (found_key.objectid != key.objectid || 4211 found_key.type != key.type) { 4212 ret = 0; 4213 goto out; 4214 } 4215 4216 ret = iterate_dir_item(root, path, &found_key, 4217 __process_new_xattr, sctx); 4218 if (ret < 0) 4219 goto out; 4220 4221 
path->slots[0]++; 4222 } 4223 4224out: 4225 btrfs_free_path(path); 4226 return ret; 4227} 4228 4229static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len) 4230{ 4231 struct btrfs_root *root = sctx->send_root; 4232 struct btrfs_fs_info *fs_info = root->fs_info; 4233 struct inode *inode; 4234 struct page *page; 4235 char *addr; 4236 struct btrfs_key key; 4237 pgoff_t index = offset >> PAGE_CACHE_SHIFT; 4238 pgoff_t last_index; 4239 unsigned pg_offset = offset & ~PAGE_CACHE_MASK; 4240 ssize_t ret = 0; 4241 4242 key.objectid = sctx->cur_ino; 4243 key.type = BTRFS_INODE_ITEM_KEY; 4244 key.offset = 0; 4245 4246 inode = btrfs_iget(fs_info->sb, &key, root, NULL); 4247 if (IS_ERR(inode)) 4248 return PTR_ERR(inode); 4249 4250 if (offset + len > i_size_read(inode)) { 4251 if (offset > i_size_read(inode)) 4252 len = 0; 4253 else 4254 len = offset - i_size_read(inode); 4255 } 4256 if (len == 0) 4257 goto out; 4258 4259 last_index = (offset + len - 1) >> PAGE_CACHE_SHIFT; 4260 4261 /* initial readahead */ 4262 memset(&sctx->ra, 0, sizeof(struct file_ra_state)); 4263 file_ra_state_init(&sctx->ra, inode->i_mapping); 4264 btrfs_force_ra(inode->i_mapping, &sctx->ra, NULL, index, 4265 last_index - index + 1); 4266 4267 while (index <= last_index) { 4268 unsigned cur_len = min_t(unsigned, len, 4269 PAGE_CACHE_SIZE - pg_offset); 4270 page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); 4271 if (!page) { 4272 ret = -ENOMEM; 4273 break; 4274 } 4275 4276 if (!PageUptodate(page)) { 4277 btrfs_readpage(NULL, page); 4278 lock_page(page); 4279 if (!PageUptodate(page)) { 4280 unlock_page(page); 4281 page_cache_release(page); 4282 ret = -EIO; 4283 break; 4284 } 4285 } 4286 4287 addr = kmap(page); 4288 memcpy(sctx->read_buf + ret, addr + pg_offset, cur_len); 4289 kunmap(page); 4290 unlock_page(page); 4291 page_cache_release(page); 4292 index++; 4293 pg_offset = 0; 4294 len -= cur_len; 4295 ret += cur_len; 4296 } 4297out: 4298 iput(inode); 4299 return ret; 4300} 4301 
4302/* 4303 * Read some bytes from the current inode/file and send a write command to 4304 * user space. 4305 */ 4306static int send_write(struct send_ctx *sctx, u64 offset, u32 len) 4307{ 4308 int ret = 0; 4309 struct fs_path *p; 4310 ssize_t num_read = 0; 4311 4312 p = fs_path_alloc(); 4313 if (!p) 4314 return -ENOMEM; 4315 4316verbose_printk("btrfs: send_write offset=%llu, len=%d\n", offset, len); 4317 4318 num_read = fill_read_buf(sctx, offset, len); 4319 if (num_read <= 0) { 4320 if (num_read < 0) 4321 ret = num_read; 4322 goto out; 4323 } 4324 4325 ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE); 4326 if (ret < 0) 4327 goto out; 4328 4329 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); 4330 if (ret < 0) 4331 goto out; 4332 4333 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 4334 TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset); 4335 TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, num_read); 4336 4337 ret = send_cmd(sctx); 4338 4339tlv_put_failure: 4340out: 4341 fs_path_free(p); 4342 if (ret < 0) 4343 return ret; 4344 return num_read; 4345} 4346 4347/* 4348 * Send a clone command to user space. 
4349 */ 4350static int send_clone(struct send_ctx *sctx, 4351 u64 offset, u32 len, 4352 struct clone_root *clone_root) 4353{ 4354 int ret = 0; 4355 struct fs_path *p; 4356 u64 gen; 4357 4358verbose_printk("btrfs: send_clone offset=%llu, len=%d, clone_root=%llu, " 4359 "clone_inode=%llu, clone_offset=%llu\n", offset, len, 4360 clone_root->root->objectid, clone_root->ino, 4361 clone_root->offset); 4362 4363 p = fs_path_alloc(); 4364 if (!p) 4365 return -ENOMEM; 4366 4367 ret = begin_cmd(sctx, BTRFS_SEND_C_CLONE); 4368 if (ret < 0) 4369 goto out; 4370 4371 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); 4372 if (ret < 0) 4373 goto out; 4374 4375 TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset); 4376 TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_LEN, len); 4377 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 4378 4379 if (clone_root->root == sctx->send_root) { 4380 ret = get_inode_info(sctx->send_root, clone_root->ino, NULL, 4381 &gen, NULL, NULL, NULL, NULL); 4382 if (ret < 0) 4383 goto out; 4384 ret = get_cur_path(sctx, clone_root->ino, gen, p); 4385 } else { 4386 ret = get_inode_path(clone_root->root, clone_root->ino, p); 4387 } 4388 if (ret < 0) 4389 goto out; 4390 4391 TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID, 4392 clone_root->root->root_item.uuid); 4393 TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID, 4394 le64_to_cpu(clone_root->root->root_item.ctransid)); 4395 TLV_PUT_PATH(sctx, BTRFS_SEND_A_CLONE_PATH, p); 4396 TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_OFFSET, 4397 clone_root->offset); 4398 4399 ret = send_cmd(sctx); 4400 4401tlv_put_failure: 4402out: 4403 fs_path_free(p); 4404 return ret; 4405} 4406 4407/* 4408 * Send an update extent command to user space. 
4409 */ 4410static int send_update_extent(struct send_ctx *sctx, 4411 u64 offset, u32 len) 4412{ 4413 int ret = 0; 4414 struct fs_path *p; 4415 4416 p = fs_path_alloc(); 4417 if (!p) 4418 return -ENOMEM; 4419 4420 ret = begin_cmd(sctx, BTRFS_SEND_C_UPDATE_EXTENT); 4421 if (ret < 0) 4422 goto out; 4423 4424 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); 4425 if (ret < 0) 4426 goto out; 4427 4428 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 4429 TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset); 4430 TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, len); 4431 4432 ret = send_cmd(sctx); 4433 4434tlv_put_failure: 4435out: 4436 fs_path_free(p); 4437 return ret; 4438} 4439 4440static int send_hole(struct send_ctx *sctx, u64 end) 4441{ 4442 struct fs_path *p = NULL; 4443 u64 offset = sctx->cur_inode_last_extent; 4444 u64 len; 4445 int ret = 0; 4446 4447 p = fs_path_alloc(); 4448 if (!p) 4449 return -ENOMEM; 4450 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); 4451 if (ret < 0) 4452 goto tlv_put_failure; 4453 memset(sctx->read_buf, 0, BTRFS_SEND_READ_SIZE); 4454 while (offset < end) { 4455 len = min_t(u64, end - offset, BTRFS_SEND_READ_SIZE); 4456 4457 ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE); 4458 if (ret < 0) 4459 break; 4460 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 4461 TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset); 4462 TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, len); 4463 ret = send_cmd(sctx); 4464 if (ret < 0) 4465 break; 4466 offset += len; 4467 } 4468tlv_put_failure: 4469 fs_path_free(p); 4470 return ret; 4471} 4472 4473static int send_write_or_clone(struct send_ctx *sctx, 4474 struct btrfs_path *path, 4475 struct btrfs_key *key, 4476 struct clone_root *clone_root) 4477{ 4478 int ret = 0; 4479 struct btrfs_file_extent_item *ei; 4480 u64 offset = key->offset; 4481 u64 pos = 0; 4482 u64 len; 4483 u32 l; 4484 u8 type; 4485 u64 bs = sctx->send_root->fs_info->sb->s_blocksize; 4486 4487 ei = btrfs_item_ptr(path->nodes[0], 
path->slots[0], 4488 struct btrfs_file_extent_item); 4489 type = btrfs_file_extent_type(path->nodes[0], ei); 4490 if (type == BTRFS_FILE_EXTENT_INLINE) { 4491 len = btrfs_file_extent_inline_len(path->nodes[0], 4492 path->slots[0], ei); 4493 /* 4494 * it is possible the inline item won't cover the whole page, 4495 * but there may be items after this page. Make 4496 * sure to send the whole thing 4497 */ 4498 len = PAGE_CACHE_ALIGN(len); 4499 } else { 4500 len = btrfs_file_extent_num_bytes(path->nodes[0], ei); 4501 } 4502 4503 if (offset + len > sctx->cur_inode_size) 4504 len = sctx->cur_inode_size - offset; 4505 if (len == 0) { 4506 ret = 0; 4507 goto out; 4508 } 4509 4510 if (clone_root && IS_ALIGNED(offset + len, bs)) { 4511 ret = send_clone(sctx, offset, len, clone_root); 4512 } else if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA) { 4513 ret = send_update_extent(sctx, offset, len); 4514 } else { 4515 while (pos < len) { 4516 l = len - pos; 4517 if (l > BTRFS_SEND_READ_SIZE) 4518 l = BTRFS_SEND_READ_SIZE; 4519 ret = send_write(sctx, pos + offset, l); 4520 if (ret < 0) 4521 goto out; 4522 if (!ret) 4523 break; 4524 pos += ret; 4525 } 4526 ret = 0; 4527 } 4528out: 4529 return ret; 4530} 4531 4532static int is_extent_unchanged(struct send_ctx *sctx, 4533 struct btrfs_path *left_path, 4534 struct btrfs_key *ekey) 4535{ 4536 int ret = 0; 4537 struct btrfs_key key; 4538 struct btrfs_path *path = NULL; 4539 struct extent_buffer *eb; 4540 int slot; 4541 struct btrfs_key found_key; 4542 struct btrfs_file_extent_item *ei; 4543 u64 left_disknr; 4544 u64 right_disknr; 4545 u64 left_offset; 4546 u64 right_offset; 4547 u64 left_offset_fixed; 4548 u64 left_len; 4549 u64 right_len; 4550 u64 left_gen; 4551 u64 right_gen; 4552 u8 left_type; 4553 u8 right_type; 4554 4555 path = alloc_path_for_send(); 4556 if (!path) 4557 return -ENOMEM; 4558 4559 eb = left_path->nodes[0]; 4560 slot = left_path->slots[0]; 4561 ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); 4562 
left_type = btrfs_file_extent_type(eb, ei); 4563 4564 if (left_type != BTRFS_FILE_EXTENT_REG) { 4565 ret = 0; 4566 goto out; 4567 } 4568 left_disknr = btrfs_file_extent_disk_bytenr(eb, ei); 4569 left_len = btrfs_file_extent_num_bytes(eb, ei); 4570 left_offset = btrfs_file_extent_offset(eb, ei); 4571 left_gen = btrfs_file_extent_generation(eb, ei); 4572 4573 /* 4574 * Following comments will refer to these graphics. L is the left 4575 * extents which we are checking at the moment. 1-8 are the right 4576 * extents that we iterate. 4577 * 4578 * |-----L-----| 4579 * |-1-|-2a-|-3-|-4-|-5-|-6-| 4580 * 4581 * |-----L-----| 4582 * |--1--|-2b-|...(same as above) 4583 * 4584 * Alternative situation. Happens on files where extents got split. 4585 * |-----L-----| 4586 * |-----------7-----------|-6-| 4587 * 4588 * Alternative situation. Happens on files which got larger. 4589 * |-----L-----| 4590 * |-8-| 4591 * Nothing follows after 8. 4592 */ 4593 4594 key.objectid = ekey->objectid; 4595 key.type = BTRFS_EXTENT_DATA_KEY; 4596 key.offset = ekey->offset; 4597 ret = btrfs_search_slot_for_read(sctx->parent_root, &key, path, 0, 0); 4598 if (ret < 0) 4599 goto out; 4600 if (ret) { 4601 ret = 0; 4602 goto out; 4603 } 4604 4605 /* 4606 * Handle special case where the right side has no extents at all. 4607 */ 4608 eb = path->nodes[0]; 4609 slot = path->slots[0]; 4610 btrfs_item_key_to_cpu(eb, &found_key, slot); 4611 if (found_key.objectid != key.objectid || 4612 found_key.type != key.type) { 4613 /* If we're a hole then just pretend nothing changed */ 4614 ret = (left_disknr) ? 0 : 1; 4615 goto out; 4616 } 4617 4618 /* 4619 * We're now on 2a, 2b or 7. 
4620 */ 4621 key = found_key; 4622 while (key.offset < ekey->offset + left_len) { 4623 ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); 4624 right_type = btrfs_file_extent_type(eb, ei); 4625 if (right_type != BTRFS_FILE_EXTENT_REG) { 4626 ret = 0; 4627 goto out; 4628 } 4629 4630 right_disknr = btrfs_file_extent_disk_bytenr(eb, ei); 4631 right_len = btrfs_file_extent_num_bytes(eb, ei); 4632 right_offset = btrfs_file_extent_offset(eb, ei); 4633 right_gen = btrfs_file_extent_generation(eb, ei); 4634 4635 /* 4636 * Are we at extent 8? If yes, we know the extent is changed. 4637 * This may only happen on the first iteration. 4638 */ 4639 if (found_key.offset + right_len <= ekey->offset) { 4640 /* If we're a hole just pretend nothing changed */ 4641 ret = (left_disknr) ? 0 : 1; 4642 goto out; 4643 } 4644 4645 left_offset_fixed = left_offset; 4646 if (key.offset < ekey->offset) { 4647 /* Fix the right offset for 2a and 7. */ 4648 right_offset += ekey->offset - key.offset; 4649 } else { 4650 /* Fix the left offset for all behind 2a and 2b */ 4651 left_offset_fixed += key.offset - ekey->offset; 4652 } 4653 4654 /* 4655 * Check if we have the same extent. 4656 */ 4657 if (left_disknr != right_disknr || 4658 left_offset_fixed != right_offset || 4659 left_gen != right_gen) { 4660 ret = 0; 4661 goto out; 4662 } 4663 4664 /* 4665 * Go to the next extent. 4666 */ 4667 ret = btrfs_next_item(sctx->parent_root, path); 4668 if (ret < 0) 4669 goto out; 4670 if (!ret) { 4671 eb = path->nodes[0]; 4672 slot = path->slots[0]; 4673 btrfs_item_key_to_cpu(eb, &found_key, slot); 4674 } 4675 if (ret || found_key.objectid != key.objectid || 4676 found_key.type != key.type) { 4677 key.offset += right_len; 4678 break; 4679 } 4680 if (found_key.offset != key.offset + right_len) { 4681 ret = 0; 4682 goto out; 4683 } 4684 key = found_key; 4685 } 4686 4687 /* 4688 * We're now behind the left extent (treat as unchanged) or at the end 4689 * of the right side (treat as changed). 
4690 */ 4691 if (key.offset >= ekey->offset + left_len) 4692 ret = 1; 4693 else 4694 ret = 0; 4695 4696 4697out: 4698 btrfs_free_path(path); 4699 return ret; 4700} 4701 4702static int get_last_extent(struct send_ctx *sctx, u64 offset) 4703{ 4704 struct btrfs_path *path; 4705 struct btrfs_root *root = sctx->send_root; 4706 struct btrfs_file_extent_item *fi; 4707 struct btrfs_key key; 4708 u64 extent_end; 4709 u8 type; 4710 int ret; 4711 4712 path = alloc_path_for_send(); 4713 if (!path) 4714 return -ENOMEM; 4715 4716 sctx->cur_inode_last_extent = 0; 4717 4718 key.objectid = sctx->cur_ino; 4719 key.type = BTRFS_EXTENT_DATA_KEY; 4720 key.offset = offset; 4721 ret = btrfs_search_slot_for_read(root, &key, path, 0, 1); 4722 if (ret < 0) 4723 goto out; 4724 ret = 0; 4725 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 4726 if (key.objectid != sctx->cur_ino || key.type != BTRFS_EXTENT_DATA_KEY) 4727 goto out; 4728 4729 fi = btrfs_item_ptr(path->nodes[0], path->slots[0], 4730 struct btrfs_file_extent_item); 4731 type = btrfs_file_extent_type(path->nodes[0], fi); 4732 if (type == BTRFS_FILE_EXTENT_INLINE) { 4733 u64 size = btrfs_file_extent_inline_len(path->nodes[0], 4734 path->slots[0], fi); 4735 extent_end = ALIGN(key.offset + size, 4736 sctx->send_root->sectorsize); 4737 } else { 4738 extent_end = key.offset + 4739 btrfs_file_extent_num_bytes(path->nodes[0], fi); 4740 } 4741 sctx->cur_inode_last_extent = extent_end; 4742out: 4743 btrfs_free_path(path); 4744 return ret; 4745} 4746 4747static int maybe_send_hole(struct send_ctx *sctx, struct btrfs_path *path, 4748 struct btrfs_key *key) 4749{ 4750 struct btrfs_file_extent_item *fi; 4751 u64 extent_end; 4752 u8 type; 4753 int ret = 0; 4754 4755 if (sctx->cur_ino != key->objectid || !need_send_hole(sctx)) 4756 return 0; 4757 4758 if (sctx->cur_inode_last_extent == (u64)-1) { 4759 ret = get_last_extent(sctx, key->offset - 1); 4760 if (ret) 4761 return ret; 4762 } 4763 4764 fi = btrfs_item_ptr(path->nodes[0], 
path->slots[0], 4765 struct btrfs_file_extent_item); 4766 type = btrfs_file_extent_type(path->nodes[0], fi); 4767 if (type == BTRFS_FILE_EXTENT_INLINE) { 4768 u64 size = btrfs_file_extent_inline_len(path->nodes[0], 4769 path->slots[0], fi); 4770 extent_end = ALIGN(key->offset + size, 4771 sctx->send_root->sectorsize); 4772 } else { 4773 extent_end = key->offset + 4774 btrfs_file_extent_num_bytes(path->nodes[0], fi); 4775 } 4776 4777 if (path->slots[0] == 0 && 4778 sctx->cur_inode_last_extent < key->offset) { 4779 /* 4780 * We might have skipped entire leafs that contained only 4781 * file extent items for our current inode. These leafs have 4782 * a generation number smaller (older) than the one in the 4783 * current leaf and the leaf our last extent came from, and 4784 * are located between these 2 leafs. 4785 */ 4786 ret = get_last_extent(sctx, key->offset - 1); 4787 if (ret) 4788 return ret; 4789 } 4790 4791 if (sctx->cur_inode_last_extent < key->offset) 4792 ret = send_hole(sctx, key->offset); 4793 sctx->cur_inode_last_extent = extent_end; 4794 return ret; 4795} 4796 4797static int process_extent(struct send_ctx *sctx, 4798 struct btrfs_path *path, 4799 struct btrfs_key *key) 4800{ 4801 struct clone_root *found_clone = NULL; 4802 int ret = 0; 4803 4804 if (S_ISLNK(sctx->cur_inode_mode)) 4805 return 0; 4806 4807 if (sctx->parent_root && !sctx->cur_inode_new) { 4808 ret = is_extent_unchanged(sctx, path, key); 4809 if (ret < 0) 4810 goto out; 4811 if (ret) { 4812 ret = 0; 4813 goto out_hole; 4814 } 4815 } else { 4816 struct btrfs_file_extent_item *ei; 4817 u8 type; 4818 4819 ei = btrfs_item_ptr(path->nodes[0], path->slots[0], 4820 struct btrfs_file_extent_item); 4821 type = btrfs_file_extent_type(path->nodes[0], ei); 4822 if (type == BTRFS_FILE_EXTENT_PREALLOC || 4823 type == BTRFS_FILE_EXTENT_REG) { 4824 /* 4825 * The send spec does not have a prealloc command yet, 4826 * so just leave a hole for prealloc'ed extents until 4827 * we have enough commands queued up 
to justify rev'ing 4828 * the send spec. 4829 */ 4830 if (type == BTRFS_FILE_EXTENT_PREALLOC) { 4831 ret = 0; 4832 goto out; 4833 } 4834 4835 /* Have a hole, just skip it. */ 4836 if (btrfs_file_extent_disk_bytenr(path->nodes[0], ei) == 0) { 4837 ret = 0; 4838 goto out; 4839 } 4840 } 4841 } 4842 4843 ret = find_extent_clone(sctx, path, key->objectid, key->offset, 4844 sctx->cur_inode_size, &found_clone); 4845 if (ret != -ENOENT && ret < 0) 4846 goto out; 4847 4848 ret = send_write_or_clone(sctx, path, key, found_clone); 4849 if (ret) 4850 goto out; 4851out_hole: 4852 ret = maybe_send_hole(sctx, path, key); 4853out: 4854 return ret; 4855} 4856 4857static int process_all_extents(struct send_ctx *sctx) 4858{ 4859 int ret; 4860 struct btrfs_root *root; 4861 struct btrfs_path *path; 4862 struct btrfs_key key; 4863 struct btrfs_key found_key; 4864 struct extent_buffer *eb; 4865 int slot; 4866 4867 root = sctx->send_root; 4868 path = alloc_path_for_send(); 4869 if (!path) 4870 return -ENOMEM; 4871 4872 key.objectid = sctx->cmp_key->objectid; 4873 key.type = BTRFS_EXTENT_DATA_KEY; 4874 key.offset = 0; 4875 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 4876 if (ret < 0) 4877 goto out; 4878 4879 while (1) { 4880 eb = path->nodes[0]; 4881 slot = path->slots[0]; 4882 4883 if (slot >= btrfs_header_nritems(eb)) { 4884 ret = btrfs_next_leaf(root, path); 4885 if (ret < 0) { 4886 goto out; 4887 } else if (ret > 0) { 4888 ret = 0; 4889 break; 4890 } 4891 continue; 4892 } 4893 4894 btrfs_item_key_to_cpu(eb, &found_key, slot); 4895 4896 if (found_key.objectid != key.objectid || 4897 found_key.type != key.type) { 4898 ret = 0; 4899 goto out; 4900 } 4901 4902 ret = process_extent(sctx, path, &found_key); 4903 if (ret < 0) 4904 goto out; 4905 4906 path->slots[0]++; 4907 } 4908 4909out: 4910 btrfs_free_path(path); 4911 return ret; 4912} 4913 4914static int process_recorded_refs_if_needed(struct send_ctx *sctx, int at_end, 4915 int *pending_move, 4916 int *refs_processed) 4917{ 
4918 int ret = 0; 4919 4920 if (sctx->cur_ino == 0) 4921 goto out; 4922 if (!at_end && sctx->cur_ino == sctx->cmp_key->objectid && 4923 sctx->cmp_key->type <= BTRFS_INODE_EXTREF_KEY) 4924 goto out; 4925 if (list_empty(&sctx->new_refs) && list_empty(&sctx->deleted_refs)) 4926 goto out; 4927 4928 ret = process_recorded_refs(sctx, pending_move); 4929 if (ret < 0) 4930 goto out; 4931 4932 *refs_processed = 1; 4933out: 4934 return ret; 4935} 4936 4937static int finish_inode_if_needed(struct send_ctx *sctx, int at_end) 4938{ 4939 int ret = 0; 4940 u64 left_mode; 4941 u64 left_uid; 4942 u64 left_gid; 4943 u64 right_mode; 4944 u64 right_uid; 4945 u64 right_gid; 4946 int need_chmod = 0; 4947 int need_chown = 0; 4948 int pending_move = 0; 4949 int refs_processed = 0; 4950 4951 ret = process_recorded_refs_if_needed(sctx, at_end, &pending_move, 4952 &refs_processed); 4953 if (ret < 0) 4954 goto out; 4955 4956 /* 4957 * We have processed the refs and thus need to advance send_progress. 4958 * Now, calls to get_cur_xxx will take the updated refs of the current 4959 * inode into account. 4960 * 4961 * On the other hand, if our current inode is a directory and couldn't 4962 * be moved/renamed because its parent was renamed/moved too and it has 4963 * a higher inode number, we can only move/rename our current inode 4964 * after we moved/renamed its parent. Therefore in this case operate on 4965 * the old path (pre move/rename) of our current inode, and the 4966 * move/rename will be performed later. 
4967 */ 4968 if (refs_processed && !pending_move) 4969 sctx->send_progress = sctx->cur_ino + 1; 4970 4971 if (sctx->cur_ino == 0 || sctx->cur_inode_deleted) 4972 goto out; 4973 if (!at_end && sctx->cmp_key->objectid == sctx->cur_ino) 4974 goto out; 4975 4976 ret = get_inode_info(sctx->send_root, sctx->cur_ino, NULL, NULL, 4977 &left_mode, &left_uid, &left_gid, NULL); 4978 if (ret < 0) 4979 goto out; 4980 4981 if (!sctx->parent_root || sctx->cur_inode_new) { 4982 need_chown = 1; 4983 if (!S_ISLNK(sctx->cur_inode_mode)) 4984 need_chmod = 1; 4985 } else { 4986 ret = get_inode_info(sctx->parent_root, sctx->cur_ino, 4987 NULL, NULL, &right_mode, &right_uid, 4988 &right_gid, NULL); 4989 if (ret < 0) 4990 goto out; 4991 4992 if (left_uid != right_uid || left_gid != right_gid) 4993 need_chown = 1; 4994 if (!S_ISLNK(sctx->cur_inode_mode) && left_mode != right_mode) 4995 need_chmod = 1; 4996 } 4997 4998 if (S_ISREG(sctx->cur_inode_mode)) { 4999 if (need_send_hole(sctx)) { 5000 if (sctx->cur_inode_last_extent == (u64)-1 || 5001 sctx->cur_inode_last_extent < 5002 sctx->cur_inode_size) { 5003 ret = get_last_extent(sctx, (u64)-1); 5004 if (ret) 5005 goto out; 5006 } 5007 if (sctx->cur_inode_last_extent < 5008 sctx->cur_inode_size) { 5009 ret = send_hole(sctx, sctx->cur_inode_size); 5010 if (ret) 5011 goto out; 5012 } 5013 } 5014 ret = send_truncate(sctx, sctx->cur_ino, sctx->cur_inode_gen, 5015 sctx->cur_inode_size); 5016 if (ret < 0) 5017 goto out; 5018 } 5019 5020 if (need_chown) { 5021 ret = send_chown(sctx, sctx->cur_ino, sctx->cur_inode_gen, 5022 left_uid, left_gid); 5023 if (ret < 0) 5024 goto out; 5025 } 5026 if (need_chmod) { 5027 ret = send_chmod(sctx, sctx->cur_ino, sctx->cur_inode_gen, 5028 left_mode); 5029 if (ret < 0) 5030 goto out; 5031 } 5032 5033 /* 5034 * If other directory inodes depended on our current directory 5035 * inode's move/rename, now do their move/rename operations. 
	 */
	if (!is_waiting_for_move(sctx, sctx->cur_ino)) {
		ret = apply_children_dir_moves(sctx);
		if (ret)
			goto out;
		/*
		 * Need to send that every time, no matter if it actually
		 * changed between the two trees as we have done changes to
		 * the inode before. If our inode is a directory and it's
		 * waiting to be moved/renamed, we will send its utimes when
		 * it's moved/renamed, therefore we don't need to do it here.
		 */
		sctx->send_progress = sctx->cur_ino + 1;
		ret = send_utimes(sctx, sctx->cur_ino, sctx->cur_inode_gen);
		if (ret < 0)
			goto out;
	}

out:
	return ret;
}

/*
 * Handle an inode item reported by the tree comparison (changed_cb calls
 * this for BTRFS_INODE_ITEM_KEY keys).
 *
 * Makes the inode identified by sctx->cmp_key the "current inode" of the send
 * context: cur_ino, send_progress, cur_inode_gen, cur_inode_new,
 * cur_inode_deleted, cur_inode_size and cur_inode_mode are (re)initialized
 * from the inode item found in sctx->left_path and/or sctx->right_path,
 * depending on @result. cur_inode_rdev is only set when the inode is treated
 * as new.
 *
 * For a changed inode whose generation differs between the two trees (and
 * which is not BTRFS_FIRST_FREE_OBJECTID), the inode is processed right here
 * as "deleted" followed by "new": all refs, extents and xattrs are handled
 * through the process_all_* helpers and cur_inode_new_gen is left set to 1.
 *
 * Returns the result of the last helper called (0 when none is called); a
 * negative result from a helper in the new-generation path is returned
 * immediately.
 */
static int changed_inode(struct send_ctx *sctx,
			 enum btrfs_compare_tree_result result)
{
	int ret = 0;
	struct btrfs_key *key = sctx->cmp_key;
	struct btrfs_inode_item *left_ii = NULL;
	struct btrfs_inode_item *right_ii = NULL;
	u64 left_gen = 0;
	u64 right_gen = 0;

	sctx->cur_ino = key->objectid;
	sctx->cur_inode_new_gen = 0;
	sctx->cur_inode_last_extent = (u64)-1;

	/*
	 * Set send_progress to current inode. This will tell all get_cur_xxx
	 * functions that the current inode's refs are not updated yet. Later,
	 * when process_recorded_refs is finished, it is set to cur_ino + 1.
	 */
	sctx->send_progress = sctx->cur_ino;

	/*
	 * NEW and CHANGED read the inode item from the left path, anything
	 * else reads it from the right path.
	 */
	if (result == BTRFS_COMPARE_TREE_NEW ||
	    result == BTRFS_COMPARE_TREE_CHANGED) {
		left_ii = btrfs_item_ptr(sctx->left_path->nodes[0],
				sctx->left_path->slots[0],
				struct btrfs_inode_item);
		left_gen = btrfs_inode_generation(sctx->left_path->nodes[0],
				left_ii);
	} else {
		right_ii = btrfs_item_ptr(sctx->right_path->nodes[0],
				sctx->right_path->slots[0],
				struct btrfs_inode_item);
		right_gen = btrfs_inode_generation(sctx->right_path->nodes[0],
				right_ii);
	}
	/* CHANGED additionally needs the right item to compare generations. */
	if (result == BTRFS_COMPARE_TREE_CHANGED) {
		right_ii = btrfs_item_ptr(sctx->right_path->nodes[0],
				sctx->right_path->slots[0],
				struct btrfs_inode_item);

		right_gen = btrfs_inode_generation(sctx->right_path->nodes[0],
				right_ii);

		/*
		 * The cur_ino = root dir case is special here. We can't treat
		 * the inode as deleted+reused because it would generate a
		 * stream that tries to delete/mkdir the root dir.
		 */
		if (left_gen != right_gen &&
		    sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID)
			sctx->cur_inode_new_gen = 1;
	}

	if (result == BTRFS_COMPARE_TREE_NEW) {
		sctx->cur_inode_gen = left_gen;
		sctx->cur_inode_new = 1;
		sctx->cur_inode_deleted = 0;
		sctx->cur_inode_size = btrfs_inode_size(
				sctx->left_path->nodes[0], left_ii);
		sctx->cur_inode_mode = btrfs_inode_mode(
				sctx->left_path->nodes[0], left_ii);
		sctx->cur_inode_rdev = btrfs_inode_rdev(
				sctx->left_path->nodes[0], left_ii);
		/* No create command is sent for BTRFS_FIRST_FREE_OBJECTID. */
		if (sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID)
			ret = send_create_inode_if_needed(sctx);
	} else if (result == BTRFS_COMPARE_TREE_DELETED) {
		sctx->cur_inode_gen = right_gen;
		sctx->cur_inode_new = 0;
		sctx->cur_inode_deleted = 1;
		sctx->cur_inode_size = btrfs_inode_size(
				sctx->right_path->nodes[0], right_ii);
		sctx->cur_inode_mode = btrfs_inode_mode(
				sctx->right_path->nodes[0], right_ii);
	} else if (result == BTRFS_COMPARE_TREE_CHANGED) {
		/*
		 * We need to do some special handling in case the inode was
		 * reported as changed with a changed generation number. This
		 * means that the original inode was deleted and new inode
		 * reused the same inum. So we have to treat the old inode as
		 * deleted and the new one as new.
		 */
		if (sctx->cur_inode_new_gen) {
			/*
			 * First, process the inode as if it was deleted.
			 */
			sctx->cur_inode_gen = right_gen;
			sctx->cur_inode_new = 0;
			sctx->cur_inode_deleted = 1;
			sctx->cur_inode_size = btrfs_inode_size(
					sctx->right_path->nodes[0], right_ii);
			sctx->cur_inode_mode = btrfs_inode_mode(
					sctx->right_path->nodes[0], right_ii);
			ret = process_all_refs(sctx,
					BTRFS_COMPARE_TREE_DELETED);
			if (ret < 0)
				goto out;

			/*
			 * Now process the inode as if it was new.
			 */
			sctx->cur_inode_gen = left_gen;
			sctx->cur_inode_new = 1;
			sctx->cur_inode_deleted = 0;
			sctx->cur_inode_size = btrfs_inode_size(
					sctx->left_path->nodes[0], left_ii);
			sctx->cur_inode_mode = btrfs_inode_mode(
					sctx->left_path->nodes[0], left_ii);
			sctx->cur_inode_rdev = btrfs_inode_rdev(
					sctx->left_path->nodes[0], left_ii);
			ret = send_create_inode_if_needed(sctx);
			if (ret < 0)
				goto out;

			ret = process_all_refs(sctx, BTRFS_COMPARE_TREE_NEW);
			if (ret < 0)
				goto out;
			/*
			 * Advance send_progress now as we did not get into
			 * process_recorded_refs_if_needed in the new_gen case.
			 */
			sctx->send_progress = sctx->cur_ino + 1;

			/*
			 * Now process all extents and xattrs of the inode as if
			 * they were all new.
			 */
			ret = process_all_extents(sctx);
			if (ret < 0)
				goto out;
			ret = process_all_new_xattrs(sctx);
			if (ret < 0)
				goto out;
		} else {
			/* Same generation: same inode, only its item changed. */
			sctx->cur_inode_gen = left_gen;
			sctx->cur_inode_new = 0;
			sctx->cur_inode_new_gen = 0;
			sctx->cur_inode_deleted = 0;
			sctx->cur_inode_size = btrfs_inode_size(
					sctx->left_path->nodes[0], left_ii);
			sctx->cur_inode_mode = btrfs_inode_mode(
					sctx->left_path->nodes[0], left_ii);
		}
	}

out:
	return ret;
}

/*
 * We have to process new refs before deleted refs, but compare_trees gives us
 * the new and deleted refs mixed. To fix this, we record the new/deleted refs
 * first and later process them in process_recorded_refs.
 * For the cur_inode_new_gen case, we skip recording completely because
 * changed_inode did already initiate processing of refs. The reason for this is
 * that in this case, compare_tree actually compares the refs of 2 different
 * inodes.
To fix this, process_all_refs is used in changed_inode to handle all 5214 * refs of the right tree as deleted and all refs of the left tree as new. 5215 */ 5216static int changed_ref(struct send_ctx *sctx, 5217 enum btrfs_compare_tree_result result) 5218{ 5219 int ret = 0; 5220 5221 BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid); 5222 5223 if (!sctx->cur_inode_new_gen && 5224 sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) { 5225 if (result == BTRFS_COMPARE_TREE_NEW) 5226 ret = record_new_ref(sctx); 5227 else if (result == BTRFS_COMPARE_TREE_DELETED) 5228 ret = record_deleted_ref(sctx); 5229 else if (result == BTRFS_COMPARE_TREE_CHANGED) 5230 ret = record_changed_ref(sctx); 5231 } 5232 5233 return ret; 5234} 5235 5236/* 5237 * Process new/deleted/changed xattrs. We skip processing in the 5238 * cur_inode_new_gen case because changed_inode did already initiate processing 5239 * of xattrs. The reason is the same as in changed_ref 5240 */ 5241static int changed_xattr(struct send_ctx *sctx, 5242 enum btrfs_compare_tree_result result) 5243{ 5244 int ret = 0; 5245 5246 BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid); 5247 5248 if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) { 5249 if (result == BTRFS_COMPARE_TREE_NEW) 5250 ret = process_new_xattr(sctx); 5251 else if (result == BTRFS_COMPARE_TREE_DELETED) 5252 ret = process_deleted_xattr(sctx); 5253 else if (result == BTRFS_COMPARE_TREE_CHANGED) 5254 ret = process_changed_xattr(sctx); 5255 } 5256 5257 return ret; 5258} 5259 5260/* 5261 * Process new/deleted/changed extents. We skip processing in the 5262 * cur_inode_new_gen case because changed_inode did already initiate processing 5263 * of extents. 
The reason is the same as in changed_ref 5264 */ 5265static int changed_extent(struct send_ctx *sctx, 5266 enum btrfs_compare_tree_result result) 5267{ 5268 int ret = 0; 5269 5270 BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid); 5271 5272 if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) { 5273 if (result != BTRFS_COMPARE_TREE_DELETED) 5274 ret = process_extent(sctx, sctx->left_path, 5275 sctx->cmp_key); 5276 } 5277 5278 return ret; 5279} 5280 5281static int dir_changed(struct send_ctx *sctx, u64 dir) 5282{ 5283 u64 orig_gen, new_gen; 5284 int ret; 5285 5286 ret = get_inode_info(sctx->send_root, dir, NULL, &new_gen, NULL, NULL, 5287 NULL, NULL); 5288 if (ret) 5289 return ret; 5290 5291 ret = get_inode_info(sctx->parent_root, dir, NULL, &orig_gen, NULL, 5292 NULL, NULL, NULL); 5293 if (ret) 5294 return ret; 5295 5296 return (orig_gen != new_gen) ? 1 : 0; 5297} 5298 5299static int compare_refs(struct send_ctx *sctx, struct btrfs_path *path, 5300 struct btrfs_key *key) 5301{ 5302 struct btrfs_inode_extref *extref; 5303 struct extent_buffer *leaf; 5304 u64 dirid = 0, last_dirid = 0; 5305 unsigned long ptr; 5306 u32 item_size; 5307 u32 cur_offset = 0; 5308 int ref_name_len; 5309 int ret = 0; 5310 5311 /* Easy case, just check this one dirid */ 5312 if (key->type == BTRFS_INODE_REF_KEY) { 5313 dirid = key->offset; 5314 5315 ret = dir_changed(sctx, dirid); 5316 goto out; 5317 } 5318 5319 leaf = path->nodes[0]; 5320 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 5321 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); 5322 while (cur_offset < item_size) { 5323 extref = (struct btrfs_inode_extref *)(ptr + 5324 cur_offset); 5325 dirid = btrfs_inode_extref_parent(leaf, extref); 5326 ref_name_len = btrfs_inode_extref_name_len(leaf, extref); 5327 cur_offset += ref_name_len + sizeof(*extref); 5328 if (dirid == last_dirid) 5329 continue; 5330 ret = dir_changed(sctx, dirid); 5331 if (ret) 5332 break; 5333 last_dirid = dirid; 5334 } 5335out: 5336 return ret; 5337} 
5338 5339/* 5340 * Updates compare related fields in sctx and simply forwards to the actual 5341 * changed_xxx functions. 5342 */ 5343static int changed_cb(struct btrfs_root *left_root, 5344 struct btrfs_root *right_root, 5345 struct btrfs_path *left_path, 5346 struct btrfs_path *right_path, 5347 struct btrfs_key *key, 5348 enum btrfs_compare_tree_result result, 5349 void *ctx) 5350{ 5351 int ret = 0; 5352 struct send_ctx *sctx = ctx; 5353 5354 if (result == BTRFS_COMPARE_TREE_SAME) { 5355 if (key->type == BTRFS_INODE_REF_KEY || 5356 key->type == BTRFS_INODE_EXTREF_KEY) { 5357 ret = compare_refs(sctx, left_path, key); 5358 if (!ret) 5359 return 0; 5360 if (ret < 0) 5361 return ret; 5362 } else if (key->type == BTRFS_EXTENT_DATA_KEY) { 5363 return maybe_send_hole(sctx, left_path, key); 5364 } else { 5365 return 0; 5366 } 5367 result = BTRFS_COMPARE_TREE_CHANGED; 5368 ret = 0; 5369 } 5370 5371 sctx->left_path = left_path; 5372 sctx->right_path = right_path; 5373 sctx->cmp_key = key; 5374 5375 ret = finish_inode_if_needed(sctx, 0); 5376 if (ret < 0) 5377 goto out; 5378 5379 /* Ignore non-FS objects */ 5380 if (key->objectid == BTRFS_FREE_INO_OBJECTID || 5381 key->objectid == BTRFS_FREE_SPACE_OBJECTID) 5382 goto out; 5383 5384 if (key->type == BTRFS_INODE_ITEM_KEY) 5385 ret = changed_inode(sctx, result); 5386 else if (key->type == BTRFS_INODE_REF_KEY || 5387 key->type == BTRFS_INODE_EXTREF_KEY) 5388 ret = changed_ref(sctx, result); 5389 else if (key->type == BTRFS_XATTR_ITEM_KEY) 5390 ret = changed_xattr(sctx, result); 5391 else if (key->type == BTRFS_EXTENT_DATA_KEY) 5392 ret = changed_extent(sctx, result); 5393 5394out: 5395 return ret; 5396} 5397 5398static int full_send_tree(struct send_ctx *sctx) 5399{ 5400 int ret; 5401 struct btrfs_root *send_root = sctx->send_root; 5402 struct btrfs_key key; 5403 struct btrfs_key found_key; 5404 struct btrfs_path *path; 5405 struct extent_buffer *eb; 5406 int slot; 5407 5408 path = alloc_path_for_send(); 5409 if (!path) 5410 
return -ENOMEM; 5411 5412 key.objectid = BTRFS_FIRST_FREE_OBJECTID; 5413 key.type = BTRFS_INODE_ITEM_KEY; 5414 key.offset = 0; 5415 5416 ret = btrfs_search_slot_for_read(send_root, &key, path, 1, 0); 5417 if (ret < 0) 5418 goto out; 5419 if (ret) 5420 goto out_finish; 5421 5422 while (1) { 5423 eb = path->nodes[0]; 5424 slot = path->slots[0]; 5425 btrfs_item_key_to_cpu(eb, &found_key, slot); 5426 5427 ret = changed_cb(send_root, NULL, path, NULL, 5428 &found_key, BTRFS_COMPARE_TREE_NEW, sctx); 5429 if (ret < 0) 5430 goto out; 5431 5432 key.objectid = found_key.objectid; 5433 key.type = found_key.type; 5434 key.offset = found_key.offset + 1; 5435 5436 ret = btrfs_next_item(send_root, path); 5437 if (ret < 0) 5438 goto out; 5439 if (ret) { 5440 ret = 0; 5441 break; 5442 } 5443 } 5444 5445out_finish: 5446 ret = finish_inode_if_needed(sctx, 1); 5447 5448out: 5449 btrfs_free_path(path); 5450 return ret; 5451} 5452 5453static int send_subvol(struct send_ctx *sctx) 5454{ 5455 int ret; 5456 5457 if (!(sctx->flags & BTRFS_SEND_FLAG_OMIT_STREAM_HEADER)) { 5458 ret = send_header(sctx); 5459 if (ret < 0) 5460 goto out; 5461 } 5462 5463 ret = send_subvol_begin(sctx); 5464 if (ret < 0) 5465 goto out; 5466 5467 if (sctx->parent_root) { 5468 ret = btrfs_compare_trees(sctx->send_root, sctx->parent_root, 5469 changed_cb, sctx); 5470 if (ret < 0) 5471 goto out; 5472 ret = finish_inode_if_needed(sctx, 1); 5473 if (ret < 0) 5474 goto out; 5475 } else { 5476 ret = full_send_tree(sctx); 5477 if (ret < 0) 5478 goto out; 5479 } 5480 5481out: 5482 free_recorded_refs(sctx); 5483 return ret; 5484} 5485 5486static void btrfs_root_dec_send_in_progress(struct btrfs_root* root) 5487{ 5488 spin_lock(&root->root_item_lock); 5489 root->send_in_progress--; 5490 /* 5491 * Not much left to do, we don't know why it's unbalanced and 5492 * can't blindly reset it to 0. 
5493 */ 5494 if (root->send_in_progress < 0) 5495 btrfs_err(root->fs_info, 5496 "send_in_progres unbalanced %d root %llu", 5497 root->send_in_progress, root->root_key.objectid); 5498 spin_unlock(&root->root_item_lock); 5499} 5500 5501long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) 5502{ 5503 int ret = 0; 5504 struct btrfs_root *send_root; 5505 struct btrfs_root *clone_root; 5506 struct btrfs_fs_info *fs_info; 5507 struct btrfs_ioctl_send_args *arg = NULL; 5508 struct btrfs_key key; 5509 struct send_ctx *sctx = NULL; 5510 u32 i; 5511 u64 *clone_sources_tmp = NULL; 5512 int clone_sources_to_rollback = 0; 5513 int sort_clone_roots = 0; 5514 int index; 5515 5516 if (!capable(CAP_SYS_ADMIN)) 5517 return -EPERM; 5518 5519 send_root = BTRFS_I(file_inode(mnt_file))->root; 5520 fs_info = send_root->fs_info; 5521 5522 /* 5523 * The subvolume must remain read-only during send, protect against 5524 * making it RW. This also protects against deletion. 5525 */ 5526 spin_lock(&send_root->root_item_lock); 5527 send_root->send_in_progress++; 5528 spin_unlock(&send_root->root_item_lock); 5529 5530 /* 5531 * This is done when we lookup the root, it should already be complete 5532 * by the time we get here. 5533 */ 5534 WARN_ON(send_root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE); 5535 5536 /* 5537 * Userspace tools do the checks and warn the user if it's 5538 * not RO. 
5539 */ 5540 if (!btrfs_root_readonly(send_root)) { 5541 ret = -EPERM; 5542 goto out; 5543 } 5544 5545 arg = memdup_user(arg_, sizeof(*arg)); 5546 if (IS_ERR(arg)) { 5547 ret = PTR_ERR(arg); 5548 arg = NULL; 5549 goto out; 5550 } 5551 5552 if (!access_ok(VERIFY_READ, arg->clone_sources, 5553 sizeof(*arg->clone_sources) * 5554 arg->clone_sources_count)) { 5555 ret = -EFAULT; 5556 goto out; 5557 } 5558 5559 if (arg->flags & ~BTRFS_SEND_FLAG_MASK) { 5560 ret = -EINVAL; 5561 goto out; 5562 } 5563 5564 sctx = kzalloc(sizeof(struct send_ctx), GFP_NOFS); 5565 if (!sctx) { 5566 ret = -ENOMEM; 5567 goto out; 5568 } 5569 5570 INIT_LIST_HEAD(&sctx->new_refs); 5571 INIT_LIST_HEAD(&sctx->deleted_refs); 5572 INIT_RADIX_TREE(&sctx->name_cache, GFP_NOFS); 5573 INIT_LIST_HEAD(&sctx->name_cache_list); 5574 5575 sctx->flags = arg->flags; 5576 5577 sctx->send_filp = fget(arg->send_fd); 5578 if (!sctx->send_filp) { 5579 ret = -EBADF; 5580 goto out; 5581 } 5582 5583 sctx->send_root = send_root; 5584 /* 5585 * Unlikely but possible, if the subvolume is marked for deletion but 5586 * is slow to remove the directory entry, send can still be started 5587 */ 5588 if (btrfs_root_dead(sctx->send_root)) { 5589 ret = -EPERM; 5590 goto out; 5591 } 5592 5593 sctx->clone_roots_cnt = arg->clone_sources_count; 5594 5595 sctx->send_max_size = BTRFS_SEND_BUF_SIZE; 5596 sctx->send_buf = vmalloc(sctx->send_max_size); 5597 if (!sctx->send_buf) { 5598 ret = -ENOMEM; 5599 goto out; 5600 } 5601 5602 sctx->read_buf = vmalloc(BTRFS_SEND_READ_SIZE); 5603 if (!sctx->read_buf) { 5604 ret = -ENOMEM; 5605 goto out; 5606 } 5607 5608 sctx->pending_dir_moves = RB_ROOT; 5609 sctx->waiting_dir_moves = RB_ROOT; 5610 sctx->orphan_dirs = RB_ROOT; 5611 5612 sctx->clone_roots = vzalloc(sizeof(struct clone_root) * 5613 (arg->clone_sources_count + 1)); 5614 if (!sctx->clone_roots) { 5615 ret = -ENOMEM; 5616 goto out; 5617 } 5618 5619 if (arg->clone_sources_count) { 5620 clone_sources_tmp = vmalloc(arg->clone_sources_count * 
5621 sizeof(*arg->clone_sources)); 5622 if (!clone_sources_tmp) { 5623 ret = -ENOMEM; 5624 goto out; 5625 } 5626 5627 ret = copy_from_user(clone_sources_tmp, arg->clone_sources, 5628 arg->clone_sources_count * 5629 sizeof(*arg->clone_sources)); 5630 if (ret) { 5631 ret = -EFAULT; 5632 goto out; 5633 } 5634 5635 for (i = 0; i < arg->clone_sources_count; i++) { 5636 key.objectid = clone_sources_tmp[i]; 5637 key.type = BTRFS_ROOT_ITEM_KEY; 5638 key.offset = (u64)-1; 5639 5640 index = srcu_read_lock(&fs_info->subvol_srcu); 5641 5642 clone_root = btrfs_read_fs_root_no_name(fs_info, &key); 5643 if (IS_ERR(clone_root)) { 5644 srcu_read_unlock(&fs_info->subvol_srcu, index); 5645 ret = PTR_ERR(clone_root); 5646 goto out; 5647 } 5648 clone_sources_to_rollback = i + 1; 5649 spin_lock(&clone_root->root_item_lock); 5650 clone_root->send_in_progress++; 5651 if (!btrfs_root_readonly(clone_root)) { 5652 spin_unlock(&clone_root->root_item_lock); 5653 srcu_read_unlock(&fs_info->subvol_srcu, index); 5654 ret = -EPERM; 5655 goto out; 5656 } 5657 spin_unlock(&clone_root->root_item_lock); 5658 srcu_read_unlock(&fs_info->subvol_srcu, index); 5659 5660 sctx->clone_roots[i].root = clone_root; 5661 } 5662 vfree(clone_sources_tmp); 5663 clone_sources_tmp = NULL; 5664 } 5665 5666 if (arg->parent_root) { 5667 key.objectid = arg->parent_root; 5668 key.type = BTRFS_ROOT_ITEM_KEY; 5669 key.offset = (u64)-1; 5670 5671 index = srcu_read_lock(&fs_info->subvol_srcu); 5672 5673 sctx->parent_root = btrfs_read_fs_root_no_name(fs_info, &key); 5674 if (IS_ERR(sctx->parent_root)) { 5675 srcu_read_unlock(&fs_info->subvol_srcu, index); 5676 ret = PTR_ERR(sctx->parent_root); 5677 goto out; 5678 } 5679 5680 spin_lock(&sctx->parent_root->root_item_lock); 5681 sctx->parent_root->send_in_progress++; 5682 if (!btrfs_root_readonly(sctx->parent_root) || 5683 btrfs_root_dead(sctx->parent_root)) { 5684 spin_unlock(&sctx->parent_root->root_item_lock); 5685 srcu_read_unlock(&fs_info->subvol_srcu, index); 5686 ret = 
-EPERM; 5687 goto out; 5688 } 5689 spin_unlock(&sctx->parent_root->root_item_lock); 5690 5691 srcu_read_unlock(&fs_info->subvol_srcu, index); 5692 } 5693 5694 /* 5695 * Clones from send_root are allowed, but only if the clone source 5696 * is behind the current send position. This is checked while searching 5697 * for possible clone sources. 5698 */ 5699 sctx->clone_roots[sctx->clone_roots_cnt++].root = sctx->send_root; 5700 5701 /* We do a bsearch later */ 5702 sort(sctx->clone_roots, sctx->clone_roots_cnt, 5703 sizeof(*sctx->clone_roots), __clone_root_cmp_sort, 5704 NULL); 5705 sort_clone_roots = 1; 5706 5707 current->journal_info = (void *)BTRFS_SEND_TRANS_STUB; 5708 ret = send_subvol(sctx); 5709 current->journal_info = NULL; 5710 if (ret < 0) 5711 goto out; 5712 5713 if (!(sctx->flags & BTRFS_SEND_FLAG_OMIT_END_CMD)) { 5714 ret = begin_cmd(sctx, BTRFS_SEND_C_END); 5715 if (ret < 0) 5716 goto out; 5717 ret = send_cmd(sctx); 5718 if (ret < 0) 5719 goto out; 5720 } 5721 5722out: 5723 WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->pending_dir_moves)); 5724 while (sctx && !RB_EMPTY_ROOT(&sctx->pending_dir_moves)) { 5725 struct rb_node *n; 5726 struct pending_dir_move *pm; 5727 5728 n = rb_first(&sctx->pending_dir_moves); 5729 pm = rb_entry(n, struct pending_dir_move, node); 5730 while (!list_empty(&pm->list)) { 5731 struct pending_dir_move *pm2; 5732 5733 pm2 = list_first_entry(&pm->list, 5734 struct pending_dir_move, list); 5735 free_pending_move(sctx, pm2); 5736 } 5737 free_pending_move(sctx, pm); 5738 } 5739 5740 WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->waiting_dir_moves)); 5741 while (sctx && !RB_EMPTY_ROOT(&sctx->waiting_dir_moves)) { 5742 struct rb_node *n; 5743 struct waiting_dir_move *dm; 5744 5745 n = rb_first(&sctx->waiting_dir_moves); 5746 dm = rb_entry(n, struct waiting_dir_move, node); 5747 rb_erase(&dm->node, &sctx->waiting_dir_moves); 5748 kfree(dm); 5749 } 5750 5751 WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->orphan_dirs)); 5752 while (sctx && 
!RB_EMPTY_ROOT(&sctx->orphan_dirs)) { 5753 struct rb_node *n; 5754 struct orphan_dir_info *odi; 5755 5756 n = rb_first(&sctx->orphan_dirs); 5757 odi = rb_entry(n, struct orphan_dir_info, node); 5758 free_orphan_dir_info(sctx, odi); 5759 } 5760 5761 if (sort_clone_roots) { 5762 for (i = 0; i < sctx->clone_roots_cnt; i++) 5763 btrfs_root_dec_send_in_progress( 5764 sctx->clone_roots[i].root); 5765 } else { 5766 for (i = 0; sctx && i < clone_sources_to_rollback; i++) 5767 btrfs_root_dec_send_in_progress( 5768 sctx->clone_roots[i].root); 5769 5770 btrfs_root_dec_send_in_progress(send_root); 5771 } 5772 if (sctx && !IS_ERR_OR_NULL(sctx->parent_root)) 5773 btrfs_root_dec_send_in_progress(sctx->parent_root); 5774 5775 kfree(arg); 5776 vfree(clone_sources_tmp); 5777 5778 if (sctx) { 5779 if (sctx->send_filp) 5780 fput(sctx->send_filp); 5781 5782 vfree(sctx->clone_roots); 5783 vfree(sctx->send_buf); 5784 vfree(sctx->read_buf); 5785 5786 name_cache_free(sctx); 5787 5788 kfree(sctx); 5789 } 5790 5791 return ret; 5792} 5793