/*
  FUSE: Filesystem in Userspace
  Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>

  This program can be distributed under the terms of the GNU GPL.
  See the file COPYING.
*/

#include "fuse_i.h"

#include <linux/init.h>
#include <linux/module.h>
#include <linux/poll.h>
#include <linux/uio.h>
#include <linux/miscdevice.h>
#include <linux/pagemap.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/pipe_fs_i.h>
#include <linux/swap.h>
#include <linux/splice.h>
#include <linux/aio.h>
#include <linux/freezer.h>

MODULE_ALIAS_MISCDEV(FUSE_MINOR);
MODULE_ALIAS("devname:fuse");

static struct kmem_cache *fuse_req_cachep;

static struct fuse_conn *fuse_get_conn(struct file *file)
{
	/*
	 * Lockless access is OK, because file->private_data is set
	 * once during mount and is valid until the file is released.
	 */
	return file->private_data;
}

static void fuse_request_init(struct fuse_req *req, struct page **pages,
			      struct fuse_page_desc *page_descs,
			      unsigned npages)
{
	memset(req, 0, sizeof(*req));
	memset(pages, 0, sizeof(*pages) * npages);
	memset(page_descs, 0, sizeof(*page_descs) * npages);
	INIT_LIST_HEAD(&req->list);
	INIT_LIST_HEAD(&req->intr_entry);
	init_waitqueue_head(&req->waitq);
	atomic_set(&req->count, 1);
	req->pages = pages;
	req->page_descs = page_descs;
	req->max_pages = npages;
}

static struct fuse_req *__fuse_request_alloc(unsigned npages, gfp_t flags)
{
	struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, flags);
	if (req) {
		struct page **pages;
		struct fuse_page_desc *page_descs;

		if (npages <= FUSE_REQ_INLINE_PAGES) {
			pages = req->inline_pages;
			page_descs = req->inline_page_descs;
		} else {
			pages = kmalloc(sizeof(struct page *) * npages, flags);
			page_descs = kmalloc(sizeof(struct fuse_page_desc) *
					     npages, flags);
		}

		if (!pages || !page_descs) {
			kfree(pages);
			kfree(page_descs);
			kmem_cache_free(fuse_req_cachep, req);
			return NULL;
		}

		fuse_request_init(req, pages, page_descs, npages);
	}
	return req;
}

struct fuse_req *fuse_request_alloc(unsigned npages)
{
	return __fuse_request_alloc(npages, GFP_KERNEL);
}
EXPORT_SYMBOL_GPL(fuse_request_alloc);

struct fuse_req *fuse_request_alloc_nofs(unsigned npages)
{
	return __fuse_request_alloc(npages, GFP_NOFS);
}

void fuse_request_free(struct fuse_req *req)
{
	if (req->pages != req->inline_pages) {
		kfree(req->pages);
		kfree(req->page_descs);
	}
	kmem_cache_free(fuse_req_cachep, req);
}

static void block_sigs(sigset_t *oldset)
{
	sigset_t mask;

	siginitsetinv(&mask, sigmask(SIGKILL));
	sigprocmask(SIG_BLOCK, &mask, oldset);
}

static void restore_sigs(sigset_t *oldset)
{
	sigprocmask(SIG_SETMASK, oldset, NULL);
}

void __fuse_get_request(struct fuse_req *req)
{
	atomic_inc(&req->count);
}

/* Must be called with > 1 refcount */
static void __fuse_put_request(struct fuse_req *req)
{
	BUG_ON(atomic_read(&req->count) < 2);
	atomic_dec(&req->count);
}

static void fuse_req_init_context(struct fuse_req *req)
{
	req->in.h.uid = from_kuid_munged(&init_user_ns, current_fsuid());
	req->in.h.gid = from_kgid_munged(&init_user_ns, current_fsgid());
	req->in.h.pid = current->pid;
}

static bool fuse_block_alloc(struct fuse_conn *fc, bool for_background)
{
	return !fc->initialized || (for_background &&
				    fc->blocked);
}

static struct fuse_req *__fuse_get_req(struct fuse_conn *fc, unsigned npages,
				       bool for_background)
{
	struct fuse_req *req;
	int err;
	atomic_inc(&fc->num_waiting);

	if (fuse_block_alloc(fc, for_background)) {
		sigset_t oldset;
		int intr;

		block_sigs(&oldset);
		intr = wait_event_interruptible_exclusive(fc->blocked_waitq,
				!fuse_block_alloc(fc, for_background));
		restore_sigs(&oldset);
		err = -EINTR;
		if (intr)
			goto out;
	}

	err = -ENOTCONN;
	if (!fc->connected)
		goto out;

	req = fuse_request_alloc(npages);
	err = -ENOMEM;
	if (!req) {
		if (for_background)
			wake_up(&fc->blocked_waitq);
		goto out;
	}

	fuse_req_init_context(req);
	req->waiting = 1;
	req->background = for_background;
	return req;

 out:
	atomic_dec(&fc->num_waiting);
	return ERR_PTR(err);
}

struct fuse_req *fuse_get_req(struct fuse_conn *fc, unsigned npages)
{
	return __fuse_get_req(fc, npages, false);
}
EXPORT_SYMBOL_GPL(fuse_get_req);

struct fuse_req *fuse_get_req_for_background(struct fuse_conn *fc,
					     unsigned npages)
{
	return __fuse_get_req(fc, npages, true);
}
EXPORT_SYMBOL_GPL(fuse_get_req_for_background);

/*
 * Return request in fuse_file->reserved_req.  However that may
 * currently be in use.  If that is the case, wait for it to become
 * available.
 */
static struct fuse_req *get_reserved_req(struct fuse_conn *fc,
					 struct file *file)
{
	struct fuse_req *req = NULL;
	struct fuse_file *ff = file->private_data;

	do {
		wait_event(fc->reserved_req_waitq, ff->reserved_req);
		spin_lock(&fc->lock);
		if (ff->reserved_req) {
			req = ff->reserved_req;
			ff->reserved_req = NULL;
			req->stolen_file = get_file(file);
		}
		spin_unlock(&fc->lock);
	} while (!req);

	return req;
}

/*
 * Put stolen request back into fuse_file->reserved_req
 */
static void put_reserved_req(struct fuse_conn *fc, struct fuse_req *req)
{
	struct file *file = req->stolen_file;
	struct fuse_file *ff = file->private_data;

	spin_lock(&fc->lock);
	fuse_request_init(req, req->pages, req->page_descs, req->max_pages);
	BUG_ON(ff->reserved_req);
	ff->reserved_req = req;
	wake_up_all(&fc->reserved_req_waitq);
	spin_unlock(&fc->lock);
	fput(file);
}

/*
 * Gets a request for a file operation, always succeeds
 *
 * This is used for sending the FLUSH request, which must get to
 * userspace, due to POSIX locks which may need to be unlocked.
 *
 * If allocation fails due to OOM, use the reserved request in
 * fuse_file.
 *
 * This is very unlikely to deadlock accidentally, since the
 * filesystem should not have its own file open.  If deadlock is
 * intentional, it can still be broken by "aborting" the filesystem.
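 *
 * The caller must release the request with fuse_put_request(), which
 * routes a stolen reserved request back via put_reserved_req().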
 */
struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc,
					     struct file *file)
{
	struct fuse_req *req;

	atomic_inc(&fc->num_waiting);
	wait_event(fc->blocked_waitq, fc->initialized);
	req = fuse_request_alloc(0);
	if (!req)
		req = get_reserved_req(fc, file);

	fuse_req_init_context(req);
	req->waiting = 1;
	req->background = 0;
	return req;
}

void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
{
	if (atomic_dec_and_test(&req->count)) {
		if (unlikely(req->background)) {
			/*
			 * We get here in the unlikely case that a background
			 * request was allocated but not sent
			 */
			spin_lock(&fc->lock);
			if (!fc->blocked)
				wake_up(&fc->blocked_waitq);
			spin_unlock(&fc->lock);
		}

		if (req->waiting)
			atomic_dec(&fc->num_waiting);

		if (req->stolen_file)
			put_reserved_req(fc, req);
		else
			fuse_request_free(req);
	}
}
EXPORT_SYMBOL_GPL(fuse_put_request);

static unsigned len_args(unsigned numargs, struct fuse_arg *args)
{
	unsigned nbytes = 0;
	unsigned i;

	for (i = 0; i < numargs; i++)
		nbytes += args[i].size;

	return nbytes;
}

static u64 fuse_get_unique(struct fuse_conn *fc)
{
	fc->reqctr++;
	/* zero is special */
	if (fc->reqctr == 0)
		fc->reqctr = 1;

	return fc->reqctr;
}

static void queue_request(struct fuse_conn *fc, struct fuse_req *req)
{
	req->in.h.len = sizeof(struct fuse_in_header) +
		len_args(req->in.numargs, (struct fuse_arg *) req->in.args);
	list_add_tail(&req->list, &fc->pending);
	req->state = FUSE_REQ_PENDING;
	if (!req->waiting) {
		req->waiting = 1;
		atomic_inc(&fc->num_waiting);
	}
	wake_up(&fc->waitq);
	kill_fasync(&fc->fasync, SIGIO, POLL_IN);
}

void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
		       u64 nodeid, u64 nlookup)
{
	forget->forget_one.nodeid = nodeid;
	forget->forget_one.nlookup = nlookup;

	spin_lock(&fc->lock);
	if (fc->connected) {
		fc->forget_list_tail->next = forget;
		fc->forget_list_tail = forget;
		wake_up(&fc->waitq);
		kill_fasync(&fc->fasync, SIGIO, POLL_IN);
	} else {
		kfree(forget);
	}
	spin_unlock(&fc->lock);
}

static void flush_bg_queue(struct fuse_conn *fc)
{
	while (fc->active_background < fc->max_background &&
	       !list_empty(&fc->bg_queue)) {
		struct fuse_req *req;

		req = list_entry(fc->bg_queue.next, struct fuse_req, list);
		list_del(&req->list);
		fc->active_background++;
		req->in.h.unique = fuse_get_unique(fc);
		queue_request(fc, req);
	}
}

/*
 * This function is called when a request is finished.  Either a reply
 * has arrived or it was aborted (and not yet sent) or some error
 * occurred during communication with userspace, or the device file
 * was closed.
 * The requester thread is woken up (if still waiting),
 * the 'end' callback is called if given, else the reference to the
 * request is released
 *
 * Called with fc->lock, unlocks it
 */
static void request_end(struct fuse_conn *fc, struct fuse_req *req)
__releases(fc->lock)
{
	void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
	req->end = NULL;
	list_del(&req->list);
	list_del(&req->intr_entry);
	req->state = FUSE_REQ_FINISHED;
	if (req->background) {
		req->background = 0;

		if (fc->num_background == fc->max_background)
			fc->blocked = 0;

		/* Wake up next waiter, if any */
		if (!fc->blocked && waitqueue_active(&fc->blocked_waitq))
			wake_up(&fc->blocked_waitq);

		if (fc->num_background == fc->congestion_threshold &&
		    fc->connected && fc->bdi_initialized) {
			clear_bdi_congested(&fc->bdi, BLK_RW_SYNC);
			clear_bdi_congested(&fc->bdi, BLK_RW_ASYNC);
		}
		fc->num_background--;
		fc->active_background--;
		flush_bg_queue(fc);
	}
	spin_unlock(&fc->lock);
	wake_up(&req->waitq);
	if (end)
		end(fc, req);
	fuse_put_request(fc, req);
}

static void wait_answer_interruptible(struct fuse_conn *fc,
				      struct fuse_req *req)
__releases(fc->lock)
__acquires(fc->lock)
{
	if (signal_pending(current))
		return;

	spin_unlock(&fc->lock);
	wait_event_interruptible(req->waitq, req->state == FUSE_REQ_FINISHED);
	spin_lock(&fc->lock);
}

static void queue_interrupt(struct fuse_conn *fc, struct fuse_req *req)
{
	list_add_tail(&req->intr_entry, &fc->interrupts);
	wake_up(&fc->waitq);
	kill_fasync(&fc->fasync, SIGIO, POLL_IN);
}

static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
__releases(fc->lock)
__acquires(fc->lock)
{
	if (!fc->no_interrupt) {
		/* Any signal may interrupt this */
		wait_answer_interruptible(fc, req);

		if (req->aborted)
			goto aborted;
		if (req->state == FUSE_REQ_FINISHED)
			return;

		req->interrupted = 1;
		if (req->state == FUSE_REQ_SENT)
			queue_interrupt(fc, req);
	}

	if (!req->force) {
		sigset_t oldset;

		/* Only fatal signals may interrupt this */
		block_sigs(&oldset);
		wait_answer_interruptible(fc, req);
		restore_sigs(&oldset);

		if (req->aborted)
			goto aborted;
		if (req->state == FUSE_REQ_FINISHED)
			return;

		/* Request is not yet in userspace, bail out */
		if (req->state == FUSE_REQ_PENDING) {
			list_del(&req->list);
			__fuse_put_request(req);
			req->out.h.error = -EINTR;
			return;
		}
	}

	/*
	 * Either request is already in userspace, or it was forced.
	 * Wait it out.
	 */
	spin_unlock(&fc->lock);

	while (req->state != FUSE_REQ_FINISHED)
		wait_event_freezable(req->waitq,
				     req->state == FUSE_REQ_FINISHED);
	spin_lock(&fc->lock);

	if (!req->aborted)
		return;

 aborted:
	BUG_ON(req->state != FUSE_REQ_FINISHED);
	if (req->locked) {
		/* This is uninterruptible sleep, because data is
		   being copied to/from the buffers of req.  During
		   locked state, there mustn't be any filesystem
		   operation (e.g.
		   page fault), since that could lead
		   to deadlock */
		spin_unlock(&fc->lock);
		wait_event(req->waitq, !req->locked);
		spin_lock(&fc->lock);
	}
}

static void __fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
{
	BUG_ON(req->background);
	spin_lock(&fc->lock);
	if (!fc->connected)
		req->out.h.error = -ENOTCONN;
	else if (fc->conn_error)
		req->out.h.error = -ECONNREFUSED;
	else {
		req->in.h.unique = fuse_get_unique(fc);
		queue_request(fc, req);
		/* acquire extra reference, since request is still needed
		   after request_end() */
		__fuse_get_request(req);

		request_wait_answer(fc, req);
	}
	spin_unlock(&fc->lock);
}

void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
{
	req->isreply = 1;
	__fuse_request_send(fc, req);
}
EXPORT_SYMBOL_GPL(fuse_request_send);

static void fuse_request_send_nowait_locked(struct fuse_conn *fc,
					    struct fuse_req *req)
{
	BUG_ON(!req->background);
	fc->num_background++;
	if (fc->num_background == fc->max_background)
		fc->blocked = 1;
	if (fc->num_background == fc->congestion_threshold &&
	    fc->bdi_initialized) {
		set_bdi_congested(&fc->bdi, BLK_RW_SYNC);
		set_bdi_congested(&fc->bdi, BLK_RW_ASYNC);
	}
	list_add_tail(&req->list, &fc->bg_queue);
	flush_bg_queue(fc);
}

static void fuse_request_send_nowait(struct fuse_conn *fc, struct fuse_req *req)
{
	spin_lock(&fc->lock);
	if (fc->connected) {
		fuse_request_send_nowait_locked(fc, req);
		spin_unlock(&fc->lock);
	} else {
		req->out.h.error = -ENOTCONN;
		request_end(fc, req);
	}
}

void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req)
{
	req->isreply = 1;
	fuse_request_send_nowait(fc, req);
}
EXPORT_SYMBOL_GPL(fuse_request_send_background);

static int fuse_request_send_notify_reply(struct fuse_conn *fc,
					  struct fuse_req *req, u64 unique)
{
	int err = -ENODEV;

	req->isreply = 0;
	req->in.h.unique = unique;
	spin_lock(&fc->lock);
	if (fc->connected) {
		queue_request(fc, req);
		err = 0;
	}
	spin_unlock(&fc->lock);

	return err;
}

/*
 * Called under fc->lock
 *
 * fc->connected must have been checked previously
 */
void fuse_request_send_background_locked(struct fuse_conn *fc,
					 struct fuse_req *req)
{
	req->isreply = 1;
	fuse_request_send_nowait_locked(fc, req);
}

void fuse_force_forget(struct file *file, u64 nodeid)
{
	struct inode *inode = file_inode(file);
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_req *req;
	struct fuse_forget_in inarg;

	memset(&inarg, 0, sizeof(inarg));
	inarg.nlookup = 1;
	req = fuse_get_req_nofail_nopages(fc, file);
	req->in.h.opcode = FUSE_FORGET;
	req->in.h.nodeid = nodeid;
	req->in.numargs = 1;
	req->in.args[0].size = sizeof(inarg);
	req->in.args[0].value = &inarg;
	req->isreply = 0;
	__fuse_request_send(fc, req);
	/* ignore errors */
	fuse_put_request(fc, req);
}

/*
 * Lock the request.  Up to the next unlock_request() there mustn't be
 * anything that could cause a page-fault.  If the request was already
 * aborted bail out.
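 *
 * Returns -ENOENT if the request was already aborted, zero otherwise.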
 */
static int lock_request(struct fuse_conn *fc, struct fuse_req *req)
{
	int err = 0;
	if (req) {
		spin_lock(&fc->lock);
		if (req->aborted)
			err = -ENOENT;
		else
			req->locked = 1;
		spin_unlock(&fc->lock);
	}
	return err;
}

/*
 * Unlock request.  If it was aborted during being locked, the
 * requester thread is currently waiting for it to be unlocked, so
 * wake it up.
 */
static void unlock_request(struct fuse_conn *fc, struct fuse_req *req)
{
	if (req) {
		spin_lock(&fc->lock);
		req->locked = 0;
		if (req->aborted)
			wake_up(&req->waitq);
		spin_unlock(&fc->lock);
	}
}

struct fuse_copy_state {
	struct fuse_conn *fc;
	int write;
	struct fuse_req *req;
	const struct iovec *iov;
	struct pipe_buffer *pipebufs;
	struct pipe_buffer *currbuf;
	struct pipe_inode_info *pipe;
	unsigned long nr_segs;
	unsigned long seglen;
	unsigned long addr;
	struct page *pg;
	unsigned len;
	unsigned offset;
	unsigned move_pages:1;
};

static void fuse_copy_init(struct fuse_copy_state *cs, struct fuse_conn *fc,
			   int write,
			   const struct iovec *iov, unsigned long nr_segs)
{
	memset(cs, 0, sizeof(*cs));
	cs->fc = fc;
	cs->write = write;
	cs->iov = iov;
	cs->nr_segs = nr_segs;
}

/* Unmap and put previous page of userspace buffer */
static void fuse_copy_finish(struct fuse_copy_state *cs)
{
	if (cs->currbuf) {
		struct pipe_buffer *buf = cs->currbuf;

		if (cs->write)
			buf->len = PAGE_SIZE - cs->len;
		cs->currbuf = NULL;
	} else if (cs->pg) {
		if (cs->write) {
			flush_dcache_page(cs->pg);
			set_page_dirty_lock(cs->pg);
		}
		put_page(cs->pg);
	}
	cs->pg = NULL;
}

/*
 * Get another pageful of userspace buffer, and map it to kernel
 * address space, and lock request
 */
static int fuse_copy_fill(struct fuse_copy_state *cs)
{
	struct page *page;
	int err;

	unlock_request(cs->fc, cs->req);
	fuse_copy_finish(cs);
	if (cs->pipebufs) {
		struct pipe_buffer *buf = cs->pipebufs;

		if (!cs->write) {
			err = buf->ops->confirm(cs->pipe, buf);
			if (err)
				return err;

			BUG_ON(!cs->nr_segs);
			cs->currbuf = buf;
			cs->pg = buf->page;
			cs->offset = buf->offset;
			cs->len = buf->len;
			cs->pipebufs++;
			cs->nr_segs--;
		} else {
			if (cs->nr_segs == cs->pipe->buffers)
				return -EIO;

			page = alloc_page(GFP_HIGHUSER);
			if (!page)
				return -ENOMEM;

			buf->page = page;
			buf->offset = 0;
			buf->len = 0;

			cs->currbuf = buf;
			cs->pg = page;
			cs->offset = 0;
			cs->len = PAGE_SIZE;
			cs->pipebufs++;
			cs->nr_segs++;
		}
	} else {
		if (!cs->seglen) {
			BUG_ON(!cs->nr_segs);
			cs->seglen = cs->iov[0].iov_len;
			cs->addr = (unsigned long) cs->iov[0].iov_base;
			cs->iov++;
			cs->nr_segs--;
		}
		err = get_user_pages_fast(cs->addr, 1, cs->write, &page);
		if (err < 0)
			return err;
		BUG_ON(err != 1);
		cs->pg = page;
		cs->offset = cs->addr % PAGE_SIZE;
		cs->len = min(PAGE_SIZE - cs->offset, cs->seglen);
		cs->seglen -= cs->len;
		cs->addr += cs->len;
	}

	return lock_request(cs->fc, cs->req);
}

/* Do as much copy to/from userspace buffer as we can */
static int fuse_copy_do(struct fuse_copy_state *cs, void **val, unsigned *size)
{
	unsigned ncpy = min(*size, cs->len);
	if (val) {
		void *pgaddr = kmap_atomic(cs->pg);
		void *buf =
			pgaddr + cs->offset;

		if (cs->write)
			memcpy(buf, *val, ncpy);
		else
			memcpy(*val, buf, ncpy);

		kunmap_atomic(pgaddr);
		*val += ncpy;
	}
	*size -= ncpy;
	cs->len -= ncpy;
	cs->offset += ncpy;
	return ncpy;
}

static int fuse_check_page(struct page *page)
{
	if (page_mapcount(page) ||
	    page->mapping != NULL ||
	    page_count(page) != 1 ||
	    (page->flags & PAGE_FLAGS_CHECK_AT_PREP &
	     ~(1 << PG_locked |
	       1 << PG_referenced |
	       1 << PG_uptodate |
	       1 << PG_lru |
	       1 << PG_active |
	       1 << PG_reclaim))) {
		printk(KERN_WARNING "fuse: trying to steal weird page\n");
		printk(KERN_WARNING "  page=%p index=%li flags=%08lx, count=%i, mapcount=%i, mapping=%p\n", page, page->index, page->flags, page_count(page), page_mapcount(page), page->mapping);
		return 1;
	}
	return 0;
}

static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
{
	int err;
	struct page *oldpage = *pagep;
	struct page *newpage;
	struct pipe_buffer *buf = cs->pipebufs;

	unlock_request(cs->fc, cs->req);
	fuse_copy_finish(cs);

	err = buf->ops->confirm(cs->pipe, buf);
	if (err)
		return err;

	BUG_ON(!cs->nr_segs);
	cs->currbuf = buf;
	cs->len = buf->len;
	cs->pipebufs++;
	cs->nr_segs--;

	if (cs->len != PAGE_SIZE)
		goto out_fallback;

	if (buf->ops->steal(cs->pipe, buf) != 0)
		goto out_fallback;

	newpage = buf->page;

	if (WARN_ON(!PageUptodate(newpage)))
		return -EIO;

	ClearPageMappedToDisk(newpage);

	if (fuse_check_page(newpage) != 0)
		goto out_fallback_unlock;

	/*
	 * This is a new and locked page, it shouldn't be mapped or
	 * have any special flags on it
	 */
	if (WARN_ON(page_mapped(oldpage)))
		goto out_fallback_unlock;
	if (WARN_ON(page_has_private(oldpage)))
		goto out_fallback_unlock;
	if (WARN_ON(PageDirty(oldpage) || PageWriteback(oldpage)))
		goto out_fallback_unlock;
	if (WARN_ON(PageMlocked(oldpage)))
		goto out_fallback_unlock;

	err = replace_page_cache_page(oldpage, newpage, GFP_KERNEL);
	if (err) {
		unlock_page(newpage);
		return err;
	}

	page_cache_get(newpage);

	if (!(buf->flags & PIPE_BUF_FLAG_LRU))
		lru_cache_add_file(newpage);

	err = 0;
	spin_lock(&cs->fc->lock);
	if (cs->req->aborted)
		err = -ENOENT;
	else
		*pagep = newpage;
	spin_unlock(&cs->fc->lock);

	if (err) {
		unlock_page(newpage);
		page_cache_release(newpage);
		return err;
	}

	unlock_page(oldpage);
	page_cache_release(oldpage);
	cs->len = 0;

	return 0;

out_fallback_unlock:
	unlock_page(newpage);
out_fallback:
	cs->pg = buf->page;
	cs->offset = buf->offset;

	err = lock_request(cs->fc, cs->req);
	if (err)
		return err;

	return 1;
}

static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page,
			 unsigned offset, unsigned count)
{
	struct pipe_buffer *buf;

	if (cs->nr_segs == cs->pipe->buffers)
		return -EIO;

	unlock_request(cs->fc, cs->req);
	fuse_copy_finish(cs);

	buf = cs->pipebufs;
	page_cache_get(page);
	buf->page = page;
	buf->offset = offset;
	buf->len = count;

	cs->pipebufs++;
	cs->nr_segs++;
	cs->len = 0;

	return 0;
}

/*
 * Copy a page in the request to/from the userspace buffer.
 * Must be done atomically
 */
static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
			  unsigned offset, unsigned count, int zeroing)
{
	int err;
	struct page *page = *pagep;

	if (page && zeroing && count < PAGE_SIZE)
		clear_highpage(page);

	while (count) {
		if (cs->write && cs->pipebufs && page) {
			return fuse_ref_page(cs, page, offset, count);
		} else if (!cs->len) {
			if (cs->move_pages && page &&
			    offset == 0 && count == PAGE_SIZE) {
				err = fuse_try_move_page(cs, pagep);
				if (err <= 0)
					return err;
			} else {
				err = fuse_copy_fill(cs);
				if (err)
					return err;
			}
		}
		if (page) {
			void *mapaddr = kmap_atomic(page);
			void *buf = mapaddr + offset;
			offset += fuse_copy_do(cs, &buf, &count);
			kunmap_atomic(mapaddr);
		} else
			offset += fuse_copy_do(cs, NULL, &count);
	}
	if (page && !cs->write)
		flush_dcache_page(page);
	return 0;
}

/* Copy pages in the request to/from userspace buffer */
static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
			   int zeroing)
{
	unsigned i;
	struct fuse_req *req = cs->req;

	for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) {
		int err;
		unsigned offset = req->page_descs[i].offset;
		unsigned count = min(nbytes, req->page_descs[i].length);

		err = fuse_copy_page(cs, &req->pages[i], offset, count,
				     zeroing);
		if (err)
			return err;

		nbytes -= count;
	}
	return 0;
}

/* Copy a single argument in the request to/from userspace buffer */
static int fuse_copy_one(struct fuse_copy_state *cs, void *val, unsigned size)
{
	while (size) {
		if (!cs->len) {
			int err = fuse_copy_fill(cs);
			if (err)
				return err;
		}
		fuse_copy_do(cs, &val, &size);
	}
	return 0;
}

/* Copy request arguments to/from userspace buffer */
static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs,
			  unsigned argpages, struct fuse_arg *args,
			  int zeroing)
{
	int err = 0;
	unsigned i;

	for (i = 0; !err && i < numargs; i++) {
		struct fuse_arg *arg = &args[i];
		if (i == numargs - 1 && argpages)
			err = fuse_copy_pages(cs, arg->size, zeroing);
		else
			err = fuse_copy_one(cs, arg->value, arg->size);
	}
	return err;
}

static int forget_pending(struct fuse_conn *fc)
{
	return fc->forget_list_head.next != NULL;
}

static int request_pending(struct fuse_conn *fc)
{
	return !list_empty(&fc->pending) || !list_empty(&fc->interrupts) ||
		forget_pending(fc);
}

/* Wait until a request is available on the pending list */
static void request_wait(struct fuse_conn *fc)
__releases(fc->lock)
__acquires(fc->lock)
{
	DECLARE_WAITQUEUE(wait, current);

	add_wait_queue_exclusive(&fc->waitq, &wait);
	while (fc->connected && !request_pending(fc)) {
		set_current_state(TASK_INTERRUPTIBLE);
		if (signal_pending(current))
			break;

		spin_unlock(&fc->lock);
		schedule();
		spin_lock(&fc->lock);
	}
	set_current_state(TASK_RUNNING);
	remove_wait_queue(&fc->waitq, &wait);
}

/*
 * Transfer an interrupt request to userspace
 *
 * Unlike other requests this is assembled on demand, without a need
 * to allocate a separate fuse_req structure.
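 *
 * The message consists of a fuse_in_header with opcode FUSE_INTERRUPT
 * followed by a fuse_interrupt_in carrying the unique number of the
 * request to be interrupted.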
 *
 * Called with fc->lock held, releases it
 */
static int fuse_read_interrupt(struct fuse_conn *fc, struct fuse_copy_state *cs,
			       size_t nbytes, struct fuse_req *req)
__releases(fc->lock)
{
	struct fuse_in_header ih;
	struct fuse_interrupt_in arg;
	unsigned reqsize = sizeof(ih) + sizeof(arg);
	int err;

	list_del_init(&req->intr_entry);
	req->intr_unique = fuse_get_unique(fc);
	memset(&ih, 0, sizeof(ih));
	memset(&arg, 0, sizeof(arg));
	ih.len = reqsize;
	ih.opcode = FUSE_INTERRUPT;
	ih.unique = req->intr_unique;
	arg.unique = req->in.h.unique;

	spin_unlock(&fc->lock);
	if (nbytes < reqsize)
		return -EINVAL;

	err = fuse_copy_one(cs, &ih, sizeof(ih));
	if (!err)
		err = fuse_copy_one(cs, &arg, sizeof(arg));
	fuse_copy_finish(cs);

	return err ? err : reqsize;
}

static struct fuse_forget_link *dequeue_forget(struct fuse_conn *fc,
					       unsigned max,
					       unsigned *countp)
{
	struct fuse_forget_link *head = fc->forget_list_head.next;
	struct fuse_forget_link **newhead = &head;
	unsigned count;

	for (count = 0; *newhead != NULL && count < max; count++)
		newhead = &(*newhead)->next;

	fc->forget_list_head.next = *newhead;
	*newhead = NULL;
	if (fc->forget_list_head.next == NULL)
		fc->forget_list_tail = &fc->forget_list_head;

	if (countp != NULL)
		*countp = count;

	return head;
}

static int fuse_read_single_forget(struct fuse_conn *fc,
				   struct fuse_copy_state *cs,
				   size_t nbytes)
__releases(fc->lock)
{
	int err;
	struct fuse_forget_link *forget = dequeue_forget(fc, 1, NULL);
	struct fuse_forget_in arg = {
		.nlookup = forget->forget_one.nlookup,
	};
	struct fuse_in_header ih = {
		.opcode = FUSE_FORGET,
		.nodeid = forget->forget_one.nodeid,
		.unique = fuse_get_unique(fc),
		.len = sizeof(ih) + sizeof(arg),
	};

	spin_unlock(&fc->lock);
	kfree(forget);
	if (nbytes < ih.len)
		return -EINVAL;

	err = fuse_copy_one(cs, &ih, sizeof(ih));
	if (!err)
		err = fuse_copy_one(cs, &arg, sizeof(arg));
	fuse_copy_finish(cs);

	if (err)
		return err;

	return ih.len;
}

static int fuse_read_batch_forget(struct fuse_conn *fc,
				  struct fuse_copy_state *cs, size_t nbytes)
__releases(fc->lock)
{
	int err;
	unsigned max_forgets;
	unsigned count;
	struct fuse_forget_link *head;
	struct fuse_batch_forget_in arg = { .count = 0 };
	struct fuse_in_header ih = {
		.opcode = FUSE_BATCH_FORGET,
		.unique = fuse_get_unique(fc),
		.len = sizeof(ih) + sizeof(arg),
	};

	if (nbytes < ih.len) {
		spin_unlock(&fc->lock);
		return -EINVAL;
	}

	max_forgets = (nbytes - ih.len) / sizeof(struct fuse_forget_one);
	head = dequeue_forget(fc, max_forgets, &count);
	spin_unlock(&fc->lock);

	arg.count = count;
	ih.len += count * sizeof(struct fuse_forget_one);
	err = fuse_copy_one(cs, &ih, sizeof(ih));
	if (!err)
		err = fuse_copy_one(cs, &arg, sizeof(arg));

	while (head) {
		struct fuse_forget_link *forget = head;

		if (!err) {
			err = fuse_copy_one(cs, &forget->forget_one,
					    sizeof(forget->forget_one));
		}
		head = forget->next;
		kfree(forget);
	}

	fuse_copy_finish(cs);

	if (err)
		return err;

	return ih.len;
}

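/*
 * Send queued forgets to userspace: old daemons (protocol minor < 16)
 * get a single FORGET per message; newer ones get one BATCH_FORGET
 * whenever more than one forget is queued.
 */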
static int fuse_read_forget(struct fuse_conn *fc, struct fuse_copy_state *cs,
			    size_t nbytes)
__releases(fc->lock)
{
	if (fc->minor < 16 || fc->forget_list_head.next->next == NULL)
		return fuse_read_single_forget(fc, cs, nbytes);
	else
		return fuse_read_batch_forget(fc, cs, nbytes);
}

/*
 * Read a single request into the userspace filesystem's buffer.  This
 * function waits until a request is available, then removes it from
 * the pending list and copies request data to userspace buffer.  If
 * no reply is needed (FORGET) or request has been aborted or there
 * was an error during the copying then it's finished by calling
 * request_end().  Otherwise add it to the processing list, and set
 * the 'sent' flag.
 */
static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file,
				struct fuse_copy_state *cs, size_t nbytes)
{
	int err;
	struct fuse_req *req;
	struct fuse_in *in;
	unsigned reqsize;

 restart:
	spin_lock(&fc->lock);
	err = -EAGAIN;
	if ((file->f_flags & O_NONBLOCK) && fc->connected &&
	    !request_pending(fc))
		goto err_unlock;

	request_wait(fc);
	err = -ENODEV;
	if (!fc->connected)
		goto err_unlock;
	err = -ERESTARTSYS;
	if (!request_pending(fc))
		goto err_unlock;

	if (!list_empty(&fc->interrupts)) {
		req = list_entry(fc->interrupts.next, struct fuse_req,
				 intr_entry);
		return fuse_read_interrupt(fc, cs, nbytes, req);
	}

	if (forget_pending(fc)) {
		if (list_empty(&fc->pending) || fc->forget_batch-- > 0)
			return fuse_read_forget(fc, cs, nbytes);

		if (fc->forget_batch <= -8)
			fc->forget_batch = 16;
	}

	req = list_entry(fc->pending.next, struct fuse_req, list);
	req->state = FUSE_REQ_READING;
	list_move(&req->list, &fc->io);

	in = &req->in;
	reqsize = in->h.len;
	/* If request is too large, reply with an error and restart the read */
	if (nbytes < reqsize) {
		req->out.h.error = -EIO;
		/* SETXATTR is special, since it may contain too large data */
		if (in->h.opcode == FUSE_SETXATTR)
			req->out.h.error = -E2BIG;
		request_end(fc, req);
		goto restart;
	}
	spin_unlock(&fc->lock);
	cs->req = req;
	err = fuse_copy_one(cs, &in->h, sizeof(in->h));
	if (!err)
		err = fuse_copy_args(cs, in->numargs, in->argpages,
				     (struct fuse_arg *) in->args, 0);
	fuse_copy_finish(cs);
	spin_lock(&fc->lock);
	req->locked = 0;
	if (req->aborted) {
		request_end(fc, req);
		return -ENODEV;
	}
	if (err) {
		req->out.h.error = -EIO;
		request_end(fc, req);
		return err;
	}
	if (!req->isreply)
		request_end(fc, req);
	else {
		req->state = FUSE_REQ_SENT;
		list_move_tail(&req->list, &fc->processing);
		if (req->interrupted)
			queue_interrupt(fc, req);
		spin_unlock(&fc->lock);
	}
	return reqsize;

 err_unlock:
	spin_unlock(&fc->lock);
	return err;
}

static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov,
			     unsigned long nr_segs, loff_t pos)
{
	struct fuse_copy_state cs;
	struct file *file = iocb->ki_filp;
	struct fuse_conn *fc = fuse_get_conn(file);
	if (!fc)
		return -EPERM;

	fuse_copy_init(&cs, fc, 1, iov, nr_segs);

	return fuse_dev_do_read(fc, file, &cs, iov_length(iov, nr_segs));
}

static ssize_t
fuse_dev_splice_read(struct file *in, loff_t *ppos,
		     struct pipe_inode_info *pipe,
		     size_t len, unsigned int flags)
{
	int ret;
	int page_nr = 0;
	int do_wakeup = 0;
	struct pipe_buffer *bufs;
	struct fuse_copy_state cs;
	struct fuse_conn *fc = fuse_get_conn(in);
	if (!fc)
		return -EPERM;

	bufs = kmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL);
	if (!bufs)
		return -ENOMEM;

	fuse_copy_init(&cs, fc, 1, NULL, 0);
	cs.pipebufs = bufs;
	cs.pipe = pipe;
	ret = fuse_dev_do_read(fc, in, &cs, len);
	if (ret < 0)
		goto out;

	ret = 0;
	pipe_lock(pipe);

	if (!pipe->readers) {
		send_sig(SIGPIPE, current, 0);
		if (!ret)
			ret = -EPIPE;
		goto out_unlock;
	}

	if (pipe->nrbufs + cs.nr_segs > pipe->buffers) {
		ret = -EIO;
		goto out_unlock;
	}

	while (page_nr < cs.nr_segs) {
		int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
		struct pipe_buffer *buf = pipe->bufs + newbuf;

		buf->page = bufs[page_nr].page;
		buf->offset = bufs[page_nr].offset;
		buf->len = bufs[page_nr].len;
		/*
		 * Need to be careful about this. Having buf->ops in module
		 * code can Oops if the buffer persists after module unload.
		 */
		buf->ops = &nosteal_pipe_buf_ops;

		pipe->nrbufs++;
		page_nr++;
		ret += buf->len;

		if (pipe->files)
			do_wakeup = 1;
	}

out_unlock:
	pipe_unlock(pipe);

	if (do_wakeup) {
		smp_mb();
		if (waitqueue_active(&pipe->wait))
			wake_up_interruptible(&pipe->wait);
		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
	}

out:
	for (; page_nr < cs.nr_segs; page_nr++)
		page_cache_release(bufs[page_nr].page);

	kfree(bufs);
	return ret;
}

static int fuse_notify_poll(struct fuse_conn *fc, unsigned int size,
			    struct fuse_copy_state *cs)
{
	struct fuse_notify_poll_wakeup_out outarg;
	int err = -EINVAL;

	if (size != sizeof(outarg))
		goto err;

	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
	if (err)
		goto err;

	fuse_copy_finish(cs);
	return fuse_notify_poll_wakeup(fc, &outarg);

err:
	fuse_copy_finish(cs);
	return err;
}

static int fuse_notify_inval_inode(struct fuse_conn *fc, unsigned int size,
				   struct fuse_copy_state *cs)
{
	struct fuse_notify_inval_inode_out outarg;
	int err = -EINVAL;

	if (size != sizeof(outarg))
		goto err;

	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
	if (err)
		goto err;
	fuse_copy_finish(cs);

	down_read(&fc->killsb);
	err = -ENOENT;
	if (fc->sb) {
		err = fuse_reverse_inval_inode(fc->sb, outarg.ino,
					       outarg.off, outarg.len);
	}
	up_read(&fc->killsb);
	return err;

err:
	fuse_copy_finish(cs);
	return err;
}

static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size,
				   struct fuse_copy_state *cs)
{
	struct fuse_notify_inval_entry_out outarg;
	int err = -ENOMEM;
	char *buf;
	struct qstr name;

	buf = kzalloc(FUSE_NAME_MAX + 1, GFP_KERNEL);
	if (!buf)
		goto err;

	err = -EINVAL;
	if (size < sizeof(outarg))
		goto err;

	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
	if (err)
		goto err;

	err = -ENAMETOOLONG;
	if (outarg.namelen > FUSE_NAME_MAX)
		goto err;
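	/* The header is followed by the name plus a NUL terminator */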

	err = -EINVAL;
	if (size != sizeof(outarg) + outarg.namelen + 1)
		goto err;

	name.name = buf;
	name.len = outarg.namelen;
	err = fuse_copy_one(cs, buf, outarg.namelen + 1);
	if (err)
		goto err;
	fuse_copy_finish(cs);
	buf[outarg.namelen] = 0;
	name.hash = full_name_hash(name.name, name.len);

	down_read(&fc->killsb);
	err = -ENOENT;
	if (fc->sb)
		err = fuse_reverse_inval_entry(fc->sb, outarg.parent, 0, &name);
	up_read(&fc->killsb);
	kfree(buf);
	return err;

err:
	kfree(buf);
	fuse_copy_finish(cs);
	return err;
}

static int fuse_notify_delete(struct fuse_conn *fc, unsigned int size,
			      struct fuse_copy_state *cs)
{
	struct fuse_notify_delete_out outarg;
	int err = -ENOMEM;
	char *buf;
	struct qstr name;

	buf = kzalloc(FUSE_NAME_MAX + 1, GFP_KERNEL);
	if (!buf)
		goto err;

	err = -EINVAL;
	if (size < sizeof(outarg))
		goto err;

	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
	if (err)
		goto err;

	err = -ENAMETOOLONG;
	if (outarg.namelen > FUSE_NAME_MAX)
		goto err;

	err = -EINVAL;
	if (size != sizeof(outarg) + outarg.namelen + 1)
		goto err;

	name.name = buf;
	name.len = outarg.namelen;
	err = fuse_copy_one(cs, buf, outarg.namelen + 1);
	if (err)
		goto err;
	fuse_copy_finish(cs);
	buf[outarg.namelen] = 0;
	name.hash = full_name_hash(name.name, name.len);

	down_read(&fc->killsb);
	err = -ENOENT;
	if (fc->sb)
		err = fuse_reverse_inval_entry(fc->sb, outarg.parent,
					       outarg.child, &name);
	up_read(&fc->killsb);
	kfree(buf);
	return err;

err:
	kfree(buf);
	fuse_copy_finish(cs);
	return err;
}

static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
			     struct fuse_copy_state *cs)
{
	struct fuse_notify_store_out outarg;
	struct inode *inode;
	struct address_space *mapping;
	u64 nodeid;
	int err;
	pgoff_t index;
	unsigned int offset;
	unsigned int num;
	loff_t file_size;
	loff_t end;

	err = -EINVAL;
	if (size < sizeof(outarg))
		goto out_finish;

	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
	if (err)
		goto out_finish;

	err = -EINVAL;
	if (size - sizeof(outarg) != outarg.size)
		goto out_finish;

	nodeid = outarg.nodeid;

	down_read(&fc->killsb);

	err = -ENOENT;
	if (!fc->sb)
		goto out_up_killsb;

	inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
	if (!inode)
		goto out_up_killsb;

	mapping = inode->i_mapping;
	index = outarg.offset >> PAGE_CACHE_SHIFT;
	offset = outarg.offset & ~PAGE_CACHE_MASK;
	file_size = i_size_read(inode);
	end = outarg.offset + outarg.size;
	if (end > file_size) {
		file_size = end;
		fuse_write_update_size(inode, file_size);
	}

	num = outarg.size;
	while (num) {
		struct page *page;
		unsigned int this_num;

		err = -ENOMEM;
		page = find_or_create_page(mapping, index,
					   mapping_gfp_mask(mapping));
		if (!page)
			goto out_iput;

		this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset);
		err = fuse_copy_page(cs, &page, offset, this_num, 0);
		if (!err && offset == 0 &&
		    (this_num == PAGE_CACHE_SIZE || file_size == end))
			SetPageUptodate(page);
		unlock_page(page);
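		/* drop the reference taken by find_or_create_page() */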
		page_cache_release(page);

		if (err)
			goto out_iput;

		num -= this_num;
		offset = 0;
		index++;
	}

	err = 0;

out_iput:
	iput(inode);
out_up_killsb:
	up_read(&fc->killsb);
out_finish:
	fuse_copy_finish(cs);
	return err;
}

static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_req *req)
{
	release_pages(req->pages, req->num_pages, false);
}

static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
			 struct fuse_notify_retrieve_out *outarg)
{
	int err;
	struct address_space *mapping = inode->i_mapping;
	struct fuse_req *req;
	pgoff_t index;
	loff_t file_size;
	unsigned int num;
	unsigned int offset;
	size_t total_len = 0;
	int num_pages;

	offset = outarg->offset & ~PAGE_CACHE_MASK;
	file_size = i_size_read(inode);

	num = outarg->size;
	if (outarg->offset > file_size)
		num = 0;
	else if (outarg->offset + num > file_size)
		num = file_size - outarg->offset;

	num_pages = (num + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
	num_pages = min(num_pages, FUSE_MAX_PAGES_PER_REQ);

	req = fuse_get_req(fc, num_pages);
	if (IS_ERR(req))
		return PTR_ERR(req);

	req->in.h.opcode = FUSE_NOTIFY_REPLY;
	req->in.h.nodeid = outarg->nodeid;
	req->in.numargs = 2;
	req->in.argpages = 1;
	req->page_descs[0].offset = offset;
	req->end = fuse_retrieve_end;

	index = outarg->offset >> PAGE_CACHE_SHIFT;

	while (num && req->num_pages < num_pages) {
		struct page *page;
		unsigned int this_num;

		page = find_get_page(mapping, index);
		if (!page)
			break;

		this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset);
		req->pages[req->num_pages] = page;
		req->page_descs[req->num_pages].length = this_num;
		req->num_pages++;

		offset = 0;
		num -= this_num;
		total_len += this_num;
		index++;
	}
	req->misc.retrieve_in.offset = outarg->offset;
	req->misc.retrieve_in.size = total_len;
	req->in.args[0].size = sizeof(req->misc.retrieve_in);
	req->in.args[0].value = &req->misc.retrieve_in;
	req->in.args[1].size = total_len;

	err = fuse_request_send_notify_reply(fc, req, outarg->notify_unique);
	if (err)
		fuse_retrieve_end(fc, req);

	return err;
}

static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size,
				struct fuse_copy_state *cs)
{
	struct fuse_notify_retrieve_out outarg;
	struct inode *inode;
	int err;

	err = -EINVAL;
	if (size != sizeof(outarg))
		goto copy_finish;

	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
	if (err)
		goto copy_finish;

	fuse_copy_finish(cs);

	down_read(&fc->killsb);
	err = -ENOENT;
	if (fc->sb) {
		u64 nodeid = outarg.nodeid;

		inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
		if (inode) {
			err = fuse_retrieve(fc, inode, &outarg);
			iput(inode);
		}
	}
	up_read(&fc->killsb);

	return err;

copy_finish:
	fuse_copy_finish(cs);
	return err;
}

static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
		       unsigned int size, struct fuse_copy_state *cs)
{
	switch (code) {
	case FUSE_NOTIFY_POLL:
		return fuse_notify_poll(fc, size, cs);

	case FUSE_NOTIFY_INVAL_INODE:
		return fuse_notify_inval_inode(fc, size,
					       cs);

	case FUSE_NOTIFY_INVAL_ENTRY:
		return fuse_notify_inval_entry(fc, size, cs);

	case FUSE_NOTIFY_STORE:
		return fuse_notify_store(fc, size, cs);

	case FUSE_NOTIFY_RETRIEVE:
		return fuse_notify_retrieve(fc, size, cs);

	case FUSE_NOTIFY_DELETE:
		return fuse_notify_delete(fc, size, cs);

	default:
		fuse_copy_finish(cs);
		return -EINVAL;
	}
}

/* Look up request on processing list by unique ID */
static struct fuse_req *request_find(struct fuse_conn *fc, u64 unique)
{
	struct fuse_req *req;

	list_for_each_entry(req, &fc->processing, list) {
		if (req->in.h.unique == unique || req->intr_unique == unique)
			return req;
	}
	return NULL;
}

static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out,
			 unsigned nbytes)
{
	unsigned reqsize = sizeof(struct fuse_out_header);

	if (out->h.error)
		return nbytes != reqsize ? -EINVAL : 0;

	reqsize += len_args(out->numargs, out->args);

	if (reqsize < nbytes || (reqsize > nbytes && !out->argvar))
		return -EINVAL;
	else if (reqsize > nbytes) {
		struct fuse_arg *lastarg = &out->args[out->numargs-1];
		unsigned diffsize = reqsize - nbytes;
		if (diffsize > lastarg->size)
			return -EINVAL;
		lastarg->size -= diffsize;
	}
	return fuse_copy_args(cs, out->numargs, out->argpages, out->args,
			      out->page_zeroing);
}

/*
 * Write a single reply to a request.  First the header is copied from
 * the write buffer.  The request is then searched on the processing
 * list by the unique ID found in the header.  If found, then remove
 * it from the list and copy the rest of the buffer to the request.
 * The request is finished by calling request_end()
 */
static ssize_t fuse_dev_do_write(struct fuse_conn *fc,
				 struct fuse_copy_state *cs, size_t nbytes)
{
	int err;
	struct fuse_req *req;
	struct fuse_out_header oh;

	if (nbytes < sizeof(struct fuse_out_header))
		return -EINVAL;

	err = fuse_copy_one(cs, &oh, sizeof(oh));
	if (err)
		goto err_finish;

	err = -EINVAL;
	if (oh.len != nbytes)
		goto err_finish;

	/*
	 * Zero oh.unique indicates unsolicited notification message
	 * and error contains notification code.
	 */
	if (!oh.unique) {
		err = fuse_notify(fc, oh.error, nbytes - sizeof(oh), cs);
		return err ? err : nbytes;
	}

	err = -EINVAL;
	if (oh.error <= -1000 || oh.error > 0)
		goto err_finish;

	spin_lock(&fc->lock);
	err = -ENOENT;
	if (!fc->connected)
		goto err_unlock;

	req = request_find(fc, oh.unique);
	if (!req)
		goto err_unlock;

	if (req->aborted) {
		spin_unlock(&fc->lock);
		fuse_copy_finish(cs);
		spin_lock(&fc->lock);
		request_end(fc, req);
		return -ENOENT;
	}
	/* Is it an interrupt reply? */
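	/* -ENOSYS means the daemon lacks INTERRUPT support; -EAGAIN requeues */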
	if (req->intr_unique == oh.unique) {
		err = -EINVAL;
		if (nbytes != sizeof(struct fuse_out_header))
			goto err_unlock;

		if (oh.error == -ENOSYS)
			fc->no_interrupt = 1;
		else if (oh.error == -EAGAIN)
			queue_interrupt(fc, req);

		spin_unlock(&fc->lock);
		fuse_copy_finish(cs);
		return nbytes;
	}

	req->state = FUSE_REQ_WRITING;
	list_move(&req->list, &fc->io);
	req->out.h = oh;
	req->locked = 1;
	cs->req = req;
	if (!req->out.page_replace)
		cs->move_pages = 0;
	spin_unlock(&fc->lock);

	err = copy_out_args(cs, &req->out, nbytes);
	fuse_copy_finish(cs);

	spin_lock(&fc->lock);
	req->locked = 0;
	if (!err) {
		if (req->aborted)
			err = -ENOENT;
	} else if (!req->aborted)
		req->out.h.error = -EIO;
	request_end(fc, req);

	return err ? err : nbytes;

 err_unlock:
	spin_unlock(&fc->lock);
 err_finish:
	fuse_copy_finish(cs);
	return err;
}

static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov,
			      unsigned long nr_segs, loff_t pos)
{
	struct fuse_copy_state cs;
	struct fuse_conn *fc = fuse_get_conn(iocb->ki_filp);
	if (!fc)
		return -EPERM;

	fuse_copy_init(&cs, fc, 0, iov, nr_segs);

	return fuse_dev_do_write(fc, &cs, iov_length(iov, nr_segs));
}

static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
				     struct file *out, loff_t *ppos,
				     size_t len, unsigned int flags)
{
	unsigned nbuf;
	unsigned idx;
	struct pipe_buffer *bufs;
	struct fuse_copy_state cs;
	struct fuse_conn *fc;
	size_t rem;
	ssize_t ret;

	fc = fuse_get_conn(out);
	if (!fc)
		return -EPERM;

	bufs = kmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL);
	if (!bufs)
		return -ENOMEM;

	pipe_lock(pipe);
	nbuf = 0;
	rem = 0;
	for (idx = 0; idx < pipe->nrbufs && rem < len; idx++)
		rem += pipe->bufs[(pipe->curbuf + idx) & (pipe->buffers - 1)].len;

	ret = -EINVAL;
	if (rem < len) {
		pipe_unlock(pipe);
		goto out;
	}

	rem = len;
	while (rem) {
		struct pipe_buffer *ibuf;
		struct pipe_buffer *obuf;

		BUG_ON(nbuf >= pipe->buffers);
		BUG_ON(!pipe->nrbufs);
		ibuf = &pipe->bufs[pipe->curbuf];
		obuf = &bufs[nbuf];

		if (rem >= ibuf->len) {
			*obuf = *ibuf;
			ibuf->ops = NULL;
			pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
			pipe->nrbufs--;
		} else {
			ibuf->ops->get(pipe, ibuf);
			*obuf = *ibuf;
			obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
			obuf->len = rem;
			ibuf->offset += obuf->len;
			ibuf->len -= obuf->len;
		}
		nbuf++;
		rem -= obuf->len;
	}
	pipe_unlock(pipe);

	fuse_copy_init(&cs, fc, 0, NULL, nbuf);
	cs.pipebufs = bufs;
	cs.pipe = pipe;

	if (flags & SPLICE_F_MOVE)
		cs.move_pages = 1;

	ret = fuse_dev_do_write(fc, &cs, len);

	for (idx = 0; idx < nbuf; idx++) {
		struct pipe_buffer *buf = &bufs[idx];
		buf->ops->release(pipe, buf);
	}
out:
	kfree(bufs);
	return ret;
}

static unsigned fuse_dev_poll(struct file *file, poll_table *wait)
{
	unsigned mask = POLLOUT | POLLWRNORM;
	struct fuse_conn *fc = fuse_get_conn(file);
	if (!fc)
		return POLLERR;

	poll_wait(file, &fc->waitq, wait);

	spin_lock(&fc->lock);
	if (!fc->connected)
		mask = POLLERR;
	else if (request_pending(fc))
		mask |= POLLIN | POLLRDNORM;
	spin_unlock(&fc->lock);

	return mask;
}

/*
 * Abort all requests on the given list (pending or processing)
 *
 * This function releases and reacquires fc->lock
 */
static void end_requests(struct fuse_conn *fc, struct list_head *head)
__releases(fc->lock)
__acquires(fc->lock)
{
	while (!list_empty(head)) {
		struct fuse_req *req;
		req = list_entry(head->next, struct fuse_req, list);
		req->out.h.error = -ECONNABORTED;
		request_end(fc, req);
		spin_lock(&fc->lock);
	}
}

/*
 * Abort requests under I/O
 *
 * The requests are set to aborted and finished, and the request
 * waiter is woken up.  This will make request_wait_answer() wait
 * until the request is unlocked and then return.
 *
 * If the request is asynchronous, then the end function needs to be
 * called after waiting for the request to be unlocked (if it was
 * locked).
 */
static void end_io_requests(struct fuse_conn *fc)
__releases(fc->lock)
__acquires(fc->lock)
{
	while (!list_empty(&fc->io)) {
		struct fuse_req *req =
			list_entry(fc->io.next, struct fuse_req, list);
		void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;

		req->aborted = 1;
		req->out.h.error = -ECONNABORTED;
		req->state = FUSE_REQ_FINISHED;
		list_del_init(&req->list);
		wake_up(&req->waitq);
		if (end) {
			req->end = NULL;
			__fuse_get_request(req);
			spin_unlock(&fc->lock);
			wait_event(req->waitq, !req->locked);
			end(fc, req);
			fuse_put_request(fc, req);
			spin_lock(&fc->lock);
		}
	}
}

static void end_queued_requests(struct fuse_conn *fc)
__releases(fc->lock)
__acquires(fc->lock)
{
	fc->max_background = UINT_MAX;
	flush_bg_queue(fc);
	end_requests(fc, &fc->pending);
	end_requests(fc, &fc->processing);
	while (forget_pending(fc))
		kfree(dequeue_forget(fc, 1, NULL));
}

static void end_polls(struct fuse_conn *fc)
{
	struct rb_node *p;

	p = rb_first(&fc->polled_files);

	while (p) {
		struct fuse_file *ff;
		ff = rb_entry(p, struct fuse_file, polled_node);
		wake_up_interruptible_all(&ff->poll_wait);

		p = rb_next(p);
	}
}

/*
 * Abort all requests.
 *
 * Emergency exit in case of a malicious or accidental deadlock, or
 * just a hung filesystem.
 *
 * The same effect is usually achievable through killing the
 * filesystem daemon and all users of the filesystem.  The exception
 * is the combination of an asynchronous request and the tricky
 * deadlock (see Documentation/filesystems/fuse.txt).
 *
 * During the aborting, progression of requests from the pending and
 * processing lists onto the io list, and progression of new requests
 * onto the pending list is prevented by fc->connected being false.
 *
 * Progression of requests under I/O to the processing list is
 * prevented by the req->aborted flag being true for these requests.
 * For this reason requests on the io list must be aborted first.
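 *
 * fuse_abort_conn() below implements this ordering: end_io_requests()
 * runs before end_queued_requests().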
 */
void fuse_abort_conn(struct fuse_conn *fc)
{
	spin_lock(&fc->lock);
	if (fc->connected) {
		fc->connected = 0;
		fc->blocked = 0;
		fc->initialized = 1;
		end_io_requests(fc);
		end_queued_requests(fc);
		end_polls(fc);
		wake_up_all(&fc->waitq);
		wake_up_all(&fc->blocked_waitq);
		kill_fasync(&fc->fasync, SIGIO, POLL_IN);
	}
	spin_unlock(&fc->lock);
}
EXPORT_SYMBOL_GPL(fuse_abort_conn);

int fuse_dev_release(struct inode *inode, struct file *file)
{
	struct fuse_conn *fc = fuse_get_conn(file);
	if (fc) {
		spin_lock(&fc->lock);
		fc->connected = 0;
		fc->blocked = 0;
		fc->initialized = 1;
		end_queued_requests(fc);
		end_polls(fc);
		wake_up_all(&fc->blocked_waitq);
		spin_unlock(&fc->lock);
		fuse_conn_put(fc);
	}

	return 0;
}
EXPORT_SYMBOL_GPL(fuse_dev_release);

static int fuse_dev_fasync(int fd, struct file *file, int on)
{
	struct fuse_conn *fc = fuse_get_conn(file);
	if (!fc)
		return -EPERM;

	/* No locking - fasync_helper does its own locking */
	return fasync_helper(fd, file, on, &fc->fasync);
}

const struct file_operations fuse_dev_operations = {
	.owner		= THIS_MODULE,
	.llseek		= no_llseek,
	.read		= do_sync_read,
	.aio_read	= fuse_dev_read,
	.splice_read	= fuse_dev_splice_read,
	.write		= do_sync_write,
	.aio_write	= fuse_dev_write,
	.splice_write	= fuse_dev_splice_write,
	.poll		= fuse_dev_poll,
	.release	= fuse_dev_release,
	.fasync		= fuse_dev_fasync,
};
EXPORT_SYMBOL_GPL(fuse_dev_operations);

static struct miscdevice fuse_miscdevice = {
	.minor = FUSE_MINOR,
	.name  = "fuse",
	.fops = &fuse_dev_operations,
};

int __init fuse_dev_init(void)
{
	int err = -ENOMEM;
	fuse_req_cachep = kmem_cache_create("fuse_request",
					    sizeof(struct fuse_req),
					    0, 0, NULL);
	if (!fuse_req_cachep)
		goto out;

	err = misc_register(&fuse_miscdevice);
	if (err)
		goto out_cache_clean;

	return 0;

 out_cache_clean:
	kmem_cache_destroy(fuse_req_cachep);
 out:
	return err;
}

void fuse_dev_cleanup(void)
{
	misc_deregister(&fuse_miscdevice);
	kmem_cache_destroy(fuse_req_cachep);
}