1/* 2 * The Virtio 9p transport driver 3 * 4 * This is a block based transport driver based on the lguest block driver 5 * code. 6 * 7 * Copyright (C) 2007, 2008 Eric Van Hensbergen, IBM Corporation 8 * 9 * Based on virtio console driver 10 * Copyright (C) 2006, 2007 Rusty Russell, IBM Corporation 11 * 12 * This program is free software; you can redistribute it and/or modify 13 * it under the terms of the GNU General Public License version 2 14 * as published by the Free Software Foundation. 15 * 16 * This program is distributed in the hope that it will be useful, 17 * but WITHOUT ANY WARRANTY; without even the implied warranty of 18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 19 * GNU General Public License for more details. 20 * 21 * You should have received a copy of the GNU General Public License 22 * along with this program; if not, write to: 23 * Free Software Foundation 24 * 51 Franklin Street, Fifth Floor 25 * Boston, MA 02111-1301 USA 26 * 27 */ 28 29#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 30 31#include <linux/in.h> 32#include <linux/module.h> 33#include <linux/net.h> 34#include <linux/ipv6.h> 35#include <linux/errno.h> 36#include <linux/kernel.h> 37#include <linux/un.h> 38#include <linux/uaccess.h> 39#include <linux/inet.h> 40#include <linux/idr.h> 41#include <linux/file.h> 42#include <linux/highmem.h> 43#include <linux/slab.h> 44#include <net/9p/9p.h> 45#include <linux/parser.h> 46#include <net/9p/client.h> 47#include <net/9p/transport.h> 48#include <linux/scatterlist.h> 49#include <linux/swap.h> 50#include <linux/virtio.h> 51#include <linux/virtio_9p.h> 52#include "trans_common.h" 53 54#define VIRTQUEUE_NUM 128 55 56/* a single mutex to manage channel initialization and attachment */ 57static DEFINE_MUTEX(virtio_9p_lock); 58static DECLARE_WAIT_QUEUE_HEAD(vp_wq); 59static atomic_t vp_pinned = ATOMIC_INIT(0); 60 61/** 62 * struct virtio_chan - per-instance transport information 63 * @initialized: whether the channel is initialized 64 * @inuse: whether the channel is in use 65 * @lock: protects multiple elements within this structure 66 * @client: client instance 67 * @vdev: virtio dev associated with this channel 68 * @vq: virtio queue associated with this channel 69 * @sg: scatter gather list which is used to pack a request (protected?) 70 * 71 * We keep all per-channel information in a structure. 72 * This structure is allocated within the devices dev->mem space. 73 * A pointer to the structure will get put in the transport private. 74 * 75 */ 76 77struct virtio_chan { 78 bool inuse; 79 80 spinlock_t lock; 81 82 struct p9_client *client; 83 struct virtio_device *vdev; 84 struct virtqueue *vq; 85 int ring_bufs_avail; 86 wait_queue_head_t *vc_wq; 87 /* This is global limit. Since we don't have a global structure, 88 * will be placing it in each channel. 89 */ 90 unsigned long p9_max_pages; 91 /* Scatterlist: can be too big for stack. */ 92 struct scatterlist sg[VIRTQUEUE_NUM]; 93 94 int tag_len; 95 /* 96 * tag name to identify a mount Non-null terminated 97 */ 98 char *tag; 99 100 struct list_head chan_list; 101}; 102 103static struct list_head virtio_chan_list; 104 105/* How many bytes left in this page. */ 106static unsigned int rest_of_page(void *data) 107{ 108 return PAGE_SIZE - ((unsigned long)data % PAGE_SIZE); 109} 110 111/** 112 * p9_virtio_close - reclaim resources of a channel 113 * @client: client instance 114 * 115 * This reclaims a channel by freeing its resources and 116 * reseting its inuse flag. 117 * 118 */ 119 120static void p9_virtio_close(struct p9_client *client) 121{ 122 struct virtio_chan *chan = client->trans; 123 124 mutex_lock(&virtio_9p_lock); 125 if (chan) 126 chan->inuse = false; 127 mutex_unlock(&virtio_9p_lock); 128} 129 130/** 131 * req_done - callback which signals activity from the server 132 * @vq: virtio queue activity was received on 133 * 134 * This notifies us that the server has triggered some activity 135 * on the virtio channel - most likely a response to request we 136 * sent. Figure out which requests now have responses and wake up 137 * those threads. 138 * 139 * Bugs: could do with some additional sanity checking, but appears to work. 140 * 141 */ 142 143static void req_done(struct virtqueue *vq) 144{ 145 struct virtio_chan *chan = vq->vdev->priv; 146 struct p9_fcall *rc; 147 unsigned int len; 148 struct p9_req_t *req; 149 unsigned long flags; 150 151 p9_debug(P9_DEBUG_TRANS, ": request done\n"); 152 153 while (1) { 154 spin_lock_irqsave(&chan->lock, flags); 155 rc = virtqueue_get_buf(chan->vq, &len); 156 if (rc == NULL) { 157 spin_unlock_irqrestore(&chan->lock, flags); 158 break; 159 } 160 chan->ring_bufs_avail = 1; 161 spin_unlock_irqrestore(&chan->lock, flags); 162 /* Wakeup if anyone waiting for VirtIO ring space. */ 163 wake_up(chan->vc_wq); 164 p9_debug(P9_DEBUG_TRANS, ": rc %p\n", rc); 165 p9_debug(P9_DEBUG_TRANS, ": lookup tag %d\n", rc->tag); 166 req = p9_tag_lookup(chan->client, rc->tag); 167 req->status = REQ_STATUS_RCVD; 168 p9_client_cb(chan->client, req); 169 } 170} 171 172/** 173 * pack_sg_list - pack a scatter gather list from a linear buffer 174 * @sg: scatter/gather list to pack into 175 * @start: which segment of the sg_list to start at 176 * @limit: maximum segment to pack data to 177 * @data: data to pack into scatter/gather list 178 * @count: amount of data to pack into the scatter/gather list 179 * 180 * sg_lists have multiple segments of various sizes. This will pack 181 * arbitrary data into an existing scatter gather list, segmenting the 182 * data as necessary within constraints. 183 * 184 */ 185 186static int pack_sg_list(struct scatterlist *sg, int start, 187 int limit, char *data, int count) 188{ 189 int s; 190 int index = start; 191 192 while (count) { 193 s = rest_of_page(data); 194 if (s > count) 195 s = count; 196 BUG_ON(index > limit); 197 /* Make sure we don't terminate early. */ 198 sg_unmark_end(&sg[index]); 199 sg_set_buf(&sg[index++], data, s); 200 count -= s; 201 data += s; 202 } 203 if (index-start) 204 sg_mark_end(&sg[index - 1]); 205 return index-start; 206} 207 208/* We don't currently allow canceling of virtio requests */ 209static int p9_virtio_cancel(struct p9_client *client, struct p9_req_t *req) 210{ 211 return 1; 212} 213 214/** 215 * pack_sg_list_p - Just like pack_sg_list. Instead of taking a buffer, 216 * this takes a list of pages. 217 * @sg: scatter/gather list to pack into 218 * @start: which segment of the sg_list to start at 219 * @pdata: a list of pages to add into sg. 220 * @nr_pages: number of pages to pack into the scatter/gather list 221 * @data: data to pack into scatter/gather list 222 * @count: amount of data to pack into the scatter/gather list 223 */ 224static int 225pack_sg_list_p(struct scatterlist *sg, int start, int limit, 226 struct page **pdata, int nr_pages, char *data, int count) 227{ 228 int i = 0, s; 229 int data_off; 230 int index = start; 231 232 BUG_ON(nr_pages > (limit - start)); 233 /* 234 * if the first page doesn't start at 235 * page boundary find the offset 236 */ 237 data_off = offset_in_page(data); 238 while (nr_pages) { 239 s = rest_of_page(data); 240 if (s > count) 241 s = count; 242 /* Make sure we don't terminate early. */ 243 sg_unmark_end(&sg[index]); 244 sg_set_page(&sg[index++], pdata[i++], s, data_off); 245 data_off = 0; 246 data += s; 247 count -= s; 248 nr_pages--; 249 } 250 251 if (index-start) 252 sg_mark_end(&sg[index - 1]); 253 return index - start; 254} 255 256/** 257 * p9_virtio_request - issue a request 258 * @client: client instance issuing the request 259 * @req: request to be issued 260 * 261 */ 262 263static int 264p9_virtio_request(struct p9_client *client, struct p9_req_t *req) 265{ 266 int err; 267 int in, out, out_sgs, in_sgs; 268 unsigned long flags; 269 struct virtio_chan *chan = client->trans; 270 struct scatterlist *sgs[2]; 271 272 p9_debug(P9_DEBUG_TRANS, "9p debug: virtio request\n"); 273 274 req->status = REQ_STATUS_SENT; 275req_retry: 276 spin_lock_irqsave(&chan->lock, flags); 277 278 out_sgs = in_sgs = 0; 279 /* Handle out VirtIO ring buffers */ 280 out = pack_sg_list(chan->sg, 0, 281 VIRTQUEUE_NUM, req->tc->sdata, req->tc->size); 282 if (out) 283 sgs[out_sgs++] = chan->sg; 284 285 in = pack_sg_list(chan->sg, out, 286 VIRTQUEUE_NUM, req->rc->sdata, req->rc->capacity); 287 if (in) 288 sgs[out_sgs + in_sgs++] = chan->sg + out; 289 290 err = virtqueue_add_sgs(chan->vq, sgs, out_sgs, in_sgs, req->tc, 291 GFP_ATOMIC); 292 if (err < 0) { 293 if (err == -ENOSPC) { 294 chan->ring_bufs_avail = 0; 295 spin_unlock_irqrestore(&chan->lock, flags); 296 err = wait_event_interruptible(*chan->vc_wq, 297 chan->ring_bufs_avail); 298 if (err == -ERESTARTSYS) 299 return err; 300 301 p9_debug(P9_DEBUG_TRANS, "Retry virtio request\n"); 302 goto req_retry; 303 } else { 304 spin_unlock_irqrestore(&chan->lock, flags); 305 p9_debug(P9_DEBUG_TRANS, 306 "virtio rpc add_sgs returned failure\n"); 307 return -EIO; 308 } 309 } 310 virtqueue_kick(chan->vq); 311 spin_unlock_irqrestore(&chan->lock, flags); 312 313 p9_debug(P9_DEBUG_TRANS, "virtio request kicked\n"); 314 return 0; 315} 316 317static int p9_get_mapped_pages(struct virtio_chan *chan, 318 struct page **pages, char *data, 319 int nr_pages, int write, int kern_buf) 320{ 321 int err; 322 if (!kern_buf) { 323 /* 324 * We allow only p9_max_pages pinned. We wait for the 325 * Other zc request to finish here 326 */ 327 if (atomic_read(&vp_pinned) >= chan->p9_max_pages) { 328 err = wait_event_interruptible(vp_wq, 329 (atomic_read(&vp_pinned) < chan->p9_max_pages)); 330 if (err == -ERESTARTSYS) 331 return err; 332 } 333 err = p9_payload_gup(data, &nr_pages, pages, write); 334 if (err < 0) 335 return err; 336 atomic_add(nr_pages, &vp_pinned); 337 } else { 338 /* kernel buffer, no need to pin pages */ 339 int s, index = 0; 340 int count = nr_pages; 341 while (nr_pages) { 342 s = rest_of_page(data); 343 pages[index++] = kmap_to_page(data); 344 data += s; 345 nr_pages--; 346 } 347 nr_pages = count; 348 } 349 return nr_pages; 350} 351 352/** 353 * p9_virtio_zc_request - issue a zero copy request 354 * @client: client instance issuing the request 355 * @req: request to be issued 356 * @uidata: user bffer that should be ued for zero copy read 357 * @uodata: user buffer that shoud be user for zero copy write 358 * @inlen: read buffer size 359 * @olen: write buffer size 360 * @hdrlen: reader header size, This is the size of response protocol data 361 * 362 */ 363static int 364p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req, 365 char *uidata, char *uodata, int inlen, 366 int outlen, int in_hdr_len, int kern_buf) 367{ 368 int in, out, err, out_sgs, in_sgs; 369 unsigned long flags; 370 int in_nr_pages = 0, out_nr_pages = 0; 371 struct page **in_pages = NULL, **out_pages = NULL; 372 struct virtio_chan *chan = client->trans; 373 struct scatterlist *sgs[4]; 374 375 p9_debug(P9_DEBUG_TRANS, "virtio request\n"); 376 377 if (uodata) { 378 out_nr_pages = p9_nr_pages(uodata, outlen); 379 out_pages = kmalloc(sizeof(struct page *) * out_nr_pages, 380 GFP_NOFS); 381 if (!out_pages) { 382 err = -ENOMEM; 383 goto err_out; 384 } 385 out_nr_pages = p9_get_mapped_pages(chan, out_pages, uodata, 386 out_nr_pages, 0, kern_buf); 387 if (out_nr_pages < 0) { 388 err = out_nr_pages; 389 kfree(out_pages); 390 out_pages = NULL; 391 goto err_out; 392 } 393 } 394 if (uidata) { 395 in_nr_pages = p9_nr_pages(uidata, inlen); 396 in_pages = kmalloc(sizeof(struct page *) * in_nr_pages, 397 GFP_NOFS); 398 if (!in_pages) { 399 err = -ENOMEM; 400 goto err_out; 401 } 402 in_nr_pages = p9_get_mapped_pages(chan, in_pages, uidata, 403 in_nr_pages, 1, kern_buf); 404 if (in_nr_pages < 0) { 405 err = in_nr_pages; 406 kfree(in_pages); 407 in_pages = NULL; 408 goto err_out; 409 } 410 } 411 req->status = REQ_STATUS_SENT; 412req_retry_pinned: 413 spin_lock_irqsave(&chan->lock, flags); 414 415 out_sgs = in_sgs = 0; 416 417 /* out data */ 418 out = pack_sg_list(chan->sg, 0, 419 VIRTQUEUE_NUM, req->tc->sdata, req->tc->size); 420 421 if (out) 422 sgs[out_sgs++] = chan->sg; 423 424 if (out_pages) { 425 sgs[out_sgs++] = chan->sg + out; 426 out += pack_sg_list_p(chan->sg, out, VIRTQUEUE_NUM, 427 out_pages, out_nr_pages, uodata, outlen); 428 } 429 430 /* 431 * Take care of in data 432 * For example TREAD have 11. 433 * 11 is the read/write header = PDU Header(7) + IO Size (4). 434 * Arrange in such a way that server places header in the 435 * alloced memory and payload onto the user buffer. 436 */ 437 in = pack_sg_list(chan->sg, out, 438 VIRTQUEUE_NUM, req->rc->sdata, in_hdr_len); 439 if (in) 440 sgs[out_sgs + in_sgs++] = chan->sg + out; 441 442 if (in_pages) { 443 sgs[out_sgs + in_sgs++] = chan->sg + out + in; 444 in += pack_sg_list_p(chan->sg, out + in, VIRTQUEUE_NUM, 445 in_pages, in_nr_pages, uidata, inlen); 446 } 447 448 BUG_ON(out_sgs + in_sgs > ARRAY_SIZE(sgs)); 449 err = virtqueue_add_sgs(chan->vq, sgs, out_sgs, in_sgs, req->tc, 450 GFP_ATOMIC); 451 if (err < 0) { 452 if (err == -ENOSPC) { 453 chan->ring_bufs_avail = 0; 454 spin_unlock_irqrestore(&chan->lock, flags); 455 err = wait_event_interruptible(*chan->vc_wq, 456 chan->ring_bufs_avail); 457 if (err == -ERESTARTSYS) 458 goto err_out; 459 460 p9_debug(P9_DEBUG_TRANS, "Retry virtio request\n"); 461 goto req_retry_pinned; 462 } else { 463 spin_unlock_irqrestore(&chan->lock, flags); 464 p9_debug(P9_DEBUG_TRANS, 465 "virtio rpc add_sgs returned failure\n"); 466 err = -EIO; 467 goto err_out; 468 } 469 } 470 virtqueue_kick(chan->vq); 471 spin_unlock_irqrestore(&chan->lock, flags); 472 p9_debug(P9_DEBUG_TRANS, "virtio request kicked\n"); 473 err = wait_event_interruptible(*req->wq, 474 req->status >= REQ_STATUS_RCVD); 475 /* 476 * Non kernel buffers are pinned, unpin them 477 */ 478err_out: 479 if (!kern_buf) { 480 if (in_pages) { 481 p9_release_pages(in_pages, in_nr_pages); 482 atomic_sub(in_nr_pages, &vp_pinned); 483 } 484 if (out_pages) { 485 p9_release_pages(out_pages, out_nr_pages); 486 atomic_sub(out_nr_pages, &vp_pinned); 487 } 488 /* wakeup anybody waiting for slots to pin pages */ 489 wake_up(&vp_wq); 490 } 491 kfree(in_pages); 492 kfree(out_pages); 493 return err; 494} 495 496static ssize_t p9_mount_tag_show(struct device *dev, 497 struct device_attribute *attr, char *buf) 498{ 499 struct virtio_chan *chan; 500 struct virtio_device *vdev; 501 502 vdev = dev_to_virtio(dev); 503 chan = vdev->priv; 504 505 return snprintf(buf, chan->tag_len + 1, "%s", chan->tag); 506} 507 508static DEVICE_ATTR(mount_tag, 0444, p9_mount_tag_show, NULL); 509 510/** 511 * p9_virtio_probe - probe for existence of 9P virtio channels 512 * @vdev: virtio device to probe 513 * 514 * This probes for existing virtio channels. 515 * 516 */ 517 518static int p9_virtio_probe(struct virtio_device *vdev) 519{ 520 __u16 tag_len; 521 char *tag; 522 int err; 523 struct virtio_chan *chan; 524 525 chan = kmalloc(sizeof(struct virtio_chan), GFP_KERNEL); 526 if (!chan) { 527 pr_err("Failed to allocate virtio 9P channel\n"); 528 err = -ENOMEM; 529 goto fail; 530 } 531 532 chan->vdev = vdev; 533 534 /* We expect one virtqueue, for requests. */ 535 chan->vq = virtio_find_single_vq(vdev, req_done, "requests"); 536 if (IS_ERR(chan->vq)) { 537 err = PTR_ERR(chan->vq); 538 goto out_free_vq; 539 } 540 chan->vq->vdev->priv = chan; 541 spin_lock_init(&chan->lock); 542 543 sg_init_table(chan->sg, VIRTQUEUE_NUM); 544 545 chan->inuse = false; 546 if (virtio_has_feature(vdev, VIRTIO_9P_MOUNT_TAG)) { 547 vdev->config->get(vdev, 548 offsetof(struct virtio_9p_config, tag_len), 549 &tag_len, sizeof(tag_len)); 550 } else { 551 err = -EINVAL; 552 goto out_free_vq; 553 } 554 tag = kmalloc(tag_len, GFP_KERNEL); 555 if (!tag) { 556 err = -ENOMEM; 557 goto out_free_vq; 558 } 559 vdev->config->get(vdev, offsetof(struct virtio_9p_config, tag), 560 tag, tag_len); 561 chan->tag = tag; 562 chan->tag_len = tag_len; 563 err = sysfs_create_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr); 564 if (err) { 565 goto out_free_tag; 566 } 567 chan->vc_wq = kmalloc(sizeof(wait_queue_head_t), GFP_KERNEL); 568 if (!chan->vc_wq) { 569 err = -ENOMEM; 570 goto out_free_tag; 571 } 572 init_waitqueue_head(chan->vc_wq); 573 chan->ring_bufs_avail = 1; 574 /* Ceiling limit to avoid denial of service attacks */ 575 chan->p9_max_pages = nr_free_buffer_pages()/4; 576 577 mutex_lock(&virtio_9p_lock); 578 list_add_tail(&chan->chan_list, &virtio_chan_list); 579 mutex_unlock(&virtio_9p_lock); 580 return 0; 581 582out_free_tag: 583 kfree(tag); 584out_free_vq: 585 vdev->config->del_vqs(vdev); 586 kfree(chan); 587fail: 588 return err; 589} 590 591 592/** 593 * p9_virtio_create - allocate a new virtio channel 594 * @client: client instance invoking this transport 595 * @devname: string identifying the channel to connect to (unused) 596 * @args: args passed from sys_mount() for per-transport options (unused) 597 * 598 * This sets up a transport channel for 9p communication. Right now 599 * we only match the first available channel, but eventually we couldlook up 600 * alternate channels by matching devname versus a virtio_config entry. 601 * We use a simple reference count mechanism to ensure that only a single 602 * mount has a channel open at a time. 603 * 604 */ 605 606static int 607p9_virtio_create(struct p9_client *client, const char *devname, char *args) 608{ 609 struct virtio_chan *chan; 610 int ret = -ENOENT; 611 int found = 0; 612 613 mutex_lock(&virtio_9p_lock); 614 list_for_each_entry(chan, &virtio_chan_list, chan_list) { 615 if (!strncmp(devname, chan->tag, chan->tag_len) && 616 strlen(devname) == chan->tag_len) { 617 if (!chan->inuse) { 618 chan->inuse = true; 619 found = 1; 620 break; 621 } 622 ret = -EBUSY; 623 } 624 } 625 mutex_unlock(&virtio_9p_lock); 626 627 if (!found) { 628 pr_err("no channels available\n"); 629 return ret; 630 } 631 632 client->trans = (void *)chan; 633 client->status = Connected; 634 chan->client = client; 635 636 return 0; 637} 638 639/** 640 * p9_virtio_remove - clean up resources associated with a virtio device 641 * @vdev: virtio device to remove 642 * 643 */ 644 645static void p9_virtio_remove(struct virtio_device *vdev) 646{ 647 struct virtio_chan *chan = vdev->priv; 648 649 if (chan->inuse) 650 p9_virtio_close(chan->client); 651 vdev->config->del_vqs(vdev); 652 653 mutex_lock(&virtio_9p_lock); 654 list_del(&chan->chan_list); 655 mutex_unlock(&virtio_9p_lock); 656 sysfs_remove_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr); 657 kfree(chan->tag); 658 kfree(chan->vc_wq); 659 kfree(chan); 660 661} 662 663static struct virtio_device_id id_table[] = { 664 { VIRTIO_ID_9P, VIRTIO_DEV_ANY_ID }, 665 { 0 }, 666}; 667 668static unsigned int features[] = { 669 VIRTIO_9P_MOUNT_TAG, 670}; 671 672/* The standard "struct lguest_driver": */ 673static struct virtio_driver p9_virtio_drv = { 674 .feature_table = features, 675 .feature_table_size = ARRAY_SIZE(features), 676 .driver.name = KBUILD_MODNAME, 677 .driver.owner = THIS_MODULE, 678 .id_table = id_table, 679 .probe = p9_virtio_probe, 680 .remove = p9_virtio_remove, 681}; 682 683static struct p9_trans_module p9_virtio_trans = { 684 .name = "virtio", 685 .create = p9_virtio_create, 686 .close = p9_virtio_close, 687 .request = p9_virtio_request, 688 .zc_request = p9_virtio_zc_request, 689 .cancel = p9_virtio_cancel, 690 /* 691 * We leave one entry for input and one entry for response 692 * headers. We also skip one more entry to accomodate, address 693 * that are not at page boundary, that can result in an extra 694 * page in zero copy. 695 */ 696 .maxsize = PAGE_SIZE * (VIRTQUEUE_NUM - 3), 697 .def = 0, 698 .owner = THIS_MODULE, 699}; 700 701/* The standard init function */ 702static int __init p9_virtio_init(void) 703{ 704 INIT_LIST_HEAD(&virtio_chan_list); 705 706 v9fs_register_trans(&p9_virtio_trans); 707 return register_virtio_driver(&p9_virtio_drv); 708} 709 710static void __exit p9_virtio_cleanup(void) 711{ 712 unregister_virtio_driver(&p9_virtio_drv); 713 v9fs_unregister_trans(&p9_virtio_trans); 714} 715 716module_init(p9_virtio_init); 717module_exit(p9_virtio_cleanup); 718 719MODULE_DEVICE_TABLE(virtio, id_table); 720MODULE_AUTHOR("Eric Van Hensbergen <ericvh@gmail.com>"); 721MODULE_DESCRIPTION("Virtio 9p Transport"); 722MODULE_LICENSE("GPL"); 723