virtio_blk.c revision 577ebb374c78314ac4617242f509e2f5e7156649
1//#define DEBUG
2#include <linux/spinlock.h>
3#include <linux/slab.h>
4#include <linux/blkdev.h>
5#include <linux/hdreg.h>
6#include <linux/module.h>
7#include <linux/mutex.h>
8#include <linux/virtio.h>
9#include <linux/virtio_blk.h>
10#include <linux/scatterlist.h>
11#include <linux/string_helpers.h>
12#include <scsi/scsi_cmnd.h>
13#include <linux/idr.h>
14
/* Each disk is allocated 1 << PART_BITS minor numbers (see alloc_disk()). */
#define PART_BITS 4

/* Block major number returned by register_blkdev() at module init. */
static int major;

/* Allocator for per-disk indices; an index determines the first minor. */
static DEFINE_IDA(vd_index_ida);

/*
 * Workqueue on which config-change work items are queued.  It is only
 * referenced from this file, so keep it out of the global namespace.
 */
static struct workqueue_struct *virtblk_wq;
21
22struct virtio_blk
23{
24	spinlock_t lock;
25
26	struct virtio_device *vdev;
27	struct virtqueue *vq;
28
29	/* The disk structure for the kernel. */
30	struct gendisk *disk;
31
32	/* Request tracking. */
33	struct list_head reqs;
34
35	mempool_t *pool;
36
37	/* Process context for config space updates */
38	struct work_struct config_work;
39
40	/* Lock for config space updates */
41	struct mutex config_lock;
42
43	/* enable config space updates */
44	bool config_enable;
45
46	/* What host tells us, plus 2 for header & tailer. */
47	unsigned int sg_elems;
48
49	/* Ida index - used to track minor number allocations. */
50	int index;
51
52	/* Scatterlist: can be too big for stack. */
53	struct scatterlist sg[/*sg_elems*/];
54};
55
56struct virtblk_req
57{
58	struct list_head list;
59	struct request *req;
60	struct virtio_blk_outhdr out_hdr;
61	struct virtio_scsi_inhdr in_hdr;
62	u8 status;
63};
64
65static void blk_done(struct virtqueue *vq)
66{
67	struct virtio_blk *vblk = vq->vdev->priv;
68	struct virtblk_req *vbr;
69	unsigned int len;
70	unsigned long flags;
71
72	spin_lock_irqsave(&vblk->lock, flags);
73	while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) {
74		int error;
75
76		switch (vbr->status) {
77		case VIRTIO_BLK_S_OK:
78			error = 0;
79			break;
80		case VIRTIO_BLK_S_UNSUPP:
81			error = -ENOTTY;
82			break;
83		default:
84			error = -EIO;
85			break;
86		}
87
88		switch (vbr->req->cmd_type) {
89		case REQ_TYPE_BLOCK_PC:
90			vbr->req->resid_len = vbr->in_hdr.residual;
91			vbr->req->sense_len = vbr->in_hdr.sense_len;
92			vbr->req->errors = vbr->in_hdr.errors;
93			break;
94		case REQ_TYPE_SPECIAL:
95			vbr->req->errors = (error != 0);
96			break;
97		default:
98			break;
99		}
100
101		__blk_end_request_all(vbr->req, error);
102		list_del(&vbr->list);
103		mempool_free(vbr, vblk->pool);
104	}
105	/* In case queue is stopped waiting for more buffers. */
106	blk_start_queue(vblk->disk->queue);
107	spin_unlock_irqrestore(&vblk->lock, flags);
108}
109
110static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
111		   struct request *req)
112{
113	unsigned long num, out = 0, in = 0;
114	struct virtblk_req *vbr;
115
116	vbr = mempool_alloc(vblk->pool, GFP_ATOMIC);
117	if (!vbr)
118		/* When another request finishes we'll try again. */
119		return false;
120
121	vbr->req = req;
122
123	if (req->cmd_flags & REQ_FLUSH) {
124		vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH;
125		vbr->out_hdr.sector = 0;
126		vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
127	} else {
128		switch (req->cmd_type) {
129		case REQ_TYPE_FS:
130			vbr->out_hdr.type = 0;
131			vbr->out_hdr.sector = blk_rq_pos(vbr->req);
132			vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
133			break;
134		case REQ_TYPE_BLOCK_PC:
135			vbr->out_hdr.type = VIRTIO_BLK_T_SCSI_CMD;
136			vbr->out_hdr.sector = 0;
137			vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
138			break;
139		case REQ_TYPE_SPECIAL:
140			vbr->out_hdr.type = VIRTIO_BLK_T_GET_ID;
141			vbr->out_hdr.sector = 0;
142			vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
143			break;
144		default:
145			/* We don't put anything else in the queue. */
146			BUG();
147		}
148	}
149
150	sg_set_buf(&vblk->sg[out++], &vbr->out_hdr, sizeof(vbr->out_hdr));
151
152	/*
153	 * If this is a packet command we need a couple of additional headers.
154	 * Behind the normal outhdr we put a segment with the scsi command
155	 * block, and before the normal inhdr we put the sense data and the
156	 * inhdr with additional status information before the normal inhdr.
157	 */
158	if (vbr->req->cmd_type == REQ_TYPE_BLOCK_PC)
159		sg_set_buf(&vblk->sg[out++], vbr->req->cmd, vbr->req->cmd_len);
160
161	num = blk_rq_map_sg(q, vbr->req, vblk->sg + out);
162
163	if (vbr->req->cmd_type == REQ_TYPE_BLOCK_PC) {
164		sg_set_buf(&vblk->sg[num + out + in++], vbr->req->sense, SCSI_SENSE_BUFFERSIZE);
165		sg_set_buf(&vblk->sg[num + out + in++], &vbr->in_hdr,
166			   sizeof(vbr->in_hdr));
167	}
168
169	sg_set_buf(&vblk->sg[num + out + in++], &vbr->status,
170		   sizeof(vbr->status));
171
172	if (num) {
173		if (rq_data_dir(vbr->req) == WRITE) {
174			vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
175			out += num;
176		} else {
177			vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
178			in += num;
179		}
180	}
181
182	if (virtqueue_add_buf(vblk->vq, vblk->sg, out, in, vbr, GFP_ATOMIC)<0) {
183		mempool_free(vbr, vblk->pool);
184		return false;
185	}
186
187	list_add_tail(&vbr->list, &vblk->reqs);
188	return true;
189}
190
191static void do_virtblk_request(struct request_queue *q)
192{
193	struct virtio_blk *vblk = q->queuedata;
194	struct request *req;
195	unsigned int issued = 0;
196
197	while ((req = blk_peek_request(q)) != NULL) {
198		BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
199
200		/* If this request fails, stop queue and wait for something to
201		   finish to restart it. */
202		if (!do_req(q, vblk, req)) {
203			blk_stop_queue(q);
204			break;
205		}
206		blk_start_request(req);
207		issued++;
208	}
209
210	if (issued)
211		virtqueue_kick(vblk->vq);
212}
213
214/* return id (s/n) string for *disk to *id_str
215 */
216static int virtblk_get_id(struct gendisk *disk, char *id_str)
217{
218	struct virtio_blk *vblk = disk->private_data;
219	struct request *req;
220	struct bio *bio;
221	int err;
222
223	bio = bio_map_kern(vblk->disk->queue, id_str, VIRTIO_BLK_ID_BYTES,
224			   GFP_KERNEL);
225	if (IS_ERR(bio))
226		return PTR_ERR(bio);
227
228	req = blk_make_request(vblk->disk->queue, bio, GFP_KERNEL);
229	if (IS_ERR(req)) {
230		bio_put(bio);
231		return PTR_ERR(req);
232	}
233
234	req->cmd_type = REQ_TYPE_SPECIAL;
235	err = blk_execute_rq(vblk->disk->queue, vblk->disk, req, false);
236	blk_put_request(req);
237
238	return err;
239}
240
241static int virtblk_ioctl(struct block_device *bdev, fmode_t mode,
242			     unsigned int cmd, unsigned long data)
243{
244	struct gendisk *disk = bdev->bd_disk;
245	struct virtio_blk *vblk = disk->private_data;
246
247	/*
248	 * Only allow the generic SCSI ioctls if the host can support it.
249	 */
250	if (!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_SCSI))
251		return -ENOTTY;
252
253	return scsi_cmd_blk_ioctl(bdev, mode, cmd,
254				  (void __user *)data);
255}
256
257/* We provide getgeo only to please some old bootloader/partitioning tools */
258static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo)
259{
260	struct virtio_blk *vblk = bd->bd_disk->private_data;
261	struct virtio_blk_geometry vgeo;
262	int err;
263
264	/* see if the host passed in geometry config */
265	err = virtio_config_val(vblk->vdev, VIRTIO_BLK_F_GEOMETRY,
266				offsetof(struct virtio_blk_config, geometry),
267				&vgeo);
268
269	if (!err) {
270		geo->heads = vgeo.heads;
271		geo->sectors = vgeo.sectors;
272		geo->cylinders = vgeo.cylinders;
273	} else {
274		/* some standard values, similar to sd */
275		geo->heads = 1 << 6;
276		geo->sectors = 1 << 5;
277		geo->cylinders = get_capacity(bd->bd_disk) >> 11;
278	}
279	return 0;
280}
281
282static const struct block_device_operations virtblk_fops = {
283	.ioctl  = virtblk_ioctl,
284	.owner  = THIS_MODULE,
285	.getgeo = virtblk_getgeo,
286};
287
288static int index_to_minor(int index)
289{
290	return index << PART_BITS;
291}
292
293static int minor_to_index(int minor)
294{
295	return minor >> PART_BITS;
296}
297
298static ssize_t virtblk_serial_show(struct device *dev,
299				struct device_attribute *attr, char *buf)
300{
301	struct gendisk *disk = dev_to_disk(dev);
302	int err;
303
304	/* sysfs gives us a PAGE_SIZE buffer */
305	BUILD_BUG_ON(PAGE_SIZE < VIRTIO_BLK_ID_BYTES);
306
307	buf[VIRTIO_BLK_ID_BYTES] = '\0';
308	err = virtblk_get_id(disk, buf);
309	if (!err)
310		return strlen(buf);
311
312	if (err == -EIO) /* Unsupported? Make it empty. */
313		return 0;
314
315	return err;
316}
317DEVICE_ATTR(serial, S_IRUGO, virtblk_serial_show, NULL);
318
319static void virtblk_config_changed_work(struct work_struct *work)
320{
321	struct virtio_blk *vblk =
322		container_of(work, struct virtio_blk, config_work);
323	struct virtio_device *vdev = vblk->vdev;
324	struct request_queue *q = vblk->disk->queue;
325	char cap_str_2[10], cap_str_10[10];
326	u64 capacity, size;
327
328	mutex_lock(&vblk->config_lock);
329	if (!vblk->config_enable)
330		goto done;
331
332	/* Host must always specify the capacity. */
333	vdev->config->get(vdev, offsetof(struct virtio_blk_config, capacity),
334			  &capacity, sizeof(capacity));
335
336	/* If capacity is too big, truncate with warning. */
337	if ((sector_t)capacity != capacity) {
338		dev_warn(&vdev->dev, "Capacity %llu too large: truncating\n",
339			 (unsigned long long)capacity);
340		capacity = (sector_t)-1;
341	}
342
343	size = capacity * queue_logical_block_size(q);
344	string_get_size(size, STRING_UNITS_2, cap_str_2, sizeof(cap_str_2));
345	string_get_size(size, STRING_UNITS_10, cap_str_10, sizeof(cap_str_10));
346
347	dev_notice(&vdev->dev,
348		  "new size: %llu %d-byte logical blocks (%s/%s)\n",
349		  (unsigned long long)capacity,
350		  queue_logical_block_size(q),
351		  cap_str_10, cap_str_2);
352
353	set_capacity(vblk->disk, capacity);
354done:
355	mutex_unlock(&vblk->config_lock);
356}
357
358static void virtblk_config_changed(struct virtio_device *vdev)
359{
360	struct virtio_blk *vblk = vdev->priv;
361
362	queue_work(virtblk_wq, &vblk->config_work);
363}
364
365static int init_vq(struct virtio_blk *vblk)
366{
367	int err = 0;
368
369	/* We expect one virtqueue, for output. */
370	vblk->vq = virtio_find_single_vq(vblk->vdev, blk_done, "requests");
371	if (IS_ERR(vblk->vq))
372		err = PTR_ERR(vblk->vq);
373
374	return err;
375}
376
377static int __devinit virtblk_probe(struct virtio_device *vdev)
378{
379	struct virtio_blk *vblk;
380	struct request_queue *q;
381	int err, index;
382	u64 cap;
383	u32 v, blk_size, sg_elems, opt_io_size;
384	u16 min_io_size;
385	u8 physical_block_exp, alignment_offset;
386
387	err = ida_simple_get(&vd_index_ida, 0, minor_to_index(1 << MINORBITS),
388			     GFP_KERNEL);
389	if (err < 0)
390		goto out;
391	index = err;
392
393	/* We need to know how many segments before we allocate. */
394	err = virtio_config_val(vdev, VIRTIO_BLK_F_SEG_MAX,
395				offsetof(struct virtio_blk_config, seg_max),
396				&sg_elems);
397
398	/* We need at least one SG element, whatever they say. */
399	if (err || !sg_elems)
400		sg_elems = 1;
401
402	/* We need an extra sg elements at head and tail. */
403	sg_elems += 2;
404	vdev->priv = vblk = kmalloc(sizeof(*vblk) +
405				    sizeof(vblk->sg[0]) * sg_elems, GFP_KERNEL);
406	if (!vblk) {
407		err = -ENOMEM;
408		goto out_free_index;
409	}
410
411	INIT_LIST_HEAD(&vblk->reqs);
412	spin_lock_init(&vblk->lock);
413	vblk->vdev = vdev;
414	vblk->sg_elems = sg_elems;
415	sg_init_table(vblk->sg, vblk->sg_elems);
416	mutex_init(&vblk->config_lock);
417	INIT_WORK(&vblk->config_work, virtblk_config_changed_work);
418	vblk->config_enable = true;
419
420	err = init_vq(vblk);
421	if (err)
422		goto out_free_vblk;
423
424	vblk->pool = mempool_create_kmalloc_pool(1,sizeof(struct virtblk_req));
425	if (!vblk->pool) {
426		err = -ENOMEM;
427		goto out_free_vq;
428	}
429
430	/* FIXME: How many partitions?  How long is a piece of string? */
431	vblk->disk = alloc_disk(1 << PART_BITS);
432	if (!vblk->disk) {
433		err = -ENOMEM;
434		goto out_mempool;
435	}
436
437	q = vblk->disk->queue = blk_init_queue(do_virtblk_request, &vblk->lock);
438	if (!q) {
439		err = -ENOMEM;
440		goto out_put_disk;
441	}
442
443	q->queuedata = vblk;
444
445	if (index < 26) {
446		sprintf(vblk->disk->disk_name, "vd%c", 'a' + index % 26);
447	} else if (index < (26 + 1) * 26) {
448		sprintf(vblk->disk->disk_name, "vd%c%c",
449			'a' + index / 26 - 1, 'a' + index % 26);
450	} else {
451		const unsigned int m1 = (index / 26 - 1) / 26 - 1;
452		const unsigned int m2 = (index / 26 - 1) % 26;
453		const unsigned int m3 =  index % 26;
454		sprintf(vblk->disk->disk_name, "vd%c%c%c",
455			'a' + m1, 'a' + m2, 'a' + m3);
456	}
457
458	vblk->disk->major = major;
459	vblk->disk->first_minor = index_to_minor(index);
460	vblk->disk->private_data = vblk;
461	vblk->disk->fops = &virtblk_fops;
462	vblk->disk->driverfs_dev = &vdev->dev;
463	vblk->index = index;
464
465	/* configure queue flush support */
466	if (virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH))
467		blk_queue_flush(q, REQ_FLUSH);
468
469	/* If disk is read-only in the host, the guest should obey */
470	if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO))
471		set_disk_ro(vblk->disk, 1);
472
473	/* Host must always specify the capacity. */
474	vdev->config->get(vdev, offsetof(struct virtio_blk_config, capacity),
475			  &cap, sizeof(cap));
476
477	/* If capacity is too big, truncate with warning. */
478	if ((sector_t)cap != cap) {
479		dev_warn(&vdev->dev, "Capacity %llu too large: truncating\n",
480			 (unsigned long long)cap);
481		cap = (sector_t)-1;
482	}
483	set_capacity(vblk->disk, cap);
484
485	/* We can handle whatever the host told us to handle. */
486	blk_queue_max_segments(q, vblk->sg_elems-2);
487
488	/* No need to bounce any requests */
489	blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
490
491	/* No real sector limit. */
492	blk_queue_max_hw_sectors(q, -1U);
493
494	/* Host can optionally specify maximum segment size and number of
495	 * segments. */
496	err = virtio_config_val(vdev, VIRTIO_BLK_F_SIZE_MAX,
497				offsetof(struct virtio_blk_config, size_max),
498				&v);
499	if (!err)
500		blk_queue_max_segment_size(q, v);
501	else
502		blk_queue_max_segment_size(q, -1U);
503
504	/* Host can optionally specify the block size of the device */
505	err = virtio_config_val(vdev, VIRTIO_BLK_F_BLK_SIZE,
506				offsetof(struct virtio_blk_config, blk_size),
507				&blk_size);
508	if (!err)
509		blk_queue_logical_block_size(q, blk_size);
510	else
511		blk_size = queue_logical_block_size(q);
512
513	/* Use topology information if available */
514	err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,
515			offsetof(struct virtio_blk_config, physical_block_exp),
516			&physical_block_exp);
517	if (!err && physical_block_exp)
518		blk_queue_physical_block_size(q,
519				blk_size * (1 << physical_block_exp));
520
521	err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,
522			offsetof(struct virtio_blk_config, alignment_offset),
523			&alignment_offset);
524	if (!err && alignment_offset)
525		blk_queue_alignment_offset(q, blk_size * alignment_offset);
526
527	err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,
528			offsetof(struct virtio_blk_config, min_io_size),
529			&min_io_size);
530	if (!err && min_io_size)
531		blk_queue_io_min(q, blk_size * min_io_size);
532
533	err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,
534			offsetof(struct virtio_blk_config, opt_io_size),
535			&opt_io_size);
536	if (!err && opt_io_size)
537		blk_queue_io_opt(q, blk_size * opt_io_size);
538
539
540	add_disk(vblk->disk);
541	err = device_create_file(disk_to_dev(vblk->disk), &dev_attr_serial);
542	if (err)
543		goto out_del_disk;
544
545	return 0;
546
547out_del_disk:
548	del_gendisk(vblk->disk);
549	blk_cleanup_queue(vblk->disk->queue);
550out_put_disk:
551	put_disk(vblk->disk);
552out_mempool:
553	mempool_destroy(vblk->pool);
554out_free_vq:
555	vdev->config->del_vqs(vdev);
556out_free_vblk:
557	kfree(vblk);
558out_free_index:
559	ida_simple_remove(&vd_index_ida, index);
560out:
561	return err;
562}
563
564static void __devexit virtblk_remove(struct virtio_device *vdev)
565{
566	struct virtio_blk *vblk = vdev->priv;
567	int index = vblk->index;
568
569	/* Prevent config work handler from accessing the device. */
570	mutex_lock(&vblk->config_lock);
571	vblk->config_enable = false;
572	mutex_unlock(&vblk->config_lock);
573
574	/* Nothing should be pending. */
575	BUG_ON(!list_empty(&vblk->reqs));
576
577	/* Stop all the virtqueues. */
578	vdev->config->reset(vdev);
579
580	flush_work(&vblk->config_work);
581
582	del_gendisk(vblk->disk);
583	blk_cleanup_queue(vblk->disk->queue);
584	put_disk(vblk->disk);
585	mempool_destroy(vblk->pool);
586	vdev->config->del_vqs(vdev);
587	kfree(vblk);
588	ida_simple_remove(&vd_index_ida, index);
589}
590
591#ifdef CONFIG_PM
592static int virtblk_freeze(struct virtio_device *vdev)
593{
594	struct virtio_blk *vblk = vdev->priv;
595
596	/* Ensure we don't receive any more interrupts */
597	vdev->config->reset(vdev);
598
599	/* Prevent config work handler from accessing the device. */
600	mutex_lock(&vblk->config_lock);
601	vblk->config_enable = false;
602	mutex_unlock(&vblk->config_lock);
603
604	flush_work(&vblk->config_work);
605
606	spin_lock_irq(vblk->disk->queue->queue_lock);
607	blk_stop_queue(vblk->disk->queue);
608	spin_unlock_irq(vblk->disk->queue->queue_lock);
609	blk_sync_queue(vblk->disk->queue);
610
611	vdev->config->del_vqs(vdev);
612	return 0;
613}
614
615static int virtblk_restore(struct virtio_device *vdev)
616{
617	struct virtio_blk *vblk = vdev->priv;
618	int ret;
619
620	vblk->config_enable = true;
621	ret = init_vq(vdev->priv);
622	if (!ret) {
623		spin_lock_irq(vblk->disk->queue->queue_lock);
624		blk_start_queue(vblk->disk->queue);
625		spin_unlock_irq(vblk->disk->queue->queue_lock);
626	}
627	return ret;
628}
629#endif
630
631static const struct virtio_device_id id_table[] = {
632	{ VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
633	{ 0 },
634};
635
636static unsigned int features[] = {
637	VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
638	VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, VIRTIO_BLK_F_SCSI,
639	VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY
640};
641
642/*
643 * virtio_blk causes spurious section mismatch warning by
644 * simultaneously referring to a __devinit and a __devexit function.
645 * Use __refdata to avoid this warning.
646 */
647static struct virtio_driver __refdata virtio_blk = {
648	.feature_table		= features,
649	.feature_table_size	= ARRAY_SIZE(features),
650	.driver.name		= KBUILD_MODNAME,
651	.driver.owner		= THIS_MODULE,
652	.id_table		= id_table,
653	.probe			= virtblk_probe,
654	.remove			= __devexit_p(virtblk_remove),
655	.config_changed		= virtblk_config_changed,
656#ifdef CONFIG_PM
657	.freeze			= virtblk_freeze,
658	.restore		= virtblk_restore,
659#endif
660};
661
662static int __init init(void)
663{
664	int error;
665
666	virtblk_wq = alloc_workqueue("virtio-blk", 0, 0);
667	if (!virtblk_wq)
668		return -ENOMEM;
669
670	major = register_blkdev(0, "virtblk");
671	if (major < 0) {
672		error = major;
673		goto out_destroy_workqueue;
674	}
675
676	error = register_virtio_driver(&virtio_blk);
677	if (error)
678		goto out_unregister_blkdev;
679	return 0;
680
681out_unregister_blkdev:
682	unregister_blkdev(major, "virtblk");
683out_destroy_workqueue:
684	destroy_workqueue(virtblk_wq);
685	return error;
686}
687
688static void __exit fini(void)
689{
690	unregister_blkdev(major, "virtblk");
691	unregister_virtio_driver(&virtio_blk);
692	destroy_workqueue(virtblk_wq);
693}
694module_init(init);
695module_exit(fini);
696
697MODULE_DEVICE_TABLE(virtio, id_table);
698MODULE_DESCRIPTION("Virtio block driver");
699MODULE_LICENSE("GPL");
700