scsi_lib.c revision 6391a11375de5e2bb1eb8481e54619761dc65d9f
1/*
2 *  scsi_lib.c Copyright (C) 1999 Eric Youngdale
3 *
4 *  SCSI queueing library.
5 *      Initial versions: Eric Youngdale (eric@andante.org).
6 *                        Based upon conversations with large numbers
7 *                        of people at Linux Expo.
8 */
9
10#include <linux/bio.h>
11#include <linux/blkdev.h>
12#include <linux/completion.h>
13#include <linux/kernel.h>
14#include <linux/mempool.h>
15#include <linux/slab.h>
16#include <linux/init.h>
17#include <linux/pci.h>
18#include <linux/delay.h>
19#include <linux/hardirq.h>
20
21#include <scsi/scsi.h>
22#include <scsi/scsi_dbg.h>
23#include <scsi/scsi_device.h>
24#include <scsi/scsi_driver.h>
25#include <scsi/scsi_eh.h>
26#include <scsi/scsi_host.h>
27#include <scsi/scsi_request.h>
28
29#include "scsi_priv.h"
30#include "scsi_logging.h"
31
32
33#define SG_MEMPOOL_NR		ARRAY_SIZE(scsi_sg_pools)
34#define SG_MEMPOOL_SIZE		32
35
36struct scsi_host_sg_pool {
37	size_t		size;
38	char		*name;
39	kmem_cache_t	*slab;
40	mempool_t	*pool;
41};
42
43#if (SCSI_MAX_PHYS_SEGMENTS < 32)
44#error SCSI_MAX_PHYS_SEGMENTS is too small
45#endif
46
47#define SP(x) { x, "sgpool-" #x }
48static struct scsi_host_sg_pool scsi_sg_pools[] = {
49	SP(8),
50	SP(16),
51	SP(32),
52#if (SCSI_MAX_PHYS_SEGMENTS > 32)
53	SP(64),
54#if (SCSI_MAX_PHYS_SEGMENTS > 64)
55	SP(128),
56#if (SCSI_MAX_PHYS_SEGMENTS > 128)
57	SP(256),
58#if (SCSI_MAX_PHYS_SEGMENTS > 256)
59#error SCSI_MAX_PHYS_SEGMENTS is too large
60#endif
61#endif
62#endif
63#endif
64};
65#undef SP
66
67static void scsi_run_queue(struct request_queue *q);
68
69/*
70 * Function:	scsi_unprep_request()
71 *
72 * Purpose:	Remove all preparation done for a request, including its
73 *		associated scsi_cmnd, so that it can be requeued.
74 *
75 * Arguments:	req	- request to unprepare
76 *
77 * Lock status:	Assumed that no locks are held upon entry.
78 *
79 * Returns:	Nothing.
80 */
81static void scsi_unprep_request(struct request *req)
82{
83	struct scsi_cmnd *cmd = req->special;
84
85	req->flags &= ~REQ_DONTPREP;
86	req->special = (req->flags & REQ_SPECIAL) ? cmd->sc_request : NULL;
87
88	scsi_put_command(cmd);
89}
90
91/*
92 * Function:    scsi_queue_insert()
93 *
94 * Purpose:     Insert a command in the midlevel queue.
95 *
96 * Arguments:   cmd    - command that we are adding to queue.
97 *              reason - why we are inserting command to queue.
98 *
99 * Lock status: Assumed that lock is not held upon entry.
100 *
101 * Returns:     Nothing.
102 *
103 * Notes:       We do this for one of two cases.  Either the host is busy
104 *              and it cannot accept any more commands for the time being,
105 *              or the device returned QUEUE_FULL and can accept no more
106 *              commands.
107 * Notes:       This could be called either from an interrupt context or a
108 *              normal process context.
109 */
110int scsi_queue_insert(struct scsi_cmnd *cmd, int reason)
111{
112	struct Scsi_Host *host = cmd->device->host;
113	struct scsi_device *device = cmd->device;
114	struct request_queue *q = device->request_queue;
115	unsigned long flags;
116
117	SCSI_LOG_MLQUEUE(1,
118		 printk("Inserting command %p into mlqueue\n", cmd));
119
120	/*
121	 * Set the appropriate busy bit for the device/host.
122	 *
123	 * If the host/device isn't busy, assume that something actually
124	 * completed, and that we should be able to queue a command now.
125	 *
126	 * Note that the prior mid-layer assumption that any host could
127	 * always queue at least one command is now broken.  The mid-layer
128	 * will implement a user specifiable stall (see
129	 * scsi_host.max_host_blocked and scsi_device.max_device_blocked)
130	 * if a command is requeued with no other commands outstanding
131	 * either for the device or for the host.
132	 */
133	if (reason == SCSI_MLQUEUE_HOST_BUSY)
134		host->host_blocked = host->max_host_blocked;
135	else if (reason == SCSI_MLQUEUE_DEVICE_BUSY)
136		device->device_blocked = device->max_device_blocked;
137
138	/*
139	 * Decrement the counters, since these commands are no longer
140	 * active on the host/device.
141	 */
142	scsi_device_unbusy(device);
143
144	/*
145	 * Requeue this command.  It will go before all other commands
146	 * that are already in the queue.
147	 *
148	 * NOTE: there is magic here about the way the queue is plugged if
149	 * we have no outstanding commands.
150	 *
151	 * Although we *don't* plug the queue, we call the request
152	 * function.  The SCSI request function detects the blocked condition
153	 * and plugs the queue appropriately.
154         */
155	spin_lock_irqsave(q->queue_lock, flags);
156	blk_requeue_request(q, cmd->request);
157	spin_unlock_irqrestore(q->queue_lock, flags);
158
159	scsi_run_queue(q);
160
161	return 0;
162}
163
164/*
165 * Function:    scsi_do_req
166 *
167 * Purpose:     Queue a SCSI request
168 *
169 * Arguments:   sreq	  - command descriptor.
170 *              cmnd      - actual SCSI command to be performed.
171 *              buffer    - data buffer.
172 *              bufflen   - size of data buffer.
173 *              done      - completion function to be run.
174 *              timeout   - how long to let it run before timeout.
175 *              retries   - number of retries we allow.
176 *
177 * Lock status: No locks held upon entry.
178 *
179 * Returns:     Nothing.
180 *
181 * Notes:	This function is only used for queueing requests for things
182 *		like ioctls and character device requests - this is because
183 *		we essentially just inject a request into the queue for the
184 *		device.
185 *
186 *		In order to support the scsi_device_quiesce function, we
187 *		now inject requests on the *head* of the device queue
188 *		rather than the tail.
189 */
190void scsi_do_req(struct scsi_request *sreq, const void *cmnd,
191		 void *buffer, unsigned bufflen,
192		 void (*done)(struct scsi_cmnd *),
193		 int timeout, int retries)
194{
195	/*
196	 * If the upper level driver is reusing these things, then
197	 * we should release the low-level block now.  Another one will
198	 * be allocated later when this request is getting queued.
199	 */
200	__scsi_release_request(sreq);
201
202	/*
203	 * Our own function scsi_done (which marks the host as not busy,
204	 * disables the timeout counter, etc) will be called by us or by the
205	 * scsi_hosts[host].queuecommand() function needs to also call
206	 * the completion function for the high level driver.
207	 */
208	memcpy(sreq->sr_cmnd, cmnd, sizeof(sreq->sr_cmnd));
209	sreq->sr_bufflen = bufflen;
210	sreq->sr_buffer = buffer;
211	sreq->sr_allowed = retries;
212	sreq->sr_done = done;
213	sreq->sr_timeout_per_command = timeout;
214
215	if (sreq->sr_cmd_len == 0)
216		sreq->sr_cmd_len = COMMAND_SIZE(sreq->sr_cmnd[0]);
217
218	/*
219	 * head injection *required* here otherwise quiesce won't work
220	 *
221	 * Because users of this function are apt to reuse requests with no
222	 * modification, we have to sanitise the request flags here
223	 */
224	sreq->sr_request->flags &= ~REQ_DONTPREP;
225	blk_insert_request(sreq->sr_device->request_queue, sreq->sr_request,
226		       	   1, sreq);
227}
228EXPORT_SYMBOL(scsi_do_req);
229
230/**
231 * scsi_execute - insert request and wait for the result
232 * @sdev:	scsi device
233 * @cmd:	scsi command
234 * @data_direction: data direction
235 * @buffer:	data buffer
236 * @bufflen:	len of buffer
237 * @sense:	optional sense buffer
238 * @timeout:	request timeout in seconds
239 * @retries:	number of times to retry request
240 * @flags:	or into request flags;
241 *
242 * returns the req->errors value which is the the scsi_cmnd result
243 * field.
244 **/
245int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
246		 int data_direction, void *buffer, unsigned bufflen,
247		 unsigned char *sense, int timeout, int retries, int flags)
248{
249	struct request *req;
250	int write = (data_direction == DMA_TO_DEVICE);
251	int ret = DRIVER_ERROR << 24;
252
253	req = blk_get_request(sdev->request_queue, write, __GFP_WAIT);
254
255	if (bufflen &&	blk_rq_map_kern(sdev->request_queue, req,
256					buffer, bufflen, __GFP_WAIT))
257		goto out;
258
259	req->cmd_len = COMMAND_SIZE(cmd[0]);
260	memcpy(req->cmd, cmd, req->cmd_len);
261	req->sense = sense;
262	req->sense_len = 0;
263	req->retries = retries;
264	req->timeout = timeout;
265	req->flags |= flags | REQ_BLOCK_PC | REQ_SPECIAL | REQ_QUIET;
266
267	/*
268	 * head injection *required* here otherwise quiesce won't work
269	 */
270	blk_execute_rq(req->q, NULL, req, 1);
271
272	ret = req->errors;
273 out:
274	blk_put_request(req);
275
276	return ret;
277}
278EXPORT_SYMBOL(scsi_execute);
279
280
281int scsi_execute_req(struct scsi_device *sdev, const unsigned char *cmd,
282		     int data_direction, void *buffer, unsigned bufflen,
283		     struct scsi_sense_hdr *sshdr, int timeout, int retries)
284{
285	char *sense = NULL;
286	int result;
287
288	if (sshdr) {
289		sense = kzalloc(SCSI_SENSE_BUFFERSIZE, GFP_NOIO);
290		if (!sense)
291			return DRIVER_ERROR << 24;
292	}
293	result = scsi_execute(sdev, cmd, data_direction, buffer, bufflen,
294			      sense, timeout, retries, 0);
295	if (sshdr)
296		scsi_normalize_sense(sense, SCSI_SENSE_BUFFERSIZE, sshdr);
297
298	kfree(sense);
299	return result;
300}
301EXPORT_SYMBOL(scsi_execute_req);
302
303struct scsi_io_context {
304	void *data;
305	void (*done)(void *data, char *sense, int result, int resid);
306	char sense[SCSI_SENSE_BUFFERSIZE];
307};
308
309static kmem_cache_t *scsi_io_context_cache;
310
311static void scsi_end_async(struct request *req, int uptodate)
312{
313	struct scsi_io_context *sioc = req->end_io_data;
314
315	if (sioc->done)
316		sioc->done(sioc->data, sioc->sense, req->errors, req->data_len);
317
318	kmem_cache_free(scsi_io_context_cache, sioc);
319	__blk_put_request(req->q, req);
320}
321
322static int scsi_merge_bio(struct request *rq, struct bio *bio)
323{
324	struct request_queue *q = rq->q;
325
326	bio->bi_flags &= ~(1 << BIO_SEG_VALID);
327	if (rq_data_dir(rq) == WRITE)
328		bio->bi_rw |= (1 << BIO_RW);
329	blk_queue_bounce(q, &bio);
330
331	if (!rq->bio)
332		blk_rq_bio_prep(q, rq, bio);
333	else if (!q->back_merge_fn(q, rq, bio))
334		return -EINVAL;
335	else {
336		rq->biotail->bi_next = bio;
337		rq->biotail = bio;
338		rq->hard_nr_sectors += bio_sectors(bio);
339		rq->nr_sectors = rq->hard_nr_sectors;
340	}
341
342	return 0;
343}
344
345static int scsi_bi_endio(struct bio *bio, unsigned int bytes_done, int error)
346{
347	if (bio->bi_size)
348		return 1;
349
350	bio_put(bio);
351	return 0;
352}
353
354/**
355 * scsi_req_map_sg - map a scatterlist into a request
356 * @rq:		request to fill
357 * @sg:		scatterlist
358 * @nsegs:	number of elements
359 * @bufflen:	len of buffer
360 * @gfp:	memory allocation flags
361 *
362 * scsi_req_map_sg maps a scatterlist into a request so that the
363 * request can be sent to the block layer. We do not trust the scatterlist
364 * sent to use, as some ULDs use that struct to only organize the pages.
365 */
366static int scsi_req_map_sg(struct request *rq, struct scatterlist *sgl,
367			   int nsegs, unsigned bufflen, gfp_t gfp)
368{
369	struct request_queue *q = rq->q;
370	int nr_pages = (bufflen + PAGE_SIZE - 1) >> PAGE_SHIFT;
371	unsigned int data_len = 0, len, bytes, off;
372	struct page *page;
373	struct bio *bio = NULL;
374	int i, err, nr_vecs = 0;
375
376	for (i = 0; i < nsegs; i++) {
377		page = sgl[i].page;
378		off = sgl[i].offset;
379		len = sgl[i].length;
380		data_len += len;
381
382		while (len > 0) {
383			bytes = min_t(unsigned int, len, PAGE_SIZE - off);
384
385			if (!bio) {
386				nr_vecs = min_t(int, BIO_MAX_PAGES, nr_pages);
387				nr_pages -= nr_vecs;
388
389				bio = bio_alloc(gfp, nr_vecs);
390				if (!bio) {
391					err = -ENOMEM;
392					goto free_bios;
393				}
394				bio->bi_end_io = scsi_bi_endio;
395			}
396
397			if (bio_add_pc_page(q, bio, page, bytes, off) !=
398			    bytes) {
399				bio_put(bio);
400				err = -EINVAL;
401				goto free_bios;
402			}
403
404			if (bio->bi_vcnt >= nr_vecs) {
405				err = scsi_merge_bio(rq, bio);
406				if (err) {
407					bio_endio(bio, bio->bi_size, 0);
408					goto free_bios;
409				}
410				bio = NULL;
411			}
412
413			page++;
414			len -= bytes;
415			off = 0;
416		}
417	}
418
419	rq->buffer = rq->data = NULL;
420	rq->data_len = data_len;
421	return 0;
422
423free_bios:
424	while ((bio = rq->bio) != NULL) {
425		rq->bio = bio->bi_next;
426		/*
427		 * call endio instead of bio_put incase it was bounced
428		 */
429		bio_endio(bio, bio->bi_size, 0);
430	}
431
432	return err;
433}
434
435/**
436 * scsi_execute_async - insert request
437 * @sdev:	scsi device
438 * @cmd:	scsi command
439 * @cmd_len:	length of scsi cdb
440 * @data_direction: data direction
441 * @buffer:	data buffer (this can be a kernel buffer or scatterlist)
442 * @bufflen:	len of buffer
443 * @use_sg:	if buffer is a scatterlist this is the number of elements
444 * @timeout:	request timeout in seconds
445 * @retries:	number of times to retry request
446 * @flags:	or into request flags
447 **/
448int scsi_execute_async(struct scsi_device *sdev, const unsigned char *cmd,
449		       int cmd_len, int data_direction, void *buffer, unsigned bufflen,
450		       int use_sg, int timeout, int retries, void *privdata,
451		       void (*done)(void *, char *, int, int), gfp_t gfp)
452{
453	struct request *req;
454	struct scsi_io_context *sioc;
455	int err = 0;
456	int write = (data_direction == DMA_TO_DEVICE);
457
458	sioc = kmem_cache_alloc(scsi_io_context_cache, gfp);
459	if (!sioc)
460		return DRIVER_ERROR << 24;
461	memset(sioc, 0, sizeof(*sioc));
462
463	req = blk_get_request(sdev->request_queue, write, gfp);
464	if (!req)
465		goto free_sense;
466	req->flags |= REQ_BLOCK_PC | REQ_QUIET;
467
468	if (use_sg)
469		err = scsi_req_map_sg(req, buffer, use_sg, bufflen, gfp);
470	else if (bufflen)
471		err = blk_rq_map_kern(req->q, req, buffer, bufflen, gfp);
472
473	if (err)
474		goto free_req;
475
476	req->cmd_len = cmd_len;
477	memcpy(req->cmd, cmd, req->cmd_len);
478	req->sense = sioc->sense;
479	req->sense_len = 0;
480	req->timeout = timeout;
481	req->retries = retries;
482	req->end_io_data = sioc;
483
484	sioc->data = privdata;
485	sioc->done = done;
486
487	blk_execute_rq_nowait(req->q, NULL, req, 1, scsi_end_async);
488	return 0;
489
490free_req:
491	blk_put_request(req);
492free_sense:
493	kfree(sioc);
494	return DRIVER_ERROR << 24;
495}
496EXPORT_SYMBOL_GPL(scsi_execute_async);
497
498/*
499 * Function:    scsi_init_cmd_errh()
500 *
501 * Purpose:     Initialize cmd fields related to error handling.
502 *
503 * Arguments:   cmd	- command that is ready to be queued.
504 *
505 * Returns:     Nothing
506 *
507 * Notes:       This function has the job of initializing a number of
508 *              fields related to error handling.   Typically this will
509 *              be called once for each command, as required.
510 */
511static int scsi_init_cmd_errh(struct scsi_cmnd *cmd)
512{
513	cmd->serial_number = 0;
514
515	memset(cmd->sense_buffer, 0, sizeof cmd->sense_buffer);
516
517	if (cmd->cmd_len == 0)
518		cmd->cmd_len = COMMAND_SIZE(cmd->cmnd[0]);
519
520	/*
521	 * We need saved copies of a number of fields - this is because
522	 * error handling may need to overwrite these with different values
523	 * to run different commands, and once error handling is complete,
524	 * we will need to restore these values prior to running the actual
525	 * command.
526	 */
527	cmd->old_use_sg = cmd->use_sg;
528	cmd->old_cmd_len = cmd->cmd_len;
529	cmd->sc_old_data_direction = cmd->sc_data_direction;
530	cmd->old_underflow = cmd->underflow;
531	memcpy(cmd->data_cmnd, cmd->cmnd, sizeof(cmd->cmnd));
532	cmd->buffer = cmd->request_buffer;
533	cmd->bufflen = cmd->request_bufflen;
534
535	return 1;
536}
537
538/*
539 * Function:   scsi_setup_cmd_retry()
540 *
541 * Purpose:    Restore the command state for a retry
542 *
543 * Arguments:  cmd	- command to be restored
544 *
545 * Returns:    Nothing
546 *
547 * Notes:      Immediately prior to retrying a command, we need
548 *             to restore certain fields that we saved above.
549 */
550void scsi_setup_cmd_retry(struct scsi_cmnd *cmd)
551{
552	memcpy(cmd->cmnd, cmd->data_cmnd, sizeof(cmd->data_cmnd));
553	cmd->request_buffer = cmd->buffer;
554	cmd->request_bufflen = cmd->bufflen;
555	cmd->use_sg = cmd->old_use_sg;
556	cmd->cmd_len = cmd->old_cmd_len;
557	cmd->sc_data_direction = cmd->sc_old_data_direction;
558	cmd->underflow = cmd->old_underflow;
559}
560
561void scsi_device_unbusy(struct scsi_device *sdev)
562{
563	struct Scsi_Host *shost = sdev->host;
564	unsigned long flags;
565
566	spin_lock_irqsave(shost->host_lock, flags);
567	shost->host_busy--;
568	if (unlikely(scsi_host_in_recovery(shost) &&
569		     shost->host_failed))
570		scsi_eh_wakeup(shost);
571	spin_unlock(shost->host_lock);
572	spin_lock(sdev->request_queue->queue_lock);
573	sdev->device_busy--;
574	spin_unlock_irqrestore(sdev->request_queue->queue_lock, flags);
575}
576
577/*
578 * Called for single_lun devices on IO completion. Clear starget_sdev_user,
579 * and call blk_run_queue for all the scsi_devices on the target -
580 * including current_sdev first.
581 *
582 * Called with *no* scsi locks held.
583 */
584static void scsi_single_lun_run(struct scsi_device *current_sdev)
585{
586	struct Scsi_Host *shost = current_sdev->host;
587	struct scsi_device *sdev, *tmp;
588	struct scsi_target *starget = scsi_target(current_sdev);
589	unsigned long flags;
590
591	spin_lock_irqsave(shost->host_lock, flags);
592	starget->starget_sdev_user = NULL;
593	spin_unlock_irqrestore(shost->host_lock, flags);
594
595	/*
596	 * Call blk_run_queue for all LUNs on the target, starting with
597	 * current_sdev. We race with others (to set starget_sdev_user),
598	 * but in most cases, we will be first. Ideally, each LU on the
599	 * target would get some limited time or requests on the target.
600	 */
601	blk_run_queue(current_sdev->request_queue);
602
603	spin_lock_irqsave(shost->host_lock, flags);
604	if (starget->starget_sdev_user)
605		goto out;
606	list_for_each_entry_safe(sdev, tmp, &starget->devices,
607			same_target_siblings) {
608		if (sdev == current_sdev)
609			continue;
610		if (scsi_device_get(sdev))
611			continue;
612
613		spin_unlock_irqrestore(shost->host_lock, flags);
614		blk_run_queue(sdev->request_queue);
615		spin_lock_irqsave(shost->host_lock, flags);
616
617		scsi_device_put(sdev);
618	}
619 out:
620	spin_unlock_irqrestore(shost->host_lock, flags);
621}
622
623/*
624 * Function:	scsi_run_queue()
625 *
626 * Purpose:	Select a proper request queue to serve next
627 *
628 * Arguments:	q	- last request's queue
629 *
630 * Returns:     Nothing
631 *
632 * Notes:	The previous command was completely finished, start
633 *		a new one if possible.
634 */
635static void scsi_run_queue(struct request_queue *q)
636{
637	struct scsi_device *sdev = q->queuedata;
638	struct Scsi_Host *shost = sdev->host;
639	unsigned long flags;
640
641	if (sdev->single_lun)
642		scsi_single_lun_run(sdev);
643
644	spin_lock_irqsave(shost->host_lock, flags);
645	while (!list_empty(&shost->starved_list) &&
646	       !shost->host_blocked && !shost->host_self_blocked &&
647		!((shost->can_queue > 0) &&
648		  (shost->host_busy >= shost->can_queue))) {
649		/*
650		 * As long as shost is accepting commands and we have
651		 * starved queues, call blk_run_queue. scsi_request_fn
652		 * drops the queue_lock and can add us back to the
653		 * starved_list.
654		 *
655		 * host_lock protects the starved_list and starved_entry.
656		 * scsi_request_fn must get the host_lock before checking
657		 * or modifying starved_list or starved_entry.
658		 */
659		sdev = list_entry(shost->starved_list.next,
660					  struct scsi_device, starved_entry);
661		list_del_init(&sdev->starved_entry);
662		spin_unlock_irqrestore(shost->host_lock, flags);
663
664		blk_run_queue(sdev->request_queue);
665
666		spin_lock_irqsave(shost->host_lock, flags);
667		if (unlikely(!list_empty(&sdev->starved_entry)))
668			/*
669			 * sdev lost a race, and was put back on the
670			 * starved list. This is unlikely but without this
671			 * in theory we could loop forever.
672			 */
673			break;
674	}
675	spin_unlock_irqrestore(shost->host_lock, flags);
676
677	blk_run_queue(q);
678}
679
680/*
681 * Function:	scsi_requeue_command()
682 *
683 * Purpose:	Handle post-processing of completed commands.
684 *
685 * Arguments:	q	- queue to operate on
686 *		cmd	- command that may need to be requeued.
687 *
688 * Returns:	Nothing
689 *
690 * Notes:	After command completion, there may be blocks left
691 *		over which weren't finished by the previous command
692 *		this can be for a number of reasons - the main one is
693 *		I/O errors in the middle of the request, in which case
694 *		we need to request the blocks that come after the bad
695 *		sector.
696 * Notes:	Upon return, cmd is a stale pointer.
697 */
698static void scsi_requeue_command(struct request_queue *q, struct scsi_cmnd *cmd)
699{
700	struct request *req = cmd->request;
701	unsigned long flags;
702
703	scsi_unprep_request(req);
704	spin_lock_irqsave(q->queue_lock, flags);
705	blk_requeue_request(q, req);
706	spin_unlock_irqrestore(q->queue_lock, flags);
707
708	scsi_run_queue(q);
709}
710
711void scsi_next_command(struct scsi_cmnd *cmd)
712{
713	struct scsi_device *sdev = cmd->device;
714	struct request_queue *q = sdev->request_queue;
715
716	/* need to hold a reference on the device before we let go of the cmd */
717	get_device(&sdev->sdev_gendev);
718
719	scsi_put_command(cmd);
720	scsi_run_queue(q);
721
722	/* ok to remove device now */
723	put_device(&sdev->sdev_gendev);
724}
725
726void scsi_run_host_queues(struct Scsi_Host *shost)
727{
728	struct scsi_device *sdev;
729
730	shost_for_each_device(sdev, shost)
731		scsi_run_queue(sdev->request_queue);
732}
733
734/*
735 * Function:    scsi_end_request()
736 *
737 * Purpose:     Post-processing of completed commands (usually invoked at end
738 *		of upper level post-processing and scsi_io_completion).
739 *
740 * Arguments:   cmd	 - command that is complete.
741 *              uptodate - 1 if I/O indicates success, <= 0 for I/O error.
742 *              bytes    - number of bytes of completed I/O
743 *		requeue  - indicates whether we should requeue leftovers.
744 *
745 * Lock status: Assumed that lock is not held upon entry.
746 *
747 * Returns:     cmd if requeue required, NULL otherwise.
748 *
749 * Notes:       This is called for block device requests in order to
750 *              mark some number of sectors as complete.
751 *
752 *		We are guaranteeing that the request queue will be goosed
753 *		at some point during this call.
754 * Notes:	If cmd was requeued, upon return it will be a stale pointer.
755 */
756static struct scsi_cmnd *scsi_end_request(struct scsi_cmnd *cmd, int uptodate,
757					  int bytes, int requeue)
758{
759	request_queue_t *q = cmd->device->request_queue;
760	struct request *req = cmd->request;
761	unsigned long flags;
762
763	/*
764	 * If there are blocks left over at the end, set up the command
765	 * to queue the remainder of them.
766	 */
767	if (end_that_request_chunk(req, uptodate, bytes)) {
768		int leftover = (req->hard_nr_sectors << 9);
769
770		if (blk_pc_request(req))
771			leftover = req->data_len;
772
773		/* kill remainder if no retrys */
774		if (!uptodate && blk_noretry_request(req))
775			end_that_request_chunk(req, 0, leftover);
776		else {
777			if (requeue) {
778				/*
779				 * Bleah.  Leftovers again.  Stick the
780				 * leftovers in the front of the
781				 * queue, and goose the queue again.
782				 */
783				scsi_requeue_command(q, cmd);
784				cmd = NULL;
785			}
786			return cmd;
787		}
788	}
789
790	add_disk_randomness(req->rq_disk);
791
792	spin_lock_irqsave(q->queue_lock, flags);
793	if (blk_rq_tagged(req))
794		blk_queue_end_tag(q, req);
795	end_that_request_last(req, uptodate);
796	spin_unlock_irqrestore(q->queue_lock, flags);
797
798	/*
799	 * This will goose the queue request function at the end, so we don't
800	 * need to worry about launching another command.
801	 */
802	scsi_next_command(cmd);
803	return NULL;
804}
805
806static struct scatterlist *scsi_alloc_sgtable(struct scsi_cmnd *cmd, gfp_t gfp_mask)
807{
808	struct scsi_host_sg_pool *sgp;
809	struct scatterlist *sgl;
810
811	BUG_ON(!cmd->use_sg);
812
813	switch (cmd->use_sg) {
814	case 1 ... 8:
815		cmd->sglist_len = 0;
816		break;
817	case 9 ... 16:
818		cmd->sglist_len = 1;
819		break;
820	case 17 ... 32:
821		cmd->sglist_len = 2;
822		break;
823#if (SCSI_MAX_PHYS_SEGMENTS > 32)
824	case 33 ... 64:
825		cmd->sglist_len = 3;
826		break;
827#if (SCSI_MAX_PHYS_SEGMENTS > 64)
828	case 65 ... 128:
829		cmd->sglist_len = 4;
830		break;
831#if (SCSI_MAX_PHYS_SEGMENTS  > 128)
832	case 129 ... 256:
833		cmd->sglist_len = 5;
834		break;
835#endif
836#endif
837#endif
838	default:
839		return NULL;
840	}
841
842	sgp = scsi_sg_pools + cmd->sglist_len;
843	sgl = mempool_alloc(sgp->pool, gfp_mask);
844	return sgl;
845}
846
847static void scsi_free_sgtable(struct scatterlist *sgl, int index)
848{
849	struct scsi_host_sg_pool *sgp;
850
851	BUG_ON(index >= SG_MEMPOOL_NR);
852
853	sgp = scsi_sg_pools + index;
854	mempool_free(sgl, sgp->pool);
855}
856
857/*
858 * Function:    scsi_release_buffers()
859 *
860 * Purpose:     Completion processing for block device I/O requests.
861 *
862 * Arguments:   cmd	- command that we are bailing.
863 *
864 * Lock status: Assumed that no lock is held upon entry.
865 *
866 * Returns:     Nothing
867 *
868 * Notes:       In the event that an upper level driver rejects a
869 *		command, we must release resources allocated during
870 *		the __init_io() function.  Primarily this would involve
871 *		the scatter-gather table, and potentially any bounce
872 *		buffers.
873 */
874static void scsi_release_buffers(struct scsi_cmnd *cmd)
875{
876	struct request *req = cmd->request;
877
878	/*
879	 * Free up any indirection buffers we allocated for DMA purposes.
880	 */
881	if (cmd->use_sg)
882		scsi_free_sgtable(cmd->request_buffer, cmd->sglist_len);
883	else if (cmd->request_buffer != req->buffer)
884		kfree(cmd->request_buffer);
885
886	/*
887	 * Zero these out.  They now point to freed memory, and it is
888	 * dangerous to hang onto the pointers.
889	 */
890	cmd->buffer  = NULL;
891	cmd->bufflen = 0;
892	cmd->request_buffer = NULL;
893	cmd->request_bufflen = 0;
894}
895
896/*
897 * Function:    scsi_io_completion()
898 *
899 * Purpose:     Completion processing for block device I/O requests.
900 *
901 * Arguments:   cmd   - command that is finished.
902 *
903 * Lock status: Assumed that no lock is held upon entry.
904 *
905 * Returns:     Nothing
906 *
907 * Notes:       This function is matched in terms of capabilities to
908 *              the function that created the scatter-gather list.
909 *              In other words, if there are no bounce buffers
910 *              (the normal case for most drivers), we don't need
911 *              the logic to deal with cleaning up afterwards.
912 *
913 *		We must do one of several things here:
914 *
915 *		a) Call scsi_end_request.  This will finish off the
916 *		   specified number of sectors.  If we are done, the
917 *		   command block will be released, and the queue
918 *		   function will be goosed.  If we are not done, then
919 *		   scsi_end_request will directly goose the queue.
920 *
921 *		b) We can just use scsi_requeue_command() here.  This would
922 *		   be used if we just wanted to retry, for example.
923 */
924void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes,
925			unsigned int block_bytes)
926{
927	int result = cmd->result;
928	int this_count = cmd->bufflen;
929	request_queue_t *q = cmd->device->request_queue;
930	struct request *req = cmd->request;
931	int clear_errors = 1;
932	struct scsi_sense_hdr sshdr;
933	int sense_valid = 0;
934	int sense_deferred = 0;
935
936	/*
937	 * Free up any indirection buffers we allocated for DMA purposes.
938	 * For the case of a READ, we need to copy the data out of the
939	 * bounce buffer and into the real buffer.
940	 */
941	if (cmd->use_sg)
942		scsi_free_sgtable(cmd->buffer, cmd->sglist_len);
943	else if (cmd->buffer != req->buffer) {
944		if (rq_data_dir(req) == READ) {
945			unsigned long flags;
946			char *to = bio_kmap_irq(req->bio, &flags);
947			memcpy(to, cmd->buffer, cmd->bufflen);
948			bio_kunmap_irq(to, &flags);
949		}
950		kfree(cmd->buffer);
951	}
952
953	if (result) {
954		sense_valid = scsi_command_normalize_sense(cmd, &sshdr);
955		if (sense_valid)
956			sense_deferred = scsi_sense_is_deferred(&sshdr);
957	}
958	if (blk_pc_request(req)) { /* SG_IO ioctl from block level */
959		req->errors = result;
960		if (result) {
961			clear_errors = 0;
962			if (sense_valid && req->sense) {
963				/*
964				 * SG_IO wants current and deferred errors
965				 */
966				int len = 8 + cmd->sense_buffer[7];
967
968				if (len > SCSI_SENSE_BUFFERSIZE)
969					len = SCSI_SENSE_BUFFERSIZE;
970				memcpy(req->sense, cmd->sense_buffer,  len);
971				req->sense_len = len;
972			}
973		} else
974			req->data_len = cmd->resid;
975	}
976
977	/*
978	 * Zero these out.  They now point to freed memory, and it is
979	 * dangerous to hang onto the pointers.
980	 */
981	cmd->buffer  = NULL;
982	cmd->bufflen = 0;
983	cmd->request_buffer = NULL;
984	cmd->request_bufflen = 0;
985
986	/*
987	 * Next deal with any sectors which we were able to correctly
988	 * handle.
989	 */
990	if (good_bytes >= 0) {
991		SCSI_LOG_HLCOMPLETE(1, printk("%ld sectors total, %d bytes done.\n",
992					      req->nr_sectors, good_bytes));
993		SCSI_LOG_HLCOMPLETE(1, printk("use_sg is %d\n", cmd->use_sg));
994
995		if (clear_errors)
996			req->errors = 0;
997		/*
998		 * If multiple sectors are requested in one buffer, then
999		 * they will have been finished off by the first command.
1000		 * If not, then we have a multi-buffer command.
1001		 *
1002		 * If block_bytes != 0, it means we had a medium error
1003		 * of some sort, and that we want to mark some number of
1004		 * sectors as not uptodate.  Thus we want to inhibit
1005		 * requeueing right here - we will requeue down below
1006		 * when we handle the bad sectors.
1007		 */
1008
1009		/*
1010		 * If the command completed without error, then either
1011		 * finish off the rest of the command, or start a new one.
1012		 */
1013		if (scsi_end_request(cmd, 1, good_bytes, result == 0) == NULL)
1014			return;
1015	}
1016	/*
1017	 * Now, if we were good little boys and girls, Santa left us a request
1018	 * sense buffer.  We can extract information from this, so we
1019	 * can choose a block to remap, etc.
1020	 */
1021	if (sense_valid && !sense_deferred) {
1022		switch (sshdr.sense_key) {
1023		case UNIT_ATTENTION:
1024			if (cmd->device->removable) {
1025				/* detected disc change.  set a bit
1026				 * and quietly refuse further access.
1027				 */
1028				cmd->device->changed = 1;
1029				scsi_end_request(cmd, 0,
1030						this_count, 1);
1031				return;
1032			} else {
1033				/*
1034				* Must have been a power glitch, or a
1035				* bus reset.  Could not have been a
1036				* media change, so we just retry the
1037				* request and see what happens.
1038				*/
1039				scsi_requeue_command(q, cmd);
1040				return;
1041			}
1042			break;
1043		case ILLEGAL_REQUEST:
1044			/*
1045		 	* If we had an ILLEGAL REQUEST returned, then we may
1046		 	* have performed an unsupported command.  The only
1047		 	* thing this should be would be a ten byte read where
1048			* only a six byte read was supported.  Also, on a
1049			* system where READ CAPACITY failed, we may have read
1050			* past the end of the disk.
1051		 	*/
1052			if ((cmd->device->use_10_for_rw &&
1053			    sshdr.asc == 0x20 && sshdr.ascq == 0x00) &&
1054			    (cmd->cmnd[0] == READ_10 ||
1055			     cmd->cmnd[0] == WRITE_10)) {
1056				cmd->device->use_10_for_rw = 0;
1057				/*
1058				 * This will cause a retry with a 6-byte
1059				 * command.
1060				 */
1061				scsi_requeue_command(q, cmd);
1062				result = 0;
1063			} else {
1064				scsi_end_request(cmd, 0, this_count, 1);
1065				return;
1066			}
1067			break;
1068		case NOT_READY:
1069			/*
1070			 * If the device is in the process of becoming ready,
1071			 * retry.
1072			 */
1073			if (sshdr.asc == 0x04 && sshdr.ascq == 0x01) {
1074				scsi_requeue_command(q, cmd);
1075				return;
1076			}
1077			if (!(req->flags & REQ_QUIET))
1078				scmd_printk(KERN_INFO, cmd,
1079					   "Device not ready.\n");
1080			scsi_end_request(cmd, 0, this_count, 1);
1081			return;
1082		case VOLUME_OVERFLOW:
1083			if (!(req->flags & REQ_QUIET)) {
1084				scmd_printk(KERN_INFO, cmd,
1085					   "Volume overflow, CDB: ");
1086				__scsi_print_command(cmd->data_cmnd);
1087				scsi_print_sense("", cmd);
1088			}
1089			scsi_end_request(cmd, 0, block_bytes, 1);
1090			return;
1091		default:
1092			break;
1093		}
1094	}			/* driver byte != 0 */
1095	if (host_byte(result) == DID_RESET) {
1096		/*
1097		 * Third party bus reset or reset for error
1098		 * recovery reasons.  Just retry the request
1099		 * and see what happens.
1100		 */
1101		scsi_requeue_command(q, cmd);
1102		return;
1103	}
1104	if (result) {
1105		if (!(req->flags & REQ_QUIET)) {
1106			scmd_printk(KERN_INFO, cmd,
1107				   "SCSI error: return code = 0x%x\n", result);
1108
1109			if (driver_byte(result) & DRIVER_SENSE)
1110				scsi_print_sense("", cmd);
1111		}
1112		/*
1113		 * Mark a single buffer as not uptodate.  Queue the remainder.
1114		 * We sometimes get this cruft in the event that a medium error
1115		 * isn't properly reported.
1116		 */
1117		block_bytes = req->hard_cur_sectors << 9;
1118		if (!block_bytes)
1119			block_bytes = req->data_len;
1120		scsi_end_request(cmd, 0, block_bytes, 1);
1121	}
1122}
1123EXPORT_SYMBOL(scsi_io_completion);
1124
1125/*
1126 * Function:    scsi_init_io()
1127 *
1128 * Purpose:     SCSI I/O initialize function.
1129 *
1130 * Arguments:   cmd   - Command descriptor we wish to initialize
1131 *
1132 * Returns:     0 on success
1133 *		BLKPREP_DEFER if the failure is retryable
1134 *		BLKPREP_KILL if the failure is fatal
1135 */
1136static int scsi_init_io(struct scsi_cmnd *cmd)
1137{
1138	struct request     *req = cmd->request;
1139	struct scatterlist *sgpnt;
1140	int		   count;
1141
1142	/*
1143	 * if this is a rq->data based REQ_BLOCK_PC, setup for a non-sg xfer
1144	 */
1145	if ((req->flags & REQ_BLOCK_PC) && !req->bio) {
1146		cmd->request_bufflen = req->data_len;
1147		cmd->request_buffer = req->data;
1148		req->buffer = req->data;
1149		cmd->use_sg = 0;
1150		return 0;
1151	}
1152
1153	/*
1154	 * we used to not use scatter-gather for single segment request,
1155	 * but now we do (it makes highmem I/O easier to support without
1156	 * kmapping pages)
1157	 */
1158	cmd->use_sg = req->nr_phys_segments;
1159
1160	/*
1161	 * if sg table allocation fails, requeue request later.
1162	 */
1163	sgpnt = scsi_alloc_sgtable(cmd, GFP_ATOMIC);
1164	if (unlikely(!sgpnt)) {
1165		scsi_unprep_request(req);
1166		return BLKPREP_DEFER;
1167	}
1168
1169	cmd->request_buffer = (char *) sgpnt;
1170	cmd->request_bufflen = req->nr_sectors << 9;
1171	if (blk_pc_request(req))
1172		cmd->request_bufflen = req->data_len;
1173	req->buffer = NULL;
1174
1175	/*
1176	 * Next, walk the list, and fill in the addresses and sizes of
1177	 * each segment.
1178	 */
1179	count = blk_rq_map_sg(req->q, req, cmd->request_buffer);
1180
1181	/*
1182	 * mapped well, send it off
1183	 */
1184	if (likely(count <= cmd->use_sg)) {
1185		cmd->use_sg = count;
1186		return 0;
1187	}
1188
1189	printk(KERN_ERR "Incorrect number of segments after building list\n");
1190	printk(KERN_ERR "counted %d, received %d\n", count, cmd->use_sg);
1191	printk(KERN_ERR "req nr_sec %lu, cur_nr_sec %u\n", req->nr_sectors,
1192			req->current_nr_sectors);
1193
1194	/* release the command and kill it */
1195	scsi_release_buffers(cmd);
1196	scsi_put_command(cmd);
1197	return BLKPREP_KILL;
1198}
1199
1200static int scsi_issue_flush_fn(request_queue_t *q, struct gendisk *disk,
1201			       sector_t *error_sector)
1202{
1203	struct scsi_device *sdev = q->queuedata;
1204	struct scsi_driver *drv;
1205
1206	if (sdev->sdev_state != SDEV_RUNNING)
1207		return -ENXIO;
1208
1209	drv = *(struct scsi_driver **) disk->private_data;
1210	if (drv->issue_flush)
1211		return drv->issue_flush(&sdev->sdev_gendev, error_sector);
1212
1213	return -EOPNOTSUPP;
1214}
1215
1216static void scsi_blk_pc_done(struct scsi_cmnd *cmd)
1217{
1218	BUG_ON(!blk_pc_request(cmd->request));
1219	/*
1220	 * This will complete the whole command with uptodate=1 so
1221	 * as far as the block layer is concerned the command completed
1222	 * successfully. Since this is a REQ_BLOCK_PC command the
1223	 * caller should check the request's errors value
1224	 */
1225	scsi_io_completion(cmd, cmd->bufflen, 0);
1226}
1227
1228static void scsi_setup_blk_pc_cmnd(struct scsi_cmnd *cmd)
1229{
1230	struct request *req = cmd->request;
1231
1232	BUG_ON(sizeof(req->cmd) > sizeof(cmd->cmnd));
1233	memcpy(cmd->cmnd, req->cmd, sizeof(cmd->cmnd));
1234	cmd->cmd_len = req->cmd_len;
1235	if (!req->data_len)
1236		cmd->sc_data_direction = DMA_NONE;
1237	else if (rq_data_dir(req) == WRITE)
1238		cmd->sc_data_direction = DMA_TO_DEVICE;
1239	else
1240		cmd->sc_data_direction = DMA_FROM_DEVICE;
1241
1242	cmd->transfersize = req->data_len;
1243	cmd->allowed = req->retries;
1244	cmd->timeout_per_command = req->timeout;
1245	cmd->done = scsi_blk_pc_done;
1246}
1247
/*
 * Function:    scsi_prep_fn()
 *
 * Purpose:     Request preparation callback (installed on the queue with
 *		blk_queue_prep_rq() in scsi_alloc_queue()).  Attaches a
 *		scsi_cmnd to the request and initialises it.
 *
 * Arguments:   q	- request queue; q->queuedata is the scsi_device
 *		req	- request to prepare
 *
 * Returns:     BLKPREP_OK    - request is prepared, REQ_DONTPREP is set
 *		BLKPREP_DEFER - could not prepare now (no command or sg
 *				table available, or the device is in
 *				SDEV_QUIESCE/SDEV_BLOCK); the queue is
 *				plugged if the device has nothing in flight
 *		BLKPREP_KILL  - request rejected; req->errors is set to
 *				DID_NO_CONNECT << 16
 */
static int scsi_prep_fn(struct request_queue *q, struct request *req)
{
	struct scsi_device *sdev = q->queuedata;
	struct scsi_cmnd *cmd;
	/*
	 * 0 while the device is SDEV_RUNNING; otherwise holds the current
	 * (non-running) device state so the code below can tell the
	 * "defer" states from the "kill" states.
	 */
	int specials_only = 0;

	/*
	 * Just check to see if the device is online.  If it isn't, we
	 * refuse to process any commands.  The device must be brought
	 * online before trying any recovery commands
	 */
	if (unlikely(!scsi_device_online(sdev))) {
		sdev_printk(KERN_ERR, sdev,
			    "rejecting I/O to offline device\n");
		goto kill;
	}
	if (unlikely(sdev->sdev_state != SDEV_RUNNING)) {
		/* OK, we're not in a running state don't prep
		 * user commands */
		if (sdev->sdev_state == SDEV_DEL) {
			/* Device is fully deleted, no commands
			 * at all allowed down */
			sdev_printk(KERN_ERR, sdev,
				    "rejecting I/O to dead device\n");
			goto kill;
		}
		/* OK, we only allow special commands (i.e. not
		 * user initiated ones) */
		specials_only = sdev->sdev_state;
	}

	/*
	 * Find the actual device driver associated with this command.
	 * The SPECIAL requests are things like character device or
	 * ioctls, which did not originate from ll_rw_blk.  Note that
	 * the special field is also used to indicate the cmd for
	 * the remainder of a partially fulfilled request that can
	 * come up when there is a medium error.  We have to treat
	 * these two cases differently.  We differentiate by checking
	 * whether req->special carries the scsi_request magic number.
	 */
	if (req->flags & REQ_SPECIAL && req->special) {
		struct scsi_request *sreq = req->special;

		if (sreq->sr_magic == SCSI_REQ_MAGIC) {
			/* A scsi_request: build a fresh command from it. */
			cmd = scsi_get_command(sreq->sr_device, GFP_ATOMIC);
			if (unlikely(!cmd))
				goto defer;
			scsi_init_cmd_from_req(cmd, sreq);
		} else
			/* Otherwise req->special already is the command. */
			cmd = req->special;
	} else if (req->flags & (REQ_CMD | REQ_BLOCK_PC)) {

		if(unlikely(specials_only) && !(req->flags & REQ_SPECIAL)) {
			/*
			 * Non-special request while the device is not
			 * running: hold it back for QUIESCE/BLOCK, reject
			 * it in any other state.
			 */
			if(specials_only == SDEV_QUIESCE ||
					specials_only == SDEV_BLOCK)
				goto defer;

			sdev_printk(KERN_ERR, sdev,
				    "rejecting I/O to device being removed\n");
			goto kill;
		}


		/*
		 * Now try and find a command block that we can use.
		 * A non-NULL req->special is reused as the command.
		 */
		if (!req->special) {
			cmd = scsi_get_command(sdev, GFP_ATOMIC);
			if (unlikely(!cmd))
				goto defer;
		} else
			cmd = req->special;

		/* pull a tag out of the request if we have one */
		cmd->tag = req->tag;
	} else {
		blk_dump_rq_flags(req, "SCSI bad req");
		goto kill;
	}

	/* note the overloading of req->special.  When the tag
	 * is active it always means cmd.  If the tag goes
	 * back for re-queueing, it may be reset */
	req->special = cmd;
	cmd->request = req;

	/*
	 * FIXME: drop the lock here because the functions below
	 * expect to be called without the queue lock held.  Also,
	 * previously, we dequeued the request before dropping the
	 * lock.  We hope REQ_STARTED prevents anything untoward from
	 * happening now.
	 */
	if (req->flags & (REQ_CMD | REQ_BLOCK_PC)) {
		int ret;

		/*
		 * This sets up the scatter-gather table (allocating if
		 * required).
		 */
		ret = scsi_init_io(cmd);
		switch(ret) {
			/* For BLKPREP_KILL/DEFER the cmd was released */
		case BLKPREP_KILL:
			goto kill;
		case BLKPREP_DEFER:
			goto defer;
		}

		/*
		 * Initialize the actual SCSI command for this request.
		 *
		 * For REQ_BLOCK_PC the CDB comes from the request itself.
		 * Otherwise the upper-level driver's init_command() will:
		 *  1) Fill in the actual SCSI command.
		 *  2) Fill in any other upper-level specific fields
		 * (timeout).
		 *
		 * If init_command() returns 0, it means that the request
		 * failed (reading past end of disk, reading offline device,
		 * etc).   This won't actually talk to the device, but
		 * some kinds of consistency checking may cause the
		 * request to be rejected immediately.
		 */
		if (req->flags & REQ_BLOCK_PC) {
			scsi_setup_blk_pc_cmnd(cmd);
		} else if (req->rq_disk) {
			struct scsi_driver *drv;

			drv = *(struct scsi_driver **)req->rq_disk->private_data;
			if (unlikely(!drv->init_command(cmd))) {
				/* Rejected: give back sg table and command. */
				scsi_release_buffers(cmd);
				scsi_put_command(cmd);
				goto kill;
			}
		}
	}

	/*
	 * The request is now prepped, no need to come back here
	 */
	req->flags |= REQ_DONTPREP;
	return BLKPREP_OK;

 defer:
	/* If we defer, the elv_next_request() returns NULL, but the
	 * queue must be restarted, so we plug here if no returning
	 * command will automatically do that. */
	if (sdev->device_busy == 0)
		blk_plug_device(q);
	return BLKPREP_DEFER;
 kill:
	req->errors = DID_NO_CONNECT << 16;
	return BLKPREP_KILL;
}
1405
1406/*
1407 * scsi_dev_queue_ready: if we can send requests to sdev, return 1 else
1408 * return 0.
1409 *
1410 * Called with the queue_lock held.
1411 */
1412static inline int scsi_dev_queue_ready(struct request_queue *q,
1413				  struct scsi_device *sdev)
1414{
1415	if (sdev->device_busy >= sdev->queue_depth)
1416		return 0;
1417	if (sdev->device_busy == 0 && sdev->device_blocked) {
1418		/*
1419		 * unblock after device_blocked iterates to zero
1420		 */
1421		if (--sdev->device_blocked == 0) {
1422			SCSI_LOG_MLQUEUE(3,
1423				   sdev_printk(KERN_INFO, sdev,
1424				   "unblocking device at zero depth\n"));
1425		} else {
1426			blk_plug_device(q);
1427			return 0;
1428		}
1429	}
1430	if (sdev->device_blocked)
1431		return 0;
1432
1433	return 1;
1434}
1435
1436/*
1437 * scsi_host_queue_ready: if we can send requests to shost, return 1 else
1438 * return 0. We must end up running the queue again whenever 0 is
1439 * returned, else IO can hang.
1440 *
1441 * Called with host_lock held.
1442 */
1443static inline int scsi_host_queue_ready(struct request_queue *q,
1444				   struct Scsi_Host *shost,
1445				   struct scsi_device *sdev)
1446{
1447	if (scsi_host_in_recovery(shost))
1448		return 0;
1449	if (shost->host_busy == 0 && shost->host_blocked) {
1450		/*
1451		 * unblock after host_blocked iterates to zero
1452		 */
1453		if (--shost->host_blocked == 0) {
1454			SCSI_LOG_MLQUEUE(3,
1455				printk("scsi%d unblocking host at zero depth\n",
1456					shost->host_no));
1457		} else {
1458			blk_plug_device(q);
1459			return 0;
1460		}
1461	}
1462	if ((shost->can_queue > 0 && shost->host_busy >= shost->can_queue) ||
1463	    shost->host_blocked || shost->host_self_blocked) {
1464		if (list_empty(&sdev->starved_entry))
1465			list_add_tail(&sdev->starved_entry, &shost->starved_list);
1466		return 0;
1467	}
1468
1469	/* We're OK to process the command, so we can't be starved */
1470	if (!list_empty(&sdev->starved_entry))
1471		list_del_init(&sdev->starved_entry);
1472
1473	return 1;
1474}
1475
1476/*
1477 * Kill a request for a dead device
1478 */
1479static void scsi_kill_request(struct request *req, request_queue_t *q)
1480{
1481	struct scsi_cmnd *cmd = req->special;
1482	struct scsi_device *sdev = cmd->device;
1483	struct Scsi_Host *shost = sdev->host;
1484
1485	blkdev_dequeue_request(req);
1486
1487	if (unlikely(cmd == NULL)) {
1488		printk(KERN_CRIT "impossible request in %s.\n",
1489				 __FUNCTION__);
1490		BUG();
1491	}
1492
1493	scsi_init_cmd_errh(cmd);
1494	cmd->result = DID_NO_CONNECT << 16;
1495	atomic_inc(&cmd->device->iorequest_cnt);
1496
1497	/*
1498	 * SCSI request completion path will do scsi_device_unbusy(),
1499	 * bump busy counts.  To bump the counters, we need to dance
1500	 * with the locks as normal issue path does.
1501	 */
1502	sdev->device_busy++;
1503	spin_unlock(sdev->request_queue->queue_lock);
1504	spin_lock(shost->host_lock);
1505	shost->host_busy++;
1506	spin_unlock(shost->host_lock);
1507	spin_lock(sdev->request_queue->queue_lock);
1508
1509	__scsi_done(cmd);
1510}
1511
1512static void scsi_softirq_done(struct request *rq)
1513{
1514	struct scsi_cmnd *cmd = rq->completion_data;
1515	unsigned long wait_for = (cmd->allowed + 1) * cmd->timeout_per_command;
1516	int disposition;
1517
1518	INIT_LIST_HEAD(&cmd->eh_entry);
1519
1520	disposition = scsi_decide_disposition(cmd);
1521	if (disposition != SUCCESS &&
1522	    time_before(cmd->jiffies_at_alloc + wait_for, jiffies)) {
1523		sdev_printk(KERN_ERR, cmd->device,
1524			    "timing out command, waited %lus\n",
1525			    wait_for/HZ);
1526		disposition = SUCCESS;
1527	}
1528
1529	scsi_log_completion(cmd, disposition);
1530
1531	switch (disposition) {
1532		case SUCCESS:
1533			scsi_finish_command(cmd);
1534			break;
1535		case NEEDS_RETRY:
1536			scsi_retry_command(cmd);
1537			break;
1538		case ADD_TO_MLQUEUE:
1539			scsi_queue_insert(cmd, SCSI_MLQUEUE_DEVICE_BUSY);
1540			break;
1541		default:
1542			if (!scsi_eh_scmd_add(cmd, 0))
1543				scsi_finish_command(cmd);
1544	}
1545}
1546
/*
 * Function:    scsi_request_fn()
 *
 * Purpose:     Main strategy routine for SCSI.  Pulls prepared requests
 *		off the queue and dispatches their commands to the
 *		low-level driver until the queue is empty or plugged, or
 *		the device/host cannot take more.
 *
 * Arguments:   q       - Pointer to actual queue.
 *
 * Returns:     Nothing
 *
 * Lock status: IO request lock (q->queue_lock) assumed to be held when
 *		called.  It is dropped and re-acquired inside the function
 *		and is held again on return.
 */
static void scsi_request_fn(struct request_queue *q)
{
	struct scsi_device *sdev = q->queuedata;
	struct Scsi_Host *shost;
	struct scsi_cmnd *cmd;
	struct request *req;

	if (!sdev) {
		/* Queue without a device: fail everything still on it. */
		printk("scsi: killing requests for dead queue\n");
		while ((req = elv_next_request(q)) != NULL)
			scsi_kill_request(req, q);
		return;
	}

	/* Hold a device reference for as long as we work on its queue. */
	if(!get_device(&sdev->sdev_gendev))
		/* We must be tearing the block queue down already */
		return;

	/*
	 * To start with, we keep looping until the queue is empty, or until
	 * the host is no longer able to accept any more requests.
	 */
	shost = sdev->host;
	while (!blk_queue_plugged(q)) {
		int rtn;
		/*
		 * get next queueable request.  We do this early to make sure
		 * that the request is fully prepared even if we cannot
		 * accept it.
		 */
		req = elv_next_request(q);
		if (!req || !scsi_dev_queue_ready(q, sdev))
			break;

		if (unlikely(!scsi_device_online(sdev))) {
			sdev_printk(KERN_ERR, sdev,
				    "rejecting I/O to offline device\n");
			scsi_kill_request(req, q);
			continue;
		}


		/*
		 * Remove the request from the request list.  The explicit
		 * dequeue is skipped only when the queue is tagged and
		 * blk_queue_start_tag() returned 0 (presumably it has then
		 * taken the request off the list itself - confirm against
		 * the block layer).
		 */
		if (!(blk_queue_tagged(q) && !blk_queue_start_tag(q, req)))
			blkdev_dequeue_request(req);
		sdev->device_busy++;

		/*
		 * Hand over from the queue lock to the host lock.  Plain
		 * unlock/lock is used here, so the interrupt state is not
		 * touched; the host lock is released further down with
		 * spin_unlock_irq().
		 */
		spin_unlock(q->queue_lock);
		cmd = req->special;
		if (unlikely(cmd == NULL)) {
			printk(KERN_CRIT "impossible request in %s.\n"
					 "please mail a stack trace to "
					 "linux-scsi@vger.kernel.org",
					 __FUNCTION__);
			BUG();
		}
		spin_lock(shost->host_lock);

		if (!scsi_host_queue_ready(q, shost, sdev))
			goto not_ready;
		if (sdev->single_lun) {
			/*
			 * Only one device of the target may be active at a
			 * time: back off if another one currently owns it,
			 * otherwise claim the target for this device.
			 */
			if (scsi_target(sdev)->starget_sdev_user &&
			    scsi_target(sdev)->starget_sdev_user != sdev)
				goto not_ready;
			scsi_target(sdev)->starget_sdev_user = sdev;
		}
		shost->host_busy++;

		/*
		 * XXX(hch): This is rather suboptimal, scsi_dispatch_cmd will
		 *		take the lock again.
		 */
		spin_unlock_irq(shost->host_lock);

		/*
		 * Finally, initialize any error handling parameters, and set up
		 * the timers for timeouts.
		 */
		scsi_init_cmd_errh(cmd);

		/*
		 * Dispatch the command to the low-level driver.
		 */
		rtn = scsi_dispatch_cmd(cmd);
		spin_lock_irq(q->queue_lock);
		if(rtn) {
			/* we're refusing the command; because of
			 * the way locks get dropped, we need to
			 * check here if plugging is required */
			if(sdev->device_busy == 0)
				blk_plug_device(q);

			break;
		}
	}

	goto out;

 not_ready:
	/* Reached with host_lock held and queue_lock released. */
	spin_unlock_irq(shost->host_lock);

	/*
	 * lock q, handle tag, requeue req, and decrement device_busy. We
	 * must return with queue_lock held.
	 *
	 * Decrementing device_busy without checking it is OK, as all such
	 * cases (host limits or settings) should run the queue at some
	 * later time.
	 */
	spin_lock_irq(q->queue_lock);
	blk_requeue_request(q, req);
	sdev->device_busy--;
	if(sdev->device_busy == 0)
		blk_plug_device(q);
 out:
	/* must be careful here...if we trigger the ->remove() function
	 * we cannot be holding the q lock */
	spin_unlock_irq(q->queue_lock);
	put_device(&sdev->sdev_gendev);
	spin_lock_irq(q->queue_lock);
}
1681
1682u64 scsi_calculate_bounce_limit(struct Scsi_Host *shost)
1683{
1684	struct device *host_dev;
1685	u64 bounce_limit = 0xffffffff;
1686
1687	if (shost->unchecked_isa_dma)
1688		return BLK_BOUNCE_ISA;
1689	/*
1690	 * Platforms with virtual-DMA translation
1691	 * hardware have no practical limit.
1692	 */
1693	if (!PCI_DMA_BUS_IS_PHYS)
1694		return BLK_BOUNCE_ANY;
1695
1696	host_dev = scsi_get_device(shost);
1697	if (host_dev && host_dev->dma_mask)
1698		bounce_limit = *host_dev->dma_mask;
1699
1700	return bounce_limit;
1701}
1702EXPORT_SYMBOL(scsi_calculate_bounce_limit);
1703
1704struct request_queue *scsi_alloc_queue(struct scsi_device *sdev)
1705{
1706	struct Scsi_Host *shost = sdev->host;
1707	struct request_queue *q;
1708
1709	q = blk_init_queue(scsi_request_fn, NULL);
1710	if (!q)
1711		return NULL;
1712
1713	blk_queue_prep_rq(q, scsi_prep_fn);
1714
1715	blk_queue_max_hw_segments(q, shost->sg_tablesize);
1716	blk_queue_max_phys_segments(q, SCSI_MAX_PHYS_SEGMENTS);
1717	blk_queue_max_sectors(q, shost->max_sectors);
1718	blk_queue_bounce_limit(q, scsi_calculate_bounce_limit(shost));
1719	blk_queue_segment_boundary(q, shost->dma_boundary);
1720	blk_queue_issue_flush_fn(q, scsi_issue_flush_fn);
1721	blk_queue_softirq_done(q, scsi_softirq_done);
1722
1723	if (!shost->use_clustering)
1724		clear_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
1725	return q;
1726}
1727
/*
 * scsi_free_queue - release a request queue
 * @q: queue to release (presumably one created by scsi_alloc_queue())
 *
 * Thin wrapper around blk_cleanup_queue().
 */
void scsi_free_queue(struct request_queue *q)
{
	blk_cleanup_queue(q);
}
1732
/*
 * Function:    scsi_block_requests()
 *
 * Purpose:     Utility function used by low-level drivers to prevent further
 *		commands from being queued to the host.
 *
 * Arguments:   shost       - Host in question
 *
 * Returns:     Nothing
 *
 * Lock status: No locks are assumed held.
 *
 * Notes:       Only sets shost->host_self_blocked.  There is no timer nor
 *		any other means by which the requests get unblocked other
 *		than the low-level driver calling scsi_unblock_requests().
 */
void scsi_block_requests(struct Scsi_Host *shost)
{
	shost->host_self_blocked = 1;
}
EXPORT_SYMBOL(scsi_block_requests);
1754
/*
 * Function:    scsi_unblock_requests()
 *
 * Purpose:     Utility function used by low-level drivers to allow further
 *		commands to be queued to the host again.
 *
 * Arguments:   shost       - Host in question
 *
 * Returns:     Nothing
 *
 * Lock status: No locks are assumed held.
 *
 * Notes:       Clears shost->host_self_blocked and then runs the queues of
 *		the host via scsi_run_host_queues().
 *
 *		This is done as an API function so that changes to the
 *		internals of the scsi mid-layer won't require wholesale
 *		changes to drivers that use this feature.
 */
void scsi_unblock_requests(struct Scsi_Host *shost)
{
	shost->host_self_blocked = 0;
	scsi_run_host_queues(shost);
}
EXPORT_SYMBOL(scsi_unblock_requests);
1781
1782int __init scsi_init_queue(void)
1783{
1784	int i;
1785
1786	scsi_io_context_cache = kmem_cache_create("scsi_io_context",
1787					sizeof(struct scsi_io_context),
1788					0, 0, NULL, NULL);
1789	if (!scsi_io_context_cache) {
1790		printk(KERN_ERR "SCSI: can't init scsi io context cache\n");
1791		return -ENOMEM;
1792	}
1793
1794	for (i = 0; i < SG_MEMPOOL_NR; i++) {
1795		struct scsi_host_sg_pool *sgp = scsi_sg_pools + i;
1796		int size = sgp->size * sizeof(struct scatterlist);
1797
1798		sgp->slab = kmem_cache_create(sgp->name, size, 0,
1799				SLAB_HWCACHE_ALIGN, NULL, NULL);
1800		if (!sgp->slab) {
1801			printk(KERN_ERR "SCSI: can't init sg slab %s\n",
1802					sgp->name);
1803		}
1804
1805		sgp->pool = mempool_create_slab_pool(SG_MEMPOOL_SIZE,
1806						     sgp->slab);
1807		if (!sgp->pool) {
1808			printk(KERN_ERR "SCSI: can't init sg mempool %s\n",
1809					sgp->name);
1810		}
1811	}
1812
1813	return 0;
1814}
1815
1816void scsi_exit_queue(void)
1817{
1818	int i;
1819
1820	kmem_cache_destroy(scsi_io_context_cache);
1821
1822	for (i = 0; i < SG_MEMPOOL_NR; i++) {
1823		struct scsi_host_sg_pool *sgp = scsi_sg_pools + i;
1824		mempool_destroy(sgp->pool);
1825		kmem_cache_destroy(sgp->slab);
1826	}
1827}
1828
1829/**
1830 *	scsi_mode_select - issue a mode select
1831 *	@sdev:	SCSI device to be queried
1832 *	@pf:	Page format bit (1 == standard, 0 == vendor specific)
1833 *	@sp:	Save page bit (0 == don't save, 1 == save)
1834 *	@modepage: mode page being requested
1835 *	@buffer: request buffer (may not be smaller than eight bytes)
1836 *	@len:	length of request buffer.
1837 *	@timeout: command timeout
1838 *	@retries: number of retries before failing
1839 *	@data: returns a structure abstracting the mode header data
1840 *	@sense: place to put sense data (or NULL if no sense to be collected).
1841 *		must be SCSI_SENSE_BUFFERSIZE big.
1842 *
1843 *	Returns zero if successful; negative error number or scsi
1844 *	status on error
1845 *
1846 */
1847int
1848scsi_mode_select(struct scsi_device *sdev, int pf, int sp, int modepage,
1849		 unsigned char *buffer, int len, int timeout, int retries,
1850		 struct scsi_mode_data *data, struct scsi_sense_hdr *sshdr)
1851{
1852	unsigned char cmd[10];
1853	unsigned char *real_buffer;
1854	int ret;
1855
1856	memset(cmd, 0, sizeof(cmd));
1857	cmd[1] = (pf ? 0x10 : 0) | (sp ? 0x01 : 0);
1858
1859	if (sdev->use_10_for_ms) {
1860		if (len > 65535)
1861			return -EINVAL;
1862		real_buffer = kmalloc(8 + len, GFP_KERNEL);
1863		if (!real_buffer)
1864			return -ENOMEM;
1865		memcpy(real_buffer + 8, buffer, len);
1866		len += 8;
1867		real_buffer[0] = 0;
1868		real_buffer[1] = 0;
1869		real_buffer[2] = data->medium_type;
1870		real_buffer[3] = data->device_specific;
1871		real_buffer[4] = data->longlba ? 0x01 : 0;
1872		real_buffer[5] = 0;
1873		real_buffer[6] = data->block_descriptor_length >> 8;
1874		real_buffer[7] = data->block_descriptor_length;
1875
1876		cmd[0] = MODE_SELECT_10;
1877		cmd[7] = len >> 8;
1878		cmd[8] = len;
1879	} else {
1880		if (len > 255 || data->block_descriptor_length > 255 ||
1881		    data->longlba)
1882			return -EINVAL;
1883
1884		real_buffer = kmalloc(4 + len, GFP_KERNEL);
1885		if (!real_buffer)
1886			return -ENOMEM;
1887		memcpy(real_buffer + 4, buffer, len);
1888		len += 4;
1889		real_buffer[0] = 0;
1890		real_buffer[1] = data->medium_type;
1891		real_buffer[2] = data->device_specific;
1892		real_buffer[3] = data->block_descriptor_length;
1893
1894
1895		cmd[0] = MODE_SELECT;
1896		cmd[4] = len;
1897	}
1898
1899	ret = scsi_execute_req(sdev, cmd, DMA_TO_DEVICE, real_buffer, len,
1900			       sshdr, timeout, retries);
1901	kfree(real_buffer);
1902	return ret;
1903}
1904EXPORT_SYMBOL_GPL(scsi_mode_select);
1905
1906/**
1907 *	scsi_mode_sense - issue a mode sense, falling back from 10 to
1908 *		six bytes if necessary.
1909 *	@sdev:	SCSI device to be queried
1910 *	@dbd:	set if mode sense will allow block descriptors to be returned
1911 *	@modepage: mode page being requested
1912 *	@buffer: request buffer (may not be smaller than eight bytes)
1913 *	@len:	length of request buffer.
1914 *	@timeout: command timeout
1915 *	@retries: number of retries before failing
1916 *	@data: returns a structure abstracting the mode header data
1917 *	@sense: place to put sense data (or NULL if no sense to be collected).
1918 *		must be SCSI_SENSE_BUFFERSIZE big.
1919 *
1920 *	Returns zero if unsuccessful, or the header offset (either 4
1921 *	or 8 depending on whether a six or ten byte command was
1922 *	issued) if successful.
1923 **/
1924int
1925scsi_mode_sense(struct scsi_device *sdev, int dbd, int modepage,
1926		  unsigned char *buffer, int len, int timeout, int retries,
1927		  struct scsi_mode_data *data, struct scsi_sense_hdr *sshdr)
1928{
1929	unsigned char cmd[12];
1930	int use_10_for_ms;
1931	int header_length;
1932	int result;
1933	struct scsi_sense_hdr my_sshdr;
1934
1935	memset(data, 0, sizeof(*data));
1936	memset(&cmd[0], 0, 12);
1937	cmd[1] = dbd & 0x18;	/* allows DBD and LLBA bits */
1938	cmd[2] = modepage;
1939
1940	/* caller might not be interested in sense, but we need it */
1941	if (!sshdr)
1942		sshdr = &my_sshdr;
1943
1944 retry:
1945	use_10_for_ms = sdev->use_10_for_ms;
1946
1947	if (use_10_for_ms) {
1948		if (len < 8)
1949			len = 8;
1950
1951		cmd[0] = MODE_SENSE_10;
1952		cmd[8] = len;
1953		header_length = 8;
1954	} else {
1955		if (len < 4)
1956			len = 4;
1957
1958		cmd[0] = MODE_SENSE;
1959		cmd[4] = len;
1960		header_length = 4;
1961	}
1962
1963	memset(buffer, 0, len);
1964
1965	result = scsi_execute_req(sdev, cmd, DMA_FROM_DEVICE, buffer, len,
1966				  sshdr, timeout, retries);
1967
1968	/* This code looks awful: what it's doing is making sure an
1969	 * ILLEGAL REQUEST sense return identifies the actual command
1970	 * byte as the problem.  MODE_SENSE commands can return
1971	 * ILLEGAL REQUEST if the code page isn't supported */
1972
1973	if (use_10_for_ms && !scsi_status_is_good(result) &&
1974	    (driver_byte(result) & DRIVER_SENSE)) {
1975		if (scsi_sense_valid(sshdr)) {
1976			if ((sshdr->sense_key == ILLEGAL_REQUEST) &&
1977			    (sshdr->asc == 0x20) && (sshdr->ascq == 0)) {
1978				/*
1979				 * Invalid command operation code
1980				 */
1981				sdev->use_10_for_ms = 0;
1982				goto retry;
1983			}
1984		}
1985	}
1986
1987	if(scsi_status_is_good(result)) {
1988		if (unlikely(buffer[0] == 0x86 && buffer[1] == 0x0b &&
1989			     (modepage == 6 || modepage == 8))) {
1990			/* Initio breakage? */
1991			header_length = 0;
1992			data->length = 13;
1993			data->medium_type = 0;
1994			data->device_specific = 0;
1995			data->longlba = 0;
1996			data->block_descriptor_length = 0;
1997		} else if(use_10_for_ms) {
1998			data->length = buffer[0]*256 + buffer[1] + 2;
1999			data->medium_type = buffer[2];
2000			data->device_specific = buffer[3];
2001			data->longlba = buffer[4] & 0x01;
2002			data->block_descriptor_length = buffer[6]*256
2003				+ buffer[7];
2004		} else {
2005			data->length = buffer[0] + 1;
2006			data->medium_type = buffer[1];
2007			data->device_specific = buffer[2];
2008			data->block_descriptor_length = buffer[3];
2009		}
2010		data->header_length = header_length;
2011	}
2012
2013	return result;
2014}
2015EXPORT_SYMBOL(scsi_mode_sense);
2016
2017int
2018scsi_test_unit_ready(struct scsi_device *sdev, int timeout, int retries)
2019{
2020	char cmd[] = {
2021		TEST_UNIT_READY, 0, 0, 0, 0, 0,
2022	};
2023	struct scsi_sense_hdr sshdr;
2024	int result;
2025
2026	result = scsi_execute_req(sdev, cmd, DMA_NONE, NULL, 0, &sshdr,
2027				  timeout, retries);
2028
2029	if ((driver_byte(result) & DRIVER_SENSE) && sdev->removable) {
2030
2031		if ((scsi_sense_valid(&sshdr)) &&
2032		    ((sshdr.sense_key == UNIT_ATTENTION) ||
2033		     (sshdr.sense_key == NOT_READY))) {
2034			sdev->changed = 1;
2035			result = 0;
2036		}
2037	}
2038	return result;
2039}
2040EXPORT_SYMBOL(scsi_test_unit_ready);
2041
2042/**
2043 *	scsi_device_set_state - Take the given device through the device
2044 *		state model.
2045 *	@sdev:	scsi device to change the state of.
2046 *	@state:	state to change to.
2047 *
2048 *	Returns zero if unsuccessful or an error if the requested
2049 *	transition is illegal.
2050 **/
2051int
2052scsi_device_set_state(struct scsi_device *sdev, enum scsi_device_state state)
2053{
2054	enum scsi_device_state oldstate = sdev->sdev_state;
2055
2056	if (state == oldstate)
2057		return 0;
2058
2059	switch (state) {
2060	case SDEV_CREATED:
2061		/* There are no legal states that come back to
2062		 * created.  This is the manually initialised start
2063		 * state */
2064		goto illegal;
2065
2066	case SDEV_RUNNING:
2067		switch (oldstate) {
2068		case SDEV_CREATED:
2069		case SDEV_OFFLINE:
2070		case SDEV_QUIESCE:
2071		case SDEV_BLOCK:
2072			break;
2073		default:
2074			goto illegal;
2075		}
2076		break;
2077
2078	case SDEV_QUIESCE:
2079		switch (oldstate) {
2080		case SDEV_RUNNING:
2081		case SDEV_OFFLINE:
2082			break;
2083		default:
2084			goto illegal;
2085		}
2086		break;
2087
2088	case SDEV_OFFLINE:
2089		switch (oldstate) {
2090		case SDEV_CREATED:
2091		case SDEV_RUNNING:
2092		case SDEV_QUIESCE:
2093		case SDEV_BLOCK:
2094			break;
2095		default:
2096			goto illegal;
2097		}
2098		break;
2099
2100	case SDEV_BLOCK:
2101		switch (oldstate) {
2102		case SDEV_CREATED:
2103		case SDEV_RUNNING:
2104			break;
2105		default:
2106			goto illegal;
2107		}
2108		break;
2109
2110	case SDEV_CANCEL:
2111		switch (oldstate) {
2112		case SDEV_CREATED:
2113		case SDEV_RUNNING:
2114		case SDEV_OFFLINE:
2115		case SDEV_BLOCK:
2116			break;
2117		default:
2118			goto illegal;
2119		}
2120		break;
2121
2122	case SDEV_DEL:
2123		switch (oldstate) {
2124		case SDEV_CANCEL:
2125			break;
2126		default:
2127			goto illegal;
2128		}
2129		break;
2130
2131	}
2132	sdev->sdev_state = state;
2133	return 0;
2134
2135 illegal:
2136	SCSI_LOG_ERROR_RECOVERY(1,
2137				sdev_printk(KERN_ERR, sdev,
2138					    "Illegal state transition %s->%s\n",
2139					    scsi_device_state_name(oldstate),
2140					    scsi_device_state_name(state))
2141				);
2142	return -EINVAL;
2143}
2144EXPORT_SYMBOL(scsi_device_set_state);
2145
2146/**
2147 *	scsi_device_quiesce - Block user issued commands.
2148 *	@sdev:	scsi device to quiesce.
2149 *
2150 *	This works by trying to transition to the SDEV_QUIESCE state
2151 *	(which must be a legal transition).  When the device is in this
2152 *	state, only special requests will be accepted, all others will
2153 *	be deferred.  Since special requests may also be requeued requests,
2154 *	a successful return doesn't guarantee the device will be
2155 *	totally quiescent.
2156 *
2157 *	Must be called with user context, may sleep.
2158 *
2159 *	Returns zero if unsuccessful or an error if not.
2160 **/
2161int
2162scsi_device_quiesce(struct scsi_device *sdev)
2163{
2164	int err = scsi_device_set_state(sdev, SDEV_QUIESCE);
2165	if (err)
2166		return err;
2167
2168	scsi_run_queue(sdev->request_queue);
2169	while (sdev->device_busy) {
2170		msleep_interruptible(200);
2171		scsi_run_queue(sdev->request_queue);
2172	}
2173	return 0;
2174}
2175EXPORT_SYMBOL(scsi_device_quiesce);
2176
2177/**
2178 *	scsi_device_resume - Restart user issued commands to a quiesced device.
2179 *	@sdev:	scsi device to resume.
2180 *
2181 *	Moves the device from quiesced back to running and restarts the
2182 *	queues.
2183 *
2184 *	Must be called with user context, may sleep.
2185 **/
2186void
2187scsi_device_resume(struct scsi_device *sdev)
2188{
2189	if(scsi_device_set_state(sdev, SDEV_RUNNING))
2190		return;
2191	scsi_run_queue(sdev->request_queue);
2192}
2193EXPORT_SYMBOL(scsi_device_resume);
2194
/*
 * Per-device callback for starget_for_each_device(): quiesce one device.
 * @data is unused, and the result of scsi_device_quiesce() is discarded
 * because the callback has no way to report it.
 */
static void
device_quiesce_fn(struct scsi_device *sdev, void *data)
{
	(void)scsi_device_quiesce(sdev);
}
2200
2201void
2202scsi_target_quiesce(struct scsi_target *starget)
2203{
2204	starget_for_each_device(starget, NULL, device_quiesce_fn);
2205}
2206EXPORT_SYMBOL(scsi_target_quiesce);
2207
/*
 * Per-device callback for starget_for_each_device(): resume one device.
 * @data is unused.
 */
static void
device_resume_fn(struct scsi_device *sdev, void *data)
{
	scsi_device_resume(sdev);
}
2213
2214void
2215scsi_target_resume(struct scsi_target *starget)
2216{
2217	starget_for_each_device(starget, NULL, device_resume_fn);
2218}
2219EXPORT_SYMBOL(scsi_target_resume);
2220
2221/**
2222 * scsi_internal_device_block - internal function to put a device
2223 *				temporarily into the SDEV_BLOCK state
2224 * @sdev:	device to block
2225 *
2226 * Block request made by scsi lld's to temporarily stop all
2227 * scsi commands on the specified device.  Called from interrupt
2228 * or normal process context.
2229 *
2230 * Returns zero if successful or error if not
2231 *
2232 * Notes:
2233 *	This routine transitions the device to the SDEV_BLOCK state
2234 *	(which must be a legal transition).  When the device is in this
2235 *	state, all commands are deferred until the scsi lld reenables
2236 *	the device with scsi_device_unblock or device_block_tmo fires.
2237 *	This routine assumes the host_lock is held on entry.
2238 **/
2239int
2240scsi_internal_device_block(struct scsi_device *sdev)
2241{
2242	request_queue_t *q = sdev->request_queue;
2243	unsigned long flags;
2244	int err = 0;
2245
2246	err = scsi_device_set_state(sdev, SDEV_BLOCK);
2247	if (err)
2248		return err;
2249
2250	/*
2251	 * The device has transitioned to SDEV_BLOCK.  Stop the
2252	 * block layer from calling the midlayer with this device's
2253	 * request queue.
2254	 */
2255	spin_lock_irqsave(q->queue_lock, flags);
2256	blk_stop_queue(q);
2257	spin_unlock_irqrestore(q->queue_lock, flags);
2258
2259	return 0;
2260}
2261EXPORT_SYMBOL_GPL(scsi_internal_device_block);
2262
2263/**
2264 * scsi_internal_device_unblock - resume a device after a block request
2265 * @sdev:	device to resume
2266 *
2267 * Called by scsi lld's or the midlayer to restart the device queue
2268 * for the previously suspended scsi device.  Called from interrupt or
2269 * normal process context.
2270 *
2271 * Returns zero if successful or error if not.
2272 *
2273 * Notes:
2274 *	This routine transitions the device to the SDEV_RUNNING state
2275 *	(which must be a legal transition) allowing the midlayer to
2276 *	goose the queue for this device.  This routine assumes the
2277 *	host_lock is held upon entry.
2278 **/
2279int
2280scsi_internal_device_unblock(struct scsi_device *sdev)
2281{
2282	request_queue_t *q = sdev->request_queue;
2283	int err;
2284	unsigned long flags;
2285
2286	/*
2287	 * Try to transition the scsi device to SDEV_RUNNING
2288	 * and goose the device queue if successful.
2289	 */
2290	err = scsi_device_set_state(sdev, SDEV_RUNNING);
2291	if (err)
2292		return err;
2293
2294	spin_lock_irqsave(q->queue_lock, flags);
2295	blk_start_queue(q);
2296	spin_unlock_irqrestore(q->queue_lock, flags);
2297
2298	return 0;
2299}
2300EXPORT_SYMBOL_GPL(scsi_internal_device_unblock);
2301
/*
 * Per-device callback for starget_for_each_device(): block one device.
 * @data is unused, and the result of scsi_internal_device_block() is
 * discarded because the callback has no way to report it.
 */
static void
device_block(struct scsi_device *sdev, void *data)
{
	(void)scsi_internal_device_block(sdev);
}
2307
2308static int
2309target_block(struct device *dev, void *data)
2310{
2311	if (scsi_is_target_device(dev))
2312		starget_for_each_device(to_scsi_target(dev), NULL,
2313					device_block);
2314	return 0;
2315}
2316
2317void
2318scsi_target_block(struct device *dev)
2319{
2320	if (scsi_is_target_device(dev))
2321		starget_for_each_device(to_scsi_target(dev), NULL,
2322					device_block);
2323	else
2324		device_for_each_child(dev, NULL, target_block);
2325}
2326EXPORT_SYMBOL_GPL(scsi_target_block);
2327
/*
 * Per-device callback for starget_for_each_device(): unblock one device.
 * @data is unused, and the result of scsi_internal_device_unblock() is
 * discarded because the callback has no way to report it.
 */
static void
device_unblock(struct scsi_device *sdev, void *data)
{
	(void)scsi_internal_device_unblock(sdev);
}
2333
2334static int
2335target_unblock(struct device *dev, void *data)
2336{
2337	if (scsi_is_target_device(dev))
2338		starget_for_each_device(to_scsi_target(dev), NULL,
2339					device_unblock);
2340	return 0;
2341}
2342
2343void
2344scsi_target_unblock(struct device *dev)
2345{
2346	if (scsi_is_target_device(dev))
2347		starget_for_each_device(to_scsi_target(dev), NULL,
2348					device_unblock);
2349	else
2350		device_for_each_child(dev, NULL, target_unblock);
2351}
2352EXPORT_SYMBOL_GPL(scsi_target_unblock);
2353
2354/**
2355 * scsi_kmap_atomic_sg - find and atomically map an sg-elemnt
2356 * @sg:		scatter-gather list
2357 * @sg_count:	number of segments in sg
2358 * @offset:	offset in bytes into sg, on return offset into the mapped area
2359 * @len:	bytes to map, on return number of bytes mapped
2360 *
2361 * Returns virtual address of the start of the mapped page
2362 */
2363void *scsi_kmap_atomic_sg(struct scatterlist *sg, int sg_count,
2364			  size_t *offset, size_t *len)
2365{
2366	int i;
2367	size_t sg_len = 0, len_complete = 0;
2368	struct page *page;
2369
2370	for (i = 0; i < sg_count; i++) {
2371		len_complete = sg_len; /* Complete sg-entries */
2372		sg_len += sg[i].length;
2373		if (sg_len > *offset)
2374			break;
2375	}
2376
2377	if (unlikely(i == sg_count)) {
2378		printk(KERN_ERR "%s: Bytes in sg: %zu, requested offset %zu, "
2379			"elements %d\n",
2380		       __FUNCTION__, sg_len, *offset, sg_count);
2381		WARN_ON(1);
2382		return NULL;
2383	}
2384
2385	/* Offset starting from the beginning of first page in this sg-entry */
2386	*offset = *offset - len_complete + sg[i].offset;
2387
2388	/* Assumption: contiguous pages can be accessed as "page + i" */
2389	page = nth_page(sg[i].page, (*offset >> PAGE_SHIFT));
2390	*offset &= ~PAGE_MASK;
2391
2392	/* Bytes in this sg-entry from *offset to the end of the page */
2393	sg_len = PAGE_SIZE - *offset;
2394	if (*len > sg_len)
2395		*len = sg_len;
2396
2397	return kmap_atomic(page, KM_BIO_SRC_IRQ);
2398}
2399EXPORT_SYMBOL(scsi_kmap_atomic_sg);
2400
2401/**
2402 * scsi_kunmap_atomic_sg - atomically unmap a virtual address, previously
2403 *			   mapped with scsi_kmap_atomic_sg
2404 * @virt:	virtual address to be unmapped
2405 */
2406void scsi_kunmap_atomic_sg(void *virt)
2407{
2408	kunmap_atomic(virt, KM_BIO_SRC_IRQ);
2409}
2410EXPORT_SYMBOL(scsi_kunmap_atomic_sg);
2411