scsi_lib.c revision b95be99d52ce4f9db9ff0bd5f10e9e2066da6d2e
/*
 *  scsi_lib.c Copyright (C) 1999 Eric Youngdale
 *
 *  SCSI queueing library.
 *      Initial versions: Eric Youngdale (eric@andante.org).
 *                        Based upon conversations with large numbers
 *                        of people at Linux Expo.
 */

#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/completion.h>
#include <linux/kernel.h>
#include <linux/mempool.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/delay.h>

#include <scsi/scsi.h>
#include <scsi/scsi_dbg.h>
#include <scsi/scsi_device.h>
#include <scsi/scsi_driver.h>
#include <scsi/scsi_eh.h>
#include <scsi/scsi_host.h>
#include <scsi/scsi_request.h>

#include "scsi_priv.h"
#include "scsi_logging.h"


#define SG_MEMPOOL_NR		(sizeof(scsi_sg_pools)/sizeof(struct scsi_host_sg_pool))
#define SG_MEMPOOL_SIZE		32

struct scsi_host_sg_pool {
	size_t		size;
	char		*name;
	kmem_cache_t	*slab;
	mempool_t	*pool;
};

#if (SCSI_MAX_PHYS_SEGMENTS < 32)
#error SCSI_MAX_PHYS_SEGMENTS is too small
#endif

#define SP(x) { x, "sgpool-" #x }
static struct scsi_host_sg_pool scsi_sg_pools[] = {
	SP(8),
	SP(16),
	SP(32),
#if (SCSI_MAX_PHYS_SEGMENTS > 32)
	SP(64),
#if (SCSI_MAX_PHYS_SEGMENTS > 64)
	SP(128),
#if (SCSI_MAX_PHYS_SEGMENTS > 128)
	SP(256),
#if (SCSI_MAX_PHYS_SEGMENTS > 256)
#error SCSI_MAX_PHYS_SEGMENTS is too large
#endif
#endif
#endif
#endif
};
#undef SP


/*
 * Function:    scsi_insert_special_req()
 *
 * Purpose:     Insert pre-formed request into request queue.
 *
 * Arguments:   sreq	- request that is ready to be queued.
 *              at_head	- boolean.  True if we should insert at head
 *                        of queue, false if we should insert at tail.
 *
 * Lock status: Assumed that lock is not held upon entry.
 *
 * Returns:     Nothing
 *
 * Notes:       This function is called from character device and from
 *              ioctl types of functions where the caller knows exactly
 *              what SCSI command needs to be issued.   The idea is that
 *              we merely inject the command into the queue (at the head
 *              for now), and then call the queue request function to actually
 *              process it.
 */
int scsi_insert_special_req(struct scsi_request *sreq, int at_head)
{
	/*
	 * Because users of this function are apt to reuse requests with no
	 * modification, we have to sanitise the request flags here
	 */
	sreq->sr_request->flags &= ~REQ_DONTPREP;
	blk_insert_request(sreq->sr_device->request_queue, sreq->sr_request,
		       	   at_head, sreq);
	return 0;
}

static void scsi_run_queue(struct request_queue *q);
static void scsi_release_buffers(struct scsi_cmnd *cmd);

/*
 * Function:	scsi_unprep_request()
 *
 * Purpose:	Remove all preparation done for a request, including its
 *		associated scsi_cmnd, so that it can be requeued.
 *
 * Arguments:	req	- request to unprepare
 *
 * Lock status:	Assumed that no locks are held upon entry.
 *
 * Returns:	Nothing.
 */
static void scsi_unprep_request(struct request *req)
{
	struct scsi_cmnd *cmd = req->special;

	req->flags &= ~REQ_DONTPREP;
	req->special = (req->flags & REQ_SPECIAL) ? cmd->sc_request : NULL;

	scsi_put_command(cmd);
}

/*
 * Function:    scsi_queue_insert()
 *
 * Purpose:     Insert a command in the midlevel queue.
 *
 * Arguments:   cmd    - command that we are adding to queue.
 *              reason - why we are inserting command to queue.
 *
 * Lock status: Assumed that lock is not held upon entry.
 *
 * Returns:     Nothing.
 *
 * Notes:       We do this for one of two cases.  Either the host is busy
 *              and it cannot accept any more commands for the time being,
 *              or the device returned QUEUE_FULL and can accept no more
 *              commands.
 * Notes:       This could be called either from an interrupt context or a
 *              normal process context.
 */
int scsi_queue_insert(struct scsi_cmnd *cmd, int reason)
{
	struct Scsi_Host *host = cmd->device->host;
	struct scsi_device *device = cmd->device;
	struct request_queue *q = device->request_queue;
	unsigned long flags;

	SCSI_LOG_MLQUEUE(1,
		 printk("Inserting command %p into mlqueue\n", cmd));

	/*
	 * Set the appropriate busy bit for the device/host.
	 *
	 * If the host/device isn't busy, assume that something actually
	 * completed, and that we should be able to queue a command now.
	 *
	 * Note that the prior mid-layer assumption that any host could
	 * always queue at least one command is now broken.  The mid-layer
	 * will implement a user specifiable stall (see
	 * scsi_host.max_host_blocked and scsi_device.max_device_blocked)
	 * if a command is requeued with no other commands outstanding
	 * either for the device or for the host.
	 */
	if (reason == SCSI_MLQUEUE_HOST_BUSY)
		host->host_blocked = host->max_host_blocked;
	else if (reason == SCSI_MLQUEUE_DEVICE_BUSY)
		device->device_blocked = device->max_device_blocked;

	/*
	 * Decrement the counters, since these commands are no longer
	 * active on the host/device.
	 */
	scsi_device_unbusy(device);

	/*
	 * Requeue this command.  It will go before all other commands
	 * that are already in the queue.
	 *
	 * NOTE: there is magic here about the way the queue is plugged if
	 * we have no outstanding commands.
	 *
	 * Although we *don't* plug the queue, we call the request
	 * function.  The SCSI request function detects the blocked condition
	 * and plugs the queue appropriately.
         */
	spin_lock_irqsave(q->queue_lock, flags);
	blk_requeue_request(q, cmd->request);
	spin_unlock_irqrestore(q->queue_lock, flags);

	scsi_run_queue(q);

	return 0;
}
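
/*
 * Example: how the "reason" codes above typically arise.  A minimal,
 * hypothetical sketch of a low-level driver's queuecommand() -- when it
 * returns SCSI_MLQUEUE_HOST_BUSY, the midlayer requeues the command
 * through scsi_queue_insert().  The "mydrv_*" names are illustrative
 * only, not a real driver.
 *
 *	static int mydrv_queuecommand(struct scsi_cmnd *cmd,
 *				      void (*done)(struct scsi_cmnd *))
 *	{
 *		struct mydrv_adapter *adap =
 *			(struct mydrv_adapter *)cmd->device->host->hostdata;
 *
 *		if (mydrv_ring_full(adap))
 *			return SCSI_MLQUEUE_HOST_BUSY;
 *
 *		cmd->scsi_done = done;
 *		mydrv_start_io(adap, cmd);
 *		return 0;
 *	}
 */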

/*
 * Function:    scsi_do_req
 *
 * Purpose:     Queue a SCSI request
 *
 * Arguments:   sreq	  - command descriptor.
 *              cmnd      - actual SCSI command to be performed.
 *              buffer    - data buffer.
 *              bufflen   - size of data buffer.
 *              done      - completion function to be run.
 *              timeout   - how long to let it run before timeout.
 *              retries   - number of retries we allow.
 *
 * Lock status: No locks held upon entry.
 *
 * Returns:     Nothing.
 *
 * Notes:	This function is only used for queueing requests for things
 *		like ioctls and character device requests - this is because
 *		we essentially just inject a request into the queue for the
 *		device.
 *
 *		In order to support the scsi_device_quiesce function, we
 *		now inject requests on the *head* of the device queue
 *		rather than the tail.
 */
void scsi_do_req(struct scsi_request *sreq, const void *cmnd,
		 void *buffer, unsigned bufflen,
		 void (*done)(struct scsi_cmnd *),
		 int timeout, int retries)
{
	/*
	 * If the upper level driver is reusing these things, then
	 * we should release the low-level block now.  Another one will
	 * be allocated later when this request is getting queued.
	 */
	__scsi_release_request(sreq);

	/*
	 * Our own function scsi_done (which marks the host as not busy,
	 * disables the timeout counter, etc) will be called by us or by the
	 * scsi_hosts[host].queuecommand() function; it in turn must call
	 * the completion function for the high level driver.
	 */
	memcpy(sreq->sr_cmnd, cmnd, sizeof(sreq->sr_cmnd));
	sreq->sr_bufflen = bufflen;
	sreq->sr_buffer = buffer;
	sreq->sr_allowed = retries;
	sreq->sr_done = done;
	sreq->sr_timeout_per_command = timeout;

	if (sreq->sr_cmd_len == 0)
		sreq->sr_cmd_len = COMMAND_SIZE(sreq->sr_cmnd[0]);

	/*
	 * head injection *required* here otherwise quiesce won't work
	 */
	scsi_insert_special_req(sreq, 1);
}
EXPORT_SYMBOL(scsi_do_req);

/* This is the end routine we get to if a command was never attached
 * to the request.  Simply complete the request without changing
 * rq_status; this will cause a DRIVER_ERROR. */
static void scsi_wait_req_end_io(struct request *req)
{
	BUG_ON(!req->waiting);

	complete(req->waiting);
}

void scsi_wait_req(struct scsi_request *sreq, const void *cmnd, void *buffer,
		   unsigned bufflen, int timeout, int retries)
{
	DECLARE_COMPLETION(wait);
	int write = (sreq->sr_data_direction == DMA_TO_DEVICE);
	struct request *req;

	req = blk_get_request(sreq->sr_device->request_queue, write,
			      __GFP_WAIT);
	if (bufflen && blk_rq_map_kern(sreq->sr_device->request_queue, req,
				       buffer, bufflen, __GFP_WAIT)) {
		sreq->sr_result = DRIVER_ERROR << 24;
		blk_put_request(req);
		return;
	}

	req->flags |= REQ_NOMERGE;
	req->waiting = &wait;
	req->end_io = scsi_wait_req_end_io;
	req->cmd_len = COMMAND_SIZE(((u8 *)cmnd)[0]);
	req->sense = sreq->sr_sense_buffer;
	req->sense_len = 0;
	memcpy(req->cmd, cmnd, req->cmd_len);
	req->timeout = timeout;
	req->flags |= REQ_BLOCK_PC;
	req->rq_disk = NULL;
	blk_insert_request(sreq->sr_device->request_queue, req,
			   sreq->sr_data_direction == DMA_TO_DEVICE, NULL);
	wait_for_completion(&wait);
	sreq->sr_request->waiting = NULL;
	sreq->sr_result = req->errors;
	if (req->errors)
		sreq->sr_result |= (DRIVER_ERROR << 24);

	blk_put_request(req);
}

EXPORT_SYMBOL(scsi_wait_req);
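
/*
 * Example: typical synchronous use of scsi_wait_req() from an upper
 * level driver.  A minimal sketch, assuming the caller already holds a
 * reference on "sdev"; the 30 second timeout and 3 retries are
 * arbitrary illustrative values.
 *
 *	struct scsi_request *sreq;
 *	unsigned char cmd[6] = { TEST_UNIT_READY, 0, 0, 0, 0, 0 };
 *
 *	sreq = scsi_allocate_request(sdev, GFP_KERNEL);
 *	if (!sreq)
 *		return -ENOMEM;
 *	sreq->sr_data_direction = DMA_NONE;
 *	scsi_wait_req(sreq, cmd, NULL, 0, 30 * HZ, 3);
 *	if (sreq->sr_result)
 *		... inspect sreq->sr_sense_buffer ...
 *	scsi_release_request(sreq);
 */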

/**
 * scsi_execute - insert request and wait for the result
 * @sdev:	scsi device
 * @cmd:	scsi command
 * @data_direction: data direction
 * @buffer:	data buffer
 * @bufflen:	len of buffer
 * @sense:	optional sense buffer
 * @timeout:	request timeout in seconds
 * @retries:	number of times to retry request
 * @flags:	flags to OR into the request flags
 *
 * Returns the req->errors value, which is the scsi_cmnd result
 * field.
 **/
int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
		 int data_direction, void *buffer, unsigned bufflen,
		 unsigned char *sense, int timeout, int retries, int flags)
{
	struct request *req;
	int write = (data_direction == DMA_TO_DEVICE);
	int ret = DRIVER_ERROR << 24;

	req = blk_get_request(sdev->request_queue, write, __GFP_WAIT);

	if (bufflen &&	blk_rq_map_kern(sdev->request_queue, req,
					buffer, bufflen, __GFP_WAIT))
		goto out;

	req->cmd_len = COMMAND_SIZE(cmd[0]);
	memcpy(req->cmd, cmd, req->cmd_len);
	req->sense = sense;
	req->sense_len = 0;
	req->timeout = timeout;
	req->flags |= flags | REQ_BLOCK_PC | REQ_SPECIAL | REQ_QUIET;

	/*
	 * head injection *required* here otherwise quiesce won't work
	 */
	blk_execute_rq(req->q, NULL, req, 1);

	ret = req->errors;
 out:
	blk_put_request(req);

	return ret;
}
EXPORT_SYMBOL(scsi_execute);
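
/*
 * Example: fetching 36 bytes of standard INQUIRY data via
 * scsi_execute().  A minimal sketch; the buffer size, timeout and
 * retry count are illustrative assumptions, not requirements.
 *
 *	unsigned char cmd[6] = { INQUIRY, 0, 0, 0, 36, 0 };
 *	unsigned char buf[36];
 *	int result;
 *
 *	result = scsi_execute(sdev, cmd, DMA_FROM_DEVICE, buf,
 *			      sizeof(buf), NULL, 30 * HZ, 3, 0);
 *	if (result == 0)
 *		... buf[] now holds the INQUIRY response ...
 */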


int scsi_execute_req(struct scsi_device *sdev, const unsigned char *cmd,
		     int data_direction, void *buffer, unsigned bufflen,
		     struct scsi_sense_hdr *sshdr, int timeout, int retries)
{
	char *sense = NULL;
	int result;

	if (sshdr) {
		sense = kmalloc(SCSI_SENSE_BUFFERSIZE, GFP_NOIO);
		if (!sense)
			return DRIVER_ERROR << 24;
		memset(sense, 0, SCSI_SENSE_BUFFERSIZE);
	}
	result = scsi_execute(sdev, cmd, data_direction, buffer, bufflen,
				  sense, timeout, retries, 0);
	if (sshdr)
		scsi_normalize_sense(sense, SCSI_SENSE_BUFFERSIZE, sshdr);

	kfree(sense);
	return result;
}
EXPORT_SYMBOL(scsi_execute_req);
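
/*
 * Example: the same style of request through scsi_execute_req(), which
 * also normalizes any sense data for the caller.  A sketch only; the
 * error handling is reduced to checking for a unit attention.
 *
 *	struct scsi_sense_hdr sshdr;
 *	int result;
 *
 *	result = scsi_execute_req(sdev, cmd, DMA_FROM_DEVICE, buf,
 *				  sizeof(buf), &sshdr, 30 * HZ, 3);
 *	if (result && scsi_sense_valid(&sshdr) &&
 *	    sshdr.sense_key == UNIT_ATTENTION)
 *		... the device was reset or the media changed; retry ...
 */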

/*
 * Function:    scsi_init_cmd_errh()
 *
 * Purpose:     Initialize cmd fields related to error handling.
 *
 * Arguments:   cmd	- command that is ready to be queued.
 *
 * Returns:     Nothing
 *
 * Notes:       This function has the job of initializing a number of
 *              fields related to error handling.   Typically this will
 *              be called once for each command, as required.
 */
static int scsi_init_cmd_errh(struct scsi_cmnd *cmd)
{
	cmd->serial_number = 0;

	memset(cmd->sense_buffer, 0, sizeof cmd->sense_buffer);

	if (cmd->cmd_len == 0)
		cmd->cmd_len = COMMAND_SIZE(cmd->cmnd[0]);

	/*
	 * We need saved copies of a number of fields - this is because
	 * error handling may need to overwrite these with different values
	 * to run different commands, and once error handling is complete,
	 * we will need to restore these values prior to running the actual
	 * command.
	 */
	cmd->old_use_sg = cmd->use_sg;
	cmd->old_cmd_len = cmd->cmd_len;
	cmd->sc_old_data_direction = cmd->sc_data_direction;
	cmd->old_underflow = cmd->underflow;
	memcpy(cmd->data_cmnd, cmd->cmnd, sizeof(cmd->cmnd));
	cmd->buffer = cmd->request_buffer;
	cmd->bufflen = cmd->request_bufflen;

	return 1;
}

/*
 * Function:   scsi_setup_cmd_retry()
 *
 * Purpose:    Restore the command state for a retry
 *
 * Arguments:  cmd	- command to be restored
 *
 * Returns:    Nothing
 *
 * Notes:      Immediately prior to retrying a command, we need
 *             to restore certain fields that we saved above.
 */
void scsi_setup_cmd_retry(struct scsi_cmnd *cmd)
{
	memcpy(cmd->cmnd, cmd->data_cmnd, sizeof(cmd->data_cmnd));
	cmd->request_buffer = cmd->buffer;
	cmd->request_bufflen = cmd->bufflen;
	cmd->use_sg = cmd->old_use_sg;
	cmd->cmd_len = cmd->old_cmd_len;
	cmd->sc_data_direction = cmd->sc_old_data_direction;
	cmd->underflow = cmd->old_underflow;
}

void scsi_device_unbusy(struct scsi_device *sdev)
{
	struct Scsi_Host *shost = sdev->host;
	unsigned long flags;

	spin_lock_irqsave(shost->host_lock, flags);
	shost->host_busy--;
	if (unlikely((shost->shost_state == SHOST_RECOVERY) &&
		     shost->host_failed))
		scsi_eh_wakeup(shost);
	spin_unlock(shost->host_lock);
	spin_lock(sdev->request_queue->queue_lock);
	sdev->device_busy--;
	spin_unlock_irqrestore(sdev->request_queue->queue_lock, flags);
}

/*
 * Called for single_lun devices on IO completion. Clear starget_sdev_user,
 * and call blk_run_queue for all the scsi_devices on the target -
 * including current_sdev first.
 *
 * Called with *no* scsi locks held.
 */
static void scsi_single_lun_run(struct scsi_device *current_sdev)
{
	struct Scsi_Host *shost = current_sdev->host;
	struct scsi_device *sdev, *tmp;
	struct scsi_target *starget = scsi_target(current_sdev);
	unsigned long flags;

	spin_lock_irqsave(shost->host_lock, flags);
	starget->starget_sdev_user = NULL;
	spin_unlock_irqrestore(shost->host_lock, flags);

	/*
	 * Call blk_run_queue for all LUNs on the target, starting with
	 * current_sdev. We race with others (to set starget_sdev_user),
	 * but in most cases, we will be first. Ideally, each LU on the
	 * target would get some limited time or requests on the target.
	 */
	blk_run_queue(current_sdev->request_queue);

	spin_lock_irqsave(shost->host_lock, flags);
	if (starget->starget_sdev_user)
		goto out;
	list_for_each_entry_safe(sdev, tmp, &starget->devices,
			same_target_siblings) {
		if (sdev == current_sdev)
			continue;
		if (scsi_device_get(sdev))
			continue;

		spin_unlock_irqrestore(shost->host_lock, flags);
		blk_run_queue(sdev->request_queue);
		spin_lock_irqsave(shost->host_lock, flags);

		scsi_device_put(sdev);
	}
 out:
	spin_unlock_irqrestore(shost->host_lock, flags);
}

/*
 * Function:	scsi_run_queue()
 *
 * Purpose:	Select a proper request queue to serve next
 *
 * Arguments:	q	- last request's queue
 *
 * Returns:     Nothing
 *
 * Notes:	The previous command was completely finished, start
 *		a new one if possible.
 */
static void scsi_run_queue(struct request_queue *q)
{
	struct scsi_device *sdev = q->queuedata;
	struct Scsi_Host *shost = sdev->host;
	unsigned long flags;

	if (sdev->single_lun)
		scsi_single_lun_run(sdev);

	spin_lock_irqsave(shost->host_lock, flags);
	while (!list_empty(&shost->starved_list) &&
	       !shost->host_blocked && !shost->host_self_blocked &&
		!((shost->can_queue > 0) &&
		  (shost->host_busy >= shost->can_queue))) {
		/*
		 * As long as shost is accepting commands and we have
		 * starved queues, call blk_run_queue. scsi_request_fn
		 * drops the queue_lock and can add us back to the
		 * starved_list.
		 *
		 * host_lock protects the starved_list and starved_entry.
		 * scsi_request_fn must get the host_lock before checking
		 * or modifying starved_list or starved_entry.
		 */
		sdev = list_entry(shost->starved_list.next,
					  struct scsi_device, starved_entry);
		list_del_init(&sdev->starved_entry);
		spin_unlock_irqrestore(shost->host_lock, flags);

		blk_run_queue(sdev->request_queue);

		spin_lock_irqsave(shost->host_lock, flags);
		if (unlikely(!list_empty(&sdev->starved_entry)))
			/*
			 * sdev lost a race, and was put back on the
			 * starved list. This is unlikely but without this
			 * in theory we could loop forever.
			 */
			break;
	}
	spin_unlock_irqrestore(shost->host_lock, flags);

	blk_run_queue(q);
}

/*
 * Function:	scsi_requeue_command()
 *
 * Purpose:	Handle post-processing of completed commands.
 *
 * Arguments:	q	- queue to operate on
 *		cmd	- command that may need to be requeued.
 *
 * Returns:	Nothing
 *
 * Notes:	After command completion, there may be blocks left
 *		over which weren't finished by the previous command;
 *		this can be for a number of reasons - the main one is
 *		I/O errors in the middle of the request, in which case
 *		we need to request the blocks that come after the bad
 *		sector.
 * Notes:	Upon return, cmd is a stale pointer.
 */
static void scsi_requeue_command(struct request_queue *q, struct scsi_cmnd *cmd)
{
	struct request *req = cmd->request;
	unsigned long flags;

	scsi_unprep_request(req);
	spin_lock_irqsave(q->queue_lock, flags);
	blk_requeue_request(q, req);
	spin_unlock_irqrestore(q->queue_lock, flags);

	scsi_run_queue(q);
}

void scsi_next_command(struct scsi_cmnd *cmd)
{
	struct request_queue *q = cmd->device->request_queue;

	scsi_put_command(cmd);
	scsi_run_queue(q);
}

void scsi_run_host_queues(struct Scsi_Host *shost)
{
	struct scsi_device *sdev;

	shost_for_each_device(sdev, shost)
		scsi_run_queue(sdev->request_queue);
}

/*
 * Function:    scsi_end_request()
 *
 * Purpose:     Post-processing of completed commands (usually invoked at end
 *		of upper level post-processing and scsi_io_completion).
 *
 * Arguments:   cmd	 - command that is complete.
 *              uptodate - 1 if I/O indicates success, <= 0 for I/O error.
 *              bytes    - number of bytes of completed I/O
 *		requeue  - indicates whether we should requeue leftovers.
 *
 * Lock status: Assumed that lock is not held upon entry.
 *
 * Returns:     cmd if requeue required, NULL otherwise.
 *
 * Notes:       This is called for block device requests in order to
 *              mark some number of sectors as complete.
 *
 *		We are guaranteeing that the request queue will be goosed
 *		at some point during this call.
 * Notes:	If cmd was requeued, upon return it will be a stale pointer.
 */
static struct scsi_cmnd *scsi_end_request(struct scsi_cmnd *cmd, int uptodate,
					  int bytes, int requeue)
{
	request_queue_t *q = cmd->device->request_queue;
	struct request *req = cmd->request;
	unsigned long flags;

	/*
	 * If there are blocks left over at the end, set up the command
	 * to queue the remainder of them.
	 */
	if (end_that_request_chunk(req, uptodate, bytes)) {
		int leftover = (req->hard_nr_sectors << 9);

		if (blk_pc_request(req))
			leftover = req->data_len;

		/* kill remainder if no retries */
		if (!uptodate && blk_noretry_request(req))
			end_that_request_chunk(req, 0, leftover);
		else {
			if (requeue) {
				/*
				 * Bleah.  Leftovers again.  Stick the
				 * leftovers in the front of the
				 * queue, and goose the queue again.
				 */
				scsi_requeue_command(q, cmd);
				cmd = NULL;
			}
			return cmd;
		}
	}

	add_disk_randomness(req->rq_disk);

	spin_lock_irqsave(q->queue_lock, flags);
	if (blk_rq_tagged(req))
		blk_queue_end_tag(q, req);
	end_that_request_last(req);
	spin_unlock_irqrestore(q->queue_lock, flags);

	/*
	 * This will goose the queue request function at the end, so we don't
	 * need to worry about launching another command.
	 */
	scsi_next_command(cmd);
	return NULL;
}

static struct scatterlist *scsi_alloc_sgtable(struct scsi_cmnd *cmd, int gfp_mask)
{
	struct scsi_host_sg_pool *sgp;
	struct scatterlist *sgl;

	BUG_ON(!cmd->use_sg);

	switch (cmd->use_sg) {
	case 1 ... 8:
		cmd->sglist_len = 0;
		break;
	case 9 ... 16:
		cmd->sglist_len = 1;
		break;
	case 17 ... 32:
		cmd->sglist_len = 2;
		break;
#if (SCSI_MAX_PHYS_SEGMENTS > 32)
	case 33 ... 64:
		cmd->sglist_len = 3;
		break;
#if (SCSI_MAX_PHYS_SEGMENTS > 64)
	case 65 ... 128:
		cmd->sglist_len = 4;
		break;
#if (SCSI_MAX_PHYS_SEGMENTS  > 128)
	case 129 ... 256:
		cmd->sglist_len = 5;
		break;
#endif
#endif
#endif
	default:
		return NULL;
	}

	sgp = scsi_sg_pools + cmd->sglist_len;
	sgl = mempool_alloc(sgp->pool, gfp_mask);
	return sgl;
}

static void scsi_free_sgtable(struct scatterlist *sgl, int index)
{
	struct scsi_host_sg_pool *sgp;

	BUG_ON(index >= SG_MEMPOOL_NR);

	sgp = scsi_sg_pools + index;
	mempool_free(sgl, sgp->pool);
}

/*
 * Function:    scsi_release_buffers()
 *
 * Purpose:     Completion processing for block device I/O requests.
 *
 * Arguments:   cmd	- command that we are bailing.
 *
 * Lock status: Assumed that no lock is held upon entry.
 *
 * Returns:     Nothing
 *
 * Notes:       In the event that an upper level driver rejects a
 *		command, we must release resources allocated during
 *		the __init_io() function.  Primarily this would involve
 *		the scatter-gather table, and potentially any bounce
 *		buffers.
 */
static void scsi_release_buffers(struct scsi_cmnd *cmd)
{
	struct request *req = cmd->request;

	/*
	 * Free up any indirection buffers we allocated for DMA purposes.
	 */
	if (cmd->use_sg)
		scsi_free_sgtable(cmd->request_buffer, cmd->sglist_len);
	else if (cmd->request_buffer != req->buffer)
		kfree(cmd->request_buffer);

	/*
	 * Zero these out.  They now point to freed memory, and it is
	 * dangerous to hang onto the pointers.
	 */
	cmd->buffer  = NULL;
	cmd->bufflen = 0;
	cmd->request_buffer = NULL;
	cmd->request_bufflen = 0;
}

/*
 * Function:    scsi_io_completion()
 *
 * Purpose:     Completion processing for block device I/O requests.
 *
 * Arguments:   cmd   - command that is finished.
 *
 * Lock status: Assumed that no lock is held upon entry.
 *
 * Returns:     Nothing
 *
 * Notes:       This function is matched in terms of capabilities to
 *              the function that created the scatter-gather list.
 *              In other words, if there are no bounce buffers
 *              (the normal case for most drivers), we don't need
 *              the logic to deal with cleaning up afterwards.
 *
 *		We must do one of several things here:
 *
 *		a) Call scsi_end_request.  This will finish off the
 *		   specified number of sectors.  If we are done, the
 *		   command block will be released, and the queue
 *		   function will be goosed.  If we are not done, then
 *		   scsi_end_request will directly goose the queue.
 *
 *		b) We can just use scsi_requeue_command() here.  This would
 *		   be used if we just wanted to retry, for example.
 */
void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes,
			unsigned int block_bytes)
{
	int result = cmd->result;
	int this_count = cmd->bufflen;
	request_queue_t *q = cmd->device->request_queue;
	struct request *req = cmd->request;
	int clear_errors = 1;
	struct scsi_sense_hdr sshdr;
	int sense_valid = 0;
	int sense_deferred = 0;

	if (blk_complete_barrier_rq(q, req, good_bytes >> 9))
		return;

	/*
	 * Free up any indirection buffers we allocated for DMA purposes.
	 * For the case of a READ, we need to copy the data out of the
	 * bounce buffer and into the real buffer.
	 */
	if (cmd->use_sg)
		scsi_free_sgtable(cmd->buffer, cmd->sglist_len);
	else if (cmd->buffer != req->buffer) {
		if (rq_data_dir(req) == READ) {
			unsigned long flags;
			char *to = bio_kmap_irq(req->bio, &flags);
			memcpy(to, cmd->buffer, cmd->bufflen);
			bio_kunmap_irq(to, &flags);
		}
		kfree(cmd->buffer);
	}

	if (result) {
		sense_valid = scsi_command_normalize_sense(cmd, &sshdr);
		if (sense_valid)
			sense_deferred = scsi_sense_is_deferred(&sshdr);
	}
	if (blk_pc_request(req)) { /* SG_IO ioctl from block level */
		req->errors = result;
		if (result) {
			clear_errors = 0;
			if (sense_valid && req->sense) {
				/*
				 * SG_IO wants current and deferred errors
				 */
				int len = 8 + cmd->sense_buffer[7];

				if (len > SCSI_SENSE_BUFFERSIZE)
					len = SCSI_SENSE_BUFFERSIZE;
				memcpy(req->sense, cmd->sense_buffer,  len);
				req->sense_len = len;
			}
		} else
			req->data_len = cmd->resid;
	}

	/*
	 * Zero these out.  They now point to freed memory, and it is
	 * dangerous to hang onto the pointers.
	 */
	cmd->buffer  = NULL;
	cmd->bufflen = 0;
	cmd->request_buffer = NULL;
	cmd->request_bufflen = 0;

	/*
	 * Next deal with any sectors which we were able to correctly
	 * handle.
	 */
	if (good_bytes >= 0) {
		SCSI_LOG_HLCOMPLETE(1, printk("%ld sectors total, %d bytes done.\n",
					      req->nr_sectors, good_bytes));
		SCSI_LOG_HLCOMPLETE(1, printk("use_sg is %d\n", cmd->use_sg));

		if (clear_errors)
			req->errors = 0;
		/*
		 * If multiple sectors are requested in one buffer, then
		 * they will have been finished off by the first command.
		 * If not, then we have a multi-buffer command.
		 *
		 * If block_bytes != 0, it means we had a medium error
		 * of some sort, and that we want to mark some number of
		 * sectors as not uptodate.  Thus we want to inhibit
		 * requeueing right here - we will requeue down below
		 * when we handle the bad sectors.
		 */

		/*
		 * If the command completed without error, then either
		 * finish off the rest of the command, or start a new one.
		 */
		if (scsi_end_request(cmd, 1, good_bytes, result == 0) == NULL)
			return;
	}
	/*
	 * Now, if we were good little boys and girls, Santa left us a request
	 * sense buffer.  We can extract information from this, so we
	 * can choose a block to remap, etc.
	 */
	if (sense_valid && !sense_deferred) {
		switch (sshdr.sense_key) {
		case UNIT_ATTENTION:
			if (cmd->device->removable) {
				/* detected disc change.  set a bit
				 * and quietly refuse further access.
				 */
				cmd->device->changed = 1;
				scsi_end_request(cmd, 0,
						this_count, 1);
				return;
			} else {
				/*
				* Must have been a power glitch, or a
				* bus reset.  Could not have been a
				* media change, so we just retry the
				* request and see what happens.
				*/
				scsi_requeue_command(q, cmd);
				return;
			}
			break;
		case ILLEGAL_REQUEST:
			/*
		 	* If we had an ILLEGAL REQUEST returned, then we may
		 	* have performed an unsupported command.  The only
		 	* thing this should be would be a ten byte read where
			* only a six byte read was supported.  Also, on a
			* system where READ CAPACITY failed, we may have read
			* past the end of the disk.
		 	*/
			if (cmd->device->use_10_for_rw &&
			    (cmd->cmnd[0] == READ_10 ||
			     cmd->cmnd[0] == WRITE_10)) {
				cmd->device->use_10_for_rw = 0;
				/*
				 * This will cause a retry with a 6-byte
				 * command.
				 */
				scsi_requeue_command(q, cmd);
				result = 0;
			} else {
				scsi_end_request(cmd, 0, this_count, 1);
				return;
			}
			break;
		case NOT_READY:
			/*
			 * If the device is in the process of becoming ready,
			 * retry.
			 */
			if (sshdr.asc == 0x04 && sshdr.ascq == 0x01) {
				scsi_requeue_command(q, cmd);
				return;
			}
			if (!(req->flags & REQ_QUIET))
				dev_printk(KERN_INFO,
					   &cmd->device->sdev_gendev,
					   "Device not ready.\n");
			scsi_end_request(cmd, 0, this_count, 1);
			return;
		case VOLUME_OVERFLOW:
			if (!(req->flags & REQ_QUIET)) {
				dev_printk(KERN_INFO,
					   &cmd->device->sdev_gendev,
					   "Volume overflow, CDB: ");
				__scsi_print_command(cmd->data_cmnd);
				scsi_print_sense("", cmd);
			}
			scsi_end_request(cmd, 0, block_bytes, 1);
			return;
		default:
			break;
		}
	}			/* driver byte != 0 */
	if (host_byte(result) == DID_RESET) {
		/*
		 * Third party bus reset or reset for error
		 * recovery reasons.  Just retry the request
		 * and see what happens.
		 */
		scsi_requeue_command(q, cmd);
		return;
	}
	if (result) {
		if (!(req->flags & REQ_QUIET)) {
			dev_printk(KERN_INFO, &cmd->device->sdev_gendev,
				   "SCSI error: return code = 0x%x\n", result);

			if (driver_byte(result) & DRIVER_SENSE)
				scsi_print_sense("", cmd);
		}
		/*
		 * Mark a single buffer as not uptodate.  Queue the remainder.
		 * We sometimes get this cruft in the event that a medium error
		 * isn't properly reported.
		 */
		block_bytes = req->hard_cur_sectors << 9;
		if (!block_bytes)
			block_bytes = req->data_len;
		scsi_end_request(cmd, 0, block_bytes, 1);
	}
}
EXPORT_SYMBOL(scsi_io_completion);

/*
 * Function:    scsi_init_io()
 *
 * Purpose:     SCSI I/O initialize function.
 *
 * Arguments:   cmd   - Command descriptor we wish to initialize
 *
 * Returns:     0 on success
 *		BLKPREP_DEFER if the failure is retryable
 *		BLKPREP_KILL if the failure is fatal
 */
static int scsi_init_io(struct scsi_cmnd *cmd)
{
	struct request     *req = cmd->request;
	struct scatterlist *sgpnt;
	int		   count;

	/*
	 * if this is a rq->data based REQ_BLOCK_PC, setup for a non-sg xfer
	 */
	if ((req->flags & REQ_BLOCK_PC) && !req->bio) {
		cmd->request_bufflen = req->data_len;
		cmd->request_buffer = req->data;
		req->buffer = req->data;
		cmd->use_sg = 0;
		return 0;
	}

	/*
	 * we used to not use scatter-gather for single segment requests,
	 * but now we do (it makes highmem I/O easier to support without
	 * kmapping pages)
	 */
	cmd->use_sg = req->nr_phys_segments;

	/*
	 * if sg table allocation fails, requeue request later.
	 */
	sgpnt = scsi_alloc_sgtable(cmd, GFP_ATOMIC);
	if (unlikely(!sgpnt))
		return BLKPREP_DEFER;

	cmd->request_buffer = (char *) sgpnt;
	cmd->request_bufflen = req->nr_sectors << 9;
	if (blk_pc_request(req))
		cmd->request_bufflen = req->data_len;
	req->buffer = NULL;

	/*
	 * Next, walk the list, and fill in the addresses and sizes of
	 * each segment.
	 */
	count = blk_rq_map_sg(req->q, req, cmd->request_buffer);

	/*
	 * mapped well, send it off
	 */
	if (likely(count <= cmd->use_sg)) {
		cmd->use_sg = count;
		return 0;
	}

	printk(KERN_ERR "Incorrect number of segments after building list\n");
	printk(KERN_ERR "counted %d, received %d\n", count, cmd->use_sg);
	printk(KERN_ERR "req nr_sec %lu, cur_nr_sec %u\n", req->nr_sectors,
			req->current_nr_sectors);

	/* release the command and kill it */
	scsi_release_buffers(cmd);
	scsi_put_command(cmd);
	return BLKPREP_KILL;
}

static int scsi_prepare_flush_fn(request_queue_t *q, struct request *rq)
{
	struct scsi_device *sdev = q->queuedata;
	struct scsi_driver *drv;

	if (sdev->sdev_state == SDEV_RUNNING) {
		drv = *(struct scsi_driver **) rq->rq_disk->private_data;

		if (drv->prepare_flush)
			return drv->prepare_flush(q, rq);
	}

	return 0;
}

static void scsi_end_flush_fn(request_queue_t *q, struct request *rq)
{
	struct scsi_device *sdev = q->queuedata;
	struct request *flush_rq = rq->end_io_data;
	struct scsi_driver *drv;

	if (flush_rq->errors) {
		printk("scsi: barrier error, disabling flush support\n");
		blk_queue_ordered(q, QUEUE_ORDERED_NONE);
	}

	if (sdev->sdev_state == SDEV_RUNNING) {
		drv = *(struct scsi_driver **) rq->rq_disk->private_data;
		drv->end_flush(q, rq);
	}
}

static int scsi_issue_flush_fn(request_queue_t *q, struct gendisk *disk,
			       sector_t *error_sector)
{
	struct scsi_device *sdev = q->queuedata;
	struct scsi_driver *drv;

	if (sdev->sdev_state != SDEV_RUNNING)
		return -ENXIO;

	drv = *(struct scsi_driver **) disk->private_data;
	if (drv->issue_flush)
		return drv->issue_flush(&sdev->sdev_gendev, error_sector);

	return -EOPNOTSUPP;
}

static void scsi_generic_done(struct scsi_cmnd *cmd)
{
	BUG_ON(!blk_pc_request(cmd->request));
	scsi_io_completion(cmd, cmd->result == 0 ? cmd->bufflen : 0, 0);
}

static int scsi_prep_fn(struct request_queue *q, struct request *req)
{
	struct scsi_device *sdev = q->queuedata;
	struct scsi_cmnd *cmd;
	int specials_only = 0;

	/*
	 * Just check to see if the device is online.  If it isn't, we
	 * refuse to process any commands.  The device must be brought
	 * online before trying any recovery commands
	 */
	if (unlikely(!scsi_device_online(sdev))) {
		printk(KERN_ERR "scsi%d (%d:%d): rejecting I/O to offline device\n",
		       sdev->host->host_no, sdev->id, sdev->lun);
		goto kill;
	}
	if (unlikely(sdev->sdev_state != SDEV_RUNNING)) {
		/* OK, we're not in a running state; don't prep
		 * user commands */
		if (sdev->sdev_state == SDEV_DEL) {
			/* Device is fully deleted, no commands
			 * at all allowed down */
			printk(KERN_ERR "scsi%d (%d:%d): rejecting I/O to dead device\n",
			       sdev->host->host_no, sdev->id, sdev->lun);
			goto kill;
		}
		/* OK, we only allow special commands (i.e. not
		 * user initiated ones) */
		specials_only = sdev->sdev_state;
	}

	/*
	 * Find the actual device driver associated with this command.
	 * The SPECIAL requests are things like character device or
	 * ioctls, which did not originate from ll_rw_blk.  Note that
	 * the special field is also used to indicate the cmd for
	 * the remainder of a partially fulfilled request that can
	 * come up when there is a medium error.  We have to treat
	 * these two cases differently.  We differentiate by looking
	 * at request->cmd, as this tells us the real story.
	 */
	if (req->flags & REQ_SPECIAL && req->special) {
		struct scsi_request *sreq = req->special;

		if (sreq->sr_magic == SCSI_REQ_MAGIC) {
			cmd = scsi_get_command(sreq->sr_device, GFP_ATOMIC);
			if (unlikely(!cmd))
				goto defer;
			scsi_init_cmd_from_req(cmd, sreq);
		} else
			cmd = req->special;
	} else if (req->flags & (REQ_CMD | REQ_BLOCK_PC)) {

		if(unlikely(specials_only) && !(req->flags & REQ_SPECIAL)) {
			if(specials_only == SDEV_QUIESCE ||
					specials_only == SDEV_BLOCK)
				goto defer;

			printk(KERN_ERR "scsi%d (%d:%d): rejecting I/O to device being removed\n",
			       sdev->host->host_no, sdev->id, sdev->lun);
			goto kill;
		}


		/*
		 * Now try and find a command block that we can use.
		 */
		if (!req->special) {
			cmd = scsi_get_command(sdev, GFP_ATOMIC);
			if (unlikely(!cmd))
				goto defer;
		} else
			cmd = req->special;

		/* pull a tag out of the request if we have one */
		cmd->tag = req->tag;
	} else {
		blk_dump_rq_flags(req, "SCSI bad req");
		goto kill;
	}

	/* note the overloading of req->special.  When the tag
	 * is active it always means cmd.  If the tag goes
	 * back for re-queueing, it may be reset */
	req->special = cmd;
	cmd->request = req;

	/*
	 * FIXME: drop the lock here because the functions below
	 * expect to be called without the queue lock held.  Also,
	 * previously, we dequeued the request before dropping the
	 * lock.  We hope REQ_STARTED prevents anything untoward from
	 * happening now.
	 */
	if (req->flags & (REQ_CMD | REQ_BLOCK_PC)) {
		struct scsi_driver *drv;
		int ret;

		/*
		 * This will do a couple of things:
		 *  1) Fill in the actual SCSI command.
		 *  2) Fill in any other upper-level specific fields
		 * (timeout).
		 *
		 * If this returns 0, it means that the request failed
		 * (reading past end of disk, reading offline device,
		 * etc).   This won't actually talk to the device, but
		 * some kinds of consistency checking may cause the
		 * request to be rejected immediately.
		 */

		/*
		 * This sets up the scatter-gather table (allocating if
		 * required).
		 */
		ret = scsi_init_io(cmd);
		switch(ret) {
		case BLKPREP_KILL:
			/* BLKPREP_KILL return also releases the command */
			goto kill;
		case BLKPREP_DEFER:
			goto defer;
		}

		/*
		 * Initialize the actual SCSI command for this request.
		 */
		if (req->rq_disk) {
			drv = *(struct scsi_driver **)req->rq_disk->private_data;
			if (unlikely(!drv->init_command(cmd))) {
				scsi_release_buffers(cmd);
				scsi_put_command(cmd);
				goto kill;
			}
		} else {
			memcpy(cmd->cmnd, req->cmd, sizeof(cmd->cmnd));
			cmd->cmd_len = req->cmd_len;
			if (rq_data_dir(req) == WRITE)
				cmd->sc_data_direction = DMA_TO_DEVICE;
			else if (req->data_len)
				cmd->sc_data_direction = DMA_FROM_DEVICE;
			else
				cmd->sc_data_direction = DMA_NONE;

			cmd->transfersize = req->data_len;
			cmd->allowed = 3;
			cmd->timeout_per_command = req->timeout;
			cmd->done = scsi_generic_done;
		}
	}

	/*
	 * The request is now prepped, no need to come back here
	 */
	req->flags |= REQ_DONTPREP;
	return BLKPREP_OK;

 defer:
	/* If we defer, the elv_next_request() returns NULL, but the
	 * queue must be restarted, so we plug here if no returning
	 * command will automatically do that. */
	if (sdev->device_busy == 0)
		blk_plug_device(q);
	return BLKPREP_DEFER;
 kill:
	req->errors = DID_NO_CONNECT << 16;
	return BLKPREP_KILL;
}

/*
 * scsi_dev_queue_ready: if we can send requests to sdev, return 1 else
 * return 0.
 *
 * Called with the queue_lock held.
 */
static inline int scsi_dev_queue_ready(struct request_queue *q,
				  struct scsi_device *sdev)
{
	if (sdev->device_busy >= sdev->queue_depth)
		return 0;
	if (sdev->device_busy == 0 && sdev->device_blocked) {
		/*
		 * unblock after device_blocked iterates to zero
		 */
		if (--sdev->device_blocked == 0) {
			SCSI_LOG_MLQUEUE(3,
				printk("scsi%d (%d:%d) unblocking device at"
				       " zero depth\n", sdev->host->host_no,
				       sdev->id, sdev->lun));
		} else {
			blk_plug_device(q);
			return 0;
		}
	}
	if (sdev->device_blocked)
		return 0;

	return 1;
}

/*
 * scsi_host_queue_ready: if we can send requests to shost, return 1 else
 * return 0. We must end up running the queue again whenever 0 is
 * returned, else IO can hang.
 *
 * Called with host_lock held.
 */
static inline int scsi_host_queue_ready(struct request_queue *q,
				   struct Scsi_Host *shost,
				   struct scsi_device *sdev)
{
	if (shost->shost_state == SHOST_RECOVERY)
		return 0;
	if (shost->host_busy == 0 && shost->host_blocked) {
		/*
		 * unblock after host_blocked iterates to zero
		 */
		if (--shost->host_blocked == 0) {
			SCSI_LOG_MLQUEUE(3,
				printk("scsi%d unblocking host at zero depth\n",
					shost->host_no));
		} else {
			blk_plug_device(q);
			return 0;
		}
	}
	if ((shost->can_queue > 0 && shost->host_busy >= shost->can_queue) ||
	    shost->host_blocked || shost->host_self_blocked) {
		if (list_empty(&sdev->starved_entry))
			list_add_tail(&sdev->starved_entry, &shost->starved_list);
		return 0;
	}

	/* We're OK to process the command, so we can't be starved */
	if (!list_empty(&sdev->starved_entry))
		list_del_init(&sdev->starved_entry);

	return 1;
}

/*
 * Kill a request for a dead device
 */
static void scsi_kill_request(struct request *req, request_queue_t *q)
{
	struct scsi_cmnd *cmd = req->special;

	blkdev_dequeue_request(req);

	if (unlikely(cmd == NULL)) {
		printk(KERN_CRIT "impossible request in %s.\n",
				 __FUNCTION__);
		BUG();
	}

	scsi_init_cmd_errh(cmd);
	cmd->result = DID_NO_CONNECT << 16;
	atomic_inc(&cmd->device->iorequest_cnt);
	__scsi_done(cmd);
}

/*
 * Function:    scsi_request_fn()
 *
 * Purpose:     Main strategy routine for SCSI.
 *
 * Arguments:   q       - Pointer to actual queue.
 *
 * Returns:     Nothing
 *
 * Lock status: IO request lock assumed to be held when called.
 */
static void scsi_request_fn(struct request_queue *q)
{
	struct scsi_device *sdev = q->queuedata;
	struct Scsi_Host *shost;
	struct scsi_cmnd *cmd;
	struct request *req;

	if (!sdev) {
		printk("scsi: killing requests for dead queue\n");
		while ((req = elv_next_request(q)) != NULL)
			scsi_kill_request(req, q);
		return;
	}

	if(!get_device(&sdev->sdev_gendev))
		/* We must be tearing the block queue down already */
		return;

	/*
	 * To start with, we keep looping until the queue is empty, or until
	 * the host is no longer able to accept any more requests.
	 */
	shost = sdev->host;
	while (!blk_queue_plugged(q)) {
		int rtn;
		/*
		 * get next queueable request.  We do this early to make sure
		 * that the request is fully prepared even if we cannot
		 * accept it.
		 */
		req = elv_next_request(q);
		if (!req || !scsi_dev_queue_ready(q, sdev))
			break;

		if (unlikely(!scsi_device_online(sdev))) {
			printk(KERN_ERR "scsi%d (%d:%d): rejecting I/O to offline device\n",
			       sdev->host->host_no, sdev->id, sdev->lun);
			scsi_kill_request(req, q);
			continue;
		}


		/*
		 * Remove the request from the request list.
		 */
		if (!(blk_queue_tagged(q) && !blk_queue_start_tag(q, req)))
			blkdev_dequeue_request(req);
		sdev->device_busy++;

		spin_unlock(q->queue_lock);
		cmd = req->special;
		if (unlikely(cmd == NULL)) {
			printk(KERN_CRIT "impossible request in %s.\n"
					 "please mail a stack trace to "
					 "linux-scsi@vger.kernel.org",
					 __FUNCTION__);
			BUG();
		}
		spin_lock(shost->host_lock);

		if (!scsi_host_queue_ready(q, shost, sdev))
			goto not_ready;
		if (sdev->single_lun) {
			if (scsi_target(sdev)->starget_sdev_user &&
			    scsi_target(sdev)->starget_sdev_user != sdev)
				goto not_ready;
			scsi_target(sdev)->starget_sdev_user = sdev;
		}
		shost->host_busy++;

		/*
		 * XXX(hch): This is rather suboptimal, scsi_dispatch_cmd will
		 *		take the lock again.
		 */
		spin_unlock_irq(shost->host_lock);

		/*
		 * Finally, initialize any error handling parameters, and set up
		 * the timers for timeouts.
		 */
		scsi_init_cmd_errh(cmd);

		/*
		 * Dispatch the command to the low-level driver.
		 */
		rtn = scsi_dispatch_cmd(cmd);
		spin_lock_irq(q->queue_lock);
		if(rtn) {
			/* we're refusing the command; because of
			 * the way locks get dropped, we need to
			 * check here if plugging is required */
			if(sdev->device_busy == 0)
				blk_plug_device(q);

			break;
		}
	}

	goto out;

 not_ready:
	spin_unlock_irq(shost->host_lock);

	/*
	 * lock q, handle tag, requeue req, and decrement device_busy. We
	 * must return with queue_lock held.
	 *
	 * Decrementing device_busy without checking it is OK, as all such
	 * cases (host limits or settings) should run the queue at some
	 * later time.
	 */
	spin_lock_irq(q->queue_lock);
	blk_requeue_request(q, req);
	sdev->device_busy--;
	if(sdev->device_busy == 0)
		blk_plug_device(q);
 out:
	/* must be careful here...if we trigger the ->remove() function
	 * we cannot be holding the q lock */
	spin_unlock_irq(q->queue_lock);
	put_device(&sdev->sdev_gendev);
	spin_lock_irq(q->queue_lock);
}

u64 scsi_calculate_bounce_limit(struct Scsi_Host *shost)
{
	struct device *host_dev;
	u64 bounce_limit = 0xffffffff;

	if (shost->unchecked_isa_dma)
		return BLK_BOUNCE_ISA;
	/*
	 * Platforms with virtual-DMA translation
	 * hardware have no practical limit.
	 */
	if (!PCI_DMA_BUS_IS_PHYS)
		return BLK_BOUNCE_ANY;

	host_dev = scsi_get_device(shost);
	if (host_dev && host_dev->dma_mask)
		bounce_limit = *host_dev->dma_mask;

	return bounce_limit;
}
EXPORT_SYMBOL(scsi_calculate_bounce_limit);

struct request_queue *scsi_alloc_queue(struct scsi_device *sdev)
{
	struct Scsi_Host *shost = sdev->host;
	struct request_queue *q;

	q = blk_init_queue(scsi_request_fn, NULL);
	if (!q)
		return NULL;

	blk_queue_prep_rq(q, scsi_prep_fn);

	blk_queue_max_hw_segments(q, shost->sg_tablesize);
	blk_queue_max_phys_segments(q, SCSI_MAX_PHYS_SEGMENTS);
	blk_queue_max_sectors(q, shost->max_sectors);
	blk_queue_bounce_limit(q, scsi_calculate_bounce_limit(shost));
	blk_queue_segment_boundary(q, shost->dma_boundary);
	blk_queue_issue_flush_fn(q, scsi_issue_flush_fn);

	/*
	 * ordered tags are superior to flush ordering
	 */
	if (shost->ordered_tag)
		blk_queue_ordered(q, QUEUE_ORDERED_TAG);
	else if (shost->ordered_flush) {
		blk_queue_ordered(q, QUEUE_ORDERED_FLUSH);
		q->prepare_flush_fn = scsi_prepare_flush_fn;
		q->end_flush_fn = scsi_end_flush_fn;
	}

	if (!shost->use_clustering)
		clear_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
	return q;
}

void scsi_free_queue(struct request_queue *q)
{
	blk_cleanup_queue(q);
}

/*
 * Function:    scsi_block_requests()
 *
 * Purpose:     Utility function used by low-level drivers to prevent further
 *		commands from being queued to the device.
 *
 * Arguments:   shost       - Host in question
 *
 * Returns:     Nothing
 *
 * Lock status: No locks are assumed held.
 *
 * Notes:       There is no timer nor any other means by which the requests
 *		get unblocked other than the low-level driver calling
 *		scsi_unblock_requests().
 */
void scsi_block_requests(struct Scsi_Host *shost)
{
	shost->host_self_blocked = 1;
}
EXPORT_SYMBOL(scsi_block_requests);

/*
 * Function:    scsi_unblock_requests()
 *
 * Purpose:     Utility function used by low-level drivers to allow further
 *		commands to be queued to the device.
 *
 * Arguments:   shost       - Host in question
 *
 * Returns:     Nothing
 *
 * Lock status: No locks are assumed held.
 *
 * Notes:       There is no timer nor any other means by which the requests
 *		get unblocked other than the low-level driver calling
 *		scsi_unblock_requests().
 *
 *		This is done as an API function so that changes to the
 *		internals of the scsi mid-layer won't require wholesale
 *		changes to drivers that use this feature.
 */
void scsi_unblock_requests(struct Scsi_Host *shost)
{
	shost->host_self_blocked = 0;
	scsi_run_host_queues(shost);
}
EXPORT_SYMBOL(scsi_unblock_requests);
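
/*
 * Example: the intended pairing of the two functions above in a
 * low-level driver, e.g. around a controller reset.  A hypothetical
 * sketch; "mydrv_reset_firmware" and "adap" are illustrative names.
 *
 *	scsi_block_requests(shost);
 *	mydrv_reset_firmware(adap);
 *	scsi_unblock_requests(shost);	(this also reruns the queues)
 */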

int __init scsi_init_queue(void)
{
	int i;

	for (i = 0; i < SG_MEMPOOL_NR; i++) {
		struct scsi_host_sg_pool *sgp = scsi_sg_pools + i;
		int size = sgp->size * sizeof(struct scatterlist);

		sgp->slab = kmem_cache_create(sgp->name, size, 0,
				SLAB_HWCACHE_ALIGN, NULL, NULL);
		if (!sgp->slab) {
			printk(KERN_ERR "SCSI: can't init sg slab %s\n",
					sgp->name);
		}

		sgp->pool = mempool_create(SG_MEMPOOL_SIZE,
				mempool_alloc_slab, mempool_free_slab,
				sgp->slab);
		if (!sgp->pool) {
			printk(KERN_ERR "SCSI: can't init sg mempool %s\n",
					sgp->name);
		}
	}

	return 0;
}

void scsi_exit_queue(void)
{
	int i;

	for (i = 0; i < SG_MEMPOOL_NR; i++) {
		struct scsi_host_sg_pool *sgp = scsi_sg_pools + i;
		mempool_destroy(sgp->pool);
		kmem_cache_destroy(sgp->slab);
	}
}
/**
 *	scsi_mode_sense - issue a mode sense, falling back from ten to
 *		six bytes if necessary.
 *	@sdev:	SCSI device to be queried
 *	@dbd:	set if mode sense will allow block descriptors to be returned
 *	@modepage: mode page being requested
 *	@buffer: request buffer (may not be smaller than eight bytes)
 *	@len:	length of request buffer.
 *	@timeout: command timeout
 *	@retries: number of retries before failing
 *	@data: returns a structure abstracting the mode header data
 *	@sense: place to put sense data (or NULL if no sense to be collected).
 *		must be SCSI_SENSE_BUFFERSIZE big.
 *
 *	Returns zero if successful; on success, data->header_length
 *	holds the header offset (either 4 or 8 depending on whether a
 *	six or ten byte command was issued).
1692int
1693scsi_mode_sense(struct scsi_device *sdev, int dbd, int modepage,
1694		  unsigned char *buffer, int len, int timeout, int retries,
1695		  struct scsi_mode_data *data, struct scsi_sense_hdr *sshdr) {
1696	unsigned char cmd[12];
1697	int use_10_for_ms;
1698	int header_length;
1699	int result;
1700	struct scsi_sense_hdr my_sshdr;
1701
1702	memset(data, 0, sizeof(*data));
1703	memset(&cmd[0], 0, 12);
1704	cmd[1] = dbd & 0x18;	/* allows DBD and LLBA bits */
1705	cmd[2] = modepage;
1706
1707	/* caller might not be interested in sense, but we need it */
1708	if (!sshdr)
1709		sshdr = &my_sshdr;
1710
1711 retry:
1712	use_10_for_ms = sdev->use_10_for_ms;
1713
1714	if (use_10_for_ms) {
1715		if (len < 8)
1716			len = 8;
1717
1718		cmd[0] = MODE_SENSE_10;
1719		cmd[8] = len;
1720		header_length = 8;
1721	} else {
1722		if (len < 4)
1723			len = 4;
1724
1725		cmd[0] = MODE_SENSE;
1726		cmd[4] = len;
1727		header_length = 4;
1728	}
1729
1730	memset(buffer, 0, len);
1731
1732	result = scsi_execute_req(sdev, cmd, DMA_FROM_DEVICE, buffer, len,
1733				  sshdr, timeout, retries);
1734
1735	/* This code looks awful: what it's doing is making sure an
1736	 * ILLEGAL REQUEST sense return identifies the actual command
1737	 * byte as the problem.  MODE_SENSE commands can return
1738	 * ILLEGAL REQUEST if the code page isn't supported */
1739
1740	if (use_10_for_ms && !scsi_status_is_good(result) &&
1741	    (driver_byte(result) & DRIVER_SENSE)) {
1742		if (scsi_sense_valid(sshdr)) {
1743			if ((sshdr->sense_key == ILLEGAL_REQUEST) &&
1744			    (sshdr->asc == 0x20) && (sshdr->ascq == 0)) {
1745				/*
1746				 * Invalid command operation code
1747				 */
1748				sdev->use_10_for_ms = 0;
1749				goto retry;
1750			}
1751		}
1752	}
1753
1754	if(scsi_status_is_good(result)) {
1755		data->header_length = header_length;
1756		if(use_10_for_ms) {
1757			data->length = buffer[0]*256 + buffer[1] + 2;
1758			data->medium_type = buffer[2];
1759			data->device_specific = buffer[3];
1760			data->longlba = buffer[4] & 0x01;
1761			data->block_descriptor_length = buffer[6]*256
1762				+ buffer[7];
1763		} else {
1764			data->length = buffer[0] + 1;
1765			data->medium_type = buffer[1];
1766			data->device_specific = buffer[2];
1767			data->block_descriptor_length = buffer[3];
1768		}
1769	}
1770
1771	return result;
1772}
1773EXPORT_SYMBOL(scsi_mode_sense);
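
/*
 * Example: reading the caching mode page (0x08) and locating the start
 * of the page data.  A minimal sketch; the 512 byte buffer, timeout
 * and retry count are arbitrary illustrative choices.
 *
 *	struct scsi_mode_data data;
 *	struct scsi_sense_hdr sshdr;
 *	unsigned char buffer[512];
 *	int res;
 *
 *	res = scsi_mode_sense(sdev, 0, 0x08, buffer, sizeof(buffer),
 *			      30 * HZ, 3, &data, &sshdr);
 *	if (scsi_status_is_good(res)) {
 *		unsigned char *page = buffer + data.header_length +
 *				      data.block_descriptor_length;
 *		... page[] now points at the caching page itself ...
 *	}
 */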

int
scsi_test_unit_ready(struct scsi_device *sdev, int timeout, int retries)
{
	char cmd[] = {
		TEST_UNIT_READY, 0, 0, 0, 0, 0,
	};
	struct scsi_sense_hdr sshdr;
	int result;

	result = scsi_execute_req(sdev, cmd, DMA_NONE, NULL, 0, &sshdr,
				  timeout, retries);

	if ((driver_byte(result) & DRIVER_SENSE) && sdev->removable) {

		if ((scsi_sense_valid(&sshdr)) &&
		    ((sshdr.sense_key == UNIT_ATTENTION) ||
		     (sshdr.sense_key == NOT_READY))) {
			sdev->changed = 1;
			result = 0;
		}
	}
	return result;
}
EXPORT_SYMBOL(scsi_test_unit_ready);
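
/*
 * Example: polling a device for readiness.  A sketch; the ten tries
 * and 100ms sleep are illustrative, not prescribed by this API.
 *
 *	int i, res = 0;
 *
 *	for (i = 0; i < 10; i++) {
 *		res = scsi_test_unit_ready(sdev, 30 * HZ, 3);
 *		if (res == 0)
 *			break;
 *		msleep(100);
 *	}
 */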

/**
 *	scsi_device_set_state - Take the given device through the device
 *		state model.
 *	@sdev:	scsi device to change the state of.
 *	@state:	state to change to.
 *
 *	Returns zero if successful or an error if the requested
 *	transition is illegal.
 **/
int
scsi_device_set_state(struct scsi_device *sdev, enum scsi_device_state state)
{
	enum scsi_device_state oldstate = sdev->sdev_state;

	if (state == oldstate)
		return 0;

	switch (state) {
	case SDEV_CREATED:
		/* There are no legal states that come back to
		 * created.  This is the manually initialised start
		 * state */
		goto illegal;

	case SDEV_RUNNING:
		switch (oldstate) {
		case SDEV_CREATED:
		case SDEV_OFFLINE:
		case SDEV_QUIESCE:
		case SDEV_BLOCK:
			break;
		default:
			goto illegal;
		}
		break;

	case SDEV_QUIESCE:
		switch (oldstate) {
		case SDEV_RUNNING:
		case SDEV_OFFLINE:
			break;
		default:
			goto illegal;
		}
		break;

	case SDEV_OFFLINE:
		switch (oldstate) {
		case SDEV_CREATED:
		case SDEV_RUNNING:
		case SDEV_QUIESCE:
		case SDEV_BLOCK:
			break;
		default:
			goto illegal;
		}
		break;

	case SDEV_BLOCK:
		switch (oldstate) {
		case SDEV_CREATED:
		case SDEV_RUNNING:
			break;
		default:
			goto illegal;
		}
		break;

	case SDEV_CANCEL:
		switch (oldstate) {
		case SDEV_CREATED:
		case SDEV_RUNNING:
		case SDEV_OFFLINE:
		case SDEV_BLOCK:
			break;
		default:
			goto illegal;
		}
		break;

	case SDEV_DEL:
		switch (oldstate) {
		case SDEV_CANCEL:
			break;
		default:
			goto illegal;
		}
		break;

	}
	sdev->sdev_state = state;
	return 0;

 illegal:
	SCSI_LOG_ERROR_RECOVERY(1,
				dev_printk(KERN_ERR, &sdev->sdev_gendev,
					   "Illegal state transition %s->%s\n",
					   scsi_device_state_name(oldstate),
					   scsi_device_state_name(state))
				);
	return -EINVAL;
}
EXPORT_SYMBOL(scsi_device_set_state);
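
/*
 * For reference, the legal transitions implemented above (new state on
 * the left, permitted previous states on the right):
 *
 *	SDEV_RUNNING  <-  CREATED, OFFLINE, QUIESCE, BLOCK
 *	SDEV_QUIESCE  <-  RUNNING, OFFLINE
 *	SDEV_OFFLINE  <-  CREATED, RUNNING, QUIESCE, BLOCK
 *	SDEV_BLOCK    <-  CREATED, RUNNING
 *	SDEV_CANCEL   <-  CREATED, RUNNING, OFFLINE, BLOCK
 *	SDEV_DEL      <-  CANCEL
 *
 * SDEV_CREATED is only ever the manually initialised start state; no
 * transition leads back to it.
 */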

/**
 *	scsi_device_quiesce - Block user issued commands.
 *	@sdev:	scsi device to quiesce.
 *
 *	This works by trying to transition to the SDEV_QUIESCE state
 *	(which must be a legal transition).  When the device is in this
 *	state, only special requests will be accepted, all others will
 *	be deferred.  Since special requests may also be requeued requests,
 *	a successful return doesn't guarantee the device will be
 *	totally quiescent.
 *
 *	Must be called with user context, may sleep.
 *
 *	Returns zero if successful or an error if not.
 **/
int
scsi_device_quiesce(struct scsi_device *sdev)
{
	int err = scsi_device_set_state(sdev, SDEV_QUIESCE);
	if (err)
		return err;

	scsi_run_queue(sdev->request_queue);
	while (sdev->device_busy) {
		msleep_interruptible(200);
		scsi_run_queue(sdev->request_queue);
	}
	return 0;
}
EXPORT_SYMBOL(scsi_device_quiesce);

/**
 *	scsi_device_resume - Restart user issued commands to a quiesced device.
 *	@sdev:	scsi device to resume.
 *
 *	Moves the device from quiesced back to running and restarts the
 *	queues.
 *
 *	Must be called with user context, may sleep.
 **/
void
scsi_device_resume(struct scsi_device *sdev)
{
	if(scsi_device_set_state(sdev, SDEV_RUNNING))
		return;
	scsi_run_queue(sdev->request_queue);
}
EXPORT_SYMBOL(scsi_device_resume);
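
/*
 * Example: the quiesce/resume pattern for issuing internal commands
 * without interference from user I/O.  A sketch only; the actual
 * command submission step is elided.
 *
 *	if (scsi_device_quiesce(sdev) == 0) {
 *		... issue internal request(s), e.g. via scsi_execute() ...
 *		scsi_device_resume(sdev);
 *	}
 */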

static void
device_quiesce_fn(struct scsi_device *sdev, void *data)
{
	scsi_device_quiesce(sdev);
}

void
scsi_target_quiesce(struct scsi_target *starget)
{
	starget_for_each_device(starget, NULL, device_quiesce_fn);
}
EXPORT_SYMBOL(scsi_target_quiesce);

static void
device_resume_fn(struct scsi_device *sdev, void *data)
{
	scsi_device_resume(sdev);
}

void
scsi_target_resume(struct scsi_target *starget)
{
	starget_for_each_device(starget, NULL, device_resume_fn);
}
EXPORT_SYMBOL(scsi_target_resume);

/**
 * scsi_internal_device_block - internal function to put a device
 *				temporarily into the SDEV_BLOCK state
 * @sdev:	device to block
 *
 * Block request made by scsi lld's to temporarily stop all
 * scsi commands on the specified device.  Called from interrupt
 * or normal process context.
 *
 * Returns zero if successful or error if not
 *
 * Notes:
 *	This routine transitions the device to the SDEV_BLOCK state
 *	(which must be a legal transition).  When the device is in this
 *	state, all commands are deferred until the scsi lld reenables
 *	the device with scsi_device_unblock or device_block_tmo fires.
 *	This routine assumes the host_lock is held on entry.
 **/
int
scsi_internal_device_block(struct scsi_device *sdev)
{
	request_queue_t *q = sdev->request_queue;
	unsigned long flags;
	int err = 0;

	err = scsi_device_set_state(sdev, SDEV_BLOCK);
	if (err)
		return err;

	/*
	 * The device has transitioned to SDEV_BLOCK.  Stop the
	 * block layer from calling the midlayer with this device's
	 * request queue.
	 */
	spin_lock_irqsave(q->queue_lock, flags);
	blk_stop_queue(q);
	spin_unlock_irqrestore(q->queue_lock, flags);

	return 0;
}
EXPORT_SYMBOL_GPL(scsi_internal_device_block);

/**
 * scsi_internal_device_unblock - resume a device after a block request
 * @sdev:	device to resume
 *
 * Called by scsi lld's or the midlayer to restart the device queue
 * for the previously suspended scsi device.  Called from interrupt or
 * normal process context.
 *
 * Returns zero if successful or error if not.
 *
 * Notes:
 *	This routine transitions the device to the SDEV_RUNNING state
 *	(which must be a legal transition) allowing the midlayer to
 *	goose the queue for this device.  This routine assumes the
 *	host_lock is held upon entry.
 **/
int
scsi_internal_device_unblock(struct scsi_device *sdev)
{
	request_queue_t *q = sdev->request_queue;
	int err;
	unsigned long flags;

	/*
	 * Try to transition the scsi device to SDEV_RUNNING
	 * and goose the device queue if successful.
	 */
	err = scsi_device_set_state(sdev, SDEV_RUNNING);
	if (err)
		return err;

	spin_lock_irqsave(q->queue_lock, flags);
	blk_start_queue(q);
	spin_unlock_irqrestore(q->queue_lock, flags);

	return 0;
}
EXPORT_SYMBOL_GPL(scsi_internal_device_unblock);
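
/*
 * Example: a low-level driver suspending a device temporarily and
 * releasing it once the hardware has recovered.  A sketch; per the
 * notes above both calls expect the host_lock to be held on entry.
 *
 *	spin_lock_irqsave(shost->host_lock, flags);
 *	scsi_internal_device_block(sdev);
 *	spin_unlock_irqrestore(shost->host_lock, flags);
 *
 *	... later, when the device can accept commands again ...
 *
 *	spin_lock_irqsave(shost->host_lock, flags);
 *	scsi_internal_device_unblock(sdev);
 *	spin_unlock_irqrestore(shost->host_lock, flags);
 */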

static void
device_block(struct scsi_device *sdev, void *data)
{
	scsi_internal_device_block(sdev);
}

static int
target_block(struct device *dev, void *data)
{
	if (scsi_is_target_device(dev))
		starget_for_each_device(to_scsi_target(dev), NULL,
					device_block);
	return 0;
}

void
scsi_target_block(struct device *dev)
{
	if (scsi_is_target_device(dev))
		starget_for_each_device(to_scsi_target(dev), NULL,
					device_block);
	else
		device_for_each_child(dev, NULL, target_block);
}
EXPORT_SYMBOL_GPL(scsi_target_block);

static void
device_unblock(struct scsi_device *sdev, void *data)
{
	scsi_internal_device_unblock(sdev);
}

static int
target_unblock(struct device *dev, void *data)
{
	if (scsi_is_target_device(dev))
		starget_for_each_device(to_scsi_target(dev), NULL,
					device_unblock);
	return 0;
}

void
scsi_target_unblock(struct device *dev)
{
	if (scsi_is_target_device(dev))
		starget_for_each_device(to_scsi_target(dev), NULL,
					device_unblock);
	else
		device_for_each_child(dev, NULL, target_unblock);
}
EXPORT_SYMBOL_GPL(scsi_target_unblock);
2111