scsi_lib.c revision 1cf72699c1530c3e4ac3d58344f6a6a40a2f46d3
1/*
2 *  scsi_lib.c Copyright (C) 1999 Eric Youngdale
3 *
4 *  SCSI queueing library.
5 *      Initial versions: Eric Youngdale (eric@andante.org).
6 *                        Based upon conversations with large numbers
7 *                        of people at Linux Expo.
8 */
9
10#include <linux/bio.h>
11#include <linux/blkdev.h>
12#include <linux/completion.h>
13#include <linux/kernel.h>
14#include <linux/mempool.h>
15#include <linux/slab.h>
16#include <linux/init.h>
17#include <linux/pci.h>
18#include <linux/delay.h>
19
20#include <scsi/scsi.h>
21#include <scsi/scsi_dbg.h>
22#include <scsi/scsi_device.h>
23#include <scsi/scsi_driver.h>
24#include <scsi/scsi_eh.h>
25#include <scsi/scsi_host.h>
26#include <scsi/scsi_request.h>
27
28#include "scsi_priv.h"
29#include "scsi_logging.h"
30
31
/* Number of scatter-gather pools, i.e. the element count of scsi_sg_pools[]. */
#define SG_MEMPOOL_NR		ARRAY_SIZE(scsi_sg_pools)
/*
 * NOTE(review): presumably the number of elements reserved in each mempool;
 * its user lies outside this part of the file - confirm there.
 */
#define SG_MEMPOOL_SIZE		32
34
35struct scsi_host_sg_pool {
36	size_t		size;
37	char		*name;
38	kmem_cache_t	*slab;
39	mempool_t	*pool;
40};
41
42#if (SCSI_MAX_PHYS_SEGMENTS < 32)
43#error SCSI_MAX_PHYS_SEGMENTS is too small
44#endif
45
46#define SP(x) { x, "sgpool-" #x }
47static struct scsi_host_sg_pool scsi_sg_pools[] = {
48	SP(8),
49	SP(16),
50	SP(32),
51#if (SCSI_MAX_PHYS_SEGMENTS > 32)
52	SP(64),
53#if (SCSI_MAX_PHYS_SEGMENTS > 64)
54	SP(128),
55#if (SCSI_MAX_PHYS_SEGMENTS > 128)
56	SP(256),
57#if (SCSI_MAX_PHYS_SEGMENTS > 256)
58#error SCSI_MAX_PHYS_SEGMENTS is too large
59#endif
60#endif
61#endif
62#endif
63};
64#undef SP
65
66
67/*
68 * Function:    scsi_insert_special_req()
69 *
70 * Purpose:     Insert pre-formed request into request queue.
71 *
72 * Arguments:   sreq	- request that is ready to be queued.
73 *              at_head	- boolean.  True if we should insert at head
74 *                        of queue, false if we should insert at tail.
75 *
76 * Lock status: Assumed that lock is not held upon entry.
77 *
78 * Returns:     Nothing
79 *
80 * Notes:       This function is called from character device and from
81 *              ioctl types of functions where the caller knows exactly
82 *              what SCSI command needs to be issued.   The idea is that
83 *              we merely inject the command into the queue (at the head
84 *              for now), and then call the queue request function to actually
85 *              process it.
86 */
87int scsi_insert_special_req(struct scsi_request *sreq, int at_head)
88{
89	/*
90	 * Because users of this function are apt to reuse requests with no
91	 * modification, we have to sanitise the request flags here
92	 */
93	sreq->sr_request->flags &= ~REQ_DONTPREP;
94	blk_insert_request(sreq->sr_device->request_queue, sreq->sr_request,
95		       	   at_head, sreq);
96	return 0;
97}
98
99static void scsi_run_queue(struct request_queue *q);
100
101/*
102 * Function:    scsi_queue_insert()
103 *
104 * Purpose:     Insert a command in the midlevel queue.
105 *
106 * Arguments:   cmd    - command that we are adding to queue.
107 *              reason - why we are inserting command to queue.
108 *
109 * Lock status: Assumed that lock is not held upon entry.
110 *
111 * Returns:     Nothing.
112 *
113 * Notes:       We do this for one of two cases.  Either the host is busy
114 *              and it cannot accept any more commands for the time being,
115 *              or the device returned QUEUE_FULL and can accept no more
116 *              commands.
117 * Notes:       This could be called either from an interrupt context or a
118 *              normal process context.
119 */
120int scsi_queue_insert(struct scsi_cmnd *cmd, int reason)
121{
122	struct Scsi_Host *host = cmd->device->host;
123	struct scsi_device *device = cmd->device;
124	struct request_queue *q = device->request_queue;
125	unsigned long flags;
126
127	SCSI_LOG_MLQUEUE(1,
128		 printk("Inserting command %p into mlqueue\n", cmd));
129
130	/*
131	 * Set the appropriate busy bit for the device/host.
132	 *
133	 * If the host/device isn't busy, assume that something actually
134	 * completed, and that we should be able to queue a command now.
135	 *
136	 * Note that the prior mid-layer assumption that any host could
137	 * always queue at least one command is now broken.  The mid-layer
138	 * will implement a user specifiable stall (see
139	 * scsi_host.max_host_blocked and scsi_device.max_device_blocked)
140	 * if a command is requeued with no other commands outstanding
141	 * either for the device or for the host.
142	 */
143	if (reason == SCSI_MLQUEUE_HOST_BUSY)
144		host->host_blocked = host->max_host_blocked;
145	else if (reason == SCSI_MLQUEUE_DEVICE_BUSY)
146		device->device_blocked = device->max_device_blocked;
147
148	/*
149	 * Decrement the counters, since these commands are no longer
150	 * active on the host/device.
151	 */
152	scsi_device_unbusy(device);
153
154	/*
155	 * Requeue this command.  It will go before all other commands
156	 * that are already in the queue.
157	 *
158	 * NOTE: there is magic here about the way the queue is plugged if
159	 * we have no outstanding commands.
160	 *
161	 * Although we *don't* plug the queue, we call the request
162	 * function.  The SCSI request function detects the blocked condition
163	 * and plugs the queue appropriately.
164         */
165	spin_lock_irqsave(q->queue_lock, flags);
166	blk_requeue_request(q, cmd->request);
167	spin_unlock_irqrestore(q->queue_lock, flags);
168
169	scsi_run_queue(q);
170
171	return 0;
172}
173
174/*
175 * Function:    scsi_do_req
176 *
177 * Purpose:     Queue a SCSI request
178 *
179 * Arguments:   sreq	  - command descriptor.
180 *              cmnd      - actual SCSI command to be performed.
181 *              buffer    - data buffer.
182 *              bufflen   - size of data buffer.
183 *              done      - completion function to be run.
184 *              timeout   - how long to let it run before timeout.
185 *              retries   - number of retries we allow.
186 *
187 * Lock status: No locks held upon entry.
188 *
189 * Returns:     Nothing.
190 *
191 * Notes:	This function is only used for queueing requests for things
192 *		like ioctls and character device requests - this is because
193 *		we essentially just inject a request into the queue for the
194 *		device.
195 *
196 *		In order to support the scsi_device_quiesce function, we
197 *		now inject requests on the *head* of the device queue
198 *		rather than the tail.
199 */
200void scsi_do_req(struct scsi_request *sreq, const void *cmnd,
201		 void *buffer, unsigned bufflen,
202		 void (*done)(struct scsi_cmnd *),
203		 int timeout, int retries)
204{
205	/*
206	 * If the upper level driver is reusing these things, then
207	 * we should release the low-level block now.  Another one will
208	 * be allocated later when this request is getting queued.
209	 */
210	__scsi_release_request(sreq);
211
212	/*
213	 * Our own function scsi_done (which marks the host as not busy,
214	 * disables the timeout counter, etc) will be called by us or by the
215	 * scsi_hosts[host].queuecommand() function needs to also call
216	 * the completion function for the high level driver.
217	 */
218	memcpy(sreq->sr_cmnd, cmnd, sizeof(sreq->sr_cmnd));
219	sreq->sr_bufflen = bufflen;
220	sreq->sr_buffer = buffer;
221	sreq->sr_allowed = retries;
222	sreq->sr_done = done;
223	sreq->sr_timeout_per_command = timeout;
224
225	if (sreq->sr_cmd_len == 0)
226		sreq->sr_cmd_len = COMMAND_SIZE(sreq->sr_cmnd[0]);
227
228	/*
229	 * head injection *required* here otherwise quiesce won't work
230	 */
231	scsi_insert_special_req(sreq, 1);
232}
233EXPORT_SYMBOL(scsi_do_req);
234
235/* This is the end routine we get to if a command was never attached
236 * to the request.  Simply complete the request without changing
237 * rq_status; this will cause a DRIVER_ERROR. */
238static void scsi_wait_req_end_io(struct request *req)
239{
240	BUG_ON(!req->waiting);
241
242	complete(req->waiting);
243}
244
245void scsi_wait_req(struct scsi_request *sreq, const void *cmnd, void *buffer,
246		   unsigned bufflen, int timeout, int retries)
247{
248	DECLARE_COMPLETION(wait);
249	int write = (sreq->sr_data_direction == DMA_TO_DEVICE);
250	struct request *req;
251
252	req = blk_get_request(sreq->sr_device->request_queue, write,
253			      __GFP_WAIT);
254	if (bufflen && blk_rq_map_kern(sreq->sr_device->request_queue, req,
255				       buffer, bufflen, __GFP_WAIT)) {
256		sreq->sr_result = DRIVER_ERROR << 24;
257		blk_put_request(req);
258		return;
259	}
260
261	req->flags |= REQ_NOMERGE;
262	req->waiting = &wait;
263	req->end_io = scsi_wait_req_end_io;
264	req->cmd_len = COMMAND_SIZE(((u8 *)cmnd)[0]);
265	req->sense = sreq->sr_sense_buffer;
266	req->sense_len = 0;
267	memcpy(req->cmd, cmnd, req->cmd_len);
268	req->timeout = timeout;
269	req->flags |= REQ_BLOCK_PC;
270	req->rq_disk = NULL;
271	blk_insert_request(sreq->sr_device->request_queue, req,
272			   sreq->sr_data_direction == DMA_TO_DEVICE, NULL);
273	wait_for_completion(&wait);
274	sreq->sr_request->waiting = NULL;
275	sreq->sr_result = req->errors;
276	if (req->errors)
277		sreq->sr_result |= (DRIVER_ERROR << 24);
278
279	blk_put_request(req);
280}
281
282EXPORT_SYMBOL(scsi_wait_req);
283
284/**
285 * scsi_execute_req - insert request and wait for the result
286 * @sdev:	scsi device
287 * @cmd:	scsi command
288 * @data_direction: data direction
289 * @buffer:	data buffer
290 * @bufflen:	len of buffer
291 * @sense:	optional sense buffer
292 * @timeout:	request timeout in seconds
293 * @retries:	number of times to retry request
294 *
295 * scsi_execute_req returns the req->errors value which is the
296 * the scsi_cmnd result field.
297 **/
298int scsi_execute_req(struct scsi_device *sdev, unsigned char *cmd,
299		     int data_direction, void *buffer, unsigned bufflen,
300		     unsigned char *sense, int timeout, int retries)
301{
302	struct request *req;
303	int write = (data_direction == DMA_TO_DEVICE);
304	int ret = DRIVER_ERROR << 24;
305
306	req = blk_get_request(sdev->request_queue, write, __GFP_WAIT);
307
308	if (bufflen &&	blk_rq_map_kern(sdev->request_queue, req,
309					buffer, bufflen, __GFP_WAIT))
310		goto out;
311
312	req->cmd_len = COMMAND_SIZE(cmd[0]);
313	memcpy(req->cmd, cmd, req->cmd_len);
314	req->sense = sense;
315	req->sense_len = 0;
316	req->timeout = timeout;
317	req->flags |= REQ_BLOCK_PC | REQ_SPECIAL;
318
319	/*
320	 * head injection *required* here otherwise quiesce won't work
321	 */
322	blk_execute_rq(req->q, NULL, req, 1);
323
324	ret = req->errors;
325 out:
326	blk_put_request(req);
327
328	return ret;
329}
330
331EXPORT_SYMBOL(scsi_execute_req);
332
333/*
334 * Function:    scsi_init_cmd_errh()
335 *
336 * Purpose:     Initialize cmd fields related to error handling.
337 *
338 * Arguments:   cmd	- command that is ready to be queued.
339 *
340 * Returns:     Nothing
341 *
342 * Notes:       This function has the job of initializing a number of
343 *              fields related to error handling.   Typically this will
344 *              be called once for each command, as required.
345 */
346static int scsi_init_cmd_errh(struct scsi_cmnd *cmd)
347{
348	cmd->serial_number = 0;
349
350	memset(cmd->sense_buffer, 0, sizeof cmd->sense_buffer);
351
352	if (cmd->cmd_len == 0)
353		cmd->cmd_len = COMMAND_SIZE(cmd->cmnd[0]);
354
355	/*
356	 * We need saved copies of a number of fields - this is because
357	 * error handling may need to overwrite these with different values
358	 * to run different commands, and once error handling is complete,
359	 * we will need to restore these values prior to running the actual
360	 * command.
361	 */
362	cmd->old_use_sg = cmd->use_sg;
363	cmd->old_cmd_len = cmd->cmd_len;
364	cmd->sc_old_data_direction = cmd->sc_data_direction;
365	cmd->old_underflow = cmd->underflow;
366	memcpy(cmd->data_cmnd, cmd->cmnd, sizeof(cmd->cmnd));
367	cmd->buffer = cmd->request_buffer;
368	cmd->bufflen = cmd->request_bufflen;
369
370	return 1;
371}
372
373/*
374 * Function:   scsi_setup_cmd_retry()
375 *
376 * Purpose:    Restore the command state for a retry
377 *
378 * Arguments:  cmd	- command to be restored
379 *
380 * Returns:    Nothing
381 *
382 * Notes:      Immediately prior to retrying a command, we need
383 *             to restore certain fields that we saved above.
384 */
385void scsi_setup_cmd_retry(struct scsi_cmnd *cmd)
386{
387	memcpy(cmd->cmnd, cmd->data_cmnd, sizeof(cmd->data_cmnd));
388	cmd->request_buffer = cmd->buffer;
389	cmd->request_bufflen = cmd->bufflen;
390	cmd->use_sg = cmd->old_use_sg;
391	cmd->cmd_len = cmd->old_cmd_len;
392	cmd->sc_data_direction = cmd->sc_old_data_direction;
393	cmd->underflow = cmd->old_underflow;
394}
395
396void scsi_device_unbusy(struct scsi_device *sdev)
397{
398	struct Scsi_Host *shost = sdev->host;
399	unsigned long flags;
400
401	spin_lock_irqsave(shost->host_lock, flags);
402	shost->host_busy--;
403	if (unlikely((shost->shost_state == SHOST_RECOVERY) &&
404		     shost->host_failed))
405		scsi_eh_wakeup(shost);
406	spin_unlock(shost->host_lock);
407	spin_lock(sdev->request_queue->queue_lock);
408	sdev->device_busy--;
409	spin_unlock_irqrestore(sdev->request_queue->queue_lock, flags);
410}
411
412/*
413 * Called for single_lun devices on IO completion. Clear starget_sdev_user,
414 * and call blk_run_queue for all the scsi_devices on the target -
415 * including current_sdev first.
416 *
417 * Called with *no* scsi locks held.
418 */
419static void scsi_single_lun_run(struct scsi_device *current_sdev)
420{
421	struct Scsi_Host *shost = current_sdev->host;
422	struct scsi_device *sdev, *tmp;
423	struct scsi_target *starget = scsi_target(current_sdev);
424	unsigned long flags;
425
426	spin_lock_irqsave(shost->host_lock, flags);
427	starget->starget_sdev_user = NULL;
428	spin_unlock_irqrestore(shost->host_lock, flags);
429
430	/*
431	 * Call blk_run_queue for all LUNs on the target, starting with
432	 * current_sdev. We race with others (to set starget_sdev_user),
433	 * but in most cases, we will be first. Ideally, each LU on the
434	 * target would get some limited time or requests on the target.
435	 */
436	blk_run_queue(current_sdev->request_queue);
437
438	spin_lock_irqsave(shost->host_lock, flags);
439	if (starget->starget_sdev_user)
440		goto out;
441	list_for_each_entry_safe(sdev, tmp, &starget->devices,
442			same_target_siblings) {
443		if (sdev == current_sdev)
444			continue;
445		if (scsi_device_get(sdev))
446			continue;
447
448		spin_unlock_irqrestore(shost->host_lock, flags);
449		blk_run_queue(sdev->request_queue);
450		spin_lock_irqsave(shost->host_lock, flags);
451
452		scsi_device_put(sdev);
453	}
454 out:
455	spin_unlock_irqrestore(shost->host_lock, flags);
456}
457
458/*
459 * Function:	scsi_run_queue()
460 *
461 * Purpose:	Select a proper request queue to serve next
462 *
463 * Arguments:	q	- last request's queue
464 *
465 * Returns:     Nothing
466 *
467 * Notes:	The previous command was completely finished, start
468 *		a new one if possible.
469 */
470static void scsi_run_queue(struct request_queue *q)
471{
472	struct scsi_device *sdev = q->queuedata;
473	struct Scsi_Host *shost = sdev->host;
474	unsigned long flags;
475
476	if (sdev->single_lun)
477		scsi_single_lun_run(sdev);
478
479	spin_lock_irqsave(shost->host_lock, flags);
480	while (!list_empty(&shost->starved_list) &&
481	       !shost->host_blocked && !shost->host_self_blocked &&
482		!((shost->can_queue > 0) &&
483		  (shost->host_busy >= shost->can_queue))) {
484		/*
485		 * As long as shost is accepting commands and we have
486		 * starved queues, call blk_run_queue. scsi_request_fn
487		 * drops the queue_lock and can add us back to the
488		 * starved_list.
489		 *
490		 * host_lock protects the starved_list and starved_entry.
491		 * scsi_request_fn must get the host_lock before checking
492		 * or modifying starved_list or starved_entry.
493		 */
494		sdev = list_entry(shost->starved_list.next,
495					  struct scsi_device, starved_entry);
496		list_del_init(&sdev->starved_entry);
497		spin_unlock_irqrestore(shost->host_lock, flags);
498
499		blk_run_queue(sdev->request_queue);
500
501		spin_lock_irqsave(shost->host_lock, flags);
502		if (unlikely(!list_empty(&sdev->starved_entry)))
503			/*
504			 * sdev lost a race, and was put back on the
505			 * starved list. This is unlikely but without this
506			 * in theory we could loop forever.
507			 */
508			break;
509	}
510	spin_unlock_irqrestore(shost->host_lock, flags);
511
512	blk_run_queue(q);
513}
514
515/*
516 * Function:	scsi_requeue_command()
517 *
518 * Purpose:	Handle post-processing of completed commands.
519 *
520 * Arguments:	q	- queue to operate on
521 *		cmd	- command that may need to be requeued.
522 *
523 * Returns:	Nothing
524 *
525 * Notes:	After command completion, there may be blocks left
526 *		over which weren't finished by the previous command
527 *		this can be for a number of reasons - the main one is
528 *		I/O errors in the middle of the request, in which case
529 *		we need to request the blocks that come after the bad
530 *		sector.
531 */
532static void scsi_requeue_command(struct request_queue *q, struct scsi_cmnd *cmd)
533{
534	unsigned long flags;
535
536	cmd->request->flags &= ~REQ_DONTPREP;
537
538	spin_lock_irqsave(q->queue_lock, flags);
539	blk_requeue_request(q, cmd->request);
540	spin_unlock_irqrestore(q->queue_lock, flags);
541
542	scsi_run_queue(q);
543}
544
545void scsi_next_command(struct scsi_cmnd *cmd)
546{
547	struct request_queue *q = cmd->device->request_queue;
548
549	scsi_put_command(cmd);
550	scsi_run_queue(q);
551}
552
553void scsi_run_host_queues(struct Scsi_Host *shost)
554{
555	struct scsi_device *sdev;
556
557	shost_for_each_device(sdev, shost)
558		scsi_run_queue(sdev->request_queue);
559}
560
561/*
562 * Function:    scsi_end_request()
563 *
564 * Purpose:     Post-processing of completed commands (usually invoked at end
565 *		of upper level post-processing and scsi_io_completion).
566 *
567 * Arguments:   cmd	 - command that is complete.
568 *              uptodate - 1 if I/O indicates success, <= 0 for I/O error.
569 *              bytes    - number of bytes of completed I/O
570 *		requeue  - indicates whether we should requeue leftovers.
571 *
572 * Lock status: Assumed that lock is not held upon entry.
573 *
574 * Returns:     cmd if requeue done or required, NULL otherwise
575 *
576 * Notes:       This is called for block device requests in order to
577 *              mark some number of sectors as complete.
578 *
579 *		We are guaranteeing that the request queue will be goosed
580 *		at some point during this call.
581 */
582static struct scsi_cmnd *scsi_end_request(struct scsi_cmnd *cmd, int uptodate,
583					  int bytes, int requeue)
584{
585	request_queue_t *q = cmd->device->request_queue;
586	struct request *req = cmd->request;
587	unsigned long flags;
588
589	/*
590	 * If there are blocks left over at the end, set up the command
591	 * to queue the remainder of them.
592	 */
593	if (end_that_request_chunk(req, uptodate, bytes)) {
594		int leftover = (req->hard_nr_sectors << 9);
595
596		if (blk_pc_request(req))
597			leftover = req->data_len;
598
599		/* kill remainder if no retrys */
600		if (!uptodate && blk_noretry_request(req))
601			end_that_request_chunk(req, 0, leftover);
602		else {
603			if (requeue)
604				/*
605				 * Bleah.  Leftovers again.  Stick the
606				 * leftovers in the front of the
607				 * queue, and goose the queue again.
608				 */
609				scsi_requeue_command(q, cmd);
610
611			return cmd;
612		}
613	}
614
615	add_disk_randomness(req->rq_disk);
616
617	spin_lock_irqsave(q->queue_lock, flags);
618	if (blk_rq_tagged(req))
619		blk_queue_end_tag(q, req);
620	end_that_request_last(req);
621	spin_unlock_irqrestore(q->queue_lock, flags);
622
623	/*
624	 * This will goose the queue request function at the end, so we don't
625	 * need to worry about launching another command.
626	 */
627	scsi_next_command(cmd);
628	return NULL;
629}
630
631static struct scatterlist *scsi_alloc_sgtable(struct scsi_cmnd *cmd, int gfp_mask)
632{
633	struct scsi_host_sg_pool *sgp;
634	struct scatterlist *sgl;
635
636	BUG_ON(!cmd->use_sg);
637
638	switch (cmd->use_sg) {
639	case 1 ... 8:
640		cmd->sglist_len = 0;
641		break;
642	case 9 ... 16:
643		cmd->sglist_len = 1;
644		break;
645	case 17 ... 32:
646		cmd->sglist_len = 2;
647		break;
648#if (SCSI_MAX_PHYS_SEGMENTS > 32)
649	case 33 ... 64:
650		cmd->sglist_len = 3;
651		break;
652#if (SCSI_MAX_PHYS_SEGMENTS > 64)
653	case 65 ... 128:
654		cmd->sglist_len = 4;
655		break;
656#if (SCSI_MAX_PHYS_SEGMENTS  > 128)
657	case 129 ... 256:
658		cmd->sglist_len = 5;
659		break;
660#endif
661#endif
662#endif
663	default:
664		return NULL;
665	}
666
667	sgp = scsi_sg_pools + cmd->sglist_len;
668	sgl = mempool_alloc(sgp->pool, gfp_mask);
669	return sgl;
670}
671
672static void scsi_free_sgtable(struct scatterlist *sgl, int index)
673{
674	struct scsi_host_sg_pool *sgp;
675
676	BUG_ON(index >= SG_MEMPOOL_NR);
677
678	sgp = scsi_sg_pools + index;
679	mempool_free(sgl, sgp->pool);
680}
681
682/*
683 * Function:    scsi_release_buffers()
684 *
685 * Purpose:     Completion processing for block device I/O requests.
686 *
687 * Arguments:   cmd	- command that we are bailing.
688 *
689 * Lock status: Assumed that no lock is held upon entry.
690 *
691 * Returns:     Nothing
692 *
693 * Notes:       In the event that an upper level driver rejects a
694 *		command, we must release resources allocated during
695 *		the __init_io() function.  Primarily this would involve
696 *		the scatter-gather table, and potentially any bounce
697 *		buffers.
698 */
699static void scsi_release_buffers(struct scsi_cmnd *cmd)
700{
701	struct request *req = cmd->request;
702
703	/*
704	 * Free up any indirection buffers we allocated for DMA purposes.
705	 */
706	if (cmd->use_sg)
707		scsi_free_sgtable(cmd->request_buffer, cmd->sglist_len);
708	else if (cmd->request_buffer != req->buffer)
709		kfree(cmd->request_buffer);
710
711	/*
712	 * Zero these out.  They now point to freed memory, and it is
713	 * dangerous to hang onto the pointers.
714	 */
715	cmd->buffer  = NULL;
716	cmd->bufflen = 0;
717	cmd->request_buffer = NULL;
718	cmd->request_bufflen = 0;
719}
720
721/*
722 * Function:    scsi_io_completion()
723 *
724 * Purpose:     Completion processing for block device I/O requests.
725 *
726 * Arguments:   cmd   - command that is finished.
727 *
728 * Lock status: Assumed that no lock is held upon entry.
729 *
730 * Returns:     Nothing
731 *
732 * Notes:       This function is matched in terms of capabilities to
733 *              the function that created the scatter-gather list.
734 *              In other words, if there are no bounce buffers
735 *              (the normal case for most drivers), we don't need
736 *              the logic to deal with cleaning up afterwards.
737 *
738 *		We must do one of several things here:
739 *
740 *		a) Call scsi_end_request.  This will finish off the
741 *		   specified number of sectors.  If we are done, the
742 *		   command block will be released, and the queue
743 *		   function will be goosed.  If we are not done, then
744 *		   scsi_end_request will directly goose the queue.
745 *
746 *		b) We can just use scsi_requeue_command() here.  This would
747 *		   be used if we just wanted to retry, for example.
748 */
749void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes,
750			unsigned int block_bytes)
751{
752	int result = cmd->result;
753	int this_count = cmd->bufflen;
754	request_queue_t *q = cmd->device->request_queue;
755	struct request *req = cmd->request;
756	int clear_errors = 1;
757	struct scsi_sense_hdr sshdr;
758	int sense_valid = 0;
759	int sense_deferred = 0;
760
761	if (blk_complete_barrier_rq(q, req, good_bytes >> 9))
762		return;
763
764	/*
765	 * Free up any indirection buffers we allocated for DMA purposes.
766	 * For the case of a READ, we need to copy the data out of the
767	 * bounce buffer and into the real buffer.
768	 */
769	if (cmd->use_sg)
770		scsi_free_sgtable(cmd->buffer, cmd->sglist_len);
771	else if (cmd->buffer != req->buffer) {
772		if (rq_data_dir(req) == READ) {
773			unsigned long flags;
774			char *to = bio_kmap_irq(req->bio, &flags);
775			memcpy(to, cmd->buffer, cmd->bufflen);
776			bio_kunmap_irq(to, &flags);
777		}
778		kfree(cmd->buffer);
779	}
780
781	if (result) {
782		sense_valid = scsi_command_normalize_sense(cmd, &sshdr);
783		if (sense_valid)
784			sense_deferred = scsi_sense_is_deferred(&sshdr);
785	}
786	if (blk_pc_request(req)) { /* SG_IO ioctl from block level */
787		req->errors = result;
788		if (result) {
789			clear_errors = 0;
790			if (sense_valid && req->sense) {
791				/*
792				 * SG_IO wants current and deferred errors
793				 */
794				int len = 8 + cmd->sense_buffer[7];
795
796				if (len > SCSI_SENSE_BUFFERSIZE)
797					len = SCSI_SENSE_BUFFERSIZE;
798				memcpy(req->sense, cmd->sense_buffer,  len);
799				req->sense_len = len;
800			}
801		} else
802			req->data_len = cmd->resid;
803	}
804
805	/*
806	 * Zero these out.  They now point to freed memory, and it is
807	 * dangerous to hang onto the pointers.
808	 */
809	cmd->buffer  = NULL;
810	cmd->bufflen = 0;
811	cmd->request_buffer = NULL;
812	cmd->request_bufflen = 0;
813
814	/*
815	 * Next deal with any sectors which we were able to correctly
816	 * handle.
817	 */
818	if (good_bytes >= 0) {
819		SCSI_LOG_HLCOMPLETE(1, printk("%ld sectors total, %d bytes done.\n",
820					      req->nr_sectors, good_bytes));
821		SCSI_LOG_HLCOMPLETE(1, printk("use_sg is %d\n", cmd->use_sg));
822
823		if (clear_errors)
824			req->errors = 0;
825		/*
826		 * If multiple sectors are requested in one buffer, then
827		 * they will have been finished off by the first command.
828		 * If not, then we have a multi-buffer command.
829		 *
830		 * If block_bytes != 0, it means we had a medium error
831		 * of some sort, and that we want to mark some number of
832		 * sectors as not uptodate.  Thus we want to inhibit
833		 * requeueing right here - we will requeue down below
834		 * when we handle the bad sectors.
835		 */
836		cmd = scsi_end_request(cmd, 1, good_bytes, result == 0);
837
838		/*
839		 * If the command completed without error, then either finish off the
840		 * rest of the command, or start a new one.
841		 */
842		if (result == 0 || cmd == NULL ) {
843			return;
844		}
845	}
846	/*
847	 * Now, if we were good little boys and girls, Santa left us a request
848	 * sense buffer.  We can extract information from this, so we
849	 * can choose a block to remap, etc.
850	 */
851	if (sense_valid && !sense_deferred) {
852		switch (sshdr.sense_key) {
853		case UNIT_ATTENTION:
854			if (cmd->device->removable) {
855				/* detected disc change.  set a bit
856				 * and quietly refuse further access.
857				 */
858				cmd->device->changed = 1;
859				cmd = scsi_end_request(cmd, 0,
860						this_count, 1);
861				return;
862			} else {
863				/*
864				* Must have been a power glitch, or a
865				* bus reset.  Could not have been a
866				* media change, so we just retry the
867				* request and see what happens.
868				*/
869				scsi_requeue_command(q, cmd);
870				return;
871			}
872			break;
873		case ILLEGAL_REQUEST:
874			/*
875		 	* If we had an ILLEGAL REQUEST returned, then we may
876		 	* have performed an unsupported command.  The only
877		 	* thing this should be would be a ten byte read where
878			* only a six byte read was supported.  Also, on a
879			* system where READ CAPACITY failed, we may have read
880			* past the end of the disk.
881		 	*/
882			if (cmd->device->use_10_for_rw &&
883			    (cmd->cmnd[0] == READ_10 ||
884			     cmd->cmnd[0] == WRITE_10)) {
885				cmd->device->use_10_for_rw = 0;
886				/*
887				 * This will cause a retry with a 6-byte
888				 * command.
889				 */
890				scsi_requeue_command(q, cmd);
891				result = 0;
892			} else {
893				cmd = scsi_end_request(cmd, 0, this_count, 1);
894				return;
895			}
896			break;
897		case NOT_READY:
898			/*
899			 * If the device is in the process of becoming ready,
900			 * retry.
901			 */
902			if (sshdr.asc == 0x04 && sshdr.ascq == 0x01) {
903				scsi_requeue_command(q, cmd);
904				return;
905			}
906			printk(KERN_INFO "Device %s not ready.\n",
907			       req->rq_disk ? req->rq_disk->disk_name : "");
908			cmd = scsi_end_request(cmd, 0, this_count, 1);
909			return;
910		case VOLUME_OVERFLOW:
911			printk(KERN_INFO "Volume overflow <%d %d %d %d> CDB: ",
912			       cmd->device->host->host_no,
913			       (int)cmd->device->channel,
914			       (int)cmd->device->id, (int)cmd->device->lun);
915			__scsi_print_command(cmd->data_cmnd);
916			scsi_print_sense("", cmd);
917			cmd = scsi_end_request(cmd, 0, block_bytes, 1);
918			return;
919		default:
920			break;
921		}
922	}			/* driver byte != 0 */
923	if (host_byte(result) == DID_RESET) {
924		/*
925		 * Third party bus reset or reset for error
926		 * recovery reasons.  Just retry the request
927		 * and see what happens.
928		 */
929		scsi_requeue_command(q, cmd);
930		return;
931	}
932	if (result) {
933		if (!(req->flags & REQ_SPECIAL))
934			printk(KERN_INFO "SCSI error : <%d %d %d %d> return code "
935			       "= 0x%x\n", cmd->device->host->host_no,
936			       cmd->device->channel,
937			       cmd->device->id,
938			       cmd->device->lun, result);
939
940		if (driver_byte(result) & DRIVER_SENSE)
941			scsi_print_sense("", cmd);
942		/*
943		 * Mark a single buffer as not uptodate.  Queue the remainder.
944		 * We sometimes get this cruft in the event that a medium error
945		 * isn't properly reported.
946		 */
947		block_bytes = req->hard_cur_sectors << 9;
948		if (!block_bytes)
949			block_bytes = req->data_len;
950		cmd = scsi_end_request(cmd, 0, block_bytes, 1);
951	}
952}
953EXPORT_SYMBOL(scsi_io_completion);
954
955/*
956 * Function:    scsi_init_io()
957 *
958 * Purpose:     SCSI I/O initialize function.
959 *
960 * Arguments:   cmd   - Command descriptor we wish to initialize
961 *
962 * Returns:     0 on success
963 *		BLKPREP_DEFER if the failure is retryable
964 *		BLKPREP_KILL if the failure is fatal
965 */
966static int scsi_init_io(struct scsi_cmnd *cmd)
967{
968	struct request     *req = cmd->request;
969	struct scatterlist *sgpnt;
970	int		   count;
971
972	/*
973	 * if this is a rq->data based REQ_BLOCK_PC, setup for a non-sg xfer
974	 */
975	if ((req->flags & REQ_BLOCK_PC) && !req->bio) {
976		cmd->request_bufflen = req->data_len;
977		cmd->request_buffer = req->data;
978		req->buffer = req->data;
979		cmd->use_sg = 0;
980		return 0;
981	}
982
983	/*
984	 * we used to not use scatter-gather for single segment request,
985	 * but now we do (it makes highmem I/O easier to support without
986	 * kmapping pages)
987	 */
988	cmd->use_sg = req->nr_phys_segments;
989
990	/*
991	 * if sg table allocation fails, requeue request later.
992	 */
993	sgpnt = scsi_alloc_sgtable(cmd, GFP_ATOMIC);
994	if (unlikely(!sgpnt))
995		return BLKPREP_DEFER;
996
997	cmd->request_buffer = (char *) sgpnt;
998	cmd->request_bufflen = req->nr_sectors << 9;
999	if (blk_pc_request(req))
1000		cmd->request_bufflen = req->data_len;
1001	req->buffer = NULL;
1002
1003	/*
1004	 * Next, walk the list, and fill in the addresses and sizes of
1005	 * each segment.
1006	 */
1007	count = blk_rq_map_sg(req->q, req, cmd->request_buffer);
1008
1009	/*
1010	 * mapped well, send it off
1011	 */
1012	if (likely(count <= cmd->use_sg)) {
1013		cmd->use_sg = count;
1014		return 0;
1015	}
1016
1017	printk(KERN_ERR "Incorrect number of segments after building list\n");
1018	printk(KERN_ERR "counted %d, received %d\n", count, cmd->use_sg);
1019	printk(KERN_ERR "req nr_sec %lu, cur_nr_sec %u\n", req->nr_sectors,
1020			req->current_nr_sectors);
1021
1022	/* release the command and kill it */
1023	scsi_release_buffers(cmd);
1024	scsi_put_command(cmd);
1025	return BLKPREP_KILL;
1026}
1027
1028static int scsi_prepare_flush_fn(request_queue_t *q, struct request *rq)
1029{
1030	struct scsi_device *sdev = q->queuedata;
1031	struct scsi_driver *drv;
1032
1033	if (sdev->sdev_state == SDEV_RUNNING) {
1034		drv = *(struct scsi_driver **) rq->rq_disk->private_data;
1035
1036		if (drv->prepare_flush)
1037			return drv->prepare_flush(q, rq);
1038	}
1039
1040	return 0;
1041}
1042
1043static void scsi_end_flush_fn(request_queue_t *q, struct request *rq)
1044{
1045	struct scsi_device *sdev = q->queuedata;
1046	struct request *flush_rq = rq->end_io_data;
1047	struct scsi_driver *drv;
1048
1049	if (flush_rq->errors) {
1050		printk("scsi: barrier error, disabling flush support\n");
1051		blk_queue_ordered(q, QUEUE_ORDERED_NONE);
1052	}
1053
1054	if (sdev->sdev_state == SDEV_RUNNING) {
1055		drv = *(struct scsi_driver **) rq->rq_disk->private_data;
1056		drv->end_flush(q, rq);
1057	}
1058}
1059
1060static int scsi_issue_flush_fn(request_queue_t *q, struct gendisk *disk,
1061			       sector_t *error_sector)
1062{
1063	struct scsi_device *sdev = q->queuedata;
1064	struct scsi_driver *drv;
1065
1066	if (sdev->sdev_state != SDEV_RUNNING)
1067		return -ENXIO;
1068
1069	drv = *(struct scsi_driver **) disk->private_data;
1070	if (drv->issue_flush)
1071		return drv->issue_flush(&sdev->sdev_gendev, error_sector);
1072
1073	return -EOPNOTSUPP;
1074}
1075
1076static void scsi_generic_done(struct scsi_cmnd *cmd)
1077{
1078	BUG_ON(!blk_pc_request(cmd->request));
1079	scsi_io_completion(cmd, cmd->result == 0 ? cmd->bufflen : 0, 0);
1080}
1081
1082static int scsi_prep_fn(struct request_queue *q, struct request *req)
1083{
1084	struct scsi_device *sdev = q->queuedata;
1085	struct scsi_cmnd *cmd;
1086	int specials_only = 0;
1087
1088	/*
1089	 * Just check to see if the device is online.  If it isn't, we
1090	 * refuse to process any commands.  The device must be brought
1091	 * online before trying any recovery commands
1092	 */
1093	if (unlikely(!scsi_device_online(sdev))) {
1094		printk(KERN_ERR "scsi%d (%d:%d): rejecting I/O to offline device\n",
1095		       sdev->host->host_no, sdev->id, sdev->lun);
1096		return BLKPREP_KILL;
1097	}
1098	if (unlikely(sdev->sdev_state != SDEV_RUNNING)) {
1099		/* OK, we're not in a running state don't prep
1100		 * user commands */
1101		if (sdev->sdev_state == SDEV_DEL) {
1102			/* Device is fully deleted, no commands
1103			 * at all allowed down */
1104			printk(KERN_ERR "scsi%d (%d:%d): rejecting I/O to dead device\n",
1105			       sdev->host->host_no, sdev->id, sdev->lun);
1106			return BLKPREP_KILL;
1107		}
1108		/* OK, we only allow special commands (i.e. not
1109		 * user initiated ones */
1110		specials_only = sdev->sdev_state;
1111	}
1112
1113	/*
1114	 * Find the actual device driver associated with this command.
1115	 * The SPECIAL requests are things like character device or
1116	 * ioctls, which did not originate from ll_rw_blk.  Note that
1117	 * the special field is also used to indicate the cmd for
1118	 * the remainder of a partially fulfilled request that can
1119	 * come up when there is a medium error.  We have to treat
1120	 * these two cases differently.  We differentiate by looking
1121	 * at request->cmd, as this tells us the real story.
1122	 */
1123	if (req->flags & REQ_SPECIAL && req->special) {
1124		struct scsi_request *sreq = req->special;
1125
1126		if (sreq->sr_magic == SCSI_REQ_MAGIC) {
1127			cmd = scsi_get_command(sreq->sr_device, GFP_ATOMIC);
1128			if (unlikely(!cmd))
1129				goto defer;
1130			scsi_init_cmd_from_req(cmd, sreq);
1131		} else
1132			cmd = req->special;
1133	} else if (req->flags & (REQ_CMD | REQ_BLOCK_PC)) {
1134
1135		if(unlikely(specials_only) && !(req->flags & REQ_SPECIAL)) {
1136			if(specials_only == SDEV_QUIESCE ||
1137					specials_only == SDEV_BLOCK)
1138				return BLKPREP_DEFER;
1139
1140			printk(KERN_ERR "scsi%d (%d:%d): rejecting I/O to device being removed\n",
1141			       sdev->host->host_no, sdev->id, sdev->lun);
1142			return BLKPREP_KILL;
1143		}
1144
1145
1146		/*
1147		 * Now try and find a command block that we can use.
1148		 */
1149		if (!req->special) {
1150			cmd = scsi_get_command(sdev, GFP_ATOMIC);
1151			if (unlikely(!cmd))
1152				goto defer;
1153		} else
1154			cmd = req->special;
1155
1156		/* pull a tag out of the request if we have one */
1157		cmd->tag = req->tag;
1158	} else {
1159		blk_dump_rq_flags(req, "SCSI bad req");
1160		return BLKPREP_KILL;
1161	}
1162
1163	/* note the overloading of req->special.  When the tag
1164	 * is active it always means cmd.  If the tag goes
1165	 * back for re-queueing, it may be reset */
1166	req->special = cmd;
1167	cmd->request = req;
1168
1169	/*
1170	 * FIXME: drop the lock here because the functions below
1171	 * expect to be called without the queue lock held.  Also,
1172	 * previously, we dequeued the request before dropping the
1173	 * lock.  We hope REQ_STARTED prevents anything untoward from
1174	 * happening now.
1175	 */
1176	if (req->flags & (REQ_CMD | REQ_BLOCK_PC)) {
1177		struct scsi_driver *drv;
1178		int ret;
1179
1180		/*
1181		 * This will do a couple of things:
1182		 *  1) Fill in the actual SCSI command.
1183		 *  2) Fill in any other upper-level specific fields
1184		 * (timeout).
1185		 *
1186		 * If this returns 0, it means that the request failed
1187		 * (reading past end of disk, reading offline device,
1188		 * etc).   This won't actually talk to the device, but
1189		 * some kinds of consistency checking may cause the
1190		 * request to be rejected immediately.
1191		 */
1192
1193		/*
1194		 * This sets up the scatter-gather table (allocating if
1195		 * required).
1196		 */
1197		ret = scsi_init_io(cmd);
1198		if (ret)	/* BLKPREP_KILL return also releases the command */
1199			return ret;
1200
1201		/*
1202		 * Initialize the actual SCSI command for this request.
1203		 */
1204		if (req->rq_disk) {
1205			drv = *(struct scsi_driver **)req->rq_disk->private_data;
1206			if (unlikely(!drv->init_command(cmd))) {
1207				scsi_release_buffers(cmd);
1208				scsi_put_command(cmd);
1209				return BLKPREP_KILL;
1210			}
1211		} else {
1212			memcpy(cmd->cmnd, req->cmd, sizeof(cmd->cmnd));
1213			if (rq_data_dir(req) == WRITE)
1214				cmd->sc_data_direction = DMA_TO_DEVICE;
1215			else if (req->data_len)
1216				cmd->sc_data_direction = DMA_FROM_DEVICE;
1217			else
1218				cmd->sc_data_direction = DMA_NONE;
1219
1220			cmd->transfersize = req->data_len;
1221			cmd->allowed = 3;
1222			cmd->timeout_per_command = req->timeout;
1223			cmd->done = scsi_generic_done;
1224		}
1225	}
1226
1227	/*
1228	 * The request is now prepped, no need to come back here
1229	 */
1230	req->flags |= REQ_DONTPREP;
1231	return BLKPREP_OK;
1232
1233 defer:
1234	/* If we defer, the elv_next_request() returns NULL, but the
1235	 * queue must be restarted, so we plug here if no returning
1236	 * command will automatically do that. */
1237	if (sdev->device_busy == 0)
1238		blk_plug_device(q);
1239	return BLKPREP_DEFER;
1240}
1241
1242/*
1243 * scsi_dev_queue_ready: if we can send requests to sdev, return 1 else
1244 * return 0.
1245 *
1246 * Called with the queue_lock held.
1247 */
1248static inline int scsi_dev_queue_ready(struct request_queue *q,
1249				  struct scsi_device *sdev)
1250{
1251	if (sdev->device_busy >= sdev->queue_depth)
1252		return 0;
1253	if (sdev->device_busy == 0 && sdev->device_blocked) {
1254		/*
1255		 * unblock after device_blocked iterates to zero
1256		 */
1257		if (--sdev->device_blocked == 0) {
1258			SCSI_LOG_MLQUEUE(3,
1259				printk("scsi%d (%d:%d) unblocking device at"
1260				       " zero depth\n", sdev->host->host_no,
1261				       sdev->id, sdev->lun));
1262		} else {
1263			blk_plug_device(q);
1264			return 0;
1265		}
1266	}
1267	if (sdev->device_blocked)
1268		return 0;
1269
1270	return 1;
1271}
1272
1273/*
1274 * scsi_host_queue_ready: if we can send requests to shost, return 1 else
1275 * return 0. We must end up running the queue again whenever 0 is
1276 * returned, else IO can hang.
1277 *
1278 * Called with host_lock held.
1279 */
1280static inline int scsi_host_queue_ready(struct request_queue *q,
1281				   struct Scsi_Host *shost,
1282				   struct scsi_device *sdev)
1283{
1284	if (shost->shost_state == SHOST_RECOVERY)
1285		return 0;
1286	if (shost->host_busy == 0 && shost->host_blocked) {
1287		/*
1288		 * unblock after host_blocked iterates to zero
1289		 */
1290		if (--shost->host_blocked == 0) {
1291			SCSI_LOG_MLQUEUE(3,
1292				printk("scsi%d unblocking host at zero depth\n",
1293					shost->host_no));
1294		} else {
1295			blk_plug_device(q);
1296			return 0;
1297		}
1298	}
1299	if ((shost->can_queue > 0 && shost->host_busy >= shost->can_queue) ||
1300	    shost->host_blocked || shost->host_self_blocked) {
1301		if (list_empty(&sdev->starved_entry))
1302			list_add_tail(&sdev->starved_entry, &shost->starved_list);
1303		return 0;
1304	}
1305
1306	/* We're OK to process the command, so we can't be starved */
1307	if (!list_empty(&sdev->starved_entry))
1308		list_del_init(&sdev->starved_entry);
1309
1310	return 1;
1311}
1312
1313/*
1314 * Kill requests for a dead device
1315 */
1316static void scsi_kill_requests(request_queue_t *q)
1317{
1318	struct request *req;
1319
1320	while ((req = elv_next_request(q)) != NULL) {
1321		blkdev_dequeue_request(req);
1322		req->flags |= REQ_QUIET;
1323		while (end_that_request_first(req, 0, req->nr_sectors))
1324			;
1325		end_that_request_last(req);
1326	}
1327}
1328
/*
 * Function:    scsi_request_fn()
 *
 * Purpose:     Main strategy routine for SCSI.  Pulls prepared requests
 *              off the queue and dispatches their commands for as long
 *              as the queue is unplugged and both the device and the
 *              host will accept them.
 *
 * Arguments:   q       - Pointer to actual queue.
 *
 * Returns:     Nothing
 *
 * Lock status: IO request lock assumed to be held when called.  The
 *              lock is dropped and retaken inside; every exit path
 *              except the two early returns at the top passes through
 *              "out", which retakes q->queue_lock before returning.
 */
static void scsi_request_fn(struct request_queue *q)
{
	struct scsi_device *sdev = q->queuedata;
	struct Scsi_Host *shost;
	struct scsi_cmnd *cmd;
	struct request *req;

	/* No device behind the queue any more: fail whatever is queued. */
	if (!sdev) {
		printk("scsi: killing requests for dead queue\n");
		scsi_kill_requests(q);
		return;
	}

	/* Hold a reference on the device for the duration of the run. */
	if(!get_device(&sdev->sdev_gendev))
		/* We must be tearing the block queue down already */
		return;

	/*
	 * To start with, we keep looping until the queue is empty, or until
	 * the host is no longer able to accept any more requests.
	 */
	shost = sdev->host;
	while (!blk_queue_plugged(q)) {
		int rtn;
		/*
		 * get next queueable request.  We do this early to make sure
		 * that the request is fully prepared even if we cannot
		 * accept it.
		 */
		req = elv_next_request(q);
		if (!req || !scsi_dev_queue_ready(q, sdev))
			break;

		/* Device went offline: fail the request quietly, move on. */
		if (unlikely(!scsi_device_online(sdev))) {
			printk(KERN_ERR "scsi%d (%d:%d): rejecting I/O to offline device\n",
			       sdev->host->host_no, sdev->id, sdev->lun);
			blkdev_dequeue_request(req);
			req->flags |= REQ_QUIET;
			while (end_that_request_first(req, 0, req->nr_sectors))
				;
			end_that_request_last(req);
			continue;
		}


		/*
		 * Remove the request from the request list.  On a tagged
		 * queue this explicit dequeue is skipped when
		 * blk_queue_start_tag() returns 0.
		 */
		if (!(blk_queue_tagged(q) && !blk_queue_start_tag(q, req)))
			blkdev_dequeue_request(req);
		sdev->device_busy++;

		/*
		 * Swap the queue lock for the host lock using the plain
		 * (non-_irq) primitives; the host lock is released further
		 * down with spin_unlock_irq().
		 */
		spin_unlock(q->queue_lock);
		spin_lock(shost->host_lock);

		if (!scsi_host_queue_ready(q, shost, sdev))
			goto not_ready;
		if (sdev->single_lun) {
			/*
			 * Back off if a different device is recorded as
			 * the current user of this target; otherwise claim
			 * the target for this device.
			 */
			if (scsi_target(sdev)->starget_sdev_user &&
			    scsi_target(sdev)->starget_sdev_user != sdev)
				goto not_ready;
			scsi_target(sdev)->starget_sdev_user = sdev;
		}
		shost->host_busy++;

		/*
		 * XXX(hch): This is rather suboptimal, scsi_dispatch_cmd will
		 *		take the lock again.
		 */
		spin_unlock_irq(shost->host_lock);

		/* The prep function stored the command in req->special. */
		cmd = req->special;
		if (unlikely(cmd == NULL)) {
			printk(KERN_CRIT "impossible request in %s.\n"
					 "please mail a stack trace to "
					 "linux-scsi@vger.kernel.org",
					 __FUNCTION__);
			BUG();
		}

		/*
		 * Finally, initialize any error handling parameters, and set up
		 * the timers for timeouts.
		 */
		scsi_init_cmd_errh(cmd);

		/*
		 * Dispatch the command to the low-level driver.
		 */
		rtn = scsi_dispatch_cmd(cmd);
		spin_lock_irq(q->queue_lock);
		if(rtn) {
			/* we're refusing the command; because of
			 * the way locks get dropped, we need to
			 * check here if plugging is required */
			if(sdev->device_busy == 0)
				blk_plug_device(q);

			break;
		}
	}

	goto out;

 not_ready:
	/* Reached with the host lock held and the queue lock dropped. */
	spin_unlock_irq(shost->host_lock);

	/*
	 * lock q, handle tag, requeue req, and decrement device_busy. We
	 * must return with queue_lock held.
	 *
	 * Decrementing device_busy without checking it is OK, as all such
	 * cases (host limits or settings) should run the queue at some
	 * later time.
	 */
	spin_lock_irq(q->queue_lock);
	blk_requeue_request(q, req);
	sdev->device_busy--;
	if(sdev->device_busy == 0)
		blk_plug_device(q);
 out:
	/* must be careful here...if we trigger the ->remove() function
	 * we cannot be holding the q lock */
	spin_unlock_irq(q->queue_lock);
	put_device(&sdev->sdev_gendev);
	spin_lock_irq(q->queue_lock);
}
1467
1468u64 scsi_calculate_bounce_limit(struct Scsi_Host *shost)
1469{
1470	struct device *host_dev;
1471	u64 bounce_limit = 0xffffffff;
1472
1473	if (shost->unchecked_isa_dma)
1474		return BLK_BOUNCE_ISA;
1475	/*
1476	 * Platforms with virtual-DMA translation
1477	 * hardware have no practical limit.
1478	 */
1479	if (!PCI_DMA_BUS_IS_PHYS)
1480		return BLK_BOUNCE_ANY;
1481
1482	host_dev = scsi_get_device(shost);
1483	if (host_dev && host_dev->dma_mask)
1484		bounce_limit = *host_dev->dma_mask;
1485
1486	return bounce_limit;
1487}
1488EXPORT_SYMBOL(scsi_calculate_bounce_limit);
1489
1490struct request_queue *scsi_alloc_queue(struct scsi_device *sdev)
1491{
1492	struct Scsi_Host *shost = sdev->host;
1493	struct request_queue *q;
1494
1495	q = blk_init_queue(scsi_request_fn, NULL);
1496	if (!q)
1497		return NULL;
1498
1499	blk_queue_prep_rq(q, scsi_prep_fn);
1500
1501	blk_queue_max_hw_segments(q, shost->sg_tablesize);
1502	blk_queue_max_phys_segments(q, SCSI_MAX_PHYS_SEGMENTS);
1503	blk_queue_max_sectors(q, shost->max_sectors);
1504	blk_queue_bounce_limit(q, scsi_calculate_bounce_limit(shost));
1505	blk_queue_segment_boundary(q, shost->dma_boundary);
1506	blk_queue_issue_flush_fn(q, scsi_issue_flush_fn);
1507
1508	/*
1509	 * ordered tags are superior to flush ordering
1510	 */
1511	if (shost->ordered_tag)
1512		blk_queue_ordered(q, QUEUE_ORDERED_TAG);
1513	else if (shost->ordered_flush) {
1514		blk_queue_ordered(q, QUEUE_ORDERED_FLUSH);
1515		q->prepare_flush_fn = scsi_prepare_flush_fn;
1516		q->end_flush_fn = scsi_end_flush_fn;
1517	}
1518
1519	if (!shost->use_clustering)
1520		clear_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
1521	return q;
1522}
1523
/*
 * Tear down a request queue; thin wrapper around blk_cleanup_queue().
 */
void scsi_free_queue(struct request_queue *q)
{
	blk_cleanup_queue(q);
}
1528
1529/*
1530 * Function:    scsi_block_requests()
1531 *
1532 * Purpose:     Utility function used by low-level drivers to prevent further
1533 *		commands from being queued to the device.
1534 *
1535 * Arguments:   shost       - Host in question
1536 *
1537 * Returns:     Nothing
1538 *
1539 * Lock status: No locks are assumed held.
1540 *
1541 * Notes:       There is no timer nor any other means by which the requests
1542 *		get unblocked other than the low-level driver calling
1543 *		scsi_unblock_requests().
1544 */
1545void scsi_block_requests(struct Scsi_Host *shost)
1546{
1547	shost->host_self_blocked = 1;
1548}
1549EXPORT_SYMBOL(scsi_block_requests);
1550
1551/*
1552 * Function:    scsi_unblock_requests()
1553 *
1554 * Purpose:     Utility function used by low-level drivers to allow further
1555 *		commands from being queued to the device.
1556 *
1557 * Arguments:   shost       - Host in question
1558 *
1559 * Returns:     Nothing
1560 *
1561 * Lock status: No locks are assumed held.
1562 *
1563 * Notes:       There is no timer nor any other means by which the requests
1564 *		get unblocked other than the low-level driver calling
1565 *		scsi_unblock_requests().
1566 *
1567 *		This is done as an API function so that changes to the
1568 *		internals of the scsi mid-layer won't require wholesale
1569 *		changes to drivers that use this feature.
1570 */
1571void scsi_unblock_requests(struct Scsi_Host *shost)
1572{
1573	shost->host_self_blocked = 0;
1574	scsi_run_host_queues(shost);
1575}
1576EXPORT_SYMBOL(scsi_unblock_requests);
1577
1578int __init scsi_init_queue(void)
1579{
1580	int i;
1581
1582	for (i = 0; i < SG_MEMPOOL_NR; i++) {
1583		struct scsi_host_sg_pool *sgp = scsi_sg_pools + i;
1584		int size = sgp->size * sizeof(struct scatterlist);
1585
1586		sgp->slab = kmem_cache_create(sgp->name, size, 0,
1587				SLAB_HWCACHE_ALIGN, NULL, NULL);
1588		if (!sgp->slab) {
1589			printk(KERN_ERR "SCSI: can't init sg slab %s\n",
1590					sgp->name);
1591		}
1592
1593		sgp->pool = mempool_create(SG_MEMPOOL_SIZE,
1594				mempool_alloc_slab, mempool_free_slab,
1595				sgp->slab);
1596		if (!sgp->pool) {
1597			printk(KERN_ERR "SCSI: can't init sg mempool %s\n",
1598					sgp->name);
1599		}
1600	}
1601
1602	return 0;
1603}
1604
1605void scsi_exit_queue(void)
1606{
1607	int i;
1608
1609	for (i = 0; i < SG_MEMPOOL_NR; i++) {
1610		struct scsi_host_sg_pool *sgp = scsi_sg_pools + i;
1611		mempool_destroy(sgp->pool);
1612		kmem_cache_destroy(sgp->slab);
1613	}
1614}
1615/**
1616 *	__scsi_mode_sense - issue a mode sense, falling back from 10 to
1617 *		six bytes if necessary.
1618 *	@sdev:	SCSI device to be queried
1619 *	@dbd:	set if mode sense will allow block descriptors to be returned
1620 *	@modepage: mode page being requested
1621 *	@buffer: request buffer (may not be smaller than eight bytes)
1622 *	@len:	length of request buffer.
1623 *	@timeout: command timeout
1624 *	@retries: number of retries before failing
1625 *	@data: returns a structure abstracting the mode header data
1626 *	@sense: place to put sense data (or NULL if no sense to be collected).
1627 *		must be SCSI_SENSE_BUFFERSIZE big.
1628 *
1629 *	Returns zero if unsuccessful, or the header offset (either 4
1630 *	or 8 depending on whether a six or ten byte command was
1631 *	issued) if successful.
1632 **/
1633int
1634scsi_mode_sense(struct scsi_device *sdev, int dbd, int modepage,
1635		  unsigned char *buffer, int len, int timeout, int retries,
1636		  struct scsi_mode_data *data, char *sense) {
1637	unsigned char cmd[12];
1638	int use_10_for_ms;
1639	int header_length;
1640	int result;
1641	char *sense_buffer = NULL;
1642
1643	memset(data, 0, sizeof(*data));
1644	memset(&cmd[0], 0, 12);
1645	cmd[1] = dbd & 0x18;	/* allows DBD and LLBA bits */
1646	cmd[2] = modepage;
1647
1648	if (!sense) {
1649		sense_buffer = kmalloc(SCSI_SENSE_BUFFERSIZE, GFP_KERNEL);
1650		if (!sense_buffer) {
1651			dev_printk(KERN_ERR, &sdev->sdev_gendev, "failed to allocate sense buffer\n");
1652			return 0;
1653		}
1654		sense = sense_buffer;
1655	}
1656 retry:
1657	use_10_for_ms = sdev->use_10_for_ms;
1658
1659	if (use_10_for_ms) {
1660		if (len < 8)
1661			len = 8;
1662
1663		cmd[0] = MODE_SENSE_10;
1664		cmd[8] = len;
1665		header_length = 8;
1666	} else {
1667		if (len < 4)
1668			len = 4;
1669
1670		cmd[0] = MODE_SENSE;
1671		cmd[4] = len;
1672		header_length = 4;
1673	}
1674
1675	memset(sense, 0, SCSI_SENSE_BUFFERSIZE);
1676
1677	memset(buffer, 0, len);
1678
1679	result = scsi_execute_req(sdev, cmd, DMA_FROM_DEVICE, buffer, len,
1680				  sense, timeout, retries);
1681
1682	/* This code looks awful: what it's doing is making sure an
1683	 * ILLEGAL REQUEST sense return identifies the actual command
1684	 * byte as the problem.  MODE_SENSE commands can return
1685	 * ILLEGAL REQUEST if the code page isn't supported */
1686
1687	if (use_10_for_ms && !scsi_status_is_good(result) &&
1688	    (driver_byte(result) & DRIVER_SENSE)) {
1689		struct scsi_sense_hdr sshdr;
1690
1691		if (scsi_normalize_sense(sense, SCSI_SENSE_BUFFERSIZE, &sshdr)) {
1692			if ((sshdr.sense_key == ILLEGAL_REQUEST) &&
1693			    (sshdr.asc == 0x20) && (sshdr.ascq == 0)) {
1694				/*
1695				 * Invalid command operation code
1696				 */
1697				sdev->use_10_for_ms = 0;
1698				goto retry;
1699			}
1700		}
1701	}
1702
1703	if(scsi_status_is_good(result)) {
1704		data->header_length = header_length;
1705		if(use_10_for_ms) {
1706			data->length = buffer[0]*256 + buffer[1] + 2;
1707			data->medium_type = buffer[2];
1708			data->device_specific = buffer[3];
1709			data->longlba = buffer[4] & 0x01;
1710			data->block_descriptor_length = buffer[6]*256
1711				+ buffer[7];
1712		} else {
1713			data->length = buffer[0] + 1;
1714			data->medium_type = buffer[1];
1715			data->device_specific = buffer[2];
1716			data->block_descriptor_length = buffer[3];
1717		}
1718	}
1719
1720	kfree(sense_buffer);
1721	return result;
1722}
1723EXPORT_SYMBOL(scsi_mode_sense);
1724
1725int
1726scsi_test_unit_ready(struct scsi_device *sdev, int timeout, int retries)
1727{
1728	char cmd[] = {
1729		TEST_UNIT_READY, 0, 0, 0, 0, 0,
1730	};
1731	char sense[SCSI_SENSE_BUFFERSIZE];
1732	int result;
1733
1734	result = scsi_execute_req(sdev, cmd, DMA_NONE, NULL, 0, sense,
1735				  timeout, retries);
1736
1737	if ((driver_byte(result) & DRIVER_SENSE) && sdev->removable) {
1738		struct scsi_sense_hdr sshdr;
1739
1740		if ((scsi_normalize_sense(sense, SCSI_SENSE_BUFFERSIZE,
1741					  &sshdr)) &&
1742		    ((sshdr.sense_key == UNIT_ATTENTION) ||
1743		     (sshdr.sense_key == NOT_READY))) {
1744			sdev->changed = 1;
1745			result = 0;
1746		}
1747	}
1748	return result;
1749}
1750EXPORT_SYMBOL(scsi_test_unit_ready);
1751
1752/**
1753 *	scsi_device_set_state - Take the given device through the device
1754 *		state model.
1755 *	@sdev:	scsi device to change the state of.
1756 *	@state:	state to change to.
1757 *
1758 *	Returns zero if unsuccessful or an error if the requested
1759 *	transition is illegal.
1760 **/
1761int
1762scsi_device_set_state(struct scsi_device *sdev, enum scsi_device_state state)
1763{
1764	enum scsi_device_state oldstate = sdev->sdev_state;
1765
1766	if (state == oldstate)
1767		return 0;
1768
1769	switch (state) {
1770	case SDEV_CREATED:
1771		/* There are no legal states that come back to
1772		 * created.  This is the manually initialised start
1773		 * state */
1774		goto illegal;
1775
1776	case SDEV_RUNNING:
1777		switch (oldstate) {
1778		case SDEV_CREATED:
1779		case SDEV_OFFLINE:
1780		case SDEV_QUIESCE:
1781		case SDEV_BLOCK:
1782			break;
1783		default:
1784			goto illegal;
1785		}
1786		break;
1787
1788	case SDEV_QUIESCE:
1789		switch (oldstate) {
1790		case SDEV_RUNNING:
1791		case SDEV_OFFLINE:
1792			break;
1793		default:
1794			goto illegal;
1795		}
1796		break;
1797
1798	case SDEV_OFFLINE:
1799		switch (oldstate) {
1800		case SDEV_CREATED:
1801		case SDEV_RUNNING:
1802		case SDEV_QUIESCE:
1803		case SDEV_BLOCK:
1804			break;
1805		default:
1806			goto illegal;
1807		}
1808		break;
1809
1810	case SDEV_BLOCK:
1811		switch (oldstate) {
1812		case SDEV_CREATED:
1813		case SDEV_RUNNING:
1814			break;
1815		default:
1816			goto illegal;
1817		}
1818		break;
1819
1820	case SDEV_CANCEL:
1821		switch (oldstate) {
1822		case SDEV_CREATED:
1823		case SDEV_RUNNING:
1824		case SDEV_OFFLINE:
1825		case SDEV_BLOCK:
1826			break;
1827		default:
1828			goto illegal;
1829		}
1830		break;
1831
1832	case SDEV_DEL:
1833		switch (oldstate) {
1834		case SDEV_CANCEL:
1835			break;
1836		default:
1837			goto illegal;
1838		}
1839		break;
1840
1841	}
1842	sdev->sdev_state = state;
1843	return 0;
1844
1845 illegal:
1846	SCSI_LOG_ERROR_RECOVERY(1,
1847				dev_printk(KERN_ERR, &sdev->sdev_gendev,
1848					   "Illegal state transition %s->%s\n",
1849					   scsi_device_state_name(oldstate),
1850					   scsi_device_state_name(state))
1851				);
1852	return -EINVAL;
1853}
1854EXPORT_SYMBOL(scsi_device_set_state);
1855
1856/**
1857 *	scsi_device_quiesce - Block user issued commands.
1858 *	@sdev:	scsi device to quiesce.
1859 *
1860 *	This works by trying to transition to the SDEV_QUIESCE state
1861 *	(which must be a legal transition).  When the device is in this
1862 *	state, only special requests will be accepted, all others will
1863 *	be deferred.  Since special requests may also be requeued requests,
1864 *	a successful return doesn't guarantee the device will be
1865 *	totally quiescent.
1866 *
1867 *	Must be called with user context, may sleep.
1868 *
1869 *	Returns zero if unsuccessful or an error if not.
1870 **/
1871int
1872scsi_device_quiesce(struct scsi_device *sdev)
1873{
1874	int err = scsi_device_set_state(sdev, SDEV_QUIESCE);
1875	if (err)
1876		return err;
1877
1878	scsi_run_queue(sdev->request_queue);
1879	while (sdev->device_busy) {
1880		msleep_interruptible(200);
1881		scsi_run_queue(sdev->request_queue);
1882	}
1883	return 0;
1884}
1885EXPORT_SYMBOL(scsi_device_quiesce);
1886
1887/**
1888 *	scsi_device_resume - Restart user issued commands to a quiesced device.
1889 *	@sdev:	scsi device to resume.
1890 *
1891 *	Moves the device from quiesced back to running and restarts the
1892 *	queues.
1893 *
1894 *	Must be called with user context, may sleep.
1895 **/
1896void
1897scsi_device_resume(struct scsi_device *sdev)
1898{
1899	if(scsi_device_set_state(sdev, SDEV_RUNNING))
1900		return;
1901	scsi_run_queue(sdev->request_queue);
1902}
1903EXPORT_SYMBOL(scsi_device_resume);
1904
/* Per-device callback for scsi_target_quiesce(); @data is not used. */
static void device_quiesce_fn(struct scsi_device *sdev, void *data)
{
	scsi_device_quiesce(sdev);
}
1910
1911void
1912scsi_target_quiesce(struct scsi_target *starget)
1913{
1914	starget_for_each_device(starget, NULL, device_quiesce_fn);
1915}
1916EXPORT_SYMBOL(scsi_target_quiesce);
1917
/* Per-device callback for scsi_target_resume(); @data is not used. */
static void device_resume_fn(struct scsi_device *sdev, void *data)
{
	scsi_device_resume(sdev);
}
1923
1924void
1925scsi_target_resume(struct scsi_target *starget)
1926{
1927	starget_for_each_device(starget, NULL, device_resume_fn);
1928}
1929EXPORT_SYMBOL(scsi_target_resume);
1930
1931/**
1932 * scsi_internal_device_block - internal function to put a device
1933 *				temporarily into the SDEV_BLOCK state
1934 * @sdev:	device to block
1935 *
1936 * Block request made by scsi lld's to temporarily stop all
1937 * scsi commands on the specified device.  Called from interrupt
1938 * or normal process context.
1939 *
1940 * Returns zero if successful or error if not
1941 *
1942 * Notes:
1943 *	This routine transitions the device to the SDEV_BLOCK state
1944 *	(which must be a legal transition).  When the device is in this
1945 *	state, all commands are deferred until the scsi lld reenables
1946 *	the device with scsi_device_unblock or device_block_tmo fires.
1947 *	This routine assumes the host_lock is held on entry.
1948 **/
1949int
1950scsi_internal_device_block(struct scsi_device *sdev)
1951{
1952	request_queue_t *q = sdev->request_queue;
1953	unsigned long flags;
1954	int err = 0;
1955
1956	err = scsi_device_set_state(sdev, SDEV_BLOCK);
1957	if (err)
1958		return err;
1959
1960	/*
1961	 * The device has transitioned to SDEV_BLOCK.  Stop the
1962	 * block layer from calling the midlayer with this device's
1963	 * request queue.
1964	 */
1965	spin_lock_irqsave(q->queue_lock, flags);
1966	blk_stop_queue(q);
1967	spin_unlock_irqrestore(q->queue_lock, flags);
1968
1969	return 0;
1970}
1971EXPORT_SYMBOL_GPL(scsi_internal_device_block);
1972
1973/**
1974 * scsi_internal_device_unblock - resume a device after a block request
1975 * @sdev:	device to resume
1976 *
1977 * Called by scsi lld's or the midlayer to restart the device queue
1978 * for the previously suspended scsi device.  Called from interrupt or
1979 * normal process context.
1980 *
1981 * Returns zero if successful or error if not.
1982 *
1983 * Notes:
1984 *	This routine transitions the device to the SDEV_RUNNING state
1985 *	(which must be a legal transition) allowing the midlayer to
1986 *	goose the queue for this device.  This routine assumes the
1987 *	host_lock is held upon entry.
1988 **/
1989int
1990scsi_internal_device_unblock(struct scsi_device *sdev)
1991{
1992	request_queue_t *q = sdev->request_queue;
1993	int err;
1994	unsigned long flags;
1995
1996	/*
1997	 * Try to transition the scsi device to SDEV_RUNNING
1998	 * and goose the device queue if successful.
1999	 */
2000	err = scsi_device_set_state(sdev, SDEV_RUNNING);
2001	if (err)
2002		return err;
2003
2004	spin_lock_irqsave(q->queue_lock, flags);
2005	blk_start_queue(q);
2006	spin_unlock_irqrestore(q->queue_lock, flags);
2007
2008	return 0;
2009}
2010EXPORT_SYMBOL_GPL(scsi_internal_device_unblock);
2011
/*
 * Per-device callback used when blocking a target; @data is not used
 * and the result of the block attempt is not propagated.
 */
static void device_block(struct scsi_device *sdev, void *data)
{
	scsi_internal_device_block(sdev);
}
2017
2018static int
2019target_block(struct device *dev, void *data)
2020{
2021	if (scsi_is_target_device(dev))
2022		starget_for_each_device(to_scsi_target(dev), NULL,
2023					device_block);
2024	return 0;
2025}
2026
2027void
2028scsi_target_block(struct device *dev)
2029{
2030	if (scsi_is_target_device(dev))
2031		starget_for_each_device(to_scsi_target(dev), NULL,
2032					device_block);
2033	else
2034		device_for_each_child(dev, NULL, target_block);
2035}
2036EXPORT_SYMBOL_GPL(scsi_target_block);
2037
/*
 * Per-device callback used when unblocking a target; @data is not used
 * and the result of the unblock attempt is not propagated.
 */
static void device_unblock(struct scsi_device *sdev, void *data)
{
	scsi_internal_device_unblock(sdev);
}
2043
2044static int
2045target_unblock(struct device *dev, void *data)
2046{
2047	if (scsi_is_target_device(dev))
2048		starget_for_each_device(to_scsi_target(dev), NULL,
2049					device_unblock);
2050	return 0;
2051}
2052
2053void
2054scsi_target_unblock(struct device *dev)
2055{
2056	if (scsi_is_target_device(dev))
2057		starget_for_each_device(to_scsi_target(dev), NULL,
2058					device_unblock);
2059	else
2060		device_for_each_child(dev, NULL, target_unblock);
2061}
2062EXPORT_SYMBOL_GPL(scsi_target_unblock);
2063