scsi_lib.c revision 286f3e13a1dc7f32407629fbd7aabc8ea78c62b5
/*
 *  scsi_lib.c Copyright (C) 1999 Eric Youngdale
 *
 *  SCSI queueing library.
 *      Initial versions: Eric Youngdale (eric@andante.org).
 *                        Based upon conversations with large numbers
 *                        of people at Linux Expo.
 */

#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/completion.h>
#include <linux/kernel.h>
#include <linux/mempool.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/delay.h>

#include <scsi/scsi.h>
#include <scsi/scsi_dbg.h>
#include <scsi/scsi_device.h>
#include <scsi/scsi_driver.h>
#include <scsi/scsi_eh.h>
#include <scsi/scsi_host.h>
#include <scsi/scsi_request.h>

#include "scsi_priv.h"
#include "scsi_logging.h"


#define SG_MEMPOOL_NR		(sizeof(scsi_sg_pools)/sizeof(struct scsi_host_sg_pool))
#define SG_MEMPOOL_SIZE		32

struct scsi_host_sg_pool {
	size_t		size;
	char		*name;
	kmem_cache_t	*slab;
	mempool_t	*pool;
};

#if (SCSI_MAX_PHYS_SEGMENTS < 32)
#error SCSI_MAX_PHYS_SEGMENTS is too small
#endif

#define SP(x) { x, "sgpool-" #x }
static struct scsi_host_sg_pool scsi_sg_pools[] = {
	SP(8),
	SP(16),
	SP(32),
#if (SCSI_MAX_PHYS_SEGMENTS > 32)
	SP(64),
#if (SCSI_MAX_PHYS_SEGMENTS > 64)
	SP(128),
#if (SCSI_MAX_PHYS_SEGMENTS > 128)
	SP(256),
#if (SCSI_MAX_PHYS_SEGMENTS > 256)
#error SCSI_MAX_PHYS_SEGMENTS is too large
#endif
#endif
#endif
#endif
};
#undef SP
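
/*
 * Note (illustrative, derived from scsi_alloc_sgtable() below): a command's
 * use_sg count selects the smallest pool whose size covers it, so e.g.
 * use_sg == 12 is served from "sgpool-16" (sglist_len == 1), and
 * use_sg == 33 from "sgpool-64" when SCSI_MAX_PHYS_SEGMENTS > 32.
 */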

/*
 * Function:    scsi_insert_special_req()
 *
 * Purpose:     Insert pre-formed request into request queue.
 *
 * Arguments:   sreq	- request that is ready to be queued.
 *              at_head	- boolean.  True if we should insert at head
 *                        of queue, false if we should insert at tail.
 *
 * Lock status: Assumed that lock is not held upon entry.
 *
 * Returns:     Nothing
 *
 * Notes:       This function is called from character device and from
 *              ioctl types of functions where the caller knows exactly
 *              what SCSI command needs to be issued.  The idea is that
 *              we merely inject the command into the queue (at the head
 *              for now), and then call the queue request function to actually
 *              process it.
 */
int scsi_insert_special_req(struct scsi_request *sreq, int at_head)
{
	/*
	 * Because users of this function are apt to reuse requests with no
	 * modification, we have to sanitise the request flags here
	 */
	sreq->sr_request->flags &= ~REQ_DONTPREP;
	blk_insert_request(sreq->sr_device->request_queue, sreq->sr_request,
			   at_head, sreq);
	return 0;
}

static void scsi_run_queue(struct request_queue *q);

/*
 * Function:    scsi_queue_insert()
 *
 * Purpose:     Insert a command in the midlevel queue.
 *
 * Arguments:   cmd    - command that we are adding to queue.
 *              reason - why we are inserting command to queue.
 *
 * Lock status: Assumed that lock is not held upon entry.
 *
 * Returns:     Nothing.
 *
 * Notes:       We do this for one of two cases.  Either the host is busy
 *              and it cannot accept any more commands for the time being,
 *              or the device returned QUEUE_FULL and can accept no more
 *              commands.
 * Notes:       This could be called either from an interrupt context or a
 *              normal process context.
 */
int scsi_queue_insert(struct scsi_cmnd *cmd, int reason)
{
	struct Scsi_Host *host = cmd->device->host;
	struct scsi_device *device = cmd->device;
	struct request_queue *q = device->request_queue;
	unsigned long flags;

	SCSI_LOG_MLQUEUE(1,
		 printk("Inserting command %p into mlqueue\n", cmd));

	/*
	 * Set the appropriate busy bit for the device/host.
	 *
	 * If the host/device isn't busy, assume that something actually
	 * completed, and that we should be able to queue a command now.
	 *
	 * Note that the prior mid-layer assumption that any host could
	 * always queue at least one command is now broken.  The mid-layer
	 * will implement a user specifiable stall (see
	 * scsi_host.max_host_blocked and scsi_device.max_device_blocked)
	 * if a command is requeued with no other commands outstanding
	 * either for the device or for the host.
	 */
	if (reason == SCSI_MLQUEUE_HOST_BUSY)
		host->host_blocked = host->max_host_blocked;
	else if (reason == SCSI_MLQUEUE_DEVICE_BUSY)
		device->device_blocked = device->max_device_blocked;

	/*
	 * Decrement the counters, since these commands are no longer
	 * active on the host/device.
	 */
	scsi_device_unbusy(device);

	/*
	 * Requeue this command.  It will go before all other commands
	 * that are already in the queue.
	 *
	 * NOTE: there is magic here about the way the queue is plugged if
	 * we have no outstanding commands.
	 *
	 * Although we *don't* plug the queue, we call the request
	 * function.  The SCSI request function detects the blocked condition
	 * and plugs the queue appropriately.
	 */
	spin_lock_irqsave(q->queue_lock, flags);
	blk_requeue_request(q, cmd->request);
	spin_unlock_irqrestore(q->queue_lock, flags);

	scsi_run_queue(q);

	return 0;
}

/*
 * Function:    scsi_do_req
 *
 * Purpose:     Queue a SCSI request
 *
 * Arguments:   sreq	  - command descriptor.
 *              cmnd      - actual SCSI command to be performed.
 *              buffer    - data buffer.
 *              bufflen   - size of data buffer.
 *              done      - completion function to be run.
 *              timeout   - how long to let it run before timeout.
 *              retries   - number of retries we allow.
 *
 * Lock status: No locks held upon entry.
 *
 * Returns:     Nothing.
 *
 * Notes:	This function is only used for queueing requests for things
 *		like ioctls and character device requests - this is because
 *		we essentially just inject a request into the queue for the
 *		device.
 *
 *		In order to support the scsi_device_quiesce function, we
 *		now inject requests on the *head* of the device queue
 *		rather than the tail.
 */
void scsi_do_req(struct scsi_request *sreq, const void *cmnd,
		 void *buffer, unsigned bufflen,
		 void (*done)(struct scsi_cmnd *),
		 int timeout, int retries)
{
	/*
	 * If the upper level driver is reusing these things, then
	 * we should release the low-level block now.  Another one will
	 * be allocated later when this request is getting queued.
	 */
	__scsi_release_request(sreq);

	/*
	 * Our own function scsi_done (which marks the host as not busy,
	 * disables the timeout counter, etc) will be called by us or by the
	 * scsi_hosts[host].queuecommand() function; it in turn must also
	 * call the completion function for the high level driver.
	 */
	memcpy(sreq->sr_cmnd, cmnd, sizeof(sreq->sr_cmnd));
	sreq->sr_bufflen = bufflen;
	sreq->sr_buffer = buffer;
	sreq->sr_allowed = retries;
	sreq->sr_done = done;
	sreq->sr_timeout_per_command = timeout;

	if (sreq->sr_cmd_len == 0)
		sreq->sr_cmd_len = COMMAND_SIZE(sreq->sr_cmnd[0]);

	/*
	 * head injection *required* here otherwise quiesce won't work
	 */
	scsi_insert_special_req(sreq, 1);
}
EXPORT_SYMBOL(scsi_do_req);

/* This is the end routine we get to if a command was never attached
 * to the request.  Simply complete the request without changing
 * rq_status; this will cause a DRIVER_ERROR. */
static void scsi_wait_req_end_io(struct request *req)
{
	BUG_ON(!req->waiting);

	complete(req->waiting);
}

void scsi_wait_req(struct scsi_request *sreq, const void *cmnd, void *buffer,
		   unsigned bufflen, int timeout, int retries)
{
	DECLARE_COMPLETION(wait);
	int write = (sreq->sr_data_direction == DMA_TO_DEVICE);
	struct request *req;

	req = blk_get_request(sreq->sr_device->request_queue, write,
			      __GFP_WAIT);
	if (bufflen && blk_rq_map_kern(sreq->sr_device->request_queue, req,
				       buffer, bufflen, __GFP_WAIT)) {
		sreq->sr_result = DRIVER_ERROR << 24;
		blk_put_request(req);
		return;
	}

	req->flags |= REQ_NOMERGE;
	req->waiting = &wait;
	req->end_io = scsi_wait_req_end_io;
	req->cmd_len = COMMAND_SIZE(((u8 *)cmnd)[0]);
	req->sense = sreq->sr_sense_buffer;
	req->sense_len = 0;
	memcpy(req->cmd, cmnd, req->cmd_len);
	req->timeout = timeout;
	req->flags |= REQ_BLOCK_PC;
	req->rq_disk = NULL;
	blk_insert_request(sreq->sr_device->request_queue, req,
			   sreq->sr_data_direction == DMA_TO_DEVICE, NULL);
	wait_for_completion(&wait);
	sreq->sr_request->waiting = NULL;
	sreq->sr_result = req->errors;
	if (req->errors)
		sreq->sr_result |= (DRIVER_ERROR << 24);

	blk_put_request(req);
}

EXPORT_SYMBOL(scsi_wait_req);

/**
 * scsi_execute - insert request and wait for the result
 * @sdev:	scsi device
 * @cmd:	scsi command
 * @data_direction: data direction
 * @buffer:	data buffer
 * @bufflen:	len of buffer
 * @sense:	optional sense buffer
 * @timeout:	request timeout in seconds
 * @retries:	number of times to retry request
 * @flags:	flags to OR into the request flags
 *
 * returns the req->errors value, which is the scsi_cmnd result
 * field.
 **/
int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
		 int data_direction, void *buffer, unsigned bufflen,
		 unsigned char *sense, int timeout, int retries, int flags)
{
	struct request *req;
	int write = (data_direction == DMA_TO_DEVICE);
	int ret = DRIVER_ERROR << 24;

	req = blk_get_request(sdev->request_queue, write, __GFP_WAIT);

	if (bufflen && blk_rq_map_kern(sdev->request_queue, req,
				       buffer, bufflen, __GFP_WAIT))
		goto out;

	req->cmd_len = COMMAND_SIZE(cmd[0]);
	memcpy(req->cmd, cmd, req->cmd_len);
	req->sense = sense;
	req->sense_len = 0;
	req->timeout = timeout;
	req->flags |= flags | REQ_BLOCK_PC | REQ_SPECIAL | REQ_QUIET;

	/*
	 * head injection *required* here otherwise quiesce won't work
	 */
	blk_execute_rq(req->q, NULL, req, 1);

	ret = req->errors;
 out:
	blk_put_request(req);

	return ret;
}
EXPORT_SYMBOL(scsi_execute);
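
/*
 * Illustrative use of scsi_execute() (a sketch, not code from this file):
 * issue a TEST UNIT READY with no data transfer.  The timeout and retry
 * values are arbitrary example choices.
 *
 *	unsigned char cdb[6] = { TEST_UNIT_READY, 0, 0, 0, 0, 0 };
 *	int result = scsi_execute(sdev, cdb, DMA_NONE, NULL, 0,
 *				  NULL, 10 * HZ, 3, 0);
 *
 * A nonzero result packs the driver, host and status bytes, just like
 * the scsi_cmnd result field.
 */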

int scsi_execute_req(struct scsi_device *sdev, const unsigned char *cmd,
		     int data_direction, void *buffer, unsigned bufflen,
		     struct scsi_sense_hdr *sshdr, int timeout, int retries)
{
	char *sense = NULL;
	int result;

	if (sshdr) {
		sense = kmalloc(SCSI_SENSE_BUFFERSIZE, GFP_NOIO);
		if (!sense)
			return DRIVER_ERROR << 24;
		memset(sense, 0, SCSI_SENSE_BUFFERSIZE);
	}
	result = scsi_execute(sdev, cmd, data_direction, buffer, bufflen,
			      sense, timeout, retries, 0);
	if (sshdr)
		scsi_normalize_sense(sense, SCSI_SENSE_BUFFERSIZE, sshdr);

	kfree(sense);
	return result;
}
EXPORT_SYMBOL(scsi_execute_req);

/*
 * Function:    scsi_init_cmd_errh()
 *
 * Purpose:     Initialize cmd fields related to error handling.
 *
 * Arguments:   cmd	- command that is ready to be queued.
 *
 * Returns:     Nothing
 *
 * Notes:       This function has the job of initializing a number of
 *              fields related to error handling.  Typically this will
 *              be called once for each command, as required.
 */
static int scsi_init_cmd_errh(struct scsi_cmnd *cmd)
{
	cmd->serial_number = 0;

	memset(cmd->sense_buffer, 0, sizeof cmd->sense_buffer);

	if (cmd->cmd_len == 0)
		cmd->cmd_len = COMMAND_SIZE(cmd->cmnd[0]);

	/*
	 * We need saved copies of a number of fields - this is because
	 * error handling may need to overwrite these with different values
	 * to run different commands, and once error handling is complete,
	 * we will need to restore these values prior to running the actual
	 * command.
	 */
	cmd->old_use_sg = cmd->use_sg;
	cmd->old_cmd_len = cmd->cmd_len;
	cmd->sc_old_data_direction = cmd->sc_data_direction;
	cmd->old_underflow = cmd->underflow;
	memcpy(cmd->data_cmnd, cmd->cmnd, sizeof(cmd->cmnd));
	cmd->buffer = cmd->request_buffer;
	cmd->bufflen = cmd->request_bufflen;

	return 1;
}

/*
 * Function:   scsi_setup_cmd_retry()
 *
 * Purpose:    Restore the command state for a retry
 *
 * Arguments:  cmd	- command to be restored
 *
 * Returns:    Nothing
 *
 * Notes:      Immediately prior to retrying a command, we need
 *             to restore certain fields that we saved above.
 */
void scsi_setup_cmd_retry(struct scsi_cmnd *cmd)
{
	memcpy(cmd->cmnd, cmd->data_cmnd, sizeof(cmd->data_cmnd));
	cmd->request_buffer = cmd->buffer;
	cmd->request_bufflen = cmd->bufflen;
	cmd->use_sg = cmd->old_use_sg;
	cmd->cmd_len = cmd->old_cmd_len;
	cmd->sc_data_direction = cmd->sc_old_data_direction;
	cmd->underflow = cmd->old_underflow;
}

void scsi_device_unbusy(struct scsi_device *sdev)
{
	struct Scsi_Host *shost = sdev->host;
	unsigned long flags;

	spin_lock_irqsave(shost->host_lock, flags);
	shost->host_busy--;
	if (unlikely((shost->shost_state == SHOST_RECOVERY) &&
		     shost->host_failed))
		scsi_eh_wakeup(shost);
	spin_unlock(shost->host_lock);
	spin_lock(sdev->request_queue->queue_lock);
	sdev->device_busy--;
	spin_unlock_irqrestore(sdev->request_queue->queue_lock, flags);
}

/*
 * Called for single_lun devices on IO completion. Clear starget_sdev_user,
 * and call blk_run_queue for all the scsi_devices on the target -
 * including current_sdev first.
 *
 * Called with *no* scsi locks held.
 */
static void scsi_single_lun_run(struct scsi_device *current_sdev)
{
	struct Scsi_Host *shost = current_sdev->host;
	struct scsi_device *sdev, *tmp;
	struct scsi_target *starget = scsi_target(current_sdev);
	unsigned long flags;

	spin_lock_irqsave(shost->host_lock, flags);
	starget->starget_sdev_user = NULL;
	spin_unlock_irqrestore(shost->host_lock, flags);

	/*
	 * Call blk_run_queue for all LUNs on the target, starting with
	 * current_sdev. We race with others (to set starget_sdev_user),
	 * but in most cases, we will be first. Ideally, each LU on the
	 * target would get some limited time or requests on the target.
	 */
	blk_run_queue(current_sdev->request_queue);

	spin_lock_irqsave(shost->host_lock, flags);
	if (starget->starget_sdev_user)
		goto out;
	list_for_each_entry_safe(sdev, tmp, &starget->devices,
			same_target_siblings) {
		if (sdev == current_sdev)
			continue;
		if (scsi_device_get(sdev))
			continue;

		spin_unlock_irqrestore(shost->host_lock, flags);
		blk_run_queue(sdev->request_queue);
		spin_lock_irqsave(shost->host_lock, flags);

		scsi_device_put(sdev);
	}
 out:
	spin_unlock_irqrestore(shost->host_lock, flags);
}

/*
 * Function:	scsi_run_queue()
 *
 * Purpose:	Select a proper request queue to serve next
 *
 * Arguments:	q	- last request's queue
 *
 * Returns:     Nothing
 *
 * Notes:	The previous command was completely finished, start
 *		a new one if possible.
 */
static void scsi_run_queue(struct request_queue *q)
{
	struct scsi_device *sdev = q->queuedata;
	struct Scsi_Host *shost = sdev->host;
	unsigned long flags;

	if (sdev->single_lun)
		scsi_single_lun_run(sdev);

	spin_lock_irqsave(shost->host_lock, flags);
	while (!list_empty(&shost->starved_list) &&
	       !shost->host_blocked && !shost->host_self_blocked &&
	       !((shost->can_queue > 0) &&
		 (shost->host_busy >= shost->can_queue))) {
		/*
		 * As long as shost is accepting commands and we have
		 * starved queues, call blk_run_queue. scsi_request_fn
		 * drops the queue_lock and can add us back to the
		 * starved_list.
		 *
		 * host_lock protects the starved_list and starved_entry.
		 * scsi_request_fn must get the host_lock before checking
		 * or modifying starved_list or starved_entry.
		 */
		sdev = list_entry(shost->starved_list.next,
				  struct scsi_device, starved_entry);
		list_del_init(&sdev->starved_entry);
		spin_unlock_irqrestore(shost->host_lock, flags);

		blk_run_queue(sdev->request_queue);

		spin_lock_irqsave(shost->host_lock, flags);
		if (unlikely(!list_empty(&sdev->starved_entry)))
			/*
			 * sdev lost a race, and was put back on the
			 * starved list. This is unlikely but without this
			 * in theory we could loop forever.
			 */
			break;
	}
	spin_unlock_irqrestore(shost->host_lock, flags);

	blk_run_queue(q);
}

/*
 * Function:	scsi_requeue_command()
 *
 * Purpose:	Handle post-processing of completed commands.
 *
 * Arguments:	q	- queue to operate on
 *		cmd	- command that may need to be requeued.
 *
 * Returns:	Nothing
 *
 * Notes:	After command completion, there may be blocks left
 *		over which weren't finished by the previous command;
 *		this can happen for a number of reasons - the main one is
 *		I/O errors in the middle of the request, in which case
 *		we need to request the blocks that come after the bad
 *		sector.
 */
static void scsi_requeue_command(struct request_queue *q, struct scsi_cmnd *cmd)
{
	unsigned long flags;

	cmd->request->flags &= ~REQ_DONTPREP;

	spin_lock_irqsave(q->queue_lock, flags);
	blk_requeue_request(q, cmd->request);
	spin_unlock_irqrestore(q->queue_lock, flags);

	scsi_run_queue(q);
}

void scsi_next_command(struct scsi_cmnd *cmd)
{
	struct request_queue *q = cmd->device->request_queue;

	scsi_put_command(cmd);
	scsi_run_queue(q);
}

void scsi_run_host_queues(struct Scsi_Host *shost)
{
	struct scsi_device *sdev;

	shost_for_each_device(sdev, shost)
		scsi_run_queue(sdev->request_queue);
}

/*
 * Function:    scsi_end_request()
 *
 * Purpose:     Post-processing of completed commands (usually invoked at end
 *		of upper level post-processing and scsi_io_completion).
 *
 * Arguments:   cmd	 - command that is complete.
 *              uptodate - 1 if I/O indicates success, <= 0 for I/O error.
 *              bytes    - number of bytes of completed I/O
 *		requeue  - indicates whether we should requeue leftovers.
 *
 * Lock status: Assumed that lock is not held upon entry.
 *
 * Returns:     cmd if requeue done or required, NULL otherwise
 *
 * Notes:       This is called for block device requests in order to
 *              mark some number of sectors as complete.
 *
 *		We are guaranteeing that the request queue will be goosed
 *		at some point during this call.
 */
static struct scsi_cmnd *scsi_end_request(struct scsi_cmnd *cmd, int uptodate,
					  int bytes, int requeue)
{
	request_queue_t *q = cmd->device->request_queue;
	struct request *req = cmd->request;
	unsigned long flags;

	/*
	 * If there are blocks left over at the end, set up the command
	 * to queue the remainder of them.
	 */
	if (end_that_request_chunk(req, uptodate, bytes)) {
		int leftover = (req->hard_nr_sectors << 9);

		if (blk_pc_request(req))
			leftover = req->data_len;

		/* kill remainder if no retries */
		if (!uptodate && blk_noretry_request(req))
			end_that_request_chunk(req, 0, leftover);
		else {
			if (requeue)
				/*
				 * Bleah.  Leftovers again.  Stick the
				 * leftovers in the front of the
				 * queue, and goose the queue again.
				 */
				scsi_requeue_command(q, cmd);

			return cmd;
		}
	}

	add_disk_randomness(req->rq_disk);

	spin_lock_irqsave(q->queue_lock, flags);
	if (blk_rq_tagged(req))
		blk_queue_end_tag(q, req);
	end_that_request_last(req);
	spin_unlock_irqrestore(q->queue_lock, flags);

	/*
	 * This will goose the queue request function at the end, so we don't
	 * need to worry about launching another command.
	 */
	scsi_next_command(cmd);
	return NULL;
}

static struct scatterlist *scsi_alloc_sgtable(struct scsi_cmnd *cmd, int gfp_mask)
{
	struct scsi_host_sg_pool *sgp;
	struct scatterlist *sgl;

	BUG_ON(!cmd->use_sg);

	switch (cmd->use_sg) {
	case 1 ... 8:
		cmd->sglist_len = 0;
		break;
	case 9 ... 16:
		cmd->sglist_len = 1;
		break;
	case 17 ... 32:
		cmd->sglist_len = 2;
		break;
#if (SCSI_MAX_PHYS_SEGMENTS > 32)
	case 33 ... 64:
		cmd->sglist_len = 3;
		break;
#if (SCSI_MAX_PHYS_SEGMENTS > 64)
	case 65 ... 128:
		cmd->sglist_len = 4;
		break;
#if (SCSI_MAX_PHYS_SEGMENTS > 128)
	case 129 ... 256:
		cmd->sglist_len = 5;
		break;
#endif
#endif
#endif
	default:
		return NULL;
	}

	sgp = scsi_sg_pools + cmd->sglist_len;
	sgl = mempool_alloc(sgp->pool, gfp_mask);
	return sgl;
}

static void scsi_free_sgtable(struct scatterlist *sgl, int index)
{
	struct scsi_host_sg_pool *sgp;

	BUG_ON(index >= SG_MEMPOOL_NR);

	sgp = scsi_sg_pools + index;
	mempool_free(sgl, sgp->pool);
}

/*
 * Function:    scsi_release_buffers()
 *
 * Purpose:     Completion processing for block device I/O requests.
 *
 * Arguments:   cmd	- command that we are bailing.
 *
 * Lock status: Assumed that no lock is held upon entry.
 *
 * Returns:     Nothing
 *
 * Notes:       In the event that an upper level driver rejects a
 *		command, we must release resources allocated during
 *		the __init_io() function.  Primarily this would involve
 *		the scatter-gather table, and potentially any bounce
 *		buffers.
 */
static void scsi_release_buffers(struct scsi_cmnd *cmd)
{
	struct request *req = cmd->request;

	/*
	 * Free up any indirection buffers we allocated for DMA purposes.
	 */
	if (cmd->use_sg)
		scsi_free_sgtable(cmd->request_buffer, cmd->sglist_len);
	else if (cmd->request_buffer != req->buffer)
		kfree(cmd->request_buffer);

	/*
	 * Zero these out.  They now point to freed memory, and it is
	 * dangerous to hang onto the pointers.
	 */
	cmd->buffer  = NULL;
	cmd->bufflen = 0;
	cmd->request_buffer = NULL;
	cmd->request_bufflen = 0;
}

/*
 * Function:    scsi_io_completion()
 *
 * Purpose:     Completion processing for block device I/O requests.
 *
 * Arguments:   cmd   - command that is finished.
 *
 * Lock status: Assumed that no lock is held upon entry.
 *
 * Returns:     Nothing
 *
 * Notes:       This function is matched in terms of capabilities to
 *              the function that created the scatter-gather list.
 *              In other words, if there are no bounce buffers
 *              (the normal case for most drivers), we don't need
 *              the logic to deal with cleaning up afterwards.
 *
 *		We must do one of several things here:
 *
 *		a) Call scsi_end_request.  This will finish off the
 *		   specified number of sectors.  If we are done, the
 *		   command block will be released, and the queue
 *		   function will be goosed.  If we are not done, then
 *		   scsi_end_request will directly goose the queue.
 *
 *		b) We can just use scsi_requeue_command() here.  This would
 *		   be used if we just wanted to retry, for example.
 */
void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes,
			unsigned int block_bytes)
{
	int result = cmd->result;
	int this_count = cmd->bufflen;
	request_queue_t *q = cmd->device->request_queue;
	struct request *req = cmd->request;
	int clear_errors = 1;
	struct scsi_sense_hdr sshdr;
	int sense_valid = 0;
	int sense_deferred = 0;

	if (blk_complete_barrier_rq(q, req, good_bytes >> 9))
		return;

	/*
	 * Free up any indirection buffers we allocated for DMA purposes.
	 * For the case of a READ, we need to copy the data out of the
	 * bounce buffer and into the real buffer.
	 */
	if (cmd->use_sg)
		scsi_free_sgtable(cmd->buffer, cmd->sglist_len);
	else if (cmd->buffer != req->buffer) {
		if (rq_data_dir(req) == READ) {
			unsigned long flags;
			char *to = bio_kmap_irq(req->bio, &flags);
			memcpy(to, cmd->buffer, cmd->bufflen);
			bio_kunmap_irq(to, &flags);
		}
		kfree(cmd->buffer);
	}

	if (result) {
		sense_valid = scsi_command_normalize_sense(cmd, &sshdr);
		if (sense_valid)
			sense_deferred = scsi_sense_is_deferred(&sshdr);
	}
	if (blk_pc_request(req)) { /* SG_IO ioctl from block level */
		req->errors = result;
		if (result) {
			clear_errors = 0;
			if (sense_valid && req->sense) {
				/*
				 * SG_IO wants current and deferred errors
				 */
				int len = 8 + cmd->sense_buffer[7];

				if (len > SCSI_SENSE_BUFFERSIZE)
					len = SCSI_SENSE_BUFFERSIZE;
				memcpy(req->sense, cmd->sense_buffer, len);
				req->sense_len = len;
			}
		} else
			req->data_len = cmd->resid;
	}

	/*
	 * Zero these out.  They now point to freed memory, and it is
	 * dangerous to hang onto the pointers.
	 */
	cmd->buffer  = NULL;
	cmd->bufflen = 0;
	cmd->request_buffer = NULL;
	cmd->request_bufflen = 0;

	/*
	 * Next deal with any sectors which we were able to correctly
	 * handle.
	 */
	if (good_bytes >= 0) {
		SCSI_LOG_HLCOMPLETE(1, printk("%ld sectors total, %d bytes done.\n",
					      req->nr_sectors, good_bytes));
		SCSI_LOG_HLCOMPLETE(1, printk("use_sg is %d\n", cmd->use_sg));

		if (clear_errors)
			req->errors = 0;
		/*
		 * If multiple sectors are requested in one buffer, then
		 * they will have been finished off by the first command.
		 * If not, then we have a multi-buffer command.
		 *
		 * If block_bytes != 0, it means we had a medium error
		 * of some sort, and that we want to mark some number of
		 * sectors as not uptodate.  Thus we want to inhibit
		 * requeueing right here - we will requeue down below
		 * when we handle the bad sectors.
		 */
		cmd = scsi_end_request(cmd, 1, good_bytes, result == 0);

		/*
		 * If the command completed without error, then either
		 * finish off the rest of the command, or start a new one.
		 */
		if (result == 0 || cmd == NULL) {
			return;
		}
	}
	/*
	 * Now, if we were good little boys and girls, Santa left us a request
	 * sense buffer.  We can extract information from this, so we
	 * can choose a block to remap, etc.
	 */
	if (sense_valid && !sense_deferred) {
		switch (sshdr.sense_key) {
		case UNIT_ATTENTION:
			if (cmd->device->removable) {
				/* detected disc change.  set a bit
				 * and quietly refuse further access.
				 */
				cmd->device->changed = 1;
				cmd = scsi_end_request(cmd, 0,
						this_count, 1);
				return;
			} else {
				/*
				 * Must have been a power glitch, or a
				 * bus reset.  Could not have been a
				 * media change, so we just retry the
				 * request and see what happens.
				 */
				scsi_requeue_command(q, cmd);
				return;
			}
			break;
		case ILLEGAL_REQUEST:
			/*
			 * If we had an ILLEGAL REQUEST returned, then we may
			 * have performed an unsupported command.  The only
			 * thing this should be would be a ten byte read where
			 * only a six byte read was supported.  Also, on a
			 * system where READ CAPACITY failed, we may have read
			 * past the end of the disk.
			 */
			if (cmd->device->use_10_for_rw &&
			    (cmd->cmnd[0] == READ_10 ||
			     cmd->cmnd[0] == WRITE_10)) {
				cmd->device->use_10_for_rw = 0;
				/*
				 * This will cause a retry with a 6-byte
				 * command.
				 */
				scsi_requeue_command(q, cmd);
				result = 0;
			} else {
				cmd = scsi_end_request(cmd, 0, this_count, 1);
				return;
			}
			break;
		case NOT_READY:
			/*
			 * If the device is in the process of becoming ready,
			 * retry.
			 */
			if (sshdr.asc == 0x04 && sshdr.ascq == 0x01) {
				scsi_requeue_command(q, cmd);
				return;
			}
			if (!(req->flags & REQ_QUIET))
				dev_printk(KERN_INFO,
					   &cmd->device->sdev_gendev,
					   "Device not ready.\n");
			cmd = scsi_end_request(cmd, 0, this_count, 1);
			return;
		case VOLUME_OVERFLOW:
			if (!(req->flags & REQ_QUIET)) {
				dev_printk(KERN_INFO,
					   &cmd->device->sdev_gendev,
					   "Volume overflow, CDB: ");
				__scsi_print_command(cmd->data_cmnd);
				scsi_print_sense("", cmd);
			}
			cmd = scsi_end_request(cmd, 0, block_bytes, 1);
			return;
		default:
			break;
		}
	}			/* driver byte != 0 */
	if (host_byte(result) == DID_RESET) {
		/*
		 * Third party bus reset or reset for error
		 * recovery reasons.  Just retry the request
		 * and see what happens.
		 */
		scsi_requeue_command(q, cmd);
		return;
	}
	if (result) {
		if (!(req->flags & REQ_QUIET)) {
			dev_printk(KERN_INFO, &cmd->device->sdev_gendev,
				   "SCSI error: return code = 0x%x\n", result);

			if (driver_byte(result) & DRIVER_SENSE)
				scsi_print_sense("", cmd);
		}
		/*
		 * Mark a single buffer as not uptodate.  Queue the remainder.
		 * We sometimes get this cruft in the event that a medium error
		 * isn't properly reported.
		 */
		block_bytes = req->hard_cur_sectors << 9;
		if (!block_bytes)
			block_bytes = req->data_len;
		cmd = scsi_end_request(cmd, 0, block_bytes, 1);
	}
}
EXPORT_SYMBOL(scsi_io_completion);

/*
 * Function:    scsi_init_io()
 *
 * Purpose:     SCSI I/O initialize function.
 *
 * Arguments:   cmd   - Command descriptor we wish to initialize
 *
 * Returns:     0 on success
 *		BLKPREP_DEFER if the failure is retryable
 *		BLKPREP_KILL if the failure is fatal
 */
static int scsi_init_io(struct scsi_cmnd *cmd)
{
	struct request     *req = cmd->request;
	struct scatterlist *sgpnt;
	int		   count;

	/*
	 * if this is a rq->data based REQ_BLOCK_PC, setup for a non-sg xfer
	 */
	if ((req->flags & REQ_BLOCK_PC) && !req->bio) {
		cmd->request_bufflen = req->data_len;
		cmd->request_buffer = req->data;
		req->buffer = req->data;
		cmd->use_sg = 0;
		return 0;
	}

	/*
	 * we used to not use scatter-gather for single segment request,
	 * but now we do (it makes highmem I/O easier to support without
	 * kmapping pages)
	 */
	cmd->use_sg = req->nr_phys_segments;

	/*
	 * if sg table allocation fails, requeue request later.
	 */
	sgpnt = scsi_alloc_sgtable(cmd, GFP_ATOMIC);
	if (unlikely(!sgpnt))
		return BLKPREP_DEFER;

	cmd->request_buffer = (char *) sgpnt;
	cmd->request_bufflen = req->nr_sectors << 9;
	if (blk_pc_request(req))
		cmd->request_bufflen = req->data_len;
	req->buffer = NULL;

	/*
	 * Next, walk the list, and fill in the addresses and sizes of
	 * each segment.
	 */
	count = blk_rq_map_sg(req->q, req, cmd->request_buffer);

	/*
	 * mapped well, send it off
	 */
	if (likely(count <= cmd->use_sg)) {
		cmd->use_sg = count;
		return 0;
	}

	printk(KERN_ERR "Incorrect number of segments after building list\n");
	printk(KERN_ERR "counted %d, received %d\n", count, cmd->use_sg);
	printk(KERN_ERR "req nr_sec %lu, cur_nr_sec %u\n", req->nr_sectors,
			req->current_nr_sectors);

	/* release the command and kill it */
	scsi_release_buffers(cmd);
	scsi_put_command(cmd);
	return BLKPREP_KILL;
}

static int scsi_prepare_flush_fn(request_queue_t *q, struct request *rq)
{
	struct scsi_device *sdev = q->queuedata;
	struct scsi_driver *drv;

	if (sdev->sdev_state == SDEV_RUNNING) {
		drv = *(struct scsi_driver **) rq->rq_disk->private_data;

		if (drv->prepare_flush)
			return drv->prepare_flush(q, rq);
	}

	return 0;
}

static void scsi_end_flush_fn(request_queue_t *q, struct request *rq)
{
	struct scsi_device *sdev = q->queuedata;
	struct request *flush_rq = rq->end_io_data;
	struct scsi_driver *drv;

	if (flush_rq->errors) {
		printk(KERN_ERR "scsi: barrier error, disabling flush support\n");
		blk_queue_ordered(q, QUEUE_ORDERED_NONE);
	}

	if (sdev->sdev_state == SDEV_RUNNING) {
		drv = *(struct scsi_driver **) rq->rq_disk->private_data;
		drv->end_flush(q, rq);
	}
}

static int scsi_issue_flush_fn(request_queue_t *q, struct gendisk *disk,
			       sector_t *error_sector)
{
	struct scsi_device *sdev = q->queuedata;
	struct scsi_driver *drv;

	if (sdev->sdev_state != SDEV_RUNNING)
		return -ENXIO;

	drv = *(struct scsi_driver **) disk->private_data;
	if (drv->issue_flush)
		return drv->issue_flush(&sdev->sdev_gendev, error_sector);

	return -EOPNOTSUPP;
}

static void scsi_generic_done(struct scsi_cmnd *cmd)
{
	BUG_ON(!blk_pc_request(cmd->request));
	scsi_io_completion(cmd, cmd->result == 0 ? cmd->bufflen : 0, 0);
}

static int scsi_prep_fn(struct request_queue *q, struct request *req)
{
	struct scsi_device *sdev = q->queuedata;
	struct scsi_cmnd *cmd;
	int specials_only = 0;

	/*
	 * Just check to see if the device is online.  If it isn't, we
	 * refuse to process any commands.  The device must be brought
	 * online before trying any recovery commands
	 */
	if (unlikely(!scsi_device_online(sdev))) {
		printk(KERN_ERR "scsi%d (%d:%d): rejecting I/O to offline device\n",
		       sdev->host->host_no, sdev->id, sdev->lun);
		return BLKPREP_KILL;
	}
	if (unlikely(sdev->sdev_state != SDEV_RUNNING)) {
		/* OK, we're not in a running state; don't prep
		 * user commands */
		if (sdev->sdev_state == SDEV_DEL) {
			/* Device is fully deleted, no commands
			 * at all allowed down */
			printk(KERN_ERR "scsi%d (%d:%d): rejecting I/O to dead device\n",
			       sdev->host->host_no, sdev->id, sdev->lun);
			return BLKPREP_KILL;
		}
		/* OK, we only allow special commands (i.e. not
		 * user initiated ones) */
		specials_only = sdev->sdev_state;
	}

	/*
	 * Find the actual device driver associated with this command.
	 * The SPECIAL requests are things like character device or
	 * ioctls, which did not originate from ll_rw_blk.  Note that
	 * the special field is also used to indicate the cmd for
	 * the remainder of a partially fulfilled request that can
	 * come up when there is a medium error.  We have to treat
	 * these two cases differently.  We differentiate by looking
	 * at request->cmd, as this tells us the real story.
	 */
	if (req->flags & REQ_SPECIAL && req->special) {
		struct scsi_request *sreq = req->special;

		if (sreq->sr_magic == SCSI_REQ_MAGIC) {
			cmd = scsi_get_command(sreq->sr_device, GFP_ATOMIC);
			if (unlikely(!cmd))
				goto defer;
			scsi_init_cmd_from_req(cmd, sreq);
		} else
			cmd = req->special;
	} else if (req->flags & (REQ_CMD | REQ_BLOCK_PC)) {

		if (unlikely(specials_only) && !(req->flags & REQ_SPECIAL)) {
			if (specials_only == SDEV_QUIESCE ||
			    specials_only == SDEV_BLOCK)
				return BLKPREP_DEFER;

			printk(KERN_ERR "scsi%d (%d:%d): rejecting I/O to device being removed\n",
			       sdev->host->host_no, sdev->id, sdev->lun);
			return BLKPREP_KILL;
		}

		/*
		 * Now try and find a command block that we can use.
		 */
		if (!req->special) {
			cmd = scsi_get_command(sdev, GFP_ATOMIC);
			if (unlikely(!cmd))
				goto defer;
		} else
			cmd = req->special;

		/* pull a tag out of the request if we have one */
		cmd->tag = req->tag;
	} else {
		blk_dump_rq_flags(req, "SCSI bad req");
		return BLKPREP_KILL;
	}

	/* note the overloading of req->special.  When the tag
	 * is active it always means cmd.  If the tag goes
	 * back for re-queueing, it may be reset */
	req->special = cmd;
	cmd->request = req;

	/*
	 * FIXME: drop the lock here because the functions below
	 * expect to be called without the queue lock held.  Also,
	 * previously, we dequeued the request before dropping the
	 * lock.  We hope REQ_STARTED prevents anything untoward from
	 * happening now.
	 */
	if (req->flags & (REQ_CMD | REQ_BLOCK_PC)) {
		struct scsi_driver *drv;
		int ret;

		/*
		 * This will do a couple of things:
		 *  1) Fill in the actual SCSI command.
		 *  2) Fill in any other upper-level specific fields
		 *     (timeout).
		 *
		 * If this returns 0, it means that the request failed
		 * (reading past end of disk, reading offline device,
		 * etc).  This won't actually talk to the device, but
		 * some kinds of consistency checking may cause the
		 * request to be rejected immediately.
		 */

		/*
		 * This sets up the scatter-gather table (allocating if
		 * required).
		 */
		ret = scsi_init_io(cmd);
		if (ret)	/* BLKPREP_KILL return also releases the command */
			return ret;

		/*
		 * Initialize the actual SCSI command for this request.
		 */
		if (req->rq_disk) {
			drv = *(struct scsi_driver **)req->rq_disk->private_data;
			if (unlikely(!drv->init_command(cmd))) {
				scsi_release_buffers(cmd);
				scsi_put_command(cmd);
				return BLKPREP_KILL;
			}
		} else {
			memcpy(cmd->cmnd, req->cmd, sizeof(cmd->cmnd));
			if (rq_data_dir(req) == WRITE)
				cmd->sc_data_direction = DMA_TO_DEVICE;
			else if (req->data_len)
				cmd->sc_data_direction = DMA_FROM_DEVICE;
			else
				cmd->sc_data_direction = DMA_NONE;

			cmd->transfersize = req->data_len;
			cmd->allowed = 3;
			cmd->timeout_per_command = req->timeout;
			cmd->done = scsi_generic_done;
		}
	}

	/*
	 * The request is now prepped, no need to come back here
	 */
	req->flags |= REQ_DONTPREP;
	return BLKPREP_OK;

 defer:
	/* If we defer, the elv_next_request() returns NULL, but the
	 * queue must be restarted, so we plug here if no returning
	 * command will automatically do that. */
	if (sdev->device_busy == 0)
		blk_plug_device(q);
	return BLKPREP_DEFER;
}

/*
 * scsi_dev_queue_ready: if we can send requests to sdev, return 1 else
 * return 0.
 *
 * Called with the queue_lock held.
 */
static inline int scsi_dev_queue_ready(struct request_queue *q,
				  struct scsi_device *sdev)
{
	if (sdev->device_busy >= sdev->queue_depth)
		return 0;
	if (sdev->device_busy == 0 && sdev->device_blocked) {
		/*
		 * unblock after device_blocked iterates to zero
		 */
		if (--sdev->device_blocked == 0) {
			SCSI_LOG_MLQUEUE(3,
				printk("scsi%d (%d:%d) unblocking device at"
				       " zero depth\n", sdev->host->host_no,
				       sdev->id, sdev->lun));
		} else {
			blk_plug_device(q);
			return 0;
		}
	}
	if (sdev->device_blocked)
		return 0;

	return 1;
}

/*
 * scsi_host_queue_ready: if we can send requests to shost, return 1 else
 * return 0. We must end up running the queue again whenever 0 is
 * returned, else IO can hang.
 *
 * Called with host_lock held.
 */
static inline int scsi_host_queue_ready(struct request_queue *q,
				   struct Scsi_Host *shost,
				   struct scsi_device *sdev)
{
	if (shost->shost_state == SHOST_RECOVERY)
		return 0;
	if (shost->host_busy == 0 && shost->host_blocked) {
		/*
		 * unblock after host_blocked iterates to zero
		 */
		if (--shost->host_blocked == 0) {
			SCSI_LOG_MLQUEUE(3,
				printk("scsi%d unblocking host at zero depth\n",
					shost->host_no));
		} else {
			blk_plug_device(q);
			return 0;
		}
	}
	if ((shost->can_queue > 0 && shost->host_busy >= shost->can_queue) ||
	    shost->host_blocked || shost->host_self_blocked) {
		if (list_empty(&sdev->starved_entry))
			list_add_tail(&sdev->starved_entry, &shost->starved_list);
		return 0;
	}

	/* We're OK to process the command, so we can't be starved */
	if (!list_empty(&sdev->starved_entry))
		list_del_init(&sdev->starved_entry);

	return 1;
}

/*
 * Kill requests for a dead device
 */
static void scsi_kill_requests(request_queue_t *q)
{
	struct request *req;

	while ((req = elv_next_request(q)) != NULL) {
		blkdev_dequeue_request(req);
		req->flags |= REQ_QUIET;
		while (end_that_request_first(req, 0, req->nr_sectors))
			;
		end_that_request_last(req);
	}
}

/*
 * Function:    scsi_request_fn()
 *
 * Purpose:     Main strategy routine for SCSI.
 *
 * Arguments:   q       - Pointer to actual queue.
 *
 * Returns:     Nothing
 *
 * Lock status: IO request lock assumed to be held when called.
 */
static void scsi_request_fn(struct request_queue *q)
{
	struct scsi_device *sdev = q->queuedata;
	struct Scsi_Host *shost;
	struct scsi_cmnd *cmd;
	struct request *req;

	if (!sdev) {
		printk(KERN_ERR "scsi: killing requests for dead queue\n");
		scsi_kill_requests(q);
		return;
	}

	if (!get_device(&sdev->sdev_gendev))
		/* We must be tearing the block queue down already */
		return;

	/*
	 * To start with, we keep looping until the queue is empty, or until
	 * the host is no longer able to accept any more requests.
	 */
	shost = sdev->host;
	while (!blk_queue_plugged(q)) {
		int rtn;
		/*
		 * get next queueable request.  We do this early to make sure
		 * that the request is fully prepared even if we cannot
		 * accept it.
		 */
		req = elv_next_request(q);
		if (!req || !scsi_dev_queue_ready(q, sdev))
			break;

		if (unlikely(!scsi_device_online(sdev))) {
			printk(KERN_ERR "scsi%d (%d:%d): rejecting I/O to offline device\n",
			       sdev->host->host_no, sdev->id, sdev->lun);
			blkdev_dequeue_request(req);
			req->flags |= REQ_QUIET;
			while (end_that_request_first(req, 0, req->nr_sectors))
				;
			end_that_request_last(req);
			continue;
		}

		/*
		 * Remove the request from the request list.
		 */
		if (!(blk_queue_tagged(q) && !blk_queue_start_tag(q, req)))
			blkdev_dequeue_request(req);
		sdev->device_busy++;

		spin_unlock(q->queue_lock);
		spin_lock(shost->host_lock);

		if (!scsi_host_queue_ready(q, shost, sdev))
			goto not_ready;
		if (sdev->single_lun) {
			if (scsi_target(sdev)->starget_sdev_user &&
			    scsi_target(sdev)->starget_sdev_user != sdev)
				goto not_ready;
			scsi_target(sdev)->starget_sdev_user = sdev;
		}
		shost->host_busy++;

		/*
		 * XXX(hch): This is rather suboptimal, scsi_dispatch_cmd will
		 *		take the lock again.
		 */
		spin_unlock_irq(shost->host_lock);

		cmd = req->special;
		if (unlikely(cmd == NULL)) {
			printk(KERN_CRIT "impossible request in %s.\n"
					 "please mail a stack trace to "
					 "linux-scsi@vger.kernel.org\n",
					 __FUNCTION__);
			BUG();
		}

		/*
		 * Finally, initialize any error handling parameters, and set up
		 * the timers for timeouts.
		 */
		scsi_init_cmd_errh(cmd);

		/*
		 * Dispatch the command to the low-level driver.
		 */
		rtn = scsi_dispatch_cmd(cmd);
		spin_lock_irq(q->queue_lock);
		if (rtn) {
			/* we're refusing the command; because of
			 * the way locks get dropped, we need to
			 * check here if plugging is required */
			if (sdev->device_busy == 0)
				blk_plug_device(q);

			break;
		}
	}

	goto out;

 not_ready:
	spin_unlock_irq(shost->host_lock);

	/*
	 * lock q, handle tag, requeue req, and decrement device_busy. We
	 * must return with queue_lock held.
	 *
	 * Decrementing device_busy without checking it is OK, as all such
	 * cases (host limits or settings) should run the queue at some
	 * later time.
	 */
	spin_lock_irq(q->queue_lock);
	blk_requeue_request(q, req);
	sdev->device_busy--;
	if (sdev->device_busy == 0)
		blk_plug_device(q);
 out:
	/* must be careful here...if we trigger the ->remove() function
	 * we cannot be holding the q lock */
	spin_unlock_irq(q->queue_lock);
	put_device(&sdev->sdev_gendev);
	spin_lock_irq(q->queue_lock);
}

u64 scsi_calculate_bounce_limit(struct Scsi_Host *shost)
{
	struct device *host_dev;
	u64 bounce_limit = 0xffffffff;

	if (shost->unchecked_isa_dma)
		return BLK_BOUNCE_ISA;
	/*
	 * Platforms with virtual-DMA translation
	 * hardware have no practical limit.
	 */
	if (!PCI_DMA_BUS_IS_PHYS)
		return BLK_BOUNCE_ANY;

	host_dev = scsi_get_device(shost);
	if (host_dev && host_dev->dma_mask)
		bounce_limit = *host_dev->dma_mask;

	return bounce_limit;
}
EXPORT_SYMBOL(scsi_calculate_bounce_limit);
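
/*
 * Worked example (illustrative): a host whose device has
 * dma_mask == 0xffffffff (32-bit DMA) gets a bounce limit of 0xffffffff,
 * so the block layer bounces pages above 4GB; an unchecked_isa_dma host
 * always gets BLK_BOUNCE_ISA, and platforms with virtual-DMA translation
 * hardware get BLK_BOUNCE_ANY (no bouncing needed).
 */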

struct request_queue *scsi_alloc_queue(struct scsi_device *sdev)
{
	struct Scsi_Host *shost = sdev->host;
	struct request_queue *q;

	q = blk_init_queue(scsi_request_fn, NULL);
	if (!q)
		return NULL;

	blk_queue_prep_rq(q, scsi_prep_fn);

	blk_queue_max_hw_segments(q, shost->sg_tablesize);
	blk_queue_max_phys_segments(q, SCSI_MAX_PHYS_SEGMENTS);
	blk_queue_max_sectors(q, shost->max_sectors);
	blk_queue_bounce_limit(q, scsi_calculate_bounce_limit(shost));
	blk_queue_segment_boundary(q, shost->dma_boundary);
	blk_queue_issue_flush_fn(q, scsi_issue_flush_fn);

	/*
	 * ordered tags are superior to flush ordering
	 */
	if (shost->ordered_tag)
		blk_queue_ordered(q, QUEUE_ORDERED_TAG);
	else if (shost->ordered_flush) {
		blk_queue_ordered(q, QUEUE_ORDERED_FLUSH);
		q->prepare_flush_fn = scsi_prepare_flush_fn;
		q->end_flush_fn = scsi_end_flush_fn;
	}

	if (!shost->use_clustering)
		clear_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
	return q;
}

void scsi_free_queue(struct request_queue *q)
{
	blk_cleanup_queue(q);
}

/*
 * Function:    scsi_block_requests()
 *
 * Purpose:     Utility function used by low-level drivers to prevent further
 *		commands from being queued to the device.
 *
 * Arguments:   shost       - Host in question
 *
 * Returns:     Nothing
 *
 * Lock status: No locks are assumed held.
 *
 * Notes:       There is no timer nor any other means by which the requests
 *		get unblocked other than the low-level driver calling
 *		scsi_unblock_requests().
 */
void scsi_block_requests(struct Scsi_Host *shost)
{
	shost->host_self_blocked = 1;
}
EXPORT_SYMBOL(scsi_block_requests);

/*
 * Function:    scsi_unblock_requests()
 *
 * Purpose:     Utility function used by low-level drivers to allow further
 *		commands to be queued to the device.
 *
 * Arguments:   shost       - Host in question
 *
 * Returns:     Nothing
 *
 * Lock status: No locks are assumed held.
 *
 * Notes:       There is no timer nor any other means by which the requests
 *		get unblocked other than the low-level driver calling
 *		scsi_unblock_requests().
 *
 *		This is done as an API function so that changes to the
 *		internals of the scsi mid-layer won't require wholesale
 *		changes to drivers that use this feature.
 */
void scsi_unblock_requests(struct Scsi_Host *shost)
{
	shost->host_self_blocked = 0;
	scsi_run_host_queues(shost);
}
EXPORT_SYMBOL(scsi_unblock_requests);
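
/*
 * Typical LLD usage pattern (a sketch, not code from this file): bracket
 * a window during which the adapter cannot accept commands, for example
 * an internal firmware reset:
 *
 *	scsi_block_requests(shost);
 *	... reset or reconfigure the adapter ...
 *	scsi_unblock_requests(shost);
 */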

int __init scsi_init_queue(void)
{
	int i;

	for (i = 0; i < SG_MEMPOOL_NR; i++) {
		struct scsi_host_sg_pool *sgp = scsi_sg_pools + i;
		int size = sgp->size * sizeof(struct scatterlist);

		sgp->slab = kmem_cache_create(sgp->name, size, 0,
				SLAB_HWCACHE_ALIGN, NULL, NULL);
		if (!sgp->slab) {
			printk(KERN_ERR "SCSI: can't init sg slab %s\n",
					sgp->name);
			return -ENOMEM;
		}

		sgp->pool = mempool_create(SG_MEMPOOL_SIZE,
				mempool_alloc_slab, mempool_free_slab,
				sgp->slab);
		if (!sgp->pool) {
			printk(KERN_ERR "SCSI: can't init sg mempool %s\n",
					sgp->name);
			return -ENOMEM;
		}
	}

	return 0;
}

void scsi_exit_queue(void)
{
	int i;

	for (i = 0; i < SG_MEMPOOL_NR; i++) {
		struct scsi_host_sg_pool *sgp = scsi_sg_pools + i;
		mempool_destroy(sgp->pool);
		kmem_cache_destroy(sgp->slab);
	}
}

/**
 *	scsi_mode_sense - issue a mode sense, falling back from ten to
 *		six bytes if necessary.
 *	@sdev:	SCSI device to be queried
 *	@dbd:	set if mode sense will allow block descriptors to be returned
 *	@modepage: mode page being requested
 *	@buffer: request buffer (may not be smaller than eight bytes)
 *	@len:	length of request buffer.
 *	@timeout: command timeout
 *	@retries: number of retries before failing
 *	@data: returns a structure abstracting the mode header data
 *	@sshdr: place to put sense data (or NULL if no sense to be collected).
 *		must be SCSI_SENSE_BUFFERSIZE big.
 *
 *	Returns the result of scsi_execute_req(); on success, the header
 *	offset of the mode page data (either 4 or 8 bytes, depending on
 *	whether a six or ten byte command was issued) is returned in
 *	data->header_length.
 **/
int
scsi_mode_sense(struct scsi_device *sdev, int dbd, int modepage,
		unsigned char *buffer, int len, int timeout, int retries,
		struct scsi_mode_data *data, struct scsi_sense_hdr *sshdr)
{
	unsigned char cmd[12];
	int use_10_for_ms;
	int header_length;
	int result;
	struct scsi_sense_hdr my_sshdr;

	memset(data, 0, sizeof(*data));
	memset(&cmd[0], 0, 12);
	cmd[1] = dbd & 0x18;	/* allows DBD and LLBA bits */
	cmd[2] = modepage;

	/* caller might not be interested in sense, but we need it */
	if (!sshdr)
		sshdr = &my_sshdr;

 retry:
	use_10_for_ms = sdev->use_10_for_ms;

	if (use_10_for_ms) {
		if (len < 8)
			len = 8;

		cmd[0] = MODE_SENSE_10;
		cmd[8] = len;
		header_length = 8;
	} else {
		if (len < 4)
			len = 4;

		cmd[0] = MODE_SENSE;
		cmd[4] = len;
		header_length = 4;
	}

	memset(buffer, 0, len);

	result = scsi_execute_req(sdev, cmd, DMA_FROM_DEVICE, buffer, len,
				  sshdr, timeout, retries);

	/* This code looks awful: what it's doing is making sure an
	 * ILLEGAL REQUEST sense return identifies the actual command
	 * byte as the problem.  MODE_SENSE commands can return
	 * ILLEGAL REQUEST if the code page isn't supported */

	if (use_10_for_ms && !scsi_status_is_good(result) &&
	    (driver_byte(result) & DRIVER_SENSE)) {
		if (scsi_sense_valid(sshdr)) {
			if ((sshdr->sense_key == ILLEGAL_REQUEST) &&
			    (sshdr->asc == 0x20) && (sshdr->ascq == 0)) {
				/*
				 * Invalid command operation code
				 */
				sdev->use_10_for_ms = 0;
				goto retry;
			}
		}
	}

	if (scsi_status_is_good(result)) {
		data->header_length = header_length;
		if (use_10_for_ms) {
			data->length = buffer[0]*256 + buffer[1] + 2;
			data->medium_type = buffer[2];
			data->device_specific = buffer[3];
			data->longlba = buffer[4] & 0x01;
			data->block_descriptor_length = buffer[6]*256
				+ buffer[7];
		} else {
			data->length = buffer[0] + 1;
			data->medium_type = buffer[1];
			data->device_specific = buffer[2];
			data->block_descriptor_length = buffer[3];
		}
	}

	return result;
}
EXPORT_SYMBOL(scsi_mode_sense);
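
/*
 * Example use (illustrative sketch only; the buffer size, page number and
 * retry counts are arbitrary): fetch the caching mode page (0x08) and
 * locate the page data past the header and any block descriptors.
 *
 *	unsigned char buf[64];
 *	struct scsi_mode_data data;
 *	int res = scsi_mode_sense(sdev, 0, 0x08, buf, sizeof(buf),
 *				  5 * HZ, 3, &data, NULL);
 *	if (scsi_status_is_good(res)) {
 *		unsigned char *page = buf + data.header_length +
 *				      data.block_descriptor_length;
 *		...
 *	}
 */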

int
scsi_test_unit_ready(struct scsi_device *sdev, int timeout, int retries)
{
	char cmd[] = {
		TEST_UNIT_READY, 0, 0, 0, 0, 0,
	};
	struct scsi_sense_hdr sshdr;
	int result;

	result = scsi_execute_req(sdev, cmd, DMA_NONE, NULL, 0, &sshdr,
				  timeout, retries);

	if ((driver_byte(result) & DRIVER_SENSE) && sdev->removable) {

		if ((scsi_sense_valid(&sshdr)) &&
		    ((sshdr.sense_key == UNIT_ATTENTION) ||
		     (sshdr.sense_key == NOT_READY))) {
			sdev->changed = 1;
			result = 0;
		}
	}
	return result;
}
EXPORT_SYMBOL(scsi_test_unit_ready);

/**
 *	scsi_device_set_state - Take the given device through the device
 *		state model.
 *	@sdev:	scsi device to change the state of.
 *	@state:	state to change to.
 *
 *	Returns zero if successful or an error if the requested
 *	transition is illegal.
 **/
int
scsi_device_set_state(struct scsi_device *sdev, enum scsi_device_state state)
{
	enum scsi_device_state oldstate = sdev->sdev_state;

	if (state == oldstate)
		return 0;

	switch (state) {
	case SDEV_CREATED:
		/* There are no legal states that come back to
		 * created.  This is the manually initialised start
		 * state */
		goto illegal;

	case SDEV_RUNNING:
		switch (oldstate) {
		case SDEV_CREATED:
		case SDEV_OFFLINE:
		case SDEV_QUIESCE:
		case SDEV_BLOCK:
			break;
		default:
			goto illegal;
		}
		break;

	case SDEV_QUIESCE:
		switch (oldstate) {
		case SDEV_RUNNING:
		case SDEV_OFFLINE:
			break;
		default:
			goto illegal;
		}
		break;

	case SDEV_OFFLINE:
		switch (oldstate) {
		case SDEV_CREATED:
		case SDEV_RUNNING:
		case SDEV_QUIESCE:
		case SDEV_BLOCK:
			break;
		default:
			goto illegal;
		}
		break;

	case SDEV_BLOCK:
		switch (oldstate) {
		case SDEV_CREATED:
		case SDEV_RUNNING:
			break;
		default:
			goto illegal;
		}
		break;

	case SDEV_CANCEL:
		switch (oldstate) {
		case SDEV_CREATED:
		case SDEV_RUNNING:
		case SDEV_OFFLINE:
		case SDEV_BLOCK:
			break;
		default:
			goto illegal;
		}
		break;

	case SDEV_DEL:
		switch (oldstate) {
		case SDEV_CANCEL:
			break;
		default:
			goto illegal;
		}
		break;

	}
	sdev->sdev_state = state;
	return 0;

 illegal:
	SCSI_LOG_ERROR_RECOVERY(1,
				dev_printk(KERN_ERR, &sdev->sdev_gendev,
					   "Illegal state transition %s->%s\n",
					   scsi_device_state_name(oldstate),
					   scsi_device_state_name(state))
				);
	return -EINVAL;
}
EXPORT_SYMBOL(scsi_device_set_state);
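
/*
 * For reference, the legal transitions accepted above (new state on the
 * left, permitted old states on the right, read directly off the switch):
 *
 *	SDEV_RUNNING  <-  CREATED, OFFLINE, QUIESCE, BLOCK
 *	SDEV_QUIESCE  <-  RUNNING, OFFLINE
 *	SDEV_OFFLINE  <-  CREATED, RUNNING, QUIESCE, BLOCK
 *	SDEV_BLOCK    <-  CREATED, RUNNING
 *	SDEV_CANCEL   <-  CREATED, RUNNING, OFFLINE, BLOCK
 *	SDEV_DEL      <-  CANCEL
 *	SDEV_CREATED  <-  (none; it is only the initial state)
 */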

/**
 *	scsi_device_quiesce - Block user issued commands.
 *	@sdev:	scsi device to quiesce.
 *
 *	This works by trying to transition to the SDEV_QUIESCE state
 *	(which must be a legal transition).  When the device is in this
 *	state, only special requests will be accepted, all others will
 *	be deferred.  Since special requests may also be requeued requests,
 *	a successful return doesn't guarantee the device will be
 *	totally quiescent.
 *
 *	Must be called with user context, may sleep.
 *
 *	Returns zero if successful or an error if not.
 **/
int
scsi_device_quiesce(struct scsi_device *sdev)
{
	int err = scsi_device_set_state(sdev, SDEV_QUIESCE);
	if (err)
		return err;

	scsi_run_queue(sdev->request_queue);
	while (sdev->device_busy) {
		msleep_interruptible(200);
		scsi_run_queue(sdev->request_queue);
	}
	return 0;
}
EXPORT_SYMBOL(scsi_device_quiesce);

/**
 *	scsi_device_resume - Restart user issued commands to a quiesced device.
 *	@sdev:	scsi device to resume.
 *
 *	Moves the device from quiesced back to running and restarts the
 *	queues.
 *
 *	Must be called with user context, may sleep.
 **/
void
scsi_device_resume(struct scsi_device *sdev)
{
	if (scsi_device_set_state(sdev, SDEV_RUNNING))
		return;
	scsi_run_queue(sdev->request_queue);
}
EXPORT_SYMBOL(scsi_device_resume);

static void
device_quiesce_fn(struct scsi_device *sdev, void *data)
{
	scsi_device_quiesce(sdev);
}

void
scsi_target_quiesce(struct scsi_target *starget)
{
	starget_for_each_device(starget, NULL, device_quiesce_fn);
}
EXPORT_SYMBOL(scsi_target_quiesce);

static void
device_resume_fn(struct scsi_device *sdev, void *data)
{
	scsi_device_resume(sdev);
}

void
scsi_target_resume(struct scsi_target *starget)
{
	starget_for_each_device(starget, NULL, device_resume_fn);
}
EXPORT_SYMBOL(scsi_target_resume);

/**
 * scsi_internal_device_block - internal function to put a device
 *				temporarily into the SDEV_BLOCK state
 * @sdev:	device to block
 *
 * Block request made by scsi lld's to temporarily stop all
 * scsi commands on the specified device.  Called from interrupt
 * or normal process context.
 *
 * Returns zero if successful or error if not
 *
 * Notes:
 *	This routine transitions the device to the SDEV_BLOCK state
 *	(which must be a legal transition).  When the device is in this
 *	state, all commands are deferred until the scsi lld reenables
 *	the device with scsi_internal_device_unblock or device_block_tmo
 *	fires.  This routine assumes the host_lock is held on entry.
 **/
int
scsi_internal_device_block(struct scsi_device *sdev)
{
	request_queue_t *q = sdev->request_queue;
	unsigned long flags;
	int err = 0;

	err = scsi_device_set_state(sdev, SDEV_BLOCK);
	if (err)
		return err;

	/*
	 * The device has transitioned to SDEV_BLOCK.  Stop the
	 * block layer from calling the midlayer with this device's
	 * request queue.
	 */
	spin_lock_irqsave(q->queue_lock, flags);
	blk_stop_queue(q);
	spin_unlock_irqrestore(q->queue_lock, flags);

	return 0;
}
EXPORT_SYMBOL_GPL(scsi_internal_device_block);

/**
 * scsi_internal_device_unblock - resume a device after a block request
 * @sdev:	device to resume
 *
 * Called by scsi lld's or the midlayer to restart the device queue
 * for the previously suspended scsi device.  Called from interrupt or
 * normal process context.
 *
 * Returns zero if successful or error if not.
 *
 * Notes:
 *	This routine transitions the device to the SDEV_RUNNING state
 *	(which must be a legal transition) allowing the midlayer to
 *	goose the queue for this device.  This routine assumes the
 *	host_lock is held upon entry.
 **/
int
scsi_internal_device_unblock(struct scsi_device *sdev)
{
	request_queue_t *q = sdev->request_queue;
	int err;
	unsigned long flags;

	/*
	 * Try to transition the scsi device to SDEV_RUNNING
	 * and goose the device queue if successful.
	 */
	err = scsi_device_set_state(sdev, SDEV_RUNNING);
	if (err)
		return err;

	spin_lock_irqsave(q->queue_lock, flags);
	blk_start_queue(q);
	spin_unlock_irqrestore(q->queue_lock, flags);

	return 0;
}
EXPORT_SYMBOL_GPL(scsi_internal_device_unblock);

static void
device_block(struct scsi_device *sdev, void *data)
{
	scsi_internal_device_block(sdev);
}

static int
target_block(struct device *dev, void *data)
{
	if (scsi_is_target_device(dev))
		starget_for_each_device(to_scsi_target(dev), NULL,
					device_block);
	return 0;
}

void
scsi_target_block(struct device *dev)
{
	if (scsi_is_target_device(dev))
		starget_for_each_device(to_scsi_target(dev), NULL,
					device_block);
	else
		device_for_each_child(dev, NULL, target_block);
}
EXPORT_SYMBOL_GPL(scsi_target_block);

static void
device_unblock(struct scsi_device *sdev, void *data)
{
	scsi_internal_device_unblock(sdev);
}

static int
target_unblock(struct device *dev, void *data)
{
	if (scsi_is_target_device(dev))
		starget_for_each_device(to_scsi_target(dev), NULL,
					device_unblock);
	return 0;
}

void
scsi_target_unblock(struct device *dev)
{
	if (scsi_is_target_device(dev))
		starget_for_each_device(to_scsi_target(dev), NULL,
					device_unblock);
	else
		device_for_each_child(dev, NULL, target_unblock);
}
EXPORT_SYMBOL_GPL(scsi_target_unblock);
2077