cciss.c revision 6e9a4738c9fadb7cbdcabc1e3b415159f3741ed9
1/*
2 *    Disk Array driver for HP SA 5xxx and 6xxx Controllers
3 *    Copyright 2000, 2006 Hewlett-Packard Development Company, L.P.
4 *
5 *    This program is free software; you can redistribute it and/or modify
6 *    it under the terms of the GNU General Public License as published by
7 *    the Free Software Foundation; either version 2 of the License, or
8 *    (at your option) any later version.
9 *
10 *    This program is distributed in the hope that it will be useful,
11 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
12 *    MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
13 *    NON INFRINGEMENT.  See the GNU General Public License for more details.
14 *
15 *    You should have received a copy of the GNU General Public License
16 *    along with this program; if not, write to the Free Software
17 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 *
19 *    Questions/Comments/Bugfixes to iss_storagedev@hp.com
20 *
21 */
22
23#include <linux/config.h>	/* CONFIG_PROC_FS */
24#include <linux/module.h>
25#include <linux/interrupt.h>
26#include <linux/types.h>
27#include <linux/pci.h>
28#include <linux/kernel.h>
29#include <linux/slab.h>
30#include <linux/delay.h>
31#include <linux/major.h>
32#include <linux/fs.h>
33#include <linux/bio.h>
34#include <linux/blkpg.h>
35#include <linux/timer.h>
36#include <linux/proc_fs.h>
37#include <linux/init.h>
38#include <linux/hdreg.h>
39#include <linux/spinlock.h>
40#include <linux/compat.h>
41#include <linux/blktrace_api.h>
42#include <asm/uaccess.h>
43#include <asm/io.h>
44
45#include <linux/dma-mapping.h>
46#include <linux/blkdev.h>
47#include <linux/genhd.h>
48#include <linux/completion.h>
49
50#define CCISS_DRIVER_VERSION(maj,min,submin) ((maj<<16)|(min<<8)|(submin))
51#define DRIVER_NAME "HP CISS Driver (v 3.6.10)"
52#define DRIVER_VERSION CCISS_DRIVER_VERSION(3,6,10)
53
54/* Embedded module documentation macros - see modules.h */
55MODULE_AUTHOR("Hewlett-Packard Company");
56MODULE_DESCRIPTION("Driver for HP Controller SA5xxx SA6xxx version 3.6.10");
57MODULE_SUPPORTED_DEVICE("HP SA5i SA5i+ SA532 SA5300 SA5312 SA641 SA642 SA6400"
58			" SA6i P600 P800 P400 P400i E200 E200i E500");
59MODULE_LICENSE("GPL");
60
61#include "cciss_cmd.h"
62#include "cciss.h"
63#include <linux/cciss_ioctl.h>
64
65/* define the PCI info for the cards we can control */
66static const struct pci_device_id cciss_pci_device_id[] = {
67	{PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_CISS,  0x0E11, 0x4070},
68	{PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_CISSB, 0x0E11, 0x4080},
69	{PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_CISSB, 0x0E11, 0x4082},
70	{PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_CISSB, 0x0E11, 0x4083},
71	{PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_CISSC, 0x0E11, 0x4091},
72	{PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_CISSC, 0x0E11, 0x409A},
73	{PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_CISSC, 0x0E11, 0x409B},
74	{PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_CISSC, 0x0E11, 0x409C},
75	{PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_CISSC, 0x0E11, 0x409D},
76	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSA,     0x103C, 0x3225},
77	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSC,     0x103C, 0x3223},
78	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSC,     0x103C, 0x3234},
79	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSC,     0x103C, 0x3235},
80	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSD,     0x103C, 0x3211},
81	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSD,     0x103C, 0x3212},
82	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSD,     0x103C, 0x3213},
83	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSD,     0x103C, 0x3214},
84	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSD,     0x103C, 0x3215},
85	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSC,     0x103C, 0x3233},
86	{0,}
87};
88
89MODULE_DEVICE_TABLE(pci, cciss_pci_device_id);
90
91/*  board_id = Subsystem Device ID & Vendor ID
92 *  product = Marketing Name for the board
93 *  access = Address of the struct of function pointers
94 */
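/* For example, board_id 0x3225103C below is subsystem device ID 0x3225
 * in the upper 16 bits and subsystem vendor ID 0x103C (HP) in the lower
 * 16 bits, which maps to the "Smart Array P600" entry.
 */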
95static struct board_type products[] = {
96	{0x40700E11, "Smart Array 5300", &SA5_access},
97	{0x40800E11, "Smart Array 5i", &SA5B_access},
98	{0x40820E11, "Smart Array 532", &SA5B_access},
99	{0x40830E11, "Smart Array 5312", &SA5B_access},
100	{0x409A0E11, "Smart Array 641", &SA5_access},
101	{0x409B0E11, "Smart Array 642", &SA5_access},
102	{0x409C0E11, "Smart Array 6400", &SA5_access},
103	{0x409D0E11, "Smart Array 6400 EM", &SA5_access},
104	{0x40910E11, "Smart Array 6i", &SA5_access},
105	{0x3225103C, "Smart Array P600", &SA5_access},
106	{0x3223103C, "Smart Array P800", &SA5_access},
107	{0x3234103C, "Smart Array P400", &SA5_access},
108	{0x3235103C, "Smart Array P400i", &SA5_access},
109	{0x3211103C, "Smart Array E200i", &SA5_access},
110	{0x3212103C, "Smart Array E200", &SA5_access},
111	{0x3213103C, "Smart Array E200i", &SA5_access},
112	{0x3214103C, "Smart Array E200i", &SA5_access},
113	{0x3215103C, "Smart Array E200i", &SA5_access},
114	{0x3233103C, "Smart Array E500", &SA5_access},
115};
116
117/* How long to wait (in milliseconds) for board to go into simple mode */
118#define MAX_CONFIG_WAIT 30000
119#define MAX_IOCTL_CONFIG_WAIT 1000
120
121/* define how many times we will try a command because of bus resets */
122#define MAX_CMD_RETRIES 3
123
124#define READ_AHEAD 	 1024
125#define NR_CMDS		 384	/* #commands that can be outstanding */
126#define MAX_CTLR	32
127
128/* Originally the cciss driver only supported 8 major numbers */
129#define MAX_CTLR_ORIG 	8
130
131static ctlr_info_t *hba[MAX_CTLR];
132
133static void do_cciss_request(request_queue_t *q);
134static irqreturn_t do_cciss_intr(int irq, void *dev_id, struct pt_regs *regs);
135static int cciss_open(struct inode *inode, struct file *filep);
136static int cciss_release(struct inode *inode, struct file *filep);
137static int cciss_ioctl(struct inode *inode, struct file *filep,
138		       unsigned int cmd, unsigned long arg);
139static int cciss_getgeo(struct block_device *bdev, struct hd_geometry *geo);
140
141static int revalidate_allvol(ctlr_info_t *host);
142static int cciss_revalidate(struct gendisk *disk);
143static int rebuild_lun_table(ctlr_info_t *h, struct gendisk *del_disk);
144static int deregister_disk(struct gendisk *disk, drive_info_struct *drv,
145			   int clear_all);
146
147static void cciss_read_capacity(int ctlr, int logvol, int withirq,
148			sector_t *total_size, unsigned int *block_size);
149static void cciss_read_capacity_16(int ctlr, int logvol, int withirq,
150			sector_t *total_size, unsigned int *block_size);
151static void cciss_geometry_inquiry(int ctlr, int logvol,
152			int withirq, sector_t total_size,
153			unsigned int block_size, InquiryData_struct *inq_buff,
154				   drive_info_struct *drv);
155static void cciss_getgeometry(int cntl_num);
156static void __devinit cciss_interrupt_mode(ctlr_info_t *, struct pci_dev *,
157					   __u32);
158static void start_io(ctlr_info_t *h);
159static int sendcmd(__u8 cmd, int ctlr, void *buff, size_t size,
160		   unsigned int use_unit_num, unsigned int log_unit,
161		   __u8 page_code, unsigned char *scsi3addr, int cmd_type);
162static int sendcmd_withirq(__u8 cmd, int ctlr, void *buff, size_t size,
163			   unsigned int use_unit_num, unsigned int log_unit,
164			   __u8 page_code, int cmd_type);
165
166static void fail_all_cmds(unsigned long ctlr);
167
168#ifdef CONFIG_PROC_FS
169static int cciss_proc_get_info(char *buffer, char **start, off_t offset,
170			       int length, int *eof, void *data);
171static void cciss_procinit(int i);
172#else
173static void cciss_procinit(int i)
174{
175}
176#endif				/* CONFIG_PROC_FS */
177
178#ifdef CONFIG_COMPAT
179static long cciss_compat_ioctl(struct file *f, unsigned cmd, unsigned long arg);
180#endif
181
182static struct block_device_operations cciss_fops = {
183	.owner = THIS_MODULE,
184	.open = cciss_open,
185	.release = cciss_release,
186	.ioctl = cciss_ioctl,
187	.getgeo = cciss_getgeo,
188#ifdef CONFIG_COMPAT
189	.compat_ioctl = cciss_compat_ioctl,
190#endif
191	.revalidate_disk = cciss_revalidate,
192};
193
194/*
195 * Enqueuing and dequeuing functions for cmdlists.
196 */
197static inline void addQ(CommandList_struct **Qptr, CommandList_struct *c)
198{
199	if (*Qptr == NULL) {
200		*Qptr = c;
201		c->next = c->prev = c;
202	} else {
203		c->prev = (*Qptr)->prev;
204		c->next = (*Qptr);
205		(*Qptr)->prev->next = c;
206		(*Qptr)->prev = c;
207	}
208}
209
210static inline CommandList_struct *removeQ(CommandList_struct **Qptr,
211					  CommandList_struct *c)
212{
213	if (c && c->next != c) {
214		if (*Qptr == c)
215			*Qptr = c->next;
216		c->prev->next = c->next;
217		c->next->prev = c->prev;
218	} else {
219		*Qptr = NULL;
220	}
221	return c;
222}
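/*
 * These helpers keep the per-controller command queues (e.g. h->reqQ) as
 * circular doubly-linked lists threaded through each command's next/prev
 * pointers: *Qptr points at the head (NULL when empty), addQ() links a
 * new command in just before the head (i.e. at the tail), and removeQ()
 * unlinks an arbitrary element in O(1).
 */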
223
224#include "cciss_scsi.c"		/* For SCSI tape support */
225
226#ifdef CONFIG_PROC_FS
227
228/*
229 * Report information about this controller.
230 */
231#define ENG_GIG 1000000000
232#define ENG_GIG_FACTOR (ENG_GIG/512)
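/* Logical drive sizes are tracked in 512-byte sectors, so dividing
 * nr_blocks by ENG_GIG_FACTOR (10^9 / 512) in cciss_proc_get_info()
 * yields whole decimal gigabytes; the remainder, scaled by 100, gives
 * the two-digit fractional part shown in /proc. */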
233#define RAID_UNKNOWN 6
234static const char *raid_label[] = { "0", "4", "1(1+0)", "5", "5+1", "ADG",
235	"UNKNOWN"
236};
237
238static struct proc_dir_entry *proc_cciss;
239
240static int cciss_proc_get_info(char *buffer, char **start, off_t offset,
241			       int length, int *eof, void *data)
242{
243	off_t pos = 0;
244	off_t len = 0;
245	int size, i, ctlr;
246	ctlr_info_t *h = (ctlr_info_t *) data;
247	drive_info_struct *drv;
248	unsigned long flags;
249	sector_t vol_sz, vol_sz_frac;
250
251	ctlr = h->ctlr;
252
253	/* prevent displaying bogus info during configuration
254	 * or deconfiguration of a logical volume
255	 */
256	spin_lock_irqsave(CCISS_LOCK(ctlr), flags);
257	if (h->busy_configuring) {
258		spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
259		return -EBUSY;
260	}
261	h->busy_configuring = 1;
262	spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
263
264	size = sprintf(buffer, "%s: HP %s Controller\n"
265		       "Board ID: 0x%08lx\n"
266		       "Firmware Version: %c%c%c%c\n"
267		       "IRQ: %d\n"
268		       "Logical drives: %d\n"
269		       "Current Q depth: %d\n"
270		       "Current # commands on controller: %d\n"
271		       "Max Q depth since init: %d\n"
272		       "Max # commands on controller since init: %d\n"
273		       "Max SG entries since init: %d\n\n",
274		       h->devname,
275		       h->product_name,
276		       (unsigned long)h->board_id,
277		       h->firm_ver[0], h->firm_ver[1], h->firm_ver[2],
278		       h->firm_ver[3], (unsigned int)h->intr[SIMPLE_MODE_INT],
279		       h->num_luns, h->Qdepth, h->commands_outstanding,
280		       h->maxQsinceinit, h->max_outstanding, h->maxSG);
281
282	pos += size;
283	len += size;
284	cciss_proc_tape_report(ctlr, buffer, &pos, &len);
285	for (i = 0; i <= h->highest_lun; i++) {
286
287		drv = &h->drv[i];
288		if (drv->heads == 0)
289			continue;
290
291		vol_sz = drv->nr_blocks;
292		vol_sz_frac = sector_div(vol_sz, ENG_GIG_FACTOR);
293		vol_sz_frac *= 100;
294		sector_div(vol_sz_frac, ENG_GIG_FACTOR);
295
296		if (drv->raid_level > 5)
297			drv->raid_level = RAID_UNKNOWN;
298		size = sprintf(buffer + len, "cciss/c%dd%d:"
299			       "\t%4u.%02uGB\tRAID %s\n",
300			       ctlr, i, (int)vol_sz, (int)vol_sz_frac,
301			       raid_label[drv->raid_level]);
302		pos += size;
303		len += size;
304	}
305
306	*eof = 1;
307	*start = buffer + offset;
308	len -= offset;
309	if (len > length)
310		len = length;
311	h->busy_configuring = 0;
312	return len;
313}
314
315static int
316cciss_proc_write(struct file *file, const char __user *buffer,
317		 unsigned long count, void *data)
318{
319	unsigned char cmd[80];
320	int len;
321#ifdef CONFIG_CISS_SCSI_TAPE
322	ctlr_info_t *h = (ctlr_info_t *) data;
323	int rc;
324#endif
325
326	if (count > sizeof(cmd) - 1)
327		return -EINVAL;
328	if (copy_from_user(cmd, buffer, count))
329		return -EFAULT;
330	cmd[count] = '\0';
331	len = strlen(cmd);	// above 3 lines ensure safety
332	if (len && cmd[len - 1] == '\n')
333		cmd[--len] = '\0';
334#	ifdef CONFIG_CISS_SCSI_TAPE
335	if (strcmp("engage scsi", cmd) == 0) {
336		rc = cciss_engage_scsi(h->ctlr);
337		if (rc != 0)
338			return -rc;
339		return count;
340	}
341	/* might be nice to have "disengage" too, but it's not
342	   safely possible. (only 1 module use count, lock issues.) */
343#	endif
344	return -EINVAL;
345}
346
347/*
348 * Get us a file in /proc/cciss that says something about each controller.
349 * Create /proc/cciss if it doesn't exist yet.
350 */
351static void __devinit cciss_procinit(int i)
352{
353	struct proc_dir_entry *pde;
354
355	if (proc_cciss == NULL) {
356		proc_cciss = proc_mkdir("cciss", proc_root_driver);
357		if (!proc_cciss)
358			return;
359	}
360
361	pde = create_proc_read_entry(hba[i]->devname,
362				     S_IWUSR | S_IRUSR | S_IRGRP | S_IROTH,
363				     proc_cciss, cciss_proc_get_info, hba[i]);
364	pde->write_proc = cciss_proc_write;
365}
366#endif				/* CONFIG_PROC_FS */
367
368/*
369 * For operations that cannot sleep, a command block is allocated at init,
370 * and managed by cmd_alloc() and cmd_free() using a simple bitmap to track
371 * which ones are free or in use.  For operations that can wait for kmalloc
372 * to possibly sleep, this routine can be called with get_from_pool set to 0.
373 * cmd_free() MUST then be called with got_from_pool set to 0 as well.
374 */
375static CommandList_struct *cmd_alloc(ctlr_info_t *h, int get_from_pool)
376{
377	CommandList_struct *c;
378	int i;
379	u64bit temp64;
380	dma_addr_t cmd_dma_handle, err_dma_handle;
381
382	if (!get_from_pool) {
383		c = (CommandList_struct *) pci_alloc_consistent(h->pdev,
384			sizeof(CommandList_struct), &cmd_dma_handle);
385		if (c == NULL)
386			return NULL;
387		memset(c, 0, sizeof(CommandList_struct));
388
389		c->cmdindex = -1;
390
391		c->err_info = (ErrorInfo_struct *)
392		    pci_alloc_consistent(h->pdev, sizeof(ErrorInfo_struct),
393			    &err_dma_handle);
394
395		if (c->err_info == NULL) {
396			pci_free_consistent(h->pdev,
397				sizeof(CommandList_struct), c, cmd_dma_handle);
398			return NULL;
399		}
400		memset(c->err_info, 0, sizeof(ErrorInfo_struct));
401	} else {		/* get it out of the controllers pool */
402
403		do {
404			i = find_first_zero_bit(h->cmd_pool_bits, NR_CMDS);
405			if (i == NR_CMDS)
406				return NULL;
407		} while (test_and_set_bit
408			 (i & (BITS_PER_LONG - 1),
409			  h->cmd_pool_bits + (i / BITS_PER_LONG)) != 0);
410#ifdef CCISS_DEBUG
411		printk(KERN_DEBUG "cciss: using command buffer %d\n", i);
412#endif
413		c = h->cmd_pool + i;
414		memset(c, 0, sizeof(CommandList_struct));
415		cmd_dma_handle = h->cmd_pool_dhandle
416		    + i * sizeof(CommandList_struct);
417		c->err_info = h->errinfo_pool + i;
418		memset(c->err_info, 0, sizeof(ErrorInfo_struct));
419		err_dma_handle = h->errinfo_pool_dhandle
420		    + i * sizeof(ErrorInfo_struct);
421		h->nr_allocs++;
422
423		c->cmdindex = i;
424	}
425
426	c->busaddr = (__u32) cmd_dma_handle;
427	temp64.val = (__u64) err_dma_handle;
428	c->ErrDesc.Addr.lower = temp64.val32.lower;
429	c->ErrDesc.Addr.upper = temp64.val32.upper;
430	c->ErrDesc.Len = sizeof(ErrorInfo_struct);
431
432	c->ctlr = h->ctlr;
433	return c;
434}
435
436/*
437 * Frees a command block that was previously allocated with cmd_alloc().
438 */
439static void cmd_free(ctlr_info_t *h, CommandList_struct *c, int got_from_pool)
440{
441	int i;
442	u64bit temp64;
443
444	if (!got_from_pool) {
445		temp64.val32.lower = c->ErrDesc.Addr.lower;
446		temp64.val32.upper = c->ErrDesc.Addr.upper;
447		pci_free_consistent(h->pdev, sizeof(ErrorInfo_struct),
448				    c->err_info, (dma_addr_t) temp64.val);
449		pci_free_consistent(h->pdev, sizeof(CommandList_struct),
450				    c, (dma_addr_t) c->busaddr);
451	} else {
452		i = c - h->cmd_pool;
453		clear_bit(i & (BITS_PER_LONG - 1),
454			  h->cmd_pool_bits + (i / BITS_PER_LONG));
455		h->nr_frees++;
456	}
457}
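/*
 * Illustrative pairing only: the pool flag given to cmd_alloc() must be
 * matched by the flag given to cmd_free().  Request-path code that must
 * not sleep uses the preallocated pool:
 *
 *	c = cmd_alloc(h, 1);
 *	if (c) {
 *		... fill in and issue the command ...
 *		cmd_free(h, c, 1);
 *	}
 *
 * while ioctl/init paths that may block use cmd_alloc(h, 0) and
 * cmd_free(h, c, 0), which allocate and release DMA-consistent memory
 * via pci_alloc_consistent()/pci_free_consistent().
 */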
458
459static inline ctlr_info_t *get_host(struct gendisk *disk)
460{
461	return disk->queue->queuedata;
462}
463
464static inline drive_info_struct *get_drv(struct gendisk *disk)
465{
466	return disk->private_data;
467}
468
469/*
470 * Open.  Make sure the device is really there.
471 */
472static int cciss_open(struct inode *inode, struct file *filep)
473{
474	ctlr_info_t *host = get_host(inode->i_bdev->bd_disk);
475	drive_info_struct *drv = get_drv(inode->i_bdev->bd_disk);
476
477#ifdef CCISS_DEBUG
478	printk(KERN_DEBUG "cciss_open %s\n", inode->i_bdev->bd_disk->disk_name);
479#endif				/* CCISS_DEBUG */
480
481	if (host->busy_initializing || drv->busy_configuring)
482		return -EBUSY;
483	/*
484	 * Root is allowed to open raw volume zero even if it's not configured
485	 * so array config can still work. Root is also allowed to open any
486 * volume that has a LUN ID, so it can issue an IOCTL to reread the
487 * disk information.  I don't think I really like this,
488 * but I'm already using way too many device nodes to claim another one
489	 * for "raw controller".
490	 */
491	if (drv->nr_blocks == 0) {
492		if (iminor(inode) != 0) {	/* not node 0? */
493			/* if not node 0 make sure it is a partition = 0 */
494			if (iminor(inode) & 0x0f) {
495				return -ENXIO;
496				/* if it is, make sure we have a LUN ID */
497			} else if (drv->LunID == 0) {
498				return -ENXIO;
499			}
500		}
501		if (!capable(CAP_SYS_ADMIN))
502			return -EPERM;
503	}
504	drv->usage_count++;
505	host->usage_count++;
506	return 0;
507}
508
509/*
510 * Close.  Sync first.
511 */
512static int cciss_release(struct inode *inode, struct file *filep)
513{
514	ctlr_info_t *host = get_host(inode->i_bdev->bd_disk);
515	drive_info_struct *drv = get_drv(inode->i_bdev->bd_disk);
516
517#ifdef CCISS_DEBUG
518	printk(KERN_DEBUG "cciss_release %s\n",
519	       inode->i_bdev->bd_disk->disk_name);
520#endif				/* CCISS_DEBUG */
521
522	drv->usage_count--;
523	host->usage_count--;
524	return 0;
525}
526
527#ifdef CONFIG_COMPAT
528
529static int do_ioctl(struct file *f, unsigned cmd, unsigned long arg)
530{
531	int ret;
532	lock_kernel();
533	ret = cciss_ioctl(f->f_dentry->d_inode, f, cmd, arg);
534	unlock_kernel();
535	return ret;
536}
537
538static int cciss_ioctl32_passthru(struct file *f, unsigned cmd,
539				  unsigned long arg);
540static int cciss_ioctl32_big_passthru(struct file *f, unsigned cmd,
541				      unsigned long arg);
542
543static long cciss_compat_ioctl(struct file *f, unsigned cmd, unsigned long arg)
544{
545	switch (cmd) {
546	case CCISS_GETPCIINFO:
547	case CCISS_GETINTINFO:
548	case CCISS_SETINTINFO:
549	case CCISS_GETNODENAME:
550	case CCISS_SETNODENAME:
551	case CCISS_GETHEARTBEAT:
552	case CCISS_GETBUSTYPES:
553	case CCISS_GETFIRMVER:
554	case CCISS_GETDRIVVER:
555	case CCISS_REVALIDVOLS:
556	case CCISS_DEREGDISK:
557	case CCISS_REGNEWDISK:
558	case CCISS_REGNEWD:
559	case CCISS_RESCANDISK:
560	case CCISS_GETLUNINFO:
561		return do_ioctl(f, cmd, arg);
562
563	case CCISS_PASSTHRU32:
564		return cciss_ioctl32_passthru(f, cmd, arg);
565	case CCISS_BIG_PASSTHRU32:
566		return cciss_ioctl32_big_passthru(f, cmd, arg);
567
568	default:
569		return -ENOIOCTLCMD;
570	}
571}
572
573static int cciss_ioctl32_passthru(struct file *f, unsigned cmd,
574				  unsigned long arg)
575{
576	IOCTL32_Command_struct __user *arg32 =
577	    (IOCTL32_Command_struct __user *) arg;
578	IOCTL_Command_struct arg64;
579	IOCTL_Command_struct __user *p = compat_alloc_user_space(sizeof(arg64));
580	int err;
581	u32 cp;
582
583	err = 0;
584	err |=
585	    copy_from_user(&arg64.LUN_info, &arg32->LUN_info,
586			   sizeof(arg64.LUN_info));
587	err |=
588	    copy_from_user(&arg64.Request, &arg32->Request,
589			   sizeof(arg64.Request));
590	err |=
591	    copy_from_user(&arg64.error_info, &arg32->error_info,
592			   sizeof(arg64.error_info));
593	err |= get_user(arg64.buf_size, &arg32->buf_size);
594	err |= get_user(cp, &arg32->buf);
595	arg64.buf = compat_ptr(cp);
596	err |= copy_to_user(p, &arg64, sizeof(arg64));
597
598	if (err)
599		return -EFAULT;
600
601	err = do_ioctl(f, CCISS_PASSTHRU, (unsigned long)p);
602	if (err)
603		return err;
604	err |=
605	    copy_in_user(&arg32->error_info, &p->error_info,
606			 sizeof(arg32->error_info));
607	if (err)
608		return -EFAULT;
609	return err;
610}
611
612static int cciss_ioctl32_big_passthru(struct file *file, unsigned cmd,
613				      unsigned long arg)
614{
615	BIG_IOCTL32_Command_struct __user *arg32 =
616	    (BIG_IOCTL32_Command_struct __user *) arg;
617	BIG_IOCTL_Command_struct arg64;
618	BIG_IOCTL_Command_struct __user *p =
619	    compat_alloc_user_space(sizeof(arg64));
620	int err;
621	u32 cp;
622
623	err = 0;
624	err |=
625	    copy_from_user(&arg64.LUN_info, &arg32->LUN_info,
626			   sizeof(arg64.LUN_info));
627	err |=
628	    copy_from_user(&arg64.Request, &arg32->Request,
629			   sizeof(arg64.Request));
630	err |=
631	    copy_from_user(&arg64.error_info, &arg32->error_info,
632			   sizeof(arg64.error_info));
633	err |= get_user(arg64.buf_size, &arg32->buf_size);
634	err |= get_user(arg64.malloc_size, &arg32->malloc_size);
635	err |= get_user(cp, &arg32->buf);
636	arg64.buf = compat_ptr(cp);
637	err |= copy_to_user(p, &arg64, sizeof(arg64));
638
639	if (err)
640		return -EFAULT;
641
642	err = do_ioctl(file, CCISS_BIG_PASSTHRU, (unsigned long)p);
643	if (err)
644		return err;
645	err |=
646	    copy_in_user(&arg32->error_info, &p->error_info,
647			 sizeof(arg32->error_info));
648	if (err)
649		return -EFAULT;
650	return err;
651}
652#endif
653
654static int cciss_getgeo(struct block_device *bdev, struct hd_geometry *geo)
655{
656	drive_info_struct *drv = get_drv(bdev->bd_disk);
657
658	if (!drv->cylinders)
659		return -ENXIO;
660
661	geo->heads = drv->heads;
662	geo->sectors = drv->sectors;
663	geo->cylinders = drv->cylinders;
664	return 0;
665}
666
667/*
668 * ioctl
669 */
670static int cciss_ioctl(struct inode *inode, struct file *filep,
671		       unsigned int cmd, unsigned long arg)
672{
673	struct block_device *bdev = inode->i_bdev;
674	struct gendisk *disk = bdev->bd_disk;
675	ctlr_info_t *host = get_host(disk);
676	drive_info_struct *drv = get_drv(disk);
677	int ctlr = host->ctlr;
678	void __user *argp = (void __user *)arg;
679
680#ifdef CCISS_DEBUG
681	printk(KERN_DEBUG "cciss_ioctl: Called with cmd=%x %lx\n", cmd, arg);
682#endif				/* CCISS_DEBUG */
683
684	switch (cmd) {
685	case CCISS_GETPCIINFO:
686		{
687			cciss_pci_info_struct pciinfo;
688
689			if (!arg)
690				return -EINVAL;
691			pciinfo.domain = pci_domain_nr(host->pdev->bus);
692			pciinfo.bus = host->pdev->bus->number;
693			pciinfo.dev_fn = host->pdev->devfn;
694			pciinfo.board_id = host->board_id;
695			if (copy_to_user
696			    (argp, &pciinfo, sizeof(cciss_pci_info_struct)))
697				return -EFAULT;
698			return 0;
699		}
700	case CCISS_GETINTINFO:
701		{
702			cciss_coalint_struct intinfo;
703			if (!arg)
704				return -EINVAL;
705			intinfo.delay =
706			    readl(&host->cfgtable->HostWrite.CoalIntDelay);
707			intinfo.count =
708			    readl(&host->cfgtable->HostWrite.CoalIntCount);
709			if (copy_to_user
710			    (argp, &intinfo, sizeof(cciss_coalint_struct)))
711				return -EFAULT;
712			return 0;
713		}
714	case CCISS_SETINTINFO:
715		{
716			cciss_coalint_struct intinfo;
717			unsigned long flags;
718			int i;
719
720			if (!arg)
721				return -EINVAL;
722			if (!capable(CAP_SYS_ADMIN))
723				return -EPERM;
724			if (copy_from_user
725			    (&intinfo, argp, sizeof(cciss_coalint_struct)))
726				return -EFAULT;
727			if ((intinfo.delay == 0) && (intinfo.count == 0))
728			{
729//                      printk("cciss_ioctl: delay and count cannot be 0\n");
730				return -EINVAL;
731			}
732			spin_lock_irqsave(CCISS_LOCK(ctlr), flags);
733			/* Update the field, and then ring the doorbell */
734			writel(intinfo.delay,
735			       &(host->cfgtable->HostWrite.CoalIntDelay));
736			writel(intinfo.count,
737			       &(host->cfgtable->HostWrite.CoalIntCount));
738			writel(CFGTBL_ChangeReq, host->vaddr + SA5_DOORBELL);
739
740			for (i = 0; i < MAX_IOCTL_CONFIG_WAIT; i++) {
741				if (!(readl(host->vaddr + SA5_DOORBELL)
742				      & CFGTBL_ChangeReq))
743					break;
744				/* delay and try again */
745				udelay(1000);
746			}
747			spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
748			if (i >= MAX_IOCTL_CONFIG_WAIT)
749				return -EAGAIN;
750			return 0;
751		}
752	case CCISS_GETNODENAME:
753		{
754			NodeName_type NodeName;
755			int i;
756
757			if (!arg)
758				return -EINVAL;
759			for (i = 0; i < 16; i++)
760				NodeName[i] =
761				    readb(&host->cfgtable->ServerName[i]);
762			if (copy_to_user(argp, NodeName, sizeof(NodeName_type)))
763				return -EFAULT;
764			return 0;
765		}
766	case CCISS_SETNODENAME:
767		{
768			NodeName_type NodeName;
769			unsigned long flags;
770			int i;
771
772			if (!arg)
773				return -EINVAL;
774			if (!capable(CAP_SYS_ADMIN))
775				return -EPERM;
776
777			if (copy_from_user
778			    (NodeName, argp, sizeof(NodeName_type)))
779				return -EFAULT;
780
781			spin_lock_irqsave(CCISS_LOCK(ctlr), flags);
782
783			/* Update the field, and then ring the doorbell */
784			for (i = 0; i < 16; i++)
785				writeb(NodeName[i],
786				       &host->cfgtable->ServerName[i]);
787
788			writel(CFGTBL_ChangeReq, host->vaddr + SA5_DOORBELL);
789
790			for (i = 0; i < MAX_IOCTL_CONFIG_WAIT; i++) {
791				if (!(readl(host->vaddr + SA5_DOORBELL)
792				      & CFGTBL_ChangeReq))
793					break;
794				/* delay and try again */
795				udelay(1000);
796			}
797			spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
798			if (i >= MAX_IOCTL_CONFIG_WAIT)
799				return -EAGAIN;
800			return 0;
801		}
802
803	case CCISS_GETHEARTBEAT:
804		{
805			Heartbeat_type heartbeat;
806
807			if (!arg)
808				return -EINVAL;
809			heartbeat = readl(&host->cfgtable->HeartBeat);
810			if (copy_to_user
811			    (argp, &heartbeat, sizeof(Heartbeat_type)))
812				return -EFAULT;
813			return 0;
814		}
815	case CCISS_GETBUSTYPES:
816		{
817			BusTypes_type BusTypes;
818
819			if (!arg)
820				return -EINVAL;
821			BusTypes = readl(&host->cfgtable->BusTypes);
822			if (copy_to_user
823			    (argp, &BusTypes, sizeof(BusTypes_type)))
824				return -EFAULT;
825			return 0;
826		}
827	case CCISS_GETFIRMVER:
828		{
829			FirmwareVer_type firmware;
830
831			if (!arg)
832				return -EINVAL;
833			memcpy(firmware, host->firm_ver, 4);
834
835			if (copy_to_user
836			    (argp, firmware, sizeof(FirmwareVer_type)))
837				return -EFAULT;
838			return 0;
839		}
840	case CCISS_GETDRIVVER:
841		{
842			DriverVer_type DriverVer = DRIVER_VERSION;
843
844			if (!arg)
845				return -EINVAL;
846
847			if (copy_to_user
848			    (argp, &DriverVer, sizeof(DriverVer_type)))
849				return -EFAULT;
850			return 0;
851		}
852
853	case CCISS_REVALIDVOLS:
854		if (bdev != bdev->bd_contains || drv != host->drv)
855			return -ENXIO;
856		return revalidate_allvol(host);
857
858	case CCISS_GETLUNINFO:{
859			LogvolInfo_struct luninfo;
860
861			luninfo.LunID = drv->LunID;
862			luninfo.num_opens = drv->usage_count;
863			luninfo.num_parts = 0;
864			if (copy_to_user(argp, &luninfo,
865					 sizeof(LogvolInfo_struct)))
866				return -EFAULT;
867			return 0;
868		}
869	case CCISS_DEREGDISK:
870		return rebuild_lun_table(host, disk);
871
872	case CCISS_REGNEWD:
873		return rebuild_lun_table(host, NULL);
874
875	case CCISS_PASSTHRU:
876		{
877			IOCTL_Command_struct iocommand;
878			CommandList_struct *c;
879			char *buff = NULL;
880			u64bit temp64;
881			unsigned long flags;
882			DECLARE_COMPLETION_ONSTACK(wait);
883
884			if (!arg)
885				return -EINVAL;
886
887			if (!capable(CAP_SYS_RAWIO))
888				return -EPERM;
889
890			if (copy_from_user
891			    (&iocommand, argp, sizeof(IOCTL_Command_struct)))
892				return -EFAULT;
893			if ((iocommand.buf_size < 1) &&
894			    (iocommand.Request.Type.Direction != XFER_NONE)) {
895				return -EINVAL;
896			}
897#if 0				/* 'buf_size' member is 16-bits, and always smaller than kmalloc limit */
898			/* Check kmalloc limits */
899			if (iocommand.buf_size > 128000)
900				return -EINVAL;
901#endif
902			if (iocommand.buf_size > 0) {
903				buff = kmalloc(iocommand.buf_size, GFP_KERNEL);
904				if (buff == NULL)
905					return -EFAULT;
906			}
907			if (iocommand.Request.Type.Direction == XFER_WRITE) {
908				/* Copy the data into the buffer we created */
909				if (copy_from_user
910				    (buff, iocommand.buf, iocommand.buf_size)) {
911					kfree(buff);
912					return -EFAULT;
913				}
914			} else {
915				memset(buff, 0, iocommand.buf_size);
916			}
917			if ((c = cmd_alloc(host, 0)) == NULL) {
918				kfree(buff);
919				return -ENOMEM;
920			}
921			// Fill in the command type
922			c->cmd_type = CMD_IOCTL_PEND;
923			// Fill in Command Header
924			c->Header.ReplyQueue = 0;	// unused in simple mode
925			if (iocommand.buf_size > 0)	// buffer to fill
926			{
927				c->Header.SGList = 1;
928				c->Header.SGTotal = 1;
929			} else	// no buffers to fill
930			{
931				c->Header.SGList = 0;
932				c->Header.SGTotal = 0;
933			}
934			c->Header.LUN = iocommand.LUN_info;
935			c->Header.Tag.lower = c->busaddr;	// use the bus address of the cmd block as the tag
936
937			// Fill in Request block
938			c->Request = iocommand.Request;
939
940			// Fill in the scatter gather information
941			if (iocommand.buf_size > 0) {
942				temp64.val = pci_map_single(host->pdev, buff,
943					iocommand.buf_size,
944					PCI_DMA_BIDIRECTIONAL);
945				c->SG[0].Addr.lower = temp64.val32.lower;
946				c->SG[0].Addr.upper = temp64.val32.upper;
947				c->SG[0].Len = iocommand.buf_size;
948				c->SG[0].Ext = 0;	// we are not chaining
949			}
950			c->waiting = &wait;
951
952			/* Put the request on the tail of the request queue */
953			spin_lock_irqsave(CCISS_LOCK(ctlr), flags);
954			addQ(&host->reqQ, c);
955			host->Qdepth++;
956			start_io(host);
957			spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
958
959			wait_for_completion(&wait);
960
961			/* unlock the buffers from DMA */
962			temp64.val32.lower = c->SG[0].Addr.lower;
963			temp64.val32.upper = c->SG[0].Addr.upper;
964			pci_unmap_single(host->pdev, (dma_addr_t) temp64.val,
965					 iocommand.buf_size,
966					 PCI_DMA_BIDIRECTIONAL);
967
968			/* Copy the error information out */
969			iocommand.error_info = *(c->err_info);
970			if (copy_to_user
971			    (argp, &iocommand, sizeof(IOCTL_Command_struct))) {
972				kfree(buff);
973				cmd_free(host, c, 0);
974				return -EFAULT;
975			}
976
977			if (iocommand.Request.Type.Direction == XFER_READ) {
978				/* Copy the data out of the buffer we created */
979				if (copy_to_user
980				    (iocommand.buf, buff, iocommand.buf_size)) {
981					kfree(buff);
982					cmd_free(host, c, 0);
983					return -EFAULT;
984				}
985			}
986			kfree(buff);
987			cmd_free(host, c, 0);
988			return 0;
989		}
990	case CCISS_BIG_PASSTHRU:{
991			BIG_IOCTL_Command_struct *ioc;
992			CommandList_struct *c;
993			unsigned char **buff = NULL;
994			int *buff_size = NULL;
995			u64bit temp64;
996			unsigned long flags;
997			BYTE sg_used = 0;
998			int status = 0;
999			int i;
1000			DECLARE_COMPLETION_ONSTACK(wait);
1001			__u32 left;
1002			__u32 sz;
1003			BYTE __user *data_ptr;
1004
1005			if (!arg)
1006				return -EINVAL;
1007			if (!capable(CAP_SYS_RAWIO))
1008				return -EPERM;
1009			ioc = (BIG_IOCTL_Command_struct *)
1010			    kmalloc(sizeof(*ioc), GFP_KERNEL);
1011			if (!ioc) {
1012				status = -ENOMEM;
1013				goto cleanup1;
1014			}
1015			if (copy_from_user(ioc, argp, sizeof(*ioc))) {
1016				status = -EFAULT;
1017				goto cleanup1;
1018			}
1019			if ((ioc->buf_size < 1) &&
1020			    (ioc->Request.Type.Direction != XFER_NONE)) {
1021				status = -EINVAL;
1022				goto cleanup1;
1023			}
1024			/* Check kmalloc limits  using all SGs */
1025			if (ioc->malloc_size > MAX_KMALLOC_SIZE) {
1026				status = -EINVAL;
1027				goto cleanup1;
1028			}
1029			if (ioc->buf_size > ioc->malloc_size * MAXSGENTRIES) {
1030				status = -EINVAL;
1031				goto cleanup1;
1032			}
1033			buff =
1034			    kzalloc(MAXSGENTRIES * sizeof(char *), GFP_KERNEL);
1035			if (!buff) {
1036				status = -ENOMEM;
1037				goto cleanup1;
1038			}
1039			buff_size = (int *)kmalloc(MAXSGENTRIES * sizeof(int),
1040						   GFP_KERNEL);
1041			if (!buff_size) {
1042				status = -ENOMEM;
1043				goto cleanup1;
1044			}
1045			left = ioc->buf_size;
1046			data_ptr = ioc->buf;
1047			while (left) {
1048				sz = (left >
1049				      ioc->malloc_size) ? ioc->
1050				    malloc_size : left;
1051				buff_size[sg_used] = sz;
1052				buff[sg_used] = kmalloc(sz, GFP_KERNEL);
1053				if (buff[sg_used] == NULL) {
1054					status = -ENOMEM;
1055					goto cleanup1;
1056				}
1057				if (ioc->Request.Type.Direction == XFER_WRITE) {
1058					if (copy_from_user
1059					    (buff[sg_used], data_ptr, sz)) {
1060						status = -ENOMEM;
1061						goto cleanup1;
1062					}
1063				} else {
1064					memset(buff[sg_used], 0, sz);
1065				}
1066				left -= sz;
1067				data_ptr += sz;
1068				sg_used++;
1069			}
1070			if ((c = cmd_alloc(host, 0)) == NULL) {
1071				status = -ENOMEM;
1072				goto cleanup1;
1073			}
1074			c->cmd_type = CMD_IOCTL_PEND;
1075			c->Header.ReplyQueue = 0;
1076
1077			if (ioc->buf_size > 0) {
1078				c->Header.SGList = sg_used;
1079				c->Header.SGTotal = sg_used;
1080			} else {
1081				c->Header.SGList = 0;
1082				c->Header.SGTotal = 0;
1083			}
1084			c->Header.LUN = ioc->LUN_info;
1085			c->Header.Tag.lower = c->busaddr;
1086
1087			c->Request = ioc->Request;
1088			if (ioc->buf_size > 0) {
1089				int i;
1090				for (i = 0; i < sg_used; i++) {
1091					temp64.val =
1092					    pci_map_single(host->pdev, buff[i],
1093						    buff_size[i],
1094						    PCI_DMA_BIDIRECTIONAL);
1095					c->SG[i].Addr.lower =
1096					    temp64.val32.lower;
1097					c->SG[i].Addr.upper =
1098					    temp64.val32.upper;
1099					c->SG[i].Len = buff_size[i];
1100					c->SG[i].Ext = 0;	/* we are not chaining */
1101				}
1102			}
1103			c->waiting = &wait;
1104			/* Put the request on the tail of the request queue */
1105			spin_lock_irqsave(CCISS_LOCK(ctlr), flags);
1106			addQ(&host->reqQ, c);
1107			host->Qdepth++;
1108			start_io(host);
1109			spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
1110			wait_for_completion(&wait);
1111			/* unlock the buffers from DMA */
1112			for (i = 0; i < sg_used; i++) {
1113				temp64.val32.lower = c->SG[i].Addr.lower;
1114				temp64.val32.upper = c->SG[i].Addr.upper;
1115				pci_unmap_single(host->pdev,
1116					(dma_addr_t) temp64.val, buff_size[i],
1117					PCI_DMA_BIDIRECTIONAL);
1118			}
1119			/* Copy the error information out */
1120			ioc->error_info = *(c->err_info);
1121			if (copy_to_user(argp, ioc, sizeof(*ioc))) {
1122				cmd_free(host, c, 0);
1123				status = -EFAULT;
1124				goto cleanup1;
1125			}
1126			if (ioc->Request.Type.Direction == XFER_READ) {
1127				/* Copy the data out of the buffer we created */
1128				BYTE __user *ptr = ioc->buf;
1129				for (i = 0; i < sg_used; i++) {
1130					if (copy_to_user
1131					    (ptr, buff[i], buff_size[i])) {
1132						cmd_free(host, c, 0);
1133						status = -EFAULT;
1134						goto cleanup1;
1135					}
1136					ptr += buff_size[i];
1137				}
1138			}
1139			cmd_free(host, c, 0);
1140			status = 0;
1141		      cleanup1:
1142			if (buff) {
1143				for (i = 0; i < sg_used; i++)
1144					kfree(buff[i]);
1145				kfree(buff);
1146			}
1147			kfree(buff_size);
1148			kfree(ioc);
1149			return status;
1150		}
1151	default:
1152		return -ENOTTY;
1153	}
1154}
1155
1156/*
1157 * revalidate_allvol is for online array config utilities.  After a
1158 * utility reconfigures the drives in the array, it can use this function
1159 * (through an ioctl) to make the driver zap any previous disk structs for
1160 * that controller and get new ones.
1161 *
1162 * Right now I'm using the getgeometry() function to do this, but this
1163 * function should probably be finer grained and allow you to revalidate one
1164 * particular logical volume (instead of all of them on a particular
1165 * controller).
1166 */
1167static int revalidate_allvol(ctlr_info_t *host)
1168{
1169	int ctlr = host->ctlr, i;
1170	unsigned long flags;
1171
1172	spin_lock_irqsave(CCISS_LOCK(ctlr), flags);
1173	if (host->usage_count > 1) {
1174		spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
1175		printk(KERN_WARNING "cciss: Device busy for volume"
1176		       " revalidation (usage=%d)\n", host->usage_count);
1177		return -EBUSY;
1178	}
1179	host->usage_count++;
1180	spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
1181
1182	for (i = 0; i < NWD; i++) {
1183		struct gendisk *disk = host->gendisk[i];
1184		if (disk) {
1185			request_queue_t *q = disk->queue;
1186
1187			if (disk->flags & GENHD_FL_UP)
1188				del_gendisk(disk);
1189			if (q)
1190				blk_cleanup_queue(q);
1191		}
1192	}
1193
1194	/*
1195	 * Set the partition and block size structures for all volumes
1196	 * on this controller to zero.  We will reread all of this data
1197	 */
1198	memset(host->drv, 0, sizeof(drive_info_struct)
1199	       * CISS_MAX_LUN);
1200	/*
1201	 * Tell the array controller not to give us any interrupts while
1202	 * we check the new geometry.  Then turn interrupts back on when
1203	 * we're done.
1204	 */
1205	host->access.set_intr_mask(host, CCISS_INTR_OFF);
1206	cciss_getgeometry(ctlr);
1207	host->access.set_intr_mask(host, CCISS_INTR_ON);
1208
1209	/* Loop through each real device */
1210	for (i = 0; i < NWD; i++) {
1211		struct gendisk *disk = host->gendisk[i];
1212		drive_info_struct *drv = &(host->drv[i]);
1213		/* we must register the controller even if no disks exist */
1214		/* this is for the online array utilities */
1215		if (!drv->heads && i)
1216			continue;
1217		blk_queue_hardsect_size(drv->queue, drv->block_size);
1218		set_capacity(disk, drv->nr_blocks);
1219		add_disk(disk);
1220	}
1221	host->usage_count--;
1222	return 0;
1223}
1224
1225static inline void complete_buffers(struct bio *bio, int status)
1226{
1227	while (bio) {
1228		struct bio *xbh = bio->bi_next;
1229		int nr_sectors = bio_sectors(bio);
1230
1231		bio->bi_next = NULL;
1232		bio_endio(bio, nr_sectors << 9, status ? 0 : -EIO);
1233		bio = xbh;
1234	}
1235}
1236
1237static void cciss_check_queues(ctlr_info_t *h)
1238{
1239	int start_queue = h->next_to_run;
1240	int i;
1241
1242	/* check to see if we have maxed out the number of commands that can
1243	 * be placed on the queue.  If so then exit.  We do this check here
1244	 * in case the interrupt we serviced was from an ioctl and did not
1245	 * free any new commands.
1246	 */
1247	if ((find_first_zero_bit(h->cmd_pool_bits, NR_CMDS)) == NR_CMDS)
1248		return;
1249
1250	/* We have room on the queue for more commands.  Now we need to queue
1251	 * them up.  We will also keep track of the next queue to run so
1252	 * that every queue gets a chance to be started first.
1253	 */
1254	for (i = 0; i < h->highest_lun + 1; i++) {
1255		int curr_queue = (start_queue + i) % (h->highest_lun + 1);
1256		/* make sure the disk has been added and the drive is real
1257		 * because this can be called from the middle of init_one.
1258		 */
1259		if (!(h->drv[curr_queue].queue) || !(h->drv[curr_queue].heads))
1260			continue;
1261		blk_start_queue(h->gendisk[curr_queue]->queue);
1262
1263		/* check to see if we have maxed out the number of commands
1264		 * that can be placed on the queue.
1265		 */
1266		if ((find_first_zero_bit(h->cmd_pool_bits, NR_CMDS)) == NR_CMDS) {
1267			if (curr_queue == start_queue) {
1268				h->next_to_run =
1269				    (start_queue + 1) % (h->highest_lun + 1);
1270				break;
1271			} else {
1272				h->next_to_run = curr_queue;
1273				break;
1274			}
1275		} else {
1276			curr_queue = (curr_queue + 1) % (h->highest_lun + 1);
1277		}
1278	}
1279}
1280
1281static void cciss_softirq_done(struct request *rq)
1282{
1283	CommandList_struct *cmd = rq->completion_data;
1284	ctlr_info_t *h = hba[cmd->ctlr];
1285	unsigned long flags;
1286	u64bit temp64;
1287	int i, ddir;
1288
1289	if (cmd->Request.Type.Direction == XFER_READ)
1290		ddir = PCI_DMA_FROMDEVICE;
1291	else
1292		ddir = PCI_DMA_TODEVICE;
1293
1294	/* command did not need to be retried */
1295	/* unmap the DMA mapping for all the scatter gather elements */
1296	for (i = 0; i < cmd->Header.SGList; i++) {
1297		temp64.val32.lower = cmd->SG[i].Addr.lower;
1298		temp64.val32.upper = cmd->SG[i].Addr.upper;
1299		pci_unmap_page(h->pdev, temp64.val, cmd->SG[i].Len, ddir);
1300	}
1301
1302	complete_buffers(rq->bio, rq->errors);
1303
1304#ifdef CCISS_DEBUG
1305	printk("Done with %p\n", rq);
1306#endif				/* CCISS_DEBUG */
1307
1308	add_disk_randomness(rq->rq_disk);
1309	spin_lock_irqsave(&h->lock, flags);
1310	end_that_request_last(rq, rq->errors);
1311	cmd_free(h, cmd, 1);
1312	cciss_check_queues(h);
1313	spin_unlock_irqrestore(&h->lock, flags);
1314}
1315
1316/* This function will check the usage_count of the drive to be updated/added.
1317 * If the usage_count is zero then the drive information will be updated and
1318 * the disk will be re-registered with the kernel.  If not then it will be
1319 * left alone for the next reboot.  The exception to this is disk 0 which
1320 * will always be left registered with the kernel since it is also the
1321 * controller node.  Any changes to disk 0 will show up on the next
1322 * reboot.
1323 */
1324static void cciss_update_drive_info(int ctlr, int drv_index)
1325{
1326	ctlr_info_t *h = hba[ctlr];
1327	struct gendisk *disk;
1328	InquiryData_struct *inq_buff = NULL;
1329	unsigned int block_size;
1330	sector_t total_size;
1331	unsigned long flags = 0;
1332	int ret = 0;
1333
1334	/* if the disk already exists then deregister it before proceeding */
1335	if (h->drv[drv_index].raid_level != -1) {
1336		spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
1337		h->drv[drv_index].busy_configuring = 1;
1338		spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
1339		ret = deregister_disk(h->gendisk[drv_index],
1340				      &h->drv[drv_index], 0);
1341		h->drv[drv_index].busy_configuring = 0;
1342	}
1343
1344	/* If the disk is in use return */
1345	if (ret)
1346		return;
1347
1348	/* Get information about the disk and modify the driver structure */
1349	inq_buff = kmalloc(sizeof(InquiryData_struct), GFP_KERNEL);
1350	if (inq_buff == NULL)
1351		goto mem_msg;
1352
1353	cciss_read_capacity(ctlr, drv_index, 1,
1354			    &total_size, &block_size);
1355
1356	/* total size = last LBA + 1 */
1357	/* FFFFFFFF + 1 = 0, cannot have a logical volume of size 0 */
1358	/* so we assume this volume must be >2TB in size */
1359	if (total_size == (__u32) 0) {
1360		cciss_read_capacity_16(ctlr, drv_index, 1,
1361		&total_size, &block_size);
1362		h->cciss_read = CCISS_READ_16;
1363		h->cciss_write = CCISS_WRITE_16;
1364	} else {
1365		h->cciss_read = CCISS_READ_10;
1366		h->cciss_write = CCISS_WRITE_10;
1367	}
1368	cciss_geometry_inquiry(ctlr, drv_index, 1, total_size, block_size,
1369			       inq_buff, &h->drv[drv_index]);
1370
1371	++h->num_luns;
1372	disk = h->gendisk[drv_index];
1373	set_capacity(disk, h->drv[drv_index].nr_blocks);
1374
1375	/* if it's the controller it's already added */
1376	if (drv_index) {
1377		disk->queue = blk_init_queue(do_cciss_request, &h->lock);
1378
1379		/* Set up queue information */
1380		disk->queue->backing_dev_info.ra_pages = READ_AHEAD;
1381		blk_queue_bounce_limit(disk->queue, hba[ctlr]->pdev->dma_mask);
1382
1383		/* This is a hardware imposed limit. */
1384		blk_queue_max_hw_segments(disk->queue, MAXSGENTRIES);
1385
1386		/* This is a limit in the driver and could be eliminated. */
1387		blk_queue_max_phys_segments(disk->queue, MAXSGENTRIES);
1388
1389		blk_queue_max_sectors(disk->queue, 512);
1390
1391		blk_queue_softirq_done(disk->queue, cciss_softirq_done);
1392
1393		disk->queue->queuedata = hba[ctlr];
1394
1395		blk_queue_hardsect_size(disk->queue,
1396					hba[ctlr]->drv[drv_index].block_size);
1397
1398		h->drv[drv_index].queue = disk->queue;
1399		add_disk(disk);
1400	}
1401
1402      freeret:
1403	kfree(inq_buff);
1404	return;
1405      mem_msg:
1406	printk(KERN_ERR "cciss: out of memory\n");
1407	goto freeret;
1408}
1409
1410/* This function will find the first index of the controller's drive array
1411 * that has a -1 for the raid_level and will return that index.  This is
1412 * where new drives will be added.  If the index to be returned is greater
1413 * than the highest_lun index for the controller then highest_lun is set
1414 * to this new index.  If there are no available indexes then -1 is returned.
1415 */
1416static int cciss_find_free_drive_index(int ctlr)
1417{
1418	int i;
1419
1420	for (i = 0; i < CISS_MAX_LUN; i++) {
1421		if (hba[ctlr]->drv[i].raid_level == -1) {
1422			if (i > hba[ctlr]->highest_lun)
1423				hba[ctlr]->highest_lun = i;
1424			return i;
1425		}
1426	}
1427	return -1;
1428}
1429
1430/* This function will add and remove logical drives from the Logical
1431 * drive array of the controller and maintain persistence of ordering
1432 * so that mount points are preserved until the next reboot.  This allows
1433 * for the removal of logical drives in the middle of the drive array
1434 * without a re-ordering of those drives.
1435 * INPUT
1436 * h		= The controller to perform the operations on
1437 * del_disk	= The disk to remove if specified.  If the value given
1438 *		  is NULL then no disk is removed.
1439 */
1440static int rebuild_lun_table(ctlr_info_t *h, struct gendisk *del_disk)
1441{
1442	int ctlr = h->ctlr;
1443	int num_luns;
1444	ReportLunData_struct *ld_buff = NULL;
1445	drive_info_struct *drv = NULL;
1446	int return_code;
1447	int listlength = 0;
1448	int i;
1449	int drv_found;
1450	int drv_index = 0;
1451	__u32 lunid = 0;
1452	unsigned long flags;
1453
1454	/* Set busy_configuring flag for this operation */
1455	spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
1456	if (h->num_luns >= CISS_MAX_LUN) {
1457		spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
1458		return -EINVAL;
1459	}
1460
1461	if (h->busy_configuring) {
1462		spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
1463		return -EBUSY;
1464	}
1465	h->busy_configuring = 1;
1466
1467	/* if del_disk is NULL then we are being called to add a new disk
1468	 * and update the logical drive table.  If it is not NULL then
1469	 * we will check if the disk is in use or not.
1470	 */
1471	if (del_disk != NULL) {
1472		drv = get_drv(del_disk);
1473		drv->busy_configuring = 1;
1474		spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
1475		return_code = deregister_disk(del_disk, drv, 1);
1476		drv->busy_configuring = 0;
1477		h->busy_configuring = 0;
1478		return return_code;
1479	} else {
1480		spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
1481		if (!capable(CAP_SYS_RAWIO))
1482			return -EPERM;
1483
1484		ld_buff = kzalloc(sizeof(ReportLunData_struct), GFP_KERNEL);
1485		if (ld_buff == NULL)
1486			goto mem_msg;
1487
1488		return_code = sendcmd_withirq(CISS_REPORT_LOG, ctlr, ld_buff,
1489					      sizeof(ReportLunData_struct), 0,
1490					      0, 0, TYPE_CMD);
1491
1492		if (return_code == IO_OK) {
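			/* LUNListLength is returned as four big-endian bytes;
			 * assemble them into a host-order byte count (each
			 * LUN entry is 8 bytes, divided out below). */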
1493			listlength |=
1494			    (0xff & (unsigned int)(ld_buff->LUNListLength[0]))
1495			    << 24;
1496			listlength |=
1497			    (0xff & (unsigned int)(ld_buff->LUNListLength[1]))
1498			    << 16;
1499			listlength |=
1500			    (0xff & (unsigned int)(ld_buff->LUNListLength[2]))
1501			    << 8;
1502			listlength |=
1503			    0xff & (unsigned int)(ld_buff->LUNListLength[3]);
1504		} else {	/* reading number of logical volumes failed */
1505			printk(KERN_WARNING "cciss: report logical volume"
1506			       " command failed\n");
1507			listlength = 0;
1508			goto freeret;
1509		}
1510
1511		num_luns = listlength / 8;	/* 8 bytes per entry */
1512		if (num_luns > CISS_MAX_LUN) {
1513			num_luns = CISS_MAX_LUN;
1514			printk(KERN_WARNING "cciss: more luns configured"
1515			       " on controller than can be handled by"
1516			       " this driver.\n");
1517		}
1518
1519		/* Compare the controller's drive array to the driver's drive array.
1520		 * Check for updates in the drive information and any new drives
1521		 * on the controller.
1522		 */
1523		for (i = 0; i < num_luns; i++) {
1524			int j;
1525
1526			drv_found = 0;
1527
1528			lunid = (0xff &
1529				 (unsigned int)(ld_buff->LUN[i][3])) << 24;
1530			lunid |= (0xff &
1531				  (unsigned int)(ld_buff->LUN[i][2])) << 16;
1532			lunid |= (0xff &
1533				  (unsigned int)(ld_buff->LUN[i][1])) << 8;
1534			lunid |= 0xff & (unsigned int)(ld_buff->LUN[i][0]);
1535
1536			/* Find if the LUN is already in the drive array
1537			 * of the controller.  If so, update its info
1538			 * if it is not in use.  If it does not exist, find
1539			 * the first free index and add it.
1540			 */
1541			for (j = 0; j <= h->highest_lun; j++) {
1542				if (h->drv[j].LunID == lunid) {
1543					drv_index = j;
1544					drv_found = 1;
1545				}
1546			}
1547
1548			/* check if the drive was found already in the array */
1549			if (!drv_found) {
1550				drv_index = cciss_find_free_drive_index(ctlr);
1551				if (drv_index == -1)
1552					goto freeret;
1553
1554			}
1555			h->drv[drv_index].LunID = lunid;
1556			cciss_update_drive_info(ctlr, drv_index);
1557		}		/* end for */
1558	}			/* end else */
1559
1560      freeret:
1561	kfree(ld_buff);
1562	h->busy_configuring = 0;
1563	/* We return -1 here to tell the ACU that we have registered/updated
1564	 * all of the drives that we can and to keep it from calling us
1565	 * additional times.
1566	 */
1567	return -1;
1568      mem_msg:
1569	printk(KERN_ERR "cciss: out of memory\n");
1570	goto freeret;
1571}
1572
1573/* This function will deregister the disk and its queue from the
1574 * kernel.  It must be called with the controller lock held and the
1575 * drv structure's busy_configuring flag set.  Its parameters are:
1576 *
1577 * disk = This is the disk to be deregistered
1578 * drv  = This is the drive_info_struct associated with the disk to be
1579 *        deregistered.  It contains information about the disk used
1580 *        by the driver.
1581 * clear_all = This flag determines whether or not the disk information
1582 *             is going to be completely cleared out and the highest_lun
1583 *             reset.  Sometimes we want to clear out information about
1584 *             the disk in preparation for re-adding it.  In this case
1585 *             the highest_lun should be left unchanged and the LunID
1586 *             should not be cleared.
1587*/
1588static int deregister_disk(struct gendisk *disk, drive_info_struct *drv,
1589			   int clear_all)
1590{
1591	ctlr_info_t *h = get_host(disk);
1592
1593	if (!capable(CAP_SYS_RAWIO))
1594		return -EPERM;
1595
1596	/* make sure the logical volume is NOT in use */
1597	if (clear_all || (h->gendisk[0] == disk)) {
1598		if (drv->usage_count > 1)
1599			return -EBUSY;
1600	} else if (drv->usage_count > 0)
1601		return -EBUSY;
1602
1603	/* invalidate the devices and deregister the disk.  If it is disk
1604	 * zero do not deregister it but just zero out its values.  This
1605	 * allows us to delete disk zero but keep the controller registered.
1606	 */
1607	if (h->gendisk[0] != disk) {
1608		if (disk) {
1609			request_queue_t *q = disk->queue;
1610			if (disk->flags & GENHD_FL_UP)
1611				del_gendisk(disk);
1612			if (q) {
1613				blk_cleanup_queue(q);
1614				drv->queue = NULL;
1615			}
1616		}
1617	}
1618
1619	--h->num_luns;
1620	/* zero out the disk size info */
1621	drv->nr_blocks = 0;
1622	drv->block_size = 0;
1623	drv->heads = 0;
1624	drv->sectors = 0;
1625	drv->cylinders = 0;
1626	drv->raid_level = -1;	/* This can be used as a flag variable to
1627				 * indicate that this element of the drive
1628				 * array is free.
1629				 */
1630
1631	if (clear_all) {
1632		/* check to see if it was the last disk */
1633		if (drv == h->drv + h->highest_lun) {
1634			/* if so, find the new highest lun */
1635			int i, newhighest = -1;
1636			for (i = 0; i < h->highest_lun; i++) {
1637				/* if the disk has size > 0, it is available */
1638				if (h->drv[i].heads)
1639					newhighest = i;
1640			}
1641			h->highest_lun = newhighest;
1642		}
1643
1644		drv->LunID = 0;
1645	}
1646	return 0;
1647}
1648
1649static int fill_cmd(CommandList_struct *c, __u8 cmd, int ctlr, void *buff, size_t size, unsigned int use_unit_num,	/* 0: address the controller,
1650															   1: address logical volume log_unit,
1651															   2: periph device address is scsi3addr */
1652		    unsigned int log_unit, __u8 page_code,
1653		    unsigned char *scsi3addr, int cmd_type)
1654{
1655	ctlr_info_t *h = hba[ctlr];
1656	u64bit buff_dma_handle;
1657	int status = IO_OK;
1658
1659	c->cmd_type = CMD_IOCTL_PEND;
1660	c->Header.ReplyQueue = 0;
1661	if (buff != NULL) {
1662		c->Header.SGList = 1;
1663		c->Header.SGTotal = 1;
1664	} else {
1665		c->Header.SGList = 0;
1666		c->Header.SGTotal = 0;
1667	}
1668	c->Header.Tag.lower = c->busaddr;
1669
1670	c->Request.Type.Type = cmd_type;
1671	if (cmd_type == TYPE_CMD) {
1672		switch (cmd) {
1673		case CISS_INQUIRY:
1674			/* If the logical unit number is 0 then this command is
1675			   going to the controller, so it's a physical command:
1676			   mode = 0, target = 0.  So we have nothing to write.
1677			   Otherwise, if use_unit_num == 1,
1678			   mode = 1 (volume set addressing), target = LUNID;
1679			   otherwise, if use_unit_num == 2,
1680			   mode = 0 (periph dev addr), target = scsi3addr */
1681			if (use_unit_num == 1) {
1682				c->Header.LUN.LogDev.VolId =
1683				    h->drv[log_unit].LunID;
1684				c->Header.LUN.LogDev.Mode = 1;
1685			} else if (use_unit_num == 2) {
1686				memcpy(c->Header.LUN.LunAddrBytes, scsi3addr,
1687				       8);
1688				c->Header.LUN.LogDev.Mode = 0;
1689			}
1690			/* are we trying to read a vital product page */
1691			if (page_code != 0) {
1692				c->Request.CDB[1] = 0x01;
1693				c->Request.CDB[2] = page_code;
1694			}
1695			c->Request.CDBLen = 6;
1696			c->Request.Type.Attribute = ATTR_SIMPLE;
1697			c->Request.Type.Direction = XFER_READ;
1698			c->Request.Timeout = 0;
1699			c->Request.CDB[0] = CISS_INQUIRY;
1700			c->Request.CDB[4] = size & 0xFF;
1701			break;
1702		case CISS_REPORT_LOG:
1703		case CISS_REPORT_PHYS:
1704			/* Talking to the controller, so it's a physical command:
1705			   mode = 00, target = 0.  Nothing to write.
1706			 */
1707			c->Request.CDBLen = 12;
1708			c->Request.Type.Attribute = ATTR_SIMPLE;
1709			c->Request.Type.Direction = XFER_READ;
1710			c->Request.Timeout = 0;
1711			c->Request.CDB[0] = cmd;
1712			c->Request.CDB[6] = (size >> 24) & 0xFF;	//MSB
1713			c->Request.CDB[7] = (size >> 16) & 0xFF;
1714			c->Request.CDB[8] = (size >> 8) & 0xFF;
1715			c->Request.CDB[9] = size & 0xFF;
1716			break;
1717
1718		case CCISS_READ_CAPACITY:
1719			c->Header.LUN.LogDev.VolId = h->drv[log_unit].LunID;
1720			c->Header.LUN.LogDev.Mode = 1;
1721			c->Request.CDBLen = 10;
1722			c->Request.Type.Attribute = ATTR_SIMPLE;
1723			c->Request.Type.Direction = XFER_READ;
1724			c->Request.Timeout = 0;
1725			c->Request.CDB[0] = cmd;
1726			break;
1727		case CCISS_READ_CAPACITY_16:
1728			c->Header.LUN.LogDev.VolId = h->drv[log_unit].LunID;
1729			c->Header.LUN.LogDev.Mode = 1;
1730			c->Request.CDBLen = 16;
1731			c->Request.Type.Attribute = ATTR_SIMPLE;
1732			c->Request.Type.Direction = XFER_READ;
1733			c->Request.Timeout = 0;
1734			c->Request.CDB[0] = cmd;
1735			c->Request.CDB[1] = 0x10;
1736			c->Request.CDB[10] = (size >> 24) & 0xFF;
1737			c->Request.CDB[11] = (size >> 16) & 0xFF;
1738			c->Request.CDB[12] = (size >> 8) & 0xFF;
1739			c->Request.CDB[13] = size & 0xFF;
1740			c->Request.Timeout = 0;
1741			c->Request.CDB[0] = cmd;
1742			break;
1743		case CCISS_CACHE_FLUSH:
1744			c->Request.CDBLen = 12;
1745			c->Request.Type.Attribute = ATTR_SIMPLE;
1746			c->Request.Type.Direction = XFER_WRITE;
1747			c->Request.Timeout = 0;
1748			c->Request.CDB[0] = BMIC_WRITE;
1749			c->Request.CDB[6] = BMIC_CACHE_FLUSH;
1750			break;
1751		default:
1752			printk(KERN_WARNING
1753			       "cciss%d:  Unknown Command 0x%c\n", ctlr, cmd);
1754			return IO_ERROR;
1755		}
1756	} else if (cmd_type == TYPE_MSG) {
1757		switch (cmd) {
1758		case 0:	/* ABORT message */
1759			c->Request.CDBLen = 12;
1760			c->Request.Type.Attribute = ATTR_SIMPLE;
1761			c->Request.Type.Direction = XFER_WRITE;
1762			c->Request.Timeout = 0;
1763			c->Request.CDB[0] = cmd;	/* abort */
1764			c->Request.CDB[1] = 0;	/* abort a command */
1765			/* buff contains the tag of the command to abort */
1766			memcpy(&c->Request.CDB[4], buff, 8);
1767			break;
1768		case 1:	/* RESET message */
1769			c->Request.CDBLen = 12;
1770			c->Request.Type.Attribute = ATTR_SIMPLE;
1771			c->Request.Type.Direction = XFER_WRITE;
1772			c->Request.Timeout = 0;
1773			memset(&c->Request.CDB[0], 0, sizeof(c->Request.CDB));
1774			c->Request.CDB[0] = cmd;	/* reset */
1775			c->Request.CDB[1] = 0x04;	/* reset a LUN */
1776			break;
1777		case 3:	/* No-Op message */
1778			c->Request.CDBLen = 1;
1779			c->Request.Type.Attribute = ATTR_SIMPLE;
1780			c->Request.Type.Direction = XFER_WRITE;
1781			c->Request.Timeout = 0;
1782			c->Request.CDB[0] = cmd;
1783			break;
1784		default:
1785			printk(KERN_WARNING
1786			       "cciss%d: unknown message type %d\n", ctlr, cmd);
1787			return IO_ERROR;
1788		}
1789	} else {
1790		printk(KERN_WARNING
1791		       "cciss%d: unknown command type %d\n", ctlr, cmd_type);
1792		return IO_ERROR;
1793	}
1794	/* Fill in the scatter gather information */
1795	if (size > 0) {
1796		buff_dma_handle.val = (__u64) pci_map_single(h->pdev,
1797							     buff, size,
1798							     PCI_DMA_BIDIRECTIONAL);
1799		c->SG[0].Addr.lower = buff_dma_handle.val32.lower;
1800		c->SG[0].Addr.upper = buff_dma_handle.val32.upper;
1801		c->SG[0].Len = size;
1802		c->SG[0].Ext = 0;	/* we are not chaining */
1803	}
1804	return status;
1805}
1806
1807static int sendcmd_withirq(__u8 cmd,
1808			   int ctlr,
1809			   void *buff,
1810			   size_t size,
1811			   unsigned int use_unit_num,
1812			   unsigned int log_unit, __u8 page_code, int cmd_type)
1813{
1814	ctlr_info_t *h = hba[ctlr];
1815	CommandList_struct *c;
1816	u64bit buff_dma_handle;
1817	unsigned long flags;
1818	int return_status;
1819	DECLARE_COMPLETION_ONSTACK(wait);
1820
1821	if ((c = cmd_alloc(h, 0)) == NULL)
1822		return -ENOMEM;
1823	return_status = fill_cmd(c, cmd, ctlr, buff, size, use_unit_num,
1824				 log_unit, page_code, NULL, cmd_type);
1825	if (return_status != IO_OK) {
1826		cmd_free(h, c, 0);
1827		return return_status;
1828	}
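	/* Re-entry point for commands that complete with an unsolicited
	 * abort: the completion is re-armed and the command re-queued,
	 * up to MAX_CMD_RETRIES times. */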
1829      resend_cmd2:
1830	c->waiting = &wait;
1831
1832	/* Put the request on the tail of the queue and send it */
1833	spin_lock_irqsave(CCISS_LOCK(ctlr), flags);
1834	addQ(&h->reqQ, c);
1835	h->Qdepth++;
1836	start_io(h);
1837	spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
1838
1839	wait_for_completion(&wait);
1840
1841	if (c->err_info->CommandStatus != 0) {	/* an error has occurred */
1842		switch (c->err_info->CommandStatus) {
1843		case CMD_TARGET_STATUS:
			printk(KERN_WARNING "cciss: cmd %p has "
			       "completed with errors\n", c);
1846			if (c->err_info->ScsiStatus) {
1847				printk(KERN_WARNING "cciss: cmd %p "
1848				       "has SCSI Status = %x\n",
1849				       c, c->err_info->ScsiStatus);
1850			}
1851
1852			break;
1853		case CMD_DATA_UNDERRUN:
1854		case CMD_DATA_OVERRUN:
1855			/* expected for inquire and report lun commands */
1856			break;
1857		case CMD_INVALID:
1858			printk(KERN_WARNING "cciss: Cmd %p is "
1859			       "reported invalid\n", c);
1860			return_status = IO_ERROR;
1861			break;
1862		case CMD_PROTOCOL_ERR:
1863			printk(KERN_WARNING "cciss: cmd %p has "
1864			       "protocol error \n", c);
1865			return_status = IO_ERROR;
1866			break;
1867		case CMD_HARDWARE_ERR:
			printk(KERN_WARNING "cciss: cmd %p had "
			       "a hardware error\n", c);
1870			return_status = IO_ERROR;
1871			break;
1872		case CMD_CONNECTION_LOST:
1873			printk(KERN_WARNING "cciss: cmd %p had "
1874			       "connection lost\n", c);
1875			return_status = IO_ERROR;
1876			break;
1877		case CMD_ABORTED:
1878			printk(KERN_WARNING "cciss: cmd %p was "
1879			       "aborted\n", c);
1880			return_status = IO_ERROR;
1881			break;
1882		case CMD_ABORT_FAILED:
1883			printk(KERN_WARNING "cciss: cmd %p reports "
1884			       "abort failed\n", c);
1885			return_status = IO_ERROR;
1886			break;
1887		case CMD_UNSOLICITED_ABORT:
1888			printk(KERN_WARNING
1889			       "cciss%d: unsolicited abort %p\n", ctlr, c);
1890			if (c->retry_count < MAX_CMD_RETRIES) {
1891				printk(KERN_WARNING
1892				       "cciss%d: retrying %p\n", ctlr, c);
1893				c->retry_count++;
1894				/* erase the old error information */
1895				memset(c->err_info, 0,
1896				       sizeof(ErrorInfo_struct));
1897				return_status = IO_OK;
1898				INIT_COMPLETION(wait);
1899				goto resend_cmd2;
1900			}
1901			return_status = IO_ERROR;
1902			break;
1903		default:
1904			printk(KERN_WARNING "cciss: cmd %p returned "
1905			       "unknown status %x\n", c,
1906			       c->err_info->CommandStatus);
1907			return_status = IO_ERROR;
1908		}
1909	}
1910	/* unlock the buffers from DMA */
1911	buff_dma_handle.val32.lower = c->SG[0].Addr.lower;
1912	buff_dma_handle.val32.upper = c->SG[0].Addr.upper;
1913	pci_unmap_single(h->pdev, (dma_addr_t) buff_dma_handle.val,
1914			 c->SG[0].Len, PCI_DMA_BIDIRECTIONAL);
1915	cmd_free(h, c, 0);
1916	return return_status;
1917}
1918
1919static void cciss_geometry_inquiry(int ctlr, int logvol,
1920				   int withirq, sector_t total_size,
1921				   unsigned int block_size,
1922				   InquiryData_struct *inq_buff,
1923				   drive_info_struct *drv)
1924{
1925	int return_code;
1926	unsigned long t;
1927	unsigned long rem;
1928
1929	memset(inq_buff, 0, sizeof(InquiryData_struct));
1930	if (withirq)
1931		return_code = sendcmd_withirq(CISS_INQUIRY, ctlr,
1932					      inq_buff, sizeof(*inq_buff), 1,
1933					      logvol, 0xC1, TYPE_CMD);
1934	else
1935		return_code = sendcmd(CISS_INQUIRY, ctlr, inq_buff,
1936				      sizeof(*inq_buff), 1, logvol, 0xC1, NULL,
1937				      TYPE_CMD);
1938	if (return_code == IO_OK) {
1939		if (inq_buff->data_byte[8] == 0xFF) {
1940			printk(KERN_WARNING
1941			       "cciss: reading geometry failed, volume "
1942			       "does not support reading geometry\n");
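			/* Fall back to a fabricated 255-head, 32-sector
			 * geometry and derive the cylinder count from the
			 * total capacity. */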
1943			drv->block_size = block_size;
1944			drv->nr_blocks = total_size;
1945			drv->heads = 255;
1946			drv->sectors = 32;	// Sectors per track
1947			t = drv->heads * drv->sectors;
1948			drv->cylinders = total_size;
1949			rem = do_div(drv->cylinders, t);
1950		} else {
1951			drv->block_size = block_size;
1952			drv->nr_blocks = total_size;
1953			drv->heads = inq_buff->data_byte[6];
1954			drv->sectors = inq_buff->data_byte[7];
1955			drv->cylinders = (inq_buff->data_byte[4] & 0xff) << 8;
1956			drv->cylinders += inq_buff->data_byte[5];
1957			drv->raid_level = inq_buff->data_byte[8];
1958			t = drv->heads * drv->sectors;
1959			if (t > 1) {
1960				drv->cylinders = total_size;
1961				rem = do_div(drv->cylinders, t);
1962			}
1963		}
1964	} else {		/* Get geometry failed */
1965		printk(KERN_WARNING "cciss: reading geometry failed\n");
1966	}
1967	printk(KERN_INFO "      heads=%d, sectors=%d, cylinders=%d\n\n",
1968	       drv->heads, drv->sectors, drv->cylinders);
1969}
1970
1971static void
1972cciss_read_capacity(int ctlr, int logvol, int withirq, sector_t *total_size,
1973		    unsigned int *block_size)
1974{
1975	ReadCapdata_struct *buf;
1976	int return_code;
1977	buf = kmalloc(sizeof(ReadCapdata_struct), GFP_KERNEL);
1978	if (buf == NULL) {
1979		printk(KERN_WARNING "cciss: out of memory\n");
1980		return;
1981	}
1982	memset(buf, 0, sizeof(ReadCapdata_struct));
1983	if (withirq)
1984		return_code = sendcmd_withirq(CCISS_READ_CAPACITY,
1985				ctlr, buf, sizeof(ReadCapdata_struct),
1986					1, logvol, 0, TYPE_CMD);
1987	else
1988		return_code = sendcmd(CCISS_READ_CAPACITY,
1989				ctlr, buf, sizeof(ReadCapdata_struct),
1990					1, logvol, 0, NULL, TYPE_CMD);
1991	if (return_code == IO_OK) {
1992		*total_size = be32_to_cpu(*(__u32 *) buf->total_size)+1;
1993		*block_size = be32_to_cpu(*(__u32 *) buf->block_size);
1994	} else {		/* read capacity command failed */
1995		printk(KERN_WARNING "cciss: read capacity failed\n");
1996		*total_size = 0;
1997		*block_size = BLOCK_SIZE;
1998	}
	if (*total_size != (__u32) 0)
		printk(KERN_INFO "      blocks= %llu block_size= %d\n",
		       (unsigned long long)*total_size, *block_size);
2002	kfree(buf);
2003	return;
2004}
2005
2006static void
cciss_read_capacity_16(int ctlr, int logvol, int withirq,
		       sector_t *total_size, unsigned int *block_size)
2008{
2009	ReadCapdata_struct_16 *buf;
2010	int return_code;
2011	buf = kmalloc(sizeof(ReadCapdata_struct_16), GFP_KERNEL);
2012	if (buf == NULL) {
2013		printk(KERN_WARNING "cciss: out of memory\n");
2014		return;
2015	}
2016	memset(buf, 0, sizeof(ReadCapdata_struct_16));
2017	if (withirq) {
2018		return_code = sendcmd_withirq(CCISS_READ_CAPACITY_16,
2019			ctlr, buf, sizeof(ReadCapdata_struct_16),
2020				1, logvol, 0, TYPE_CMD);
2021	}
2022	else {
2023		return_code = sendcmd(CCISS_READ_CAPACITY_16,
2024			ctlr, buf, sizeof(ReadCapdata_struct_16),
2025				1, logvol, 0, NULL, TYPE_CMD);
2026	}
2027	if (return_code == IO_OK) {
2028		*total_size = be64_to_cpu(*(__u64 *) buf->total_size)+1;
2029		*block_size = be32_to_cpu(*(__u32 *) buf->block_size);
2030	} else {		/* read capacity command failed */
2031		printk(KERN_WARNING "cciss: read capacity failed\n");
2032		*total_size = 0;
2033		*block_size = BLOCK_SIZE;
2034	}
	printk(KERN_INFO "      blocks= %llu block_size= %d\n",
	       (unsigned long long)*total_size, *block_size);
2037	kfree(buf);
2038	return;
2039}
2040
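/*
 * Re-read the capacity and geometry of a logical volume and update the
 * gendisk and request queue to match.  Returns 0 on success, 1 if the
 * volume can no longer be found or memory allocation fails.
 */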
2041static int cciss_revalidate(struct gendisk *disk)
2042{
2043	ctlr_info_t *h = get_host(disk);
2044	drive_info_struct *drv = get_drv(disk);
2045	int logvol;
2046	int FOUND = 0;
2047	unsigned int block_size;
2048	sector_t total_size;
2049	InquiryData_struct *inq_buff = NULL;
2050
2051	for (logvol = 0; logvol < CISS_MAX_LUN; logvol++) {
2052		if (h->drv[logvol].LunID == drv->LunID) {
2053			FOUND = 1;
2054			break;
2055		}
2056	}
2057
2058	if (!FOUND)
2059		return 1;
2060
2061	inq_buff = kmalloc(sizeof(InquiryData_struct), GFP_KERNEL);
2062	if (inq_buff == NULL) {
2063		printk(KERN_WARNING "cciss: out of memory\n");
2064		return 1;
2065	}
2066	if (h->cciss_read == CCISS_READ_10) {
2067		cciss_read_capacity(h->ctlr, logvol, 1,
2068					&total_size, &block_size);
2069	} else {
2070		cciss_read_capacity_16(h->ctlr, logvol, 1,
2071					&total_size, &block_size);
2072	}
2073	cciss_geometry_inquiry(h->ctlr, logvol, 1, total_size, block_size,
2074			       inq_buff, drv);
2075
2076	blk_queue_hardsect_size(drv->queue, drv->block_size);
2077	set_capacity(disk, drv->nr_blocks);
2078
2079	kfree(inq_buff);
2080	return 0;
2081}
2082
2083/*
2084 *   Wait polling for a command to complete.
2085 *   The memory mapped FIFO is polled for the completion.
2086 *   Used only at init time, interrupts from the HBA are disabled.
2087 */
2088static unsigned long pollcomplete(int ctlr)
2089{
2090	unsigned long done;
2091	int i;
2092
2093	/* Wait (up to 20 seconds) for a command to complete */
2094
2095	for (i = 20 * HZ; i > 0; i--) {
2096		done = hba[ctlr]->access.command_completed(hba[ctlr]);
2097		if (done == FIFO_EMPTY)
2098			schedule_timeout_uninterruptible(1);
2099		else
2100			return done;
2101	}
2102	/* Invalid address to tell caller we ran out of time */
2103	return 1;
2104}
2105
2106static int add_sendcmd_reject(__u8 cmd, int ctlr, unsigned long complete)
2107{
2108	/* We get in here if sendcmd() is polling for completions
2109	   and gets some command back that it wasn't expecting --
2110	   something other than that which it just sent down.
2111	   Ordinarily, that shouldn't happen, but it can happen when
2112	   the scsi tape stuff gets into error handling mode, and
2113	   starts using sendcmd() to try to abort commands and
2114	   reset tape drives.  In that case, sendcmd may pick up
2115	   completions of commands that were sent to logical drives
2116	   through the block i/o system, or cciss ioctls completing, etc.
2117	   In that case, we need to save those completions for later
2118	   processing by the interrupt handler.
2119	 */
2120
2121#ifdef CONFIG_CISS_SCSI_TAPE
2122	struct sendcmd_reject_list *srl = &hba[ctlr]->scsi_rejects;
2123
2124	/* If it's not the scsi tape stuff doing error handling, (abort */
2125	/* or reset) then we don't expect anything weird. */
2126	if (cmd != CCISS_RESET_MSG && cmd != CCISS_ABORT_MSG) {
2127#endif
		printk(KERN_WARNING "cciss%d: SendCmd: "
		       "Invalid command list address returned! (%lx)\n",
		       ctlr, complete);
2131		/* not much we can do. */
2132#ifdef CONFIG_CISS_SCSI_TAPE
2133		return 1;
2134	}
2135
2136	/* We've sent down an abort or reset, but something else
2137	   has completed */
2138	if (srl->ncompletions >= (NR_CMDS + 2)) {
2139		/* Uh oh.  No room to save it for later... */
2140		printk(KERN_WARNING "cciss%d: Sendcmd: Invalid command addr, "
2141		       "reject list overflow, command lost!\n", ctlr);
2142		return 1;
2143	}
2144	/* Save it for later */
2145	srl->complete[srl->ncompletions] = complete;
2146	srl->ncompletions++;
2147#endif
2148	return 0;
2149}
2150
2151/*
2152 * Send a command to the controller, and wait for it to complete.
2153 * Only used at init time.
2154 */
static int sendcmd(__u8 cmd, int ctlr, void *buff, size_t size,
		   unsigned int use_unit_num, /* 0: address the controller,
						 1: address logical volume log_unit,
						 2: periph device address is scsi3addr */
		   unsigned int log_unit,
		   __u8 page_code, unsigned char *scsi3addr, int cmd_type)
2160{
2161	CommandList_struct *c;
2162	int i;
2163	unsigned long complete;
2164	ctlr_info_t *info_p = hba[ctlr];
2165	u64bit buff_dma_handle;
2166	int status, done = 0;
2167
2168	if ((c = cmd_alloc(info_p, 1)) == NULL) {
2169		printk(KERN_WARNING "cciss: unable to get memory");
2170		return IO_ERROR;
2171	}
2172	status = fill_cmd(c, cmd, ctlr, buff, size, use_unit_num,
2173			  log_unit, page_code, scsi3addr, cmd_type);
2174	if (status != IO_OK) {
2175		cmd_free(info_p, c, 1);
2176		return status;
2177	}
2178      resend_cmd1:
2179	/*
2180	 * Disable interrupt
2181	 */
2182#ifdef CCISS_DEBUG
2183	printk(KERN_DEBUG "cciss: turning intr off\n");
2184#endif				/* CCISS_DEBUG */
2185	info_p->access.set_intr_mask(info_p, CCISS_INTR_OFF);
2186
2187	/* Make sure there is room in the command FIFO */
2188	/* Actually it should be completely empty at this time */
2189	/* unless we are in here doing error handling for the scsi */
2190	/* tape side of the driver. */
2191	for (i = 200000; i > 0; i--) {
2192		/* if fifo isn't full go */
2193		if (!(info_p->access.fifo_full(info_p))) {
2194
2195			break;
2196		}
2197		udelay(10);
		printk(KERN_WARNING "cciss%d: SendCmd: FIFO full,"
		       " waiting!\n", ctlr);
2200	}
2201	/*
2202	 * Send the cmd
2203	 */
2204	info_p->access.submit_command(info_p, c);
2205	done = 0;
2206	do {
2207		complete = pollcomplete(ctlr);
2208
2209#ifdef CCISS_DEBUG
2210		printk(KERN_DEBUG "cciss: command completed\n");
2211#endif				/* CCISS_DEBUG */
2212
2213		if (complete == 1) {
			printk(KERN_WARNING
			       "cciss%d: SendCmd timed out, "
			       "no command list address returned!\n", ctlr);
2217			status = IO_ERROR;
2218			done = 1;
2219			break;
2220		}
2221
2222		/* This will need to change for direct lookup completions */
2223		if ((complete & CISS_ERROR_BIT)
2224		    && (complete & ~CISS_ERROR_BIT) == c->busaddr) {
			/* if data overrun or underrun on Report or
			   Inquiry commands, ignore it
			 */
2228			if (((c->Request.CDB[0] == CISS_REPORT_LOG) ||
2229			     (c->Request.CDB[0] == CISS_REPORT_PHYS) ||
2230			     (c->Request.CDB[0] == CISS_INQUIRY)) &&
2231			    ((c->err_info->CommandStatus ==
2232			      CMD_DATA_OVERRUN) ||
2233			     (c->err_info->CommandStatus == CMD_DATA_UNDERRUN)
2234			    )) {
2235				complete = c->busaddr;
2236			} else {
2237				if (c->err_info->CommandStatus ==
2238				    CMD_UNSOLICITED_ABORT) {
2239					printk(KERN_WARNING "cciss%d: "
2240					       "unsolicited abort %p\n",
2241					       ctlr, c);
2242					if (c->retry_count < MAX_CMD_RETRIES) {
2243						printk(KERN_WARNING
2244						       "cciss%d: retrying %p\n",
2245						       ctlr, c);
2246						c->retry_count++;
2247						/* erase the old error */
2248						/* information */
2249						memset(c->err_info, 0,
2250						       sizeof
2251						       (ErrorInfo_struct));
2252						goto resend_cmd1;
2253					} else {
2254						printk(KERN_WARNING
2255						       "cciss%d: retried %p too "
2256						       "many times\n", ctlr, c);
2257						status = IO_ERROR;
2258						goto cleanup1;
2259					}
2260				} else if (c->err_info->CommandStatus ==
2261					   CMD_UNABORTABLE) {
2262					printk(KERN_WARNING
2263					       "cciss%d: command could not be aborted.\n",
2264					       ctlr);
2265					status = IO_ERROR;
2266					goto cleanup1;
2267				}
				printk(KERN_WARNING "cciss%d: sendcmd"
				       " Error %x\n", ctlr,
				       c->err_info->CommandStatus);
				printk(KERN_WARNING "cciss%d: sendcmd"
				       " offensive info\n"
2273				       "  size %x\n   num %x   value %x\n",
2274				       ctlr,
2275				       c->err_info->MoreErrInfo.Invalid_Cmd.
2276				       offense_size,
2277				       c->err_info->MoreErrInfo.Invalid_Cmd.
2278				       offense_num,
2279				       c->err_info->MoreErrInfo.Invalid_Cmd.
2280				       offense_value);
2281				status = IO_ERROR;
2282				goto cleanup1;
2283			}
2284		}
2285		/* This will need changing for direct lookup completions */
2286		if (complete != c->busaddr) {
2287			if (add_sendcmd_reject(cmd, ctlr, complete) != 0) {
2288				BUG();	/* we are pretty much hosed if we get here. */
2289			}
2290			continue;
2291		} else
2292			done = 1;
2293	} while (!done);
2294
2295      cleanup1:
2296	/* unlock the data buffer from DMA */
2297	buff_dma_handle.val32.lower = c->SG[0].Addr.lower;
2298	buff_dma_handle.val32.upper = c->SG[0].Addr.upper;
2299	pci_unmap_single(info_p->pdev, (dma_addr_t) buff_dma_handle.val,
2300			 c->SG[0].Len, PCI_DMA_BIDIRECTIONAL);
2301#ifdef CONFIG_CISS_SCSI_TAPE
2302	/* if we saved some commands for later, process them now. */
2303	if (info_p->scsi_rejects.ncompletions > 0)
2304		do_cciss_intr(0, info_p, NULL);
2305#endif
2306	cmd_free(info_p, c, 1);
2307	return status;
2308}
2309
2310/*
2311 * Map (physical) PCI mem into (virtual) kernel space
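 * ioremap() wants a page-aligned address, so map from the start of the
 * containing page and hand back a pointer adjusted by the original offset.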
2312 */
2313static void __iomem *remap_pci_mem(ulong base, ulong size)
2314{
2315	ulong page_base = ((ulong) base) & PAGE_MASK;
2316	ulong page_offs = ((ulong) base) - page_base;
2317	void __iomem *page_remapped = ioremap(page_base, page_offs + size);
2318
2319	return page_remapped ? (page_remapped + page_offs) : NULL;
2320}
2321
2322/*
 * Takes jobs off the request Q and sends them to the hardware, then puts
 * them on the completion Q to wait for completion.
2325 */
2326static void start_io(ctlr_info_t *h)
2327{
2328	CommandList_struct *c;
2329
2330	while ((c = h->reqQ) != NULL) {
2331		/* can't do anything if fifo is full */
2332		if ((h->access.fifo_full(h))) {
2333			printk(KERN_WARNING "cciss: fifo full\n");
2334			break;
2335		}
2336
2337		/* Get the first entry from the Request Q */
2338		removeQ(&(h->reqQ), c);
2339		h->Qdepth--;
2340
		/* Tell the controller to execute the command */
2342		h->access.submit_command(h, c);
2343
2344		/* Put job onto the completed Q */
2345		addQ(&(h->cmpQ), c);
2346	}
2347}
2348
2349/* Assumes that CCISS_LOCK(h->ctlr) is held. */
2350/* Zeros out the error record and then resends the command back */
2351/* to the controller */
2352static inline void resend_cciss_cmd(ctlr_info_t *h, CommandList_struct *c)
2353{
2354	/* erase the old error information */
2355	memset(c->err_info, 0, sizeof(ErrorInfo_struct));
2356
2357	/* add it to software queue and then send it to the controller */
2358	addQ(&(h->reqQ), c);
2359	h->Qdepth++;
2360	if (h->Qdepth > h->maxQsinceinit)
2361		h->maxQsinceinit = h->Qdepth;
2362
2363	start_io(h);
2364}
2365
2366/* checks the status of the job and calls complete buffers to mark all
2367 * buffers for the completed job. Note that this function does not need
2368 * to hold the hba/queue lock.
2369 */
2370static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd,
2371				    int timeout)
2372{
2373	int status = 1;
2374	int retry_cmd = 0;
2375
2376	if (timeout)
2377		status = 0;
2378
2379	if (cmd->err_info->CommandStatus != 0) {	/* an error has occurred */
2380		switch (cmd->err_info->CommandStatus) {
2381			unsigned char sense_key;
2382		case CMD_TARGET_STATUS:
2383			status = 0;
2384
2385			if (cmd->err_info->ScsiStatus == 0x02) {
2386				printk(KERN_WARNING "cciss: cmd %p "
				       "has CHECK CONDITION, "
				       "byte 2 = 0x%x\n", cmd,
2389				       cmd->err_info->SenseInfo[2]
2390				    );
2391				/* check the sense key */
2392				sense_key = 0xf & cmd->err_info->SenseInfo[2];
2393				/* no status or recovered error */
2394				if ((sense_key == 0x0) || (sense_key == 0x1)) {
2395					status = 1;
2396				}
2397			} else {
2398				printk(KERN_WARNING "cciss: cmd %p "
2399				       "has SCSI Status 0x%x\n",
2400				       cmd, cmd->err_info->ScsiStatus);
2401			}
2402			break;
2403		case CMD_DATA_UNDERRUN:
2404			printk(KERN_WARNING "cciss: cmd %p has"
2405			       " completed with data underrun "
2406			       "reported\n", cmd);
2407			break;
2408		case CMD_DATA_OVERRUN:
2409			printk(KERN_WARNING "cciss: cmd %p has"
2410			       " completed with data overrun "
2411			       "reported\n", cmd);
2412			break;
2413		case CMD_INVALID:
2414			printk(KERN_WARNING "cciss: cmd %p is "
2415			       "reported invalid\n", cmd);
2416			status = 0;
2417			break;
2418		case CMD_PROTOCOL_ERR:
2419			printk(KERN_WARNING "cciss: cmd %p has "
2420			       "protocol error \n", cmd);
2421			status = 0;
2422			break;
2423		case CMD_HARDWARE_ERR:
			printk(KERN_WARNING "cciss: cmd %p had "
			       "a hardware error\n", cmd);
2426			status = 0;
2427			break;
2428		case CMD_CONNECTION_LOST:
2429			printk(KERN_WARNING "cciss: cmd %p had "
2430			       "connection lost\n", cmd);
2431			status = 0;
2432			break;
2433		case CMD_ABORTED:
2434			printk(KERN_WARNING "cciss: cmd %p was "
2435			       "aborted\n", cmd);
2436			status = 0;
2437			break;
2438		case CMD_ABORT_FAILED:
2439			printk(KERN_WARNING "cciss: cmd %p reports "
2440			       "abort failed\n", cmd);
2441			status = 0;
2442			break;
2443		case CMD_UNSOLICITED_ABORT:
2444			printk(KERN_WARNING "cciss%d: unsolicited "
2445			       "abort %p\n", h->ctlr, cmd);
2446			if (cmd->retry_count < MAX_CMD_RETRIES) {
2447				retry_cmd = 1;
2448				printk(KERN_WARNING
2449				       "cciss%d: retrying %p\n", h->ctlr, cmd);
2450				cmd->retry_count++;
2451			} else
2452				printk(KERN_WARNING
2453				       "cciss%d: %p retried too "
2454				       "many times\n", h->ctlr, cmd);
2455			status = 0;
2456			break;
2457		case CMD_TIMEOUT:
			printk(KERN_WARNING "cciss: cmd %p timed out\n", cmd);
2459			status = 0;
2460			break;
2461		default:
2462			printk(KERN_WARNING "cciss: cmd %p returned "
2463			       "unknown status %x\n", cmd,
2464			       cmd->err_info->CommandStatus);
2465			status = 0;
2466		}
2467	}
2468	/* We need to return this command */
2469	if (retry_cmd) {
2470		resend_cciss_cmd(h, cmd);
2471		return;
2472	}
2473
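	/* Hand the request to the block layer's softirq completion path
	 * (cciss_softirq_done, registered in cciss_init_one). */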
2474	cmd->rq->completion_data = cmd;
2475	cmd->rq->errors = status;
2476	blk_add_trace_rq(cmd->rq->q, cmd->rq, BLK_TA_COMPLETE);
2477	blk_complete_request(cmd->rq);
2478}
2479
2480/*
2481 * Get a request and submit it to the controller.
2482 */
2483static void do_cciss_request(request_queue_t *q)
2484{
2485	ctlr_info_t *h = q->queuedata;
2486	CommandList_struct *c;
2487	sector_t start_blk;
2488	int seg;
2489	struct request *creq;
2490	u64bit temp64;
2491	struct scatterlist tmp_sg[MAXSGENTRIES];
2492	drive_info_struct *drv;
2493	int i, dir;
2494
2495	/* We call start_io here in case there is a command waiting on the
2496	 * queue that has not been sent.
2497	 */
2498	if (blk_queue_plugged(q))
2499		goto startio;
2500
2501      queue:
2502	creq = elv_next_request(q);
2503	if (!creq)
2504		goto startio;
2505
2506	BUG_ON(creq->nr_phys_segments > MAXSGENTRIES);
2507
2508	if ((c = cmd_alloc(h, 1)) == NULL)
2509		goto full;
2510
2511	blkdev_dequeue_request(creq);
2512
2513	spin_unlock_irq(q->queue_lock);
2514
2515	c->cmd_type = CMD_RWREQ;
2516	c->rq = creq;
2517
2518	/* fill in the request */
2519	drv = creq->rq_disk->private_data;
2520	c->Header.ReplyQueue = 0;	// unused in simple mode
2521	/* got command from pool, so use the command block index instead */
2522	/* for direct lookups. */
2523	/* The first 2 bits are reserved for controller error reporting. */
2524	c->Header.Tag.lower = (c->cmdindex << 3);
2525	c->Header.Tag.lower |= 0x04;	/* flag for direct lookup. */
2526	c->Header.LUN.LogDev.VolId = drv->LunID;
2527	c->Header.LUN.LogDev.Mode = 1;
2528	c->Request.CDBLen = 10;	// 12 byte commands not in FW yet;
2529	c->Request.Type.Type = TYPE_CMD;	// It is a command.
2530	c->Request.Type.Attribute = ATTR_SIMPLE;
2531	c->Request.Type.Direction =
2532	    (rq_data_dir(creq) == READ) ? h->cciss_read : h->cciss_write;
2533	c->Request.Timeout = 0;	// Don't time out
2534	c->Request.CDB[0] =
2535	    (rq_data_dir(creq) == READ) ? h->cciss_read : h->cciss_write;
2536	start_blk = creq->sector;
2537#ifdef CCISS_DEBUG
	printk(KERN_DEBUG "cciss: sector=%d nr_sectors=%d\n",
	       (int)creq->sector, (int)creq->nr_sectors);
2540#endif				/* CCISS_DEBUG */
2541
2542	seg = blk_rq_map_sg(q, creq, tmp_sg);
2543
2544	/* get the DMA records for the setup */
2545	if (c->Request.Type.Direction == XFER_READ)
2546		dir = PCI_DMA_FROMDEVICE;
2547	else
2548		dir = PCI_DMA_TODEVICE;
2549
2550	for (i = 0; i < seg; i++) {
2551		c->SG[i].Len = tmp_sg[i].length;
2552		temp64.val = (__u64) pci_map_page(h->pdev, tmp_sg[i].page,
2553						  tmp_sg[i].offset,
2554						  tmp_sg[i].length, dir);
2555		c->SG[i].Addr.lower = temp64.val32.lower;
2556		c->SG[i].Addr.upper = temp64.val32.upper;
2557		c->SG[i].Ext = 0;	// we are not chaining
2558	}
2559	/* track how many SG entries we are using */
2560	if (seg > h->maxSG)
2561		h->maxSG = seg;
2562
2563#ifdef CCISS_DEBUG
	printk(KERN_DEBUG "cciss: Submitting %lu sectors in %d segments\n",
	       creq->nr_sectors, seg);
2566#endif				/* CCISS_DEBUG */
2567
2568	c->Header.SGList = c->Header.SGTotal = seg;
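	/* Build the READ/WRITE CDB: the 10-byte form (32-bit LBA) is used
	 * when the controller was set up for READ_10/WRITE_10, otherwise
	 * the 16-byte form (64-bit LBA) is used for large volumes. */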
2569	if(h->cciss_read == CCISS_READ_10) {
2570		c->Request.CDB[1] = 0;
2571		c->Request.CDB[2] = (start_blk >> 24) & 0xff;	//MSB
2572		c->Request.CDB[3] = (start_blk >> 16) & 0xff;
2573		c->Request.CDB[4] = (start_blk >> 8) & 0xff;
2574		c->Request.CDB[5] = start_blk & 0xff;
2575		c->Request.CDB[6] = 0;	// (sect >> 24) & 0xff; MSB
2576		c->Request.CDB[7] = (creq->nr_sectors >> 8) & 0xff;
2577		c->Request.CDB[8] = creq->nr_sectors & 0xff;
2578		c->Request.CDB[9] = c->Request.CDB[11] = c->Request.CDB[12] = 0;
2579	} else {
2580		c->Request.CDBLen = 16;
2581		c->Request.CDB[1]= 0;
2582		c->Request.CDB[2]= (start_blk >> 56) & 0xff;	//MSB
2583		c->Request.CDB[3]= (start_blk >> 48) & 0xff;
2584		c->Request.CDB[4]= (start_blk >> 40) & 0xff;
2585		c->Request.CDB[5]= (start_blk >> 32) & 0xff;
2586		c->Request.CDB[6]= (start_blk >> 24) & 0xff;
2587		c->Request.CDB[7]= (start_blk >> 16) & 0xff;
2588		c->Request.CDB[8]= (start_blk >>  8) & 0xff;
2589		c->Request.CDB[9]= start_blk & 0xff;
2590		c->Request.CDB[10]= (creq->nr_sectors >>  24) & 0xff;
2591		c->Request.CDB[11]= (creq->nr_sectors >>  16) & 0xff;
2592		c->Request.CDB[12]= (creq->nr_sectors >>  8) & 0xff;
2593		c->Request.CDB[13]= creq->nr_sectors & 0xff;
2594		c->Request.CDB[14] = c->Request.CDB[15] = 0;
2595	}
2596
2597	spin_lock_irq(q->queue_lock);
2598
2599	addQ(&(h->reqQ), c);
2600	h->Qdepth++;
2601	if (h->Qdepth > h->maxQsinceinit)
2602		h->maxQsinceinit = h->Qdepth;
2603
2604	goto queue;
2605full:
2606	blk_stop_queue(q);
2607startio:
	/* We already hold the driver lock here, so there is no need
	 * to take it again.
	 */
2611	start_io(h);
2612}
2613
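/* Pull the next completed command tag.  With SCSI tape support built in,
 * completions that sendcmd() stashed on the scsi_rejects list are drained
 * before the hardware FIFO is read. */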
2614static inline unsigned long get_next_completion(ctlr_info_t *h)
2615{
2616#ifdef CONFIG_CISS_SCSI_TAPE
2617	/* Any rejects from sendcmd() lying around? Process them first */
2618	if (h->scsi_rejects.ncompletions == 0)
2619		return h->access.command_completed(h);
2620	else {
2621		struct sendcmd_reject_list *srl;
2622		int n;
2623		srl = &h->scsi_rejects;
2624		n = --srl->ncompletions;
2625		/* printk("cciss%d: processing saved reject\n", h->ctlr); */
2626		printk("p");
2627		return srl->complete[n];
2628	}
2629#else
2630	return h->access.command_completed(h);
2631#endif
2632}
2633
2634static inline int interrupt_pending(ctlr_info_t *h)
2635{
2636#ifdef CONFIG_CISS_SCSI_TAPE
2637	return (h->access.intr_pending(h)
2638		|| (h->scsi_rejects.ncompletions > 0));
2639#else
2640	return h->access.intr_pending(h);
2641#endif
2642}
2643
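/* Returns nonzero when the interrupt is not ours: the controller is not
 * asserting an interrupt (or our interrupts are disabled) and, with SCSI
 * tape support, there are no stashed reject completions to process. */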
2644static inline long interrupt_not_for_us(ctlr_info_t *h)
2645{
2646#ifdef CONFIG_CISS_SCSI_TAPE
2647	return (((h->access.intr_pending(h) == 0) ||
2648		 (h->interrupts_enabled == 0))
2649		&& (h->scsi_rejects.ncompletions == 0));
2650#else
2651	return (((h->access.intr_pending(h) == 0) ||
2652		 (h->interrupts_enabled == 0)));
2653#endif
2654}
2655
2656static irqreturn_t do_cciss_intr(int irq, void *dev_id, struct pt_regs *regs)
2657{
2658	ctlr_info_t *h = dev_id;
2659	CommandList_struct *c;
2660	unsigned long flags;
2661	__u32 a, a1, a2;
2662
2663	if (interrupt_not_for_us(h))
2664		return IRQ_NONE;
2665	/*
2666	 * If there are completed commands in the completion queue,
2667	 * we had better do something about it.
2668	 */
2669	spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
2670	while (interrupt_pending(h)) {
2671		while ((a = get_next_completion(h)) != FIFO_EMPTY) {
2672			a1 = a;
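			/* Tags with bit 2 set came from do_cciss_request
			 * and carry a direct-lookup command pool index in
			 * bits 3 and up; other tags are matched by walking
			 * the completion queue below. */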
2673			if ((a & 0x04)) {
2674				a2 = (a >> 3);
2675				if (a2 >= NR_CMDS) {
2676					printk(KERN_WARNING
2677					       "cciss: controller cciss%d failed, stopping.\n",
2678					       h->ctlr);
2679					fail_all_cmds(h->ctlr);
2680					return IRQ_HANDLED;
2681				}
2682
2683				c = h->cmd_pool + a2;
2684				a = c->busaddr;
2685
2686			} else {
2687				a &= ~3;
2688				if ((c = h->cmpQ) == NULL) {
2689					printk(KERN_WARNING
2690					       "cciss: Completion of %08x ignored\n",
2691					       a1);
2692					continue;
2693				}
2694				while (c->busaddr != a) {
2695					c = c->next;
2696					if (c == h->cmpQ)
2697						break;
2698				}
2699			}
2700			/*
2701			 * If we've found the command, take it off the
2702			 * completion Q and free it
2703			 */
2704			if (c->busaddr == a) {
2705				removeQ(&h->cmpQ, c);
2706				if (c->cmd_type == CMD_RWREQ) {
2707					complete_command(h, c, 0);
2708				} else if (c->cmd_type == CMD_IOCTL_PEND) {
2709					complete(c->waiting);
2710				}
2711#				ifdef CONFIG_CISS_SCSI_TAPE
2712				else if (c->cmd_type == CMD_SCSI)
2713					complete_scsi_command(c, 0, a1);
2714#				endif
2715				continue;
2716			}
2717		}
2718	}
2719
2720	spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
2721	return IRQ_HANDLED;
2722}
2723
2724/*
 *  We cannot read the structure directly; for portability we must use
 *  the readl()/readb() I/O accessors.
 *  This is for debug only.
2728 */
2729#ifdef CCISS_DEBUG
2730static void print_cfg_table(CfgTable_struct *tb)
2731{
2732	int i;
2733	char temp_name[17];
2734
2735	printk("Controller Configuration information\n");
2736	printk("------------------------------------\n");
2737	for (i = 0; i < 4; i++)
2738		temp_name[i] = readb(&(tb->Signature[i]));
2739	temp_name[4] = '\0';
2740	printk("   Signature = %s\n", temp_name);
2741	printk("   Spec Number = %d\n", readl(&(tb->SpecValence)));
2742	printk("   Transport methods supported = 0x%x\n",
2743	       readl(&(tb->TransportSupport)));
2744	printk("   Transport methods active = 0x%x\n",
2745	       readl(&(tb->TransportActive)));
2746	printk("   Requested transport Method = 0x%x\n",
2747	       readl(&(tb->HostWrite.TransportRequest)));
2748	printk("   Coalesce Interrupt Delay = 0x%x\n",
2749	       readl(&(tb->HostWrite.CoalIntDelay)));
2750	printk("   Coalesce Interrupt Count = 0x%x\n",
2751	       readl(&(tb->HostWrite.CoalIntCount)));
	printk("   Max outstanding commands = %d\n",
	       readl(&(tb->CmdsOutMax)));
2754	printk("   Bus Types = 0x%x\n", readl(&(tb->BusTypes)));
2755	for (i = 0; i < 16; i++)
2756		temp_name[i] = readb(&(tb->ServerName[i]));
2757	temp_name[16] = '\0';
2758	printk("   Server Name = %s\n", temp_name);
2759	printk("   Heartbeat Counter = 0x%x\n\n\n", readl(&(tb->HeartBeat)));
2760}
2761#endif				/* CCISS_DEBUG */
2762
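/* Translate a PCI config-space base address register offset into the
 * matching resource index by walking the BARs and accounting for 64-bit
 * memory BARs, which occupy two config-space slots.  Returns -1 if the
 * offset cannot be matched. */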
2763static int find_PCI_BAR_index(struct pci_dev *pdev, unsigned long pci_bar_addr)
2764{
2765	int i, offset, mem_type, bar_type;
2766	if (pci_bar_addr == PCI_BASE_ADDRESS_0)	/* looking for BAR zero? */
2767		return 0;
2768	offset = 0;
2769	for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
2770		bar_type = pci_resource_flags(pdev, i) & PCI_BASE_ADDRESS_SPACE;
2771		if (bar_type == PCI_BASE_ADDRESS_SPACE_IO)
2772			offset += 4;
2773		else {
2774			mem_type = pci_resource_flags(pdev, i) &
2775			    PCI_BASE_ADDRESS_MEM_TYPE_MASK;
2776			switch (mem_type) {
2777			case PCI_BASE_ADDRESS_MEM_TYPE_32:
2778			case PCI_BASE_ADDRESS_MEM_TYPE_1M:
2779				offset += 4;	/* 32 bit */
2780				break;
2781			case PCI_BASE_ADDRESS_MEM_TYPE_64:
2782				offset += 8;
2783				break;
2784			default:	/* reserved in PCI 2.2 */
2785				printk(KERN_WARNING
2786				       "Base address is invalid\n");
2787				return -1;
2788				break;
2789			}
2790		}
2791		if (offset == pci_bar_addr - PCI_BASE_ADDRESS_0)
2792			return i + 1;
2793	}
2794	return -1;
2795}
2796
2797/* If MSI/MSI-X is supported by the kernel we will try to enable it on
2798 * controllers that are capable. If not, we use IO-APIC mode.
2799 */
2800
2801static void __devinit cciss_interrupt_mode(ctlr_info_t *c,
2802					   struct pci_dev *pdev, __u32 board_id)
2803{
2804#ifdef CONFIG_PCI_MSI
2805	int err;
2806	struct msix_entry cciss_msix_entries[4] = { {0, 0}, {0, 1},
2807	{0, 2}, {0, 3}
2808	};
2809
2810	/* Some boards advertise MSI but don't really support it */
2811	if ((board_id == 0x40700E11) ||
2812	    (board_id == 0x40800E11) ||
2813	    (board_id == 0x40820E11) || (board_id == 0x40830E11))
2814		goto default_int_mode;
2815
2816	if (pci_find_capability(pdev, PCI_CAP_ID_MSIX)) {
2817		err = pci_enable_msix(pdev, cciss_msix_entries, 4);
2818		if (!err) {
2819			c->intr[0] = cciss_msix_entries[0].vector;
2820			c->intr[1] = cciss_msix_entries[1].vector;
2821			c->intr[2] = cciss_msix_entries[2].vector;
2822			c->intr[3] = cciss_msix_entries[3].vector;
2823			c->msix_vector = 1;
2824			return;
2825		}
2826		if (err > 0) {
2827			printk(KERN_WARNING "cciss: only %d MSI-X vectors "
2828			       "available\n", err);
2829		} else {
2830			printk(KERN_WARNING "cciss: MSI-X init failed %d\n",
2831			       err);
2832		}
2833	}
2834	if (pci_find_capability(pdev, PCI_CAP_ID_MSI)) {
2835		if (!pci_enable_msi(pdev)) {
2836			c->intr[SIMPLE_MODE_INT] = pdev->irq;
2837			c->msi_vector = 1;
2838			return;
2839		} else {
2840			printk(KERN_WARNING "cciss: MSI init failed\n");
2841			c->intr[SIMPLE_MODE_INT] = pdev->irq;
2842			return;
2843		}
2844	}
2845      default_int_mode:
2846#endif				/* CONFIG_PCI_MSI */
2847	/* if we get here we're going to use the default interrupt mode */
2848	c->intr[SIMPLE_MODE_INT] = pdev->irq;
2849	return;
2850}
2851
2852static int cciss_pci_init(ctlr_info_t *c, struct pci_dev *pdev)
2853{
2854	ushort subsystem_vendor_id, subsystem_device_id, command;
2855	__u32 board_id, scratchpad = 0;
2856	__u64 cfg_offset;
2857	__u32 cfg_base_addr;
2858	__u64 cfg_base_addr_index;
2859	int i, err;
2860
2861	/* check to see if controller has been disabled */
2862	/* BEFORE trying to enable it */
2863	(void)pci_read_config_word(pdev, PCI_COMMAND, &command);
2864	if (!(command & 0x02)) {
2865		printk(KERN_WARNING
2866		       "cciss: controller appears to be disabled\n");
2867		return -ENODEV;
2868	}
2869
2870	err = pci_enable_device(pdev);
2871	if (err) {
2872		printk(KERN_ERR "cciss: Unable to Enable PCI device\n");
2873		return err;
2874	}
2875
2876	err = pci_request_regions(pdev, "cciss");
2877	if (err) {
2878		printk(KERN_ERR "cciss: Cannot obtain PCI resources, "
2879		       "aborting\n");
2880		goto err_out_disable_pdev;
2881	}
2882
2883	subsystem_vendor_id = pdev->subsystem_vendor;
2884	subsystem_device_id = pdev->subsystem_device;
2885	board_id = (((__u32) (subsystem_device_id << 16) & 0xffff0000) |
2886		    subsystem_vendor_id);
2887
2888#ifdef CCISS_DEBUG
2889	printk("command = %x\n", command);
2890	printk("irq = %x\n", pdev->irq);
2891	printk("board_id = %x\n", board_id);
2892#endif				/* CCISS_DEBUG */
2893
2894/* If the kernel supports MSI/MSI-X we will try to enable that functionality,
2895 * else we use the IO-APIC interrupt assigned to us by system ROM.
2896 */
2897	cciss_interrupt_mode(c, pdev, board_id);
2898
2899	/*
	 * The memory base addr is the first address; the second points to
	 * the config table.
2902	 */
2903
2904	c->paddr = pci_resource_start(pdev, 0);	/* addressing mode bits already removed */
2905#ifdef CCISS_DEBUG
2906	printk("address 0 = %x\n", c->paddr);
2907#endif				/* CCISS_DEBUG */
2908	c->vaddr = remap_pci_mem(c->paddr, 200);
2909
2910	/* Wait for the board to become ready.  (PCI hotplug needs this.)
2911	 * We poll for up to 120 secs, once per 100ms. */
2912	for (i = 0; i < 1200; i++) {
2913		scratchpad = readl(c->vaddr + SA5_SCRATCHPAD_OFFSET);
2914		if (scratchpad == CCISS_FIRMWARE_READY)
2915			break;
2916		set_current_state(TASK_INTERRUPTIBLE);
2917		schedule_timeout(HZ / 10);	/* wait 100ms */
2918	}
2919	if (scratchpad != CCISS_FIRMWARE_READY) {
2920		printk(KERN_WARNING "cciss: Board not ready.  Timed out.\n");
2921		err = -ENODEV;
2922		goto err_out_free_res;
2923	}
2924
2925	/* get the address index number */
2926	cfg_base_addr = readl(c->vaddr + SA5_CTCFG_OFFSET);
2927	cfg_base_addr &= (__u32) 0x0000ffff;
2928#ifdef CCISS_DEBUG
2929	printk("cfg base address = %x\n", cfg_base_addr);
2930#endif				/* CCISS_DEBUG */
2931	cfg_base_addr_index = find_PCI_BAR_index(pdev, cfg_base_addr);
2932#ifdef CCISS_DEBUG
	printk("cfg base address index = %llx\n",
	       (unsigned long long)cfg_base_addr_index);
2934#endif				/* CCISS_DEBUG */
2935	if (cfg_base_addr_index == -1) {
2936		printk(KERN_WARNING "cciss: Cannot find cfg_base_addr_index\n");
2937		err = -ENODEV;
2938		goto err_out_free_res;
2939	}
2940
2941	cfg_offset = readl(c->vaddr + SA5_CTMEM_OFFSET);
2942#ifdef CCISS_DEBUG
	printk("cfg offset = %llx\n", (unsigned long long)cfg_offset);
2944#endif				/* CCISS_DEBUG */
2945	c->cfgtable = remap_pci_mem(pci_resource_start(pdev,
2946						       cfg_base_addr_index) +
2947				    cfg_offset, sizeof(CfgTable_struct));
2948	c->board_id = board_id;
2949
2950#ifdef CCISS_DEBUG
2951	print_cfg_table(c->cfgtable);
2952#endif				/* CCISS_DEBUG */
2953
2954	for (i = 0; i < ARRAY_SIZE(products); i++) {
2955		if (board_id == products[i].board_id) {
2956			c->product_name = products[i].product_name;
2957			c->access = *(products[i].access);
2958			break;
2959		}
2960	}
2961	if (i == ARRAY_SIZE(products)) {
2962		printk(KERN_WARNING "cciss: Sorry, I don't know how"
2963		       " to access the Smart Array controller %08lx\n",
2964		       (unsigned long)board_id);
2965		err = -ENODEV;
2966		goto err_out_free_res;
2967	}
2968	if ((readb(&c->cfgtable->Signature[0]) != 'C') ||
2969	    (readb(&c->cfgtable->Signature[1]) != 'I') ||
2970	    (readb(&c->cfgtable->Signature[2]) != 'S') ||
2971	    (readb(&c->cfgtable->Signature[3]) != 'S')) {
2972		printk("Does not appear to be a valid CISS config table\n");
2973		err = -ENODEV;
2974		goto err_out_free_res;
2975	}
2976#ifdef CONFIG_X86
2977	{
2978		/* Need to enable prefetch in the SCSI core for 6400 in x86 */
2979		__u32 prefetch;
2980		prefetch = readl(&(c->cfgtable->SCSI_Prefetch));
2981		prefetch |= 0x100;
2982		writel(prefetch, &(c->cfgtable->SCSI_Prefetch));
2983	}
2984#endif
2985
2986#ifdef CCISS_DEBUG
2987	printk("Trying to put board into Simple mode\n");
2988#endif				/* CCISS_DEBUG */
2989	c->max_commands = readl(&(c->cfgtable->CmdsOutMax));
2990	/* Update the field, and then ring the doorbell */
2991	writel(CFGTBL_Trans_Simple, &(c->cfgtable->HostWrite.TransportRequest));
2992	writel(CFGTBL_ChangeReq, c->vaddr + SA5_DOORBELL);
2993
	/* under certain very rare conditions, this can take a while.
2995	 * (e.g.: hot replace a failed 144GB drive in a RAID 5 set right
2996	 * as we enter this code.) */
2997	for (i = 0; i < MAX_CONFIG_WAIT; i++) {
2998		if (!(readl(c->vaddr + SA5_DOORBELL) & CFGTBL_ChangeReq))
2999			break;
3000		/* delay and try again */
3001		set_current_state(TASK_INTERRUPTIBLE);
3002		schedule_timeout(10);
3003	}
3004
3005#ifdef CCISS_DEBUG
3006	printk(KERN_DEBUG "I counter got to %d %x\n", i,
3007	       readl(c->vaddr + SA5_DOORBELL));
3008#endif				/* CCISS_DEBUG */
3009#ifdef CCISS_DEBUG
3010	print_cfg_table(c->cfgtable);
3011#endif				/* CCISS_DEBUG */
3012
3013	if (!(readl(&(c->cfgtable->TransportActive)) & CFGTBL_Trans_Simple)) {
3014		printk(KERN_WARNING "cciss: unable to get board into"
3015		       " simple mode\n");
3016		err = -ENODEV;
3017		goto err_out_free_res;
3018	}
3019	return 0;
3020
3021      err_out_free_res:
3022	pci_release_regions(pdev);
3023
3024      err_out_disable_pdev:
3025	pci_disable_device(pdev);
3026	return err;
3027}
3028
3029/*
3030 * Gets information about the local volumes attached to the controller.
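 * Also decides whether 10-byte or 16-byte READ/WRITE CDBs should be used,
 * based on whether READ CAPACITY(10) can report the volume size.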
3031 */
3032static void cciss_getgeometry(int cntl_num)
3033{
3034	ReportLunData_struct *ld_buff;
3035	InquiryData_struct *inq_buff;
3036	int return_code;
3037	int i;
3038	int listlength = 0;
3039	__u32 lunid = 0;
3040	int block_size;
3041	sector_t total_size;
3042
3043	ld_buff = kzalloc(sizeof(ReportLunData_struct), GFP_KERNEL);
3044	if (ld_buff == NULL) {
3045		printk(KERN_ERR "cciss: out of memory\n");
3046		return;
3047	}
3048	inq_buff = kmalloc(sizeof(InquiryData_struct), GFP_KERNEL);
3049	if (inq_buff == NULL) {
3050		printk(KERN_ERR "cciss: out of memory\n");
3051		kfree(ld_buff);
3052		return;
3053	}
3054	/* Get the firmware version */
3055	return_code = sendcmd(CISS_INQUIRY, cntl_num, inq_buff,
3056			      sizeof(InquiryData_struct), 0, 0, 0, NULL,
3057			      TYPE_CMD);
3058	if (return_code == IO_OK) {
3059		hba[cntl_num]->firm_ver[0] = inq_buff->data_byte[32];
3060		hba[cntl_num]->firm_ver[1] = inq_buff->data_byte[33];
3061		hba[cntl_num]->firm_ver[2] = inq_buff->data_byte[34];
3062		hba[cntl_num]->firm_ver[3] = inq_buff->data_byte[35];
3063	} else {		/* send command failed */
3064
3065		printk(KERN_WARNING "cciss: unable to determine firmware"
3066		       " version of controller\n");
3067	}
3068	/* Get the number of logical volumes */
3069	return_code = sendcmd(CISS_REPORT_LOG, cntl_num, ld_buff,
3070			      sizeof(ReportLunData_struct), 0, 0, 0, NULL,
3071			      TYPE_CMD);
3072
3073	if (return_code == IO_OK) {
3074#ifdef CCISS_DEBUG
3075		printk("LUN Data\n--------------------------\n");
3076#endif				/* CCISS_DEBUG */
3077
3078		listlength |=
3079		    (0xff & (unsigned int)(ld_buff->LUNListLength[0])) << 24;
3080		listlength |=
3081		    (0xff & (unsigned int)(ld_buff->LUNListLength[1])) << 16;
3082		listlength |=
3083		    (0xff & (unsigned int)(ld_buff->LUNListLength[2])) << 8;
3084		listlength |= 0xff & (unsigned int)(ld_buff->LUNListLength[3]);
3085	} else {		/* reading number of logical volumes failed */
3086
3087		printk(KERN_WARNING "cciss: report logical volume"
3088		       " command failed\n");
3089		listlength = 0;
3090	}
	hba[cntl_num]->num_luns = listlength / 8;	// 8 bytes per entry
3092	if (hba[cntl_num]->num_luns > CISS_MAX_LUN) {
		printk(KERN_ERR
		       "cciss: only %d logical volumes supported\n",
		       CISS_MAX_LUN);
3096		hba[cntl_num]->num_luns = CISS_MAX_LUN;
3097	}
3098#ifdef CCISS_DEBUG
3099	printk(KERN_DEBUG "Length = %x %x %x %x = %d\n",
3100	       ld_buff->LUNListLength[0], ld_buff->LUNListLength[1],
3101	       ld_buff->LUNListLength[2], ld_buff->LUNListLength[3],
3102	       hba[cntl_num]->num_luns);
3103#endif				/* CCISS_DEBUG */
3104
3105	hba[cntl_num]->highest_lun = hba[cntl_num]->num_luns - 1;
3106	for (i = 0; i < CISS_MAX_LUN; i++) {
3107		if (i < hba[cntl_num]->num_luns) {
3108			lunid = (0xff & (unsigned int)(ld_buff->LUN[i][3]))
3109			    << 24;
3110			lunid |= (0xff & (unsigned int)(ld_buff->LUN[i][2]))
3111			    << 16;
3112			lunid |= (0xff & (unsigned int)(ld_buff->LUN[i][1]))
3113			    << 8;
3114			lunid |= 0xff & (unsigned int)(ld_buff->LUN[i][0]);
3115
3116			hba[cntl_num]->drv[i].LunID = lunid;
3117
3118#ifdef CCISS_DEBUG
3119			printk(KERN_DEBUG "LUN[%d]:  %x %x %x %x = %x\n", i,
3120			       ld_buff->LUN[i][0], ld_buff->LUN[i][1],
3121			       ld_buff->LUN[i][2], ld_buff->LUN[i][3],
3122			       hba[cntl_num]->drv[i].LunID);
3123#endif				/* CCISS_DEBUG */
3124
			/* testing to see if 16-byte CDBs are already being used */
			if (hba[cntl_num]->cciss_read == CCISS_READ_16) {
				cciss_read_capacity_16(cntl_num, i, 0,
						       &total_size,
						       &block_size);
				goto geo_inq;
			}
			cciss_read_capacity(cntl_num, i, 0,
					    &total_size, &block_size);

			/* total_size = last LBA + 1 */
			if (total_size == (__u32) 0) {
				cciss_read_capacity_16(cntl_num, i, 0,
						       &total_size,
						       &block_size);
				hba[cntl_num]->cciss_read = CCISS_READ_16;
				hba[cntl_num]->cciss_write = CCISS_WRITE_16;
			} else {
				hba[cntl_num]->cciss_read = CCISS_READ_10;
				hba[cntl_num]->cciss_write = CCISS_WRITE_10;
			}
geo_inq:
			cciss_geometry_inquiry(cntl_num, i, 0, total_size,
					       block_size, inq_buff,
					       &hba[cntl_num]->drv[i]);
3147		} else {
3148			/* initialize raid_level to indicate a free space */
3149			hba[cntl_num]->drv[i].raid_level = -1;
3150		}
3151	}
3152	kfree(ld_buff);
3153	kfree(inq_buff);
3154}
3155
3156/* Function to find the first free pointer into our hba[] array */
3157/* Returns -1 if no free entries are left.  */
3158static int alloc_cciss_hba(void)
3159{
3160	struct gendisk *disk[NWD];
3161	int i, n;
3162	for (n = 0; n < NWD; n++) {
3163		disk[n] = alloc_disk(1 << NWD_SHIFT);
3164		if (!disk[n])
3165			goto out;
3166	}
3167
3168	for (i = 0; i < MAX_CTLR; i++) {
3169		if (!hba[i]) {
3170			ctlr_info_t *p;
3171			p = kzalloc(sizeof(ctlr_info_t), GFP_KERNEL);
3172			if (!p)
3173				goto Enomem;
3174			for (n = 0; n < NWD; n++)
3175				p->gendisk[n] = disk[n];
3176			hba[i] = p;
3177			return i;
3178		}
3179	}
3180	printk(KERN_WARNING "cciss: This driver supports a maximum"
3181	       " of %d controllers.\n", MAX_CTLR);
3182	goto out;
3183      Enomem:
3184	printk(KERN_ERR "cciss: out of memory.\n");
3185      out:
3186	while (n--)
3187		put_disk(disk[n]);
3188	return -1;
3189}
3190
3191static void free_hba(int i)
3192{
3193	ctlr_info_t *p = hba[i];
3194	int n;
3195
3196	hba[i] = NULL;
3197	for (n = 0; n < NWD; n++)
3198		put_disk(p->gendisk[n]);
3199	kfree(p);
3200}
3201
3202/*
3203 *  This is it.  Find all the controllers and register them.  I really hate
3204 *  stealing all these major device numbers.
 *  Returns 1 on success, -1 on failure.
3206 */
3207static int __devinit cciss_init_one(struct pci_dev *pdev,
3208				    const struct pci_device_id *ent)
3209{
3210	request_queue_t *q;
3211	int i;
3212	int j;
3213	int rc;
3214	int dac;
3215
3216	i = alloc_cciss_hba();
3217	if (i < 0)
3218		return -1;
3219
3220	hba[i]->busy_initializing = 1;
3221
3222	if (cciss_pci_init(hba[i], pdev) != 0)
3223		goto clean1;
3224
3225	sprintf(hba[i]->devname, "cciss%d", i);
3226	hba[i]->ctlr = i;
3227	hba[i]->pdev = pdev;
3228
3229	/* configure PCI DMA stuff */
3230	if (!pci_set_dma_mask(pdev, DMA_64BIT_MASK))
3231		dac = 1;
3232	else if (!pci_set_dma_mask(pdev, DMA_32BIT_MASK))
3233		dac = 0;
3234	else {
3235		printk(KERN_ERR "cciss: no suitable DMA available\n");
3236		goto clean1;
3237	}
3238
3239	/*
3240	 * register with the major number, or get a dynamic major number
3241	 * by passing 0 as argument.  This is done for greater than
3242	 * 8 controller support.
3243	 */
3244	if (i < MAX_CTLR_ORIG)
3245		hba[i]->major = COMPAQ_CISS_MAJOR + i;
3246	rc = register_blkdev(hba[i]->major, hba[i]->devname);
3247	if (rc == -EBUSY || rc == -EINVAL) {
3248		printk(KERN_ERR
3249		       "cciss:  Unable to get major number %d for %s "
3250		       "on hba %d\n", hba[i]->major, hba[i]->devname, i);
3251		goto clean1;
3252	} else {
3253		if (i >= MAX_CTLR_ORIG)
3254			hba[i]->major = rc;
3255	}
3256
3257	/* make sure the board interrupts are off */
3258	hba[i]->access.set_intr_mask(hba[i], CCISS_INTR_OFF);
3259	if (request_irq(hba[i]->intr[SIMPLE_MODE_INT], do_cciss_intr,
3260			IRQF_DISABLED | IRQF_SHARED, hba[i]->devname, hba[i])) {
3261		printk(KERN_ERR "cciss: Unable to get irq %d for %s\n",
3262		       hba[i]->intr[SIMPLE_MODE_INT], hba[i]->devname);
3263		goto clean2;
3264	}
3265
3266	printk(KERN_INFO "%s: <0x%x> at PCI %s IRQ %d%s using DAC\n",
3267	       hba[i]->devname, pdev->device, pci_name(pdev),
3268	       hba[i]->intr[SIMPLE_MODE_INT], dac ? "" : " not");
3269
3270	hba[i]->cmd_pool_bits =
3271	    kmalloc(((NR_CMDS + BITS_PER_LONG -
3272		      1) / BITS_PER_LONG) * sizeof(unsigned long), GFP_KERNEL);
3273	hba[i]->cmd_pool = (CommandList_struct *)
3274	    pci_alloc_consistent(hba[i]->pdev,
3275		    NR_CMDS * sizeof(CommandList_struct),
3276		    &(hba[i]->cmd_pool_dhandle));
3277	hba[i]->errinfo_pool = (ErrorInfo_struct *)
3278	    pci_alloc_consistent(hba[i]->pdev,
3279		    NR_CMDS * sizeof(ErrorInfo_struct),
3280		    &(hba[i]->errinfo_pool_dhandle));
3281	if ((hba[i]->cmd_pool_bits == NULL)
3282	    || (hba[i]->cmd_pool == NULL)
3283	    || (hba[i]->errinfo_pool == NULL)) {
3284		printk(KERN_ERR "cciss: out of memory");
3285		goto clean4;
3286	}
3287#ifdef CONFIG_CISS_SCSI_TAPE
3288	hba[i]->scsi_rejects.complete =
3289	    kmalloc(sizeof(hba[i]->scsi_rejects.complete[0]) *
3290		    (NR_CMDS + 5), GFP_KERNEL);
3291	if (hba[i]->scsi_rejects.complete == NULL) {
3292		printk(KERN_ERR "cciss: out of memory");
3293		goto clean4;
3294	}
3295#endif
3296	spin_lock_init(&hba[i]->lock);
3297
3298	/* Initialize the pdev driver private data.
3299	   have it point to hba[i].  */
3300	pci_set_drvdata(pdev, hba[i]);
3301	/* command and error info recs zeroed out before
3302	   they are used */
3303	memset(hba[i]->cmd_pool_bits, 0,
3304	       ((NR_CMDS + BITS_PER_LONG -
3305		 1) / BITS_PER_LONG) * sizeof(unsigned long));
3306
3307#ifdef CCISS_DEBUG
3308	printk(KERN_DEBUG "Scanning for drives on controller cciss%d\n", i);
3309#endif				/* CCISS_DEBUG */
3310
3311	cciss_getgeometry(i);
3312
3313	cciss_scsi_setup(i);
3314
3315	/* Turn the interrupts on so we can service requests */
3316	hba[i]->access.set_intr_mask(hba[i], CCISS_INTR_ON);
3317
3318	cciss_procinit(i);
3319	hba[i]->busy_initializing = 0;
3320
3321	for (j = 0; j < NWD; j++) {	/* mfm */
3322		drive_info_struct *drv = &(hba[i]->drv[j]);
3323		struct gendisk *disk = hba[i]->gendisk[j];
3324
3325		q = blk_init_queue(do_cciss_request, &hba[i]->lock);
3326		if (!q) {
3327			printk(KERN_ERR
3328			       "cciss:  unable to allocate queue for disk %d\n",
3329			       j);
3330			break;
3331		}
3332		drv->queue = q;
3333
3334		q->backing_dev_info.ra_pages = READ_AHEAD;
3335		blk_queue_bounce_limit(q, hba[i]->pdev->dma_mask);
3336
3337		/* This is a hardware imposed limit. */
3338		blk_queue_max_hw_segments(q, MAXSGENTRIES);
3339
3340		/* This is a limit in the driver and could be eliminated. */
3341		blk_queue_max_phys_segments(q, MAXSGENTRIES);
3342
3343		blk_queue_max_sectors(q, 512);
3344
3345		blk_queue_softirq_done(q, cciss_softirq_done);
3346
3347		q->queuedata = hba[i];
3348		sprintf(disk->disk_name, "cciss/c%dd%d", i, j);
3349		disk->major = hba[i]->major;
3350		disk->first_minor = j << NWD_SHIFT;
3351		disk->fops = &cciss_fops;
3352		disk->queue = q;
3353		disk->private_data = drv;
3354		disk->driverfs_dev = &pdev->dev;
3355		/* we must register the controller even if no disks exist */
3356		/* this is for the online array utilities */
3357		if (!drv->heads && j)
3358			continue;
3359		blk_queue_hardsect_size(q, drv->block_size);
3360		set_capacity(disk, drv->nr_blocks);
3361		add_disk(disk);
3362	}
3363
3364	return 1;
3365
3366      clean4:
3367#ifdef CONFIG_CISS_SCSI_TAPE
3368	kfree(hba[i]->scsi_rejects.complete);
3369#endif
3370	kfree(hba[i]->cmd_pool_bits);
3371	if (hba[i]->cmd_pool)
3372		pci_free_consistent(hba[i]->pdev,
3373				    NR_CMDS * sizeof(CommandList_struct),
3374				    hba[i]->cmd_pool, hba[i]->cmd_pool_dhandle);
3375	if (hba[i]->errinfo_pool)
3376		pci_free_consistent(hba[i]->pdev,
3377				    NR_CMDS * sizeof(ErrorInfo_struct),
3378				    hba[i]->errinfo_pool,
3379				    hba[i]->errinfo_pool_dhandle);
3380	free_irq(hba[i]->intr[SIMPLE_MODE_INT], hba[i]);
3381      clean2:
3382	unregister_blkdev(hba[i]->major, hba[i]->devname);
3383      clean1:
3384	hba[i]->busy_initializing = 0;
3385	free_hba(i);
3386	return -1;
3387}
3388
3389static void __devexit cciss_remove_one(struct pci_dev *pdev)
3390{
3391	ctlr_info_t *tmp_ptr;
3392	int i, j;
3393	char flush_buf[4];
3394	int return_code;
3395
3396	if (pci_get_drvdata(pdev) == NULL) {
3397		printk(KERN_ERR "cciss: Unable to remove device \n");
3398		return;
3399	}
3400	tmp_ptr = pci_get_drvdata(pdev);
3401	i = tmp_ptr->ctlr;
3402	if (hba[i] == NULL) {
3403		printk(KERN_ERR "cciss: device appears to "
3404		       "already be removed \n");
3405		return;
3406	}
	/* Turn board interrupts off and send the flush cache command. */
	/* sendcmd will turn off interrupts and send the flush,
	 * writing all data in the battery-backed cache out to the disks. */
3410	memset(flush_buf, 0, 4);
3411	return_code = sendcmd(CCISS_CACHE_FLUSH, i, flush_buf, 4, 0, 0, 0, NULL,
3412			      TYPE_CMD);
3413	if (return_code != IO_OK) {
3414		printk(KERN_WARNING "Error Flushing cache on controller %d\n",
3415		       i);
3416	}
3417	free_irq(hba[i]->intr[2], hba[i]);
3418
3419#ifdef CONFIG_PCI_MSI
3420	if (hba[i]->msix_vector)
3421		pci_disable_msix(hba[i]->pdev);
3422	else if (hba[i]->msi_vector)
3423		pci_disable_msi(hba[i]->pdev);
3424#endif				/* CONFIG_PCI_MSI */
3425
3426	iounmap(hba[i]->vaddr);
3427	cciss_unregister_scsi(i);	/* unhook from SCSI subsystem */
3428	unregister_blkdev(hba[i]->major, hba[i]->devname);
3429	remove_proc_entry(hba[i]->devname, proc_cciss);
3430
3431	/* remove it from the disk list */
3432	for (j = 0; j < NWD; j++) {
3433		struct gendisk *disk = hba[i]->gendisk[j];
3434		if (disk) {
3435			request_queue_t *q = disk->queue;
3436
3437			if (disk->flags & GENHD_FL_UP)
3438				del_gendisk(disk);
3439			if (q)
3440				blk_cleanup_queue(q);
3441		}
3442	}
3443
3444	pci_free_consistent(hba[i]->pdev, NR_CMDS * sizeof(CommandList_struct),
3445			    hba[i]->cmd_pool, hba[i]->cmd_pool_dhandle);
3446	pci_free_consistent(hba[i]->pdev, NR_CMDS * sizeof(ErrorInfo_struct),
3447			    hba[i]->errinfo_pool, hba[i]->errinfo_pool_dhandle);
3448	kfree(hba[i]->cmd_pool_bits);
3449#ifdef CONFIG_CISS_SCSI_TAPE
3450	kfree(hba[i]->scsi_rejects.complete);
3451#endif
3452	pci_release_regions(pdev);
3453	pci_disable_device(pdev);
3454	pci_set_drvdata(pdev, NULL);
3455	free_hba(i);
3456}
3457
3458static struct pci_driver cciss_pci_driver = {
3459	.name = "cciss",
3460	.probe = cciss_init_one,
3461	.remove = __devexit_p(cciss_remove_one),
3462	.id_table = cciss_pci_device_id,	/* id_table */
3463};
3464
3465/*
 *  This is it.  Register the PCI driver information for the cards we control;
 *  the OS will call our registered routines when it finds one of our cards.
3468 */
3469static int __init cciss_init(void)
3470{
3471	printk(KERN_INFO DRIVER_NAME "\n");
3472
3473	/* Register for our PCI devices */
3474	return pci_register_driver(&cciss_pci_driver);
3475}
3476
3477static void __exit cciss_cleanup(void)
3478{
3479	int i;
3480
3481	pci_unregister_driver(&cciss_pci_driver);
	/* double check that all controller entries have been removed */
3483	for (i = 0; i < MAX_CTLR; i++) {
3484		if (hba[i] != NULL) {
3485			printk(KERN_WARNING "cciss: had to remove"
3486			       " controller %d\n", i);
3487			cciss_remove_one(hba[i]->pdev);
3488		}
3489	}
3490	remove_proc_entry("cciss", proc_root_driver);
3491}
3492
3493static void fail_all_cmds(unsigned long ctlr)
3494{
3495	/* If we get here, the board is apparently dead. */
3496	ctlr_info_t *h = hba[ctlr];
3497	CommandList_struct *c;
3498	unsigned long flags;
3499
3500	printk(KERN_WARNING "cciss%d: controller not responding.\n", h->ctlr);
3501	h->alive = 0;		/* the controller apparently died... */
3502
3503	spin_lock_irqsave(CCISS_LOCK(ctlr), flags);
3504
3505	pci_disable_device(h->pdev);	/* Make sure it is really dead. */
3506
3507	/* move everything off the request queue onto the completed queue */
3508	while ((c = h->reqQ) != NULL) {
3509		removeQ(&(h->reqQ), c);
3510		h->Qdepth--;
3511		addQ(&(h->cmpQ), c);
3512	}
3513
3514	/* Now, fail everything on the completed queue with a HW error */
3515	while ((c = h->cmpQ) != NULL) {
3516		removeQ(&h->cmpQ, c);
3517		c->err_info->CommandStatus = CMD_HARDWARE_ERR;
3518		if (c->cmd_type == CMD_RWREQ) {
3519			complete_command(h, c, 0);
3520		} else if (c->cmd_type == CMD_IOCTL_PEND)
3521			complete(c->waiting);
3522#ifdef CONFIG_CISS_SCSI_TAPE
3523		else if (c->cmd_type == CMD_SCSI)
3524			complete_scsi_command(c, 0, 0);
3525#endif
3526	}
3527	spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
3528	return;
3529}
3530
3531module_init(cciss_init);
3532module_exit(cciss_cleanup);
3533