cciss.c revision 5390cfc3fea49d015ae1eed8551c0bf00489b50e
1/*
2 *    Disk Array driver for HP Smart Array controllers.
3 *    (C) Copyright 2000, 2007 Hewlett-Packard Development Company, L.P.
4 *
5 *    This program is free software; you can redistribute it and/or modify
6 *    it under the terms of the GNU General Public License as published by
7 *    the Free Software Foundation; version 2 of the License.
8 *
9 *    This program is distributed in the hope that it will be useful,
10 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
11 *    MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 *    General Public License for more details.
13 *
14 *    You should have received a copy of the GNU General Public License
15 *    along with this program; if not, write to the Free Software
16 *    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
17 *    02111-1307, USA.
18 *
19 *    Questions/Comments/Bugfixes to iss_storagedev@hp.com
20 *
21 */
22
23#include <linux/module.h>
24#include <linux/interrupt.h>
25#include <linux/types.h>
26#include <linux/pci.h>
27#include <linux/kernel.h>
28#include <linux/slab.h>
29#include <linux/delay.h>
30#include <linux/major.h>
31#include <linux/fs.h>
32#include <linux/bio.h>
33#include <linux/blkpg.h>
34#include <linux/timer.h>
35#include <linux/proc_fs.h>
36#include <linux/seq_file.h>
37#include <linux/init.h>
38#include <linux/hdreg.h>
39#include <linux/spinlock.h>
40#include <linux/compat.h>
41#include <linux/blktrace_api.h>
42#include <asm/uaccess.h>
43#include <asm/io.h>
44
45#include <linux/dma-mapping.h>
46#include <linux/blkdev.h>
47#include <linux/genhd.h>
48#include <linux/completion.h>
49#include <scsi/scsi.h>
50#include <scsi/sg.h>
51#include <scsi/scsi_ioctl.h>
52#include <linux/cdrom.h>
53#include <linux/scatterlist.h>
54#include <linux/kthread.h>
55
56#define CCISS_DRIVER_VERSION(maj,min,submin) ((maj<<16)|(min<<8)|(submin))
57#define DRIVER_NAME "HP CISS Driver (v 3.6.20)"
58#define DRIVER_VERSION CCISS_DRIVER_VERSION(3, 6, 20)
59
60/* Embedded module documentation macros - see modules.h */
61MODULE_AUTHOR("Hewlett-Packard Company");
62MODULE_DESCRIPTION("Driver for HP Smart Array Controllers");
63MODULE_SUPPORTED_DEVICE("HP SA5i SA5i+ SA532 SA5300 SA5312 SA641 SA642 SA6400"
64			" SA6i P600 P800 P400 P400i E200 E200i E500 P700m"
65			" Smart Array G2 Series SAS/SATA Controllers");
66MODULE_VERSION("3.6.20");
67MODULE_LICENSE("GPL");
68
69#include "cciss_cmd.h"
70#include "cciss.h"
71#include <linux/cciss_ioctl.h>
72
73/* define the PCI info for the cards we can control */
74static const struct pci_device_id cciss_pci_device_id[] = {
75	{PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_CISS,  0x0E11, 0x4070},
76	{PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_CISSB, 0x0E11, 0x4080},
77	{PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_CISSB, 0x0E11, 0x4082},
78	{PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_CISSB, 0x0E11, 0x4083},
79	{PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_CISSC, 0x0E11, 0x4091},
80	{PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_CISSC, 0x0E11, 0x409A},
81	{PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_CISSC, 0x0E11, 0x409B},
82	{PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_CISSC, 0x0E11, 0x409C},
83	{PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_CISSC, 0x0E11, 0x409D},
84	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSA,     0x103C, 0x3225},
85	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSC,     0x103C, 0x3223},
86	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSC,     0x103C, 0x3234},
87	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSC,     0x103C, 0x3235},
88	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSD,     0x103C, 0x3211},
89	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSD,     0x103C, 0x3212},
90	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSD,     0x103C, 0x3213},
91	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSD,     0x103C, 0x3214},
92	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSD,     0x103C, 0x3215},
93	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSC,     0x103C, 0x3237},
94	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSC,     0x103C, 0x323D},
95	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSE,     0x103C, 0x3241},
96	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSE,     0x103C, 0x3243},
97	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSE,     0x103C, 0x3245},
98	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSE,     0x103C, 0x3247},
99	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSE,     0x103C, 0x3249},
100	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSE,     0x103C, 0x324A},
101	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSE,     0x103C, 0x324B},
102	{PCI_VENDOR_ID_HP,     PCI_ANY_ID,	PCI_ANY_ID, PCI_ANY_ID,
103		PCI_CLASS_STORAGE_RAID << 8, 0xffff << 8, 0},
104	{0,}
105};
106
107MODULE_DEVICE_TABLE(pci, cciss_pci_device_id);
108
109/*  board_id = Subsystem Device ID & Vendor ID
110 *  product = Marketing Name for the board
111 *  access = Address of the struct of function pointers
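 *  For example, subsystem vendor 0x0E11 with subsystem device 0x4070
 *  yields board_id 0x40700E11, the "Smart Array 5300" entry below.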
112 */
113static struct board_type products[] = {
114	{0x40700E11, "Smart Array 5300", &SA5_access},
115	{0x40800E11, "Smart Array 5i", &SA5B_access},
116	{0x40820E11, "Smart Array 532", &SA5B_access},
117	{0x40830E11, "Smart Array 5312", &SA5B_access},
118	{0x409A0E11, "Smart Array 641", &SA5_access},
119	{0x409B0E11, "Smart Array 642", &SA5_access},
120	{0x409C0E11, "Smart Array 6400", &SA5_access},
121	{0x409D0E11, "Smart Array 6400 EM", &SA5_access},
122	{0x40910E11, "Smart Array 6i", &SA5_access},
123	{0x3225103C, "Smart Array P600", &SA5_access},
124	{0x3223103C, "Smart Array P800", &SA5_access},
125	{0x3234103C, "Smart Array P400", &SA5_access},
126	{0x3235103C, "Smart Array P400i", &SA5_access},
127	{0x3211103C, "Smart Array E200i", &SA5_access},
128	{0x3212103C, "Smart Array E200", &SA5_access},
129	{0x3213103C, "Smart Array E200i", &SA5_access},
130	{0x3214103C, "Smart Array E200i", &SA5_access},
131	{0x3215103C, "Smart Array E200i", &SA5_access},
132	{0x3237103C, "Smart Array E500", &SA5_access},
133	{0x323D103C, "Smart Array P700m", &SA5_access},
134	{0x3241103C, "Smart Array P212", &SA5_access},
135	{0x3243103C, "Smart Array P410", &SA5_access},
136	{0x3245103C, "Smart Array P410i", &SA5_access},
137	{0x3247103C, "Smart Array P411", &SA5_access},
138	{0x3249103C, "Smart Array P812", &SA5_access},
139	{0x324A103C, "Smart Array P712m", &SA5_access},
140	{0x324B103C, "Smart Array P711m", &SA5_access},
141	{0xFFFF103C, "Unknown Smart Array", &SA5_access},
142};
143
144/* How long to wait (in milliseconds) for board to go into simple mode */
145#define MAX_CONFIG_WAIT 30000
146#define MAX_IOCTL_CONFIG_WAIT 1000
147
148/* define how many times we will retry a command because of bus resets */
149#define MAX_CMD_RETRIES 3
150
151#define MAX_CTLR	32
152
153/* Originally the cciss driver only supported 8 major numbers */
154#define MAX_CTLR_ORIG 	8
155
156static ctlr_info_t *hba[MAX_CTLR];
157
158static void do_cciss_request(struct request_queue *q);
159static irqreturn_t do_cciss_intr(int irq, void *dev_id);
160static int cciss_open(struct block_device *bdev, fmode_t mode);
161static int cciss_release(struct gendisk *disk, fmode_t mode);
162static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
163		       unsigned int cmd, unsigned long arg);
164static int cciss_getgeo(struct block_device *bdev, struct hd_geometry *geo);
165
166static int cciss_revalidate(struct gendisk *disk);
167static int rebuild_lun_table(ctlr_info_t *h, int first_time);
168static int deregister_disk(ctlr_info_t *h, int drv_index,
169			   int clear_all);
170
171static void cciss_read_capacity(int ctlr, int logvol, int withirq,
172			sector_t *total_size, unsigned int *block_size);
173static void cciss_read_capacity_16(int ctlr, int logvol, int withirq,
174			sector_t *total_size, unsigned int *block_size);
175static void cciss_geometry_inquiry(int ctlr, int logvol,
176			int withirq, sector_t total_size,
177			unsigned int block_size, InquiryData_struct *inq_buff,
178				   drive_info_struct *drv);
179static void __devinit cciss_interrupt_mode(ctlr_info_t *, struct pci_dev *,
180					   __u32);
181static void start_io(ctlr_info_t *h);
182static int sendcmd(__u8 cmd, int ctlr, void *buff, size_t size,
183		   unsigned int use_unit_num, unsigned int log_unit,
184		   __u8 page_code, unsigned char *scsi3addr, int cmd_type);
185static int sendcmd_withirq(__u8 cmd, int ctlr, void *buff, size_t size,
186			   unsigned int use_unit_num, unsigned int log_unit,
187			   __u8 page_code, int cmd_type);
188
189static void fail_all_cmds(unsigned long ctlr);
190static int scan_thread(void *data);
191static int check_for_unit_attention(ctlr_info_t *h, CommandList_struct *c);
192
193#ifdef CONFIG_PROC_FS
194static void cciss_procinit(int i);
195#else
196static void cciss_procinit(int i)
197{
198}
199#endif				/* CONFIG_PROC_FS */
200
201#ifdef CONFIG_COMPAT
202static int cciss_compat_ioctl(struct block_device *, fmode_t,
203			      unsigned, unsigned long);
204#endif
205
206static struct block_device_operations cciss_fops = {
207	.owner = THIS_MODULE,
208	.open = cciss_open,
209	.release = cciss_release,
210	.locked_ioctl = cciss_ioctl,
211	.getgeo = cciss_getgeo,
212#ifdef CONFIG_COMPAT
213	.compat_ioctl = cciss_compat_ioctl,
214#endif
215	.revalidate_disk = cciss_revalidate,
216};
217
218/*
219 * Enqueuing and dequeuing functions for cmdlists.
220 */
221static inline void addQ(struct hlist_head *list, CommandList_struct *c)
222{
223	hlist_add_head(&c->list, list);
224}
225
226static inline void removeQ(CommandList_struct *c)
227{
228	if (WARN_ON(hlist_unhashed(&c->list)))
229		return;
230
231	hlist_del_init(&c->list);
232}
233
234#include "cciss_scsi.c"		/* For SCSI tape support */
235
236#define RAID_UNKNOWN 6
237
238#ifdef CONFIG_PROC_FS
239
240/*
241 * Report information about this controller.
242 */
243#define ENG_GIG 1000000000
244#define ENG_GIG_FACTOR (ENG_GIG/512)
245#define ENGAGE_SCSI	"engage scsi"
246static const char *raid_label[] = { "0", "4", "1(1+0)", "5", "5+1", "ADG",
247	"UNKNOWN"
248};
249
250static struct proc_dir_entry *proc_cciss;
251
252static void cciss_seq_show_header(struct seq_file *seq)
253{
254	ctlr_info_t *h = seq->private;
255
256	seq_printf(seq, "%s: HP %s Controller\n"
257		"Board ID: 0x%08lx\n"
258		"Firmware Version: %c%c%c%c\n"
259		"IRQ: %d\n"
260		"Logical drives: %d\n"
261		"Current Q depth: %d\n"
262		"Current # commands on controller: %d\n"
263		"Max Q depth since init: %d\n"
264		"Max # commands on controller since init: %d\n"
265		"Max SG entries since init: %d\n",
266		h->devname,
267		h->product_name,
268		(unsigned long)h->board_id,
269		h->firm_ver[0], h->firm_ver[1], h->firm_ver[2],
270		h->firm_ver[3], (unsigned int)h->intr[SIMPLE_MODE_INT],
271		h->num_luns,
272		h->Qdepth, h->commands_outstanding,
273		h->maxQsinceinit, h->max_outstanding, h->maxSG);
274
275#ifdef CONFIG_CISS_SCSI_TAPE
276	cciss_seq_tape_report(seq, h->ctlr);
277#endif /* CONFIG_CISS_SCSI_TAPE */
278}
279
280static void *cciss_seq_start(struct seq_file *seq, loff_t *pos)
281{
282	ctlr_info_t *h = seq->private;
283	unsigned ctlr = h->ctlr;
284	unsigned long flags;
285
286	/* prevent displaying bogus info during configuration
287	 * or deconfiguration of a logical volume
288	 */
289	spin_lock_irqsave(CCISS_LOCK(ctlr), flags);
290	if (h->busy_configuring) {
291		spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
292		return ERR_PTR(-EBUSY);
293	}
294	h->busy_configuring = 1;
295	spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
296
297	if (*pos == 0)
298		cciss_seq_show_header(seq);
299
300	return pos;
301}
302
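/*
 * Show one line per logical volume: cciss/c<ctlr>d<vol>, the volume size
 * in decimal gigabytes (ENG_GIG bytes) with two decimal places, and the
 * RAID level label.
 */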
303static int cciss_seq_show(struct seq_file *seq, void *v)
304{
305	sector_t vol_sz, vol_sz_frac;
306	ctlr_info_t *h = seq->private;
307	unsigned ctlr = h->ctlr;
308	loff_t *pos = v;
309	drive_info_struct *drv = &h->drv[*pos];
310
311	if (*pos > h->highest_lun)
312		return 0;
313
314	if (drv->heads == 0)
315		return 0;
316
317	vol_sz = drv->nr_blocks;
318	vol_sz_frac = sector_div(vol_sz, ENG_GIG_FACTOR);
319	vol_sz_frac *= 100;
320	sector_div(vol_sz_frac, ENG_GIG_FACTOR);
321
322	if (drv->raid_level > 5)
323		drv->raid_level = RAID_UNKNOWN;
324	seq_printf(seq, "cciss/c%dd%d:"
325			"\t%4u.%02uGB\tRAID %s\n",
326			ctlr, (int) *pos, (int)vol_sz, (int)vol_sz_frac,
327			raid_label[drv->raid_level]);
328	return 0;
329}
330
331static void *cciss_seq_next(struct seq_file *seq, void *v, loff_t *pos)
332{
333	ctlr_info_t *h = seq->private;
334
335	if (*pos > h->highest_lun)
336		return NULL;
337	*pos += 1;
338
339	return pos;
340}
341
342static void cciss_seq_stop(struct seq_file *seq, void *v)
343{
344	ctlr_info_t *h = seq->private;
345
346	/* Only reset h->busy_configuring if we succeeded in setting
347	 * it during cciss_seq_start. */
348	if (v == ERR_PTR(-EBUSY))
349		return;
350
351	h->busy_configuring = 0;
352}
353
354static struct seq_operations cciss_seq_ops = {
355	.start = cciss_seq_start,
356	.show  = cciss_seq_show,
357	.next  = cciss_seq_next,
358	.stop  = cciss_seq_stop,
359};
360
361static int cciss_seq_open(struct inode *inode, struct file *file)
362{
363	int ret = seq_open(file, &cciss_seq_ops);
364	struct seq_file *seq = file->private_data;
365
366	if (!ret)
367		seq->private = PDE(inode)->data;
368
369	return ret;
370}
371
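/*
 * /proc write handler.  The only supported command is "engage scsi",
 * which (with CONFIG_CISS_SCSI_TAPE) attaches the SCSI tape support
 * (see cciss_scsi.c) via cciss_engage_scsi().
 */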
372static ssize_t
373cciss_proc_write(struct file *file, const char __user *buf,
374		 size_t length, loff_t *ppos)
375{
376	int err;
377	char *buffer;
378
379#ifndef CONFIG_CISS_SCSI_TAPE
380	return -EINVAL;
381#endif
382
383	if (!buf || length > PAGE_SIZE - 1)
384		return -EINVAL;
385
386	buffer = (char *)__get_free_page(GFP_KERNEL);
387	if (!buffer)
388		return -ENOMEM;
389
390	err = -EFAULT;
391	if (copy_from_user(buffer, buf, length))
392		goto out;
393	buffer[length] = '\0';
394
395#ifdef CONFIG_CISS_SCSI_TAPE
396	if (strncmp(ENGAGE_SCSI, buffer, sizeof ENGAGE_SCSI - 1) == 0) {
397		struct seq_file *seq = file->private_data;
398		ctlr_info_t *h = seq->private;
399		int rc;
400
401		rc = cciss_engage_scsi(h->ctlr);
402		if (rc != 0)
403			err = -rc;
404		else
405			err = length;
406	} else
407#endif /* CONFIG_CISS_SCSI_TAPE */
408		err = -EINVAL;
409	/* might be nice to have "disengage" too, but it's not
410	   safely possible. (only 1 module use count, lock issues.) */
411
412out:
413	free_page((unsigned long)buffer);
414	return err;
415}
416
417static struct file_operations cciss_proc_fops = {
418	.owner	 = THIS_MODULE,
419	.open    = cciss_seq_open,
420	.read    = seq_read,
421	.llseek  = seq_lseek,
422	.release = seq_release,
423	.write	 = cciss_proc_write,
424};
425
426static void __devinit cciss_procinit(int i)
427{
428	struct proc_dir_entry *pde;
429
430	if (proc_cciss == NULL)
431		proc_cciss = proc_mkdir("driver/cciss", NULL);
432	if (!proc_cciss)
433		return;
434	pde = proc_create_data(hba[i]->devname, S_IWUSR | S_IRUSR | S_IRGRP |
435					S_IROTH, proc_cciss,
436					&cciss_proc_fops, hba[i]);
437}
438#endif				/* CONFIG_PROC_FS */
439
440#define MAX_PRODUCT_NAME_LEN 19
441
442#define to_hba(n) container_of(n, struct ctlr_info, dev)
443#define to_drv(n) container_of(n, drive_info_struct, dev)
444
445static struct device_type cciss_host_type = {
446	.name		= "cciss_host",
447};
448
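/* sysfs 'unique_id' attribute: the logical drive's 16-byte serial number in hex. */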
449static ssize_t dev_show_unique_id(struct device *dev,
450				 struct device_attribute *attr,
451				 char *buf)
452{
453	drive_info_struct *drv = to_drv(dev);
454	struct ctlr_info *h = to_hba(drv->dev.parent);
455	__u8 sn[16];
456	unsigned long flags;
457	int ret = 0;
458
459	spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
460	if (h->busy_configuring)
461		ret = -EBUSY;
462	else
463		memcpy(sn, drv->serial_no, sizeof(sn));
464	spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
465
466	if (ret)
467		return ret;
468	else
469		return snprintf(buf, 16 * 2 + 2,
470				"%02X%02X%02X%02X%02X%02X%02X%02X"
471				"%02X%02X%02X%02X%02X%02X%02X%02X\n",
472				sn[0], sn[1], sn[2], sn[3],
473				sn[4], sn[5], sn[6], sn[7],
474				sn[8], sn[9], sn[10], sn[11],
475				sn[12], sn[13], sn[14], sn[15]);
476}
477DEVICE_ATTR(unique_id, S_IRUGO, dev_show_unique_id, NULL);
478
479static ssize_t dev_show_vendor(struct device *dev,
480			       struct device_attribute *attr,
481			       char *buf)
482{
483	drive_info_struct *drv = to_drv(dev);
484	struct ctlr_info *h = to_hba(drv->dev.parent);
485	char vendor[VENDOR_LEN + 1];
486	unsigned long flags;
487	int ret = 0;
488
489	spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
490	if (h->busy_configuring)
491		ret = -EBUSY;
492	else
493		memcpy(vendor, drv->vendor, VENDOR_LEN + 1);
494	spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
495
496	if (ret)
497		return ret;
498	else
499		return snprintf(buf, sizeof(vendor) + 1, "%s\n", vendor);
500}
501DEVICE_ATTR(vendor, S_IRUGO, dev_show_vendor, NULL);
502
503static ssize_t dev_show_model(struct device *dev,
504			      struct device_attribute *attr,
505			      char *buf)
506{
507	drive_info_struct *drv = to_drv(dev);
508	struct ctlr_info *h = to_hba(drv->dev.parent);
509	char model[MODEL_LEN + 1];
510	unsigned long flags;
511	int ret = 0;
512
513	spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
514	if (h->busy_configuring)
515		ret = -EBUSY;
516	else
517		memcpy(model, drv->model, MODEL_LEN + 1);
518	spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
519
520	if (ret)
521		return ret;
522	else
523		return snprintf(buf, sizeof(model) + 1, "%s\n", model);
524}
525DEVICE_ATTR(model, S_IRUGO, dev_show_model, NULL);
526
527static ssize_t dev_show_rev(struct device *dev,
528			    struct device_attribute *attr,
529			    char *buf)
530{
531	drive_info_struct *drv = to_drv(dev);
532	struct ctlr_info *h = to_hba(drv->dev.parent);
533	char rev[REV_LEN + 1];
534	unsigned long flags;
535	int ret = 0;
536
537	spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
538	if (h->busy_configuring)
539		ret = -EBUSY;
540	else
541		memcpy(rev, drv->rev, REV_LEN + 1);
542	spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
543
544	if (ret)
545		return ret;
546	else
547		return snprintf(buf, sizeof(rev) + 1, "%s\n", rev);
548}
549DEVICE_ATTR(rev, S_IRUGO, dev_show_rev, NULL);
550
551static struct attribute *cciss_dev_attrs[] = {
552	&dev_attr_unique_id.attr,
553	&dev_attr_model.attr,
554	&dev_attr_vendor.attr,
555	&dev_attr_rev.attr,
556	NULL
557};
558
559static struct attribute_group cciss_dev_attr_group = {
560	.attrs = cciss_dev_attrs,
561};
562
563static struct attribute_group *cciss_dev_attr_groups[] = {
564	&cciss_dev_attr_group,
565	NULL
566};
567
568static struct device_type cciss_dev_type = {
569	.name		= "cciss_device",
570	.groups		= cciss_dev_attr_groups,
571};
572
573static struct bus_type cciss_bus_type = {
574	.name		= "cciss",
575};
576
577
578/*
579 * Initialize sysfs entry for each controller.  This sets up and registers
580 * the 'cciss#' directory for each individual controller under
581 * /sys/bus/pci/devices/<dev>/.
582 */
583static int cciss_create_hba_sysfs_entry(struct ctlr_info *h)
584{
585	device_initialize(&h->dev);
586	h->dev.type = &cciss_host_type;
587	h->dev.bus = &cciss_bus_type;
588	dev_set_name(&h->dev, "%s", h->devname);
589	h->dev.parent = &h->pdev->dev;
590
591	return device_add(&h->dev);
592}
593
594/*
595 * Remove sysfs entries for an hba.
596 */
597static void cciss_destroy_hba_sysfs_entry(struct ctlr_info *h)
598{
599	device_del(&h->dev);
600}
601
602/*
603 * Initialize sysfs for each logical drive.  This sets up and registers
604 * the 'c#d#' directory for each individual logical drive under
605 * /sys/bus/pci/devices/<dev>/cciss#/. We also create a link from
606 * /sys/block/cciss!c#d# to this entry.
607 */
608static int cciss_create_ld_sysfs_entry(struct ctlr_info *h,
609				       drive_info_struct *drv,
610				       int drv_index)
611{
612	device_initialize(&drv->dev);
613	drv->dev.type = &cciss_dev_type;
614	drv->dev.bus = &cciss_bus_type;
615	dev_set_name(&drv->dev, "c%dd%d", h->ctlr, drv_index);
616	drv->dev.parent = &h->dev;
617	return device_add(&drv->dev);
618}
619
620/*
621 * Remove sysfs entries for a logical drive.
622 */
623static void cciss_destroy_ld_sysfs_entry(drive_info_struct *drv)
624{
625	device_del(&drv->dev);
626}
627
628/*
629 * For operations that cannot sleep, a command block is allocated at init,
630 * and managed by cmd_alloc() and cmd_free() using a simple bitmap to track
631 * which ones are free or in use.  For operations that can wait for kmalloc
632 * to possibly sleep, this routine can be called with get_from_pool set to 0.
633 * cmd_free() MUST be given the same got_from_pool value that cmd_alloc() used.
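 * For example, the CCISS_PASSTHRU ioctl path, which may sleep, uses
 * cmd_alloc(host, 0)/cmd_free(host, c, 0), while commands taken from the
 * pool are freed with cmd_free(h, cmd, 1), as in cciss_softirq_done().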
634 */
635static CommandList_struct *cmd_alloc(ctlr_info_t *h, int get_from_pool)
636{
637	CommandList_struct *c;
638	int i;
639	u64bit temp64;
640	dma_addr_t cmd_dma_handle, err_dma_handle;
641
642	if (!get_from_pool) {
643		c = (CommandList_struct *) pci_alloc_consistent(h->pdev,
644			sizeof(CommandList_struct), &cmd_dma_handle);
645		if (c == NULL)
646			return NULL;
647		memset(c, 0, sizeof(CommandList_struct));
648
649		c->cmdindex = -1;
650
651		c->err_info = (ErrorInfo_struct *)
652		    pci_alloc_consistent(h->pdev, sizeof(ErrorInfo_struct),
653			    &err_dma_handle);
654
655		if (c->err_info == NULL) {
656			pci_free_consistent(h->pdev,
657				sizeof(CommandList_struct), c, cmd_dma_handle);
658			return NULL;
659		}
660		memset(c->err_info, 0, sizeof(ErrorInfo_struct));
661	} else {		/* get it out of the controller's pool */
662
663		do {
664			i = find_first_zero_bit(h->cmd_pool_bits, h->nr_cmds);
665			if (i == h->nr_cmds)
666				return NULL;
667		} while (test_and_set_bit
668			 (i & (BITS_PER_LONG - 1),
669			  h->cmd_pool_bits + (i / BITS_PER_LONG)) != 0);
670#ifdef CCISS_DEBUG
671		printk(KERN_DEBUG "cciss: using command buffer %d\n", i);
672#endif
673		c = h->cmd_pool + i;
674		memset(c, 0, sizeof(CommandList_struct));
675		cmd_dma_handle = h->cmd_pool_dhandle
676		    + i * sizeof(CommandList_struct);
677		c->err_info = h->errinfo_pool + i;
678		memset(c->err_info, 0, sizeof(ErrorInfo_struct));
679		err_dma_handle = h->errinfo_pool_dhandle
680		    + i * sizeof(ErrorInfo_struct);
681		h->nr_allocs++;
682
683		c->cmdindex = i;
684	}
685
686	INIT_HLIST_NODE(&c->list);
687	c->busaddr = (__u32) cmd_dma_handle;
688	temp64.val = (__u64) err_dma_handle;
689	c->ErrDesc.Addr.lower = temp64.val32.lower;
690	c->ErrDesc.Addr.upper = temp64.val32.upper;
691	c->ErrDesc.Len = sizeof(ErrorInfo_struct);
692
693	c->ctlr = h->ctlr;
694	return c;
695}
696
697/*
698 * Frees a command block that was previously allocated with cmd_alloc().
699 */
700static void cmd_free(ctlr_info_t *h, CommandList_struct *c, int got_from_pool)
701{
702	int i;
703	u64bit temp64;
704
705	if (!got_from_pool) {
706		temp64.val32.lower = c->ErrDesc.Addr.lower;
707		temp64.val32.upper = c->ErrDesc.Addr.upper;
708		pci_free_consistent(h->pdev, sizeof(ErrorInfo_struct),
709				    c->err_info, (dma_addr_t) temp64.val);
710		pci_free_consistent(h->pdev, sizeof(CommandList_struct),
711				    c, (dma_addr_t) c->busaddr);
712	} else {
713		i = c - h->cmd_pool;
714		clear_bit(i & (BITS_PER_LONG - 1),
715			  h->cmd_pool_bits + (i / BITS_PER_LONG));
716		h->nr_frees++;
717	}
718}
719
720static inline ctlr_info_t *get_host(struct gendisk *disk)
721{
722	return disk->queue->queuedata;
723}
724
725static inline drive_info_struct *get_drv(struct gendisk *disk)
726{
727	return disk->private_data;
728}
729
730/*
731 * Open.  Make sure the device is really there.
732 */
733static int cciss_open(struct block_device *bdev, fmode_t mode)
734{
735	ctlr_info_t *host = get_host(bdev->bd_disk);
736	drive_info_struct *drv = get_drv(bdev->bd_disk);
737
738#ifdef CCISS_DEBUG
739	printk(KERN_DEBUG "cciss_open %s\n", bdev->bd_disk->disk_name);
740#endif				/* CCISS_DEBUG */
741
742	if (host->busy_initializing || drv->busy_configuring)
743		return -EBUSY;
744	/*
745	 * Root is allowed to open raw volume zero even if it's not configured
746	 * so array config can still work. Root is also allowed to open any
747	 * volume that has a LUN ID, so it can issue IOCTL to reread the
748	 * disk information.  I don't think I really like this
749	 * but I'm already using way too many device nodes to claim another one
750	 * for "raw controller".
751	 */
752	if (drv->heads == 0) {
753		if (MINOR(bdev->bd_dev) != 0) {	/* not node 0? */
754			/* if not node 0 make sure it is a partition = 0 */
755			if (MINOR(bdev->bd_dev) & 0x0f) {
756				return -ENXIO;
757				/* if it is, make sure we have a LUN ID */
758			} else if (drv->LunID == 0) {
759				return -ENXIO;
760			}
761		}
762		if (!capable(CAP_SYS_ADMIN))
763			return -EPERM;
764	}
765	drv->usage_count++;
766	host->usage_count++;
767	return 0;
768}
769
770/*
771 * Close.  Just drop the usage counts; there is nothing else to do here.
772 */
773static int cciss_release(struct gendisk *disk, fmode_t mode)
774{
775	ctlr_info_t *host = get_host(disk);
776	drive_info_struct *drv = get_drv(disk);
777
778#ifdef CCISS_DEBUG
779	printk(KERN_DEBUG "cciss_release %s\n", disk->disk_name);
780#endif				/* CCISS_DEBUG */
781
782	drv->usage_count--;
783	host->usage_count--;
784	return 0;
785}
786
787#ifdef CONFIG_COMPAT
788
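/* Take the big kernel lock around cciss_ioctl(), mirroring the locking
 * that .locked_ioctl provides on the native (non-compat) path. */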
789static int do_ioctl(struct block_device *bdev, fmode_t mode,
790		    unsigned cmd, unsigned long arg)
791{
792	int ret;
793	lock_kernel();
794	ret = cciss_ioctl(bdev, mode, cmd, arg);
795	unlock_kernel();
796	return ret;
797}
798
799static int cciss_ioctl32_passthru(struct block_device *bdev, fmode_t mode,
800				  unsigned cmd, unsigned long arg);
801static int cciss_ioctl32_big_passthru(struct block_device *bdev, fmode_t mode,
802				      unsigned cmd, unsigned long arg);
803
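/* 32-bit compat ioctl entry point: most commands are layout-compatible
 * and pass straight through; the passthru ioctls need their 32-bit
 * argument structures translated first. */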
804static int cciss_compat_ioctl(struct block_device *bdev, fmode_t mode,
805			      unsigned cmd, unsigned long arg)
806{
807	switch (cmd) {
808	case CCISS_GETPCIINFO:
809	case CCISS_GETINTINFO:
810	case CCISS_SETINTINFO:
811	case CCISS_GETNODENAME:
812	case CCISS_SETNODENAME:
813	case CCISS_GETHEARTBEAT:
814	case CCISS_GETBUSTYPES:
815	case CCISS_GETFIRMVER:
816	case CCISS_GETDRIVVER:
817	case CCISS_REVALIDVOLS:
818	case CCISS_DEREGDISK:
819	case CCISS_REGNEWDISK:
820	case CCISS_REGNEWD:
821	case CCISS_RESCANDISK:
822	case CCISS_GETLUNINFO:
823		return do_ioctl(bdev, mode, cmd, arg);
824
825	case CCISS_PASSTHRU32:
826		return cciss_ioctl32_passthru(bdev, mode, cmd, arg);
827	case CCISS_BIG_PASSTHRU32:
828		return cciss_ioctl32_big_passthru(bdev, mode, cmd, arg);
829
830	default:
831		return -ENOIOCTLCMD;
832	}
833}
834
835static int cciss_ioctl32_passthru(struct block_device *bdev, fmode_t mode,
836				  unsigned cmd, unsigned long arg)
837{
838	IOCTL32_Command_struct __user *arg32 =
839	    (IOCTL32_Command_struct __user *) arg;
840	IOCTL_Command_struct arg64;
841	IOCTL_Command_struct __user *p = compat_alloc_user_space(sizeof(arg64));
842	int err;
843	u32 cp;
844
845	err = 0;
846	err |=
847	    copy_from_user(&arg64.LUN_info, &arg32->LUN_info,
848			   sizeof(arg64.LUN_info));
849	err |=
850	    copy_from_user(&arg64.Request, &arg32->Request,
851			   sizeof(arg64.Request));
852	err |=
853	    copy_from_user(&arg64.error_info, &arg32->error_info,
854			   sizeof(arg64.error_info));
855	err |= get_user(arg64.buf_size, &arg32->buf_size);
856	err |= get_user(cp, &arg32->buf);
857	arg64.buf = compat_ptr(cp);
858	err |= copy_to_user(p, &arg64, sizeof(arg64));
859
860	if (err)
861		return -EFAULT;
862
863	err = do_ioctl(bdev, mode, CCISS_PASSTHRU, (unsigned long)p);
864	if (err)
865		return err;
866	err |=
867	    copy_in_user(&arg32->error_info, &p->error_info,
868			 sizeof(arg32->error_info));
869	if (err)
870		return -EFAULT;
871	return err;
872}
873
874static int cciss_ioctl32_big_passthru(struct block_device *bdev, fmode_t mode,
875				      unsigned cmd, unsigned long arg)
876{
877	BIG_IOCTL32_Command_struct __user *arg32 =
878	    (BIG_IOCTL32_Command_struct __user *) arg;
879	BIG_IOCTL_Command_struct arg64;
880	BIG_IOCTL_Command_struct __user *p =
881	    compat_alloc_user_space(sizeof(arg64));
882	int err;
883	u32 cp;
884
885	err = 0;
886	err |=
887	    copy_from_user(&arg64.LUN_info, &arg32->LUN_info,
888			   sizeof(arg64.LUN_info));
889	err |=
890	    copy_from_user(&arg64.Request, &arg32->Request,
891			   sizeof(arg64.Request));
892	err |=
893	    copy_from_user(&arg64.error_info, &arg32->error_info,
894			   sizeof(arg64.error_info));
895	err |= get_user(arg64.buf_size, &arg32->buf_size);
896	err |= get_user(arg64.malloc_size, &arg32->malloc_size);
897	err |= get_user(cp, &arg32->buf);
898	arg64.buf = compat_ptr(cp);
899	err |= copy_to_user(p, &arg64, sizeof(arg64));
900
901	if (err)
902		return -EFAULT;
903
904	err = do_ioctl(bdev, mode, CCISS_BIG_PASSTHRU, (unsigned long)p);
905	if (err)
906		return err;
907	err |=
908	    copy_in_user(&arg32->error_info, &p->error_info,
909			 sizeof(arg32->error_info));
910	if (err)
911		return -EFAULT;
912	return err;
913}
914#endif
915
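/* Report the cached drive geometry (heads/sectors/cylinders) for HDIO_GETGEO. */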
916static int cciss_getgeo(struct block_device *bdev, struct hd_geometry *geo)
917{
918	drive_info_struct *drv = get_drv(bdev->bd_disk);
919
920	if (!drv->cylinders)
921		return -ENXIO;
922
923	geo->heads = drv->heads;
924	geo->sectors = drv->sectors;
925	geo->cylinders = drv->cylinders;
926	return 0;
927}
928
929static void check_ioctl_unit_attention(ctlr_info_t *host, CommandList_struct *c)
930{
931	if (c->err_info->CommandStatus == CMD_TARGET_STATUS &&
932			c->err_info->ScsiStatus != SAM_STAT_CHECK_CONDITION)
933		(void)check_for_unit_attention(host, c);
934}
935/*
936 * ioctl
937 */
938static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
939		       unsigned int cmd, unsigned long arg)
940{
941	struct gendisk *disk = bdev->bd_disk;
942	ctlr_info_t *host = get_host(disk);
943	drive_info_struct *drv = get_drv(disk);
944	int ctlr = host->ctlr;
945	void __user *argp = (void __user *)arg;
946
947#ifdef CCISS_DEBUG
948	printk(KERN_DEBUG "cciss_ioctl: Called with cmd=%x %lx\n", cmd, arg);
949#endif				/* CCISS_DEBUG */
950
951	switch (cmd) {
952	case CCISS_GETPCIINFO:
953		{
954			cciss_pci_info_struct pciinfo;
955
956			if (!arg)
957				return -EINVAL;
958			pciinfo.domain = pci_domain_nr(host->pdev->bus);
959			pciinfo.bus = host->pdev->bus->number;
960			pciinfo.dev_fn = host->pdev->devfn;
961			pciinfo.board_id = host->board_id;
962			if (copy_to_user
963			    (argp, &pciinfo, sizeof(cciss_pci_info_struct)))
964				return -EFAULT;
965			return 0;
966		}
967	case CCISS_GETINTINFO:
968		{
969			cciss_coalint_struct intinfo;
970			if (!arg)
971				return -EINVAL;
972			intinfo.delay =
973			    readl(&host->cfgtable->HostWrite.CoalIntDelay);
974			intinfo.count =
975			    readl(&host->cfgtable->HostWrite.CoalIntCount);
976			if (copy_to_user
977			    (argp, &intinfo, sizeof(cciss_coalint_struct)))
978				return -EFAULT;
979			return 0;
980		}
981	case CCISS_SETINTINFO:
982		{
983			cciss_coalint_struct intinfo;
984			unsigned long flags;
985			int i;
986
987			if (!arg)
988				return -EINVAL;
989			if (!capable(CAP_SYS_ADMIN))
990				return -EPERM;
991			if (copy_from_user
992			    (&intinfo, argp, sizeof(cciss_coalint_struct)))
993				return -EFAULT;
994			if ((intinfo.delay == 0) && (intinfo.count == 0))
995			{
996//                      printk("cciss_ioctl: delay and count cannot be 0\n");
997				return -EINVAL;
998			}
999			spin_lock_irqsave(CCISS_LOCK(ctlr), flags);
1000			/* Update the field, and then ring the doorbell */
1001			writel(intinfo.delay,
1002			       &(host->cfgtable->HostWrite.CoalIntDelay));
1003			writel(intinfo.count,
1004			       &(host->cfgtable->HostWrite.CoalIntCount));
1005			writel(CFGTBL_ChangeReq, host->vaddr + SA5_DOORBELL);
1006
1007			for (i = 0; i < MAX_IOCTL_CONFIG_WAIT; i++) {
1008				if (!(readl(host->vaddr + SA5_DOORBELL)
1009				      & CFGTBL_ChangeReq))
1010					break;
1011				/* delay and try again */
1012				udelay(1000);
1013			}
1014			spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
1015			if (i >= MAX_IOCTL_CONFIG_WAIT)
1016				return -EAGAIN;
1017			return 0;
1018		}
1019	case CCISS_GETNODENAME:
1020		{
1021			NodeName_type NodeName;
1022			int i;
1023
1024			if (!arg)
1025				return -EINVAL;
1026			for (i = 0; i < 16; i++)
1027				NodeName[i] =
1028				    readb(&host->cfgtable->ServerName[i]);
1029			if (copy_to_user(argp, NodeName, sizeof(NodeName_type)))
1030				return -EFAULT;
1031			return 0;
1032		}
1033	case CCISS_SETNODENAME:
1034		{
1035			NodeName_type NodeName;
1036			unsigned long flags;
1037			int i;
1038
1039			if (!arg)
1040				return -EINVAL;
1041			if (!capable(CAP_SYS_ADMIN))
1042				return -EPERM;
1043
1044			if (copy_from_user
1045			    (NodeName, argp, sizeof(NodeName_type)))
1046				return -EFAULT;
1047
1048			spin_lock_irqsave(CCISS_LOCK(ctlr), flags);
1049
1050			/* Update the field, and then ring the doorbell */
1051			for (i = 0; i < 16; i++)
1052				writeb(NodeName[i],
1053				       &host->cfgtable->ServerName[i]);
1054
1055			writel(CFGTBL_ChangeReq, host->vaddr + SA5_DOORBELL);
1056
1057			for (i = 0; i < MAX_IOCTL_CONFIG_WAIT; i++) {
1058				if (!(readl(host->vaddr + SA5_DOORBELL)
1059				      & CFGTBL_ChangeReq))
1060					break;
1061				/* delay and try again */
1062				udelay(1000);
1063			}
1064			spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
1065			if (i >= MAX_IOCTL_CONFIG_WAIT)
1066				return -EAGAIN;
1067			return 0;
1068		}
1069
1070	case CCISS_GETHEARTBEAT:
1071		{
1072			Heartbeat_type heartbeat;
1073
1074			if (!arg)
1075				return -EINVAL;
1076			heartbeat = readl(&host->cfgtable->HeartBeat);
1077			if (copy_to_user
1078			    (argp, &heartbeat, sizeof(Heartbeat_type)))
1079				return -EFAULT;
1080			return 0;
1081		}
1082	case CCISS_GETBUSTYPES:
1083		{
1084			BusTypes_type BusTypes;
1085
1086			if (!arg)
1087				return -EINVAL;
1088			BusTypes = readl(&host->cfgtable->BusTypes);
1089			if (copy_to_user
1090			    (argp, &BusTypes, sizeof(BusTypes_type)))
1091				return -EFAULT;
1092			return 0;
1093		}
1094	case CCISS_GETFIRMVER:
1095		{
1096			FirmwareVer_type firmware;
1097
1098			if (!arg)
1099				return -EINVAL;
1100			memcpy(firmware, host->firm_ver, 4);
1101
1102			if (copy_to_user
1103			    (argp, firmware, sizeof(FirmwareVer_type)))
1104				return -EFAULT;
1105			return 0;
1106		}
1107	case CCISS_GETDRIVVER:
1108		{
1109			DriverVer_type DriverVer = DRIVER_VERSION;
1110
1111			if (!arg)
1112				return -EINVAL;
1113
1114			if (copy_to_user
1115			    (argp, &DriverVer, sizeof(DriverVer_type)))
1116				return -EFAULT;
1117			return 0;
1118		}
1119
1120	case CCISS_DEREGDISK:
1121	case CCISS_REGNEWD:
1122	case CCISS_REVALIDVOLS:
1123		return rebuild_lun_table(host, 0);
1124
1125	case CCISS_GETLUNINFO:{
1126			LogvolInfo_struct luninfo;
1127
1128			luninfo.LunID = drv->LunID;
1129			luninfo.num_opens = drv->usage_count;
1130			luninfo.num_parts = 0;
1131			if (copy_to_user(argp, &luninfo,
1132					 sizeof(LogvolInfo_struct)))
1133				return -EFAULT;
1134			return 0;
1135		}
1136	case CCISS_PASSTHRU:
1137		{
1138			IOCTL_Command_struct iocommand;
1139			CommandList_struct *c;
1140			char *buff = NULL;
1141			u64bit temp64;
1142			unsigned long flags;
1143			DECLARE_COMPLETION_ONSTACK(wait);
1144
1145			if (!arg)
1146				return -EINVAL;
1147
1148			if (!capable(CAP_SYS_RAWIO))
1149				return -EPERM;
1150
1151			if (copy_from_user
1152			    (&iocommand, argp, sizeof(IOCTL_Command_struct)))
1153				return -EFAULT;
1154			if ((iocommand.buf_size < 1) &&
1155			    (iocommand.Request.Type.Direction != XFER_NONE)) {
1156				return -EINVAL;
1157			}
1158#if 0				/* 'buf_size' member is 16-bits, and always smaller than kmalloc limit */
1159			/* Check kmalloc limits */
1160			if (iocommand.buf_size > 128000)
1161				return -EINVAL;
1162#endif
1163			if (iocommand.buf_size > 0) {
1164				buff = kmalloc(iocommand.buf_size, GFP_KERNEL);
1165				if (buff == NULL)
1166					return -ENOMEM;
1167			}
1168			if (iocommand.Request.Type.Direction == XFER_WRITE) {
1169				/* Copy the data into the buffer we created */
1170				if (copy_from_user
1171				    (buff, iocommand.buf, iocommand.buf_size)) {
1172					kfree(buff);
1173					return -EFAULT;
1174				}
1175			} else {
1176				memset(buff, 0, iocommand.buf_size);
1177			}
1178			if ((c = cmd_alloc(host, 0)) == NULL) {
1179				kfree(buff);
1180				return -ENOMEM;
1181			}
1182			// Fill in the command type
1183			c->cmd_type = CMD_IOCTL_PEND;
1184			// Fill in Command Header
1185			c->Header.ReplyQueue = 0;	// unused in simple mode
1186			if (iocommand.buf_size > 0)	// buffer to fill
1187			{
1188				c->Header.SGList = 1;
1189				c->Header.SGTotal = 1;
1190			} else	// no buffers to fill
1191			{
1192				c->Header.SGList = 0;
1193				c->Header.SGTotal = 0;
1194			}
1195			c->Header.LUN = iocommand.LUN_info;
1196			c->Header.Tag.lower = c->busaddr;	// use the kernel address of the cmd block for tag
1197
1198			// Fill in Request block
1199			c->Request = iocommand.Request;
1200
1201			// Fill in the scatter gather information
1202			if (iocommand.buf_size > 0) {
1203				temp64.val = pci_map_single(host->pdev, buff,
1204					iocommand.buf_size,
1205					PCI_DMA_BIDIRECTIONAL);
1206				c->SG[0].Addr.lower = temp64.val32.lower;
1207				c->SG[0].Addr.upper = temp64.val32.upper;
1208				c->SG[0].Len = iocommand.buf_size;
1209				c->SG[0].Ext = 0;	// we are not chaining
1210			}
1211			c->waiting = &wait;
1212
1213			/* Put the request on the tail of the request queue */
1214			spin_lock_irqsave(CCISS_LOCK(ctlr), flags);
1215			addQ(&host->reqQ, c);
1216			host->Qdepth++;
1217			start_io(host);
1218			spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
1219
1220			wait_for_completion(&wait);
1221
1222			/* unlock the buffers from DMA */
1223			temp64.val32.lower = c->SG[0].Addr.lower;
1224			temp64.val32.upper = c->SG[0].Addr.upper;
1225			pci_unmap_single(host->pdev, (dma_addr_t) temp64.val,
1226					 iocommand.buf_size,
1227					 PCI_DMA_BIDIRECTIONAL);
1228
1229			check_ioctl_unit_attention(host, c);
1230
1231			/* Copy the error information out */
1232			iocommand.error_info = *(c->err_info);
1233			if (copy_to_user
1234			    (argp, &iocommand, sizeof(IOCTL_Command_struct))) {
1235				kfree(buff);
1236				cmd_free(host, c, 0);
1237				return -EFAULT;
1238			}
1239
1240			if (iocommand.Request.Type.Direction == XFER_READ) {
1241				/* Copy the data out of the buffer we created */
1242				if (copy_to_user
1243				    (iocommand.buf, buff, iocommand.buf_size)) {
1244					kfree(buff);
1245					cmd_free(host, c, 0);
1246					return -EFAULT;
1247				}
1248			}
1249			kfree(buff);
1250			cmd_free(host, c, 0);
1251			return 0;
1252		}
1253	case CCISS_BIG_PASSTHRU:{
1254			BIG_IOCTL_Command_struct *ioc;
1255			CommandList_struct *c;
1256			unsigned char **buff = NULL;
1257			int *buff_size = NULL;
1258			u64bit temp64;
1259			unsigned long flags;
1260			BYTE sg_used = 0;
1261			int status = 0;
1262			int i;
1263			DECLARE_COMPLETION_ONSTACK(wait);
1264			__u32 left;
1265			__u32 sz;
1266			BYTE __user *data_ptr;
1267
1268			if (!arg)
1269				return -EINVAL;
1270			if (!capable(CAP_SYS_RAWIO))
1271				return -EPERM;
1272			ioc = (BIG_IOCTL_Command_struct *)
1273			    kmalloc(sizeof(*ioc), GFP_KERNEL);
1274			if (!ioc) {
1275				status = -ENOMEM;
1276				goto cleanup1;
1277			}
1278			if (copy_from_user(ioc, argp, sizeof(*ioc))) {
1279				status = -EFAULT;
1280				goto cleanup1;
1281			}
1282			if ((ioc->buf_size < 1) &&
1283			    (ioc->Request.Type.Direction != XFER_NONE)) {
1284				status = -EINVAL;
1285				goto cleanup1;
1286			}
1287			/* Check kmalloc limits  using all SGs */
1288			if (ioc->malloc_size > MAX_KMALLOC_SIZE) {
1289				status = -EINVAL;
1290				goto cleanup1;
1291			}
1292			if (ioc->buf_size > ioc->malloc_size * MAXSGENTRIES) {
1293				status = -EINVAL;
1294				goto cleanup1;
1295			}
1296			buff =
1297			    kzalloc(MAXSGENTRIES * sizeof(char *), GFP_KERNEL);
1298			if (!buff) {
1299				status = -ENOMEM;
1300				goto cleanup1;
1301			}
1302			buff_size = kmalloc(MAXSGENTRIES * sizeof(int),
1303						   GFP_KERNEL);
1304			if (!buff_size) {
1305				status = -ENOMEM;
1306				goto cleanup1;
1307			}
1308			left = ioc->buf_size;
1309			data_ptr = ioc->buf;
1310			while (left) {
1311				sz = (left >
1312				      ioc->malloc_size) ? ioc->
1313				    malloc_size : left;
1314				buff_size[sg_used] = sz;
1315				buff[sg_used] = kmalloc(sz, GFP_KERNEL);
1316				if (buff[sg_used] == NULL) {
1317					status = -ENOMEM;
1318					goto cleanup1;
1319				}
1320				if (ioc->Request.Type.Direction == XFER_WRITE) {
1321					if (copy_from_user
1322					    (buff[sg_used], data_ptr, sz)) {
1323						status = -EFAULT;
1324						goto cleanup1;
1325					}
1326				} else {
1327					memset(buff[sg_used], 0, sz);
1328				}
1329				left -= sz;
1330				data_ptr += sz;
1331				sg_used++;
1332			}
1333			if ((c = cmd_alloc(host, 0)) == NULL) {
1334				status = -ENOMEM;
1335				goto cleanup1;
1336			}
1337			c->cmd_type = CMD_IOCTL_PEND;
1338			c->Header.ReplyQueue = 0;
1339
1340			if (ioc->buf_size > 0) {
1341				c->Header.SGList = sg_used;
1342				c->Header.SGTotal = sg_used;
1343			} else {
1344				c->Header.SGList = 0;
1345				c->Header.SGTotal = 0;
1346			}
1347			c->Header.LUN = ioc->LUN_info;
1348			c->Header.Tag.lower = c->busaddr;
1349
1350			c->Request = ioc->Request;
1351			if (ioc->buf_size > 0) {
1352				int i;
1353				for (i = 0; i < sg_used; i++) {
1354					temp64.val =
1355					    pci_map_single(host->pdev, buff[i],
1356						    buff_size[i],
1357						    PCI_DMA_BIDIRECTIONAL);
1358					c->SG[i].Addr.lower =
1359					    temp64.val32.lower;
1360					c->SG[i].Addr.upper =
1361					    temp64.val32.upper;
1362					c->SG[i].Len = buff_size[i];
1363					c->SG[i].Ext = 0;	/* we are not chaining */
1364				}
1365			}
1366			c->waiting = &wait;
1367			/* Put the request on the tail of the request queue */
1368			spin_lock_irqsave(CCISS_LOCK(ctlr), flags);
1369			addQ(&host->reqQ, c);
1370			host->Qdepth++;
1371			start_io(host);
1372			spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
1373			wait_for_completion(&wait);
1374			/* unlock the buffers from DMA */
1375			for (i = 0; i < sg_used; i++) {
1376				temp64.val32.lower = c->SG[i].Addr.lower;
1377				temp64.val32.upper = c->SG[i].Addr.upper;
1378				pci_unmap_single(host->pdev,
1379					(dma_addr_t) temp64.val, buff_size[i],
1380					PCI_DMA_BIDIRECTIONAL);
1381			}
1382			check_ioctl_unit_attention(host, c);
1383			/* Copy the error information out */
1384			ioc->error_info = *(c->err_info);
1385			if (copy_to_user(argp, ioc, sizeof(*ioc))) {
1386				cmd_free(host, c, 0);
1387				status = -EFAULT;
1388				goto cleanup1;
1389			}
1390			if (ioc->Request.Type.Direction == XFER_READ) {
1391				/* Copy the data out of the buffer we created */
1392				BYTE __user *ptr = ioc->buf;
1393				for (i = 0; i < sg_used; i++) {
1394					if (copy_to_user
1395					    (ptr, buff[i], buff_size[i])) {
1396						cmd_free(host, c, 0);
1397						status = -EFAULT;
1398						goto cleanup1;
1399					}
1400					ptr += buff_size[i];
1401				}
1402			}
1403			cmd_free(host, c, 0);
1404			status = 0;
1405		      cleanup1:
1406			if (buff) {
1407				for (i = 0; i < sg_used; i++)
1408					kfree(buff[i]);
1409				kfree(buff);
1410			}
1411			kfree(buff_size);
1412			kfree(ioc);
1413			return status;
1414		}
1415
1416	/* scsi_cmd_ioctl handles these, below, though some are not */
1417	/* very meaningful for cciss.  SG_IO is the main one people want. */
1418
1419	case SG_GET_VERSION_NUM:
1420	case SG_SET_TIMEOUT:
1421	case SG_GET_TIMEOUT:
1422	case SG_GET_RESERVED_SIZE:
1423	case SG_SET_RESERVED_SIZE:
1424	case SG_EMULATED_HOST:
1425	case SG_IO:
1426	case SCSI_IOCTL_SEND_COMMAND:
1427		return scsi_cmd_ioctl(disk->queue, disk, mode, cmd, argp);
1428
1429	/* scsi_cmd_ioctl would normally handle these, below, but */
1430	/* they aren't a good fit for cciss, as CD-ROMs are */
1431	/* not supported, and we don't have any bus/target/lun */
1432	/* which we present to the kernel. */
1433
1434	case CDROM_SEND_PACKET:
1435	case CDROMCLOSETRAY:
1436	case CDROMEJECT:
1437	case SCSI_IOCTL_GET_IDLUN:
1438	case SCSI_IOCTL_GET_BUS_NUMBER:
1439	default:
1440		return -ENOTTY;
1441	}
1442}
1443
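/* Restart the logical drives' request queues, round-robin starting at
 * h->next_to_run, until the command pool runs out again. */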
1444static void cciss_check_queues(ctlr_info_t *h)
1445{
1446	int start_queue = h->next_to_run;
1447	int i;
1448
1449	/* check to see if we have maxed out the number of commands that can
1450	 * be placed on the queue.  If so then exit.  We do this check here
1451	 * in case the interrupt we serviced was from an ioctl and did not
1452	 * free any new commands.
1453	 */
1454	if ((find_first_zero_bit(h->cmd_pool_bits, h->nr_cmds)) == h->nr_cmds)
1455		return;
1456
1457	/* We have room on the queue for more commands.  Now we need to queue
1458	 * them up.  We will also keep track of the next queue to run so
1459	 * that every queue gets a chance to be started first.
1460	 */
1461	for (i = 0; i < h->highest_lun + 1; i++) {
1462		int curr_queue = (start_queue + i) % (h->highest_lun + 1);
1463		/* make sure the disk has been added and the drive is real
1464		 * because this can be called from the middle of init_one.
1465		 */
1466		if (!(h->drv[curr_queue].queue) || !(h->drv[curr_queue].heads))
1467			continue;
1468		blk_start_queue(h->gendisk[curr_queue]->queue);
1469
1470		/* check to see if we have maxed out the number of commands
1471		 * that can be placed on the queue.
1472		 */
1473		if ((find_first_zero_bit(h->cmd_pool_bits, h->nr_cmds)) == h->nr_cmds) {
1474			if (curr_queue == start_queue) {
1475				h->next_to_run =
1476				    (start_queue + 1) % (h->highest_lun + 1);
1477				break;
1478			} else {
1479				h->next_to_run = curr_queue;
1480				break;
1481			}
1482		}
1483	}
1484}
1485
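/* Softirq completion handler: unmap the command's scatter-gather DMA
 * mappings, complete the block layer request, then free the command
 * back to the pool and kick any queues waiting for a free command. */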
1486static void cciss_softirq_done(struct request *rq)
1487{
1488	CommandList_struct *cmd = rq->completion_data;
1489	ctlr_info_t *h = hba[cmd->ctlr];
1490	unsigned long flags;
1491	u64bit temp64;
1492	int i, ddir;
1493
1494	if (cmd->Request.Type.Direction == XFER_READ)
1495		ddir = PCI_DMA_FROMDEVICE;
1496	else
1497		ddir = PCI_DMA_TODEVICE;
1498
1499	/* command did not need to be retried */
1500	/* unmap the DMA mapping for all the scatter gather elements */
1501	for (i = 0; i < cmd->Header.SGList; i++) {
1502		temp64.val32.lower = cmd->SG[i].Addr.lower;
1503		temp64.val32.upper = cmd->SG[i].Addr.upper;
1504		pci_unmap_page(h->pdev, temp64.val, cmd->SG[i].Len, ddir);
1505	}
1506
1507#ifdef CCISS_DEBUG
1508	printk("Done with %p\n", rq);
1509#endif				/* CCISS_DEBUG */
1510
1511	/* set the residual count for pc requests */
1512	if (blk_pc_request(rq))
1513		rq->resid_len = cmd->err_info->ResidualCnt;
1514
1515	blk_end_request_all(rq, (rq->errors == 0) ? 0 : -EIO);
1516
1517	spin_lock_irqsave(&h->lock, flags);
1518	cmd_free(h, cmd, 1);
1519	cciss_check_queues(h);
1520	spin_unlock_irqrestore(&h->lock, flags);
1521}
1522
1523/* This function gets the SCSI vendor, model, and revision of a logical drive
1524 * via the inquiry page 0.  Model, vendor, and rev are set to empty strings if
1525 * they cannot be read.
1526 */
1527static void cciss_get_device_descr(int ctlr, int logvol, int withirq,
1528				   char *vendor, char *model, char *rev)
1529{
1530	int rc;
1531	InquiryData_struct *inq_buf;
1532
1533	*vendor = '\0';
1534	*model = '\0';
1535	*rev = '\0';
1536
1537	inq_buf = kzalloc(sizeof(InquiryData_struct), GFP_KERNEL);
1538	if (!inq_buf)
1539		return;
1540
1541	if (withirq)
1542		rc = sendcmd_withirq(CISS_INQUIRY, ctlr, inq_buf,
1543				     sizeof(InquiryData_struct), 1, logvol,
1544				     0, TYPE_CMD);
1545	else
1546		rc = sendcmd(CISS_INQUIRY, ctlr, inq_buf,
1547			     sizeof(InquiryData_struct), 1, logvol, 0, NULL,
1548			     TYPE_CMD);
1549	if (rc == IO_OK) {
1550		memcpy(vendor, &inq_buf->data_byte[8], VENDOR_LEN);
1551		vendor[VENDOR_LEN] = '\0';
1552		memcpy(model, &inq_buf->data_byte[16], MODEL_LEN);
1553		model[MODEL_LEN] = '\0';
1554		memcpy(rev, &inq_buf->data_byte[32], REV_LEN);
1555		rev[REV_LEN] = '\0';
1556	}
1557
1558	kfree(inq_buf);
1559	return;
1560}
1561
1562/* This function gets the serial number of a logical drive via
1563 * inquiry page 0x83.  Serial no. is 16 bytes.  If the serial
1564 * number cannot be read, zeroed bytes are returned instead (or 0xff
1565 * if the inquiry scratch buffer cannot be allocated).
1566 */
1567static void cciss_get_serial_no(int ctlr, int logvol, int withirq,
1568				unsigned char *serial_no, int buflen)
1569{
1570#define PAGE_83_INQ_BYTES 64
1571	int rc;
1572	unsigned char *buf;
1573
1574	if (buflen > 16)
1575		buflen = 16;
1576	memset(serial_no, 0xff, buflen);
1577	buf = kzalloc(PAGE_83_INQ_BYTES, GFP_KERNEL);
1578	if (!buf)
1579		return;
1580	memset(serial_no, 0, buflen);
1581	if (withirq)
1582		rc = sendcmd_withirq(CISS_INQUIRY, ctlr, buf,
1583			PAGE_83_INQ_BYTES, 1, logvol, 0x83, TYPE_CMD);
1584	else
1585		rc = sendcmd(CISS_INQUIRY, ctlr, buf,
1586			PAGE_83_INQ_BYTES, 1, logvol, 0x83, NULL, TYPE_CMD);
1587	if (rc == IO_OK)
1588		memcpy(serial_no, &buf[8], buflen);
1589	kfree(buf);
1590	return;
1591}
1592
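/* Set up the request queue and gendisk for one logical drive and
 * register the disk with the block layer. */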
1593static void cciss_add_disk(ctlr_info_t *h, struct gendisk *disk,
1594				int drv_index)
1595{
1596	disk->queue = blk_init_queue(do_cciss_request, &h->lock);
1597	sprintf(disk->disk_name, "cciss/c%dd%d", h->ctlr, drv_index);
1598	disk->major = h->major;
1599	disk->first_minor = drv_index << NWD_SHIFT;
1600	disk->fops = &cciss_fops;
1601	disk->private_data = &h->drv[drv_index];
1602	disk->driverfs_dev = &h->drv[drv_index].dev;
1603
1604	/* Set up queue information */
1605	blk_queue_bounce_limit(disk->queue, h->pdev->dma_mask);
1606
1607	/* This is a hardware imposed limit. */
1608	blk_queue_max_hw_segments(disk->queue, MAXSGENTRIES);
1609
1610	/* This is a limit in the driver and could be eliminated. */
1611	blk_queue_max_phys_segments(disk->queue, MAXSGENTRIES);
1612
1613	blk_queue_max_sectors(disk->queue, h->cciss_max_sectors);
1614
1615	blk_queue_softirq_done(disk->queue, cciss_softirq_done);
1616
1617	disk->queue->queuedata = h;
1618
1619	blk_queue_logical_block_size(disk->queue,
1620				     h->drv[drv_index].block_size);
1621
1622	/* Make sure all queue data is written out before */
1623	/* setting h->drv[drv_index].queue, as setting this */
1624	/* allows the interrupt handler to start the queue */
1625	wmb();
1626	h->drv[drv_index].queue = disk->queue;
1627	add_disk(disk);
1628}
1629
1630/* This function will check the usage_count of the drive to be updated/added.
1631 * If the usage_count is zero and it is a heretofore unknown drive, or,
1632 * the drive's capacity, geometry, or serial number has changed,
1633 * then the drive information will be updated and the disk will be
1634 * re-registered with the kernel.  If these conditions don't hold,
1635 * then it will be left alone for the next reboot.  The exception to this
1636 * is disk 0 which will always be left registered with the kernel since it
1637 * is also the controller node.  Any changes to disk 0 will show up on
1638 * the next reboot.
1639 */
1640static void cciss_update_drive_info(int ctlr, int drv_index, int first_time)
1641{
1642	ctlr_info_t *h = hba[ctlr];
1643	struct gendisk *disk;
1644	InquiryData_struct *inq_buff = NULL;
1645	unsigned int block_size;
1646	sector_t total_size;
1647	unsigned long flags = 0;
1648	int ret = 0;
1649	drive_info_struct *drvinfo;
1650	int was_only_controller_node;
1651
1652	/* Get information about the disk and modify the driver structure */
1653	inq_buff = kmalloc(sizeof(InquiryData_struct), GFP_KERNEL);
1654	drvinfo = kmalloc(sizeof(*drvinfo), GFP_KERNEL);
1655	if (inq_buff == NULL || drvinfo == NULL)
1656		goto mem_msg;
1657
1658	/* See if we're trying to update the "controller node"
1659	 * this will happen when the first logical drive gets
1660	 * created by ACU.
1661	 */
1662	was_only_controller_node = (drv_index == 0 &&
1663				h->drv[0].raid_level == -1);
1664
1665	/* testing to see if 16-byte CDBs are already being used */
1666	if (h->cciss_read == CCISS_READ_16) {
1667		cciss_read_capacity_16(h->ctlr, drv_index, 1,
1668			&total_size, &block_size);
1669
1670	} else {
1671		cciss_read_capacity(ctlr, drv_index, 1,
1672				    &total_size, &block_size);
1673
1674		/* if read_capacity returns all F's this volume is >2TB */
1675		/* in size so we switch to 16-byte CDB's for all */
1676		/* read/write ops */
1677		if (total_size == 0xFFFFFFFFULL) {
1678			cciss_read_capacity_16(ctlr, drv_index, 1,
1679			&total_size, &block_size);
1680			h->cciss_read = CCISS_READ_16;
1681			h->cciss_write = CCISS_WRITE_16;
1682		} else {
1683			h->cciss_read = CCISS_READ_10;
1684			h->cciss_write = CCISS_WRITE_10;
1685		}
1686	}
1687
1688	cciss_geometry_inquiry(ctlr, drv_index, 1, total_size, block_size,
1689			       inq_buff, drvinfo);
1690	drvinfo->block_size = block_size;
1691	drvinfo->nr_blocks = total_size + 1;
1692
1693	cciss_get_device_descr(ctlr, drv_index, 1, drvinfo->vendor,
1694				drvinfo->model, drvinfo->rev);
1695	cciss_get_serial_no(ctlr, drv_index, 1, drvinfo->serial_no,
1696			sizeof(drvinfo->serial_no));
1697
1698	/* Is it the same disk we already know, and nothing's changed? */
1699	if (h->drv[drv_index].raid_level != -1 &&
1700		((memcmp(drvinfo->serial_no,
1701				h->drv[drv_index].serial_no, 16) == 0) &&
1702		drvinfo->block_size == h->drv[drv_index].block_size &&
1703		drvinfo->nr_blocks == h->drv[drv_index].nr_blocks &&
1704		drvinfo->heads == h->drv[drv_index].heads &&
1705		drvinfo->sectors == h->drv[drv_index].sectors &&
1706		drvinfo->cylinders == h->drv[drv_index].cylinders))
1707			/* The disk is unchanged, nothing to update */
1708			goto freeret;
1709
1710	/* If we get here it's not the same disk, or something's changed,
1711	 * so we need to deregister it, and re-register it, if it's not
1712	 * in use.
1713	 * If the disk already exists then deregister it before proceeding
1714	 * (unless it's the first disk (for the controller node).
1715	 */
1716	if (h->drv[drv_index].raid_level != -1 && drv_index != 0) {
1717		printk(KERN_WARNING "disk %d has changed.\n", drv_index);
1718		spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
1719		h->drv[drv_index].busy_configuring = 1;
1720		spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
1721
1722		/* deregister_disk sets h->drv[drv_index].queue = NULL
1723		 * which keeps the interrupt handler from starting
1724		 * the queue.
1725		 */
1726		ret = deregister_disk(h, drv_index, 0);
1727		h->drv[drv_index].busy_configuring = 0;
1728	}
1729
1730	/* If the disk is in use return */
1731	if (ret)
1732		goto freeret;
1733
1734	/* Save the new information from cciss_geometry_inquiry
1735	 * and serial number inquiry.
1736	 */
1737	h->drv[drv_index].block_size = drvinfo->block_size;
1738	h->drv[drv_index].nr_blocks = drvinfo->nr_blocks;
1739	h->drv[drv_index].heads = drvinfo->heads;
1740	h->drv[drv_index].sectors = drvinfo->sectors;
1741	h->drv[drv_index].cylinders = drvinfo->cylinders;
1742	h->drv[drv_index].raid_level = drvinfo->raid_level;
1743	memcpy(h->drv[drv_index].serial_no, drvinfo->serial_no, 16);
1744	memcpy(h->drv[drv_index].vendor, drvinfo->vendor, VENDOR_LEN + 1);
1745	memcpy(h->drv[drv_index].model, drvinfo->model, MODEL_LEN + 1);
1746	memcpy(h->drv[drv_index].rev, drvinfo->rev, REV_LEN + 1);
1747
1748	++h->num_luns;
1749	disk = h->gendisk[drv_index];
1750	set_capacity(disk, h->drv[drv_index].nr_blocks);
1751
1752	/* If it's not disk 0 (drv_index != 0)
1753	 * or if it was disk 0, but there was previously
1754	 * no actual corresponding configured logical drive
1755	 * (raid_level == -1) then we want to update the
1756	 * logical drive's information.
1757	 */
1758	if (drv_index || first_time)
1759		cciss_add_disk(h, disk, drv_index);
1760
1761freeret:
1762	kfree(inq_buff);
1763	kfree(drvinfo);
1764	return;
1765mem_msg:
1766	printk(KERN_ERR "cciss: out of memory\n");
1767	goto freeret;
1768}
1769
1770/* This function will find the first index of the controller's drive array
1771 * that has a -1 for the raid_level and will return that index.  This is
1772 * where new drives will be added.  If the index to be returned is greater
1773 * than the highest_lun index for the controller then highest_lun is set
1774 * to this new index.  If there are no available indexes then -1 is returned.
1775 * "controller_node" is used to know if this is a real logical drive, or just
1776 * the controller node, which determines if this counts towards highest_lun.
1777 */
1778static int cciss_find_free_drive_index(int ctlr, int controller_node)
1779{
1780	int i;
1781
1782	for (i = 0; i < CISS_MAX_LUN; i++) {
1783		if (hba[ctlr]->drv[i].raid_level == -1) {
1784			if (i > hba[ctlr]->highest_lun)
1785				if (!controller_node)
1786					hba[ctlr]->highest_lun = i;
1787			return i;
1788		}
1789	}
1790	return -1;
1791}
1792
1793/* cciss_add_gendisk finds a free hba[]->drv structure
1794 * and allocates a gendisk if needed, and sets the lunid
1795 * in the drvinfo structure.   It returns the index into
1796 * the ->drv[] array, or -1 if none are free.
1797 * is_controller_node indicates whether highest_lun should
1798 * count this disk, or if it's only being added to provide
1799 * a means to talk to the controller in case no logical
1800 * drives have yet been configured.
1801 */
1802static int cciss_add_gendisk(ctlr_info_t *h, __u32 lunid, int controller_node)
1803{
1804	int drv_index;
1805
1806	drv_index = cciss_find_free_drive_index(h->ctlr, controller_node);
1807	if (drv_index == -1)
1808		return -1;
1809	/* Check if the gendisk needs to be allocated */
1810	if (!h->gendisk[drv_index]) {
1811		h->gendisk[drv_index] =
1812			alloc_disk(1 << NWD_SHIFT);
1813		if (!h->gendisk[drv_index]) {
1814			printk(KERN_ERR "cciss%d: could not "
1815				"allocate a new disk %d\n",
1816				h->ctlr, drv_index);
1817			return -1;
1818		}
1819	}
1820	h->drv[drv_index].LunID = lunid;
1821	if (cciss_create_ld_sysfs_entry(h, &h->drv[drv_index], drv_index))
1822		goto err_free_disk;
1823
1824	/* Don't need to mark this busy because nobody */
1825	/* else knows about this disk yet to contend */
1826	/* for access to it. */
1827	h->drv[drv_index].busy_configuring = 0;
1828	wmb();
1829	return drv_index;
1830
1831err_free_disk:
1832	put_disk(h->gendisk[drv_index]);
1833	h->gendisk[drv_index] = NULL;
1834	return -1;
1835}
1836
1837/* This is for the special case of a controller which
1838 * has no logical drives.  In this case, we still need
1839 * to register a disk so the controller can be accessed
1840 * by the Array Config Utility.
1841 */
1842static void cciss_add_controller_node(ctlr_info_t *h)
1843{
1844	struct gendisk *disk;
1845	int drv_index;
1846
1847	if (h->gendisk[0] != NULL) /* already did this? Then bail. */
1848		return;
1849
1850	drv_index = cciss_add_gendisk(h, 0, 1);
1851	if (drv_index == -1) {
1852		printk(KERN_WARNING "cciss%d: could not "
1853			"add disk 0.\n", h->ctlr);
1854		return;
1855	}
1856	h->drv[drv_index].block_size = 512;
1857	h->drv[drv_index].nr_blocks = 0;
1858	h->drv[drv_index].heads = 0;
1859	h->drv[drv_index].sectors = 0;
1860	h->drv[drv_index].cylinders = 0;
1861	h->drv[drv_index].raid_level = -1;
1862	memset(h->drv[drv_index].serial_no, 0, 16);
1863	disk = h->gendisk[drv_index];
1864	cciss_add_disk(h, disk, drv_index);
1865}
1866
1867/* This function will add and remove logical drives from the Logical
1868 * drive array of the controller and maintain persistence of ordering
1869 * so that mount points are preserved until the next reboot.  This allows
1870 * for the removal of logical drives in the middle of the drive array
1871 * without a re-ordering of those drives.
1872 * INPUT
1873 * h		= The controller to perform the operations on
1874 */
1875static int rebuild_lun_table(ctlr_info_t *h, int first_time)
1876{
1877	int ctlr = h->ctlr;
1878	int num_luns;
1879	ReportLunData_struct *ld_buff = NULL;
1880	int return_code;
1881	int listlength = 0;
1882	int i;
1883	int drv_found;
1884	int drv_index = 0;
1885	__u32 lunid = 0;
1886	unsigned long flags;
1887
1888	if (!capable(CAP_SYS_RAWIO))
1889		return -EPERM;
1890
1891	/* Set busy_configuring flag for this operation */
1892	spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
1893	if (h->busy_configuring) {
1894		spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
1895		return -EBUSY;
1896	}
1897	h->busy_configuring = 1;
1898	spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
1899
1900	ld_buff = kzalloc(sizeof(ReportLunData_struct), GFP_KERNEL);
1901	if (ld_buff == NULL)
1902		goto mem_msg;
1903
1904	return_code = sendcmd_withirq(CISS_REPORT_LOG, ctlr, ld_buff,
1905				      sizeof(ReportLunData_struct), 0,
1906				      0, 0, TYPE_CMD);
1907
1908	if (return_code == IO_OK)
1909		listlength = be32_to_cpu(*(__be32 *) ld_buff->LUNListLength);
1910	else {	/* reading number of logical volumes failed */
1911		printk(KERN_WARNING "cciss: report logical volume"
1912		       " command failed\n");
1913		listlength = 0;
1914		goto freeret;
1915	}
1916
1917	num_luns = listlength / 8;	/* 8 bytes per entry */
1918	if (num_luns > CISS_MAX_LUN) {
1919		num_luns = CISS_MAX_LUN;
1920		printk(KERN_WARNING "cciss: more luns configured"
1921		       " on controller than can be handled by"
1922		       " this driver.\n");
1923	}
1924
1925	if (num_luns == 0)
1926		cciss_add_controller_node(h);
1927
1928	/* Compare controller drive array to driver's drive array
1929	 * to see if any drives are missing on the controller due
1930	 * to action of Array Config Utility (user deletes drive)
1931	 * and deregister logical drives which have disappeared.
1932	 */
1933	for (i = 0; i <= h->highest_lun; i++) {
1934		int j;
1935		drv_found = 0;
1936
1937		/* skip holes in the array from already deleted drives */
1938		if (h->drv[i].raid_level == -1)
1939			continue;
1940
1941		for (j = 0; j < num_luns; j++) {
1942			memcpy(&lunid, &ld_buff->LUN[j][0], 4);
1943			lunid = le32_to_cpu(lunid);
1944			if (h->drv[i].LunID == lunid) {
1945				drv_found = 1;
1946				break;
1947			}
1948		}
1949		if (!drv_found) {
1950			/* Deregister it from the OS, it's gone. */
1951			spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
1952			h->drv[i].busy_configuring = 1;
1953			spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
1954			return_code = deregister_disk(h, i, 1);
1955			cciss_destroy_ld_sysfs_entry(&h->drv[i]);
1956			h->drv[i].busy_configuring = 0;
1957		}
1958	}
1959
1960	/* Compare controller drive array to driver's drive array.
1961	 * Check for updates in the drive information and any new drives
1962	 * on the controller due to ACU adding logical drives, or changing
1963	 * a logical drive's size, etc.  Reregister any new/changed drives
1964	 */
1965	for (i = 0; i < num_luns; i++) {
1966		int j;
1967
1968		drv_found = 0;
1969
1970		memcpy(&lunid, &ld_buff->LUN[i][0], 4);
1971		lunid = le32_to_cpu(lunid);
1972
1973		/* Find if the LUN is already in the drive array
1974		 * of the driver.  If so then update its info
1975		 * if not in use.  If it does not exist then find
1976		 * the first free index and add it.
1977		 */
1978		for (j = 0; j <= h->highest_lun; j++) {
1979			if (h->drv[j].raid_level != -1 &&
1980				h->drv[j].LunID == lunid) {
1981				drv_index = j;
1982				drv_found = 1;
1983				break;
1984			}
1985		}
1986
1987		/* check if the drive was found already in the array */
1988		if (!drv_found) {
1989			drv_index = cciss_add_gendisk(h, lunid, 0);
1990			if (drv_index == -1)
1991				goto freeret;
1992		}
1993		cciss_update_drive_info(ctlr, drv_index, first_time);
1994	}		/* end for */
1995
1996freeret:
1997	kfree(ld_buff);
1998	h->busy_configuring = 0;
1999	/* We return -1 here to tell the ACU that we have registered/updated
2000	 * all of the drives that we can and to keep it from calling us
2001	 * additional times.
2002	 */
2003	return -1;
2004mem_msg:
2005	printk(KERN_ERR "cciss: out of memory\n");
2006	h->busy_configuring = 0;
2007	goto freeret;
2008}
2009
2010/* This function will deregister the disk and its queue from the
2011 * kernel.  It must be called with the controller lock held and the
2012 * drv structure's busy_configuring flag set.  Its parameters are:
2013 *
2014 * disk = This is the disk to be deregistered
2015 * drv  = This is the drive_info_struct associated with the disk to be
2016 *        deregistered.  It contains information about the disk used
2017 *        by the driver.
2018 * clear_all = This flag determines whether or not the disk information
2019 *             is going to be completely cleared out and the highest_lun
2020 *             reset.  Sometimes we want to clear out information about
2021 *             the disk in preparation for re-adding it.  In this case
2022 *             the highest_lun should be left unchanged and the LunID
2023 *             should not be cleared.
2024*/
2025static int deregister_disk(ctlr_info_t *h, int drv_index,
2026			   int clear_all)
2027{
2028	int i;
2029	struct gendisk *disk;
2030	drive_info_struct *drv;
2031
2032	if (!capable(CAP_SYS_RAWIO))
2033		return -EPERM;
2034
2035	drv = &h->drv[drv_index];
2036	disk = h->gendisk[drv_index];
2037
2038	/* make sure logical volume is NOT in use */
2039	if (clear_all || (h->gendisk[0] == disk)) {
2040		if (drv->usage_count > 1)
2041			return -EBUSY;
2042	} else if (drv->usage_count > 0)
2043		return -EBUSY;
2044
2045	/* invalidate the devices and deregister the disk.  If it is disk
2046	 * zero, do not deregister it but just zero out its values.  This
2047	 * allows us to delete disk zero but keep the controller registered.
2048	 */
2049	if (h->gendisk[0] != disk) {
2050		struct request_queue *q = disk->queue;
2051		if (disk->flags & GENHD_FL_UP)
2052			del_gendisk(disk);
2053		if (q) {
2054			blk_cleanup_queue(q);
2055			/* Set drv->queue to NULL so that we do not try
2056			 * to call blk_start_queue on this queue in the
2057			 * interrupt handler
2058			 */
2059			drv->queue = NULL;
2060		}
2061		/* If clear_all is set then we are deleting the logical
2062		 * drive, not just refreshing its info.  For drives
2063		 * other than disk 0 we will call put_disk.  We do not
2064		 * do this for disk 0 as we need it to be able to
2065		 * configure the controller.
2066		 */
2067		if (clear_all){
2068			/* This isn't pretty, but we need to find the
2069			 * disk in our array and NULL out the pointer.
2070			 * This is so that we will call alloc_disk if
2071			 * this index is used again later.
2072			 */
2073			for (i=0; i < CISS_MAX_LUN; i++){
2074				if (h->gendisk[i] == disk) {
2075					h->gendisk[i] = NULL;
2076					break;
2077				}
2078			}
2079			put_disk(disk);
2080		}
2081	} else {
2082		set_capacity(disk, 0);
2083	}
2084
2085	--h->num_luns;
2086	/* zero out the disk size info */
2087	drv->nr_blocks = 0;
2088	drv->block_size = 0;
2089	drv->heads = 0;
2090	drv->sectors = 0;
2091	drv->cylinders = 0;
2092	drv->raid_level = -1;	/* This can be used as a flag variable to
2093				 * indicate that this element of the drive
2094				 * array is free.
2095				 */
2096
2097	if (clear_all) {
2098		/* check to see if it was the last disk */
2099		if (drv == h->drv + h->highest_lun) {
2100			/* if so, find the new highest lun */
2101			int i, newhighest = -1;
2102			for (i = 0; i <= h->highest_lun; i++) {
2103				/* if the disk has size > 0, it is available */
2104				if (h->drv[i].heads)
2105					newhighest = i;
2106			}
2107			h->highest_lun = newhighest;
2108		}
2109
2110		drv->LunID = 0;
2111	}
2112	return 0;
2113}
2114
2115static int fill_cmd(CommandList_struct *c, __u8 cmd, int ctlr, void *buff, size_t size,
2116		    unsigned int use_unit_num, /* 0: address the controller, 1: address logical
2117						  volume log_unit, 2: periph device address is scsi3addr */
2118		    unsigned int log_unit, __u8 page_code,
2119		    unsigned char *scsi3addr, int cmd_type)
2120{
2121	ctlr_info_t *h = hba[ctlr];
2122	u64bit buff_dma_handle;
2123	int status = IO_OK;
2124
2125	c->cmd_type = CMD_IOCTL_PEND;
2126	c->Header.ReplyQueue = 0;
2127	if (buff != NULL) {
2128		c->Header.SGList = 1;
2129		c->Header.SGTotal = 1;
2130	} else {
2131		c->Header.SGList = 0;
2132		c->Header.SGTotal = 0;
2133	}
2134	c->Header.Tag.lower = c->busaddr;
2135
2136	c->Request.Type.Type = cmd_type;
2137	if (cmd_type == TYPE_CMD) {
2138		switch (cmd) {
2139		case CISS_INQUIRY:
2140			/* If use_unit_num == 0, this is going to the
2141			   controller, so it's a physical command:
2142			   mode = 0, target = 0, nothing to write.
2143			   If use_unit_num == 1:
2144			   mode = 1 (volume set addressing), target = LUNID.
2145			   If use_unit_num == 2:
2146			   mode = 0 (periph dev addr), target = scsi3addr. */
2147			if (use_unit_num == 1) {
2148				c->Header.LUN.LogDev.VolId =
2149				    h->drv[log_unit].LunID;
2150				c->Header.LUN.LogDev.Mode = 1;
2151			} else if (use_unit_num == 2) {
2152				memcpy(c->Header.LUN.LunAddrBytes, scsi3addr,
2153				       8);
2154				c->Header.LUN.LogDev.Mode = 0;
2155			}
2156			/* are we trying to read a vital product page */
2157			if (page_code != 0) {
2158				c->Request.CDB[1] = 0x01;
2159				c->Request.CDB[2] = page_code;
2160			}
2161			c->Request.CDBLen = 6;
2162			c->Request.Type.Attribute = ATTR_SIMPLE;
2163			c->Request.Type.Direction = XFER_READ;
2164			c->Request.Timeout = 0;
2165			c->Request.CDB[0] = CISS_INQUIRY;
2166			c->Request.CDB[4] = size & 0xFF;
2167			break;
2168		case CISS_REPORT_LOG:
2169		case CISS_REPORT_PHYS:
2170			/* Talking to the controller, so it's a physical command:
2171			   mode = 00, target = 0.  Nothing to write.
2172			 */
2173			c->Request.CDBLen = 12;
2174			c->Request.Type.Attribute = ATTR_SIMPLE;
2175			c->Request.Type.Direction = XFER_READ;
2176			c->Request.Timeout = 0;
2177			c->Request.CDB[0] = cmd;
2178			c->Request.CDB[6] = (size >> 24) & 0xFF;	//MSB
2179			c->Request.CDB[7] = (size >> 16) & 0xFF;
2180			c->Request.CDB[8] = (size >> 8) & 0xFF;
2181			c->Request.CDB[9] = size & 0xFF;
2182			break;
2183
2184		case CCISS_READ_CAPACITY:
2185			c->Header.LUN.LogDev.VolId = h->drv[log_unit].LunID;
2186			c->Header.LUN.LogDev.Mode = 1;
2187			c->Request.CDBLen = 10;
2188			c->Request.Type.Attribute = ATTR_SIMPLE;
2189			c->Request.Type.Direction = XFER_READ;
2190			c->Request.Timeout = 0;
2191			c->Request.CDB[0] = cmd;
2192			break;
2193		case CCISS_READ_CAPACITY_16:
2194			c->Header.LUN.LogDev.VolId = h->drv[log_unit].LunID;
2195			c->Header.LUN.LogDev.Mode = 1;
2196			c->Request.CDBLen = 16;
2197			c->Request.Type.Attribute = ATTR_SIMPLE;
2198			c->Request.Type.Direction = XFER_READ;
2199			c->Request.Timeout = 0;
2200			c->Request.CDB[0] = cmd;
2201			c->Request.CDB[1] = 0x10;
2202			c->Request.CDB[10] = (size >> 24) & 0xFF;
2203			c->Request.CDB[11] = (size >> 16) & 0xFF;
2204			c->Request.CDB[12] = (size >> 8) & 0xFF;
2205			c->Request.CDB[13] = size & 0xFF;
2208			break;
2209		case CCISS_CACHE_FLUSH:
2210			c->Request.CDBLen = 12;
2211			c->Request.Type.Attribute = ATTR_SIMPLE;
2212			c->Request.Type.Direction = XFER_WRITE;
2213			c->Request.Timeout = 0;
2214			c->Request.CDB[0] = BMIC_WRITE;
2215			c->Request.CDB[6] = BMIC_CACHE_FLUSH;
2216			break;
2217		case TEST_UNIT_READY:
2218			memcpy(c->Header.LUN.LunAddrBytes, scsi3addr, 8);
2219			c->Request.CDBLen = 6;
2220			c->Request.Type.Attribute = ATTR_SIMPLE;
2221			c->Request.Type.Direction = XFER_NONE;
2222			c->Request.Timeout = 0;
2223			break;
2224		default:
2225			printk(KERN_WARNING
2226			       "cciss%d: Unknown Command 0x%02x\n", ctlr, cmd);
2227			return IO_ERROR;
2228		}
2229	} else if (cmd_type == TYPE_MSG) {
2230		switch (cmd) {
2231		case 0:	/* ABORT message */
2232			c->Request.CDBLen = 12;
2233			c->Request.Type.Attribute = ATTR_SIMPLE;
2234			c->Request.Type.Direction = XFER_WRITE;
2235			c->Request.Timeout = 0;
2236			c->Request.CDB[0] = cmd;	/* abort */
2237			c->Request.CDB[1] = 0;	/* abort a command */
2238			/* buff contains the tag of the command to abort */
2239			memcpy(&c->Request.CDB[4], buff, 8);
2240			break;
2241		case 1:	/* RESET message */
2242			memcpy(c->Header.LUN.LunAddrBytes, scsi3addr, 8);
2243			c->Request.CDBLen = 16;
2244			c->Request.Type.Attribute = ATTR_SIMPLE;
2245			c->Request.Type.Direction = XFER_NONE;
2246			c->Request.Timeout = 0;
2247			memset(&c->Request.CDB[0], 0, sizeof(c->Request.CDB));
2248			c->Request.CDB[0] = cmd;	/* reset */
2249			c->Request.CDB[1] = 0x03;	/* reset a target */
2250			break;
2251		case 3:	/* No-Op message */
2252			c->Request.CDBLen = 1;
2253			c->Request.Type.Attribute = ATTR_SIMPLE;
2254			c->Request.Type.Direction = XFER_WRITE;
2255			c->Request.Timeout = 0;
2256			c->Request.CDB[0] = cmd;
2257			break;
2258		default:
2259			printk(KERN_WARNING
2260			       "cciss%d: unknown message type %d\n", ctlr, cmd);
2261			return IO_ERROR;
2262		}
2263	} else {
2264		printk(KERN_WARNING
2265		       "cciss%d: unknown command type %d\n", ctlr, cmd_type);
2266		return IO_ERROR;
2267	}
2268	/* Fill in the scatter gather information */
2269	if (size > 0) {
2270		buff_dma_handle.val = (__u64) pci_map_single(h->pdev,
2271							     buff, size,
2272							     PCI_DMA_BIDIRECTIONAL);
2273		c->SG[0].Addr.lower = buff_dma_handle.val32.lower;
2274		c->SG[0].Addr.upper = buff_dma_handle.val32.upper;
2275		c->SG[0].Len = size;
2276		c->SG[0].Ext = 0;	/* we are not chaining */
2277	}
2278	return status;
2279}
2280
2281static int sendcmd_withirq_core(ctlr_info_t *h, CommandList_struct *c)
2282{
2283	DECLARE_COMPLETION_ONSTACK(wait);
2284	u64bit buff_dma_handle;
2285	unsigned long flags;
2286	int return_status = IO_OK;
2287
2288resend_cmd2:
2289	c->waiting = &wait;
2290	/* Put the request on the tail of the queue and send it */
2291	spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
2292	addQ(&h->reqQ, c);
2293	h->Qdepth++;
2294	start_io(h);
2295	spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
2296
2297	wait_for_completion(&wait);
2298
2299	if (c->err_info->CommandStatus == 0)
2300		goto command_done;
2301
2302	switch (c->err_info->CommandStatus) {
2303	case CMD_TARGET_STATUS:
2304		printk(KERN_WARNING "cciss: cmd 0x%02x "
2305		"has completed with errors\n", c->Request.CDB[0]);
2306		if (c->err_info->ScsiStatus) {
2307			printk(KERN_WARNING "cciss: cmd 0x%02x "
2308			       "has SCSI Status = %x\n",
2309			       c->Request.CDB[0], c->err_info->ScsiStatus);
2310		}
2311		break;
2312	case CMD_DATA_UNDERRUN:
2313	case CMD_DATA_OVERRUN:
2314		/* expected for inquiry and report lun commands */
2315		break;
2316	case CMD_INVALID:
2317		printk(KERN_WARNING "cciss: Cmd 0x%02x is "
2318		       "reported invalid\n", c->Request.CDB[0]);
2319		return_status = IO_ERROR;
2320		break;
2321	case CMD_PROTOCOL_ERR:
2322		printk(KERN_WARNING "cciss: cmd 0x%02x has "
2323		       "protocol error \n", c->Request.CDB[0]);
2324		return_status = IO_ERROR;
2325		break;
2326	case CMD_HARDWARE_ERR:
2327		printk(KERN_WARNING "cciss: cmd 0x%02x had "
2328		       " hardware error\n", c->Request.CDB[0]);
2329		return_status = IO_ERROR;
2330		break;
2331	case CMD_CONNECTION_LOST:
2332		printk(KERN_WARNING "cciss: cmd 0x%02x had "
2333		       "connection lost\n", c->Request.CDB[0]);
2334		return_status = IO_ERROR;
2335		break;
2336	case CMD_ABORTED:
2337		printk(KERN_WARNING "cciss: cmd 0x%02x was "
2338		       "aborted\n", c->Request.CDB[0]);
2339		return_status = IO_ERROR;
2340		break;
2341	case CMD_ABORT_FAILED:
2342		printk(KERN_WARNING "cciss: cmd 0x%02x reports "
2343		       "abort failed\n", c->Request.CDB[0]);
2344		return_status = IO_ERROR;
2345		break;
2346	case CMD_UNSOLICITED_ABORT:
2347		printk(KERN_WARNING
2348		       "cciss%d: unsolicited abort 0x%02x\n", h->ctlr,
2349			c->Request.CDB[0]);
2350		if (c->retry_count < MAX_CMD_RETRIES) {
2351			printk(KERN_WARNING
2352			       "cciss%d: retrying 0x%02x\n", h->ctlr,
2353				c->Request.CDB[0]);
2354			c->retry_count++;
2355			/* erase the old error information */
2356			memset(c->err_info, 0,
2357			       sizeof(ErrorInfo_struct));
2358			return_status = IO_OK;
2359			INIT_COMPLETION(wait);
2360			goto resend_cmd2;
2361		}
2362		return_status = IO_ERROR;
2363		break;
2364	default:
2365		printk(KERN_WARNING "cciss: cmd 0x%02x returned "
2366		       "unknown status %x\n", c->Request.CDB[0],
2367		       c->err_info->CommandStatus);
2368		return_status = IO_ERROR;
2369	}
2370
2371command_done:
2372	/* unlock the buffers from DMA */
2373	buff_dma_handle.val32.lower = c->SG[0].Addr.lower;
2374	buff_dma_handle.val32.upper = c->SG[0].Addr.upper;
2375	pci_unmap_single(h->pdev, (dma_addr_t) buff_dma_handle.val,
2376			 c->SG[0].Len, PCI_DMA_BIDIRECTIONAL);
2377	return return_status;
2378}
2379
2380static int sendcmd_withirq(__u8 cmd,
2381			   int ctlr,
2382			   void *buff,
2383			   size_t size,
2384			   unsigned int use_unit_num,
2385			   unsigned int log_unit, __u8 page_code, int cmd_type)
2386{
2387	ctlr_info_t *h = hba[ctlr];
2388	CommandList_struct *c;
2389	int return_status;
2390
2391	c = cmd_alloc(h, 0);
2392	if (!c)
2393		return -ENOMEM;
2394	return_status = fill_cmd(c, cmd, ctlr, buff, size, use_unit_num,
2395				 log_unit, page_code, NULL, cmd_type);
2396	if (return_status == IO_OK)
2397		return_status = sendcmd_withirq_core(h, c);
2398	cmd_free(h, c, 0);
2399	return return_status;
2400}
2401
2402static void cciss_geometry_inquiry(int ctlr, int logvol,
2403				   int withirq, sector_t total_size,
2404				   unsigned int block_size,
2405				   InquiryData_struct *inq_buff,
2406				   drive_info_struct *drv)
2407{
2408	int return_code;
2409	unsigned long t;
2410
2411	memset(inq_buff, 0, sizeof(InquiryData_struct));
2412	if (withirq)
2413		return_code = sendcmd_withirq(CISS_INQUIRY, ctlr,
2414					      inq_buff, sizeof(*inq_buff), 1,
2415					      logvol, 0xC1, TYPE_CMD);
2416	else
2417		return_code = sendcmd(CISS_INQUIRY, ctlr, inq_buff,
2418				      sizeof(*inq_buff), 1, logvol, 0xC1, NULL,
2419				      TYPE_CMD);
2420	if (return_code == IO_OK) {
2421		if (inq_buff->data_byte[8] == 0xFF) {
2422			printk(KERN_WARNING
2423			       "cciss: reading geometry failed, volume "
2424			       "does not support reading geometry\n");
2425			drv->heads = 255;
2426			drv->sectors = 32;	// Sectors per track
2427			drv->cylinders = total_size + 1;
2428			drv->raid_level = RAID_UNKNOWN;
2429		} else {
2430			drv->heads = inq_buff->data_byte[6];
2431			drv->sectors = inq_buff->data_byte[7];
2432			drv->cylinders = (inq_buff->data_byte[4] & 0xff) << 8;
2433			drv->cylinders += inq_buff->data_byte[5];
2434			drv->raid_level = inq_buff->data_byte[8];
2435		}
2436		drv->block_size = block_size;
2437		drv->nr_blocks = total_size + 1;
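		/* Worked example (illustrative only): with the 255-head,
		 * 32-sector fallback geometry, t = 255 * 32 = 8160.  A
		 * 16777216-block volume (8 GiB of 512-byte blocks) gives
		 * 16777216 / 8160 = 2056 remainder 256, which the rounding
		 * below turns into 2057 cylinders.
		 */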
2438		t = drv->heads * drv->sectors;
2439		if (t > 1) {
2440			sector_t real_size = total_size + 1;
2441			unsigned long rem = sector_div(real_size, t);
2442			if (rem)
2443				real_size++;
2444			drv->cylinders = real_size;
2445		}
2446	} else {		/* Get geometry failed */
2447		printk(KERN_WARNING "cciss: reading geometry failed\n");
2448	}
2449	printk(KERN_INFO "      heads=%d, sectors=%d, cylinders=%d\n\n",
2450	       drv->heads, drv->sectors, drv->cylinders);
2451}
2452
2453static void
2454cciss_read_capacity(int ctlr, int logvol, int withirq, sector_t *total_size,
2455		    unsigned int *block_size)
2456{
2457	ReadCapdata_struct *buf;
2458	int return_code;
2459
2460	buf = kzalloc(sizeof(ReadCapdata_struct), GFP_KERNEL);
2461	if (!buf) {
2462		printk(KERN_WARNING "cciss: out of memory\n");
2463		return;
2464	}
2465
2466	if (withirq)
2467		return_code = sendcmd_withirq(CCISS_READ_CAPACITY,
2468				ctlr, buf, sizeof(ReadCapdata_struct),
2469					1, logvol, 0, TYPE_CMD);
2470	else
2471		return_code = sendcmd(CCISS_READ_CAPACITY,
2472				ctlr, buf, sizeof(ReadCapdata_struct),
2473					1, logvol, 0, NULL, TYPE_CMD);
2474	if (return_code == IO_OK) {
2475		*total_size = be32_to_cpu(*(__be32 *) buf->total_size);
2476		*block_size = be32_to_cpu(*(__be32 *) buf->block_size);
2477	} else {		/* read capacity command failed */
2478		printk(KERN_WARNING "cciss: read capacity failed\n");
2479		*total_size = 0;
2480		*block_size = BLOCK_SIZE;
2481	}
2482	if (*total_size != 0)
2483		printk(KERN_INFO "      blocks= %llu block_size= %d\n",
2484		(unsigned long long)*total_size+1, *block_size);
2485	kfree(buf);
2486}
2487
2488static void
2489cciss_read_capacity_16(int ctlr, int logvol, int withirq, sector_t *total_size, unsigned int *block_size)
2490{
2491	ReadCapdata_struct_16 *buf;
2492	int return_code;
2493
2494	buf = kzalloc(sizeof(ReadCapdata_struct_16), GFP_KERNEL);
2495	if (!buf) {
2496		printk(KERN_WARNING "cciss: out of memory\n");
2497		return;
2498	}
2499
2500	if (withirq) {
2501		return_code = sendcmd_withirq(CCISS_READ_CAPACITY_16,
2502			ctlr, buf, sizeof(ReadCapdata_struct_16),
2503				1, logvol, 0, TYPE_CMD);
2504	}
2505	else {
2506		return_code = sendcmd(CCISS_READ_CAPACITY_16,
2507			ctlr, buf, sizeof(ReadCapdata_struct_16),
2508				1, logvol, 0, NULL, TYPE_CMD);
2509	}
2510	if (return_code == IO_OK) {
2511		*total_size = be64_to_cpu(*(__be64 *) buf->total_size);
2512		*block_size = be32_to_cpu(*(__be32 *) buf->block_size);
2513	} else {		/* read capacity command failed */
2514		printk(KERN_WARNING "cciss: read capacity failed\n");
2515		*total_size = 0;
2516		*block_size = BLOCK_SIZE;
2517	}
2518	printk(KERN_INFO "      blocks= %llu block_size= %d\n",
2519	       (unsigned long long)*total_size+1, *block_size);
2520	kfree(buf);
2521}
2522
2523static int cciss_revalidate(struct gendisk *disk)
2524{
2525	ctlr_info_t *h = get_host(disk);
2526	drive_info_struct *drv = get_drv(disk);
2527	int logvol;
2528	int FOUND = 0;
2529	unsigned int block_size;
2530	sector_t total_size;
2531	InquiryData_struct *inq_buff = NULL;
2532
2533	for (logvol = 0; logvol < CISS_MAX_LUN; logvol++) {
2534		if (h->drv[logvol].LunID == drv->LunID) {
2535			FOUND = 1;
2536			break;
2537		}
2538	}
2539
2540	if (!FOUND)
2541		return 1;
2542
2543	inq_buff = kmalloc(sizeof(InquiryData_struct), GFP_KERNEL);
2544	if (inq_buff == NULL) {
2545		printk(KERN_WARNING "cciss: out of memory\n");
2546		return 1;
2547	}
2548	if (h->cciss_read == CCISS_READ_10) {
2549		cciss_read_capacity(h->ctlr, logvol, 1,
2550					&total_size, &block_size);
2551	} else {
2552		cciss_read_capacity_16(h->ctlr, logvol, 1,
2553					&total_size, &block_size);
2554	}
2555	cciss_geometry_inquiry(h->ctlr, logvol, 1, total_size, block_size,
2556			       inq_buff, drv);
2557
2558	blk_queue_logical_block_size(drv->queue, drv->block_size);
2559	set_capacity(disk, drv->nr_blocks);
2560
2561	kfree(inq_buff);
2562	return 0;
2563}
2564
2565/*
2566 *   Wait polling for a command to complete.
2567 *   The memory mapped FIFO is polled for the completion.
2568 *   Used only at init time, interrupts from the HBA are disabled.
2569 */
2570static unsigned long pollcomplete(int ctlr)
2571{
2572	unsigned long done;
2573	int i;
2574
2575	/* Wait (up to 20 seconds) for a command to complete */
2576
2577	for (i = 20 * HZ; i > 0; i--) {
2578		done = hba[ctlr]->access.command_completed(hba[ctlr]);
2579		if (done == FIFO_EMPTY)
2580			schedule_timeout_uninterruptible(1);
2581		else
2582			return done;
2583	}
2584	/* Invalid address to tell caller we ran out of time */
2585	return 1;
2586}
2587
2588static int add_sendcmd_reject(__u8 cmd, int ctlr, unsigned long complete)
2589{
2590	/* We get in here if sendcmd() is polling for completions
2591	   and gets some command back that it wasn't expecting --
2592	   something other than that which it just sent down.
2593	   Ordinarily, that shouldn't happen, but it can happen when
2594	   the scsi tape stuff gets into error handling mode, and
2595	   starts using sendcmd() to try to abort commands and
2596	   reset tape drives.  In that case, sendcmd may pick up
2597	   completions of commands that were sent to logical drives
2598	   through the block i/o system, or cciss ioctls completing, etc.
2599	   In that case, we need to save those completions for later
2600	   processing by the interrupt handler.
2601	 */
2602
2603#ifdef CONFIG_CISS_SCSI_TAPE
2604	struct sendcmd_reject_list *srl = &hba[ctlr]->scsi_rejects;
2605
2606	/* If it's not the scsi tape stuff doing error handling, (abort */
2607	/* or reset) then we don't expect anything weird. */
2608	if (cmd != CCISS_RESET_MSG && cmd != CCISS_ABORT_MSG) {
2609#endif
2610		printk(KERN_WARNING "cciss%d: SendCmd "
2611		       "Invalid command list address returned! (%lx)\n",
2612		       ctlr, complete);
2613		/* not much we can do. */
2614#ifdef CONFIG_CISS_SCSI_TAPE
2615		return 1;
2616	}
2617
2618	/* We've sent down an abort or reset, but something else
2619	   has completed */
2620	if (srl->ncompletions >= (hba[ctlr]->nr_cmds + 2)) {
2621		/* Uh oh.  No room to save it for later... */
2622		printk(KERN_WARNING "cciss%d: Sendcmd: Invalid command addr, "
2623		       "reject list overflow, command lost!\n", ctlr);
2624		return 1;
2625	}
2626	/* Save it for later */
2627	srl->complete[srl->ncompletions] = complete;
2628	srl->ncompletions++;
2629#endif
2630	return 0;
2631}
2632
2633/* Send command c to controller h and poll for it to complete.
2634 * Turns interrupts off on the board.  Used at driver init time
2635 * and during SCSI error recovery.
2636 */
2637static int sendcmd_core(ctlr_info_t *h, CommandList_struct *c)
2638{
2639	int i;
2640	unsigned long complete;
2641	int status = IO_ERROR;
2642	u64bit buff_dma_handle;
2643
2644resend_cmd1:
2645
2646	/* Disable interrupt on the board. */
2647	h->access.set_intr_mask(h, CCISS_INTR_OFF);
2648
2649	/* Make sure there is room in the command FIFO */
2650	/* Actually it should be completely empty at this time */
2651	/* unless we are in here doing error handling for the scsi */
2652	/* tape side of the driver. */
2653	for (i = 200000; i > 0; i--) {
2654		/* if fifo isn't full go */
2655		if (!(h->access.fifo_full(h)))
2656			break;
2657		udelay(10);
2658		printk(KERN_WARNING "cciss%d: SendCmd FIFO full,"
2659		       " waiting!\n", h->ctlr);
2660	}
2661	h->access.submit_command(h, c); /* Send the cmd */
2662	do {
2663		complete = pollcomplete(h->ctlr);
2664
2665#ifdef CCISS_DEBUG
2666		printk(KERN_DEBUG "cciss: command completed\n");
2667#endif				/* CCISS_DEBUG */
2668
2669		if (complete == 1) {
2670			printk(KERN_WARNING
2671			       "cciss%d: SendCmd timed out, "
2672			       "No command list address returned!\n", h->ctlr);
2673			status = IO_ERROR;
2674			break;
2675		}
2676
2677		/* If it's not the cmd we're looking for, save it for later */
2678		if ((complete & ~CISS_ERROR_BIT) != c->busaddr) {
2679			if (add_sendcmd_reject(c->Request.CDB[0],
2680				h->ctlr, complete) != 0)
2681				BUG(); /* we are hosed if we get here. */
2682			continue;
2683		}
2684
2685		/* It is our command.  If no error, we're done. */
2686		if (!(complete & CISS_ERROR_BIT)) {
2687			status = IO_OK;
2688			break;
2689		}
2690
2691		/* There is an error... */
2692
2693		/* if data overrun or underrun on a Report command, ignore it */
2694		if (((c->Request.CDB[0] == CISS_REPORT_LOG) ||
2695		     (c->Request.CDB[0] == CISS_REPORT_PHYS) ||
2696		     (c->Request.CDB[0] == CISS_INQUIRY)) &&
2697			((c->err_info->CommandStatus == CMD_DATA_OVERRUN) ||
2698			 (c->err_info->CommandStatus == CMD_DATA_UNDERRUN))) {
2699			complete = c->busaddr;
2700			status = IO_OK;
2701			break;
2702		}
2703
2704		if (c->err_info->CommandStatus == CMD_UNSOLICITED_ABORT) {
2705			printk(KERN_WARNING "cciss%d: unsolicited abort %p\n",
2706				h->ctlr, c);
2707			if (c->retry_count < MAX_CMD_RETRIES) {
2708				printk(KERN_WARNING "cciss%d: retrying %p\n",
2709				   h->ctlr, c);
2710				c->retry_count++;
2711				/* erase the old error information */
2712				memset(c->err_info, 0, sizeof(ErrorInfo_struct));
2713				goto resend_cmd1;
2714			}
2715			printk(KERN_WARNING "cciss%d: retried %p too many "
2716				"times\n", h->ctlr, c);
2717			status = IO_ERROR;
2718			goto cleanup1;
2719		}
2720
2721		if (c->err_info->CommandStatus == CMD_UNABORTABLE) {
2722			printk(KERN_WARNING "cciss%d: command could not be "
2723				"aborted.\n", h->ctlr);
2724			status = IO_ERROR;
2725			goto cleanup1;
2726		}
2727
2728		printk(KERN_WARNING "cciss%d: sendcmd error\n", h->ctlr);
2729		printk(KERN_WARNING "cmd = 0x%02x, CommandStatus = 0x%02x\n",
2730			c->Request.CDB[0], c->err_info->CommandStatus);
2731		if (c->err_info->CommandStatus == CMD_TARGET_STATUS) {
2732			printk(KERN_WARNING "Target status = 0x%02x\n",
2733			c->err_info->ScsiStatus);
2734			if (c->err_info->ScsiStatus == 2) /* chk cond */
2735				printk(KERN_WARNING "Sense key = 0x%02x\n",
2736					0xf & c->err_info->SenseInfo[2]);
2737		}
2738
2739		status = IO_ERROR;
2740		goto cleanup1;
2741
2742	} while (1);
2743
2744cleanup1:
2745	/* unlock the data buffer from DMA */
2746	buff_dma_handle.val32.lower = c->SG[0].Addr.lower;
2747	buff_dma_handle.val32.upper = c->SG[0].Addr.upper;
2748	pci_unmap_single(h->pdev, (dma_addr_t) buff_dma_handle.val,
2749			 c->SG[0].Len, PCI_DMA_BIDIRECTIONAL);
2750#ifdef CONFIG_CISS_SCSI_TAPE
2751	/* if we saved some commands for later, process them now. */
2752	if (h->scsi_rejects.ncompletions > 0)
2753		do_cciss_intr(0, h);
2754#endif
2755	return status;
2756}
2757
2758/*
2759 * Send a command to the controller, and wait for it to complete.
2760 * Used at init time, and during SCSI error recovery.
2761 */
2762static int sendcmd(__u8 cmd, int ctlr, void *buff, size_t size,
2763	unsigned int use_unit_num,/* 0: address the controller,
2764				     1: address logical volume log_unit,
2765				     2: periph device address is scsi3addr */
2766	unsigned int log_unit,
2767	__u8 page_code, unsigned char *scsi3addr, int cmd_type)
2768{
2769	CommandList_struct *c;
2770	int status;
2771
2772	c = cmd_alloc(hba[ctlr], 1);
2773	if (!c) {
2774		printk(KERN_WARNING "cciss: unable to get memory");
2775		return IO_ERROR;
2776	}
2777	status = fill_cmd(c, cmd, ctlr, buff, size, use_unit_num,
2778			  log_unit, page_code, scsi3addr, cmd_type);
2779	if (status == IO_OK)
2780		status = sendcmd_core(hba[ctlr], c);
2781	cmd_free(hba[ctlr], c, 1);
2782	return status;
2783}
2784
2785/*
2786 * Map (physical) PCI mem into (virtual) kernel space
2787 */
2788static void __iomem *remap_pci_mem(ulong base, ulong size)
2789{
2790	ulong page_base = ((ulong) base) & PAGE_MASK;
2791	ulong page_offs = ((ulong) base) - page_base;
2792	void __iomem *page_remapped = ioremap(page_base, page_offs + size);
2793
2794	return page_remapped ? (page_remapped + page_offs) : NULL;
2795}
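/* Illustrative example (assumes 4 KiB pages, hypothetical address): for
 * base = 0xfdffc250 and size = 0x250, page_base = 0xfdffc000 and
 * page_offs = 0x250, so ioremap() covers 0x4a0 bytes and the caller gets
 * page_remapped + 0x250, i.e. a virtual pointer to the original base.
 */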
2796
2797/*
2798 * Takes jobs off the request Q and sends them to the hardware, then
2799 * puts them on the completion Q to wait for completion.
2800 */
2801static void start_io(ctlr_info_t *h)
2802{
2803	CommandList_struct *c;
2804
2805	while (!hlist_empty(&h->reqQ)) {
2806		c = hlist_entry(h->reqQ.first, CommandList_struct, list);
2807		/* can't do anything if fifo is full */
2808		if ((h->access.fifo_full(h))) {
2809			printk(KERN_WARNING "cciss: fifo full\n");
2810			break;
2811		}
2812
2813		/* Get the first entry from the Request Q */
2814		removeQ(c);
2815		h->Qdepth--;
2816
2817		/* Tell the controller to execute the command */
2818		h->access.submit_command(h, c);
2819
2820		/* Put the job onto the completion Q */
2821		addQ(&h->cmpQ, c);
2822	}
2823}
2824
2825/* Assumes that CCISS_LOCK(h->ctlr) is held. */
2826/* Zeros out the error record and then resends the command back */
2827/* to the controller */
2828static inline void resend_cciss_cmd(ctlr_info_t *h, CommandList_struct *c)
2829{
2830	/* erase the old error information */
2831	memset(c->err_info, 0, sizeof(ErrorInfo_struct));
2832
2833	/* add it to software queue and then send it to the controller */
2834	addQ(&h->reqQ, c);
2835	h->Qdepth++;
2836	if (h->Qdepth > h->maxQsinceinit)
2837		h->maxQsinceinit = h->Qdepth;
2838
2839	start_io(h);
2840}
2841
2842static inline unsigned int make_status_bytes(unsigned int scsi_status_byte,
2843	unsigned int msg_byte, unsigned int host_byte,
2844	unsigned int driver_byte)
2845{
2846	/* inverse of macros in scsi.h */
2847	return (scsi_status_byte & 0xff) |
2848		((msg_byte & 0xff) << 8) |
2849		((host_byte & 0xff) << 16) |
2850		((driver_byte & 0xff) << 24);
2851}
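/* Illustrative sketch (not part of the driver): make_status_bytes packs
 * the four bytes as status | msg << 8 | host << 16 | driver << 24, e.g.
 *
 *	make_status_bytes(0x02, 0x00, 0x07, 0x08) == 0x08070002
 *
 * so the host_byte()/msg_byte()/driver_byte() macros in scsi.h recover
 * 0x07, 0x00 and 0x08 from that value.
 */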
2852
2853static inline int evaluate_target_status(ctlr_info_t *h,
2854			CommandList_struct *cmd, int *retry_cmd)
2855{
2856	unsigned char sense_key;
2857	unsigned char status_byte, msg_byte, host_byte, driver_byte;
2858	int error_value;
2859
2860	*retry_cmd = 0;
2861	/* If we get in here, it means we got "target status", that is, scsi status */
2862	status_byte = cmd->err_info->ScsiStatus;
2863	driver_byte = DRIVER_OK;
2864	msg_byte = cmd->err_info->CommandStatus; /* correct?  seems too device specific */
2865
2866	if (blk_pc_request(cmd->rq))
2867		host_byte = DID_PASSTHROUGH;
2868	else
2869		host_byte = DID_OK;
2870
2871	error_value = make_status_bytes(status_byte, msg_byte,
2872		host_byte, driver_byte);
2873
2874	if (cmd->err_info->ScsiStatus != SAM_STAT_CHECK_CONDITION) {
2875		if (!blk_pc_request(cmd->rq))
2876			printk(KERN_WARNING "cciss: cmd %p "
2877			       "has SCSI Status 0x%x\n",
2878			       cmd, cmd->err_info->ScsiStatus);
2879		return error_value;
2880	}
2881
2882	/* check the sense key */
2883	sense_key = 0xf & cmd->err_info->SenseInfo[2];
2884	/* no status or recovered error */
2885	if (((sense_key == 0x0) || (sense_key == 0x1)) && !blk_pc_request(cmd->rq))
2886		error_value = 0;
2887
2888	if (check_for_unit_attention(h, cmd)) {
2889		*retry_cmd = !blk_pc_request(cmd->rq);
2890		return 0;
2891	}
2892
2893	if (!blk_pc_request(cmd->rq)) { /* Not SG_IO or similar? */
2894		if (error_value != 0)
2895			printk(KERN_WARNING "cciss: cmd %p has CHECK CONDITION"
2896			       " sense key = 0x%x\n", cmd, sense_key);
2897		return error_value;
2898	}
2899
2900	/* SG_IO or similar, copy sense data back */
2901	if (cmd->rq->sense) {
2902		if (cmd->rq->sense_len > cmd->err_info->SenseLen)
2903			cmd->rq->sense_len = cmd->err_info->SenseLen;
2904		memcpy(cmd->rq->sense, cmd->err_info->SenseInfo,
2905			cmd->rq->sense_len);
2906	} else
2907		cmd->rq->sense_len = 0;
2908
2909	return error_value;
2910}
2911
2912/* checks the status of the job and calls complete buffers to mark all
2913 * buffers for the completed job. Note that this function does not need
2914 * to hold the hba/queue lock.
2915 */
2916static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd,
2917				    int timeout)
2918{
2919	int retry_cmd = 0;
2920	struct request *rq = cmd->rq;
2921
2922	rq->errors = 0;
2923
2924	if (timeout)
2925		rq->errors = make_status_bytes(0, 0, 0, DRIVER_TIMEOUT);
2926
2927	if (cmd->err_info->CommandStatus == 0)	/* no error has occurred */
2928		goto after_error_processing;
2929
2930	switch (cmd->err_info->CommandStatus) {
2931	case CMD_TARGET_STATUS:
2932		rq->errors = evaluate_target_status(h, cmd, &retry_cmd);
2933		break;
2934	case CMD_DATA_UNDERRUN:
2935		if (blk_fs_request(cmd->rq)) {
2936			printk(KERN_WARNING "cciss: cmd %p has"
2937			       " completed with data underrun "
2938			       "reported\n", cmd);
2939			cmd->rq->resid_len = cmd->err_info->ResidualCnt;
2940		}
2941		break;
2942	case CMD_DATA_OVERRUN:
2943		if (blk_fs_request(cmd->rq))
2944			printk(KERN_WARNING "cciss: cmd %p has"
2945			       " completed with data overrun "
2946			       "reported\n", cmd);
2947		break;
2948	case CMD_INVALID:
2949		printk(KERN_WARNING "cciss: cmd %p is "
2950		       "reported invalid\n", cmd);
2951		rq->errors = make_status_bytes(SAM_STAT_GOOD,
2952			cmd->err_info->CommandStatus, DRIVER_OK,
2953			blk_pc_request(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR);
2954		break;
2955	case CMD_PROTOCOL_ERR:
2956		printk(KERN_WARNING "cciss: cmd %p has "
2957		       "protocol error \n", cmd);
2958		rq->errors = make_status_bytes(SAM_STAT_GOOD,
2959			cmd->err_info->CommandStatus, DRIVER_OK,
2960			blk_pc_request(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR);
2961		break;
2962	case CMD_HARDWARE_ERR:
2963		printk(KERN_WARNING "cciss: cmd %p had "
2964		       " hardware error\n", cmd);
2965		rq->errors = make_status_bytes(SAM_STAT_GOOD,
2966			cmd->err_info->CommandStatus, DRIVER_OK,
2967			blk_pc_request(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR);
2968		break;
2969	case CMD_CONNECTION_LOST:
2970		printk(KERN_WARNING "cciss: cmd %p had "
2971		       "connection lost\n", cmd);
2972		rq->errors = make_status_bytes(SAM_STAT_GOOD,
2973			cmd->err_info->CommandStatus, DRIVER_OK,
2974			blk_pc_request(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR);
2975		break;
2976	case CMD_ABORTED:
2977		printk(KERN_WARNING "cciss: cmd %p was "
2978		       "aborted\n", cmd);
2979		rq->errors = make_status_bytes(SAM_STAT_GOOD,
2980			cmd->err_info->CommandStatus, DRIVER_OK,
2981			blk_pc_request(cmd->rq) ? DID_PASSTHROUGH : DID_ABORT);
2982		break;
2983	case CMD_ABORT_FAILED:
2984		printk(KERN_WARNING "cciss: cmd %p reports "
2985		       "abort failed\n", cmd);
2986		rq->errors = make_status_bytes(SAM_STAT_GOOD,
2987			cmd->err_info->CommandStatus, DRIVER_OK,
2988			blk_pc_request(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR);
2989		break;
2990	case CMD_UNSOLICITED_ABORT:
2991		printk(KERN_WARNING "cciss%d: unsolicited "
2992		       "abort %p\n", h->ctlr, cmd);
2993		if (cmd->retry_count < MAX_CMD_RETRIES) {
2994			retry_cmd = 1;
2995			printk(KERN_WARNING
2996			       "cciss%d: retrying %p\n", h->ctlr, cmd);
2997			cmd->retry_count++;
2998		} else
2999			printk(KERN_WARNING
3000			       "cciss%d: %p retried too "
3001			       "many times\n", h->ctlr, cmd);
3002		rq->errors = make_status_bytes(SAM_STAT_GOOD,
3003			cmd->err_info->CommandStatus, DRIVER_OK,
3004			blk_pc_request(cmd->rq) ? DID_PASSTHROUGH : DID_ABORT);
3005		break;
3006	case CMD_TIMEOUT:
3007		printk(KERN_WARNING "cciss: cmd %p timed out\n", cmd);
3008		rq->errors = make_status_bytes(SAM_STAT_GOOD,
3009			cmd->err_info->CommandStatus, DRIVER_OK,
3010			blk_pc_request(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR);
3011		break;
3012	default:
3013		printk(KERN_WARNING "cciss: cmd %p returned "
3014		       "unknown status %x\n", cmd,
3015		       cmd->err_info->CommandStatus);
3016		rq->errors = make_status_bytes(SAM_STAT_GOOD,
3017			cmd->err_info->CommandStatus, DRIVER_OK,
3018			blk_pc_request(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR);
3019	}
3020
3021after_error_processing:
3022
3023	/* We need to return this command */
3024	if (retry_cmd) {
3025		resend_cciss_cmd(h, cmd);
3026		return;
3027	}
3028	cmd->rq->completion_data = cmd;
3029	blk_complete_request(cmd->rq);
3030}
3031
3032/*
3033 * Get a request and submit it to the controller.
3034 */
3035static void do_cciss_request(struct request_queue *q)
3036{
3037	ctlr_info_t *h = q->queuedata;
3038	CommandList_struct *c;
3039	sector_t start_blk;
3040	int seg;
3041	struct request *creq;
3042	u64bit temp64;
3043	struct scatterlist tmp_sg[MAXSGENTRIES];
3044	drive_info_struct *drv;
3045	int i, dir;
3046
3047	/* We call start_io here in case there is a command waiting on the
3048	 * queue that has not been sent.
3049	 */
3050	if (blk_queue_plugged(q))
3051		goto startio;
3052
3053      queue:
3054	creq = blk_peek_request(q);
3055	if (!creq)
3056		goto startio;
3057
3058	BUG_ON(creq->nr_phys_segments > MAXSGENTRIES);
3059
3060	if ((c = cmd_alloc(h, 1)) == NULL)
3061		goto full;
3062
3063	blk_start_request(creq);
3064
3065	spin_unlock_irq(q->queue_lock);
3066
3067	c->cmd_type = CMD_RWREQ;
3068	c->rq = creq;
3069
3070	/* fill in the request */
3071	drv = creq->rq_disk->private_data;
3072	c->Header.ReplyQueue = 0;	// unused in simple mode
3073	/* got command from pool, so use the command block index instead */
3074	/* for direct lookups. */
3075	/* The first 2 bits are reserved for controller error reporting. */
3076	c->Header.Tag.lower = (c->cmdindex << 3);
3077	c->Header.Tag.lower |= 0x04;	/* flag for direct lookup. */
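	/* Example of the tag encoding (illustrative only): cmdindex 5 gives
	 * Tag.lower = (5 << 3) | 0x04 = 0x2c; do_cciss_intr() sees bit 2 set
	 * and recovers the pool index with 0x2c >> 3 == 5, regardless of the
	 * two low error-reporting bits.
	 */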
3078	c->Header.LUN.LogDev.VolId = drv->LunID;
3079	c->Header.LUN.LogDev.Mode = 1;
3080	c->Request.CDBLen = 10;	// 12 byte commands not in FW yet;
3081	c->Request.Type.Type = TYPE_CMD;	// It is a command.
3082	c->Request.Type.Attribute = ATTR_SIMPLE;
3083	c->Request.Type.Direction =
3084	    (rq_data_dir(creq) == READ) ? XFER_READ : XFER_WRITE;
3085	c->Request.Timeout = 0;	// Don't time out
3086	c->Request.CDB[0] =
3087	    (rq_data_dir(creq) == READ) ? h->cciss_read : h->cciss_write;
3088	start_blk = blk_rq_pos(creq);
3089#ifdef CCISS_DEBUG
3090	printk(KERN_DEBUG "ciss: sector =%d nr_sectors=%d\n",
3091	       (int)blk_rq_pos(creq), (int)blk_rq_sectors(creq));
3092#endif				/* CCISS_DEBUG */
3093
3094	sg_init_table(tmp_sg, MAXSGENTRIES);
3095	seg = blk_rq_map_sg(q, creq, tmp_sg);
3096
3097	/* get the DMA records for the setup */
3098	if (c->Request.Type.Direction == XFER_READ)
3099		dir = PCI_DMA_FROMDEVICE;
3100	else
3101		dir = PCI_DMA_TODEVICE;
3102
3103	for (i = 0; i < seg; i++) {
3104		c->SG[i].Len = tmp_sg[i].length;
3105		temp64.val = (__u64) pci_map_page(h->pdev, sg_page(&tmp_sg[i]),
3106						  tmp_sg[i].offset,
3107						  tmp_sg[i].length, dir);
3108		c->SG[i].Addr.lower = temp64.val32.lower;
3109		c->SG[i].Addr.upper = temp64.val32.upper;
3110		c->SG[i].Ext = 0;	// we are not chaining
3111	}
3112	/* track how many SG entries we are using */
3113	if (seg > h->maxSG)
3114		h->maxSG = seg;
3115
3116#ifdef CCISS_DEBUG
3117	printk(KERN_DEBUG "cciss: Submitting %u sectors in %d segments\n",
3118	       blk_rq_sectors(creq), seg);
3119#endif				/* CCISS_DEBUG */
3120
3121	c->Header.SGList = c->Header.SGTotal = seg;
3122	if (likely(blk_fs_request(creq))) {
3123		if(h->cciss_read == CCISS_READ_10) {
3124			c->Request.CDB[1] = 0;
3125			c->Request.CDB[2] = (start_blk >> 24) & 0xff;	//MSB
3126			c->Request.CDB[3] = (start_blk >> 16) & 0xff;
3127			c->Request.CDB[4] = (start_blk >> 8) & 0xff;
3128			c->Request.CDB[5] = start_blk & 0xff;
3129			c->Request.CDB[6] = 0;	// (sect >> 24) & 0xff; MSB
3130			c->Request.CDB[7] = (blk_rq_sectors(creq) >> 8) & 0xff;
3131			c->Request.CDB[8] = blk_rq_sectors(creq) & 0xff;
3132			c->Request.CDB[9] = c->Request.CDB[11] = c->Request.CDB[12] = 0;
3133		} else {
3134			u32 upper32 = upper_32_bits(start_blk);
3135
3136			c->Request.CDBLen = 16;
3137			c->Request.CDB[1]= 0;
3138			c->Request.CDB[2]= (upper32 >> 24) & 0xff;	//MSB
3139			c->Request.CDB[3]= (upper32 >> 16) & 0xff;
3140			c->Request.CDB[4]= (upper32 >>  8) & 0xff;
3141			c->Request.CDB[5]= upper32 & 0xff;
3142			c->Request.CDB[6]= (start_blk >> 24) & 0xff;
3143			c->Request.CDB[7]= (start_blk >> 16) & 0xff;
3144			c->Request.CDB[8]= (start_blk >>  8) & 0xff;
3145			c->Request.CDB[9]= start_blk & 0xff;
3146			c->Request.CDB[10]= (blk_rq_sectors(creq) >> 24) & 0xff;
3147			c->Request.CDB[11]= (blk_rq_sectors(creq) >> 16) & 0xff;
3148			c->Request.CDB[12]= (blk_rq_sectors(creq) >>  8) & 0xff;
3149			c->Request.CDB[13]= blk_rq_sectors(creq) & 0xff;
3150			c->Request.CDB[14] = c->Request.CDB[15] = 0;
3151		}
3152	} else if (blk_pc_request(creq)) {
3153		c->Request.CDBLen = creq->cmd_len;
3154		memcpy(c->Request.CDB, creq->cmd, BLK_MAX_CDB);
3155	} else {
3156		printk(KERN_WARNING "cciss%d: bad request type %d\n", h->ctlr, creq->cmd_type);
3157		BUG();
3158	}
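	/* CDB layout example (illustrative only): a READ(10) of 16 sectors
	 * starting at block 0x00012345 carries the LBA MSB-first in
	 * CDB[2..5] = 00 01 23 45 and the sector count in CDB[7..8] = 00 10;
	 * the 16-byte form above widens these to CDB[2..9] and CDB[10..13].
	 */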
3159
3160	spin_lock_irq(q->queue_lock);
3161
3162	addQ(&h->reqQ, c);
3163	h->Qdepth++;
3164	if (h->Qdepth > h->maxQsinceinit)
3165		h->maxQsinceinit = h->Qdepth;
3166
3167	goto queue;
3168full:
3169	blk_stop_queue(q);
3170startio:
3171	/* We will already have the driver lock here so no need
3172	 * to lock it.
3173	 */
3174	start_io(h);
3175}
3176
3177static inline unsigned long get_next_completion(ctlr_info_t *h)
3178{
3179#ifdef CONFIG_CISS_SCSI_TAPE
3180	/* Any rejects from sendcmd() lying around? Process them first */
3181	if (h->scsi_rejects.ncompletions == 0)
3182		return h->access.command_completed(h);
3183	else {
3184		struct sendcmd_reject_list *srl;
3185		int n;
3186		srl = &h->scsi_rejects;
3187		n = --srl->ncompletions;
3188		/* printk("cciss%d: processing saved reject\n", h->ctlr); */
3189		printk("p");
3190		return srl->complete[n];
3191	}
3192#else
3193	return h->access.command_completed(h);
3194#endif
3195}
3196
3197static inline int interrupt_pending(ctlr_info_t *h)
3198{
3199#ifdef CONFIG_CISS_SCSI_TAPE
3200	return (h->access.intr_pending(h)
3201		|| (h->scsi_rejects.ncompletions > 0));
3202#else
3203	return h->access.intr_pending(h);
3204#endif
3205}
3206
3207static inline long interrupt_not_for_us(ctlr_info_t *h)
3208{
3209#ifdef CONFIG_CISS_SCSI_TAPE
3210	return (((h->access.intr_pending(h) == 0) ||
3211		 (h->interrupts_enabled == 0))
3212		&& (h->scsi_rejects.ncompletions == 0));
3213#else
3214	return (((h->access.intr_pending(h) == 0) ||
3215		 (h->interrupts_enabled == 0)));
3216#endif
3217}
3218
3219static irqreturn_t do_cciss_intr(int irq, void *dev_id)
3220{
3221	ctlr_info_t *h = dev_id;
3222	CommandList_struct *c;
3223	unsigned long flags;
3224	__u32 a, a1, a2;
3225
3226	if (interrupt_not_for_us(h))
3227		return IRQ_NONE;
3228	/*
3229	 * If there are completed commands in the completion queue,
3230	 * we had better do something about it.
3231	 */
3232	spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
3233	while (interrupt_pending(h)) {
3234		while ((a = get_next_completion(h)) != FIFO_EMPTY) {
3235			a1 = a;
3236			if ((a & 0x04)) {
3237				a2 = (a >> 3);
3238				if (a2 >= h->nr_cmds) {
3239					printk(KERN_WARNING
3240					       "cciss: controller cciss%d failed, stopping.\n",
3241					       h->ctlr);
3242					fail_all_cmds(h->ctlr);
3243					return IRQ_HANDLED;
3244				}
3245
3246				c = h->cmd_pool + a2;
3247				a = c->busaddr;
3248
3249			} else {
3250				struct hlist_node *tmp;
3251
3252				a &= ~3;
3253				c = NULL;
3254				hlist_for_each_entry(c, tmp, &h->cmpQ, list) {
3255					if (c->busaddr == a)
3256						break;
3257				}
3258			}
3259			/*
3260			 * If we've found the command, take it off the
3261			 * completion Q and free it
3262			 */
3263			if (c && c->busaddr == a) {
3264				removeQ(c);
3265				if (c->cmd_type == CMD_RWREQ) {
3266					complete_command(h, c, 0);
3267				} else if (c->cmd_type == CMD_IOCTL_PEND) {
3268					complete(c->waiting);
3269				}
3270#				ifdef CONFIG_CISS_SCSI_TAPE
3271				else if (c->cmd_type == CMD_SCSI)
3272					complete_scsi_command(c, 0, a1);
3273#				endif
3274				continue;
3275			}
3276		}
3277	}
3278
3279	spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
3280	return IRQ_HANDLED;
3281}
3282
3283static int scan_thread(void *data)
3284{
3285	ctlr_info_t *h = data;
3286	int rc;
3287	DECLARE_COMPLETION_ONSTACK(wait);
3288	h->rescan_wait = &wait;
3289
3290	for (;;) {
3291		rc = wait_for_completion_interruptible(&wait);
3292		if (kthread_should_stop())
3293			break;
3294		if (!rc)
3295			rebuild_lun_table(h, 0);
3296	}
3297	return 0;
3298}
3299
3300static int check_for_unit_attention(ctlr_info_t *h, CommandList_struct *c)
3301{
3302	if (c->err_info->SenseInfo[2] != UNIT_ATTENTION)
3303		return 0;
3304
3305	switch (c->err_info->SenseInfo[12]) {
3306	case STATE_CHANGED:
3307		printk(KERN_WARNING "cciss%d: a state change "
3308			"detected, command retried\n", h->ctlr);
3309		return 1;
3310	break;
3311	case LUN_FAILED:
3312		printk(KERN_WARNING "cciss%d: LUN failure "
3313			"detected, action required\n", h->ctlr);
3314		return 1;
3315	break;
3316	case REPORT_LUNS_CHANGED:
3317		printk(KERN_WARNING "cciss%d: report LUN data "
3318			"changed\n", h->ctlr);
3319		if (h->rescan_wait)
3320			complete(h->rescan_wait);
3321		return 1;
3322	break;
3323	case POWER_OR_RESET:
3324		printk(KERN_WARNING "cciss%d: a power on "
3325			"or device reset detected\n", h->ctlr);
3326		return 1;
3327	break;
3328	case UNIT_ATTENTION_CLEARED:
3329		printk(KERN_WARNING "cciss%d: unit attention "
3330		    "cleared by another initiator\n", h->ctlr);
3331		return 1;
3332	break;
3333	default:
3334		printk(KERN_WARNING "cciss%d: unknown "
3335			"unit attention detected\n", h->ctlr);
3336		return 1;
3337	}
3338}
3339
3340/*
3341 *  We cannot read the structure directly, for portability we must use
3342 *   the io functions.
3343 *   This is for debug only.
3344 */
3345#ifdef CCISS_DEBUG
3346static void print_cfg_table(CfgTable_struct *tb)
3347{
3348	int i;
3349	char temp_name[17];
3350
3351	printk("Controller Configuration information\n");
3352	printk("------------------------------------\n");
3353	for (i = 0; i < 4; i++)
3354		temp_name[i] = readb(&(tb->Signature[i]));
3355	temp_name[4] = '\0';
3356	printk("   Signature = %s\n", temp_name);
3357	printk("   Spec Number = %d\n", readl(&(tb->SpecValence)));
3358	printk("   Transport methods supported = 0x%x\n",
3359	       readl(&(tb->TransportSupport)));
3360	printk("   Transport methods active = 0x%x\n",
3361	       readl(&(tb->TransportActive)));
3362	printk("   Requested transport Method = 0x%x\n",
3363	       readl(&(tb->HostWrite.TransportRequest)));
3364	printk("   Coalesce Interrupt Delay = 0x%x\n",
3365	       readl(&(tb->HostWrite.CoalIntDelay)));
3366	printk("   Coalesce Interrupt Count = 0x%x\n",
3367	       readl(&(tb->HostWrite.CoalIntCount)));
3368	printk("   Max outstanding commands = 0x%x\n",
3369	       readl(&(tb->CmdsOutMax)));
3370	printk("   Bus Types = 0x%x\n", readl(&(tb->BusTypes)));
3371	for (i = 0; i < 16; i++)
3372		temp_name[i] = readb(&(tb->ServerName[i]));
3373	temp_name[16] = '\0';
3374	printk("   Server Name = %s\n", temp_name);
3375	printk("   Heartbeat Counter = 0x%x\n\n\n", readl(&(tb->HeartBeat)));
3376}
3377#endif				/* CCISS_DEBUG */
3378
3379static int find_PCI_BAR_index(struct pci_dev *pdev, unsigned long pci_bar_addr)
3380{
3381	int i, offset, mem_type, bar_type;
3382	if (pci_bar_addr == PCI_BASE_ADDRESS_0)	/* looking for BAR zero? */
3383		return 0;
3384	offset = 0;
3385	for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
3386		bar_type = pci_resource_flags(pdev, i) & PCI_BASE_ADDRESS_SPACE;
3387		if (bar_type == PCI_BASE_ADDRESS_SPACE_IO)
3388			offset += 4;
3389		else {
3390			mem_type = pci_resource_flags(pdev, i) &
3391			    PCI_BASE_ADDRESS_MEM_TYPE_MASK;
3392			switch (mem_type) {
3393			case PCI_BASE_ADDRESS_MEM_TYPE_32:
3394			case PCI_BASE_ADDRESS_MEM_TYPE_1M:
3395				offset += 4;	/* 32 bit */
3396				break;
3397			case PCI_BASE_ADDRESS_MEM_TYPE_64:
3398				offset += 8;
3399				break;
3400			default:	/* reserved in PCI 2.2 */
3401				printk(KERN_WARNING
3402				       "Base address is invalid\n");
3403				return -1;
3404				break;
3405			}
3406		}
3407		if (offset == pci_bar_addr - PCI_BASE_ADDRESS_0)
3408			return i + 1;
3409	}
3410	return -1;
3411}
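/* Illustrative trace (not part of the driver): if resource 0 is a 32-bit
 * memory BAR, a cfg_base_addr of PCI_BASE_ADDRESS_1 (config offset 0x14)
 * makes offset == 4 == 0x14 - PCI_BASE_ADDRESS_0 after the first pass, so
 * the function returns index 1; a 64-bit BAR would have advanced offset
 * by 8 instead.
 */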
3412
3413/* If MSI/MSI-X is supported by the kernel we will try to enable it on
3414 * controllers that are capable. If not, we use IO-APIC mode.
3415 */
3416
3417static void __devinit cciss_interrupt_mode(ctlr_info_t *c,
3418					   struct pci_dev *pdev, __u32 board_id)
3419{
3420#ifdef CONFIG_PCI_MSI
3421	int err;
3422	struct msix_entry cciss_msix_entries[4] = { {0, 0}, {0, 1},
3423	{0, 2}, {0, 3}
3424	};
3425
3426	/* Some boards advertise MSI but don't really support it */
3427	if ((board_id == 0x40700E11) ||
3428	    (board_id == 0x40800E11) ||
3429	    (board_id == 0x40820E11) || (board_id == 0x40830E11))
3430		goto default_int_mode;
3431
3432	if (pci_find_capability(pdev, PCI_CAP_ID_MSIX)) {
3433		err = pci_enable_msix(pdev, cciss_msix_entries, 4);
3434		if (!err) {
3435			c->intr[0] = cciss_msix_entries[0].vector;
3436			c->intr[1] = cciss_msix_entries[1].vector;
3437			c->intr[2] = cciss_msix_entries[2].vector;
3438			c->intr[3] = cciss_msix_entries[3].vector;
3439			c->msix_vector = 1;
3440			return;
3441		}
3442		if (err > 0) {
3443			printk(KERN_WARNING "cciss: only %d MSI-X vectors "
3444			       "available\n", err);
3445			goto default_int_mode;
3446		} else {
3447			printk(KERN_WARNING "cciss: MSI-X init failed %d\n",
3448			       err);
3449			goto default_int_mode;
3450		}
3451	}
3452	if (pci_find_capability(pdev, PCI_CAP_ID_MSI)) {
3453		if (!pci_enable_msi(pdev)) {
3454			c->msi_vector = 1;
3455		} else {
3456			printk(KERN_WARNING "cciss: MSI init failed\n");
3457		}
3458	}
3459default_int_mode:
3460#endif				/* CONFIG_PCI_MSI */
3461	/* if we get here we're going to use the default interrupt mode */
3462	c->intr[SIMPLE_MODE_INT] = pdev->irq;
3463	return;
3464}
3465
3466static int __devinit cciss_pci_init(ctlr_info_t *c, struct pci_dev *pdev)
3467{
3468	ushort subsystem_vendor_id, subsystem_device_id, command;
3469	__u32 board_id, scratchpad = 0;
3470	__u64 cfg_offset;
3471	__u32 cfg_base_addr;
3472	__u64 cfg_base_addr_index;
3473	int i, err;
3474
3475	/* check to see if controller has been disabled */
3476	/* BEFORE trying to enable it */
3477	(void)pci_read_config_word(pdev, PCI_COMMAND, &command);
3478	if (!(command & 0x02)) {
3479		printk(KERN_WARNING
3480		       "cciss: controller appears to be disabled\n");
3481		return -ENODEV;
3482	}
3483
3484	err = pci_enable_device(pdev);
3485	if (err) {
3486		printk(KERN_ERR "cciss: Unable to Enable PCI device\n");
3487		return err;
3488	}
3489
3490	err = pci_request_regions(pdev, "cciss");
3491	if (err) {
3492		printk(KERN_ERR "cciss: Cannot obtain PCI resources, "
3493		       "aborting\n");
3494		return err;
3495	}
3496
3497	subsystem_vendor_id = pdev->subsystem_vendor;
3498	subsystem_device_id = pdev->subsystem_device;
3499	board_id = (((__u32) (subsystem_device_id << 16) & 0xffff0000) |
3500		    subsystem_vendor_id);
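	/* Example (illustrative only): subsystem device 0x4070 with
	 * subsystem vendor 0x0E11 composes to board_id 0x40700E11, one of
	 * the IDs that cciss_interrupt_mode() treats as unable to use MSI.
	 */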
3501
3502#ifdef CCISS_DEBUG
3503	printk("command = %x\n", command);
3504	printk("irq = %x\n", pdev->irq);
3505	printk("board_id = %x\n", board_id);
3506#endif				/* CCISS_DEBUG */
3507
3508/* If the kernel supports MSI/MSI-X we will try to enable that functionality,
3509 * else we use the IO-APIC interrupt assigned to us by system ROM.
3510 */
3511	cciss_interrupt_mode(c, pdev, board_id);
3512
3513	/* find the memory BAR */
3514	for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
3515		if (pci_resource_flags(pdev, i) & IORESOURCE_MEM)
3516			break;
3517	}
3518	if (i == DEVICE_COUNT_RESOURCE) {
3519		printk(KERN_WARNING "cciss: No memory BAR found\n");
3520		err = -ENODEV;
3521		goto err_out_free_res;
3522	}
3523
3524	c->paddr = pci_resource_start(pdev, i); /* addressing mode bits
3525						 * already removed
3526						 */
3527
3528#ifdef CCISS_DEBUG
3529	printk("address 0 = %lx\n", c->paddr);
3530#endif				/* CCISS_DEBUG */
3531	c->vaddr = remap_pci_mem(c->paddr, 0x250);
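	/* Map the first 0x250 bytes of the BAR, which is enough for the host
	 * interface registers (scratchpad, config-table pointers, doorbell)
	 * accessed below; the config table itself is mapped separately once
	 * its location is known.
	 */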
3532
3533	/* Wait for the board to become ready.  (PCI hotplug needs this.)
3534	 * We poll for up to 120 secs, once per 100ms. */
3535	for (i = 0; i < 1200; i++) {
3536		scratchpad = readl(c->vaddr + SA5_SCRATCHPAD_OFFSET);
3537		if (scratchpad == CCISS_FIRMWARE_READY)
3538			break;
3539		set_current_state(TASK_INTERRUPTIBLE);
3540		schedule_timeout(HZ / 10);	/* wait 100ms */
3541	}
3542	if (scratchpad != CCISS_FIRMWARE_READY) {
3543		printk(KERN_WARNING "cciss: Board not ready.  Timed out.\n");
3544		err = -ENODEV;
3545		goto err_out_free_res;
3546	}
3547
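	/* The config table lives in one of the controller's BARs: the
	 * register at SA5_CTCFG_OFFSET identifies which BAR and
	 * SA5_CTMEM_OFFSET gives the byte offset of the table within it.
	 */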
3548	/* get the address index number */
3549	cfg_base_addr = readl(c->vaddr + SA5_CTCFG_OFFSET);
3550	cfg_base_addr &= (__u32) 0x0000ffff;
3551#ifdef CCISS_DEBUG
3552	printk("cfg base address = %x\n", cfg_base_addr);
3553#endif				/* CCISS_DEBUG */
3554	cfg_base_addr_index = find_PCI_BAR_index(pdev, cfg_base_addr);
3555#ifdef CCISS_DEBUG
3556	printk("cfg base address index = %llx\n",
3557		(unsigned long long)cfg_base_addr_index);
3558#endif				/* CCISS_DEBUG */
3559	if (cfg_base_addr_index == -1) {
3560		printk(KERN_WARNING "cciss: Cannot find cfg_base_addr_index\n");
3561		err = -ENODEV;
3562		goto err_out_free_res;
3563	}
3564
3565	cfg_offset = readl(c->vaddr + SA5_CTMEM_OFFSET);
3566#ifdef CCISS_DEBUG
3567	printk("cfg offset = %llx\n", (unsigned long long)cfg_offset);
3568#endif				/* CCISS_DEBUG */
3569	c->cfgtable = remap_pci_mem(pci_resource_start(pdev,
3570						       cfg_base_addr_index) +
3571				    cfg_offset, sizeof(CfgTable_struct));
3572	c->board_id = board_id;
3573
3574#ifdef CCISS_DEBUG
3575	print_cfg_table(c->cfgtable);
3576#endif				/* CCISS_DEBUG */
3577
3578	/* Some controllers support Zero Memory Raid (ZMR).
3579	 * When configured in ZMR mode the number of supported
3580	 * commands drops to 64. So instead of just setting an
3581	 * arbitrary value we make the driver a little smarter.
3582	 * We read the config table to find out how many commands
3583	 * the controller supports, then subtract 4 to
3584	 * leave a little room for ioctl calls.
3585	 */
3586	c->max_commands = readl(&(c->cfgtable->CmdsOutMax));
3587	for (i = 0; i < ARRAY_SIZE(products); i++) {
3588		if (board_id == products[i].board_id) {
3589			c->product_name = products[i].product_name;
3590			c->access = *(products[i].access);
3591			c->nr_cmds = c->max_commands - 4;
3592			break;
3593		}
3594	}
3595	if ((readb(&c->cfgtable->Signature[0]) != 'C') ||
3596	    (readb(&c->cfgtable->Signature[1]) != 'I') ||
3597	    (readb(&c->cfgtable->Signature[2]) != 'S') ||
3598	    (readb(&c->cfgtable->Signature[3]) != 'S')) {
3599		printk(KERN_WARNING "cciss: Does not appear to be a valid CISS config table\n");
3600		err = -ENODEV;
3601		goto err_out_free_res;
3602	}
3603	/* We didn't find the controller in our list. We know the
3604	 * signature is valid. If it's an HP device, let's try to
3605	 * bind to the device and fire it up. Otherwise we bail.
3606	 */
3607	if (i == ARRAY_SIZE(products)) {
3608		if (subsystem_vendor_id == PCI_VENDOR_ID_HP) {
3609			c->product_name = products[i-1].product_name;
3610			c->access = *(products[i-1].access);
3611			c->nr_cmds = c->max_commands - 4;
3612			printk(KERN_WARNING "cciss: This is an unknown "
3613				"Smart Array controller.\n"
3614				"cciss: Please update to the latest driver "
3615				"available from www.hp.com.\n");
3616		} else {
3617			printk(KERN_WARNING "cciss: Sorry, I don't know how"
3618				" to access the Smart Array controller %08lx\n",
3619				(unsigned long)board_id);
3620			err = -ENODEV;
3621			goto err_out_free_res;
3622		}
3623	}
3624#ifdef CONFIG_X86
3625	{
3626		/* Need to enable prefetch in the SCSI core for 6400 in x86 */
3627		__u32 prefetch;
3628		prefetch = readl(&(c->cfgtable->SCSI_Prefetch));
3629		prefetch |= 0x100;
3630		writel(prefetch, &(c->cfgtable->SCSI_Prefetch));
3631	}
3632#endif
3633
3634	/* Disable DMA prefetch and refetch for the P600.
3635	 * An ASIC bug may result in accesses to invalid memory addresses.
3636	 * We've disabled prefetch for some time now. Testing with XEN
3637	 * kernels revealed a bug in the refetch if dom0 resides on a P600.
3638	 */
3639	if (board_id == 0x3225103C) {
3640		__u32 dma_prefetch;
3641		__u32 dma_refetch;
3642		dma_prefetch = readl(c->vaddr + I2O_DMA1_CFG);
3643		dma_prefetch |= 0x8000;
3644		writel(dma_prefetch, c->vaddr + I2O_DMA1_CFG);
3645		pci_read_config_dword(pdev, PCI_COMMAND_PARITY, &dma_refetch);
3646		dma_refetch |= 0x1;
3647		pci_write_config_dword(pdev, PCI_COMMAND_PARITY, dma_refetch);
3648	}
3649
3650#ifdef CCISS_DEBUG
3651	printk("Trying to put board into Simple mode\n");
3652#endif				/* CCISS_DEBUG */
3653	c->max_commands = readl(&(c->cfgtable->CmdsOutMax));
3654	/* Update the field, and then ring the doorbell */
3655	writel(CFGTBL_Trans_Simple, &(c->cfgtable->HostWrite.TransportRequest));
3656	writel(CFGTBL_ChangeReq, c->vaddr + SA5_DOORBELL);
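	/* The controller acknowledges the transport change by clearing the
	 * CFGTBL_ChangeReq bit in the doorbell register, so poll for that
	 * and then verify that TransportActive reports simple mode.
	 */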
3657
3658	/* Under certain very rare conditions, this can take a while.
3659	 * (e.g.: hot replace a failed 144GB drive in a RAID 5 set right
3660	 * as we enter this code.) */
3661	for (i = 0; i < MAX_CONFIG_WAIT; i++) {
3662		if (!(readl(c->vaddr + SA5_DOORBELL) & CFGTBL_ChangeReq))
3663			break;
3664		/* delay and try again */
3665		set_current_state(TASK_INTERRUPTIBLE);
3666		schedule_timeout(10);
3667	}
3668
3669#ifdef CCISS_DEBUG
3670	printk(KERN_DEBUG "I counter got to %d %x\n", i,
3671	       readl(c->vaddr + SA5_DOORBELL));
3672#endif				/* CCISS_DEBUG */
3673#ifdef CCISS_DEBUG
3674	print_cfg_table(c->cfgtable);
3675#endif				/* CCISS_DEBUG */
3676
3677	if (!(readl(&(c->cfgtable->TransportActive)) & CFGTBL_Trans_Simple)) {
3678		printk(KERN_WARNING "cciss: unable to get board into"
3679		       " simple mode\n");
3680		err = -ENODEV;
3681		goto err_out_free_res;
3682	}
3683	return 0;
3684
3685err_out_free_res:
3686	/*
3687	 * Deliberately omit pci_disable_device(): it does something nasty to
3688	 * Smart Array controllers that pci_enable_device does not undo
3689	 */
3690	pci_release_regions(pdev);
3691	return err;
3692}
3693
3694/* Function to find the first free pointer into our hba[] array
3695 * Returns -1 if no free entries are left.
3696 */
3697static int alloc_cciss_hba(void)
3698{
3699	int i;
3700
3701	for (i = 0; i < MAX_CTLR; i++) {
3702		if (!hba[i]) {
3703			ctlr_info_t *p;
3704
3705			p = kzalloc(sizeof(ctlr_info_t), GFP_KERNEL);
3706			if (!p)
3707				goto Enomem;
3708			hba[i] = p;
3709			return i;
3710		}
3711	}
3712	printk(KERN_WARNING "cciss: This driver supports a maximum"
3713	       " of %d controllers.\n", MAX_CTLR);
3714	return -1;
3715Enomem:
3716	printk(KERN_ERR "cciss: out of memory.\n");
3717	return -1;
3718}
3719
3720static void free_hba(int i)
3721{
3722	ctlr_info_t *p = hba[i];
3723	int n;
3724
3725	hba[i] = NULL;
3726	for (n = 0; n < CISS_MAX_LUN; n++)
3727		put_disk(p->gendisk[n]);
3728	kfree(p);
3729}
3730
3731/* Send a message CDB to the firmware. */
3732static __devinit int cciss_message(struct pci_dev *pdev, unsigned char opcode, unsigned char type)
3733{
3734	typedef struct {
3735		CommandListHeader_struct CommandHeader;
3736		RequestBlock_struct Request;
3737		ErrDescriptor_struct ErrorDescriptor;
3738	} Command;
3739	static const size_t cmd_sz = sizeof(Command) + sizeof(ErrorInfo_struct);
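	/* The command and its error info buffer are carved out of a single
	 * consistent DMA allocation; the error descriptor set up below
	 * points just past the command itself.
	 */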
3740	Command *cmd;
3741	dma_addr_t paddr64;
3742	uint32_t paddr32, tag;
3743	void __iomem *vaddr;
3744	int i, err;
3745
3746	vaddr = ioremap_nocache(pci_resource_start(pdev, 0), pci_resource_len(pdev, 0));
3747	if (vaddr == NULL)
3748		return -ENOMEM;
3749
3750	/* The Inbound Post Queue only accepts 32-bit physical addresses for the
3751	   CCISS commands, so they must be allocated from the lower 4GiB of
3752	   memory. */
3753	err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
3754	if (err) {
3755		iounmap(vaddr);
3756		return -ENOMEM;
3757	}
3758
3759	cmd = pci_alloc_consistent(pdev, cmd_sz, &paddr64);
3760	if (cmd == NULL) {
3761		iounmap(vaddr);
3762		return -ENOMEM;
3763	}
3764
3765	/* This must fit, because of the 32-bit consistent DMA mask.  Also,
3766	   although there's no guarantee, we assume that the address is at
3767	   least 4-byte aligned (most likely, it's page-aligned). */
3768	paddr32 = paddr64;
3769
3770	cmd->CommandHeader.ReplyQueue = 0;
3771	cmd->CommandHeader.SGList = 0;
3772	cmd->CommandHeader.SGTotal = 0;
3773	cmd->CommandHeader.Tag.lower = paddr32;
3774	cmd->CommandHeader.Tag.upper = 0;
3775	memset(&cmd->CommandHeader.LUN.LunAddrBytes, 0, 8);
3776
3777	cmd->Request.CDBLen = 16;
3778	cmd->Request.Type.Type = TYPE_MSG;
3779	cmd->Request.Type.Attribute = ATTR_HEADOFQUEUE;
3780	cmd->Request.Type.Direction = XFER_NONE;
3781	cmd->Request.Timeout = 0; /* Don't time out */
3782	cmd->Request.CDB[0] = opcode;
3783	cmd->Request.CDB[1] = type;
3784	memset(&cmd->Request.CDB[2], 0, 14); /* the rest of the CDB is reserved */
3785
3786	cmd->ErrorDescriptor.Addr.lower = paddr32 + sizeof(Command);
3787	cmd->ErrorDescriptor.Addr.upper = 0;
3788	cmd->ErrorDescriptor.Len = sizeof(ErrorInfo_struct);
3789
3790	writel(paddr32, vaddr + SA5_REQUEST_PORT_OFFSET);
3791
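	/* Poll the outbound reply queue for up to roughly 10 seconds.  The
	 * completion is the command's own bus address with status flags in
	 * the low two bits; bit 1 set means the command failed, which is
	 * checked once the loop exits.
	 */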
3792	for (i = 0; i < 10; i++) {
3793		tag = readl(vaddr + SA5_REPLY_PORT_OFFSET);
3794		if ((tag & ~3) == paddr32)
3795			break;
3796		schedule_timeout_uninterruptible(HZ);
3797	}
3798
3799	iounmap(vaddr);
3800
3801	/* we leak the DMA buffer here ... no choice since the controller could
3802	   still complete the command. */
3803	if (i == 10) {
3804		printk(KERN_ERR "cciss: controller message %02x:%02x timed out\n",
3805			opcode, type);
3806		return -ETIMEDOUT;
3807	}
3808
3809	pci_free_consistent(pdev, cmd_sz, cmd, paddr64);
3810
3811	if (tag & 2) {
3812		printk(KERN_ERR "cciss: controller message %02x:%02x failed\n",
3813			opcode, type);
3814		return -EIO;
3815	}
3816
3817	printk(KERN_INFO "cciss: controller message %02x:%02x succeeded\n",
3818		opcode, type);
3819	return 0;
3820}
3821
3822#define cciss_soft_reset_controller(p) cciss_message(p, 1, 0)
3823#define cciss_noop(p) cciss_message(p, 3, 0)
3824
3825static __devinit int cciss_reset_msi(struct pci_dev *pdev)
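/* After a controller reset (reset_devices), MSI or MSI-X may still be enabled
 * in the controller's config space; disable both so the driver starts from a
 * known legacy-interrupt configuration.
 */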
3826{
3827/* the #defines are stolen from drivers/pci/msi.h. */
3828#define msi_control_reg(base)		(base + PCI_MSI_FLAGS)
3829#define PCI_MSIX_FLAGS_ENABLE		(1 << 15)
3830
3831	int pos;
3832	u16 control = 0;
3833
3834	pos = pci_find_capability(pdev, PCI_CAP_ID_MSI);
3835	if (pos) {
3836		pci_read_config_word(pdev, msi_control_reg(pos), &control);
3837		if (control & PCI_MSI_FLAGS_ENABLE) {
3838			printk(KERN_INFO "cciss: resetting MSI\n");
3839			pci_write_config_word(pdev, msi_control_reg(pos), control & ~PCI_MSI_FLAGS_ENABLE);
3840		}
3841	}
3842
3843	pos = pci_find_capability(pdev, PCI_CAP_ID_MSIX);
3844	if (pos) {
3845		pci_read_config_word(pdev, msi_control_reg(pos), &control);
3846		if (control & PCI_MSIX_FLAGS_ENABLE) {
3847			printk(KERN_INFO "cciss: resetting MSI-X\n");
3848			pci_write_config_word(pdev, msi_control_reg(pos), control & ~PCI_MSIX_FLAGS_ENABLE);
3849		}
3850	}
3851
3852	return 0;
3853}
3854
3855/* This does a hard reset of the controller using PCI power management
3856 * states. */
3857static __devinit int cciss_hard_reset_controller(struct pci_dev *pdev)
3858{
3859	u16 pmcsr, saved_config_space[32];
3860	int i, pos;
3861
3862	printk(KERN_INFO "cciss: using PCI PM to reset controller\n");
3863
3864	/* This is very nearly the same thing as
3865
3866	   pci_save_state(pci_dev);
3867	   pci_set_power_state(pci_dev, PCI_D3hot);
3868	   pci_set_power_state(pci_dev, PCI_D0);
3869	   pci_restore_state(pci_dev);
3870
3871	   but we can't use these nice canned kernel routines on
3872	   kexec, because they also check the MSI/MSI-X state in PCI
3873	   configuration space and do the wrong thing when it is
3874	   set/cleared.  Also, the pci_save/restore_state functions
3875	   violate the ordering requirements for restoring the
3876	   configuration space from the CCISS document (see the
3877	   comment below).  So we roll our own .... */
3878
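	/* Save the first 64 bytes of config space as 16-bit words; they are
	 * written back one word at a time, in the order the spec requires,
	 * after the D3hot -> D0 transition below.
	 */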
3879	for (i = 0; i < 32; i++)
3880		pci_read_config_word(pdev, 2*i, &saved_config_space[i]);
3881
3882	pos = pci_find_capability(pdev, PCI_CAP_ID_PM);
3883	if (pos == 0) {
3884		printk(KERN_ERR "cciss_reset_controller: PCI PM not supported\n");
3885		return -ENODEV;
3886	}
3887
3888	/* Quoting from the Open CISS Specification: "The Power
3889	 * Management Control/Status Register (CSR) controls the power
3890	 * state of the device.  The normal operating state is D0,
3891	 * CSR=00h.  The software off state is D3, CSR=03h.  To reset
3892	 * the controller, place the interface device in D3 then to
3893	 * D0, this causes a secondary PCI reset which will reset the
3894	 * controller." */
3895
3896	/* enter the D3hot power management state */
3897	pci_read_config_word(pdev, pos + PCI_PM_CTRL, &pmcsr);
3898	pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
3899	pmcsr |= PCI_D3hot;
3900	pci_write_config_word(pdev, pos + PCI_PM_CTRL, pmcsr);
3901
3902	schedule_timeout_uninterruptible(HZ >> 1);
3903
3904	/* enter the D0 power management state */
3905	pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
3906	pmcsr |= PCI_D0;
3907	pci_write_config_word(pdev, pos + PCI_PM_CTRL, pmcsr);
3908
3909	schedule_timeout_uninterruptible(HZ >> 1);
3910
3911	/* Restore the PCI configuration space.  The Open CISS
3912	 * Specification says, "Restore the PCI Configuration
3913	 * Registers, offsets 00h through 60h. It is important to
3914	 * restore the command register, 16-bits at offset 04h,
3915	 * last. Do not restore the configuration status register,
3916	 * 16-bits at offset 06h."  Note that the offset is 2*i. */
3917	for (i = 0; i < 32; i++) {
3918		if (i == 2 || i == 3)
3919			continue;
3920		pci_write_config_word(pdev, 2*i, saved_config_space[i]);
3921	}
3922	wmb();
3923	pci_write_config_word(pdev, 4, saved_config_space[2]);
3924
3925	return 0;
3926}
3927
3928/*
3929 *  This is it.  Find all the controllers and register them.  I really hate
3930 *  stealing all these major device numbers.
3931 *  Returns 1 on success, a negative value on failure.
3932 */
3933static int __devinit cciss_init_one(struct pci_dev *pdev,
3934				    const struct pci_device_id *ent)
3935{
3936	int i;
3937	int j = 0;
3938	int rc;
3939	int dac, return_code;
3940	InquiryData_struct *inq_buff = NULL;
3941
3942	if (reset_devices) {
3943		/* Reset the controller with a PCI power-cycle */
3944		if (cciss_hard_reset_controller(pdev) || cciss_reset_msi(pdev))
3945			return -ENODEV;
3946
3947		/* Now try to get the controller to respond to a no-op. Some
3948		   devices (notably the HP Smart Array 5i Controller) need
3949		   up to 30 seconds to respond. */
3950		for (i = 0; i < 30; i++) {
3951			if (cciss_noop(pdev) == 0)
3952				break;
3953
3954			schedule_timeout_uninterruptible(HZ);
3955		}
3956		if (i == 30) {
3957			printk(KERN_ERR "cciss: controller seems dead\n");
3958			return -EBUSY;
3959		}
3960	}
3961
3962	i = alloc_cciss_hba();
3963	if (i < 0)
3964		return -1;
3965
3966	hba[i]->busy_initializing = 1;
3967	INIT_HLIST_HEAD(&hba[i]->cmpQ);
3968	INIT_HLIST_HEAD(&hba[i]->reqQ);
3969
3970	if (cciss_pci_init(hba[i], pdev) != 0)
3971		goto clean0;
3972
3973	sprintf(hba[i]->devname, "cciss%d", i);
3974	hba[i]->ctlr = i;
3975	hba[i]->pdev = pdev;
3976
3977	if (cciss_create_hba_sysfs_entry(hba[i]))
3978		goto clean0;
3979
3980	/* configure PCI DMA stuff */
3981	if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64)))
3982		dac = 1;
3983	else if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(32)))
3984		dac = 0;
3985	else {
3986		printk(KERN_ERR "cciss: no suitable DMA available\n");
3987		goto clean1;
3988	}
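	/* dac records whether 64-bit DMA addressing was accepted; it is only
	 * used for the "using DAC" part of the probe message below.
	 */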
3989
3990	/*
3991	 * Register with the static major number, or get a dynamic major
3992	 * number by passing 0 as the argument.  This is how more than
3993	 * 8 controllers are supported.
3994	 */
3995	if (i < MAX_CTLR_ORIG)
3996		hba[i]->major = COMPAQ_CISS_MAJOR + i;
3997	rc = register_blkdev(hba[i]->major, hba[i]->devname);
3998	if (rc == -EBUSY || rc == -EINVAL) {
3999		printk(KERN_ERR
4000		       "cciss:  Unable to get major number %d for %s "
4001		       "on hba %d\n", hba[i]->major, hba[i]->devname, i);
4002		goto clean1;
4003	} else {
4004		if (i >= MAX_CTLR_ORIG)
4005			hba[i]->major = rc;
4006	}
4007
4008	/* make sure the board interrupts are off */
4009	hba[i]->access.set_intr_mask(hba[i], CCISS_INTR_OFF);
4010	if (request_irq(hba[i]->intr[SIMPLE_MODE_INT], do_cciss_intr,
4011			IRQF_DISABLED | IRQF_SHARED, hba[i]->devname, hba[i])) {
4012		printk(KERN_ERR "cciss: Unable to get irq %d for %s\n",
4013		       hba[i]->intr[SIMPLE_MODE_INT], hba[i]->devname);
4014		goto clean2;
4015	}
4016
4017	printk(KERN_INFO "%s: <0x%x> at PCI %s IRQ %d%s using DAC\n",
4018	       hba[i]->devname, pdev->device, pci_name(pdev),
4019	       hba[i]->intr[SIMPLE_MODE_INT], dac ? "" : " not");
4020
4021	hba[i]->cmd_pool_bits =
4022	    kmalloc(DIV_ROUND_UP(hba[i]->nr_cmds, BITS_PER_LONG)
4023			* sizeof(unsigned long), GFP_KERNEL);
4024	hba[i]->cmd_pool = (CommandList_struct *)
4025	    pci_alloc_consistent(hba[i]->pdev,
4026		    hba[i]->nr_cmds * sizeof(CommandList_struct),
4027		    &(hba[i]->cmd_pool_dhandle));
4028	hba[i]->errinfo_pool = (ErrorInfo_struct *)
4029	    pci_alloc_consistent(hba[i]->pdev,
4030		    hba[i]->nr_cmds * sizeof(ErrorInfo_struct),
4031		    &(hba[i]->errinfo_pool_dhandle));
4032	if ((hba[i]->cmd_pool_bits == NULL)
4033	    || (hba[i]->cmd_pool == NULL)
4034	    || (hba[i]->errinfo_pool == NULL)) {
4035		printk(KERN_ERR "cciss: out of memory\n");
4036		goto clean4;
4037	}
4038#ifdef CONFIG_CISS_SCSI_TAPE
4039	hba[i]->scsi_rejects.complete =
4040	    kmalloc(sizeof(hba[i]->scsi_rejects.complete[0]) *
4041		    (hba[i]->nr_cmds + 5), GFP_KERNEL);
4042	if (hba[i]->scsi_rejects.complete == NULL) {
4043		printk(KERN_ERR "cciss: out of memory\n");
4044		goto clean4;
4045	}
4046#endif
4047	spin_lock_init(&hba[i]->lock);
4048
4049	/* Initialize the pdev driver private data
4050	   to point at hba[i].  */
4051	pci_set_drvdata(pdev, hba[i]);
4052	/* command and error info recs zeroed out before
4053	   they are used */
4054	memset(hba[i]->cmd_pool_bits, 0,
4055	       DIV_ROUND_UP(hba[i]->nr_cmds, BITS_PER_LONG)
4056			* sizeof(unsigned long));
4057
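	/* No logical drives are known yet; every slot is marked unused here
	 * and rebuild_lun_table() below fills in the drives the controller
	 * actually reports.
	 */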
4058	hba[i]->num_luns = 0;
4059	hba[i]->highest_lun = -1;
4060	for (j = 0; j < CISS_MAX_LUN; j++) {
4061		hba[i]->drv[j].raid_level = -1;
4062		hba[i]->drv[j].queue = NULL;
4063		hba[i]->gendisk[j] = NULL;
4064	}
4065
4066	cciss_scsi_setup(i);
4067
4068	/* Turn the interrupts on so we can service requests */
4069	hba[i]->access.set_intr_mask(hba[i], CCISS_INTR_ON);
4070
4071	/* Get the firmware version */
4072	inq_buff = kzalloc(sizeof(InquiryData_struct), GFP_KERNEL);
4073	if (inq_buff == NULL) {
4074		printk(KERN_ERR "cciss: out of memory\n");
4075		goto clean4;
4076	}
4077
4078	return_code = sendcmd_withirq(CISS_INQUIRY, i, inq_buff,
4079		sizeof(InquiryData_struct), 0, 0 , 0, TYPE_CMD);
4080	if (return_code == IO_OK) {
4081		hba[i]->firm_ver[0] = inq_buff->data_byte[32];
4082		hba[i]->firm_ver[1] = inq_buff->data_byte[33];
4083		hba[i]->firm_ver[2] = inq_buff->data_byte[34];
4084		hba[i]->firm_ver[3] = inq_buff->data_byte[35];
4085	} else {	 /* send command failed */
4086		printk(KERN_WARNING "cciss: unable to determine firmware"
4087			" version of controller\n");
4088	}
4089
4090	cciss_procinit(i);
4091
4092	hba[i]->cciss_max_sectors = 2048;
4093
4094	hba[i]->busy_initializing = 0;
4095
4096	rebuild_lun_table(hba[i], 1);
4097	hba[i]->cciss_scan_thread = kthread_run(scan_thread, hba[i],
4098				"cciss_scan%02d", i);
4099	if (IS_ERR(hba[i]->cciss_scan_thread))
4100		return PTR_ERR(hba[i]->cciss_scan_thread);
4101
4102	return 1;
4103
4104clean4:
4105	kfree(inq_buff);
4106#ifdef CONFIG_CISS_SCSI_TAPE
4107	kfree(hba[i]->scsi_rejects.complete);
4108#endif
4109	kfree(hba[i]->cmd_pool_bits);
4110	if (hba[i]->cmd_pool)
4111		pci_free_consistent(hba[i]->pdev,
4112				    hba[i]->nr_cmds * sizeof(CommandList_struct),
4113				    hba[i]->cmd_pool, hba[i]->cmd_pool_dhandle);
4114	if (hba[i]->errinfo_pool)
4115		pci_free_consistent(hba[i]->pdev,
4116				    hba[i]->nr_cmds * sizeof(ErrorInfo_struct),
4117				    hba[i]->errinfo_pool,
4118				    hba[i]->errinfo_pool_dhandle);
4119	free_irq(hba[i]->intr[SIMPLE_MODE_INT], hba[i]);
4120clean2:
4121	unregister_blkdev(hba[i]->major, hba[i]->devname);
4122clean1:
4123	cciss_destroy_hba_sysfs_entry(hba[i]);
4124clean0:
4125	hba[i]->busy_initializing = 0;
4126	/* cleanup any queues that may have been initialized */
4127	for (j = 0; j <= hba[i]->highest_lun; j++) {
4128		drive_info_struct *drv = &(hba[i]->drv[j]);
4129		if (drv->queue)
4130			blk_cleanup_queue(drv->queue);
4131	}
4132	/*
4133	 * Deliberately omit pci_disable_device(): it does something nasty to
4134	 * Smart Array controllers that pci_enable_device does not undo
4135	 */
4136	pci_release_regions(pdev);
4137	pci_set_drvdata(pdev, NULL);
4138	free_hba(i);
4139	return -1;
4140}
4141
4142static void cciss_shutdown(struct pci_dev *pdev)
4143{
4144	ctlr_info_t *tmp_ptr;
4145	int i;
4146	char flush_buf[4];
4147	int return_code;
4148
4149	tmp_ptr = pci_get_drvdata(pdev);
4150	if (tmp_ptr == NULL)
4151		return;
4152	i = tmp_ptr->ctlr;
4153	if (hba[i] == NULL)
4154		return;
4155
4156	/* Turn board interrupts off and send the flush cache command.
4157	 * sendcmd() will turn off interrupts and send the flush so that
4158	 * all data in the battery-backed cache is written to the disks. */
4159	memset(flush_buf, 0, 4);
4160	return_code = sendcmd(CCISS_CACHE_FLUSH, i, flush_buf, 4, 0, 0, 0, NULL,
4161			      TYPE_CMD);
4162	if (return_code == IO_OK) {
4163		printk(KERN_INFO "Completed flushing cache on controller %d\n", i);
4164	} else {
4165		printk(KERN_WARNING "Error flushing cache on controller %d\n", i);
4166	}
4167	free_irq(hba[i]->intr[2], hba[i]);
4168}
4169
4170static void __devexit cciss_remove_one(struct pci_dev *pdev)
4171{
4172	ctlr_info_t *tmp_ptr;
4173	int i, j;
4174
4175	if (pci_get_drvdata(pdev) == NULL) {
4176		printk(KERN_ERR "cciss: Unable to remove device\n");
4177		return;
4178	}
4179
4180	tmp_ptr = pci_get_drvdata(pdev);
4181	i = tmp_ptr->ctlr;
4182	if (hba[i] == NULL) {
4183		printk(KERN_ERR "cciss: device appears to "
4184		       "already be removed\n");
4185		return;
4186	}
4187
4188	kthread_stop(hba[i]->cciss_scan_thread);
4189
4190	remove_proc_entry(hba[i]->devname, proc_cciss);
4191	unregister_blkdev(hba[i]->major, hba[i]->devname);
4192
4193	/* remove it from the disk list */
4194	for (j = 0; j < CISS_MAX_LUN; j++) {
4195		struct gendisk *disk = hba[i]->gendisk[j];
4196		if (disk) {
4197			struct request_queue *q = disk->queue;
4198
4199			if (disk->flags & GENHD_FL_UP)
4200				del_gendisk(disk);
4201			if (q)
4202				blk_cleanup_queue(q);
4203		}
4204	}
4205
4206#ifdef CONFIG_CISS_SCSI_TAPE
4207	cciss_unregister_scsi(i);	/* unhook from SCSI subsystem */
4208#endif
4209
4210	cciss_shutdown(pdev);
4211
4212#ifdef CONFIG_PCI_MSI
4213	if (hba[i]->msix_vector)
4214		pci_disable_msix(hba[i]->pdev);
4215	else if (hba[i]->msi_vector)
4216		pci_disable_msi(hba[i]->pdev);
4217#endif				/* CONFIG_PCI_MSI */
4218
4219	iounmap(hba[i]->vaddr);
4220
4221	pci_free_consistent(hba[i]->pdev, hba[i]->nr_cmds * sizeof(CommandList_struct),
4222			    hba[i]->cmd_pool, hba[i]->cmd_pool_dhandle);
4223	pci_free_consistent(hba[i]->pdev, hba[i]->nr_cmds * sizeof(ErrorInfo_struct),
4224			    hba[i]->errinfo_pool, hba[i]->errinfo_pool_dhandle);
4225	kfree(hba[i]->cmd_pool_bits);
4226#ifdef CONFIG_CISS_SCSI_TAPE
4227	kfree(hba[i]->scsi_rejects.complete);
4228#endif
4229	/*
4230	 * Deliberately omit pci_disable_device(): it does something nasty to
4231	 * Smart Array controllers that pci_enable_device does not undo
4232	 */
4233	pci_release_regions(pdev);
4234	pci_set_drvdata(pdev, NULL);
4235	cciss_destroy_hba_sysfs_entry(hba[i]);
4236	free_hba(i);
4237}
4238
4239static struct pci_driver cciss_pci_driver = {
4240	.name = "cciss",
4241	.probe = cciss_init_one,
4242	.remove = __devexit_p(cciss_remove_one),
4243	.id_table = cciss_pci_device_id,
4244	.shutdown = cciss_shutdown,
4245};
4246
4247/*
4248 *  This is it.  Register the PCI driver information for the cards we control;
4249 *  the OS will call our registered routines when it finds one of our cards.
4250 */
4251static int __init cciss_init(void)
4252{
4253	int err;
4254
4255	/*
4256	 * The hardware requires that commands are aligned on a 64-bit
4257	 * boundary. Given that we use pci_alloc_consistent() to allocate an
4258	 * array of them, the size must be a multiple of 8 bytes.
4259	 */
4260	BUILD_BUG_ON(sizeof(CommandList_struct) % 8);
4261
4262	printk(KERN_INFO DRIVER_NAME "\n");
4263
4264	err = bus_register(&cciss_bus_type);
4265	if (err)
4266		return err;
4267
4268	/* Register for our PCI devices */
4269	err = pci_register_driver(&cciss_pci_driver);
4270	if (err)
4271		goto err_bus_register;
4272
4273	return 0;
4274
4275err_bus_register:
4276	bus_unregister(&cciss_bus_type);
4277	return err;
4278}
4279
4280static void __exit cciss_cleanup(void)
4281{
4282	int i;
4283
4284	pci_unregister_driver(&cciss_pci_driver);
4285	/* double check that all controller entries have been removed */
4286	for (i = 0; i < MAX_CTLR; i++) {
4287		if (hba[i] != NULL) {
4288			printk(KERN_WARNING "cciss: had to remove"
4289			       " controller %d\n", i);
4290			cciss_remove_one(hba[i]->pdev);
4291		}
4292	}
4293	remove_proc_entry("driver/cciss", NULL);
4294	bus_unregister(&cciss_bus_type);
4295}
4296
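/* Fail every outstanding command with a hardware error so that upper layers
 * are not left waiting forever on a controller that has stopped responding.
 */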
4297static void fail_all_cmds(unsigned long ctlr)
4298{
4299	/* If we get here, the board is apparently dead. */
4300	ctlr_info_t *h = hba[ctlr];
4301	CommandList_struct *c;
4302	unsigned long flags;
4303
4304	printk(KERN_WARNING "cciss%d: controller not responding.\n", h->ctlr);
4305	h->alive = 0;		/* the controller apparently died... */
4306
4307	spin_lock_irqsave(CCISS_LOCK(ctlr), flags);
4308
4309	pci_disable_device(h->pdev);	/* Make sure it is really dead. */
4310
4311	/* move everything off the request queue onto the completed queue */
4312	while (!hlist_empty(&h->reqQ)) {
4313		c = hlist_entry(h->reqQ.first, CommandList_struct, list);
4314		removeQ(c);
4315		h->Qdepth--;
4316		addQ(&h->cmpQ, c);
4317	}
4318
4319	/* Now, fail everything on the completed queue with a HW error */
4320	while (!hlist_empty(&h->cmpQ)) {
4321		c = hlist_entry(h->cmpQ.first, CommandList_struct, list);
4322		removeQ(c);
4323		c->err_info->CommandStatus = CMD_HARDWARE_ERR;
4324		if (c->cmd_type == CMD_RWREQ) {
4325			complete_command(h, c, 0);
4326		} else if (c->cmd_type == CMD_IOCTL_PEND)
4327			complete(c->waiting);
4328#ifdef CONFIG_CISS_SCSI_TAPE
4329		else if (c->cmd_type == CMD_SCSI)
4330			complete_scsi_command(c, 0, 0);
4331#endif
4332	}
4333	spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
4334	return;
4335}
4336
4337module_init(cciss_init);
4338module_exit(cciss_cleanup);
4339