/* Copyright (c) 2007 Coraid, Inc.  See COPYING for GPL terms. */
/*
 * aoecmd.c
 * Filesystem request handling methods
 */

#include <linux/ata.h>
#include <linux/slab.h>
#include <linux/hdreg.h>
#include <linux/blkdev.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/genhd.h>
#include <linux/moduleparam.h>
#include <net/net_namespace.h>
#include <asm/unaligned.h>
#include "aoe.h"

static int aoe_deadsecs = 60 * 3;
module_param(aoe_deadsecs, int, 0644);
MODULE_PARM_DESC(aoe_deadsecs, "After aoe_deadsecs seconds, give up and fail dev.");

static int aoe_maxout = 16;
module_param(aoe_maxout, int, 0644);
MODULE_PARM_DESC(aoe_maxout,
	"Only aoe_maxout outstanding packets for every MAC on eX.Y.");

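/* Allocate an skb for an outgoing AoE frame, with the MAC and network
 * headers reset and the protocol set to ETH_P_AOE.  May return NULL
 * under memory pressure.
 */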
static struct sk_buff *
new_skb(ulong len)
{
	struct sk_buff *skb;

	skb = alloc_skb(len, GFP_ATOMIC);
	if (skb) {
		skb_reset_mac_header(skb);
		skb_reset_network_header(skb);
		skb->protocol = __constant_htons(ETH_P_AOE);
	}
	return skb;
}

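/* Find the outstanding frame on target t that carries this tag, or
 * NULL if no frame matches.
 */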
static struct frame *
getframe(struct aoetgt *t, int tag)
{
	struct frame *f, *e;

	f = t->frames;
	e = f + t->nframes;
	for (; f < e; f++)
		if (f->tag == tag)
			return f;
	return NULL;
}

/*
 * Leave the top bit clear so we have tagspace for userland.
 * The bottom 16 bits are the xmit tick for rexmit/rttavg processing.
 * This driver reserves tag -1 to mean "unused frame."
 */
static int
newtag(struct aoetgt *t)
{
	register ulong n;

	n = jiffies & 0xffff;
	return n |= (++t->lasttag & 0x7fff) << 16;
}

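/* Fill in the AoE header for an ATA command to target t and return the
 * freshly allocated tag identifying the frame.
 */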
static int
aoehdr_atainit(struct aoedev *d, struct aoetgt *t, struct aoe_hdr *h)
{
	u32 host_tag = newtag(t);

	memcpy(h->src, t->ifp->nd->dev_addr, sizeof h->src);
	memcpy(h->dst, t->addr, sizeof h->dst);
	h->type = __constant_cpu_to_be16(ETH_P_AOE);
	h->verfl = AOE_HVER;
	h->major = cpu_to_be16(d->aoemajor);
	h->minor = d->aoeminor;
	h->cmd = AOECMD_ATA;
	h->tag = cpu_to_be32(host_tag);

	return host_tag;
}

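/* Scatter the low 48 bits of the LBA into the six byte-wide lba fields
 * of the ATA header.
 */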
static inline void
put_lba(struct aoe_atahdr *ah, sector_t lba)
{
	ah->lba0 = lba;
	ah->lba1 = lba >>= 8;
	ah->lba2 = lba >>= 8;
	ah->lba3 = lba >>= 8;
	ah->lba4 = lba >>= 8;
	ah->lba5 = lba >>= 8;
}

static void
ifrotate(struct aoetgt *t)
{
	t->ifp++;
	if (t->ifp >= &t->ifs[NAOEIFS] || t->ifp->nd == NULL)
		t->ifp = t->ifs;
	if (t->ifp->nd == NULL) {
		printk(KERN_INFO "aoe: no interface to rotate to\n");
		BUG();
	}
}

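/* The skb pool holds a few preallocated ETH_ZLEN skbs so that a frame
 * whose skb is still held by the network layer can borrow a fresh one.
 */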
static void
skb_pool_put(struct aoedev *d, struct sk_buff *skb)
{
	__skb_queue_tail(&d->skbpool, skb);
}

static struct sk_buff *
skb_pool_get(struct aoedev *d)
{
	struct sk_buff *skb = skb_peek(&d->skbpool);

	if (skb && atomic_read(&skb_shinfo(skb)->dataref) == 1) {
		__skb_unlink(skb, &d->skbpool);
		return skb;
	}
	if (skb_queue_len(&d->skbpool) < NSKBPOOLMAX &&
	    (skb = new_skb(ETH_ZLEN)))
		return skb;

	return NULL;
}

/* freeframe is where we do our load balancing so it's a little hairy. */
static struct frame *
freeframe(struct aoedev *d)
{
	struct frame *f, *e, *rf;
	struct aoetgt **t;
	struct sk_buff *skb;

	if (d->targets[0] == NULL) {	/* shouldn't happen, but I'm paranoid */
		printk(KERN_ERR "aoe: NULL TARGETS!\n");
		return NULL;
	}
	t = d->tgt;
	t++;
	if (t >= &d->targets[NTARGETS] || !*t)
		t = d->targets;
	for (;;) {
		if ((*t)->nout < (*t)->maxout
		&& t != d->htgt
		&& (*t)->ifp->nd) {
			rf = NULL;
			f = (*t)->frames;
			e = f + (*t)->nframes;
			for (; f < e; f++) {
				if (f->tag != FREETAG)
					continue;
				skb = f->skb;
				if (!skb
				&& !(f->skb = skb = new_skb(ETH_ZLEN)))
					continue;
				if (atomic_read(&skb_shinfo(skb)->dataref)
					!= 1) {
					if (!rf)
						rf = f;
					continue;
				}
gotone:				skb_shinfo(skb)->nr_frags = skb->data_len = 0;
				skb_trim(skb, 0);
				d->tgt = t;
				ifrotate(*t);
				return f;
			}
			/* Work can be done, but the network layer is
			   holding our precious packets.  Try to grab
			   one from the pool. */
			f = rf;
			if (f == NULL) {	/* more paranoia */
				printk(KERN_ERR
					"aoe: freeframe: %s.\n",
					"unexpected null rf");
				d->flags |= DEVFL_KICKME;
				return NULL;
			}
			skb = skb_pool_get(d);
			if (skb) {
				skb_pool_put(d, f->skb);
				f->skb = skb;
				goto gotone;
			}
			(*t)->dataref++;
			if ((*t)->nout == 0)
				d->flags |= DEVFL_KICKME;
		}
		if (t == d->tgt)	/* we've looped and found nada */
			break;
		t++;
		if (t >= &d->targets[NTARGETS] || !*t)
			t = d->targets;
	}
	return NULL;
}

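/* Build one ATA read or write frame for the buf in d->inprocess, queue
 * a clone on d->sendq, and advance the buf's bookkeeping.  Returns 0
 * when no free frame is available, 1 otherwise.
 */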
static int
aoecmd_ata_rw(struct aoedev *d)
{
	struct frame *f;
	struct aoe_hdr *h;
	struct aoe_atahdr *ah;
	struct buf *buf;
	struct bio_vec *bv;
	struct aoetgt *t;
	struct sk_buff *skb;
	ulong bcnt;
	char writebit, extbit;

	writebit = 0x10;
	extbit = 0x4;

	f = freeframe(d);
	if (f == NULL)
		return 0;
	t = *d->tgt;
	buf = d->inprocess;
	bv = buf->bv;
	bcnt = t->ifp->maxbcnt;
	if (bcnt == 0)
		bcnt = DEFAULTBCNT;
	if (bcnt > buf->bv_resid)
		bcnt = buf->bv_resid;
	/* initialize the headers & frame */
	skb = f->skb;
	h = (struct aoe_hdr *) skb_mac_header(skb);
	ah = (struct aoe_atahdr *) (h+1);
	skb_put(skb, sizeof *h + sizeof *ah);
	memset(h, 0, skb->len);
	f->tag = aoehdr_atainit(d, t, h);
	t->nout++;
	f->waited = 0;
	f->buf = buf;
	f->bufaddr = page_address(bv->bv_page) + buf->bv_off;
	f->bcnt = bcnt;
	f->lba = buf->sector;

	/* set up ata header */
	ah->scnt = bcnt >> 9;
	put_lba(ah, buf->sector);
	if (d->flags & DEVFL_EXT) {
		ah->aflags |= AOEAFL_EXT;
	} else {
		extbit = 0;
		ah->lba3 &= 0x0f;
		ah->lba3 |= 0xe0;	/* LBA bit + obsolete 0xa0 */
	}
	if (bio_data_dir(buf->bio) == WRITE) {
		skb_fill_page_desc(skb, 0, bv->bv_page, buf->bv_off, bcnt);
		ah->aflags |= AOEAFL_WRITE;
		skb->len += bcnt;
		skb->data_len = bcnt;
		t->wpkts++;
	} else {
		t->rpkts++;
		writebit = 0;
	}

	ah->cmdstat = ATA_CMD_PIO_READ | writebit | extbit;

	/* mark all tracking fields and load out */
	buf->nframesout += 1;
	buf->bv_off += bcnt;
	buf->bv_resid -= bcnt;
	buf->resid -= bcnt;
	buf->sector += bcnt >> 9;
	if (buf->resid == 0) {
		d->inprocess = NULL;
	} else if (buf->bv_resid == 0) {
		buf->bv = ++bv;
		buf->bv_resid = bv->bv_len;
		WARN_ON(buf->bv_resid == 0);
		buf->bv_off = bv->bv_offset;
	}

	skb->dev = t->ifp->nd;
	skb = skb_clone(skb, GFP_ATOMIC);
	if (skb)
		__skb_queue_tail(&d->sendq, skb);
	return 1;
}

/* Some callers cannot sleep; they can call this function to queue the
 * config packets and transmit them later, when interrupts are on.
 */
static void
aoecmd_cfg_pkts(ushort aoemajor, unsigned char aoeminor, struct sk_buff_head *queue)
{
	struct aoe_hdr *h;
	struct aoe_cfghdr *ch;
	struct sk_buff *skb;
	struct net_device *ifp;

	rcu_read_lock();
	for_each_netdev_rcu(&init_net, ifp) {
		dev_hold(ifp);
		if (!is_aoe_netif(ifp))
			goto cont;

		skb = new_skb(sizeof *h + sizeof *ch);
		if (skb == NULL) {
			printk(KERN_INFO "aoe: skb alloc failure\n");
			goto cont;
		}
		skb_put(skb, sizeof *h + sizeof *ch);
		skb->dev = ifp;
		__skb_queue_tail(queue, skb);
		h = (struct aoe_hdr *) skb_mac_header(skb);
		memset(h, 0, sizeof *h + sizeof *ch);

		memset(h->dst, 0xff, sizeof h->dst);
		memcpy(h->src, ifp->dev_addr, sizeof h->src);
		h->type = __constant_cpu_to_be16(ETH_P_AOE);
		h->verfl = AOE_HVER;
		h->major = cpu_to_be16(aoemajor);
		h->minor = aoeminor;
		h->cmd = AOECMD_CFG;

cont:
		dev_put(ifp);
	}
	rcu_read_unlock();
}

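/* Retransmit frame f to target t under a fresh tag.  For data commands
 * the LBA and sector count are rewritten from the frame's current
 * state, with the transfer capped at DEFAULTBCNT.
 */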
static void
resend(struct aoedev *d, struct aoetgt *t, struct frame *f)
{
	struct sk_buff *skb;
	struct aoe_hdr *h;
	struct aoe_atahdr *ah;
	char buf[128];
	u32 n;

	ifrotate(t);
	n = newtag(t);
	skb = f->skb;
	h = (struct aoe_hdr *) skb_mac_header(skb);
	ah = (struct aoe_atahdr *) (h+1);

	snprintf(buf, sizeof buf,
		"%15s e%ld.%d oldtag=%08x@%08lx newtag=%08x s=%pm d=%pm nout=%d\n",
		"retransmit", d->aoemajor, d->aoeminor, f->tag, jiffies, n,
		h->src, h->dst, t->nout);
	aoechr_error(buf);

	f->tag = n;
	h->tag = cpu_to_be32(n);
	memcpy(h->dst, t->addr, sizeof h->dst);
	memcpy(h->src, t->ifp->nd->dev_addr, sizeof h->src);

	switch (ah->cmdstat) {
	default:
		break;
	case ATA_CMD_PIO_READ:
	case ATA_CMD_PIO_READ_EXT:
	case ATA_CMD_PIO_WRITE:
	case ATA_CMD_PIO_WRITE_EXT:
		put_lba(ah, f->lba);

		n = f->bcnt;
		if (n > DEFAULTBCNT)
			n = DEFAULTBCNT;
		ah->scnt = n >> 9;
		if (ah->aflags & AOEAFL_WRITE) {
			skb_fill_page_desc(skb, 0, virt_to_page(f->bufaddr),
				offset_in_page(f->bufaddr), n);
			skb->len = sizeof *h + sizeof *ah + n;
			skb->data_len = n;
		}
	}
	skb->dev = t->ifp->nd;
	skb = skb_clone(skb, GFP_ATOMIC);
	if (skb == NULL)
		return;
	__skb_queue_tail(&d->sendq, skb);
}

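/* Ticks elapsed since the frame carrying this tag was transmitted; the
 * low 16 bits of every tag record the xmit tick (see newtag above).
 */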
static int
tsince(int tag)
{
	int n;

	n = jiffies & 0xffff;
	n -= tag & 0xffff;
	if (n < 0)
		n += 1<<16;
	return n;
}

static struct aoeif *
getif(struct aoetgt *t, struct net_device *nd)
{
	struct aoeif *p, *e;

	p = t->ifs;
	e = p + NAOEIFS;
	for (; p < e; p++)
		if (p->nd == nd)
			return p;
	return NULL;
}

static struct aoeif *
addif(struct aoetgt *t, struct net_device *nd)
{
	struct aoeif *p;

	p = getif(t, NULL);
	if (!p)
		return NULL;
	p->nd = nd;
	p->maxbcnt = DEFAULTBCNT;
	p->lost = 0;
	p->lostjumbo = 0;
	return p;
}

static void
ejectif(struct aoetgt *t, struct aoeif *ifp)
{
	struct aoeif *e;
	ulong n;

	e = t->ifs + NAOEIFS - 1;
	n = (e - ifp) * sizeof *ifp;
	memmove(ifp, ifp+1, n);
	e->nd = NULL;
}

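/* Move the outstanding frames off the "help" target (d->htgt) onto free
 * frames from the other targets and retransmit them.  Returns 0 if we
 * run out of free frames before the help target is drained.
 */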
static int
sthtith(struct aoedev *d)
{
	struct frame *f, *e, *nf;
	struct sk_buff *skb;
	struct aoetgt *ht = *d->htgt;

	f = ht->frames;
	e = f + ht->nframes;
	for (; f < e; f++) {
		if (f->tag == FREETAG)
			continue;
		nf = freeframe(d);
		if (!nf)
			return 0;
		skb = nf->skb;
		*nf = *f;
		f->skb = skb;
		f->tag = FREETAG;
		nf->waited = 0;
		ht->nout--;
		(*d->tgt)->nout++;
		resend(d, *d->tgt, nf);
	}
	/* he's clean, he's useless.  take away his interfaces */
	memset(ht->ifs, 0, sizeof ht->ifs);
	d->htgt = NULL;
	return 1;
}

static inline unsigned char
ata_scnt(unsigned char *packet)
{
	struct aoe_hdr *h;
	struct aoe_atahdr *ah;

	h = (struct aoe_hdr *) packet;
	ah = (struct aoe_atahdr *) (h+1);
	return ah->scnt;
}

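/* Per-device timer: scan every target for frames that have outlived the
 * timeout (about 150% of the RTT average), fail the device after
 * aoe_deadsecs, shrink or grow the per-target window, eject interfaces
 * that keep losing frames, and retransmit whatever is still pending.
 */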
static void
rexmit_timer(ulong vp)
{
	struct sk_buff_head queue;
	struct aoedev *d;
	struct aoetgt *t, **tt, **te;
	struct aoeif *ifp;
	struct frame *f, *e;
	register long timeout;
	ulong flags, n;

	d = (struct aoedev *) vp;

	/* timeout is always ~150% of the moving average */
	timeout = d->rttavg;
	timeout += timeout >> 1;

	spin_lock_irqsave(&d->lock, flags);

	if (d->flags & DEVFL_TKILL) {
		spin_unlock_irqrestore(&d->lock, flags);
		return;
	}
	tt = d->targets;
	te = tt + NTARGETS;
	for (; tt < te && *tt; tt++) {
		t = *tt;
		f = t->frames;
		e = f + t->nframes;
		for (; f < e; f++) {
			if (f->tag == FREETAG
			|| tsince(f->tag) < timeout)
				continue;
			n = f->waited += timeout;
			n /= HZ;
			if (n > aoe_deadsecs) {
				/* waited too long.  device failure. */
				aoedev_downdev(d);
				break;
			}

			if (n > HELPWAIT /* see if another target can help */
			&& (tt != d->targets || d->targets[1]))
				d->htgt = tt;

			if (t->nout == t->maxout) {
				if (t->maxout > 1)
					t->maxout--;
				t->lastwadj = jiffies;
			}

			ifp = getif(t, f->skb->dev);
			if (ifp && ++ifp->lost > (t->nframes << 1)
			&& (ifp != t->ifs || t->ifs[1].nd)) {
				ejectif(t, ifp);
				ifp = NULL;
			}

			if (ata_scnt(skb_mac_header(f->skb)) > DEFAULTBCNT / 512
			&& ifp && ++ifp->lostjumbo > (t->nframes << 1)
			&& ifp->maxbcnt != DEFAULTBCNT) {
				printk(KERN_INFO
					"aoe: e%ld.%d: "
					"too many lost jumbo on "
					"%s:%pm - "
					"falling back to %d frames.\n",
					d->aoemajor, d->aoeminor,
					ifp->nd->name, t->addr,
					DEFAULTBCNT);
				ifp->maxbcnt = 0;
			}
			resend(d, t, f);
		}

		/* window check */
		if (t->nout == t->maxout
		&& t->maxout < t->nframes
		&& (jiffies - t->lastwadj)/HZ > 10) {
			t->maxout++;
			t->lastwadj = jiffies;
		}
	}

	if (!skb_queue_empty(&d->sendq)) {
		n = d->rttavg <<= 1;
		if (n > MAXTIMER)
			d->rttavg = MAXTIMER;
	}

	if (d->flags & DEVFL_KICKME || d->htgt) {
		d->flags &= ~DEVFL_KICKME;
		aoecmd_work(d);
	}

	__skb_queue_head_init(&queue);
	skb_queue_splice_init(&d->sendq, &queue);

	d->timer.expires = jiffies + TIMERTICK;
	add_timer(&d->timer);

	spin_unlock_irqrestore(&d->lock, flags);

	aoenet_xmit(&queue);
}

/* enters with d->lock held */
void
aoecmd_work(struct aoedev *d)
{
	struct buf *buf;
loop:
	if (d->htgt && !sthtith(d))
		return;
	if (d->inprocess == NULL) {
		if (list_empty(&d->bufq))
			return;
		buf = container_of(d->bufq.next, struct buf, bufs);
		list_del(d->bufq.next);
		d->inprocess = buf;
	}
	if (aoecmd_ata_rw(d))
		goto loop;
}

/* this function performs work that has been deferred until sleeping is OK
 */
void
aoecmd_sleepwork(struct work_struct *work)
{
	struct aoedev *d = container_of(work, struct aoedev, work);

	if (d->flags & DEVFL_GDALLOC)
		aoeblk_gdalloc(d);

	if (d->flags & DEVFL_NEWSIZE) {
		struct block_device *bd;
		unsigned long flags;
		u64 ssize;

		ssize = get_capacity(d->gd);
		bd = bdget_disk(d->gd, 0);

		if (bd) {
			mutex_lock(&bd->bd_inode->i_mutex);
			i_size_write(bd->bd_inode, (loff_t)ssize<<9);
			mutex_unlock(&bd->bd_inode->i_mutex);
			bdput(bd);
		}
		spin_lock_irqsave(&d->lock, flags);
		d->flags |= DEVFL_UP;
		d->flags &= ~DEVFL_NEWSIZE;
		spin_unlock_irqrestore(&d->lock, flags);
	}
}

static void
ataid_complete(struct aoedev *d, struct aoetgt *t, unsigned char *id)
{
	u64 ssize;
	u16 n;

	/* word 83: command set supported */
	n = get_unaligned_le16(&id[83 << 1]);

	/* word 86: command set/feature enabled */
	n |= get_unaligned_le16(&id[86 << 1]);

	if (n & (1<<10)) {	/* bit 10: LBA 48 */
		d->flags |= DEVFL_EXT;

		/* word 100: number lba48 sectors */
		ssize = get_unaligned_le64(&id[100 << 1]);

		/* set as in ide-disk.c:init_idedisk_capacity */
		d->geo.cylinders = ssize;
		d->geo.cylinders /= (255 * 63);
		d->geo.heads = 255;
		d->geo.sectors = 63;
	} else {
		d->flags &= ~DEVFL_EXT;

		/* number lba28 sectors */
		ssize = get_unaligned_le32(&id[60 << 1]);

		/* NOTE: obsolete in ATA 6 */
		d->geo.cylinders = get_unaligned_le16(&id[54 << 1]);
		d->geo.heads = get_unaligned_le16(&id[55 << 1]);
		d->geo.sectors = get_unaligned_le16(&id[56 << 1]);
	}

	if (d->ssize != ssize)
		printk(KERN_INFO
			"aoe: %pm e%ld.%d v%04x has %llu sectors\n",
			t->addr,
			d->aoemajor, d->aoeminor,
			d->fw_ver, (long long)ssize);
	d->ssize = ssize;
	d->geo.start = 0;
	if (d->flags & (DEVFL_GDALLOC|DEVFL_NEWSIZE))
		return;
	if (d->gd != NULL) {
		set_capacity(d->gd, ssize);
		d->flags |= DEVFL_NEWSIZE;
	} else
		d->flags |= DEVFL_GDALLOC;
	schedule_work(&d->work);
}

static void
calc_rttavg(struct aoedev *d, int rtt)
{
	register long n;

	n = rtt;
	if (n < 0) {
		n = -rtt;
		if (n < MINTIMER)
			n = MINTIMER;
		else if (n > MAXTIMER)
			n = MAXTIMER;
		d->mintimer += (n - d->mintimer) >> 1;
	} else if (n < d->mintimer)
		n = d->mintimer;
	else if (n > MAXTIMER)
		n = MAXTIMER;

	/* g == .25; cf. Congestion Avoidance and Control, Jacobson & Karels; 1988 */
	n -= d->rttavg;
	d->rttavg += n >> 2;
}

static struct aoetgt *
gettgt(struct aoedev *d, char *addr)
{
	struct aoetgt **t, **e;

	t = d->targets;
	e = t + NTARGETS;
	for (; t < e && *t; t++)
		if (memcmp((*t)->addr, addr, sizeof((*t)->addr)) == 0)
			return *t;
	return NULL;
}

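/* Account a completed bio against the gendisk partition statistics. */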
static inline void
diskstats(struct gendisk *disk, struct bio *bio, ulong duration, sector_t sector)
{
	unsigned long n_sect = bio->bi_size >> 9;
	const int rw = bio_data_dir(bio);
	struct hd_struct *part;
	int cpu;

	cpu = part_stat_lock();
	part = disk_map_sector_rcu(disk, sector);

	part_stat_inc(cpu, part, ios[rw]);
	part_stat_add(cpu, part, ticks[rw], duration);
	part_stat_add(cpu, part, sectors[rw], n_sect);
	part_stat_add(cpu, part, io_ticks, duration);

	part_stat_unlock();
}

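/* Handle an incoming ATA response: match it to its device, target, and
 * outstanding frame, update the RTT average, copy back read data or
 * continue a partial transfer, and complete the bio once its last
 * frame has returned.
 */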
void
aoecmd_ata_rsp(struct sk_buff *skb)
{
	struct sk_buff_head queue;
	struct aoedev *d;
	struct aoe_hdr *hin, *hout;
	struct aoe_atahdr *ahin, *ahout;
	struct frame *f;
	struct buf *buf;
	struct aoetgt *t;
	struct aoeif *ifp;
	register long n;
	ulong flags;
	char ebuf[128];
	u16 aoemajor;

	hin = (struct aoe_hdr *) skb_mac_header(skb);
	aoemajor = get_unaligned_be16(&hin->major);
	d = aoedev_by_aoeaddr(aoemajor, hin->minor);
	if (d == NULL) {
		snprintf(ebuf, sizeof ebuf, "aoecmd_ata_rsp: ata response "
			"for unknown device %d.%d\n",
			 aoemajor, hin->minor);
		aoechr_error(ebuf);
		return;
	}

	spin_lock_irqsave(&d->lock, flags);

	n = get_unaligned_be32(&hin->tag);
	t = gettgt(d, hin->src);
	if (t == NULL) {
		printk(KERN_INFO "aoe: can't find target e%ld.%d:%pm\n",
			d->aoemajor, d->aoeminor, hin->src);
		spin_unlock_irqrestore(&d->lock, flags);
		return;
	}
	f = getframe(t, n);
	if (f == NULL) {
		calc_rttavg(d, -tsince(n));
		spin_unlock_irqrestore(&d->lock, flags);
		snprintf(ebuf, sizeof ebuf,
			"%15s e%d.%d    tag=%08x@%08lx\n",
			"unexpected rsp",
			get_unaligned_be16(&hin->major),
			hin->minor,
			get_unaligned_be32(&hin->tag),
			jiffies);
		aoechr_error(ebuf);
		return;
	}

	calc_rttavg(d, tsince(f->tag));

	ahin = (struct aoe_atahdr *) (hin+1);
	hout = (struct aoe_hdr *) skb_mac_header(f->skb);
	ahout = (struct aoe_atahdr *) (hout+1);
	buf = f->buf;

	if (ahin->cmdstat & 0xa9) {	/* these bits cleared on success */
		printk(KERN_ERR
			"aoe: ata error cmd=%2.2Xh stat=%2.2Xh from e%ld.%d\n",
			ahout->cmdstat, ahin->cmdstat,
			d->aoemajor, d->aoeminor);
		if (buf)
			buf->flags |= BUFFL_FAIL;
	} else {
		if (d->htgt && t == *d->htgt) /* I'll help myself, thank you. */
			d->htgt = NULL;
		n = ahout->scnt << 9;
		switch (ahout->cmdstat) {
		case ATA_CMD_PIO_READ:
		case ATA_CMD_PIO_READ_EXT:
			if (skb->len - sizeof *hin - sizeof *ahin < n) {
				printk(KERN_ERR
					"aoe: %s.  skb->len=%d need=%ld\n",
					"runt data size in read", skb->len, n);
				/* fail frame f?  just returning will rexmit. */
				spin_unlock_irqrestore(&d->lock, flags);
				return;
			}
			memcpy(f->bufaddr, ahin+1, n);
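			/* fall through: reads and writes share the
			 * partial-transfer handling below */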
		case ATA_CMD_PIO_WRITE:
		case ATA_CMD_PIO_WRITE_EXT:
			ifp = getif(t, skb->dev);
			if (ifp) {
				ifp->lost = 0;
				if (n > DEFAULTBCNT)
					ifp->lostjumbo = 0;
			}
			if (f->bcnt -= n) {
				f->lba += n >> 9;
				f->bufaddr += n;
				resend(d, t, f);
				goto xmit;
			}
			break;
		case ATA_CMD_ID_ATA:
			if (skb->len - sizeof *hin - sizeof *ahin < 512) {
				printk(KERN_INFO
					"aoe: runt data size in ataid.  skb->len=%d\n",
					skb->len);
				spin_unlock_irqrestore(&d->lock, flags);
				return;
			}
			ataid_complete(d, t, (char *) (ahin+1));
			break;
		default:
			printk(KERN_INFO
				"aoe: unrecognized ata command %2.2Xh for %d.%d\n",
				ahout->cmdstat,
				get_unaligned_be16(&hin->major),
				hin->minor);
		}
	}

	if (buf && --buf->nframesout == 0 && buf->resid == 0) {
		diskstats(d->gd, buf->bio, jiffies - buf->stime, buf->sector);
		if (buf->flags & BUFFL_FAIL)
			bio_endio(buf->bio, -EIO);
		else {
			bio_flush_dcache_pages(buf->bio);
			bio_endio(buf->bio, 0);
		}
		mempool_free(buf, d->bufpool);
	}

	f->buf = NULL;
	f->tag = FREETAG;
	t->nout--;

	aoecmd_work(d);
xmit:
	__skb_queue_head_init(&queue);
	skb_queue_splice_init(&d->sendq, &queue);

	spin_unlock_irqrestore(&d->lock, flags);
	aoenet_xmit(&queue);
}

void
aoecmd_cfg(ushort aoemajor, unsigned char aoeminor)
{
	struct sk_buff_head queue;

	__skb_queue_head_init(&queue);
	aoecmd_cfg_pkts(aoemajor, aoeminor, &queue);
	aoenet_xmit(&queue);
}

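/* Build an ATA IDENTIFY DEVICE frame for the current target and return
 * a clone ready for transmission, or NULL if no frame is free.
 */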
struct sk_buff *
aoecmd_ata_id(struct aoedev *d)
{
	struct aoe_hdr *h;
	struct aoe_atahdr *ah;
	struct frame *f;
	struct sk_buff *skb;
	struct aoetgt *t;

	f = freeframe(d);
	if (f == NULL)
		return NULL;

	t = *d->tgt;

	/* initialize the headers & frame */
	skb = f->skb;
	h = (struct aoe_hdr *) skb_mac_header(skb);
	ah = (struct aoe_atahdr *) (h+1);
	skb_put(skb, sizeof *h + sizeof *ah);
	memset(h, 0, skb->len);
	f->tag = aoehdr_atainit(d, t, h);
	t->nout++;
	f->waited = 0;

	/* set up ata header */
	ah->scnt = 1;
	ah->cmdstat = ATA_CMD_ID_ATA;
	ah->lba3 = 0xa0;

	skb->dev = t->ifp->nd;

	d->rttavg = MAXTIMER;
	d->timer.function = rexmit_timer;

	return skb_clone(skb, GFP_ATOMIC);
}

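/* Add a target with the given MAC address to the device, allocating
 * nframes frames for it.  Returns NULL if the target table is full or
 * memory is exhausted.
 */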
static struct aoetgt *
addtgt(struct aoedev *d, char *addr, ulong nframes)
{
	struct aoetgt *t, **tt, **te;
	struct frame *f, *e;

	tt = d->targets;
	te = tt + NTARGETS;
	for (; tt < te && *tt; tt++)
		;

	if (tt == te) {
		printk(KERN_INFO
			"aoe: device addtgt failure; too many targets\n");
		return NULL;
	}
	t = kcalloc(1, sizeof *t, GFP_ATOMIC);
	f = kcalloc(nframes, sizeof *f, GFP_ATOMIC);
	if (!t || !f) {
		kfree(f);
		kfree(t);
		printk(KERN_INFO "aoe: cannot allocate memory to add target\n");
		return NULL;
	}

	t->nframes = nframes;
	t->frames = f;
	e = f + nframes;
	for (; f < e; f++)
		f->tag = FREETAG;
	memcpy(t->addr, addr, sizeof t->addr);
	t->ifp = t->ifs;
	t->maxout = t->nframes;
	return *tt = t;
}

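/* Handle an AoE config response: look up (or create) the device and
 * target, record the receiving interface and its usable data frame
 * size, and kick off an ATA identify unless the device is already open.
 */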
void
aoecmd_cfg_rsp(struct sk_buff *skb)
{
	struct aoedev *d;
	struct aoe_hdr *h;
	struct aoe_cfghdr *ch;
	struct aoetgt *t;
	struct aoeif *ifp;
	ulong flags, sysminor, aoemajor;
	struct sk_buff *sl;
	u16 n;

	h = (struct aoe_hdr *) skb_mac_header(skb);
	ch = (struct aoe_cfghdr *) (h+1);

	/*
	 * Enough people have their dip switches set backwards to
	 * warrant a loud message for this special case.
	 */
	aoemajor = get_unaligned_be16(&h->major);
	if (aoemajor == 0xfff) {
		printk(KERN_ERR "aoe: Warning: shelf address is all ones.  "
			"Check shelf dip switches.\n");
		return;
	}

	sysminor = SYSMINOR(aoemajor, h->minor);
	if (sysminor * AOE_PARTITIONS + AOE_PARTITIONS > MINORMASK) {
		printk(KERN_INFO "aoe: e%ld.%d: minor number too large\n",
			aoemajor, (int) h->minor);
		return;
	}

	n = be16_to_cpu(ch->bufcnt);
	if (n > aoe_maxout)	/* keep it reasonable */
		n = aoe_maxout;

	d = aoedev_by_sysminor_m(sysminor);
	if (d == NULL) {
		printk(KERN_INFO "aoe: device sysminor_m failure\n");
		return;
	}

	spin_lock_irqsave(&d->lock, flags);

	t = gettgt(d, h->src);
	if (!t) {
		t = addtgt(d, h->src, n);
		if (!t) {
			spin_unlock_irqrestore(&d->lock, flags);
			return;
		}
	}
	ifp = getif(t, skb->dev);
	if (!ifp) {
		ifp = addif(t, skb->dev);
		if (!ifp) {
			printk(KERN_INFO
				"aoe: device addif failure; "
				"too many interfaces?\n");
			spin_unlock_irqrestore(&d->lock, flags);
			return;
		}
	}
	if (ifp->maxbcnt) {
		n = ifp->nd->mtu;
		n -= sizeof (struct aoe_hdr) + sizeof (struct aoe_atahdr);
		n /= 512;
		if (n > ch->scnt)
			n = ch->scnt;
		n = n ? n * 512 : DEFAULTBCNT;
		if (n != ifp->maxbcnt) {
			printk(KERN_INFO
				"aoe: e%ld.%d: setting %d%s%s:%pm\n",
				d->aoemajor, d->aoeminor, n,
				" byte data frames on ", ifp->nd->name,
				t->addr);
			ifp->maxbcnt = n;
		}
	}

	/* don't change users' perspective */
	if (d->nopen) {
		spin_unlock_irqrestore(&d->lock, flags);
		return;
	}
	d->fw_ver = be16_to_cpu(ch->fwver);

	sl = aoecmd_ata_id(d);

	spin_unlock_irqrestore(&d->lock, flags);

	if (sl) {
		struct sk_buff_head queue;
		__skb_queue_head_init(&queue);
		__skb_queue_tail(&queue, sl);
		aoenet_xmit(&queue);
	}
}

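/* Reset the timing and loss bookkeeping for every target and interface,
 * restoring the full window and the default data frame size.
 */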
void
aoecmd_cleanslate(struct aoedev *d)
{
	struct aoetgt **t, **te;
	struct aoeif *p, *e;

	d->mintimer = MINTIMER;

	t = d->targets;
	te = t + NTARGETS;
	for (; t < te && *t; t++) {
		(*t)->maxout = (*t)->nframes;
		p = (*t)->ifs;
		e = p + NAOEIFS;
		for (; p < e; p++) {
			p->lostjumbo = 0;
			p->lost = 0;
			p->maxbcnt = DEFAULTBCNT;
		}
	}
}