1/* ldc.c: Logical Domain Channel link-layer protocol driver.
2 *
3 * Copyright (C) 2007, 2008 David S. Miller <davem@davemloft.net>
4 */
5
6#include <linux/kernel.h>
7#include <linux/export.h>
8#include <linux/slab.h>
9#include <linux/spinlock.h>
10#include <linux/delay.h>
11#include <linux/errno.h>
12#include <linux/string.h>
13#include <linux/scatterlist.h>
14#include <linux/interrupt.h>
15#include <linux/list.h>
16#include <linux/init.h>
17#include <linux/bitmap.h>
18
19#include <asm/hypervisor.h>
20#include <asm/iommu.h>
21#include <asm/page.h>
22#include <asm/ldc.h>
23#include <asm/mdesc.h>
24
/* Module identity strings.  PFX is the prefix put in front of the
 * driver's printk() messages.
 */
#define DRV_MODULE_NAME		"ldc"
#define PFX DRV_MODULE_NAME	": "
#define DRV_MODULE_VERSION	"1.1"
#define DRV_MODULE_RELDATE	"July 22, 2008"

/* Version banner, assembled at compile time from the macros above. */
static char version[] =
	DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
/* Size in bytes of one queue entry; queue offsets advance in steps of
 * this value.
 */
#define LDC_PACKET_SIZE		64
33
/* Packet header layout for unreliable and reliable mode frames.
 * When in RAW mode, packets are simply straight 64-byte payloads
 * with no headers.
 *
 * The fixed header (type, stype, ctrl, env, seqid) occupies 8 bytes,
 * leaving LDC_PACKET_SIZE - 8 bytes for u_data.  The 'r' layout
 * spends another 8 bytes on pad + ackid before its payload.
 */
struct ldc_packet {
	/* Frame type: control, data or error. */
	u8			type;
#define LDC_CTRL		0x01
#define LDC_DATA		0x02
#define LDC_ERR			0x10

	/* Frame sub-type: informational, ACK or NACK. */
	u8			stype;
#define LDC_INFO		0x01
#define LDC_ACK			0x02
#define LDC_NACK		0x04

	/* Control message kind, dispatched on for LDC_CTRL frames. */
	u8			ctrl;
#define LDC_VERS		0x01 /* Link Version		*/
#define LDC_RTS			0x02 /* Request To Send		*/
#define LDC_RTR			0x03 /* Ready To Receive	*/
#define LDC_RDX			0x04 /* Ready for Data eXchange	*/
#define LDC_CTRL_MSK		0x0f

	/* In RTS/RTR handshake frames this carries the channel mode.
	 * NOTE(review): the LDC_LEN and fragment masks suggest that for
	 * data frames it holds the payload length plus start/stop
	 * fragment flags -- confirm against the read/write paths.
	 */
	u8			env;
#define LDC_LEN			0x3f
#define LDC_FRAG_MASK		0xc0
#define LDC_START		0x40
#define LDC_STOP		0x80

	/* Sequence number of this frame. */
	u32			seqid;

	union {
		/* Payload when only the 8-byte header is present. */
		u8		u_data[LDC_PACKET_SIZE - 8];
		/* Layout that additionally carries an acknowledgement id. */
		struct {
			u32	pad;
			u32	ackid;
			u8	r_data[LDC_PACKET_SIZE - 8 - 8];
		} r;
	} u;
};
73
/* Protocol version pair exchanged in LDC_VERS control frames. */
struct ldc_version {
	u16 major;
	u16 minor;
};
78
/* Versions this driver can speak.  Ordered from largest major to
 * lowest; find_by_major() relies on that ordering.
 */
static struct ldc_version ver_arr[] = {
	{ .major = 1, .minor = 0 },
};

/* Defaults applied by ldc_alloc(): the MTU used when the config leaves
 * it zero, and the number of entries in each of the TX and RX queues.
 */
#define LDC_DEFAULT_MTU			(4 * LDC_PACKET_SIZE)
#define LDC_DEFAULT_NUM_ENTRIES		(PAGE_SIZE / LDC_PACKET_SIZE)
86
struct ldc_channel;

/* Per-mode read/write entry points; ldc_alloc() selects one of the
 * tables declared below according to the configured channel mode.
 */
struct ldc_mode_ops {
	int (*write)(struct ldc_channel *, const void *, unsigned int);
	int (*read)(struct ldc_channel *, void *, unsigned int);
};

static const struct ldc_mode_ops raw_ops;
static const struct ldc_mode_ops nonraw_ops;
static const struct ldc_mode_ops stream_ops;

/* ldc_alloc() fails with -ENODEV while this is zero. */
int ldom_domaining_enabled;
99
/* Per-channel map table state, set up by ldc_iommu_init(). */
struct ldc_iommu {
	/* Protects arena alloc/free.  */
	spinlock_t			lock;
	/* Allocation bitmap (arena.map) and entry count (arena.limit). */
	struct iommu_arena		arena;
	/* Map table registered with the hypervisor for this channel. */
	struct ldc_mtable_entry		*page_table;
};
106
/* Software state for one LDC channel. */
struct ldc_channel {
	/* Protects all operations that depend upon channel state.  */
	spinlock_t			lock;

	/* Channel id handed to every sun4v_ldc_*() hypervisor call. */
	unsigned long			id;

	/* Buffer of cfg.mtu bytes, allocated only for LDC_MODE_STREAM. */
	u8				*mssbuf;
	u32				mssbuf_len;
	u32				mssbuf_off;

	/* TX queue: kernel address, head/tail byte offsets, entry count
	 * and the physical address (tx_ra) given to the hypervisor.
	 */
	struct ldc_packet		*tx_base;
	unsigned long			tx_head;
	unsigned long			tx_tail;
	unsigned long			tx_num_entries;
	unsigned long			tx_ra;

	/* Offset of the oldest TX entry not yet ACKed; see head_for_data(). */
	unsigned long			tx_acked;

	/* RX queue, same layout as the TX fields above. */
	struct ldc_packet		*rx_base;
	unsigned long			rx_head;
	unsigned long			rx_tail;
	unsigned long			rx_num_entries;
	unsigned long			rx_ra;

	/* Sequence numbers: last received and last sent. */
	u32				rcv_nxt;
	u32				snd_nxt;

	/* Channel state as last reported by the hypervisor. */
	unsigned long			chan_state;

	/* Private copy of the caller's configuration and its event cookie. */
	struct ldc_channel_config	cfg;
	void				*event_arg;

	const struct ldc_mode_ops	*mops;

	struct ldc_iommu		iommu;

	/* Version agreed (or being negotiated) during the handshake. */
	struct ldc_version		ver;

	/* Handshake progress. */
	u8				hs_state;
#define LDC_HS_CLOSED			0x00
#define LDC_HS_OPEN			0x01
#define LDC_HS_GOTVERS			0x02
#define LDC_HS_SENTRTR			0x03
#define LDC_HS_GOTRTR			0x04
#define LDC_HS_COMPLETE			0x10

	/* Resource-tracking bits plus the "in reset" marker. */
	u8				flags;
#define LDC_FLAG_ALLOCED_QUEUES		0x01
#define LDC_FLAG_REGISTERED_QUEUES	0x02
#define LDC_FLAG_REGISTERED_IRQS	0x04
#define LDC_FLAG_RESET			0x10

	/* Payload bytes available per packet in the configured mode. */
	u8				mss;
	/* Driver-level LDC_STATE_* value; change via ldc_set_state(). */
	u8				state;

#define LDC_IRQ_NAME_MAX		32
	char				rx_irq_name[LDC_IRQ_NAME_MAX];
	char				tx_irq_name[LDC_IRQ_NAME_MAX];

	struct hlist_head		mh_list;

	/* Linkage on the global ldc_channel_list. */
	struct hlist_node		list;
};
170
/* Conditional debug printk.  Emits only when the LDC_DEBUG_<TYPE> bit
 * is set in lp->cfg.debug.  The macro refers to a variable named 'lp'
 * that must be in scope at the call site.
 */
#define ldcdbg(TYPE, f, a...) \
do {	if (lp->cfg.debug & LDC_DEBUG_##TYPE) \
		printk(KERN_INFO PFX "ID[%lu] " f, lp->id, ## a); \
} while (0)
175
176static const char *state_to_str(u8 state)
177{
178	switch (state) {
179	case LDC_STATE_INVALID:
180		return "INVALID";
181	case LDC_STATE_INIT:
182		return "INIT";
183	case LDC_STATE_BOUND:
184		return "BOUND";
185	case LDC_STATE_READY:
186		return "READY";
187	case LDC_STATE_CONNECTED:
188		return "CONNECTED";
189	default:
190		return "<UNKNOWN>";
191	}
192}
193
194static void ldc_set_state(struct ldc_channel *lp, u8 state)
195{
196	ldcdbg(STATE, "STATE (%s) --> (%s)\n",
197	       state_to_str(lp->state),
198	       state_to_str(state));
199
200	lp->state = state;
201}
202
203static unsigned long __advance(unsigned long off, unsigned long num_entries)
204{
205	off += LDC_PACKET_SIZE;
206	if (off == (num_entries * LDC_PACKET_SIZE))
207		off = 0;
208
209	return off;
210}
211
212static unsigned long rx_advance(struct ldc_channel *lp, unsigned long off)
213{
214	return __advance(off, lp->rx_num_entries);
215}
216
217static unsigned long tx_advance(struct ldc_channel *lp, unsigned long off)
218{
219	return __advance(off, lp->tx_num_entries);
220}
221
222static struct ldc_packet *handshake_get_tx_packet(struct ldc_channel *lp,
223						  unsigned long *new_tail)
224{
225	struct ldc_packet *p;
226	unsigned long t;
227
228	t = tx_advance(lp, lp->tx_tail);
229	if (t == lp->tx_head)
230		return NULL;
231
232	*new_tail = t;
233
234	p = lp->tx_base;
235	return p + (lp->tx_tail / LDC_PACKET_SIZE);
236}
237
238/* When we are in reliable or stream mode, have to track the next packet
239 * we haven't gotten an ACK for in the TX queue using tx_acked.  We have
240 * to be careful not to stomp over the queue past that point.  During
241 * the handshake, we don't have TX data packets pending in the queue
242 * and that's why handshake_get_tx_packet() need not be mindful of
243 * lp->tx_acked.
244 */
245static unsigned long head_for_data(struct ldc_channel *lp)
246{
247	if (lp->cfg.mode == LDC_MODE_STREAM)
248		return lp->tx_acked;
249	return lp->tx_head;
250}
251
252static int tx_has_space_for(struct ldc_channel *lp, unsigned int size)
253{
254	unsigned long limit, tail, new_tail, diff;
255	unsigned int mss;
256
257	limit = head_for_data(lp);
258	tail = lp->tx_tail;
259	new_tail = tx_advance(lp, tail);
260	if (new_tail == limit)
261		return 0;
262
263	if (limit > new_tail)
264		diff = limit - new_tail;
265	else
266		diff = (limit +
267			((lp->tx_num_entries * LDC_PACKET_SIZE) - new_tail));
268	diff /= LDC_PACKET_SIZE;
269	mss = lp->mss;
270
271	if (diff * mss < size)
272		return 0;
273
274	return 1;
275}
276
277static struct ldc_packet *data_get_tx_packet(struct ldc_channel *lp,
278					     unsigned long *new_tail)
279{
280	struct ldc_packet *p;
281	unsigned long h, t;
282
283	h = head_for_data(lp);
284	t = tx_advance(lp, lp->tx_tail);
285	if (t == h)
286		return NULL;
287
288	*new_tail = t;
289
290	p = lp->tx_base;
291	return p + (lp->tx_tail / LDC_PACKET_SIZE);
292}
293
294static int set_tx_tail(struct ldc_channel *lp, unsigned long tail)
295{
296	unsigned long orig_tail = lp->tx_tail;
297	int limit = 1000;
298
299	lp->tx_tail = tail;
300	while (limit-- > 0) {
301		unsigned long err;
302
303		err = sun4v_ldc_tx_set_qtail(lp->id, tail);
304		if (!err)
305			return 0;
306
307		if (err != HV_EWOULDBLOCK) {
308			lp->tx_tail = orig_tail;
309			return -EINVAL;
310		}
311		udelay(1);
312	}
313
314	lp->tx_tail = orig_tail;
315	return -EBUSY;
316}
317
318/* This just updates the head value in the hypervisor using
319 * a polling loop with a timeout.  The caller takes care of
320 * upating software state representing the head change, if any.
321 */
322static int __set_rx_head(struct ldc_channel *lp, unsigned long head)
323{
324	int limit = 1000;
325
326	while (limit-- > 0) {
327		unsigned long err;
328
329		err = sun4v_ldc_rx_set_qhead(lp->id, head);
330		if (!err)
331			return 0;
332
333		if (err != HV_EWOULDBLOCK)
334			return -EINVAL;
335
336		udelay(1);
337	}
338
339	return -EBUSY;
340}
341
342static int send_tx_packet(struct ldc_channel *lp,
343			  struct ldc_packet *p,
344			  unsigned long new_tail)
345{
346	BUG_ON(p != (lp->tx_base + (lp->tx_tail / LDC_PACKET_SIZE)));
347
348	return set_tx_tail(lp, new_tail);
349}
350
351static struct ldc_packet *handshake_compose_ctrl(struct ldc_channel *lp,
352						 u8 stype, u8 ctrl,
353						 void *data, int dlen,
354						 unsigned long *new_tail)
355{
356	struct ldc_packet *p = handshake_get_tx_packet(lp, new_tail);
357
358	if (p) {
359		memset(p, 0, sizeof(*p));
360		p->type = LDC_CTRL;
361		p->stype = stype;
362		p->ctrl = ctrl;
363		if (data)
364			memcpy(p->u.u_data, data, dlen);
365	}
366	return p;
367}
368
369static int start_handshake(struct ldc_channel *lp)
370{
371	struct ldc_packet *p;
372	struct ldc_version *ver;
373	unsigned long new_tail;
374
375	ver = &ver_arr[0];
376
377	ldcdbg(HS, "SEND VER INFO maj[%u] min[%u]\n",
378	       ver->major, ver->minor);
379
380	p = handshake_compose_ctrl(lp, LDC_INFO, LDC_VERS,
381				   ver, sizeof(*ver), &new_tail);
382	if (p) {
383		int err = send_tx_packet(lp, p, new_tail);
384		if (!err)
385			lp->flags &= ~LDC_FLAG_RESET;
386		return err;
387	}
388	return -EBUSY;
389}
390
391static int send_version_nack(struct ldc_channel *lp,
392			     u16 major, u16 minor)
393{
394	struct ldc_packet *p;
395	struct ldc_version ver;
396	unsigned long new_tail;
397
398	ver.major = major;
399	ver.minor = minor;
400
401	p = handshake_compose_ctrl(lp, LDC_NACK, LDC_VERS,
402				   &ver, sizeof(ver), &new_tail);
403	if (p) {
404		ldcdbg(HS, "SEND VER NACK maj[%u] min[%u]\n",
405		       ver.major, ver.minor);
406
407		return send_tx_packet(lp, p, new_tail);
408	}
409	return -EBUSY;
410}
411
412static int send_version_ack(struct ldc_channel *lp,
413			    struct ldc_version *vp)
414{
415	struct ldc_packet *p;
416	unsigned long new_tail;
417
418	p = handshake_compose_ctrl(lp, LDC_ACK, LDC_VERS,
419				   vp, sizeof(*vp), &new_tail);
420	if (p) {
421		ldcdbg(HS, "SEND VER ACK maj[%u] min[%u]\n",
422		       vp->major, vp->minor);
423
424		return send_tx_packet(lp, p, new_tail);
425	}
426	return -EBUSY;
427}
428
429static int send_rts(struct ldc_channel *lp)
430{
431	struct ldc_packet *p;
432	unsigned long new_tail;
433
434	p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RTS, NULL, 0,
435				   &new_tail);
436	if (p) {
437		p->env = lp->cfg.mode;
438		p->seqid = 0;
439		lp->rcv_nxt = 0;
440
441		ldcdbg(HS, "SEND RTS env[0x%x] seqid[0x%x]\n",
442		       p->env, p->seqid);
443
444		return send_tx_packet(lp, p, new_tail);
445	}
446	return -EBUSY;
447}
448
449static int send_rtr(struct ldc_channel *lp)
450{
451	struct ldc_packet *p;
452	unsigned long new_tail;
453
454	p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RTR, NULL, 0,
455				   &new_tail);
456	if (p) {
457		p->env = lp->cfg.mode;
458		p->seqid = 0;
459
460		ldcdbg(HS, "SEND RTR env[0x%x] seqid[0x%x]\n",
461		       p->env, p->seqid);
462
463		return send_tx_packet(lp, p, new_tail);
464	}
465	return -EBUSY;
466}
467
468static int send_rdx(struct ldc_channel *lp)
469{
470	struct ldc_packet *p;
471	unsigned long new_tail;
472
473	p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RDX, NULL, 0,
474				   &new_tail);
475	if (p) {
476		p->env = 0;
477		p->seqid = ++lp->snd_nxt;
478		p->u.r.ackid = lp->rcv_nxt;
479
480		ldcdbg(HS, "SEND RDX env[0x%x] seqid[0x%x] ackid[0x%x]\n",
481		       p->env, p->seqid, p->u.r.ackid);
482
483		return send_tx_packet(lp, p, new_tail);
484	}
485	return -EBUSY;
486}
487
488static int send_data_nack(struct ldc_channel *lp, struct ldc_packet *data_pkt)
489{
490	struct ldc_packet *p;
491	unsigned long new_tail;
492	int err;
493
494	p = data_get_tx_packet(lp, &new_tail);
495	if (!p)
496		return -EBUSY;
497	memset(p, 0, sizeof(*p));
498	p->type = data_pkt->type;
499	p->stype = LDC_NACK;
500	p->ctrl = data_pkt->ctrl & LDC_CTRL_MSK;
501	p->seqid = lp->snd_nxt + 1;
502	p->u.r.ackid = lp->rcv_nxt;
503
504	ldcdbg(HS, "SEND DATA NACK type[0x%x] ctl[0x%x] seq[0x%x] ack[0x%x]\n",
505	       p->type, p->ctrl, p->seqid, p->u.r.ackid);
506
507	err = send_tx_packet(lp, p, new_tail);
508	if (!err)
509		lp->snd_nxt++;
510
511	return err;
512}
513
514static int ldc_abort(struct ldc_channel *lp)
515{
516	unsigned long hv_err;
517
518	ldcdbg(STATE, "ABORT\n");
519
520	/* We report but do not act upon the hypervisor errors because
521	 * there really isn't much we can do if they fail at this point.
522	 */
523	hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
524	if (hv_err)
525		printk(KERN_ERR PFX "ldc_abort: "
526		       "sun4v_ldc_tx_qconf(%lx,%lx,%lx) failed, err=%lu\n",
527		       lp->id, lp->tx_ra, lp->tx_num_entries, hv_err);
528
529	hv_err = sun4v_ldc_tx_get_state(lp->id,
530					&lp->tx_head,
531					&lp->tx_tail,
532					&lp->chan_state);
533	if (hv_err)
534		printk(KERN_ERR PFX "ldc_abort: "
535		       "sun4v_ldc_tx_get_state(%lx,...) failed, err=%lu\n",
536		       lp->id, hv_err);
537
538	hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
539	if (hv_err)
540		printk(KERN_ERR PFX "ldc_abort: "
541		       "sun4v_ldc_rx_qconf(%lx,%lx,%lx) failed, err=%lu\n",
542		       lp->id, lp->rx_ra, lp->rx_num_entries, hv_err);
543
544	/* Refetch the RX queue state as well, because we could be invoked
545	 * here in the queue processing context.
546	 */
547	hv_err = sun4v_ldc_rx_get_state(lp->id,
548					&lp->rx_head,
549					&lp->rx_tail,
550					&lp->chan_state);
551	if (hv_err)
552		printk(KERN_ERR PFX "ldc_abort: "
553		       "sun4v_ldc_rx_get_state(%lx,...) failed, err=%lu\n",
554		       lp->id, hv_err);
555
556	return -ECONNRESET;
557}
558
559static struct ldc_version *find_by_major(u16 major)
560{
561	struct ldc_version *ret = NULL;
562	int i;
563
564	for (i = 0; i < ARRAY_SIZE(ver_arr); i++) {
565		struct ldc_version *v = &ver_arr[i];
566		if (v->major <= major) {
567			ret = v;
568			break;
569		}
570	}
571	return ret;
572}
573
574static int process_ver_info(struct ldc_channel *lp, struct ldc_version *vp)
575{
576	struct ldc_version *vap;
577	int err;
578
579	ldcdbg(HS, "GOT VERSION INFO major[%x] minor[%x]\n",
580	       vp->major, vp->minor);
581
582	if (lp->hs_state == LDC_HS_GOTVERS) {
583		lp->hs_state = LDC_HS_OPEN;
584		memset(&lp->ver, 0, sizeof(lp->ver));
585	}
586
587	vap = find_by_major(vp->major);
588	if (!vap) {
589		err = send_version_nack(lp, 0, 0);
590	} else if (vap->major != vp->major) {
591		err = send_version_nack(lp, vap->major, vap->minor);
592	} else {
593		struct ldc_version ver = *vp;
594		if (ver.minor > vap->minor)
595			ver.minor = vap->minor;
596		err = send_version_ack(lp, &ver);
597		if (!err) {
598			lp->ver = ver;
599			lp->hs_state = LDC_HS_GOTVERS;
600		}
601	}
602	if (err)
603		return ldc_abort(lp);
604
605	return 0;
606}
607
608static int process_ver_ack(struct ldc_channel *lp, struct ldc_version *vp)
609{
610	ldcdbg(HS, "GOT VERSION ACK major[%x] minor[%x]\n",
611	       vp->major, vp->minor);
612
613	if (lp->hs_state == LDC_HS_GOTVERS) {
614		if (lp->ver.major != vp->major ||
615		    lp->ver.minor != vp->minor)
616			return ldc_abort(lp);
617	} else {
618		lp->ver = *vp;
619		lp->hs_state = LDC_HS_GOTVERS;
620	}
621	if (send_rts(lp))
622		return ldc_abort(lp);
623	return 0;
624}
625
626static int process_ver_nack(struct ldc_channel *lp, struct ldc_version *vp)
627{
628	struct ldc_version *vap;
629	struct ldc_packet *p;
630	unsigned long new_tail;
631
632	if (vp->major == 0 && vp->minor == 0)
633		return ldc_abort(lp);
634
635	vap = find_by_major(vp->major);
636	if (!vap)
637		return ldc_abort(lp);
638
639	p = handshake_compose_ctrl(lp, LDC_INFO, LDC_VERS,
640					   vap, sizeof(*vap),
641					   &new_tail);
642	if (!p)
643		return ldc_abort(lp);
644
645	return send_tx_packet(lp, p, new_tail);
646}
647
648static int process_version(struct ldc_channel *lp,
649			   struct ldc_packet *p)
650{
651	struct ldc_version *vp;
652
653	vp = (struct ldc_version *) p->u.u_data;
654
655	switch (p->stype) {
656	case LDC_INFO:
657		return process_ver_info(lp, vp);
658
659	case LDC_ACK:
660		return process_ver_ack(lp, vp);
661
662	case LDC_NACK:
663		return process_ver_nack(lp, vp);
664
665	default:
666		return ldc_abort(lp);
667	}
668}
669
670static int process_rts(struct ldc_channel *lp,
671		       struct ldc_packet *p)
672{
673	ldcdbg(HS, "GOT RTS stype[%x] seqid[%x] env[%x]\n",
674	       p->stype, p->seqid, p->env);
675
676	if (p->stype     != LDC_INFO	   ||
677	    lp->hs_state != LDC_HS_GOTVERS ||
678	    p->env       != lp->cfg.mode)
679		return ldc_abort(lp);
680
681	lp->snd_nxt = p->seqid;
682	lp->rcv_nxt = p->seqid;
683	lp->hs_state = LDC_HS_SENTRTR;
684	if (send_rtr(lp))
685		return ldc_abort(lp);
686
687	return 0;
688}
689
690static int process_rtr(struct ldc_channel *lp,
691		       struct ldc_packet *p)
692{
693	ldcdbg(HS, "GOT RTR stype[%x] seqid[%x] env[%x]\n",
694	       p->stype, p->seqid, p->env);
695
696	if (p->stype     != LDC_INFO ||
697	    p->env       != lp->cfg.mode)
698		return ldc_abort(lp);
699
700	lp->snd_nxt = p->seqid;
701	lp->hs_state = LDC_HS_COMPLETE;
702	ldc_set_state(lp, LDC_STATE_CONNECTED);
703	send_rdx(lp);
704
705	return LDC_EVENT_UP;
706}
707
708static int rx_seq_ok(struct ldc_channel *lp, u32 seqid)
709{
710	return lp->rcv_nxt + 1 == seqid;
711}
712
713static int process_rdx(struct ldc_channel *lp,
714		       struct ldc_packet *p)
715{
716	ldcdbg(HS, "GOT RDX stype[%x] seqid[%x] env[%x] ackid[%x]\n",
717	       p->stype, p->seqid, p->env, p->u.r.ackid);
718
719	if (p->stype != LDC_INFO ||
720	    !(rx_seq_ok(lp, p->seqid)))
721		return ldc_abort(lp);
722
723	lp->rcv_nxt = p->seqid;
724
725	lp->hs_state = LDC_HS_COMPLETE;
726	ldc_set_state(lp, LDC_STATE_CONNECTED);
727
728	return LDC_EVENT_UP;
729}
730
731static int process_control_frame(struct ldc_channel *lp,
732				 struct ldc_packet *p)
733{
734	switch (p->ctrl) {
735	case LDC_VERS:
736		return process_version(lp, p);
737
738	case LDC_RTS:
739		return process_rts(lp, p);
740
741	case LDC_RTR:
742		return process_rtr(lp, p);
743
744	case LDC_RDX:
745		return process_rdx(lp, p);
746
747	default:
748		return ldc_abort(lp);
749	}
750}
751
/* Any error frame aborts the connection; its contents are not
 * inspected.
 */
static int process_error_frame(struct ldc_channel *lp,
			       struct ldc_packet *p)
{
	int ret = ldc_abort(lp);

	return ret;
}
757
758static int process_data_ack(struct ldc_channel *lp,
759			    struct ldc_packet *ack)
760{
761	unsigned long head = lp->tx_acked;
762	u32 ackid = ack->u.r.ackid;
763
764	while (1) {
765		struct ldc_packet *p = lp->tx_base + (head / LDC_PACKET_SIZE);
766
767		head = tx_advance(lp, head);
768
769		if (p->seqid == ackid) {
770			lp->tx_acked = head;
771			return 0;
772		}
773		if (head == lp->tx_tail)
774			return ldc_abort(lp);
775	}
776
777	return 0;
778}
779
780static void send_events(struct ldc_channel *lp, unsigned int event_mask)
781{
782	if (event_mask & LDC_EVENT_RESET)
783		lp->cfg.event(lp->event_arg, LDC_EVENT_RESET);
784	if (event_mask & LDC_EVENT_UP)
785		lp->cfg.event(lp->event_arg, LDC_EVENT_UP);
786	if (event_mask & LDC_EVENT_DATA_READY)
787		lp->cfg.event(lp->event_arg, LDC_EVENT_DATA_READY);
788}
789
790static irqreturn_t ldc_rx(int irq, void *dev_id)
791{
792	struct ldc_channel *lp = dev_id;
793	unsigned long orig_state, flags;
794	unsigned int event_mask;
795
796	spin_lock_irqsave(&lp->lock, flags);
797
798	orig_state = lp->chan_state;
799
800	/* We should probably check for hypervisor errors here and
801	 * reset the LDC channel if we get one.
802	 */
803	sun4v_ldc_rx_get_state(lp->id,
804			       &lp->rx_head,
805			       &lp->rx_tail,
806			       &lp->chan_state);
807
808	ldcdbg(RX, "RX state[0x%02lx:0x%02lx] head[0x%04lx] tail[0x%04lx]\n",
809	       orig_state, lp->chan_state, lp->rx_head, lp->rx_tail);
810
811	event_mask = 0;
812
813	if (lp->cfg.mode == LDC_MODE_RAW &&
814	    lp->chan_state == LDC_CHANNEL_UP) {
815		lp->hs_state = LDC_HS_COMPLETE;
816		ldc_set_state(lp, LDC_STATE_CONNECTED);
817
818		event_mask |= LDC_EVENT_UP;
819
820		orig_state = lp->chan_state;
821	}
822
823	/* If we are in reset state, flush the RX queue and ignore
824	 * everything.
825	 */
826	if (lp->flags & LDC_FLAG_RESET) {
827		(void) __set_rx_head(lp, lp->rx_tail);
828		goto out;
829	}
830
831	/* Once we finish the handshake, we let the ldc_read()
832	 * paths do all of the control frame and state management.
833	 * Just trigger the callback.
834	 */
835	if (lp->hs_state == LDC_HS_COMPLETE) {
836handshake_complete:
837		if (lp->chan_state != orig_state) {
838			unsigned int event = LDC_EVENT_RESET;
839
840			if (lp->chan_state == LDC_CHANNEL_UP)
841				event = LDC_EVENT_UP;
842
843			event_mask |= event;
844		}
845		if (lp->rx_head != lp->rx_tail)
846			event_mask |= LDC_EVENT_DATA_READY;
847
848		goto out;
849	}
850
851	if (lp->chan_state != orig_state)
852		goto out;
853
854	while (lp->rx_head != lp->rx_tail) {
855		struct ldc_packet *p;
856		unsigned long new;
857		int err;
858
859		p = lp->rx_base + (lp->rx_head / LDC_PACKET_SIZE);
860
861		switch (p->type) {
862		case LDC_CTRL:
863			err = process_control_frame(lp, p);
864			if (err > 0)
865				event_mask |= err;
866			break;
867
868		case LDC_DATA:
869			event_mask |= LDC_EVENT_DATA_READY;
870			err = 0;
871			break;
872
873		case LDC_ERR:
874			err = process_error_frame(lp, p);
875			break;
876
877		default:
878			err = ldc_abort(lp);
879			break;
880		}
881
882		if (err < 0)
883			break;
884
885		new = lp->rx_head;
886		new += LDC_PACKET_SIZE;
887		if (new == (lp->rx_num_entries * LDC_PACKET_SIZE))
888			new = 0;
889		lp->rx_head = new;
890
891		err = __set_rx_head(lp, new);
892		if (err < 0) {
893			(void) ldc_abort(lp);
894			break;
895		}
896		if (lp->hs_state == LDC_HS_COMPLETE)
897			goto handshake_complete;
898	}
899
900out:
901	spin_unlock_irqrestore(&lp->lock, flags);
902
903	send_events(lp, event_mask);
904
905	return IRQ_HANDLED;
906}
907
908static irqreturn_t ldc_tx(int irq, void *dev_id)
909{
910	struct ldc_channel *lp = dev_id;
911	unsigned long flags, orig_state;
912	unsigned int event_mask = 0;
913
914	spin_lock_irqsave(&lp->lock, flags);
915
916	orig_state = lp->chan_state;
917
918	/* We should probably check for hypervisor errors here and
919	 * reset the LDC channel if we get one.
920	 */
921	sun4v_ldc_tx_get_state(lp->id,
922			       &lp->tx_head,
923			       &lp->tx_tail,
924			       &lp->chan_state);
925
926	ldcdbg(TX, " TX state[0x%02lx:0x%02lx] head[0x%04lx] tail[0x%04lx]\n",
927	       orig_state, lp->chan_state, lp->tx_head, lp->tx_tail);
928
929	if (lp->cfg.mode == LDC_MODE_RAW &&
930	    lp->chan_state == LDC_CHANNEL_UP) {
931		lp->hs_state = LDC_HS_COMPLETE;
932		ldc_set_state(lp, LDC_STATE_CONNECTED);
933
934		event_mask |= LDC_EVENT_UP;
935	}
936
937	spin_unlock_irqrestore(&lp->lock, flags);
938
939	send_events(lp, event_mask);
940
941	return IRQ_HANDLED;
942}
943
/* List of every channel created by ldc_alloc().
 *
 * XXX ldc_alloc() and ldc_free() need to run under a mutex so
 * XXX that addition and removal from the ldc_channel_list has
 * XXX atomicity, otherwise the __ldc_channel_exists() check is
 * XXX totally pointless as another thread can slip into ldc_alloc()
 * XXX and add a channel with the same ID.  There also needs to be
 * XXX a spinlock for ldc_channel_list.
 */
static HLIST_HEAD(ldc_channel_list);
952
953static int __ldc_channel_exists(unsigned long id)
954{
955	struct ldc_channel *lp;
956
957	hlist_for_each_entry(lp, &ldc_channel_list, list) {
958		if (lp->id == id)
959			return 1;
960	}
961	return 0;
962}
963
964static int alloc_queue(const char *name, unsigned long num_entries,
965		       struct ldc_packet **base, unsigned long *ra)
966{
967	unsigned long size, order;
968	void *q;
969
970	size = num_entries * LDC_PACKET_SIZE;
971	order = get_order(size);
972
973	q = (void *) __get_free_pages(GFP_KERNEL, order);
974	if (!q) {
975		printk(KERN_ERR PFX "Alloc of %s queue failed with "
976		       "size=%lu order=%lu\n", name, size, order);
977		return -ENOMEM;
978	}
979
980	memset(q, 0, PAGE_SIZE << order);
981
982	*base = q;
983	*ra = __pa(q);
984
985	return 0;
986}
987
988static void free_queue(unsigned long num_entries, struct ldc_packet *q)
989{
990	unsigned long size, order;
991
992	if (!q)
993		return;
994
995	size = num_entries * LDC_PACKET_SIZE;
996	order = get_order(size);
997
998	free_pages((unsigned long)q, order);
999}
1000
/* Number of entries in each channel's map table.
 * XXX Make this configurable... XXX
 */
#define LDC_IOTABLE_SIZE	(8 * 1024)
1003
1004static int ldc_iommu_init(struct ldc_channel *lp)
1005{
1006	unsigned long sz, num_tsb_entries, tsbsize, order;
1007	struct ldc_iommu *iommu = &lp->iommu;
1008	struct ldc_mtable_entry *table;
1009	unsigned long hv_err;
1010	int err;
1011
1012	num_tsb_entries = LDC_IOTABLE_SIZE;
1013	tsbsize = num_tsb_entries * sizeof(struct ldc_mtable_entry);
1014
1015	spin_lock_init(&iommu->lock);
1016
1017	sz = num_tsb_entries / 8;
1018	sz = (sz + 7UL) & ~7UL;
1019	iommu->arena.map = kzalloc(sz, GFP_KERNEL);
1020	if (!iommu->arena.map) {
1021		printk(KERN_ERR PFX "Alloc of arena map failed, sz=%lu\n", sz);
1022		return -ENOMEM;
1023	}
1024
1025	iommu->arena.limit = num_tsb_entries;
1026
1027	order = get_order(tsbsize);
1028
1029	table = (struct ldc_mtable_entry *)
1030		__get_free_pages(GFP_KERNEL, order);
1031	err = -ENOMEM;
1032	if (!table) {
1033		printk(KERN_ERR PFX "Alloc of MTE table failed, "
1034		       "size=%lu order=%lu\n", tsbsize, order);
1035		goto out_free_map;
1036	}
1037
1038	memset(table, 0, PAGE_SIZE << order);
1039
1040	iommu->page_table = table;
1041
1042	hv_err = sun4v_ldc_set_map_table(lp->id, __pa(table),
1043					 num_tsb_entries);
1044	err = -EINVAL;
1045	if (hv_err)
1046		goto out_free_table;
1047
1048	return 0;
1049
1050out_free_table:
1051	free_pages((unsigned long) table, order);
1052	iommu->page_table = NULL;
1053
1054out_free_map:
1055	kfree(iommu->arena.map);
1056	iommu->arena.map = NULL;
1057
1058	return err;
1059}
1060
1061static void ldc_iommu_release(struct ldc_channel *lp)
1062{
1063	struct ldc_iommu *iommu = &lp->iommu;
1064	unsigned long num_tsb_entries, tsbsize, order;
1065
1066	(void) sun4v_ldc_set_map_table(lp->id, 0, 0);
1067
1068	num_tsb_entries = iommu->arena.limit;
1069	tsbsize = num_tsb_entries * sizeof(struct ldc_mtable_entry);
1070	order = get_order(tsbsize);
1071
1072	free_pages((unsigned long) iommu->page_table, order);
1073	iommu->page_table = NULL;
1074
1075	kfree(iommu->arena.map);
1076	iommu->arena.map = NULL;
1077}
1078
1079struct ldc_channel *ldc_alloc(unsigned long id,
1080			      const struct ldc_channel_config *cfgp,
1081			      void *event_arg,
1082			      const char *name)
1083{
1084	struct ldc_channel *lp;
1085	const struct ldc_mode_ops *mops;
1086	unsigned long dummy1, dummy2, hv_err;
1087	u8 mss, *mssbuf;
1088	int err;
1089
1090	err = -ENODEV;
1091	if (!ldom_domaining_enabled)
1092		goto out_err;
1093
1094	err = -EINVAL;
1095	if (!cfgp)
1096		goto out_err;
1097	if (!name)
1098		goto out_err;
1099
1100	switch (cfgp->mode) {
1101	case LDC_MODE_RAW:
1102		mops = &raw_ops;
1103		mss = LDC_PACKET_SIZE;
1104		break;
1105
1106	case LDC_MODE_UNRELIABLE:
1107		mops = &nonraw_ops;
1108		mss = LDC_PACKET_SIZE - 8;
1109		break;
1110
1111	case LDC_MODE_STREAM:
1112		mops = &stream_ops;
1113		mss = LDC_PACKET_SIZE - 8 - 8;
1114		break;
1115
1116	default:
1117		goto out_err;
1118	}
1119
1120	if (!cfgp->event || !event_arg || !cfgp->rx_irq || !cfgp->tx_irq)
1121		goto out_err;
1122
1123	hv_err = sun4v_ldc_tx_qinfo(id, &dummy1, &dummy2);
1124	err = -ENODEV;
1125	if (hv_err == HV_ECHANNEL)
1126		goto out_err;
1127
1128	err = -EEXIST;
1129	if (__ldc_channel_exists(id))
1130		goto out_err;
1131
1132	mssbuf = NULL;
1133
1134	lp = kzalloc(sizeof(*lp), GFP_KERNEL);
1135	err = -ENOMEM;
1136	if (!lp)
1137		goto out_err;
1138
1139	spin_lock_init(&lp->lock);
1140
1141	lp->id = id;
1142
1143	err = ldc_iommu_init(lp);
1144	if (err)
1145		goto out_free_ldc;
1146
1147	lp->mops = mops;
1148	lp->mss = mss;
1149
1150	lp->cfg = *cfgp;
1151	if (!lp->cfg.mtu)
1152		lp->cfg.mtu = LDC_DEFAULT_MTU;
1153
1154	if (lp->cfg.mode == LDC_MODE_STREAM) {
1155		mssbuf = kzalloc(lp->cfg.mtu, GFP_KERNEL);
1156		if (!mssbuf) {
1157			err = -ENOMEM;
1158			goto out_free_iommu;
1159		}
1160		lp->mssbuf = mssbuf;
1161	}
1162
1163	lp->event_arg = event_arg;
1164
1165	/* XXX allow setting via ldc_channel_config to override defaults
1166	 * XXX or use some formula based upon mtu
1167	 */
1168	lp->tx_num_entries = LDC_DEFAULT_NUM_ENTRIES;
1169	lp->rx_num_entries = LDC_DEFAULT_NUM_ENTRIES;
1170
1171	err = alloc_queue("TX", lp->tx_num_entries,
1172			  &lp->tx_base, &lp->tx_ra);
1173	if (err)
1174		goto out_free_mssbuf;
1175
1176	err = alloc_queue("RX", lp->rx_num_entries,
1177			  &lp->rx_base, &lp->rx_ra);
1178	if (err)
1179		goto out_free_txq;
1180
1181	lp->flags |= LDC_FLAG_ALLOCED_QUEUES;
1182
1183	lp->hs_state = LDC_HS_CLOSED;
1184	ldc_set_state(lp, LDC_STATE_INIT);
1185
1186	INIT_HLIST_NODE(&lp->list);
1187	hlist_add_head(&lp->list, &ldc_channel_list);
1188
1189	INIT_HLIST_HEAD(&lp->mh_list);
1190
1191	snprintf(lp->rx_irq_name, LDC_IRQ_NAME_MAX, "%s RX", name);
1192	snprintf(lp->tx_irq_name, LDC_IRQ_NAME_MAX, "%s TX", name);
1193
1194	err = request_irq(lp->cfg.rx_irq, ldc_rx, 0,
1195			  lp->rx_irq_name, lp);
1196	if (err)
1197		goto out_free_txq;
1198
1199	err = request_irq(lp->cfg.tx_irq, ldc_tx, 0,
1200			  lp->tx_irq_name, lp);
1201	if (err) {
1202		free_irq(lp->cfg.rx_irq, lp);
1203		goto out_free_txq;
1204	}
1205
1206	return lp;
1207
1208out_free_txq:
1209	free_queue(lp->tx_num_entries, lp->tx_base);
1210
1211out_free_mssbuf:
1212	kfree(mssbuf);
1213
1214out_free_iommu:
1215	ldc_iommu_release(lp);
1216
1217out_free_ldc:
1218	kfree(lp);
1219
1220out_err:
1221	return ERR_PTR(err);
1222}
1223EXPORT_SYMBOL(ldc_alloc);
1224
1225void ldc_free(struct ldc_channel *lp)
1226{
1227	if (lp->flags & LDC_FLAG_REGISTERED_IRQS) {
1228		free_irq(lp->cfg.rx_irq, lp);
1229		free_irq(lp->cfg.tx_irq, lp);
1230	}
1231
1232	if (lp->flags & LDC_FLAG_REGISTERED_QUEUES) {
1233		sun4v_ldc_tx_qconf(lp->id, 0, 0);
1234		sun4v_ldc_rx_qconf(lp->id, 0, 0);
1235		lp->flags &= ~LDC_FLAG_REGISTERED_QUEUES;
1236	}
1237	if (lp->flags & LDC_FLAG_ALLOCED_QUEUES) {
1238		free_queue(lp->tx_num_entries, lp->tx_base);
1239		free_queue(lp->rx_num_entries, lp->rx_base);
1240		lp->flags &= ~LDC_FLAG_ALLOCED_QUEUES;
1241	}
1242
1243	hlist_del(&lp->list);
1244
1245	kfree(lp->mssbuf);
1246
1247	ldc_iommu_release(lp);
1248
1249	kfree(lp);
1250}
1251EXPORT_SYMBOL(ldc_free);
1252
1253/* Bind the channel.  This registers the LDC queues with
1254 * the hypervisor and puts the channel into a pseudo-listening
1255 * state.  This does not initiate a handshake, ldc_connect() does
1256 * that.
1257 */
1258int ldc_bind(struct ldc_channel *lp)
1259{
1260	unsigned long hv_err, flags;
1261	int err = -EINVAL;
1262
1263	if (lp->state != LDC_STATE_INIT)
1264		return -EINVAL;
1265
1266	spin_lock_irqsave(&lp->lock, flags);
1267
1268	enable_irq(lp->cfg.rx_irq);
1269	enable_irq(lp->cfg.tx_irq);
1270
1271	lp->flags |= LDC_FLAG_REGISTERED_IRQS;
1272
1273	err = -ENODEV;
1274	hv_err = sun4v_ldc_tx_qconf(lp->id, 0, 0);
1275	if (hv_err)
1276		goto out_free_irqs;
1277
1278	hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
1279	if (hv_err)
1280		goto out_free_irqs;
1281
1282	hv_err = sun4v_ldc_rx_qconf(lp->id, 0, 0);
1283	if (hv_err)
1284		goto out_unmap_tx;
1285
1286	hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
1287	if (hv_err)
1288		goto out_unmap_tx;
1289
1290	lp->flags |= LDC_FLAG_REGISTERED_QUEUES;
1291
1292	hv_err = sun4v_ldc_tx_get_state(lp->id,
1293					&lp->tx_head,
1294					&lp->tx_tail,
1295					&lp->chan_state);
1296	err = -EBUSY;
1297	if (hv_err)
1298		goto out_unmap_rx;
1299
1300	lp->tx_acked = lp->tx_head;
1301
1302	lp->hs_state = LDC_HS_OPEN;
1303	ldc_set_state(lp, LDC_STATE_BOUND);
1304
1305	spin_unlock_irqrestore(&lp->lock, flags);
1306
1307	return 0;
1308
1309out_unmap_rx:
1310	lp->flags &= ~LDC_FLAG_REGISTERED_QUEUES;
1311	sun4v_ldc_rx_qconf(lp->id, 0, 0);
1312
1313out_unmap_tx:
1314	sun4v_ldc_tx_qconf(lp->id, 0, 0);
1315
1316out_free_irqs:
1317	lp->flags &= ~LDC_FLAG_REGISTERED_IRQS;
1318	free_irq(lp->cfg.tx_irq, lp);
1319	free_irq(lp->cfg.rx_irq, lp);
1320
1321	spin_unlock_irqrestore(&lp->lock, flags);
1322
1323	return err;
1324}
1325EXPORT_SYMBOL(ldc_bind);
1326
1327int ldc_connect(struct ldc_channel *lp)
1328{
1329	unsigned long flags;
1330	int err;
1331
1332	if (lp->cfg.mode == LDC_MODE_RAW)
1333		return -EINVAL;
1334
1335	spin_lock_irqsave(&lp->lock, flags);
1336
1337	if (!(lp->flags & LDC_FLAG_ALLOCED_QUEUES) ||
1338	    !(lp->flags & LDC_FLAG_REGISTERED_QUEUES) ||
1339	    lp->hs_state != LDC_HS_OPEN)
1340		err = ((lp->hs_state > LDC_HS_OPEN) ? 0 : -EINVAL);
1341	else
1342		err = start_handshake(lp);
1343
1344	spin_unlock_irqrestore(&lp->lock, flags);
1345
1346	return err;
1347}
1348EXPORT_SYMBOL(ldc_connect);
1349
1350int ldc_disconnect(struct ldc_channel *lp)
1351{
1352	unsigned long hv_err, flags;
1353	int err;
1354
1355	if (lp->cfg.mode == LDC_MODE_RAW)
1356		return -EINVAL;
1357
1358	if (!(lp->flags & LDC_FLAG_ALLOCED_QUEUES) ||
1359	    !(lp->flags & LDC_FLAG_REGISTERED_QUEUES))
1360		return -EINVAL;
1361
1362	spin_lock_irqsave(&lp->lock, flags);
1363
1364	err = -ENODEV;
1365	hv_err = sun4v_ldc_tx_qconf(lp->id, 0, 0);
1366	if (hv_err)
1367		goto out_err;
1368
1369	hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
1370	if (hv_err)
1371		goto out_err;
1372
1373	hv_err = sun4v_ldc_rx_qconf(lp->id, 0, 0);
1374	if (hv_err)
1375		goto out_err;
1376
1377	hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
1378	if (hv_err)
1379		goto out_err;
1380
1381	ldc_set_state(lp, LDC_STATE_BOUND);
1382	lp->hs_state = LDC_HS_OPEN;
1383	lp->flags |= LDC_FLAG_RESET;
1384
1385	spin_unlock_irqrestore(&lp->lock, flags);
1386
1387	return 0;
1388
1389out_err:
1390	sun4v_ldc_tx_qconf(lp->id, 0, 0);
1391	sun4v_ldc_rx_qconf(lp->id, 0, 0);
1392	free_irq(lp->cfg.tx_irq, lp);
1393	free_irq(lp->cfg.rx_irq, lp);
1394	lp->flags &= ~(LDC_FLAG_REGISTERED_IRQS |
1395		       LDC_FLAG_REGISTERED_QUEUES);
1396	ldc_set_state(lp, LDC_STATE_INIT);
1397
1398	spin_unlock_irqrestore(&lp->lock, flags);
1399
1400	return err;
1401}
1402EXPORT_SYMBOL(ldc_disconnect);
1403
1404int ldc_state(struct ldc_channel *lp)
1405{
1406	return lp->state;
1407}
1408EXPORT_SYMBOL(ldc_state);
1409
1410static int write_raw(struct ldc_channel *lp, const void *buf, unsigned int size)
1411{
1412	struct ldc_packet *p;
1413	unsigned long new_tail;
1414	int err;
1415
1416	if (size > LDC_PACKET_SIZE)
1417		return -EMSGSIZE;
1418
1419	p = data_get_tx_packet(lp, &new_tail);
1420	if (!p)
1421		return -EAGAIN;
1422
1423	memcpy(p, buf, size);
1424
1425	err = send_tx_packet(lp, p, new_tail);
1426	if (!err)
1427		err = size;
1428
1429	return err;
1430}
1431
1432static int read_raw(struct ldc_channel *lp, void *buf, unsigned int size)
1433{
1434	struct ldc_packet *p;
1435	unsigned long hv_err, new;
1436	int err;
1437
1438	if (size < LDC_PACKET_SIZE)
1439		return -EINVAL;
1440
1441	hv_err = sun4v_ldc_rx_get_state(lp->id,
1442					&lp->rx_head,
1443					&lp->rx_tail,
1444					&lp->chan_state);
1445	if (hv_err)
1446		return ldc_abort(lp);
1447
1448	if (lp->chan_state == LDC_CHANNEL_DOWN ||
1449	    lp->chan_state == LDC_CHANNEL_RESETTING)
1450		return -ECONNRESET;
1451
1452	if (lp->rx_head == lp->rx_tail)
1453		return 0;
1454
1455	p = lp->rx_base + (lp->rx_head / LDC_PACKET_SIZE);
1456	memcpy(buf, p, LDC_PACKET_SIZE);
1457
1458	new = rx_advance(lp, lp->rx_head);
1459	lp->rx_head = new;
1460
1461	err = __set_rx_head(lp, new);
1462	if (err < 0)
1463		err = -ECONNRESET;
1464	else
1465		err = LDC_PACKET_SIZE;
1466
1467	return err;
1468}
1469
1470static const struct ldc_mode_ops raw_ops = {
1471	.write		=	write_raw,
1472	.read		=	read_raw,
1473};
1474
1475static int write_nonraw(struct ldc_channel *lp, const void *buf,
1476			unsigned int size)
1477{
1478	unsigned long hv_err, tail;
1479	unsigned int copied;
1480	u32 seq;
1481	int err;
1482
1483	hv_err = sun4v_ldc_tx_get_state(lp->id, &lp->tx_head, &lp->tx_tail,
1484					&lp->chan_state);
1485	if (unlikely(hv_err))
1486		return -EBUSY;
1487
1488	if (unlikely(lp->chan_state != LDC_CHANNEL_UP))
1489		return ldc_abort(lp);
1490
1491	if (!tx_has_space_for(lp, size))
1492		return -EAGAIN;
1493
1494	seq = lp->snd_nxt;
1495	copied = 0;
1496	tail = lp->tx_tail;
1497	while (copied < size) {
1498		struct ldc_packet *p = lp->tx_base + (tail / LDC_PACKET_SIZE);
1499		u8 *data = ((lp->cfg.mode == LDC_MODE_UNRELIABLE) ?
1500			    p->u.u_data :
1501			    p->u.r.r_data);
1502		int data_len;
1503
1504		p->type = LDC_DATA;
1505		p->stype = LDC_INFO;
1506		p->ctrl = 0;
1507
1508		data_len = size - copied;
1509		if (data_len > lp->mss)
1510			data_len = lp->mss;
1511
1512		BUG_ON(data_len > LDC_LEN);
1513
1514		p->env = (data_len |
1515			  (copied == 0 ? LDC_START : 0) |
1516			  (data_len == size - copied ? LDC_STOP : 0));
1517
1518		p->seqid = ++seq;
1519
1520		ldcdbg(DATA, "SENT DATA [%02x:%02x:%02x:%02x:%08x]\n",
1521		       p->type,
1522		       p->stype,
1523		       p->ctrl,
1524		       p->env,
1525		       p->seqid);
1526
1527		memcpy(data, buf, data_len);
1528		buf += data_len;
1529		copied += data_len;
1530
1531		tail = tx_advance(lp, tail);
1532	}
1533
1534	err = set_tx_tail(lp, tail);
1535	if (!err) {
1536		lp->snd_nxt = seq;
1537		err = size;
1538	}
1539
1540	return err;
1541}
1542
1543static int rx_bad_seq(struct ldc_channel *lp, struct ldc_packet *p,
1544		      struct ldc_packet *first_frag)
1545{
1546	int err;
1547
1548	if (first_frag)
1549		lp->rcv_nxt = first_frag->seqid - 1;
1550
1551	err = send_data_nack(lp, p);
1552	if (err)
1553		return err;
1554
1555	err = __set_rx_head(lp, lp->rx_tail);
1556	if (err < 0)
1557		return ldc_abort(lp);
1558
1559	return 0;
1560}
1561
1562static int data_ack_nack(struct ldc_channel *lp, struct ldc_packet *p)
1563{
1564	if (p->stype & LDC_ACK) {
1565		int err = process_data_ack(lp, p);
1566		if (err)
1567			return err;
1568	}
1569	if (p->stype & LDC_NACK)
1570		return ldc_abort(lp);
1571
1572	return 0;
1573}
1574
1575static int rx_data_wait(struct ldc_channel *lp, unsigned long cur_head)
1576{
1577	unsigned long dummy;
1578	int limit = 1000;
1579
1580	ldcdbg(DATA, "DATA WAIT cur_head[%lx] rx_head[%lx] rx_tail[%lx]\n",
1581	       cur_head, lp->rx_head, lp->rx_tail);
1582	while (limit-- > 0) {
1583		unsigned long hv_err;
1584
1585		hv_err = sun4v_ldc_rx_get_state(lp->id,
1586						&dummy,
1587						&lp->rx_tail,
1588						&lp->chan_state);
1589		if (hv_err)
1590			return ldc_abort(lp);
1591
1592		if (lp->chan_state == LDC_CHANNEL_DOWN ||
1593		    lp->chan_state == LDC_CHANNEL_RESETTING)
1594			return -ECONNRESET;
1595
1596		if (cur_head != lp->rx_tail) {
1597			ldcdbg(DATA, "DATA WAIT DONE "
1598			       "head[%lx] tail[%lx] chan_state[%lx]\n",
1599			       dummy, lp->rx_tail, lp->chan_state);
1600			return 0;
1601		}
1602
1603		udelay(1);
1604	}
1605	return -EAGAIN;
1606}
1607
1608static int rx_set_head(struct ldc_channel *lp, unsigned long head)
1609{
1610	int err = __set_rx_head(lp, head);
1611
1612	if (err < 0)
1613		return ldc_abort(lp);
1614
1615	lp->rx_head = head;
1616	return 0;
1617}
1618
1619static void send_data_ack(struct ldc_channel *lp)
1620{
1621	unsigned long new_tail;
1622	struct ldc_packet *p;
1623
1624	p = data_get_tx_packet(lp, &new_tail);
1625	if (likely(p)) {
1626		int err;
1627
1628		memset(p, 0, sizeof(*p));
1629		p->type = LDC_DATA;
1630		p->stype = LDC_ACK;
1631		p->ctrl = 0;
1632		p->seqid = lp->snd_nxt + 1;
1633		p->u.r.ackid = lp->rcv_nxt;
1634
1635		err = send_tx_packet(lp, p, new_tail);
1636		if (!err)
1637			lp->snd_nxt++;
1638	}
1639}
1640
/* Read one (possibly fragmented) message in a header-carrying mode.
 *
 * Walks the RX queue starting at the head reported by the hypervisor.
 * Every packet must pass rx_seq_ok(); control frames are passed to
 * process_control_frame(), ACK/NACK bits to data_ack_nack(), and the
 * payload of LDC_INFO data packets is appended to buf until a packet
 * with LDC_STOP is consumed.  If the queue runs dry before that,
 * rx_data_wait() is used to wait for more packets.
 *
 * Returns the number of payload bytes copied (possibly 0), or a
 * negative error.  -EMSGSIZE means buf cannot hold the next fragment.
 * On any error after a first fragment was seen, lp->rcv_nxt is rewound
 * to just before that fragment.  When bytes were copied and the mode
 * is not LDC_MODE_UNRELIABLE, a data ACK is sent.
 */
static int read_nonraw(struct ldc_channel *lp, void *buf, unsigned int size)
{
	struct ldc_packet *first_frag;
	unsigned long hv_err, new;
	int err, copied;

	hv_err = sun4v_ldc_rx_get_state(lp->id,
					&lp->rx_head,
					&lp->rx_tail,
					&lp->chan_state);
	if (hv_err)
		return ldc_abort(lp);

	if (lp->chan_state == LDC_CHANNEL_DOWN ||
	    lp->chan_state == LDC_CHANNEL_RESETTING)
		return -ECONNRESET;

	/* Empty queue: nothing to read. */
	if (lp->rx_head == lp->rx_tail)
		return 0;

	/* 'new' is the local read cursor; first_frag remembers the first
	 * fragment of the message being assembled.
	 */
	first_frag = NULL;
	copied = err = 0;
	new = lp->rx_head;
	while (1) {
		struct ldc_packet *p;
		int pkt_len;

		BUG_ON(new == lp->rx_tail);
		p = lp->rx_base + (new / LDC_PACKET_SIZE);

		ldcdbg(RX, "RX read pkt[%02x:%02x:%02x:%02x:%08x:%08x] "
		       "rcv_nxt[%08x]\n",
		       p->type,
		       p->stype,
		       p->ctrl,
		       p->env,
		       p->seqid,
		       p->u.r.ackid,
		       lp->rcv_nxt);

		/* Bad sequence id: let rx_bad_seq() deal with it and
		 * discard whatever has been copied so far.
		 */
		if (unlikely(!rx_seq_ok(lp, p->seqid))) {
			err = rx_bad_seq(lp, p, first_frag);
			copied = 0;
			break;
		}

		if (p->type & LDC_CTRL) {
			err = process_control_frame(lp, p);
			if (err < 0)
				break;
			/* Non-negative results are not treated as errors. */
			err = 0;
		}

		lp->rcv_nxt = p->seqid;

		/* Packets without LDC_DATA are skipped. */
		if (!(p->type & LDC_DATA)) {
			new = rx_advance(lp, new);
			goto no_data;
		}
		if (p->stype & (LDC_ACK | LDC_NACK)) {
			err = data_ack_nack(lp, p);
			if (err)
				break;
		}
		/* A data packet without LDC_INFO carries no payload for
		 * us: consume it right away and move on.
		 */
		if (!(p->stype & LDC_INFO)) {
			new = rx_advance(lp, new);
			err = rx_set_head(lp, new);
			if (err)
				break;
			goto no_data;
		}

		pkt_len = p->env & LDC_LEN;

		/* Every initial packet starts with the START bit set.
		 *
		 * Singleton packets will have both START+STOP set.
		 *
		 * Fragments will have START set in the first frame, STOP
		 * set in the last frame, and neither bit set in middle
		 * frames of the packet.
		 *
		 * Therefore if we are at the beginning of a packet and
		 * we don't see START, or we are in the middle of a fragmented
		 * packet and do see START, we are unsynchronized and should
		 * flush the RX queue.
		 */
		if ((first_frag == NULL && !(p->env & LDC_START)) ||
		    (first_frag != NULL &&  (p->env & LDC_START))) {
			if (!first_frag)
				new = rx_advance(lp, new);

			err = rx_set_head(lp, new);
			if (err)
				break;

			if (!first_frag)
				goto no_data;
		}
		if (!first_frag)
			first_frag = p;

		if (pkt_len > size - copied) {
			/* User didn't give us a big enough buffer,
			 * what to do?  This is a pretty serious error.
			 *
			 * Since we haven't updated the RX ring head to
			 * consume any of the packets, signal the error
			 * to the user and just leave the RX ring alone.
			 *
			 * This seems the best behavior because this allows
			 * a user of the LDC layer to start with a small
			 * RX buffer for ldc_read() calls and use -EMSGSIZE
			 * as a cue to enlarge its read buffer.
			 */
			err = -EMSGSIZE;
			break;
		}

		/* Ok, we are gonna eat this one.  */
		new = rx_advance(lp, new);

		/* The payload location depends on the header layout. */
		memcpy(buf,
		       (lp->cfg.mode == LDC_MODE_UNRELIABLE ?
			p->u.u_data : p->u.r.r_data), pkt_len);
		buf += pkt_len;
		copied += pkt_len;

		/* STOP marks the last fragment of the message. */
		if (p->env & LDC_STOP)
			break;

no_data:
		/* Caught up with the tail: wait for more packets. */
		if (new == lp->rx_tail) {
			err = rx_data_wait(lp, new);
			if (err)
				break;
		}
	}

	/* On success, commit everything consumed to the RX head. */
	if (!err)
		err = rx_set_head(lp, new);

	/* On failure, rewind rcv_nxt to just before the first fragment. */
	if (err && first_frag)
		lp->rcv_nxt = first_frag->seqid - 1;

	if (!err) {
		err = copied;
		if (err > 0 && lp->cfg.mode != LDC_MODE_UNRELIABLE)
			send_data_ack(lp);
	}

	return err;
}
1794
1795static const struct ldc_mode_ops nonraw_ops = {
1796	.write		=	write_nonraw,
1797	.read		=	read_nonraw,
1798};
1799
1800static int write_stream(struct ldc_channel *lp, const void *buf,
1801			unsigned int size)
1802{
1803	if (size > lp->cfg.mtu)
1804		size = lp->cfg.mtu;
1805	return write_nonraw(lp, buf, size);
1806}
1807
1808static int read_stream(struct ldc_channel *lp, void *buf, unsigned int size)
1809{
1810	if (!lp->mssbuf_len) {
1811		int err = read_nonraw(lp, lp->mssbuf, lp->cfg.mtu);
1812		if (err < 0)
1813			return err;
1814
1815		lp->mssbuf_len = err;
1816		lp->mssbuf_off = 0;
1817	}
1818
1819	if (size > lp->mssbuf_len)
1820		size = lp->mssbuf_len;
1821	memcpy(buf, lp->mssbuf + lp->mssbuf_off, size);
1822
1823	lp->mssbuf_off += size;
1824	lp->mssbuf_len -= size;
1825
1826	return size;
1827}
1828
1829static const struct ldc_mode_ops stream_ops = {
1830	.write		=	write_stream,
1831	.read		=	read_stream,
1832};
1833
1834int ldc_write(struct ldc_channel *lp, const void *buf, unsigned int size)
1835{
1836	unsigned long flags;
1837	int err;
1838
1839	if (!buf)
1840		return -EINVAL;
1841
1842	if (!size)
1843		return 0;
1844
1845	spin_lock_irqsave(&lp->lock, flags);
1846
1847	if (lp->hs_state != LDC_HS_COMPLETE)
1848		err = -ENOTCONN;
1849	else
1850		err = lp->mops->write(lp, buf, size);
1851
1852	spin_unlock_irqrestore(&lp->lock, flags);
1853
1854	return err;
1855}
1856EXPORT_SYMBOL(ldc_write);
1857
1858int ldc_read(struct ldc_channel *lp, void *buf, unsigned int size)
1859{
1860	unsigned long flags;
1861	int err;
1862
1863	if (!buf)
1864		return -EINVAL;
1865
1866	if (!size)
1867		return 0;
1868
1869	spin_lock_irqsave(&lp->lock, flags);
1870
1871	if (lp->hs_state != LDC_HS_COMPLETE)
1872		err = -ENOTCONN;
1873	else
1874		err = lp->mops->read(lp, buf, size);
1875
1876	spin_unlock_irqrestore(&lp->lock, flags);
1877
1878	return err;
1879}
1880EXPORT_SYMBOL(ldc_read);
1881
1882static long arena_alloc(struct ldc_iommu *iommu, unsigned long npages)
1883{
1884	struct iommu_arena *arena = &iommu->arena;
1885	unsigned long n, start, end, limit;
1886	int pass;
1887
1888	limit = arena->limit;
1889	start = arena->hint;
1890	pass = 0;
1891
1892again:
1893	n = bitmap_find_next_zero_area(arena->map, limit, start, npages, 0);
1894	end = n + npages;
1895	if (unlikely(end >= limit)) {
1896		if (likely(pass < 1)) {
1897			limit = start;
1898			start = 0;
1899			pass++;
1900			goto again;
1901		} else {
1902			/* Scanned the whole thing, give up. */
1903			return -1;
1904		}
1905	}
1906	bitmap_set(arena->map, n, npages);
1907
1908	arena->hint = end;
1909
1910	return n;
1911}
1912
/* A cookie carries its page-size code in the top four bits:
 * COOKIE_PGSZ_CODE masks those bits and COOKIE_PGSZ_CODE_SHIFT is
 * their bit position.
 */
#define COOKIE_PGSZ_CODE	0xf000000000000000ULL
#define COOKIE_PGSZ_CODE_SHIFT	60ULL
1915
1916static u64 pagesize_code(void)
1917{
1918	switch (PAGE_SIZE) {
1919	default:
1920	case (8ULL * 1024ULL):
1921		return 0;
1922	case (64ULL * 1024ULL):
1923		return 1;
1924	case (512ULL * 1024ULL):
1925		return 2;
1926	case (4ULL * 1024ULL * 1024ULL):
1927		return 3;
1928	case (32ULL * 1024ULL * 1024ULL):
1929		return 4;
1930	case (256ULL * 1024ULL * 1024ULL):
1931		return 5;
1932	}
1933}
1934
1935static u64 make_cookie(u64 index, u64 pgsz_code, u64 page_offset)
1936{
1937	return ((pgsz_code << COOKIE_PGSZ_CODE_SHIFT) |
1938		(index << PAGE_SHIFT) |
1939		page_offset);
1940}
1941
1942static u64 cookie_to_index(u64 cookie, unsigned long *shift)
1943{
1944	u64 szcode = cookie >> COOKIE_PGSZ_CODE_SHIFT;
1945
1946	cookie &= ~COOKIE_PGSZ_CODE;
1947
1948	*shift = szcode * 3;
1949
1950	return (cookie >> (13ULL + (szcode * 3ULL)));
1951}
1952
1953static struct ldc_mtable_entry *alloc_npages(struct ldc_iommu *iommu,
1954					     unsigned long npages)
1955{
1956	long entry;
1957
1958	entry = arena_alloc(iommu, npages);
1959	if (unlikely(entry < 0))
1960		return NULL;
1961
1962	return iommu->page_table + entry;
1963}
1964
1965static u64 perm_to_mte(unsigned int map_perm)
1966{
1967	u64 mte_base;
1968
1969	mte_base = pagesize_code();
1970
1971	if (map_perm & LDC_MAP_SHADOW) {
1972		if (map_perm & LDC_MAP_R)
1973			mte_base |= LDC_MTE_COPY_R;
1974		if (map_perm & LDC_MAP_W)
1975			mte_base |= LDC_MTE_COPY_W;
1976	}
1977	if (map_perm & LDC_MAP_DIRECT) {
1978		if (map_perm & LDC_MAP_R)
1979			mte_base |= LDC_MTE_READ;
1980		if (map_perm & LDC_MAP_W)
1981			mte_base |= LDC_MTE_WRITE;
1982		if (map_perm & LDC_MAP_X)
1983			mte_base |= LDC_MTE_EXEC;
1984	}
1985	if (map_perm & LDC_MAP_IO) {
1986		if (map_perm & LDC_MAP_R)
1987			mte_base |= LDC_MTE_IOMMU_R;
1988		if (map_perm & LDC_MAP_W)
1989			mte_base |= LDC_MTE_IOMMU_W;
1990	}
1991
1992	return mte_base;
1993}
1994
1995static int pages_in_region(unsigned long base, long len)
1996{
1997	int count = 0;
1998
1999	do {
2000		unsigned long new = (base + PAGE_SIZE) & PAGE_MASK;
2001
2002		len -= (new - base);
2003		base = new;
2004		count++;
2005	} while (len > 0);
2006
2007	return count;
2008}
2009
/* Running state shared by the fill_cookies() calls of one mapping
 * request.
 */
struct cookie_state {
	/* Map table whose entries are being filled in. */
	struct ldc_mtable_entry		*page_table;
	/* Caller-supplied array receiving the generated cookies. */
	struct ldc_trans_cookie		*cookies;
	/* Value OR-ed with the physical address to form each mte. */
	u64				mte_base;
	/* Cookie value one past the end of the previous chunk; a new
	 * chunk starting exactly here is merged into the last cookie.
	 */
	u64				prev_cookie;
	/* Index of the next map table entry to fill. */
	u32				pte_idx;
	/* Number of cookies written to 'cookies' so far. */
	u32				nc;
};
2018
2019static void fill_cookies(struct cookie_state *sp, unsigned long pa,
2020			 unsigned long off, unsigned long len)
2021{
2022	do {
2023		unsigned long tlen, new = pa + PAGE_SIZE;
2024		u64 this_cookie;
2025
2026		sp->page_table[sp->pte_idx].mte = sp->mte_base | pa;
2027
2028		tlen = PAGE_SIZE;
2029		if (off)
2030			tlen = PAGE_SIZE - off;
2031		if (tlen > len)
2032			tlen = len;
2033
2034		this_cookie = make_cookie(sp->pte_idx,
2035					  pagesize_code(), off);
2036
2037		off = 0;
2038
2039		if (this_cookie == sp->prev_cookie) {
2040			sp->cookies[sp->nc - 1].cookie_size += tlen;
2041		} else {
2042			sp->cookies[sp->nc].cookie_addr = this_cookie;
2043			sp->cookies[sp->nc].cookie_size = tlen;
2044			sp->nc++;
2045		}
2046		sp->prev_cookie = this_cookie + tlen;
2047
2048		sp->pte_idx++;
2049
2050		len -= tlen;
2051		pa = new;
2052	} while (len > 0);
2053}
2054
2055static int sg_count_one(struct scatterlist *sg)
2056{
2057	unsigned long base = page_to_pfn(sg_page(sg)) << PAGE_SHIFT;
2058	long len = sg->length;
2059
2060	if ((sg->offset | len) & (8UL - 1))
2061		return -EFAULT;
2062
2063	return pages_in_region(base + sg->offset, len);
2064}
2065
2066static int sg_count_pages(struct scatterlist *sg, int num_sg)
2067{
2068	int count;
2069	int i;
2070
2071	count = 0;
2072	for (i = 0; i < num_sg; i++) {
2073		int err = sg_count_one(sg + i);
2074		if (err < 0)
2075			return err;
2076		count += err;
2077	}
2078
2079	return count;
2080}
2081
2082int ldc_map_sg(struct ldc_channel *lp,
2083	       struct scatterlist *sg, int num_sg,
2084	       struct ldc_trans_cookie *cookies, int ncookies,
2085	       unsigned int map_perm)
2086{
2087	unsigned long i, npages, flags;
2088	struct ldc_mtable_entry *base;
2089	struct cookie_state state;
2090	struct ldc_iommu *iommu;
2091	int err;
2092
2093	if (map_perm & ~LDC_MAP_ALL)
2094		return -EINVAL;
2095
2096	err = sg_count_pages(sg, num_sg);
2097	if (err < 0)
2098		return err;
2099
2100	npages = err;
2101	if (err > ncookies)
2102		return -EMSGSIZE;
2103
2104	iommu = &lp->iommu;
2105
2106	spin_lock_irqsave(&iommu->lock, flags);
2107	base = alloc_npages(iommu, npages);
2108	spin_unlock_irqrestore(&iommu->lock, flags);
2109
2110	if (!base)
2111		return -ENOMEM;
2112
2113	state.page_table = iommu->page_table;
2114	state.cookies = cookies;
2115	state.mte_base = perm_to_mte(map_perm);
2116	state.prev_cookie = ~(u64)0;
2117	state.pte_idx = (base - iommu->page_table);
2118	state.nc = 0;
2119
2120	for (i = 0; i < num_sg; i++)
2121		fill_cookies(&state, page_to_pfn(sg_page(&sg[i])) << PAGE_SHIFT,
2122			     sg[i].offset, sg[i].length);
2123
2124	return state.nc;
2125}
2126EXPORT_SYMBOL(ldc_map_sg);
2127
2128int ldc_map_single(struct ldc_channel *lp,
2129		   void *buf, unsigned int len,
2130		   struct ldc_trans_cookie *cookies, int ncookies,
2131		   unsigned int map_perm)
2132{
2133	unsigned long npages, pa, flags;
2134	struct ldc_mtable_entry *base;
2135	struct cookie_state state;
2136	struct ldc_iommu *iommu;
2137
2138	if ((map_perm & ~LDC_MAP_ALL) || (ncookies < 1))
2139		return -EINVAL;
2140
2141	pa = __pa(buf);
2142	if ((pa | len) & (8UL - 1))
2143		return -EFAULT;
2144
2145	npages = pages_in_region(pa, len);
2146
2147	iommu = &lp->iommu;
2148
2149	spin_lock_irqsave(&iommu->lock, flags);
2150	base = alloc_npages(iommu, npages);
2151	spin_unlock_irqrestore(&iommu->lock, flags);
2152
2153	if (!base)
2154		return -ENOMEM;
2155
2156	state.page_table = iommu->page_table;
2157	state.cookies = cookies;
2158	state.mte_base = perm_to_mte(map_perm);
2159	state.prev_cookie = ~(u64)0;
2160	state.pte_idx = (base - iommu->page_table);
2161	state.nc = 0;
2162	fill_cookies(&state, (pa & PAGE_MASK), (pa & ~PAGE_MASK), len);
2163	BUG_ON(state.nc > ncookies);
2164
2165	return state.nc;
2166}
2167EXPORT_SYMBOL(ldc_map_single);
2168
2169static void free_npages(unsigned long id, struct ldc_iommu *iommu,
2170			u64 cookie, u64 size)
2171{
2172	struct iommu_arena *arena = &iommu->arena;
2173	unsigned long i, shift, index, npages;
2174	struct ldc_mtable_entry *base;
2175
2176	npages = PAGE_ALIGN(((cookie & ~PAGE_MASK) + size)) >> PAGE_SHIFT;
2177	index = cookie_to_index(cookie, &shift);
2178	base = iommu->page_table + index;
2179
2180	BUG_ON(index > arena->limit ||
2181	       (index + npages) > arena->limit);
2182
2183	for (i = 0; i < npages; i++) {
2184		if (base->cookie)
2185			sun4v_ldc_revoke(id, cookie + (i << shift),
2186					 base->cookie);
2187		base->mte = 0;
2188		__clear_bit(index + i, arena->map);
2189	}
2190}
2191
2192void ldc_unmap(struct ldc_channel *lp, struct ldc_trans_cookie *cookies,
2193	       int ncookies)
2194{
2195	struct ldc_iommu *iommu = &lp->iommu;
2196	unsigned long flags;
2197	int i;
2198
2199	spin_lock_irqsave(&iommu->lock, flags);
2200	for (i = 0; i < ncookies; i++) {
2201		u64 addr = cookies[i].cookie_addr;
2202		u64 size = cookies[i].cookie_size;
2203
2204		free_npages(lp->id, iommu, addr, size);
2205	}
2206	spin_unlock_irqrestore(&iommu->lock, flags);
2207}
2208EXPORT_SYMBOL(ldc_unmap);
2209
2210int ldc_copy(struct ldc_channel *lp, int copy_dir,
2211	     void *buf, unsigned int len, unsigned long offset,
2212	     struct ldc_trans_cookie *cookies, int ncookies)
2213{
2214	unsigned int orig_len;
2215	unsigned long ra;
2216	int i;
2217
2218	if (copy_dir != LDC_COPY_IN && copy_dir != LDC_COPY_OUT) {
2219		printk(KERN_ERR PFX "ldc_copy: ID[%lu] Bad copy_dir[%d]\n",
2220		       lp->id, copy_dir);
2221		return -EINVAL;
2222	}
2223
2224	ra = __pa(buf);
2225	if ((ra | len | offset) & (8UL - 1)) {
2226		printk(KERN_ERR PFX "ldc_copy: ID[%lu] Unaligned buffer "
2227		       "ra[%lx] len[%x] offset[%lx]\n",
2228		       lp->id, ra, len, offset);
2229		return -EFAULT;
2230	}
2231
2232	if (lp->hs_state != LDC_HS_COMPLETE ||
2233	    (lp->flags & LDC_FLAG_RESET)) {
2234		printk(KERN_ERR PFX "ldc_copy: ID[%lu] Link down hs_state[%x] "
2235		       "flags[%x]\n", lp->id, lp->hs_state, lp->flags);
2236		return -ECONNRESET;
2237	}
2238
2239	orig_len = len;
2240	for (i = 0; i < ncookies; i++) {
2241		unsigned long cookie_raddr = cookies[i].cookie_addr;
2242		unsigned long this_len = cookies[i].cookie_size;
2243		unsigned long actual_len;
2244
2245		if (unlikely(offset)) {
2246			unsigned long this_off = offset;
2247
2248			if (this_off > this_len)
2249				this_off = this_len;
2250
2251			offset -= this_off;
2252			this_len -= this_off;
2253			if (!this_len)
2254				continue;
2255			cookie_raddr += this_off;
2256		}
2257
2258		if (this_len > len)
2259			this_len = len;
2260
2261		while (1) {
2262			unsigned long hv_err;
2263
2264			hv_err = sun4v_ldc_copy(lp->id, copy_dir,
2265						cookie_raddr, ra,
2266						this_len, &actual_len);
2267			if (unlikely(hv_err)) {
2268				printk(KERN_ERR PFX "ldc_copy: ID[%lu] "
2269				       "HV error %lu\n",
2270				       lp->id, hv_err);
2271				if (lp->hs_state != LDC_HS_COMPLETE ||
2272				    (lp->flags & LDC_FLAG_RESET))
2273					return -ECONNRESET;
2274				else
2275					return -EFAULT;
2276			}
2277
2278			cookie_raddr += actual_len;
2279			ra += actual_len;
2280			len -= actual_len;
2281			if (actual_len == this_len)
2282				break;
2283
2284			this_len -= actual_len;
2285		}
2286
2287		if (!len)
2288			break;
2289	}
2290
2291	/* It is caller policy what to do about short copies.
2292	 * For example, a networking driver can declare the
2293	 * packet a runt and drop it.
2294	 */
2295
2296	return orig_len - len;
2297}
2298EXPORT_SYMBOL(ldc_copy);
2299
2300void *ldc_alloc_exp_dring(struct ldc_channel *lp, unsigned int len,
2301			  struct ldc_trans_cookie *cookies, int *ncookies,
2302			  unsigned int map_perm)
2303{
2304	void *buf;
2305	int err;
2306
2307	if (len & (8UL - 1))
2308		return ERR_PTR(-EINVAL);
2309
2310	buf = kzalloc(len, GFP_KERNEL);
2311	if (!buf)
2312		return ERR_PTR(-ENOMEM);
2313
2314	err = ldc_map_single(lp, buf, len, cookies, *ncookies, map_perm);
2315	if (err < 0) {
2316		kfree(buf);
2317		return ERR_PTR(err);
2318	}
2319	*ncookies = err;
2320
2321	return buf;
2322}
2323EXPORT_SYMBOL(ldc_alloc_exp_dring);
2324
/* Unmap the ncookies cookies describing buf and then free buf.  The
 * len argument is not used by this function.
 */
void ldc_free_exp_dring(struct ldc_channel *lp, void *buf, unsigned int len,
			struct ldc_trans_cookie *cookies, int ncookies)
{
	/* Unmap first, then release the memory. */
	ldc_unmap(lp, cookies, ncookies);
	kfree(buf);
}
2331EXPORT_SYMBOL(ldc_free_exp_dring);
2332
2333static int __init ldc_init(void)
2334{
2335	unsigned long major, minor;
2336	struct mdesc_handle *hp;
2337	const u64 *v;
2338	int err;
2339	u64 mp;
2340
2341	hp = mdesc_grab();
2342	if (!hp)
2343		return -ENODEV;
2344
2345	mp = mdesc_node_by_name(hp, MDESC_NODE_NULL, "platform");
2346	err = -ENODEV;
2347	if (mp == MDESC_NODE_NULL)
2348		goto out;
2349
2350	v = mdesc_get_property(hp, mp, "domaining-enabled", NULL);
2351	if (!v)
2352		goto out;
2353
2354	major = 1;
2355	minor = 0;
2356	if (sun4v_hvapi_register(HV_GRP_LDOM, major, &minor)) {
2357		printk(KERN_INFO PFX "Could not register LDOM hvapi.\n");
2358		goto out;
2359	}
2360
2361	printk(KERN_INFO "%s", version);
2362
2363	if (!*v) {
2364		printk(KERN_INFO PFX "Domaining disabled.\n");
2365		goto out;
2366	}
2367	ldom_domaining_enabled = 1;
2368	err = 0;
2369
2370out:
2371	mdesc_release(hp);
2372	return err;
2373}
2374
2375core_initcall(ldc_init);
2376