/*
 * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved.
 * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the BSD-type
 * license below:
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *      Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *
 *      Redistributions in binary form must reproduce the above
 *      copyright notice, this list of conditions and the following
 *      disclaimer in the documentation and/or other materials provided
 *      with the distribution.
 *
 *      Neither the name of the Network Appliance, Inc. nor the names of
 *      its contributors may be used to endorse or promote products
 *      derived from this software without specific prior written
 *      permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Author: Tom Tucker <tom@opengridcomputing.com>
 */

#include <linux/sunrpc/debug.h>
#include <linux/sunrpc/rpc_rdma.h>
#include <linux/spinlock.h>
#include <linux/highmem.h>
#include <asm/unaligned.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
#include <linux/sunrpc/svc_rdma.h>

#define RPCDBG_FACILITY	RPCDBG_SVCXPRT
/*
 * Replace the pages in the rq_pages array with the pages from the SGEs in
 * the RDMA_RECV completion. The SGL should contain full pages up to the
 * last one.
 */
static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
			       struct svc_rdma_op_ctxt *ctxt,
			       u32 byte_count)
{
	struct page *page;
	u32 bc;
	int sge_no;

	/* Swap the page in the SGE with the page in argpages */
	page = ctxt->pages[0];
	put_page(rqstp->rq_pages[0]);
	rqstp->rq_pages[0] = page;

	/* Set up the XDR head */
	rqstp->rq_arg.head[0].iov_base = page_address(page);
	rqstp->rq_arg.head[0].iov_len =
		min_t(size_t, byte_count, ctxt->sge[0].length);
	rqstp->rq_arg.len = byte_count;
	rqstp->rq_arg.buflen = byte_count;

	/* Compute bytes past head in the SGL */
	bc = byte_count - rqstp->rq_arg.head[0].iov_len;

	/* If data remains, store it in the pagelist */
	rqstp->rq_arg.page_len = bc;
	rqstp->rq_arg.page_base = 0;
	rqstp->rq_arg.pages = &rqstp->rq_pages[1];
	sge_no = 1;
	while (bc && sge_no < ctxt->count) {
		page = ctxt->pages[sge_no];
		put_page(rqstp->rq_pages[sge_no]);
		rqstp->rq_pages[sge_no] = page;
		bc -= min_t(u32, bc, ctxt->sge[sge_no].length);
		rqstp->rq_arg.buflen += ctxt->sge[sge_no].length;
		sge_no++;
	}
	rqstp->rq_respages = &rqstp->rq_pages[sge_no];
	rqstp->rq_next_page = rqstp->rq_respages + 1;
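	/* Pages from rq_respages onward are available for the reply */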

	/* We should never run out of SGEs because the limit is defined to
	 * support the maximum allowed RPC data length
	 */
	BUG_ON(bc && (sge_no == ctxt->count));
	BUG_ON((rqstp->rq_arg.head[0].iov_len + rqstp->rq_arg.page_len)
	       != byte_count);
	BUG_ON(rqstp->rq_arg.len != byte_count);

	/* If not all pages were used from the SGL, free the remaining ones */
	bc = sge_no;
	while (sge_no < ctxt->count) {
		page = ctxt->pages[sge_no++];
		put_page(page);
	}
	ctxt->count = bc;

	/* Set up tail */
	rqstp->rq_arg.tail[0].iov_base = NULL;
	rqstp->rq_arg.tail[0].iov_len = 0;
}

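/* Limit the number of SGEs used as the local data sink of an RDMA_READ.
 * An iWARP RDMA Read targets a single sink buffer, so only one SGE can be
 * used there; other transports may use up to the device's sc_max_sge.
 */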
static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count)
{
	if (rdma_node_get_transport(xprt->sc_cm_id->device->node_type) ==
	     RDMA_TRANSPORT_IWARP)
		return 1;
	else
		return min_t(int, sge_count, xprt->sc_max_sge);
}

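/* A "reader" posts the RDMA_READ work request(s) needed to pull part of a
 * chunk into the local page list.  It returns the number of bytes for
 * which reads were posted (advancing *page_no and *page_offset past the
 * sink pages used), or a negative errno on failure.
 */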
typedef int (*rdma_reader_fn)(struct svcxprt_rdma *xprt,
			      struct svc_rqst *rqstp,
			      struct svc_rdma_op_ctxt *head,
			      int *page_no,
			      u32 *page_offset,
			      u32 rs_handle,
			      u32 rs_length,
			      u64 rs_offset,
			      int last);

/* Issue an RDMA_READ using the local lkey to map the data sink */
static int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt,
			       struct svc_rqst *rqstp,
			       struct svc_rdma_op_ctxt *head,
			       int *page_no,
			       u32 *page_offset,
			       u32 rs_handle,
			       u32 rs_length,
			       u64 rs_offset,
			       int last)
{
	struct ib_send_wr read_wr;
	int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT;
	struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt);
	int ret, read, pno;
	u32 pg_off = *page_offset;
	u32 pg_no = *page_no;

	ctxt->direction = DMA_FROM_DEVICE;
	ctxt->read_hdr = head;
	pages_needed =
		min_t(int, pages_needed, rdma_read_max_sge(xprt, pages_needed));
	read = min_t(int, pages_needed << PAGE_SHIFT, rs_length);
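	/* Only "read" bytes are posted by this call; the caller loops
	 * until the whole chunk segment has been pulled across.
	 */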

	for (pno = 0; pno < pages_needed; pno++) {
		int len = min_t(int, rs_length, PAGE_SIZE - pg_off);

		head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no];
		head->arg.page_len += len;
		head->arg.len += len;
		if (!pg_off)
			head->count++;
		rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1];
		rqstp->rq_next_page = rqstp->rq_respages + 1;
		ctxt->sge[pno].addr =
			ib_dma_map_page(xprt->sc_cm_id->device,
					head->arg.pages[pg_no], pg_off,
					PAGE_SIZE - pg_off,
					DMA_FROM_DEVICE);
		ret = ib_dma_mapping_error(xprt->sc_cm_id->device,
					   ctxt->sge[pno].addr);
		if (ret)
			goto err;
		atomic_inc(&xprt->sc_dma_used);

		/* The lkey here is either a local dma lkey or a dma_mr lkey */
		ctxt->sge[pno].lkey = xprt->sc_dma_lkey;
		ctxt->sge[pno].length = len;
		ctxt->count++;

		/* adjust offset and wrap to next page if needed */
		pg_off += len;
		if (pg_off == PAGE_SIZE) {
			pg_off = 0;
			pg_no++;
		}
		rs_length -= len;
	}

	if (last && rs_length == 0)
		set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
	else
		clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);

	memset(&read_wr, 0, sizeof(read_wr));
	read_wr.wr_id = (unsigned long)ctxt;
	read_wr.opcode = IB_WR_RDMA_READ;
	ctxt->wr_op = read_wr.opcode;
	read_wr.send_flags = IB_SEND_SIGNALED;
	read_wr.wr.rdma.rkey = rs_handle;
	read_wr.wr.rdma.remote_addr = rs_offset;
	read_wr.sg_list = ctxt->sge;
	read_wr.num_sge = pages_needed;

	ret = svc_rdma_send(xprt, &read_wr);
	if (ret) {
		pr_err("svcrdma: Error %d posting RDMA_READ\n", ret);
		set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
		goto err;
	}

	/* return current location in page array */
	*page_no = pg_no;
	*page_offset = pg_off;
	ret = read;
	atomic_inc(&rdma_stat_read);
	return ret;
 err:
	svc_rdma_unmap_dma(ctxt);
	svc_rdma_put_context(ctxt, 0);
	return ret;
}

/* Issue an RDMA_READ using an FRMR to map the data sink */
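/*
 * The sink pages are registered with a FAST_REG_MR work request that is
 * chained ahead of the RDMA_READ.  The MR is invalidated afterwards,
 * either remotely via RDMA_READ_WITH_INV or with a chained LOCAL_INV.
 */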
static int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
				struct svc_rqst *rqstp,
				struct svc_rdma_op_ctxt *head,
				int *page_no,
				u32 *page_offset,
				u32 rs_handle,
				u32 rs_length,
				u64 rs_offset,
				int last)
{
	struct ib_send_wr read_wr;
	struct ib_send_wr inv_wr;
	struct ib_send_wr fastreg_wr;
	u8 key;
	int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT;
	struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt);
	struct svc_rdma_fastreg_mr *frmr = svc_rdma_get_frmr(xprt);
	int ret, read, pno;
	u32 pg_off = *page_offset;
	u32 pg_no = *page_no;

	if (IS_ERR(frmr))
		return -ENOMEM;

	ctxt->direction = DMA_FROM_DEVICE;
	ctxt->frmr = frmr;
	pages_needed = min_t(int, pages_needed, xprt->sc_frmr_pg_list_len);
	read = min_t(int, pages_needed << PAGE_SHIFT, rs_length);
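	/* The read is clamped to the FRMR page-list length; the caller
	 * loops to pull any remainder of the chunk.
	 */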

	frmr->kva = page_address(rqstp->rq_arg.pages[pg_no]);
	frmr->direction = DMA_FROM_DEVICE;
	frmr->access_flags = (IB_ACCESS_LOCAL_WRITE|IB_ACCESS_REMOTE_WRITE);
	frmr->map_len = pages_needed << PAGE_SHIFT;
	frmr->page_list_len = pages_needed;

	for (pno = 0; pno < pages_needed; pno++) {
		int len = min_t(int, rs_length, PAGE_SIZE - pg_off);

		head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no];
		head->arg.page_len += len;
		head->arg.len += len;
		if (!pg_off)
			head->count++;
		rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1];
		rqstp->rq_next_page = rqstp->rq_respages + 1;
		frmr->page_list->page_list[pno] =
			ib_dma_map_page(xprt->sc_cm_id->device,
					head->arg.pages[pg_no], 0,
					PAGE_SIZE, DMA_FROM_DEVICE);
		ret = ib_dma_mapping_error(xprt->sc_cm_id->device,
					   frmr->page_list->page_list[pno]);
		if (ret)
			goto err;
		atomic_inc(&xprt->sc_dma_used);

		/* adjust offset and wrap to next page if needed */
		pg_off += len;
		if (pg_off == PAGE_SIZE) {
			pg_off = 0;
			pg_no++;
		}
		rs_length -= len;
	}

	if (last && rs_length == 0)
		set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
	else
		clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);

	/* Bump the key */
	key = (u8)(frmr->mr->lkey & 0x000000FF);
	ib_update_fast_reg_key(frmr->mr, ++key);

	ctxt->sge[0].addr = (unsigned long)frmr->kva + *page_offset;
	ctxt->sge[0].lkey = frmr->mr->lkey;
	ctxt->sge[0].length = read;
	ctxt->count = 1;
	ctxt->read_hdr = head;

	/* Prepare FASTREG WR */
	memset(&fastreg_wr, 0, sizeof(fastreg_wr));
	fastreg_wr.opcode = IB_WR_FAST_REG_MR;
	fastreg_wr.send_flags = IB_SEND_SIGNALED;
	fastreg_wr.wr.fast_reg.iova_start = (unsigned long)frmr->kva;
	fastreg_wr.wr.fast_reg.page_list = frmr->page_list;
	fastreg_wr.wr.fast_reg.page_list_len = frmr->page_list_len;
	fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
	fastreg_wr.wr.fast_reg.length = frmr->map_len;
	fastreg_wr.wr.fast_reg.access_flags = frmr->access_flags;
	fastreg_wr.wr.fast_reg.rkey = frmr->mr->lkey;
	fastreg_wr.next = &read_wr;

	/* Prepare RDMA_READ */
	memset(&read_wr, 0, sizeof(read_wr));
	read_wr.send_flags = IB_SEND_SIGNALED;
	read_wr.wr.rdma.rkey = rs_handle;
	read_wr.wr.rdma.remote_addr = rs_offset;
	read_wr.sg_list = ctxt->sge;
	read_wr.num_sge = 1;
	if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_READ_W_INV) {
		read_wr.opcode = IB_WR_RDMA_READ_WITH_INV;
		read_wr.wr_id = (unsigned long)ctxt;
		read_wr.ex.invalidate_rkey = ctxt->frmr->mr->lkey;
	} else {
		read_wr.opcode = IB_WR_RDMA_READ;
		read_wr.next = &inv_wr;
		/* Prepare invalidate */
		memset(&inv_wr, 0, sizeof(inv_wr));
		inv_wr.wr_id = (unsigned long)ctxt;
		inv_wr.opcode = IB_WR_LOCAL_INV;
		inv_wr.send_flags = IB_SEND_SIGNALED | IB_SEND_FENCE;
		inv_wr.ex.invalidate_rkey = frmr->mr->lkey;
	}
	ctxt->wr_op = read_wr.opcode;

	/* Post the chain */
	ret = svc_rdma_send(xprt, &fastreg_wr);
	if (ret) {
		pr_err("svcrdma: Error %d posting RDMA_READ\n", ret);
		set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
		goto err;
	}

	/* return current location in page array */
	*page_no = pg_no;
	*page_offset = pg_off;
	ret = read;
	atomic_inc(&rdma_stat_read);
	return ret;
 err:
	svc_rdma_unmap_dma(ctxt);
	svc_rdma_put_context(ctxt, 0);
	svc_rdma_put_frmr(xprt, frmr);
	return ret;
}

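/*
 * Pull the data described by the RPC-over-RDMA read list into the head
 * context.  Returns 1 if RDMA_READs were posted and the request must be
 * deferred until they complete, 0 if the message carries no read list,
 * or a negative errno on failure.
 */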
static int rdma_read_chunks(struct svcxprt_rdma *xprt,
			    struct rpcrdma_msg *rmsgp,
			    struct svc_rqst *rqstp,
			    struct svc_rdma_op_ctxt *head)
{
	int page_no, ch_count, ret;
	struct rpcrdma_read_chunk *ch;
	u32 page_offset, byte_count;
	u64 rs_offset;
	rdma_reader_fn reader;

	/* If no read list is present, return 0 */
	ch = svc_rdma_get_read_chunk(rmsgp);
	if (!ch)
		return 0;

	svc_rdma_rcl_chunk_counts(ch, &ch_count, &byte_count);
	if (ch_count > RPCSVC_MAXPAGES)
		return -EINVAL;

	/* The request is completed when the RDMA_READs complete. The
	 * head context keeps all the pages that comprise the
	 * request.
	 */
	head->arg.head[0] = rqstp->rq_arg.head[0];
	head->arg.tail[0] = rqstp->rq_arg.tail[0];
	head->arg.pages = &head->pages[head->count];
	head->hdr_count = head->count;
	head->arg.page_base = 0;
	head->arg.page_len = 0;
	head->arg.len = rqstp->rq_arg.len;
	head->arg.buflen = rqstp->rq_arg.buflen;

	/* Use FRMR if supported */
	if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG)
		reader = rdma_read_chunk_frmr;
	else
		reader = rdma_read_chunk_lcl;

	page_no = 0; page_offset = 0;
	for (ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
	     ch->rc_discrim != 0; ch++) {

		xdr_decode_hyper((__be32 *)&ch->rc_target.rs_offset,
				 &rs_offset);
		byte_count = ntohl(ch->rc_target.rs_length);

		while (byte_count > 0) {
			ret = reader(xprt, rqstp, head,
				     &page_no, &page_offset,
				     ntohl(ch->rc_target.rs_handle),
				     byte_count, rs_offset,
				     ((ch+1)->rc_discrim == 0) /* last */
				     );
			if (ret < 0)
				goto err;
			byte_count -= ret;
			rs_offset += ret;
			head->arg.buflen += ret;
		}
	}
	ret = 1;
 err:
	/* Detach arg pages. svc_recv will replenish them */
	for (page_no = 0;
	     &rqstp->rq_pages[page_no] < rqstp->rq_respages; page_no++)
		rqstp->rq_pages[page_no] = NULL;

	return ret;
}

/*
 * To avoid a separate RDMA READ just for a handful of zero bytes,
 * RFC 5666 section 3.7 allows the client to omit the XDR zero pad
 * in chunk lists.
 */
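/*
 * Restore that pad here by zero-filling the tail of the page list so the
 * upper XDR layers see a fully padded argument buffer.
 */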
static void
rdma_fix_xdr_pad(struct xdr_buf *buf)
{
	unsigned int page_len = buf->page_len;
	unsigned int size = (XDR_QUADLEN(page_len) << 2) - page_len;
	unsigned int offset, pg_no;
	char *p;

	if (size == 0)
		return;

	pg_no = page_len >> PAGE_SHIFT;
	offset = page_len & ~PAGE_MASK;
	p = page_address(buf->pages[pg_no]);
	memset(p + offset, 0, size);

	buf->page_len += size;
	buf->buflen += size;
	buf->len += size;
}

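/*
 * Finish a request whose read-list data has arrived: move the pages saved
 * in the head context into the rqstp, rebuild rq_arg around them, and
 * return the total length of the RPC message.
 */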
static int rdma_read_complete(struct svc_rqst *rqstp,
			      struct svc_rdma_op_ctxt *head)
{
	int page_no;
	int ret;

	BUG_ON(!head);

	/* Copy RPC pages */
	for (page_no = 0; page_no < head->count; page_no++) {
		put_page(rqstp->rq_pages[page_no]);
		rqstp->rq_pages[page_no] = head->pages[page_no];
	}
	/* Point rq_arg.pages past header */
	rdma_fix_xdr_pad(&head->arg);
	rqstp->rq_arg.pages = &rqstp->rq_pages[head->hdr_count];
	rqstp->rq_arg.page_len = head->arg.page_len;
	rqstp->rq_arg.page_base = head->arg.page_base;

	/* rq_respages starts after the last arg page */
	rqstp->rq_respages = &rqstp->rq_arg.pages[page_no];
	rqstp->rq_next_page = rqstp->rq_respages + 1;

	/* Rebuild rq_arg head and tail. */
	rqstp->rq_arg.head[0] = head->arg.head[0];
	rqstp->rq_arg.tail[0] = head->arg.tail[0];
	rqstp->rq_arg.len = head->arg.len;
	rqstp->rq_arg.buflen = head->arg.buflen;

	/* Free the context */
	svc_rdma_put_context(head, 0);

	/* XXX: What should this be? */
	rqstp->rq_prot = IPPROTO_MAX;
	svc_xprt_copy_addrs(rqstp, rqstp->rq_xprt);

	ret = rqstp->rq_arg.head[0].iov_len
		+ rqstp->rq_arg.page_len
		+ rqstp->rq_arg.tail[0].iov_len;
	dprintk("svcrdma: deferred read ret=%d, rq_arg.len =%d, "
		"rq_arg.head[0].iov_base=%p, rq_arg.head[0].iov_len = %zd\n",
		ret, rqstp->rq_arg.len,	rqstp->rq_arg.head[0].iov_base,
		rqstp->rq_arg.head[0].iov_len);

	return ret;
}

/*
 * Set up the rqstp thread context to point to the RQ buffer. If
 * necessary, pull additional data from the client with an RDMA_READ
 * request.
 */
int svc_rdma_recvfrom(struct svc_rqst *rqstp)
{
	struct svc_xprt *xprt = rqstp->rq_xprt;
	struct svcxprt_rdma *rdma_xprt =
		container_of(xprt, struct svcxprt_rdma, sc_xprt);
	struct svc_rdma_op_ctxt *ctxt = NULL;
	struct rpcrdma_msg *rmsgp;
	int ret = 0;
	int len;

	dprintk("svcrdma: rqstp=%p\n", rqstp);

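	/* Handle a previously-deferred read completion first; otherwise
	 * take the next receive completion off the dto queue.
	 */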
	spin_lock_bh(&rdma_xprt->sc_rq_dto_lock);
	if (!list_empty(&rdma_xprt->sc_read_complete_q)) {
		ctxt = list_entry(rdma_xprt->sc_read_complete_q.next,
				  struct svc_rdma_op_ctxt,
				  dto_q);
		list_del_init(&ctxt->dto_q);
		spin_unlock_bh(&rdma_xprt->sc_rq_dto_lock);
		return rdma_read_complete(rqstp, ctxt);
	} else if (!list_empty(&rdma_xprt->sc_rq_dto_q)) {
		ctxt = list_entry(rdma_xprt->sc_rq_dto_q.next,
				  struct svc_rdma_op_ctxt,
				  dto_q);
		list_del_init(&ctxt->dto_q);
	} else {
		atomic_inc(&rdma_stat_rq_starve);
		clear_bit(XPT_DATA, &xprt->xpt_flags);
		ctxt = NULL;
	}
	spin_unlock_bh(&rdma_xprt->sc_rq_dto_lock);
	if (!ctxt) {
		/* This is the EAGAIN path. The svc_recv routine will
		 * return -EAGAIN, the nfsd thread will call into
		 * svc_recv again, and we should not be on the active
		 * transport list.
		 */
		if (test_bit(XPT_CLOSE, &xprt->xpt_flags))
			goto close_out;

		goto out;
	}
	dprintk("svcrdma: processing ctxt=%p on xprt=%p, rqstp=%p, status=%d\n",
		ctxt, rdma_xprt, rqstp, ctxt->wc_status);
	BUG_ON(ctxt->wc_status != IB_WC_SUCCESS);
	atomic_inc(&rdma_stat_recv);

	/* Build up the XDR from the receive buffers. */
	rdma_build_arg_xdr(rqstp, ctxt, ctxt->byte_len);

	/* Decode the RDMA header. */
	len = svc_rdma_xdr_decode_req(&rmsgp, rqstp);
	rqstp->rq_xprt_hlen = len;

	/* If the request is invalid, reply with an error */
	if (len < 0) {
		if (len == -ENOSYS)
			svc_rdma_send_error(rdma_xprt, rmsgp, ERR_VERS);
		goto close_out;
	}

	/* Read read-list data. */
	ret = rdma_read_chunks(rdma_xprt, rmsgp, rqstp, ctxt);
	if (ret > 0) {
		/* read-list posted, defer until data received from client. */
		goto defer;
	} else if (ret < 0) {
		/* Post of read-list failed, free context. */
		svc_rdma_put_context(ctxt, 1);
		return 0;
	}

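	/* No read list: the entire RPC message arrived inline in the
	 * receive buffer.
	 */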
	ret = rqstp->rq_arg.head[0].iov_len
		+ rqstp->rq_arg.page_len
		+ rqstp->rq_arg.tail[0].iov_len;
	svc_rdma_put_context(ctxt, 0);
 out:
	dprintk("svcrdma: ret = %d, rq_arg.len =%d, "
		"rq_arg.head[0].iov_base=%p, rq_arg.head[0].iov_len = %zd\n",
		ret, rqstp->rq_arg.len,
		rqstp->rq_arg.head[0].iov_base,
		rqstp->rq_arg.head[0].iov_len);
	rqstp->rq_prot = IPPROTO_MAX;
	svc_xprt_copy_addrs(rqstp, xprt);
	return ret;

 close_out:
	if (ctxt)
		svc_rdma_put_context(ctxt, 1);
	dprintk("svcrdma: transport %p is closing\n", xprt);
	/*
	 * Set the close bit and enqueue it. svc_recv will see the
	 * close bit and call svc_xprt_delete
	 */
	set_bit(XPT_CLOSE, &xprt->xpt_flags);
defer:
	return 0;
}