1/*
2 * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses.  You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the BSD-type
8 * license below:
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 *
14 *      Redistributions of source code must retain the above copyright
15 *      notice, this list of conditions and the following disclaimer.
16 *
17 *      Redistributions in binary form must reproduce the above
18 *      copyright notice, this list of conditions and the following
19 *      disclaimer in the documentation and/or other materials provided
20 *      with the distribution.
21 *
22 *      Neither the name of the Network Appliance, Inc. nor the names of
23 *      its contributors may be used to endorse or promote products
24 *      derived from this software without specific prior written
25 *      permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
30 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
32 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
33 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
34 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
35 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
36 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
37 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 *
39 * Author: Tom Tucker <tom@opengridcomputing.com>
40 */
41
42#include <linux/sunrpc/xdr.h>
43#include <linux/sunrpc/debug.h>
44#include <asm/unaligned.h>
45#include <linux/sunrpc/rpc_rdma.h>
46#include <linux/sunrpc/svc_rdma.h>
47
48#define RPCDBG_FACILITY	RPCDBG_SVCXPRT
49
50/*
51 * Decodes a read chunk list. The expected format is as follows:
52 *    descrim  : xdr_one
53 *    position : u32 offset into XDR stream
54 *    handle   : u32 RKEY
55 *    . . .
56 *  end-of-list: xdr_zero
57 */
58static u32 *decode_read_list(u32 *va, u32 *vaend)
59{
60	struct rpcrdma_read_chunk *ch = (struct rpcrdma_read_chunk *)va;
61
62	while (ch->rc_discrim != xdr_zero) {
63		if (((unsigned long)ch + sizeof(struct rpcrdma_read_chunk)) >
64		    (unsigned long)vaend) {
65			dprintk("svcrdma: vaend=%p, ch=%p\n", vaend, ch);
66			return NULL;
67		}
68		ch++;
69	}
70	return (u32 *)&ch->rc_position;
71}
72
73/*
74 * Determine number of chunks and total bytes in chunk list. The chunk
75 * list has already been verified to fit within the RPCRDMA header.
76 */
77void svc_rdma_rcl_chunk_counts(struct rpcrdma_read_chunk *ch,
78			       int *ch_count, int *byte_count)
79{
80	/* compute the number of bytes represented by read chunks */
81	*byte_count = 0;
82	*ch_count = 0;
83	for (; ch->rc_discrim != 0; ch++) {
84		*byte_count = *byte_count + ntohl(ch->rc_target.rs_length);
85		*ch_count = *ch_count + 1;
86	}
87}
88
89/*
90 * Decodes a write chunk list. The expected format is as follows:
91 *    descrim  : xdr_one
92 *    nchunks  : <count>
93 *       handle   : u32 RKEY              ---+
94 *       length   : u32 <len of segment>     |
95 *       offset   : remove va                + <count>
96 *       . . .                               |
97 *                                        ---+
98 */
99static u32 *decode_write_list(u32 *va, u32 *vaend)
100{
101	unsigned long start, end;
102	int nchunks;
103
104	struct rpcrdma_write_array *ary =
105		(struct rpcrdma_write_array *)va;
106
107	/* Check for not write-array */
108	if (ary->wc_discrim == xdr_zero)
109		return (u32 *)&ary->wc_nchunks;
110
111	if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) >
112	    (unsigned long)vaend) {
113		dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend);
114		return NULL;
115	}
116	nchunks = ntohl(ary->wc_nchunks);
117
118	start = (unsigned long)&ary->wc_array[0];
119	end = (unsigned long)vaend;
120	if (nchunks < 0 ||
121	    nchunks > (SIZE_MAX - start) / sizeof(struct rpcrdma_write_chunk) ||
122	    (start + (sizeof(struct rpcrdma_write_chunk) * nchunks)) > end) {
123		dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n",
124			ary, nchunks, vaend);
125		return NULL;
126	}
127	/*
128	 * rs_length is the 2nd 4B field in wc_target and taking its
129	 * address skips the list terminator
130	 */
131	return (u32 *)&ary->wc_array[nchunks].wc_target.rs_length;
132}
133
134static u32 *decode_reply_array(u32 *va, u32 *vaend)
135{
136	unsigned long start, end;
137	int nchunks;
138	struct rpcrdma_write_array *ary =
139		(struct rpcrdma_write_array *)va;
140
141	/* Check for no reply-array */
142	if (ary->wc_discrim == xdr_zero)
143		return (u32 *)&ary->wc_nchunks;
144
145	if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) >
146	    (unsigned long)vaend) {
147		dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend);
148		return NULL;
149	}
150	nchunks = ntohl(ary->wc_nchunks);
151
152	start = (unsigned long)&ary->wc_array[0];
153	end = (unsigned long)vaend;
154	if (nchunks < 0 ||
155	    nchunks > (SIZE_MAX - start) / sizeof(struct rpcrdma_write_chunk) ||
156	    (start + (sizeof(struct rpcrdma_write_chunk) * nchunks)) > end) {
157		dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n",
158			ary, nchunks, vaend);
159		return NULL;
160	}
161	return (u32 *)&ary->wc_array[nchunks];
162}
163
164int svc_rdma_xdr_decode_req(struct rpcrdma_msg **rdma_req,
165			    struct svc_rqst *rqstp)
166{
167	struct rpcrdma_msg *rmsgp = NULL;
168	u32 *va;
169	u32 *vaend;
170	u32 hdr_len;
171
172	rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base;
173
174	/* Verify that there's enough bytes for header + something */
175	if (rqstp->rq_arg.len <= RPCRDMA_HDRLEN_MIN) {
176		dprintk("svcrdma: header too short = %d\n",
177			rqstp->rq_arg.len);
178		return -EINVAL;
179	}
180
181	/* Decode the header */
182	rmsgp->rm_xid = ntohl(rmsgp->rm_xid);
183	rmsgp->rm_vers = ntohl(rmsgp->rm_vers);
184	rmsgp->rm_credit = ntohl(rmsgp->rm_credit);
185	rmsgp->rm_type = ntohl(rmsgp->rm_type);
186
187	if (rmsgp->rm_vers != RPCRDMA_VERSION)
188		return -ENOSYS;
189
190	/* Pull in the extra for the padded case and bump our pointer */
191	if (rmsgp->rm_type == RDMA_MSGP) {
192		int hdrlen;
193		rmsgp->rm_body.rm_padded.rm_align =
194			ntohl(rmsgp->rm_body.rm_padded.rm_align);
195		rmsgp->rm_body.rm_padded.rm_thresh =
196			ntohl(rmsgp->rm_body.rm_padded.rm_thresh);
197
198		va = &rmsgp->rm_body.rm_padded.rm_pempty[4];
199		rqstp->rq_arg.head[0].iov_base = va;
200		hdrlen = (u32)((unsigned long)va - (unsigned long)rmsgp);
201		rqstp->rq_arg.head[0].iov_len -= hdrlen;
202		if (hdrlen > rqstp->rq_arg.len)
203			return -EINVAL;
204		return hdrlen;
205	}
206
207	/* The chunk list may contain either a read chunk list or a write
208	 * chunk list and a reply chunk list.
209	 */
210	va = &rmsgp->rm_body.rm_chunks[0];
211	vaend = (u32 *)((unsigned long)rmsgp + rqstp->rq_arg.len);
212	va = decode_read_list(va, vaend);
213	if (!va)
214		return -EINVAL;
215	va = decode_write_list(va, vaend);
216	if (!va)
217		return -EINVAL;
218	va = decode_reply_array(va, vaend);
219	if (!va)
220		return -EINVAL;
221
222	rqstp->rq_arg.head[0].iov_base = va;
223	hdr_len = (unsigned long)va - (unsigned long)rmsgp;
224	rqstp->rq_arg.head[0].iov_len -= hdr_len;
225
226	*rdma_req = rmsgp;
227	return hdr_len;
228}
229
230int svc_rdma_xdr_decode_deferred_req(struct svc_rqst *rqstp)
231{
232	struct rpcrdma_msg *rmsgp = NULL;
233	struct rpcrdma_read_chunk *ch;
234	struct rpcrdma_write_array *ary;
235	u32 *va;
236	u32 hdrlen;
237
238	dprintk("svcrdma: processing deferred RDMA header on rqstp=%p\n",
239		rqstp);
240	rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base;
241
242	/* Pull in the extra for the padded case and bump our pointer */
243	if (rmsgp->rm_type == RDMA_MSGP) {
244		va = &rmsgp->rm_body.rm_padded.rm_pempty[4];
245		rqstp->rq_arg.head[0].iov_base = va;
246		hdrlen = (u32)((unsigned long)va - (unsigned long)rmsgp);
247		rqstp->rq_arg.head[0].iov_len -= hdrlen;
248		return hdrlen;
249	}
250
251	/*
252	 * Skip all chunks to find RPC msg. These were previously processed
253	 */
254	va = &rmsgp->rm_body.rm_chunks[0];
255
256	/* Skip read-list */
257	for (ch = (struct rpcrdma_read_chunk *)va;
258	     ch->rc_discrim != xdr_zero; ch++);
259	va = (u32 *)&ch->rc_position;
260
261	/* Skip write-list */
262	ary = (struct rpcrdma_write_array *)va;
263	if (ary->wc_discrim == xdr_zero)
264		va = (u32 *)&ary->wc_nchunks;
265	else
266		/*
267		 * rs_length is the 2nd 4B field in wc_target and taking its
268		 * address skips the list terminator
269		 */
270		va = (u32 *)&ary->wc_array[ary->wc_nchunks].wc_target.rs_length;
271
272	/* Skip reply-array */
273	ary = (struct rpcrdma_write_array *)va;
274	if (ary->wc_discrim == xdr_zero)
275		va = (u32 *)&ary->wc_nchunks;
276	else
277		va = (u32 *)&ary->wc_array[ary->wc_nchunks];
278
279	rqstp->rq_arg.head[0].iov_base = va;
280	hdrlen = (unsigned long)va - (unsigned long)rmsgp;
281	rqstp->rq_arg.head[0].iov_len -= hdrlen;
282
283	return hdrlen;
284}
285
286int svc_rdma_xdr_encode_error(struct svcxprt_rdma *xprt,
287			      struct rpcrdma_msg *rmsgp,
288			      enum rpcrdma_errcode err, u32 *va)
289{
290	u32 *startp = va;
291
292	*va++ = htonl(rmsgp->rm_xid);
293	*va++ = htonl(rmsgp->rm_vers);
294	*va++ = htonl(xprt->sc_max_requests);
295	*va++ = htonl(RDMA_ERROR);
296	*va++ = htonl(err);
297	if (err == ERR_VERS) {
298		*va++ = htonl(RPCRDMA_VERSION);
299		*va++ = htonl(RPCRDMA_VERSION);
300	}
301
302	return (int)((unsigned long)va - (unsigned long)startp);
303}
304
305int svc_rdma_xdr_get_reply_hdr_len(struct rpcrdma_msg *rmsgp)
306{
307	struct rpcrdma_write_array *wr_ary;
308
309	/* There is no read-list in a reply */
310
311	/* skip write list */
312	wr_ary = (struct rpcrdma_write_array *)
313		&rmsgp->rm_body.rm_chunks[1];
314	if (wr_ary->wc_discrim)
315		wr_ary = (struct rpcrdma_write_array *)
316			&wr_ary->wc_array[ntohl(wr_ary->wc_nchunks)].
317			wc_target.rs_length;
318	else
319		wr_ary = (struct rpcrdma_write_array *)
320			&wr_ary->wc_nchunks;
321
322	/* skip reply array */
323	if (wr_ary->wc_discrim)
324		wr_ary = (struct rpcrdma_write_array *)
325			&wr_ary->wc_array[ntohl(wr_ary->wc_nchunks)];
326	else
327		wr_ary = (struct rpcrdma_write_array *)
328			&wr_ary->wc_nchunks;
329
330	return (unsigned long) wr_ary - (unsigned long) rmsgp;
331}
332
333void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *rmsgp, int chunks)
334{
335	struct rpcrdma_write_array *ary;
336
337	/* no read-list */
338	rmsgp->rm_body.rm_chunks[0] = xdr_zero;
339
340	/* write-array discrim */
341	ary = (struct rpcrdma_write_array *)
342		&rmsgp->rm_body.rm_chunks[1];
343	ary->wc_discrim = xdr_one;
344	ary->wc_nchunks = htonl(chunks);
345
346	/* write-list terminator */
347	ary->wc_array[chunks].wc_target.rs_handle = xdr_zero;
348
349	/* reply-array discriminator */
350	ary->wc_array[chunks].wc_target.rs_length = xdr_zero;
351}
352
353void svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array *ary,
354				 int chunks)
355{
356	ary->wc_discrim = xdr_one;
357	ary->wc_nchunks = htonl(chunks);
358}
359
360void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *ary,
361				     int chunk_no,
362				     __be32 rs_handle,
363				     __be64 rs_offset,
364				     u32 write_len)
365{
366	struct rpcrdma_segment *seg = &ary->wc_array[chunk_no].wc_target;
367	seg->rs_handle = rs_handle;
368	seg->rs_offset = rs_offset;
369	seg->rs_length = htonl(write_len);
370}
371
372void svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma *xprt,
373				  struct rpcrdma_msg *rdma_argp,
374				  struct rpcrdma_msg *rdma_resp,
375				  enum rpcrdma_proc rdma_type)
376{
377	rdma_resp->rm_xid = htonl(rdma_argp->rm_xid);
378	rdma_resp->rm_vers = htonl(rdma_argp->rm_vers);
379	rdma_resp->rm_credit = htonl(xprt->sc_max_requests);
380	rdma_resp->rm_type = htonl(rdma_type);
381
382	/* Encode <nul> chunks lists */
383	rdma_resp->rm_body.rm_chunks[0] = xdr_zero;
384	rdma_resp->rm_body.rm_chunks[1] = xdr_zero;
385	rdma_resp->rm_body.rm_chunks[2] = xdr_zero;
386}
387