1/* 2 * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved. 3 * 4 * This software is available to you under a choice of one of two 5 * licenses. You may choose to be licensed under the terms of the GNU 6 * General Public License (GPL) Version 2, available from the file 7 * COPYING in the main directory of this source tree, or the BSD-type 8 * license below: 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 14 * Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 17 * Redistributions in binary form must reproduce the above 18 * copyright notice, this list of conditions and the following 19 * disclaimer in the documentation and/or other materials provided 20 * with the distribution. 21 * 22 * Neither the name of the Network Appliance, Inc. nor the names of 23 * its contributors may be used to endorse or promote products 24 * derived from this software without specific prior written 25 * permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 30 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 31 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 32 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 33 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 34 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 35 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 36 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 37 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 38 * 39 * Author: Tom Tucker <tom@opengridcomputing.com> 40 */ 41 42#include <linux/sunrpc/xdr.h> 43#include <linux/sunrpc/debug.h> 44#include <asm/unaligned.h> 45#include <linux/sunrpc/rpc_rdma.h> 46#include <linux/sunrpc/svc_rdma.h> 47 48#define RPCDBG_FACILITY RPCDBG_SVCXPRT 49 50/* 51 * Decodes a read chunk list. The expected format is as follows: 52 * descrim : xdr_one 53 * position : u32 offset into XDR stream 54 * handle : u32 RKEY 55 * . . . 56 * end-of-list: xdr_zero 57 */ 58static u32 *decode_read_list(u32 *va, u32 *vaend) 59{ 60 struct rpcrdma_read_chunk *ch = (struct rpcrdma_read_chunk *)va; 61 62 while (ch->rc_discrim != xdr_zero) { 63 if (((unsigned long)ch + sizeof(struct rpcrdma_read_chunk)) > 64 (unsigned long)vaend) { 65 dprintk("svcrdma: vaend=%p, ch=%p\n", vaend, ch); 66 return NULL; 67 } 68 ch++; 69 } 70 return (u32 *)&ch->rc_position; 71} 72 73/* 74 * Determine number of chunks and total bytes in chunk list. The chunk 75 * list has already been verified to fit within the RPCRDMA header. 76 */ 77void svc_rdma_rcl_chunk_counts(struct rpcrdma_read_chunk *ch, 78 int *ch_count, int *byte_count) 79{ 80 /* compute the number of bytes represented by read chunks */ 81 *byte_count = 0; 82 *ch_count = 0; 83 for (; ch->rc_discrim != 0; ch++) { 84 *byte_count = *byte_count + ntohl(ch->rc_target.rs_length); 85 *ch_count = *ch_count + 1; 86 } 87} 88 89/* 90 * Decodes a write chunk list. The expected format is as follows: 91 * descrim : xdr_one 92 * nchunks : <count> 93 * handle : u32 RKEY ---+ 94 * length : u32 <len of segment> | 95 * offset : remove va + <count> 96 * . . . | 97 * ---+ 98 */ 99static u32 *decode_write_list(u32 *va, u32 *vaend) 100{ 101 unsigned long start, end; 102 int nchunks; 103 104 struct rpcrdma_write_array *ary = 105 (struct rpcrdma_write_array *)va; 106 107 /* Check for not write-array */ 108 if (ary->wc_discrim == xdr_zero) 109 return (u32 *)&ary->wc_nchunks; 110 111 if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) > 112 (unsigned long)vaend) { 113 dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend); 114 return NULL; 115 } 116 nchunks = ntohl(ary->wc_nchunks); 117 118 start = (unsigned long)&ary->wc_array[0]; 119 end = (unsigned long)vaend; 120 if (nchunks < 0 || 121 nchunks > (SIZE_MAX - start) / sizeof(struct rpcrdma_write_chunk) || 122 (start + (sizeof(struct rpcrdma_write_chunk) * nchunks)) > end) { 123 dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n", 124 ary, nchunks, vaend); 125 return NULL; 126 } 127 /* 128 * rs_length is the 2nd 4B field in wc_target and taking its 129 * address skips the list terminator 130 */ 131 return (u32 *)&ary->wc_array[nchunks].wc_target.rs_length; 132} 133 134static u32 *decode_reply_array(u32 *va, u32 *vaend) 135{ 136 unsigned long start, end; 137 int nchunks; 138 struct rpcrdma_write_array *ary = 139 (struct rpcrdma_write_array *)va; 140 141 /* Check for no reply-array */ 142 if (ary->wc_discrim == xdr_zero) 143 return (u32 *)&ary->wc_nchunks; 144 145 if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) > 146 (unsigned long)vaend) { 147 dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend); 148 return NULL; 149 } 150 nchunks = ntohl(ary->wc_nchunks); 151 152 start = (unsigned long)&ary->wc_array[0]; 153 end = (unsigned long)vaend; 154 if (nchunks < 0 || 155 nchunks > (SIZE_MAX - start) / sizeof(struct rpcrdma_write_chunk) || 156 (start + (sizeof(struct rpcrdma_write_chunk) * nchunks)) > end) { 157 dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n", 158 ary, nchunks, vaend); 159 return NULL; 160 } 161 return (u32 *)&ary->wc_array[nchunks]; 162} 163 164int svc_rdma_xdr_decode_req(struct rpcrdma_msg **rdma_req, 165 struct svc_rqst *rqstp) 166{ 167 struct rpcrdma_msg *rmsgp = NULL; 168 u32 *va; 169 u32 *vaend; 170 u32 hdr_len; 171 172 rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base; 173 174 /* Verify that there's enough bytes for header + something */ 175 if (rqstp->rq_arg.len <= RPCRDMA_HDRLEN_MIN) { 176 dprintk("svcrdma: header too short = %d\n", 177 rqstp->rq_arg.len); 178 return -EINVAL; 179 } 180 181 /* Decode the header */ 182 rmsgp->rm_xid = ntohl(rmsgp->rm_xid); 183 rmsgp->rm_vers = ntohl(rmsgp->rm_vers); 184 rmsgp->rm_credit = ntohl(rmsgp->rm_credit); 185 rmsgp->rm_type = ntohl(rmsgp->rm_type); 186 187 if (rmsgp->rm_vers != RPCRDMA_VERSION) 188 return -ENOSYS; 189 190 /* Pull in the extra for the padded case and bump our pointer */ 191 if (rmsgp->rm_type == RDMA_MSGP) { 192 int hdrlen; 193 rmsgp->rm_body.rm_padded.rm_align = 194 ntohl(rmsgp->rm_body.rm_padded.rm_align); 195 rmsgp->rm_body.rm_padded.rm_thresh = 196 ntohl(rmsgp->rm_body.rm_padded.rm_thresh); 197 198 va = &rmsgp->rm_body.rm_padded.rm_pempty[4]; 199 rqstp->rq_arg.head[0].iov_base = va; 200 hdrlen = (u32)((unsigned long)va - (unsigned long)rmsgp); 201 rqstp->rq_arg.head[0].iov_len -= hdrlen; 202 if (hdrlen > rqstp->rq_arg.len) 203 return -EINVAL; 204 return hdrlen; 205 } 206 207 /* The chunk list may contain either a read chunk list or a write 208 * chunk list and a reply chunk list. 209 */ 210 va = &rmsgp->rm_body.rm_chunks[0]; 211 vaend = (u32 *)((unsigned long)rmsgp + rqstp->rq_arg.len); 212 va = decode_read_list(va, vaend); 213 if (!va) 214 return -EINVAL; 215 va = decode_write_list(va, vaend); 216 if (!va) 217 return -EINVAL; 218 va = decode_reply_array(va, vaend); 219 if (!va) 220 return -EINVAL; 221 222 rqstp->rq_arg.head[0].iov_base = va; 223 hdr_len = (unsigned long)va - (unsigned long)rmsgp; 224 rqstp->rq_arg.head[0].iov_len -= hdr_len; 225 226 *rdma_req = rmsgp; 227 return hdr_len; 228} 229 230int svc_rdma_xdr_decode_deferred_req(struct svc_rqst *rqstp) 231{ 232 struct rpcrdma_msg *rmsgp = NULL; 233 struct rpcrdma_read_chunk *ch; 234 struct rpcrdma_write_array *ary; 235 u32 *va; 236 u32 hdrlen; 237 238 dprintk("svcrdma: processing deferred RDMA header on rqstp=%p\n", 239 rqstp); 240 rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base; 241 242 /* Pull in the extra for the padded case and bump our pointer */ 243 if (rmsgp->rm_type == RDMA_MSGP) { 244 va = &rmsgp->rm_body.rm_padded.rm_pempty[4]; 245 rqstp->rq_arg.head[0].iov_base = va; 246 hdrlen = (u32)((unsigned long)va - (unsigned long)rmsgp); 247 rqstp->rq_arg.head[0].iov_len -= hdrlen; 248 return hdrlen; 249 } 250 251 /* 252 * Skip all chunks to find RPC msg. These were previously processed 253 */ 254 va = &rmsgp->rm_body.rm_chunks[0]; 255 256 /* Skip read-list */ 257 for (ch = (struct rpcrdma_read_chunk *)va; 258 ch->rc_discrim != xdr_zero; ch++); 259 va = (u32 *)&ch->rc_position; 260 261 /* Skip write-list */ 262 ary = (struct rpcrdma_write_array *)va; 263 if (ary->wc_discrim == xdr_zero) 264 va = (u32 *)&ary->wc_nchunks; 265 else 266 /* 267 * rs_length is the 2nd 4B field in wc_target and taking its 268 * address skips the list terminator 269 */ 270 va = (u32 *)&ary->wc_array[ary->wc_nchunks].wc_target.rs_length; 271 272 /* Skip reply-array */ 273 ary = (struct rpcrdma_write_array *)va; 274 if (ary->wc_discrim == xdr_zero) 275 va = (u32 *)&ary->wc_nchunks; 276 else 277 va = (u32 *)&ary->wc_array[ary->wc_nchunks]; 278 279 rqstp->rq_arg.head[0].iov_base = va; 280 hdrlen = (unsigned long)va - (unsigned long)rmsgp; 281 rqstp->rq_arg.head[0].iov_len -= hdrlen; 282 283 return hdrlen; 284} 285 286int svc_rdma_xdr_encode_error(struct svcxprt_rdma *xprt, 287 struct rpcrdma_msg *rmsgp, 288 enum rpcrdma_errcode err, u32 *va) 289{ 290 u32 *startp = va; 291 292 *va++ = htonl(rmsgp->rm_xid); 293 *va++ = htonl(rmsgp->rm_vers); 294 *va++ = htonl(xprt->sc_max_requests); 295 *va++ = htonl(RDMA_ERROR); 296 *va++ = htonl(err); 297 if (err == ERR_VERS) { 298 *va++ = htonl(RPCRDMA_VERSION); 299 *va++ = htonl(RPCRDMA_VERSION); 300 } 301 302 return (int)((unsigned long)va - (unsigned long)startp); 303} 304 305int svc_rdma_xdr_get_reply_hdr_len(struct rpcrdma_msg *rmsgp) 306{ 307 struct rpcrdma_write_array *wr_ary; 308 309 /* There is no read-list in a reply */ 310 311 /* skip write list */ 312 wr_ary = (struct rpcrdma_write_array *) 313 &rmsgp->rm_body.rm_chunks[1]; 314 if (wr_ary->wc_discrim) 315 wr_ary = (struct rpcrdma_write_array *) 316 &wr_ary->wc_array[ntohl(wr_ary->wc_nchunks)]. 317 wc_target.rs_length; 318 else 319 wr_ary = (struct rpcrdma_write_array *) 320 &wr_ary->wc_nchunks; 321 322 /* skip reply array */ 323 if (wr_ary->wc_discrim) 324 wr_ary = (struct rpcrdma_write_array *) 325 &wr_ary->wc_array[ntohl(wr_ary->wc_nchunks)]; 326 else 327 wr_ary = (struct rpcrdma_write_array *) 328 &wr_ary->wc_nchunks; 329 330 return (unsigned long) wr_ary - (unsigned long) rmsgp; 331} 332 333void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *rmsgp, int chunks) 334{ 335 struct rpcrdma_write_array *ary; 336 337 /* no read-list */ 338 rmsgp->rm_body.rm_chunks[0] = xdr_zero; 339 340 /* write-array discrim */ 341 ary = (struct rpcrdma_write_array *) 342 &rmsgp->rm_body.rm_chunks[1]; 343 ary->wc_discrim = xdr_one; 344 ary->wc_nchunks = htonl(chunks); 345 346 /* write-list terminator */ 347 ary->wc_array[chunks].wc_target.rs_handle = xdr_zero; 348 349 /* reply-array discriminator */ 350 ary->wc_array[chunks].wc_target.rs_length = xdr_zero; 351} 352 353void svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array *ary, 354 int chunks) 355{ 356 ary->wc_discrim = xdr_one; 357 ary->wc_nchunks = htonl(chunks); 358} 359 360void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *ary, 361 int chunk_no, 362 __be32 rs_handle, 363 __be64 rs_offset, 364 u32 write_len) 365{ 366 struct rpcrdma_segment *seg = &ary->wc_array[chunk_no].wc_target; 367 seg->rs_handle = rs_handle; 368 seg->rs_offset = rs_offset; 369 seg->rs_length = htonl(write_len); 370} 371 372void svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma *xprt, 373 struct rpcrdma_msg *rdma_argp, 374 struct rpcrdma_msg *rdma_resp, 375 enum rpcrdma_proc rdma_type) 376{ 377 rdma_resp->rm_xid = htonl(rdma_argp->rm_xid); 378 rdma_resp->rm_vers = htonl(rdma_argp->rm_vers); 379 rdma_resp->rm_credit = htonl(xprt->sc_max_requests); 380 rdma_resp->rm_type = htonl(rdma_type); 381 382 /* Encode <nul> chunks lists */ 383 rdma_resp->rm_body.rm_chunks[0] = xdr_zero; 384 rdma_resp->rm_body.rm_chunks[1] = xdr_zero; 385 rdma_resp->rm_body.rm_chunks[2] = xdr_zero; 386} 387