svc_rdma.c revision f8572d8f2a2ba75408b97dc24ef47c83671795d7
1/*
2 * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses.  You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the BSD-type
8 * license below:
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 *
14 *      Redistributions of source code must retain the above copyright
15 *      notice, this list of conditions and the following disclaimer.
16 *
17 *      Redistributions in binary form must reproduce the above
18 *      copyright notice, this list of conditions and the following
19 *      disclaimer in the documentation and/or other materials provided
20 *      with the distribution.
21 *
22 *      Neither the name of the Network Appliance, Inc. nor the names of
23 *      its contributors may be used to endorse or promote products
24 *      derived from this software without specific prior written
25 *      permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
30 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
32 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
33 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
34 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
35 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
36 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
37 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 *
39 * Author: Tom Tucker <tom@opengridcomputing.com>
40 */
41#include <linux/module.h>
42#include <linux/init.h>
43#include <linux/fs.h>
44#include <linux/sysctl.h>
45#include <linux/sunrpc/clnt.h>
46#include <linux/sunrpc/sched.h>
47#include <linux/sunrpc/svc_rdma.h>
48
49#define RPCDBG_FACILITY	RPCDBG_SVCXPRT
50
51/* RPC/RDMA parameters */
52unsigned int svcrdma_ord = RPCRDMA_ORD;
53static unsigned int min_ord = 1;
54static unsigned int max_ord = 4096;
55unsigned int svcrdma_max_requests = RPCRDMA_MAX_REQUESTS;
56static unsigned int min_max_requests = 4;
57static unsigned int max_max_requests = 16384;
58unsigned int svcrdma_max_req_size = RPCRDMA_MAX_REQ_SIZE;
59static unsigned int min_max_inline = 4096;
60static unsigned int max_max_inline = 65536;
61
62atomic_t rdma_stat_recv;
63atomic_t rdma_stat_read;
64atomic_t rdma_stat_write;
65atomic_t rdma_stat_sq_starve;
66atomic_t rdma_stat_rq_starve;
67atomic_t rdma_stat_rq_poll;
68atomic_t rdma_stat_rq_prod;
69atomic_t rdma_stat_sq_poll;
70atomic_t rdma_stat_sq_prod;
71
/*
 * Slab caches for temporary NFS request maps and contexts.
 * Both are created in svc_rdma_init() and destroyed in
 * svc_rdma_cleanup().
 */
struct kmem_cache *svc_rdma_ctxt_cachep;	/* struct svc_rdma_op_ctxt */
struct kmem_cache *svc_rdma_map_cachep;		/* struct svc_rdma_req_map */
75
76/*
77 * This function implements reading and resetting an atomic_t stat
78 * variable through read/write to a proc file. Any write to the file
79 * resets the associated statistic to zero. Any read returns it's
80 * current value.
81 */
82static int read_reset_stat(ctl_table *table, int write,
83			   void __user *buffer, size_t *lenp,
84			   loff_t *ppos)
85{
86	atomic_t *stat = (atomic_t *)table->data;
87
88	if (!stat)
89		return -EINVAL;
90
91	if (write)
92		atomic_set(stat, 0);
93	else {
94		char str_buf[32];
95		char *data;
96		int len = snprintf(str_buf, 32, "%d\n", atomic_read(stat));
97		if (len >= 32)
98			return -EFAULT;
99		len = strlen(str_buf);
100		if (*ppos > len) {
101			*lenp = 0;
102			return 0;
103		}
104		data = &str_buf[*ppos];
105		len -= *ppos;
106		if (len > *lenp)
107			len = *lenp;
108		if (len && copy_to_user(buffer, str_buf, len))
109			return -EFAULT;
110		*lenp = len;
111		*ppos += len;
112	}
113	return 0;
114}
115
116static struct ctl_table_header *svcrdma_table_header;
117static ctl_table svcrdma_parm_table[] = {
118	{
119		.procname	= "max_requests",
120		.data		= &svcrdma_max_requests,
121		.maxlen		= sizeof(unsigned int),
122		.mode		= 0644,
123		.proc_handler	= &proc_dointvec_minmax,
124		.extra1		= &min_max_requests,
125		.extra2		= &max_max_requests
126	},
127	{
128		.procname	= "max_req_size",
129		.data		= &svcrdma_max_req_size,
130		.maxlen		= sizeof(unsigned int),
131		.mode		= 0644,
132		.proc_handler	= &proc_dointvec_minmax,
133		.extra1		= &min_max_inline,
134		.extra2		= &max_max_inline
135	},
136	{
137		.procname	= "max_outbound_read_requests",
138		.data		= &svcrdma_ord,
139		.maxlen		= sizeof(unsigned int),
140		.mode		= 0644,
141		.proc_handler	= &proc_dointvec_minmax,
142		.extra1		= &min_ord,
143		.extra2		= &max_ord,
144	},
145
146	{
147		.procname	= "rdma_stat_read",
148		.data		= &rdma_stat_read,
149		.maxlen		= sizeof(atomic_t),
150		.mode		= 0644,
151		.proc_handler	= &read_reset_stat,
152	},
153	{
154		.procname	= "rdma_stat_recv",
155		.data		= &rdma_stat_recv,
156		.maxlen		= sizeof(atomic_t),
157		.mode		= 0644,
158		.proc_handler	= &read_reset_stat,
159	},
160	{
161		.procname	= "rdma_stat_write",
162		.data		= &rdma_stat_write,
163		.maxlen		= sizeof(atomic_t),
164		.mode		= 0644,
165		.proc_handler	= &read_reset_stat,
166	},
167	{
168		.procname	= "rdma_stat_sq_starve",
169		.data		= &rdma_stat_sq_starve,
170		.maxlen		= sizeof(atomic_t),
171		.mode		= 0644,
172		.proc_handler	= &read_reset_stat,
173	},
174	{
175		.procname	= "rdma_stat_rq_starve",
176		.data		= &rdma_stat_rq_starve,
177		.maxlen		= sizeof(atomic_t),
178		.mode		= 0644,
179		.proc_handler	= &read_reset_stat,
180	},
181	{
182		.procname	= "rdma_stat_rq_poll",
183		.data		= &rdma_stat_rq_poll,
184		.maxlen		= sizeof(atomic_t),
185		.mode		= 0644,
186		.proc_handler	= &read_reset_stat,
187	},
188	{
189		.procname	= "rdma_stat_rq_prod",
190		.data		= &rdma_stat_rq_prod,
191		.maxlen		= sizeof(atomic_t),
192		.mode		= 0644,
193		.proc_handler	= &read_reset_stat,
194	},
195	{
196		.procname	= "rdma_stat_sq_poll",
197		.data		= &rdma_stat_sq_poll,
198		.maxlen		= sizeof(atomic_t),
199		.mode		= 0644,
200		.proc_handler	= &read_reset_stat,
201	},
202	{
203		.procname	= "rdma_stat_sq_prod",
204		.data		= &rdma_stat_sq_prod,
205		.maxlen		= sizeof(atomic_t),
206		.mode		= 0644,
207		.proc_handler	= &read_reset_stat,
208	},
209	{ },
210};
211
212static ctl_table svcrdma_table[] = {
213	{
214		.procname	= "svc_rdma",
215		.mode		= 0555,
216		.child		= svcrdma_parm_table
217	},
218	{ },
219};
220
221static ctl_table svcrdma_root_table[] = {
222	{
223		.procname	= "sunrpc",
224		.mode		= 0555,
225		.child		= svcrdma_table
226	},
227	{ },
228};
229
230void svc_rdma_cleanup(void)
231{
232	dprintk("SVCRDMA Module Removed, deregister RPC RDMA transport\n");
233	flush_scheduled_work();
234	if (svcrdma_table_header) {
235		unregister_sysctl_table(svcrdma_table_header);
236		svcrdma_table_header = NULL;
237	}
238	svc_unreg_xprt_class(&svc_rdma_class);
239	kmem_cache_destroy(svc_rdma_map_cachep);
240	kmem_cache_destroy(svc_rdma_ctxt_cachep);
241}
242
243int svc_rdma_init(void)
244{
245	dprintk("SVCRDMA Module Init, register RPC RDMA transport\n");
246	dprintk("\tsvcrdma_ord      : %d\n", svcrdma_ord);
247	dprintk("\tmax_requests     : %d\n", svcrdma_max_requests);
248	dprintk("\tsq_depth         : %d\n",
249		svcrdma_max_requests * RPCRDMA_SQ_DEPTH_MULT);
250	dprintk("\tmax_inline       : %d\n", svcrdma_max_req_size);
251	if (!svcrdma_table_header)
252		svcrdma_table_header =
253			register_sysctl_table(svcrdma_root_table);
254
255	/* Create the temporary map cache */
256	svc_rdma_map_cachep = kmem_cache_create("svc_rdma_map_cache",
257						sizeof(struct svc_rdma_req_map),
258						0,
259						SLAB_HWCACHE_ALIGN,
260						NULL);
261	if (!svc_rdma_map_cachep) {
262		printk(KERN_INFO "Could not allocate map cache.\n");
263		goto err0;
264	}
265
266	/* Create the temporary context cache */
267	svc_rdma_ctxt_cachep =
268		kmem_cache_create("svc_rdma_ctxt_cache",
269				  sizeof(struct svc_rdma_op_ctxt),
270				  0,
271				  SLAB_HWCACHE_ALIGN,
272				  NULL);
273	if (!svc_rdma_ctxt_cachep) {
274		printk(KERN_INFO "Could not allocate WR ctxt cache.\n");
275		goto err1;
276	}
277
278	/* Register RDMA with the SVC transport switch */
279	svc_reg_xprt_class(&svc_rdma_class);
280	return 0;
281 err1:
282	kmem_cache_destroy(svc_rdma_map_cachep);
283 err0:
284	unregister_sysctl_table(svcrdma_table_header);
285	return -ENOMEM;
286}
287MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>");
288MODULE_DESCRIPTION("SVC RDMA Transport");
289MODULE_LICENSE("Dual BSD/GPL");
290module_init(svc_rdma_init);
291module_exit(svc_rdma_cleanup);
292