/*
 * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the BSD-type
 * license below:
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *      Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *
 *      Redistributions in binary form must reproduce the above
 *      copyright notice, this list of conditions and the following
 *      disclaimer in the documentation and/or other materials provided
 *      with the distribution.
 *
 *      Neither the name of the Network Appliance, Inc. nor the names of
 *      its contributors may be used to endorse or promote products
 *      derived from this software without specific prior written
 *      permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Author: Tom Tucker <tom@opengridcomputing.com>
 */
#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/sysctl.h>
#include <linux/workqueue.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/sched.h>
#include <linux/sunrpc/svc_rdma.h>
#include "xprt_rdma.h"

#define RPCDBG_FACILITY	RPCDBG_SVCXPRT

/* RPC/RDMA parameters */
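/*
 * max_requests, max_req_size and max_outbound_read_requests are exposed
 * read/write through the sysctl table registered below (conventionally
 * visible under /proc/sys/sunrpc/svc_rdma/); proc_dointvec_minmax clamps
 * any write to the adjacent min/max bounds.
 */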
unsigned int svcrdma_ord = RPCRDMA_ORD;
static unsigned int min_ord = 1;
static unsigned int max_ord = 4096;
unsigned int svcrdma_max_requests = RPCRDMA_MAX_REQUESTS;
static unsigned int min_max_requests = 4;
static unsigned int max_max_requests = 16384;
unsigned int svcrdma_max_req_size = RPCRDMA_MAX_REQ_SIZE;
static unsigned int min_max_inline = 4096;
static unsigned int max_max_inline = 65536;

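/*
 * Transport statistics. The transport's data paths are expected to bump
 * these with atomic_inc() (e.g. atomic_inc(&rdma_stat_recv) on receive),
 * and each counter is exported through a read/reset sysctl entry
 * registered below.
 */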
atomic_t rdma_stat_recv;
atomic_t rdma_stat_read;
atomic_t rdma_stat_write;
atomic_t rdma_stat_sq_starve;
atomic_t rdma_stat_rq_starve;
atomic_t rdma_stat_rq_poll;
atomic_t rdma_stat_rq_prod;
atomic_t rdma_stat_sq_poll;
atomic_t rdma_stat_sq_prod;

/* Temporary NFS request map and context caches */
struct kmem_cache *svc_rdma_map_cachep;
struct kmem_cache *svc_rdma_ctxt_cachep;

struct workqueue_struct *svc_rdma_wq;

/*
 * This function implements reading and resetting an atomic_t stat
 * counter through a read/write proc file. Any write to the file
 * resets the associated statistic to zero; any read returns its
 * current value.
 */
static int read_reset_stat(struct ctl_table *table, int write,
			   void __user *buffer, size_t *lenp,
			   loff_t *ppos)
{
	atomic_t *stat = (atomic_t *)table->data;

	if (!stat)
		return -EINVAL;

	if (write)
		atomic_set(stat, 0);
	else {
		char str_buf[32];
		char *data;
		int len = snprintf(str_buf, 32, "%d\n", atomic_read(stat));
		if (len >= 32)
			return -EFAULT;
		len = strlen(str_buf);
		if (*ppos > len) {
			*lenp = 0;
			return 0;
		}
		/* Copy from the current file offset so a partial read
		 * resumes where the previous one left off.
		 */
		data = &str_buf[*ppos];
		len -= *ppos;
		if (len > *lenp)
			len = *lenp;
		if (len && copy_to_user(buffer, data, len))
			return -EFAULT;
		*lenp = len;
		*ppos += len;
	}
	return 0;
}
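
/*
 * Example (assuming the usual /proc/sys view of the table registered
 * below): reading /proc/sys/sunrpc/svc_rdma/rdma_stat_recv returns the
 * current count, and any write to that file, e.g.
 * "echo 0 > /proc/sys/sunrpc/svc_rdma/rdma_stat_recv", resets it to zero.
 */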

static struct ctl_table_header *svcrdma_table_header;
static struct ctl_table svcrdma_parm_table[] = {
	{
		.procname	= "max_requests",
		.data		= &svcrdma_max_requests,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &min_max_requests,
		.extra2		= &max_max_requests
	},
	{
		.procname	= "max_req_size",
		.data		= &svcrdma_max_req_size,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &min_max_inline,
		.extra2		= &max_max_inline
	},
	{
		.procname	= "max_outbound_read_requests",
		.data		= &svcrdma_ord,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &min_ord,
		.extra2		= &max_ord,
	},

	{
		.procname	= "rdma_stat_read",
		.data		= &rdma_stat_read,
		.maxlen		= sizeof(atomic_t),
		.mode		= 0644,
		.proc_handler	= read_reset_stat,
	},
	{
		.procname	= "rdma_stat_recv",
		.data		= &rdma_stat_recv,
		.maxlen		= sizeof(atomic_t),
		.mode		= 0644,
		.proc_handler	= read_reset_stat,
	},
	{
		.procname	= "rdma_stat_write",
		.data		= &rdma_stat_write,
		.maxlen		= sizeof(atomic_t),
		.mode		= 0644,
		.proc_handler	= read_reset_stat,
	},
	{
		.procname	= "rdma_stat_sq_starve",
		.data		= &rdma_stat_sq_starve,
		.maxlen		= sizeof(atomic_t),
		.mode		= 0644,
		.proc_handler	= read_reset_stat,
	},
	{
		.procname	= "rdma_stat_rq_starve",
		.data		= &rdma_stat_rq_starve,
		.maxlen		= sizeof(atomic_t),
		.mode		= 0644,
		.proc_handler	= read_reset_stat,
	},
	{
		.procname	= "rdma_stat_rq_poll",
		.data		= &rdma_stat_rq_poll,
		.maxlen		= sizeof(atomic_t),
		.mode		= 0644,
		.proc_handler	= read_reset_stat,
	},
	{
		.procname	= "rdma_stat_rq_prod",
		.data		= &rdma_stat_rq_prod,
		.maxlen		= sizeof(atomic_t),
		.mode		= 0644,
		.proc_handler	= read_reset_stat,
	},
	{
		.procname	= "rdma_stat_sq_poll",
		.data		= &rdma_stat_sq_poll,
		.maxlen		= sizeof(atomic_t),
		.mode		= 0644,
		.proc_handler	= read_reset_stat,
	},
	{
		.procname	= "rdma_stat_sq_prod",
		.data		= &rdma_stat_sq_prod,
		.maxlen		= sizeof(atomic_t),
		.mode		= 0644,
		.proc_handler	= read_reset_stat,
	},
	{ },
};

static struct ctl_table svcrdma_table[] = {
	{
		.procname	= "svc_rdma",
		.mode		= 0555,
		.child		= svcrdma_parm_table
	},
	{ },
};

static struct ctl_table svcrdma_root_table[] = {
	{
		.procname	= "sunrpc",
		.mode		= 0555,
		.child		= svcrdma_table
	},
	{ },
};

void svc_rdma_cleanup(void)
{
	dprintk("SVCRDMA Module Removed, deregister RPC RDMA transport\n");
	destroy_workqueue(svc_rdma_wq);
	if (svcrdma_table_header) {
		unregister_sysctl_table(svcrdma_table_header);
		svcrdma_table_header = NULL;
	}
	svc_unreg_xprt_class(&svc_rdma_class);
	kmem_cache_destroy(svc_rdma_map_cachep);
	kmem_cache_destroy(svc_rdma_ctxt_cachep);
}

int svc_rdma_init(void)
{
	dprintk("SVCRDMA Module Init, register RPC RDMA transport\n");
	dprintk("\tsvcrdma_ord      : %d\n", svcrdma_ord);
	dprintk("\tmax_requests     : %d\n", svcrdma_max_requests);
	dprintk("\tsq_depth         : %d\n",
		svcrdma_max_requests * RPCRDMA_SQ_DEPTH_MULT);
	dprintk("\tmax_inline       : %d\n", svcrdma_max_req_size);

	svc_rdma_wq = alloc_workqueue("svc_rdma", 0, 0);
	if (!svc_rdma_wq)
		return -ENOMEM;

	if (!svcrdma_table_header)
		svcrdma_table_header =
			register_sysctl_table(svcrdma_root_table);

	/* Create the temporary map cache */
	svc_rdma_map_cachep = kmem_cache_create("svc_rdma_map_cache",
						sizeof(struct svc_rdma_req_map),
						0,
						SLAB_HWCACHE_ALIGN,
						NULL);
	if (!svc_rdma_map_cachep) {
		printk(KERN_INFO "Could not allocate map cache.\n");
		goto err0;
	}

	/* Create the temporary context cache */
	svc_rdma_ctxt_cachep =
		kmem_cache_create("svc_rdma_ctxt_cache",
				  sizeof(struct svc_rdma_op_ctxt),
				  0,
				  SLAB_HWCACHE_ALIGN,
				  NULL);
	if (!svc_rdma_ctxt_cachep) {
		printk(KERN_INFO "Could not allocate WR ctxt cache.\n");
		goto err1;
	}

	/* Register RDMA with the SVC transport switch */
	svc_reg_xprt_class(&svc_rdma_class);
	return 0;
 err1:
	kmem_cache_destroy(svc_rdma_map_cachep);
 err0:
	unregister_sysctl_table(svcrdma_table_header);
	destroy_workqueue(svc_rdma_wq);
	return -ENOMEM;
}
MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>");
MODULE_DESCRIPTION("SVC RDMA Transport");
MODULE_LICENSE("Dual BSD/GPL");
module_init(svc_rdma_init);
module_exit(svc_rdma_cleanup);