ip_vs_app.c revision c6d2d445d8dee04cde47eb4021636399a4239e9f
1/*
2 * ip_vs_app.c: Application module support for IPVS
3 *
4 * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
5 *
6 *              This program is free software; you can redistribute it and/or
7 *              modify it under the terms of the GNU General Public License
8 *              as published by the Free Software Foundation; either version
9 *              2 of the License, or (at your option) any later version.
10 *
11 * Most code here is taken from ip_masq_app.c in kernel 2.2. The difference
12 * is that ip_vs_app module handles the reverse direction (incoming requests
13 * and outgoing responses).
14 *
15 *		IP_MASQ_APP application masquerading module
16 *
17 * Author:	Juan Jose Ciarlante, <jjciarla@raiz.uncu.edu.ar>
18 *
19 */
20
21#define KMSG_COMPONENT "IPVS"
22#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
23
24#include <linux/module.h>
25#include <linux/kernel.h>
26#include <linux/skbuff.h>
27#include <linux/in.h>
28#include <linux/ip.h>
29#include <linux/netfilter.h>
30#include <linux/slab.h>
31#include <net/net_namespace.h>
32#include <net/protocol.h>
33#include <net/tcp.h>
34#include <asm/system.h>
35#include <linux/stat.h>
36#include <linux/proc_fs.h>
37#include <linux/seq_file.h>
38#include <linux/mutex.h>
39
40#include <net/ip_vs.h>
41
/* Registration entry points of this file that are exported to other kernel modules. */
42EXPORT_SYMBOL(register_ip_vs_app);
43EXPORT_SYMBOL(unregister_ip_vs_app);
44EXPORT_SYMBOL(register_ip_vs_app_inc);
45
46/*
47 *	Get an ip_vs_app object
48 */
49static inline int ip_vs_app_get(struct ip_vs_app *app)
50{
51	return try_module_get(app->module);
52}
53
54
55static inline void ip_vs_app_put(struct ip_vs_app *app)
56{
57	module_put(app->module);
58}
59
60
61/*
62 *	Allocate/initialize app incarnation and register it in proto apps.
63 */
64static int
65ip_vs_app_inc_new(struct net *net, struct ip_vs_app *app, __u16 proto,
66		  __u16 port)
67{
68	struct ip_vs_protocol *pp;
69	struct ip_vs_app *inc;
70	int ret;
71
72	if (!(pp = ip_vs_proto_get(proto)))
73		return -EPROTONOSUPPORT;
74
75	if (!pp->unregister_app)
76		return -EOPNOTSUPP;
77
78	inc = kmemdup(app, sizeof(*inc), GFP_KERNEL);
79	if (!inc)
80		return -ENOMEM;
81	INIT_LIST_HEAD(&inc->p_list);
82	INIT_LIST_HEAD(&inc->incs_list);
83	inc->app = app;
84	inc->port = htons(port);
85	atomic_set(&inc->usecnt, 0);
86
87	if (app->timeouts) {
88		inc->timeout_table =
89			ip_vs_create_timeout_table(app->timeouts,
90						   app->timeouts_size);
91		if (!inc->timeout_table) {
92			ret = -ENOMEM;
93			goto out;
94		}
95	}
96
97	ret = pp->register_app(net, inc);
98	if (ret)
99		goto out;
100
101	list_add(&inc->a_list, &app->incs_list);
102	IP_VS_DBG(9, "%s App %s:%u registered\n",
103		  pp->name, inc->name, ntohs(inc->port));
104
105	return 0;
106
107  out:
108	kfree(inc->timeout_table);
109	kfree(inc);
110	return ret;
111}
112
113
114/*
115 *	Release app incarnation
116 */
117static void
118ip_vs_app_inc_release(struct net *net, struct ip_vs_app *inc)
119{
120	struct ip_vs_protocol *pp;
121
122	if (!(pp = ip_vs_proto_get(inc->protocol)))
123		return;
124
125	if (pp->unregister_app)
126		pp->unregister_app(net, inc);
127
128	IP_VS_DBG(9, "%s App %s:%u unregistered\n",
129		  pp->name, inc->name, ntohs(inc->port));
130
131	list_del(&inc->a_list);
132
133	kfree(inc->timeout_table);
134	kfree(inc);
135}
136
137
138/*
139 *	Get reference to app inc (only called from softirq)
140 *
141 */
142int ip_vs_app_inc_get(struct ip_vs_app *inc)
143{
144	int result;
145
146	atomic_inc(&inc->usecnt);
147	if (unlikely((result = ip_vs_app_get(inc->app)) != 1))
148		atomic_dec(&inc->usecnt);
149	return result;
150}
151
152
153/*
154 *	Put the app inc (only called from timer or net softirq)
155 */
156void ip_vs_app_inc_put(struct ip_vs_app *inc)
157{
158	ip_vs_app_put(inc->app);
159	atomic_dec(&inc->usecnt);
160}
161
162
163/*
164 *	Register an application incarnation in protocol applications
165 */
166int
167register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app, __u16 proto,
168		       __u16 port)
169{
170	struct netns_ipvs *ipvs = net_ipvs(net);
171	int result;
172
173	mutex_lock(&ipvs->app_mutex);
174
175	result = ip_vs_app_inc_new(net, app, proto, port);
176
177	mutex_unlock(&ipvs->app_mutex);
178
179	return result;
180}
181
182
183/*
184 *	ip_vs_app registration routine
185 */
186int register_ip_vs_app(struct net *net, struct ip_vs_app *app)
187{
188	struct netns_ipvs *ipvs = net_ipvs(net);
189	/* increase the module use count */
190	ip_vs_use_count_inc();
191
192	mutex_lock(&ipvs->app_mutex);
193
194	list_add(&app->a_list, &ipvs->app_list);
195
196	mutex_unlock(&ipvs->app_mutex);
197
198	return 0;
199}
200
201
202/*
203 *	ip_vs_app unregistration routine
204 *	We are sure there are no app incarnations attached to services
205 */
206void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app)
207{
208	struct netns_ipvs *ipvs = net_ipvs(net);
209	struct ip_vs_app *inc, *nxt;
210
211	mutex_lock(&ipvs->app_mutex);
212
213	list_for_each_entry_safe(inc, nxt, &app->incs_list, a_list) {
214		ip_vs_app_inc_release(net, inc);
215	}
216
217	list_del(&app->a_list);
218
219	mutex_unlock(&ipvs->app_mutex);
220
221	/* decrease the module use count */
222	ip_vs_use_count_dec();
223}
224
225
226/*
227 *	Bind ip_vs_conn to its ip_vs_app (called by cp constructor)
228 */
229int ip_vs_bind_app(struct ip_vs_conn *cp,
230		   struct ip_vs_protocol *pp)
231{
232	return pp->app_conn_bind(cp);
233}
234
235
236/*
237 *	Unbind cp from application incarnation (called by cp destructor)
238 */
239void ip_vs_unbind_app(struct ip_vs_conn *cp)
240{
241	struct ip_vs_app *inc = cp->app;
242
243	if (!inc)
244		return;
245
246	if (inc->unbind_conn)
247		inc->unbind_conn(inc, cp);
248	if (inc->done_conn)
249		inc->done_conn(inc, cp);
250	ip_vs_app_inc_put(inc);
251	cp->app = NULL;
252}
253
254
255/*
256 *	Fixes th->seq based on ip_vs_seq info.
257 */
258static inline void vs_fix_seq(const struct ip_vs_seq *vseq, struct tcphdr *th)
259{
260	__u32 seq = ntohl(th->seq);
261
262	/*
263	 *	Adjust seq with delta-offset for all packets after
264	 *	the most recent resized pkt seq and with previous_delta offset
265	 *	for all packets	before most recent resized pkt seq.
266	 */
267	if (vseq->delta || vseq->previous_delta) {
268		if(after(seq, vseq->init_seq)) {
269			th->seq = htonl(seq + vseq->delta);
270			IP_VS_DBG(9, "%s(): added delta (%d) to seq\n",
271				  __func__, vseq->delta);
272		} else {
273			th->seq = htonl(seq + vseq->previous_delta);
274			IP_VS_DBG(9, "%s(): added previous_delta (%d) to seq\n",
275				  __func__, vseq->previous_delta);
276		}
277	}
278}
279
280
281/*
282 *	Fixes th->ack_seq based on ip_vs_seq info.
283 */
284static inline void
285vs_fix_ack_seq(const struct ip_vs_seq *vseq, struct tcphdr *th)
286{
287	__u32 ack_seq = ntohl(th->ack_seq);
288
289	/*
290	 * Adjust ack_seq with delta-offset for
291	 * the packets AFTER most recent resized pkt has caused a shift
292	 * for packets before most recent resized pkt, use previous_delta
293	 */
294	if (vseq->delta || vseq->previous_delta) {
295		/* since ack_seq is the number of octet that is expected
296		   to receive next, so compare it with init_seq+delta */
297		if(after(ack_seq, vseq->init_seq+vseq->delta)) {
298			th->ack_seq = htonl(ack_seq - vseq->delta);
299			IP_VS_DBG(9, "%s(): subtracted delta "
300				  "(%d) from ack_seq\n", __func__, vseq->delta);
301
302		} else {
303			th->ack_seq = htonl(ack_seq - vseq->previous_delta);
304			IP_VS_DBG(9, "%s(): subtracted "
305				  "previous_delta (%d) from ack_seq\n",
306				  __func__, vseq->previous_delta);
307		}
308	}
309}
310
311
312/*
313 *	Updates ip_vs_seq if pkt has been resized
314 *	Assumes already checked proto==IPPROTO_TCP and diff!=0.
315 */
316static inline void vs_seq_update(struct ip_vs_conn *cp, struct ip_vs_seq *vseq,
317				 unsigned flag, __u32 seq, int diff)
318{
319	/* spinlock is to keep updating cp->flags atomic */
320	spin_lock(&cp->lock);
321	if (!(cp->flags & flag) || after(seq, vseq->init_seq)) {
322		vseq->previous_delta = vseq->delta;
323		vseq->delta += diff;
324		vseq->init_seq = seq;
325		cp->flags |= flag;
326	}
327	spin_unlock(&cp->lock);
328}
329
330static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb,
331				  struct ip_vs_app *app)
332{
333	int diff;
334	const unsigned int tcp_offset = ip_hdrlen(skb);
335	struct tcphdr *th;
336	__u32 seq;
337
338	if (!skb_make_writable(skb, tcp_offset + sizeof(*th)))
339		return 0;
340
341	th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset);
342
343	/*
344	 *	Remember seq number in case this pkt gets resized
345	 */
346	seq = ntohl(th->seq);
347
348	/*
349	 *	Fix seq stuff if flagged as so.
350	 */
351	if (cp->flags & IP_VS_CONN_F_OUT_SEQ)
352		vs_fix_seq(&cp->out_seq, th);
353	if (cp->flags & IP_VS_CONN_F_IN_SEQ)
354		vs_fix_ack_seq(&cp->in_seq, th);
355
356	/*
357	 *	Call private output hook function
358	 */
359	if (app->pkt_out == NULL)
360		return 1;
361
362	if (!app->pkt_out(app, cp, skb, &diff))
363		return 0;
364
365	/*
366	 *	Update ip_vs seq stuff if len has changed.
367	 */
368	if (diff != 0)
369		vs_seq_update(cp, &cp->out_seq,
370			      IP_VS_CONN_F_OUT_SEQ, seq, diff);
371
372	return 1;
373}
374
375/*
376 *	Output pkt hook. Will call bound ip_vs_app specific function
377 *	called by ipvs packet handler, assumes previously checked cp!=NULL
378 *	returns false if it can't handle packet (oom)
379 */
380int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb)
381{
382	struct ip_vs_app *app;
383
384	/*
385	 *	check if application module is bound to
386	 *	this ip_vs_conn.
387	 */
388	if ((app = cp->app) == NULL)
389		return 1;
390
391	/* TCP is complicated */
392	if (cp->protocol == IPPROTO_TCP)
393		return app_tcp_pkt_out(cp, skb, app);
394
395	/*
396	 *	Call private output hook function
397	 */
398	if (app->pkt_out == NULL)
399		return 1;
400
401	return app->pkt_out(app, cp, skb, NULL);
402}
403
404
405static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb,
406				 struct ip_vs_app *app)
407{
408	int diff;
409	const unsigned int tcp_offset = ip_hdrlen(skb);
410	struct tcphdr *th;
411	__u32 seq;
412
413	if (!skb_make_writable(skb, tcp_offset + sizeof(*th)))
414		return 0;
415
416	th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset);
417
418	/*
419	 *	Remember seq number in case this pkt gets resized
420	 */
421	seq = ntohl(th->seq);
422
423	/*
424	 *	Fix seq stuff if flagged as so.
425	 */
426	if (cp->flags & IP_VS_CONN_F_IN_SEQ)
427		vs_fix_seq(&cp->in_seq, th);
428	if (cp->flags & IP_VS_CONN_F_OUT_SEQ)
429		vs_fix_ack_seq(&cp->out_seq, th);
430
431	/*
432	 *	Call private input hook function
433	 */
434	if (app->pkt_in == NULL)
435		return 1;
436
437	if (!app->pkt_in(app, cp, skb, &diff))
438		return 0;
439
440	/*
441	 *	Update ip_vs seq stuff if len has changed.
442	 */
443	if (diff != 0)
444		vs_seq_update(cp, &cp->in_seq,
445			      IP_VS_CONN_F_IN_SEQ, seq, diff);
446
447	return 1;
448}
449
450/*
451 *	Input pkt hook. Will call bound ip_vs_app specific function
452 *	called by ipvs packet handler, assumes previously checked cp!=NULL.
453 *	returns false if can't handle packet (oom).
454 */
455int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb)
456{
457	struct ip_vs_app *app;
458
459	/*
460	 *	check if application module is bound to
461	 *	this ip_vs_conn.
462	 */
463	if ((app = cp->app) == NULL)
464		return 1;
465
466	/* TCP is complicated */
467	if (cp->protocol == IPPROTO_TCP)
468		return app_tcp_pkt_in(cp, skb, app);
469
470	/*
471	 *	Call private input hook function
472	 */
473	if (app->pkt_in == NULL)
474		return 1;
475
476	return app->pkt_in(app, cp, skb, NULL);
477}
478
479
480#ifdef CONFIG_PROC_FS
481/*
482 *	/proc/net/ip_vs_app entry function
483 */
484
485static struct ip_vs_app *ip_vs_app_idx(struct netns_ipvs *ipvs, loff_t pos)
486{
487	struct ip_vs_app *app, *inc;
488
489	list_for_each_entry(app, &ipvs->app_list, a_list) {
490		list_for_each_entry(inc, &app->incs_list, a_list) {
491			if (pos-- == 0)
492				return inc;
493		}
494	}
495	return NULL;
496
497}
498
499static void *ip_vs_app_seq_start(struct seq_file *seq, loff_t *pos)
500{
501	struct net *net = seq_file_net(seq);
502	struct netns_ipvs *ipvs = net_ipvs(net);
503
504	mutex_lock(&ipvs->app_mutex);
505
506	return *pos ? ip_vs_app_idx(ipvs, *pos - 1) : SEQ_START_TOKEN;
507}
508
509static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos)
510{
511	struct ip_vs_app *inc, *app;
512	struct list_head *e;
513	struct net *net = seq_file_net(seq);
514	struct netns_ipvs *ipvs = net_ipvs(net);
515
516	++*pos;
517	if (v == SEQ_START_TOKEN)
518		return ip_vs_app_idx(ipvs, 0);
519
520	inc = v;
521	app = inc->app;
522
523	if ((e = inc->a_list.next) != &app->incs_list)
524		return list_entry(e, struct ip_vs_app, a_list);
525
526	/* go on to next application */
527	for (e = app->a_list.next; e != &ipvs->app_list; e = e->next) {
528		app = list_entry(e, struct ip_vs_app, a_list);
529		list_for_each_entry(inc, &app->incs_list, a_list) {
530			return inc;
531		}
532	}
533	return NULL;
534}
535
536static void ip_vs_app_seq_stop(struct seq_file *seq, void *v)
537{
538	struct netns_ipvs *ipvs = net_ipvs(seq_file_net(seq));
539
540	mutex_unlock(&ipvs->app_mutex);
541}
542
543static int ip_vs_app_seq_show(struct seq_file *seq, void *v)
544{
545	if (v == SEQ_START_TOKEN)
546		seq_puts(seq, "prot port    usecnt name\n");
547	else {
548		const struct ip_vs_app *inc = v;
549
550		seq_printf(seq, "%-3s  %-7u %-6d %-17s\n",
551			   ip_vs_proto_name(inc->protocol),
552			   ntohs(inc->port),
553			   atomic_read(&inc->usecnt),
554			   inc->name);
555	}
556	return 0;
557}
558
559static const struct seq_operations ip_vs_app_seq_ops = {
560	.start = ip_vs_app_seq_start,
561	.next  = ip_vs_app_seq_next,
562	.stop  = ip_vs_app_seq_stop,
563	.show  = ip_vs_app_seq_show,
564};
565
566static int ip_vs_app_open(struct inode *inode, struct file *file)
567{
568	return seq_open_net(inode, file, &ip_vs_app_seq_ops,
569			    sizeof(struct seq_net_private));
570}
571
572static const struct file_operations ip_vs_app_fops = {
573	.owner	 = THIS_MODULE,
574	.open	 = ip_vs_app_open,
575	.read	 = seq_read,
576	.llseek  = seq_lseek,
577	.release = seq_release,
578};
579#endif
580
581static int __net_init __ip_vs_app_init(struct net *net)
582{
583	struct netns_ipvs *ipvs = net_ipvs(net);
584
585	INIT_LIST_HEAD(&ipvs->app_list);
586	__mutex_init(&ipvs->app_mutex, "ipvs->app_mutex", &ipvs->app_key);
587	proc_net_fops_create(net, "ip_vs_app", 0, &ip_vs_app_fops);
588	return 0;
589}
590
591static void __net_exit __ip_vs_app_cleanup(struct net *net)
592{
593	proc_net_remove(net, "ip_vs_app");
594}
595
596static struct pernet_operations ip_vs_app_ops = {
597	.init = __ip_vs_app_init,
598	.exit = __ip_vs_app_cleanup,
599};
600
601int __init ip_vs_app_init(void)
602{
603	int rv;
604
605	rv = register_pernet_subsys(&ip_vs_app_ops);
606	return rv;
607}
608
609
610void ip_vs_app_cleanup(void)
611{
612	unregister_pernet_subsys(&ip_vs_app_ops);
613}
614