ip_vs_app.c revision c6d2d445d8dee04cde47eb4021636399a4239e9f
/*
 * ip_vs_app.c: Application module support for IPVS
 *
 * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Most code here is taken from ip_masq_app.c in kernel 2.2. The difference
 * is that ip_vs_app module handles the reverse direction (incoming requests
 * and outgoing responses).
 *
 *		IP_MASQ_APP application masquerading module
 *
 * Author:	Juan Jose Ciarlante, <jjciarla@raiz.uncu.edu.ar>
 *
 */

#define KMSG_COMPONENT "IPVS"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/netfilter.h>
#include <linux/slab.h>
#include <net/net_namespace.h>
#include <net/protocol.h>
#include <net/tcp.h>
#include <asm/system.h>
#include <linux/stat.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/mutex.h>

#include <net/ip_vs.h>

EXPORT_SYMBOL(register_ip_vs_app);
EXPORT_SYMBOL(unregister_ip_vs_app);
EXPORT_SYMBOL(register_ip_vs_app_inc);

/*
 *	Get an ip_vs_app object: takes a reference on the module that
 *	owns the application helper.  Returns nonzero on success
 *	(see try_module_get()).
 */
static inline int ip_vs_app_get(struct ip_vs_app *app)
{
	return try_module_get(app->module);
}


/*
 *	Drop the module reference taken by ip_vs_app_get().
 */
static inline void ip_vs_app_put(struct ip_vs_app *app)
{
	module_put(app->module);
}


/*
 *	Allocate/initialize app incarnation and register it in proto apps.
 *
 *	The incarnation is a kmemdup() copy of the master @app, bound to
 *	one (@proto, @port) pair and linked onto app->incs_list.
 *	Caller must hold ipvs->app_mutex (see register_ip_vs_app_inc()).
 *	Returns 0 on success or a negative errno.
 */
static int
ip_vs_app_inc_new(struct net *net, struct ip_vs_app *app, __u16 proto,
		  __u16 port)
{
	struct ip_vs_protocol *pp;
	struct ip_vs_app *inc;
	int ret;

	if (!(pp = ip_vs_proto_get(proto)))
		return -EPROTONOSUPPORT;

	/* a protocol that cannot unregister apps cannot register them either */
	if (!pp->unregister_app)
		return -EOPNOTSUPP;

	/* start from a byte copy of the master app, then re-init the
	 * per-incarnation fields */
	inc = kmemdup(app, sizeof(*inc), GFP_KERNEL);
	if (!inc)
		return -ENOMEM;
	INIT_LIST_HEAD(&inc->p_list);
	INIT_LIST_HEAD(&inc->incs_list);
	inc->app = app;
	inc->port = htons(port);
	atomic_set(&inc->usecnt, 0);

	if (app->timeouts) {
		inc->timeout_table =
			ip_vs_create_timeout_table(app->timeouts,
						   app->timeouts_size);
		if (!inc->timeout_table) {
			ret = -ENOMEM;
			goto out;
		}
	}

	ret = pp->register_app(net, inc);
	if (ret)
		goto out;

	list_add(&inc->a_list, &app->incs_list);
	IP_VS_DBG(9, "%s App %s:%u registered\n",
		  pp->name, inc->name, ntohs(inc->port));

	return 0;

  out:
	/* kfree(NULL) is a no-op, so this is safe when the timeout table
	 * was never allocated */
	kfree(inc->timeout_table);
	kfree(inc);
	return ret;
}


/*
 *	Release app incarnation: unregister it from its protocol, unlink
 *	it from the master app's incs_list and free it.
 *	Caller must hold ipvs->app_mutex.
 */
static void
ip_vs_app_inc_release(struct net *net, struct ip_vs_app *inc)
{
	struct ip_vs_protocol *pp;

	if (!(pp = ip_vs_proto_get(inc->protocol)))
		return;

	if (pp->unregister_app)
		pp->unregister_app(net, inc);

	IP_VS_DBG(9, "%s App %s:%u unregistered\n",
		  pp->name, inc->name, ntohs(inc->port));

	list_del(&inc->a_list);

	kfree(inc->timeout_table);
	kfree(inc);
}


/*
 *	Get reference to app inc (only called from softirq)
 *
 *	Bumps the incarnation use count and pins the owning module.
 *	If the module grab fails the use count is rolled back.
 *	Returns the try_module_get() result (1 on success).
 */
int ip_vs_app_inc_get(struct ip_vs_app *inc)
{
	int result;

	atomic_inc(&inc->usecnt);
	if (unlikely((result = ip_vs_app_get(inc->app)) != 1))
		atomic_dec(&inc->usecnt);
	return result;
}


/*
 *	Put the app inc (only called from timer or net softirq)
 *	Reverse of ip_vs_app_inc_get(): drop module ref, then usecnt.
 */
void ip_vs_app_inc_put(struct ip_vs_app *inc)
{
	ip_vs_app_put(inc->app);
	atomic_dec(&inc->usecnt);
}


/*
 *	Register an application incarnation in protocol applications
 *	(per-netns; serialized by ipvs->app_mutex).
 */
int
register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app, __u16 proto,
		       __u16 port)
{
	struct netns_ipvs *ipvs = net_ipvs(net);
	int result;

	mutex_lock(&ipvs->app_mutex);

	result = ip_vs_app_inc_new(net, app, proto, port);

	mutex_unlock(&ipvs->app_mutex);

	return result;
}


/*
 *	ip_vs_app registration routine: link the master app into the
 *	per-netns app list.  Always succeeds (returns 0).
 */
int register_ip_vs_app(struct net *net, struct ip_vs_app *app)
{
	struct netns_ipvs *ipvs = net_ipvs(net);
	/* increase the module use count */
	ip_vs_use_count_inc();

	mutex_lock(&ipvs->app_mutex);

	list_add(&app->a_list, &ipvs->app_list);

	mutex_unlock(&ipvs->app_mutex);

	return 0;
}


/*
 *	ip_vs_app unregistration routine
 *	We are sure there are no app incarnations attached to services
 *	(so each incarnation can be released immediately).
 */
void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app)
{
	struct netns_ipvs *ipvs = net_ipvs(net);
	struct ip_vs_app *inc, *nxt;

	mutex_lock(&ipvs->app_mutex);

	/* _safe variant: ip_vs_app_inc_release() unlinks and frees inc */
	list_for_each_entry_safe(inc, nxt, &app->incs_list, a_list) {
		ip_vs_app_inc_release(net, inc);
	}

	list_del(&app->a_list);

	mutex_unlock(&ipvs->app_mutex);

	/* decrease the module use count */
	ip_vs_use_count_dec();
}


/*
 *	Bind ip_vs_conn to its ip_vs_app (called by cp constructor)
 *	Delegates entirely to the protocol's app_conn_bind hook.
 */
int ip_vs_bind_app(struct ip_vs_conn *cp,
		   struct ip_vs_protocol *pp)
{
	return pp->app_conn_bind(cp);
}


/*
 *	Unbind cp from application incarnation (called by cp destructor)
 *	Runs the app's optional unbind_conn/done_conn hooks, drops the
 *	reference taken at bind time and clears cp->app.
 */
void ip_vs_unbind_app(struct ip_vs_conn *cp)
{
	struct ip_vs_app *inc = cp->app;

	if (!inc)
		return;

	if (inc->unbind_conn)
		inc->unbind_conn(inc, cp);
	if (inc->done_conn)
		inc->done_conn(inc, cp);
	ip_vs_app_inc_put(inc);
	cp->app = NULL;
}


/*
 *	Fixes th->seq based on ip_vs_seq info.
 */
static inline void vs_fix_seq(const struct ip_vs_seq *vseq, struct tcphdr *th)
{
	__u32 seq = ntohl(th->seq);

	/*
	 *	Adjust seq with delta-offset for all packets after
	 *	the most recent resized pkt seq and with previous_delta offset
	 *	for all packets	before most recent resized pkt seq.
	 */
	if (vseq->delta || vseq->previous_delta) {
		if (after(seq, vseq->init_seq)) {
			th->seq = htonl(seq + vseq->delta);
			IP_VS_DBG(9, "%s(): added delta (%d) to seq\n",
				  __func__, vseq->delta);
		} else {
			th->seq = htonl(seq + vseq->previous_delta);
			IP_VS_DBG(9, "%s(): added previous_delta (%d) to seq\n",
				  __func__, vseq->previous_delta);
		}
	}
}


/*
 *	Fixes th->ack_seq based on ip_vs_seq info.
 *	Mirror of vs_fix_seq(): the ACK in this direction must be shifted
 *	back by the delta that was applied to the peer's sequence numbers.
 */
static inline void
vs_fix_ack_seq(const struct ip_vs_seq *vseq, struct tcphdr *th)
{
	__u32 ack_seq = ntohl(th->ack_seq);

	/*
	 * Adjust ack_seq with delta-offset for
	 * the packets AFTER most recent resized pkt has caused a shift
	 * for packets before most recent resized pkt, use previous_delta
	 */
	if (vseq->delta || vseq->previous_delta) {
		/* since ack_seq is the number of octet that is expected
		   to receive next, so compare it with init_seq+delta */
		if (after(ack_seq, vseq->init_seq + vseq->delta)) {
			th->ack_seq = htonl(ack_seq - vseq->delta);
			IP_VS_DBG(9, "%s(): subtracted delta "
				  "(%d) from ack_seq\n", __func__, vseq->delta);

		} else {
			th->ack_seq = htonl(ack_seq - vseq->previous_delta);
			IP_VS_DBG(9, "%s(): subtracted "
				  "previous_delta (%d) from ack_seq\n",
				  __func__, vseq->previous_delta);
		}
	}
}


/*
 *	Updates ip_vs_seq if pkt has been resized
 *	Assumes already checked proto==IPPROTO_TCP and diff!=0.
 *
 *	Rolls the current delta into previous_delta and records the seq of
 *	the resized packet, but only for the first resize (flag not yet set)
 *	or for a packet newer than the last recorded one.
 */
static inline void vs_seq_update(struct ip_vs_conn *cp, struct ip_vs_seq *vseq,
				 unsigned flag, __u32 seq, int diff)
{
	/* spinlock is to keep updating cp->flags atomic */
	spin_lock(&cp->lock);
	if (!(cp->flags & flag) || after(seq, vseq->init_seq)) {
		vseq->previous_delta = vseq->delta;
		vseq->delta += diff;
		vseq->init_seq = seq;
		cp->flags |= flag;
	}
	spin_unlock(&cp->lock);
}

/*
 *	TCP-specific output processing: fix up seq/ack numbers, run the
 *	app's pkt_out hook and record any payload size change.
 *	Returns 0 on failure (e.g. skb not writable / hook OOM), 1 otherwise.
 */
static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb,
				  struct ip_vs_app *app)
{
	int diff;
	const unsigned int tcp_offset = ip_hdrlen(skb);
	struct tcphdr *th;
	__u32 seq;

	/* we are about to rewrite the TCP header in place */
	if (!skb_make_writable(skb, tcp_offset + sizeof(*th)))
		return 0;

	th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset);

	/*
	 *	Remember seq number in case this pkt gets resized
	 */
	seq = ntohl(th->seq);

	/*
	 *	Fix seq stuff if flagged as so.
	 */
	if (cp->flags & IP_VS_CONN_F_OUT_SEQ)
		vs_fix_seq(&cp->out_seq, th);
	if (cp->flags & IP_VS_CONN_F_IN_SEQ)
		vs_fix_ack_seq(&cp->in_seq, th);

	/*
	 *	Call private output hook function
	 */
	if (app->pkt_out == NULL)
		return 1;

	if (!app->pkt_out(app, cp, skb, &diff))
		return 0;

	/*
	 *	Update ip_vs seq stuff if len has changed.
	 */
	if (diff != 0)
		vs_seq_update(cp, &cp->out_seq,
			      IP_VS_CONN_F_OUT_SEQ, seq, diff);

	return 1;
}

/*
 *	Output pkt hook. Will call bound ip_vs_app specific function
 *	called by ipvs packet handler, assumes previously checked cp!=NULL
 *	returns false if it can't handle packet (oom)
 */
int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb)
{
	struct ip_vs_app *app;

	/*
	 *	check if application module is bound to
	 *	this ip_vs_conn.
	 */
	if ((app = cp->app) == NULL)
		return 1;

	/* TCP is complicated: seq/ack rewriting is needed */
	if (cp->protocol == IPPROTO_TCP)
		return app_tcp_pkt_out(cp, skb, app);

	/*
	 *	Call private output hook function
	 */
	if (app->pkt_out == NULL)
		return 1;

	/* non-TCP: no seq tracking, so the hook gets no diff pointer */
	return app->pkt_out(app, cp, skb, NULL);
}


/*
 *	TCP-specific input processing; mirror image of app_tcp_pkt_out()
 *	with in_seq/out_seq and the pkt_in hook.
 *	Returns 0 on failure, 1 otherwise.
 */
static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb,
				 struct ip_vs_app *app)
{
	int diff;
	const unsigned int tcp_offset = ip_hdrlen(skb);
	struct tcphdr *th;
	__u32 seq;

	if (!skb_make_writable(skb, tcp_offset + sizeof(*th)))
		return 0;

	th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset);

	/*
	 *	Remember seq number in case this pkt gets resized
	 */
	seq = ntohl(th->seq);

	/*
	 *	Fix seq stuff if flagged as so.
	 */
	if (cp->flags & IP_VS_CONN_F_IN_SEQ)
		vs_fix_seq(&cp->in_seq, th);
	if (cp->flags & IP_VS_CONN_F_OUT_SEQ)
		vs_fix_ack_seq(&cp->out_seq, th);

	/*
	 *	Call private input hook function
	 */
	if (app->pkt_in == NULL)
		return 1;

	if (!app->pkt_in(app, cp, skb, &diff))
		return 0;

	/*
	 *	Update ip_vs seq stuff if len has changed.
	 */
	if (diff != 0)
		vs_seq_update(cp, &cp->in_seq,
			      IP_VS_CONN_F_IN_SEQ, seq, diff);

	return 1;
}

/*
 *	Input pkt hook. Will call bound ip_vs_app specific function
 *	called by ipvs packet handler, assumes previously checked cp!=NULL.
 *	returns false if can't handle packet (oom).
 */
int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb)
{
	struct ip_vs_app *app;

	/*
	 *	check if application module is bound to
	 *	this ip_vs_conn.
	 */
	if ((app = cp->app) == NULL)
		return 1;

	/* TCP is complicated */
	if (cp->protocol == IPPROTO_TCP)
		return app_tcp_pkt_in(cp, skb, app);

	/*
	 *	Call private input hook function
	 */
	if (app->pkt_in == NULL)
		return 1;

	return app->pkt_in(app, cp, skb, NULL);
}


#ifdef CONFIG_PROC_FS
/*
 *	/proc/net/ip_vs_app entry function
 */

/*
 *	Return the pos-th incarnation across all registered apps, or NULL.
 *	Caller must hold ipvs->app_mutex (taken in ..._seq_start()).
 */
static struct ip_vs_app *ip_vs_app_idx(struct netns_ipvs *ipvs, loff_t pos)
{
	struct ip_vs_app *app, *inc;

	list_for_each_entry(app, &ipvs->app_list, a_list) {
		list_for_each_entry(inc, &app->incs_list, a_list) {
			if (pos-- == 0)
				return inc;
		}
	}
	return NULL;

}

static void *ip_vs_app_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct net *net = seq_file_net(seq);
	struct netns_ipvs *ipvs = net_ipvs(net);

	/* held until ip_vs_app_seq_stop() */
	mutex_lock(&ipvs->app_mutex);

	return *pos ? ip_vs_app_idx(ipvs, *pos - 1) : SEQ_START_TOKEN;
}

static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ip_vs_app *inc, *app;
	struct list_head *e;
	struct net *net = seq_file_net(seq);
	struct netns_ipvs *ipvs = net_ipvs(net);

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ip_vs_app_idx(ipvs, 0);

	inc = v;
	app = inc->app;

	/* next incarnation of the same app, if any */
	if ((e = inc->a_list.next) != &app->incs_list)
		return list_entry(e, struct ip_vs_app, a_list);

	/* go on to next application: first incarnation of the next app
	 * that has one (the inner loop returns on its first entry) */
	for (e = app->a_list.next; e != &ipvs->app_list; e = e->next) {
		app = list_entry(e, struct ip_vs_app, a_list);
		list_for_each_entry(inc, &app->incs_list, a_list) {
			return inc;
		}
	}
	return NULL;
}

static void ip_vs_app_seq_stop(struct seq_file *seq, void *v)
{
	struct netns_ipvs *ipvs = net_ipvs(seq_file_net(seq));

	mutex_unlock(&ipvs->app_mutex);
}

static int ip_vs_app_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "prot port    usecnt name\n");
	else {
		const struct ip_vs_app *inc = v;

		seq_printf(seq, "%-3s  %-7u %-6d %-17s\n",
			   ip_vs_proto_name(inc->protocol),
			   ntohs(inc->port),
			   atomic_read(&inc->usecnt),
			   inc->name);
	}
	return 0;
}

static const struct seq_operations ip_vs_app_seq_ops = {
	.start = ip_vs_app_seq_start,
	.next  = ip_vs_app_seq_next,
	.stop  = ip_vs_app_seq_stop,
	.show  = ip_vs_app_seq_show,
};

static int ip_vs_app_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ip_vs_app_seq_ops,
			    sizeof(struct seq_net_private));
}

static const struct file_operations ip_vs_app_fops = {
	.owner	 = THIS_MODULE,
	.open	 = ip_vs_app_open,
	.read	 = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release,
};
#endif

/*
 *	Per-netns init: set up the app list, its mutex and the proc entry.
 */
static int __net_init __ip_vs_app_init(struct net *net)
{
	struct netns_ipvs *ipvs = net_ipvs(net);

	INIT_LIST_HEAD(&ipvs->app_list);
	/* __mutex_init with an explicit name/key for lockdep */
	__mutex_init(&ipvs->app_mutex, "ipvs->app_mutex", &ipvs->app_key);
	proc_net_fops_create(net, "ip_vs_app", 0, &ip_vs_app_fops);
	return 0;
}

static void __net_exit __ip_vs_app_cleanup(struct net *net)
{
	proc_net_remove(net, "ip_vs_app");
}

static struct pernet_operations ip_vs_app_ops = {
	.init = __ip_vs_app_init,
	.exit = __ip_vs_app_cleanup,
};

/*
 *	Module-level init/cleanup: just (un)register the pernet ops.
 */
int __init ip_vs_app_init(void)
{
	int rv;

	rv = register_pernet_subsys(&ip_vs_app_ops);
	return rv;
}


void ip_vs_app_cleanup(void)
{
	unregister_pernet_subsys(&ip_vs_app_ops);
}