1/* 2 * ip_vs_app.c: Application module support for IPVS 3 * 4 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License 8 * as published by the Free Software Foundation; either version 9 * 2 of the License, or (at your option) any later version. 10 * 11 * Most code here is taken from ip_masq_app.c in kernel 2.2. The difference 12 * is that ip_vs_app module handles the reverse direction (incoming requests 13 * and outgoing responses). 14 * 15 * IP_MASQ_APP application masquerading module 16 * 17 * Author: Juan Jose Ciarlante, <jjciarla@raiz.uncu.edu.ar> 18 * 19 */ 20 21#define KMSG_COMPONENT "IPVS" 22#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 23 24#include <linux/module.h> 25#include <linux/kernel.h> 26#include <linux/skbuff.h> 27#include <linux/in.h> 28#include <linux/ip.h> 29#include <linux/netfilter.h> 30#include <linux/slab.h> 31#include <net/net_namespace.h> 32#include <net/protocol.h> 33#include <net/tcp.h> 34#include <linux/stat.h> 35#include <linux/proc_fs.h> 36#include <linux/seq_file.h> 37#include <linux/mutex.h> 38 39#include <net/ip_vs.h> 40 41EXPORT_SYMBOL(register_ip_vs_app); 42EXPORT_SYMBOL(unregister_ip_vs_app); 43EXPORT_SYMBOL(register_ip_vs_app_inc); 44 45static DEFINE_MUTEX(__ip_vs_app_mutex); 46 47/* 48 * Get an ip_vs_app object 49 */ 50static inline int ip_vs_app_get(struct ip_vs_app *app) 51{ 52 return try_module_get(app->module); 53} 54 55 56static inline void ip_vs_app_put(struct ip_vs_app *app) 57{ 58 module_put(app->module); 59} 60 61static void ip_vs_app_inc_destroy(struct ip_vs_app *inc) 62{ 63 kfree(inc->timeout_table); 64 kfree(inc); 65} 66 67static void ip_vs_app_inc_rcu_free(struct rcu_head *head) 68{ 69 struct ip_vs_app *inc = container_of(head, struct ip_vs_app, rcu_head); 70 71 ip_vs_app_inc_destroy(inc); 72} 73 74/* 75 * Allocate/initialize app incarnation and register it in proto apps. 76 */ 77static int 78ip_vs_app_inc_new(struct net *net, struct ip_vs_app *app, __u16 proto, 79 __u16 port) 80{ 81 struct ip_vs_protocol *pp; 82 struct ip_vs_app *inc; 83 int ret; 84 85 if (!(pp = ip_vs_proto_get(proto))) 86 return -EPROTONOSUPPORT; 87 88 if (!pp->unregister_app) 89 return -EOPNOTSUPP; 90 91 inc = kmemdup(app, sizeof(*inc), GFP_KERNEL); 92 if (!inc) 93 return -ENOMEM; 94 INIT_LIST_HEAD(&inc->p_list); 95 INIT_LIST_HEAD(&inc->incs_list); 96 inc->app = app; 97 inc->port = htons(port); 98 atomic_set(&inc->usecnt, 0); 99 100 if (app->timeouts) { 101 inc->timeout_table = 102 ip_vs_create_timeout_table(app->timeouts, 103 app->timeouts_size); 104 if (!inc->timeout_table) { 105 ret = -ENOMEM; 106 goto out; 107 } 108 } 109 110 ret = pp->register_app(net, inc); 111 if (ret) 112 goto out; 113 114 list_add(&inc->a_list, &app->incs_list); 115 IP_VS_DBG(9, "%s App %s:%u registered\n", 116 pp->name, inc->name, ntohs(inc->port)); 117 118 return 0; 119 120 out: 121 ip_vs_app_inc_destroy(inc); 122 return ret; 123} 124 125 126/* 127 * Release app incarnation 128 */ 129static void 130ip_vs_app_inc_release(struct net *net, struct ip_vs_app *inc) 131{ 132 struct ip_vs_protocol *pp; 133 134 if (!(pp = ip_vs_proto_get(inc->protocol))) 135 return; 136 137 if (pp->unregister_app) 138 pp->unregister_app(net, inc); 139 140 IP_VS_DBG(9, "%s App %s:%u unregistered\n", 141 pp->name, inc->name, ntohs(inc->port)); 142 143 list_del(&inc->a_list); 144 145 call_rcu(&inc->rcu_head, ip_vs_app_inc_rcu_free); 146} 147 148 149/* 150 * Get reference to app inc (only called from softirq) 151 * 152 */ 153int ip_vs_app_inc_get(struct ip_vs_app *inc) 154{ 155 int result; 156 157 result = ip_vs_app_get(inc->app); 158 if (result) 159 atomic_inc(&inc->usecnt); 160 return result; 161} 162 163 164/* 165 * Put the app inc (only called from timer or net softirq) 166 */ 167void ip_vs_app_inc_put(struct ip_vs_app *inc) 168{ 169 atomic_dec(&inc->usecnt); 170 ip_vs_app_put(inc->app); 171} 172 173 174/* 175 * Register an application incarnation in protocol applications 176 */ 177int 178register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app, __u16 proto, 179 __u16 port) 180{ 181 int result; 182 183 mutex_lock(&__ip_vs_app_mutex); 184 185 result = ip_vs_app_inc_new(net, app, proto, port); 186 187 mutex_unlock(&__ip_vs_app_mutex); 188 189 return result; 190} 191 192 193/* Register application for netns */ 194struct ip_vs_app *register_ip_vs_app(struct net *net, struct ip_vs_app *app) 195{ 196 struct netns_ipvs *ipvs = net_ipvs(net); 197 struct ip_vs_app *a; 198 int err = 0; 199 200 if (!ipvs) 201 return ERR_PTR(-ENOENT); 202 203 mutex_lock(&__ip_vs_app_mutex); 204 205 list_for_each_entry(a, &ipvs->app_list, a_list) { 206 if (!strcmp(app->name, a->name)) { 207 err = -EEXIST; 208 goto out_unlock; 209 } 210 } 211 a = kmemdup(app, sizeof(*app), GFP_KERNEL); 212 if (!a) { 213 err = -ENOMEM; 214 goto out_unlock; 215 } 216 INIT_LIST_HEAD(&a->incs_list); 217 list_add(&a->a_list, &ipvs->app_list); 218 /* increase the module use count */ 219 ip_vs_use_count_inc(); 220 221out_unlock: 222 mutex_unlock(&__ip_vs_app_mutex); 223 224 return err ? ERR_PTR(err) : a; 225} 226 227 228/* 229 * ip_vs_app unregistration routine 230 * We are sure there are no app incarnations attached to services 231 * Caller should use synchronize_rcu() or rcu_barrier() 232 */ 233void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app) 234{ 235 struct netns_ipvs *ipvs = net_ipvs(net); 236 struct ip_vs_app *a, *anxt, *inc, *nxt; 237 238 if (!ipvs) 239 return; 240 241 mutex_lock(&__ip_vs_app_mutex); 242 243 list_for_each_entry_safe(a, anxt, &ipvs->app_list, a_list) { 244 if (app && strcmp(app->name, a->name)) 245 continue; 246 list_for_each_entry_safe(inc, nxt, &a->incs_list, a_list) { 247 ip_vs_app_inc_release(net, inc); 248 } 249 250 list_del(&a->a_list); 251 kfree(a); 252 253 /* decrease the module use count */ 254 ip_vs_use_count_dec(); 255 } 256 257 mutex_unlock(&__ip_vs_app_mutex); 258} 259 260 261/* 262 * Bind ip_vs_conn to its ip_vs_app (called by cp constructor) 263 */ 264int ip_vs_bind_app(struct ip_vs_conn *cp, 265 struct ip_vs_protocol *pp) 266{ 267 return pp->app_conn_bind(cp); 268} 269 270 271/* 272 * Unbind cp from application incarnation (called by cp destructor) 273 */ 274void ip_vs_unbind_app(struct ip_vs_conn *cp) 275{ 276 struct ip_vs_app *inc = cp->app; 277 278 if (!inc) 279 return; 280 281 if (inc->unbind_conn) 282 inc->unbind_conn(inc, cp); 283 if (inc->done_conn) 284 inc->done_conn(inc, cp); 285 ip_vs_app_inc_put(inc); 286 cp->app = NULL; 287} 288 289 290/* 291 * Fixes th->seq based on ip_vs_seq info. 292 */ 293static inline void vs_fix_seq(const struct ip_vs_seq *vseq, struct tcphdr *th) 294{ 295 __u32 seq = ntohl(th->seq); 296 297 /* 298 * Adjust seq with delta-offset for all packets after 299 * the most recent resized pkt seq and with previous_delta offset 300 * for all packets before most recent resized pkt seq. 301 */ 302 if (vseq->delta || vseq->previous_delta) { 303 if(after(seq, vseq->init_seq)) { 304 th->seq = htonl(seq + vseq->delta); 305 IP_VS_DBG(9, "%s(): added delta (%d) to seq\n", 306 __func__, vseq->delta); 307 } else { 308 th->seq = htonl(seq + vseq->previous_delta); 309 IP_VS_DBG(9, "%s(): added previous_delta (%d) to seq\n", 310 __func__, vseq->previous_delta); 311 } 312 } 313} 314 315 316/* 317 * Fixes th->ack_seq based on ip_vs_seq info. 318 */ 319static inline void 320vs_fix_ack_seq(const struct ip_vs_seq *vseq, struct tcphdr *th) 321{ 322 __u32 ack_seq = ntohl(th->ack_seq); 323 324 /* 325 * Adjust ack_seq with delta-offset for 326 * the packets AFTER most recent resized pkt has caused a shift 327 * for packets before most recent resized pkt, use previous_delta 328 */ 329 if (vseq->delta || vseq->previous_delta) { 330 /* since ack_seq is the number of octet that is expected 331 to receive next, so compare it with init_seq+delta */ 332 if(after(ack_seq, vseq->init_seq+vseq->delta)) { 333 th->ack_seq = htonl(ack_seq - vseq->delta); 334 IP_VS_DBG(9, "%s(): subtracted delta " 335 "(%d) from ack_seq\n", __func__, vseq->delta); 336 337 } else { 338 th->ack_seq = htonl(ack_seq - vseq->previous_delta); 339 IP_VS_DBG(9, "%s(): subtracted " 340 "previous_delta (%d) from ack_seq\n", 341 __func__, vseq->previous_delta); 342 } 343 } 344} 345 346 347/* 348 * Updates ip_vs_seq if pkt has been resized 349 * Assumes already checked proto==IPPROTO_TCP and diff!=0. 350 */ 351static inline void vs_seq_update(struct ip_vs_conn *cp, struct ip_vs_seq *vseq, 352 unsigned int flag, __u32 seq, int diff) 353{ 354 /* spinlock is to keep updating cp->flags atomic */ 355 spin_lock_bh(&cp->lock); 356 if (!(cp->flags & flag) || after(seq, vseq->init_seq)) { 357 vseq->previous_delta = vseq->delta; 358 vseq->delta += diff; 359 vseq->init_seq = seq; 360 cp->flags |= flag; 361 } 362 spin_unlock_bh(&cp->lock); 363} 364 365static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb, 366 struct ip_vs_app *app) 367{ 368 int diff; 369 const unsigned int tcp_offset = ip_hdrlen(skb); 370 struct tcphdr *th; 371 __u32 seq; 372 373 if (!skb_make_writable(skb, tcp_offset + sizeof(*th))) 374 return 0; 375 376 th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset); 377 378 /* 379 * Remember seq number in case this pkt gets resized 380 */ 381 seq = ntohl(th->seq); 382 383 /* 384 * Fix seq stuff if flagged as so. 385 */ 386 if (cp->flags & IP_VS_CONN_F_OUT_SEQ) 387 vs_fix_seq(&cp->out_seq, th); 388 if (cp->flags & IP_VS_CONN_F_IN_SEQ) 389 vs_fix_ack_seq(&cp->in_seq, th); 390 391 /* 392 * Call private output hook function 393 */ 394 if (app->pkt_out == NULL) 395 return 1; 396 397 if (!app->pkt_out(app, cp, skb, &diff)) 398 return 0; 399 400 /* 401 * Update ip_vs seq stuff if len has changed. 402 */ 403 if (diff != 0) 404 vs_seq_update(cp, &cp->out_seq, 405 IP_VS_CONN_F_OUT_SEQ, seq, diff); 406 407 return 1; 408} 409 410/* 411 * Output pkt hook. Will call bound ip_vs_app specific function 412 * called by ipvs packet handler, assumes previously checked cp!=NULL 413 * returns false if it can't handle packet (oom) 414 */ 415int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb) 416{ 417 struct ip_vs_app *app; 418 419 /* 420 * check if application module is bound to 421 * this ip_vs_conn. 422 */ 423 if ((app = cp->app) == NULL) 424 return 1; 425 426 /* TCP is complicated */ 427 if (cp->protocol == IPPROTO_TCP) 428 return app_tcp_pkt_out(cp, skb, app); 429 430 /* 431 * Call private output hook function 432 */ 433 if (app->pkt_out == NULL) 434 return 1; 435 436 return app->pkt_out(app, cp, skb, NULL); 437} 438 439 440static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb, 441 struct ip_vs_app *app) 442{ 443 int diff; 444 const unsigned int tcp_offset = ip_hdrlen(skb); 445 struct tcphdr *th; 446 __u32 seq; 447 448 if (!skb_make_writable(skb, tcp_offset + sizeof(*th))) 449 return 0; 450 451 th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset); 452 453 /* 454 * Remember seq number in case this pkt gets resized 455 */ 456 seq = ntohl(th->seq); 457 458 /* 459 * Fix seq stuff if flagged as so. 460 */ 461 if (cp->flags & IP_VS_CONN_F_IN_SEQ) 462 vs_fix_seq(&cp->in_seq, th); 463 if (cp->flags & IP_VS_CONN_F_OUT_SEQ) 464 vs_fix_ack_seq(&cp->out_seq, th); 465 466 /* 467 * Call private input hook function 468 */ 469 if (app->pkt_in == NULL) 470 return 1; 471 472 if (!app->pkt_in(app, cp, skb, &diff)) 473 return 0; 474 475 /* 476 * Update ip_vs seq stuff if len has changed. 477 */ 478 if (diff != 0) 479 vs_seq_update(cp, &cp->in_seq, 480 IP_VS_CONN_F_IN_SEQ, seq, diff); 481 482 return 1; 483} 484 485/* 486 * Input pkt hook. Will call bound ip_vs_app specific function 487 * called by ipvs packet handler, assumes previously checked cp!=NULL. 488 * returns false if can't handle packet (oom). 489 */ 490int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb) 491{ 492 struct ip_vs_app *app; 493 494 /* 495 * check if application module is bound to 496 * this ip_vs_conn. 497 */ 498 if ((app = cp->app) == NULL) 499 return 1; 500 501 /* TCP is complicated */ 502 if (cp->protocol == IPPROTO_TCP) 503 return app_tcp_pkt_in(cp, skb, app); 504 505 /* 506 * Call private input hook function 507 */ 508 if (app->pkt_in == NULL) 509 return 1; 510 511 return app->pkt_in(app, cp, skb, NULL); 512} 513 514 515#ifdef CONFIG_PROC_FS 516/* 517 * /proc/net/ip_vs_app entry function 518 */ 519 520static struct ip_vs_app *ip_vs_app_idx(struct netns_ipvs *ipvs, loff_t pos) 521{ 522 struct ip_vs_app *app, *inc; 523 524 list_for_each_entry(app, &ipvs->app_list, a_list) { 525 list_for_each_entry(inc, &app->incs_list, a_list) { 526 if (pos-- == 0) 527 return inc; 528 } 529 } 530 return NULL; 531 532} 533 534static void *ip_vs_app_seq_start(struct seq_file *seq, loff_t *pos) 535{ 536 struct net *net = seq_file_net(seq); 537 struct netns_ipvs *ipvs = net_ipvs(net); 538 539 mutex_lock(&__ip_vs_app_mutex); 540 541 return *pos ? ip_vs_app_idx(ipvs, *pos - 1) : SEQ_START_TOKEN; 542} 543 544static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos) 545{ 546 struct ip_vs_app *inc, *app; 547 struct list_head *e; 548 struct net *net = seq_file_net(seq); 549 struct netns_ipvs *ipvs = net_ipvs(net); 550 551 ++*pos; 552 if (v == SEQ_START_TOKEN) 553 return ip_vs_app_idx(ipvs, 0); 554 555 inc = v; 556 app = inc->app; 557 558 if ((e = inc->a_list.next) != &app->incs_list) 559 return list_entry(e, struct ip_vs_app, a_list); 560 561 /* go on to next application */ 562 for (e = app->a_list.next; e != &ipvs->app_list; e = e->next) { 563 app = list_entry(e, struct ip_vs_app, a_list); 564 list_for_each_entry(inc, &app->incs_list, a_list) { 565 return inc; 566 } 567 } 568 return NULL; 569} 570 571static void ip_vs_app_seq_stop(struct seq_file *seq, void *v) 572{ 573 mutex_unlock(&__ip_vs_app_mutex); 574} 575 576static int ip_vs_app_seq_show(struct seq_file *seq, void *v) 577{ 578 if (v == SEQ_START_TOKEN) 579 seq_puts(seq, "prot port usecnt name\n"); 580 else { 581 const struct ip_vs_app *inc = v; 582 583 seq_printf(seq, "%-3s %-7u %-6d %-17s\n", 584 ip_vs_proto_name(inc->protocol), 585 ntohs(inc->port), 586 atomic_read(&inc->usecnt), 587 inc->name); 588 } 589 return 0; 590} 591 592static const struct seq_operations ip_vs_app_seq_ops = { 593 .start = ip_vs_app_seq_start, 594 .next = ip_vs_app_seq_next, 595 .stop = ip_vs_app_seq_stop, 596 .show = ip_vs_app_seq_show, 597}; 598 599static int ip_vs_app_open(struct inode *inode, struct file *file) 600{ 601 return seq_open_net(inode, file, &ip_vs_app_seq_ops, 602 sizeof(struct seq_net_private)); 603} 604 605static const struct file_operations ip_vs_app_fops = { 606 .owner = THIS_MODULE, 607 .open = ip_vs_app_open, 608 .read = seq_read, 609 .llseek = seq_lseek, 610 .release = seq_release_net, 611}; 612#endif 613 614int __net_init ip_vs_app_net_init(struct net *net) 615{ 616 struct netns_ipvs *ipvs = net_ipvs(net); 617 618 INIT_LIST_HEAD(&ipvs->app_list); 619 proc_create("ip_vs_app", 0, net->proc_net, &ip_vs_app_fops); 620 return 0; 621} 622 623void __net_exit ip_vs_app_net_cleanup(struct net *net) 624{ 625 unregister_ip_vs_app(net, NULL /* all */); 626 remove_proc_entry("ip_vs_app", net->proc_net); 627} 628