1/* 2 * IPVS An implementation of the IP virtual server support for the 3 * LINUX operating system. IPVS is now implemented as a module 4 * over the NetFilter framework. IPVS can be used to build a 5 * high-performance and highly available server based on a 6 * cluster of servers. 7 * 8 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> 9 * Peter Kese <peter.kese@ijs.si> 10 * Julian Anastasov <ja@ssi.bg> 11 * 12 * This program is free software; you can redistribute it and/or 13 * modify it under the terms of the GNU General Public License 14 * as published by the Free Software Foundation; either version 15 * 2 of the License, or (at your option) any later version. 16 * 17 * Changes: 18 * 19 */ 20 21#define KMSG_COMPONENT "IPVS" 22#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 23 24#include <linux/module.h> 25#include <linux/init.h> 26#include <linux/types.h> 27#include <linux/capability.h> 28#include <linux/fs.h> 29#include <linux/sysctl.h> 30#include <linux/proc_fs.h> 31#include <linux/workqueue.h> 32#include <linux/swap.h> 33#include <linux/seq_file.h> 34#include <linux/slab.h> 35 36#include <linux/netfilter.h> 37#include <linux/netfilter_ipv4.h> 38#include <linux/mutex.h> 39 40#include <net/net_namespace.h> 41#include <linux/nsproxy.h> 42#include <net/ip.h> 43#ifdef CONFIG_IP_VS_IPV6 44#include <net/ipv6.h> 45#include <net/ip6_route.h> 46#endif 47#include <net/route.h> 48#include <net/sock.h> 49#include <net/genetlink.h> 50 51#include <asm/uaccess.h> 52 53#include <net/ip_vs.h> 54 55/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */ 56static DEFINE_MUTEX(__ip_vs_mutex); 57 58/* sysctl variables */ 59 60#ifdef CONFIG_IP_VS_DEBUG 61static int sysctl_ip_vs_debug_level = 0; 62 63int ip_vs_get_debug_level(void) 64{ 65 return sysctl_ip_vs_debug_level; 66} 67#endif 68 69 70/* Protos */ 71static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup); 72 73 74#ifdef CONFIG_IP_VS_IPV6 75/* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */ 76static bool __ip_vs_addr_is_local_v6(struct net *net, 77 const struct in6_addr *addr) 78{ 79 struct flowi6 fl6 = { 80 .daddr = *addr, 81 }; 82 struct dst_entry *dst = ip6_route_output(net, NULL, &fl6); 83 bool is_local; 84 85 is_local = !dst->error && dst->dev && (dst->dev->flags & IFF_LOOPBACK); 86 87 dst_release(dst); 88 return is_local; 89} 90#endif 91 92#ifdef CONFIG_SYSCTL 93/* 94 * update_defense_level is called from keventd and from sysctl, 95 * so it needs to protect itself from softirqs 96 */ 97static void update_defense_level(struct netns_ipvs *ipvs) 98{ 99 struct sysinfo i; 100 static int old_secure_tcp = 0; 101 int availmem; 102 int nomem; 103 int to_change = -1; 104 105 /* we only count free and buffered memory (in pages) */ 106 si_meminfo(&i); 107 availmem = i.freeram + i.bufferram; 108 /* however in linux 2.5 the i.bufferram is total page cache size, 109 we need adjust it */ 110 /* si_swapinfo(&i); */ 111 /* availmem = availmem - (i.totalswap - i.freeswap); */ 112 113 nomem = (availmem < ipvs->sysctl_amemthresh); 114 115 local_bh_disable(); 116 117 /* drop_entry */ 118 spin_lock(&ipvs->dropentry_lock); 119 switch (ipvs->sysctl_drop_entry) { 120 case 0: 121 atomic_set(&ipvs->dropentry, 0); 122 break; 123 case 1: 124 if (nomem) { 125 atomic_set(&ipvs->dropentry, 1); 126 ipvs->sysctl_drop_entry = 2; 127 } else { 128 atomic_set(&ipvs->dropentry, 0); 129 } 130 break; 131 case 2: 132 if (nomem) { 133 atomic_set(&ipvs->dropentry, 1); 134 } else { 135 atomic_set(&ipvs->dropentry, 0); 136 ipvs->sysctl_drop_entry = 1; 137 }; 138 break; 139 case 3: 140 atomic_set(&ipvs->dropentry, 1); 141 break; 142 } 143 spin_unlock(&ipvs->dropentry_lock); 144 145 /* drop_packet */ 146 spin_lock(&ipvs->droppacket_lock); 147 switch (ipvs->sysctl_drop_packet) { 148 case 0: 149 ipvs->drop_rate = 0; 150 break; 151 case 1: 152 if (nomem) { 153 ipvs->drop_rate = ipvs->drop_counter 154 = ipvs->sysctl_amemthresh / 155 (ipvs->sysctl_amemthresh-availmem); 156 ipvs->sysctl_drop_packet = 2; 157 } else { 158 ipvs->drop_rate = 0; 159 } 160 break; 161 case 2: 162 if (nomem) { 163 ipvs->drop_rate = ipvs->drop_counter 164 = ipvs->sysctl_amemthresh / 165 (ipvs->sysctl_amemthresh-availmem); 166 } else { 167 ipvs->drop_rate = 0; 168 ipvs->sysctl_drop_packet = 1; 169 } 170 break; 171 case 3: 172 ipvs->drop_rate = ipvs->sysctl_am_droprate; 173 break; 174 } 175 spin_unlock(&ipvs->droppacket_lock); 176 177 /* secure_tcp */ 178 spin_lock(&ipvs->securetcp_lock); 179 switch (ipvs->sysctl_secure_tcp) { 180 case 0: 181 if (old_secure_tcp >= 2) 182 to_change = 0; 183 break; 184 case 1: 185 if (nomem) { 186 if (old_secure_tcp < 2) 187 to_change = 1; 188 ipvs->sysctl_secure_tcp = 2; 189 } else { 190 if (old_secure_tcp >= 2) 191 to_change = 0; 192 } 193 break; 194 case 2: 195 if (nomem) { 196 if (old_secure_tcp < 2) 197 to_change = 1; 198 } else { 199 if (old_secure_tcp >= 2) 200 to_change = 0; 201 ipvs->sysctl_secure_tcp = 1; 202 } 203 break; 204 case 3: 205 if (old_secure_tcp < 2) 206 to_change = 1; 207 break; 208 } 209 old_secure_tcp = ipvs->sysctl_secure_tcp; 210 if (to_change >= 0) 211 ip_vs_protocol_timeout_change(ipvs, 212 ipvs->sysctl_secure_tcp > 1); 213 spin_unlock(&ipvs->securetcp_lock); 214 215 local_bh_enable(); 216} 217 218 219/* 220 * Timer for checking the defense 221 */ 222#define DEFENSE_TIMER_PERIOD 1*HZ 223 224static void defense_work_handler(struct work_struct *work) 225{ 226 struct netns_ipvs *ipvs = 227 container_of(work, struct netns_ipvs, defense_work.work); 228 229 update_defense_level(ipvs); 230 if (atomic_read(&ipvs->dropentry)) 231 ip_vs_random_dropentry(ipvs->net); 232 schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD); 233} 234#endif 235 236int 237ip_vs_use_count_inc(void) 238{ 239 return try_module_get(THIS_MODULE); 240} 241 242void 243ip_vs_use_count_dec(void) 244{ 245 module_put(THIS_MODULE); 246} 247 248 249/* 250 * Hash table: for virtual service lookups 251 */ 252#define IP_VS_SVC_TAB_BITS 8 253#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS) 254#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1) 255 256/* the service table hashed by <protocol, addr, port> */ 257static struct hlist_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE]; 258/* the service table hashed by fwmark */ 259static struct hlist_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE]; 260 261 262/* 263 * Returns hash value for virtual service 264 */ 265static inline unsigned int 266ip_vs_svc_hashkey(struct net *net, int af, unsigned int proto, 267 const union nf_inet_addr *addr, __be16 port) 268{ 269 register unsigned int porth = ntohs(port); 270 __be32 addr_fold = addr->ip; 271 __u32 ahash; 272 273#ifdef CONFIG_IP_VS_IPV6 274 if (af == AF_INET6) 275 addr_fold = addr->ip6[0]^addr->ip6[1]^ 276 addr->ip6[2]^addr->ip6[3]; 277#endif 278 ahash = ntohl(addr_fold); 279 ahash ^= ((size_t) net >> 8); 280 281 return (proto ^ ahash ^ (porth >> IP_VS_SVC_TAB_BITS) ^ porth) & 282 IP_VS_SVC_TAB_MASK; 283} 284 285/* 286 * Returns hash value of fwmark for virtual service lookup 287 */ 288static inline unsigned int ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark) 289{ 290 return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK; 291} 292 293/* 294 * Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port> 295 * or in the ip_vs_svc_fwm_table by fwmark. 296 * Should be called with locked tables. 297 */ 298static int ip_vs_svc_hash(struct ip_vs_service *svc) 299{ 300 unsigned int hash; 301 302 if (svc->flags & IP_VS_SVC_F_HASHED) { 303 pr_err("%s(): request for already hashed, called from %pF\n", 304 __func__, __builtin_return_address(0)); 305 return 0; 306 } 307 308 if (svc->fwmark == 0) { 309 /* 310 * Hash it by <netns,protocol,addr,port> in ip_vs_svc_table 311 */ 312 hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol, 313 &svc->addr, svc->port); 314 hlist_add_head_rcu(&svc->s_list, &ip_vs_svc_table[hash]); 315 } else { 316 /* 317 * Hash it by fwmark in svc_fwm_table 318 */ 319 hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark); 320 hlist_add_head_rcu(&svc->f_list, &ip_vs_svc_fwm_table[hash]); 321 } 322 323 svc->flags |= IP_VS_SVC_F_HASHED; 324 /* increase its refcnt because it is referenced by the svc table */ 325 atomic_inc(&svc->refcnt); 326 return 1; 327} 328 329 330/* 331 * Unhashes a service from svc_table / svc_fwm_table. 332 * Should be called with locked tables. 333 */ 334static int ip_vs_svc_unhash(struct ip_vs_service *svc) 335{ 336 if (!(svc->flags & IP_VS_SVC_F_HASHED)) { 337 pr_err("%s(): request for unhash flagged, called from %pF\n", 338 __func__, __builtin_return_address(0)); 339 return 0; 340 } 341 342 if (svc->fwmark == 0) { 343 /* Remove it from the svc_table table */ 344 hlist_del_rcu(&svc->s_list); 345 } else { 346 /* Remove it from the svc_fwm_table table */ 347 hlist_del_rcu(&svc->f_list); 348 } 349 350 svc->flags &= ~IP_VS_SVC_F_HASHED; 351 atomic_dec(&svc->refcnt); 352 return 1; 353} 354 355 356/* 357 * Get service by {netns, proto,addr,port} in the service table. 358 */ 359static inline struct ip_vs_service * 360__ip_vs_service_find(struct net *net, int af, __u16 protocol, 361 const union nf_inet_addr *vaddr, __be16 vport) 362{ 363 unsigned int hash; 364 struct ip_vs_service *svc; 365 366 /* Check for "full" addressed entries */ 367 hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport); 368 369 hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[hash], s_list) { 370 if ((svc->af == af) 371 && ip_vs_addr_equal(af, &svc->addr, vaddr) 372 && (svc->port == vport) 373 && (svc->protocol == protocol) 374 && net_eq(svc->net, net)) { 375 /* HIT */ 376 return svc; 377 } 378 } 379 380 return NULL; 381} 382 383 384/* 385 * Get service by {fwmark} in the service table. 386 */ 387static inline struct ip_vs_service * 388__ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark) 389{ 390 unsigned int hash; 391 struct ip_vs_service *svc; 392 393 /* Check for fwmark addressed entries */ 394 hash = ip_vs_svc_fwm_hashkey(net, fwmark); 395 396 hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[hash], f_list) { 397 if (svc->fwmark == fwmark && svc->af == af 398 && net_eq(svc->net, net)) { 399 /* HIT */ 400 return svc; 401 } 402 } 403 404 return NULL; 405} 406 407/* Find service, called under RCU lock */ 408struct ip_vs_service * 409ip_vs_service_find(struct net *net, int af, __u32 fwmark, __u16 protocol, 410 const union nf_inet_addr *vaddr, __be16 vport) 411{ 412 struct ip_vs_service *svc; 413 struct netns_ipvs *ipvs = net_ipvs(net); 414 415 /* 416 * Check the table hashed by fwmark first 417 */ 418 if (fwmark) { 419 svc = __ip_vs_svc_fwm_find(net, af, fwmark); 420 if (svc) 421 goto out; 422 } 423 424 /* 425 * Check the table hashed by <protocol,addr,port> 426 * for "full" addressed entries 427 */ 428 svc = __ip_vs_service_find(net, af, protocol, vaddr, vport); 429 430 if (svc == NULL 431 && protocol == IPPROTO_TCP 432 && atomic_read(&ipvs->ftpsvc_counter) 433 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) { 434 /* 435 * Check if ftp service entry exists, the packet 436 * might belong to FTP data connections. 437 */ 438 svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT); 439 } 440 441 if (svc == NULL 442 && atomic_read(&ipvs->nullsvc_counter)) { 443 /* 444 * Check if the catch-all port (port zero) exists 445 */ 446 svc = __ip_vs_service_find(net, af, protocol, vaddr, 0); 447 } 448 449 out: 450 IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n", 451 fwmark, ip_vs_proto_name(protocol), 452 IP_VS_DBG_ADDR(af, vaddr), ntohs(vport), 453 svc ? "hit" : "not hit"); 454 455 return svc; 456} 457 458 459static inline void 460__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc) 461{ 462 atomic_inc(&svc->refcnt); 463 dest->svc = svc; 464} 465 466static void ip_vs_service_free(struct ip_vs_service *svc) 467{ 468 if (svc->stats.cpustats) 469 free_percpu(svc->stats.cpustats); 470 kfree(svc); 471} 472 473static void 474__ip_vs_unbind_svc(struct ip_vs_dest *dest) 475{ 476 struct ip_vs_service *svc = dest->svc; 477 478 dest->svc = NULL; 479 if (atomic_dec_and_test(&svc->refcnt)) { 480 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n", 481 svc->fwmark, 482 IP_VS_DBG_ADDR(svc->af, &svc->addr), 483 ntohs(svc->port)); 484 ip_vs_service_free(svc); 485 } 486} 487 488 489/* 490 * Returns hash value for real service 491 */ 492static inline unsigned int ip_vs_rs_hashkey(int af, 493 const union nf_inet_addr *addr, 494 __be16 port) 495{ 496 register unsigned int porth = ntohs(port); 497 __be32 addr_fold = addr->ip; 498 499#ifdef CONFIG_IP_VS_IPV6 500 if (af == AF_INET6) 501 addr_fold = addr->ip6[0]^addr->ip6[1]^ 502 addr->ip6[2]^addr->ip6[3]; 503#endif 504 505 return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth) 506 & IP_VS_RTAB_MASK; 507} 508 509/* Hash ip_vs_dest in rs_table by <proto,addr,port>. */ 510static void ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest) 511{ 512 unsigned int hash; 513 514 if (dest->in_rs_table) 515 return; 516 517 /* 518 * Hash by proto,addr,port, 519 * which are the parameters of the real service. 520 */ 521 hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port); 522 523 hlist_add_head_rcu(&dest->d_list, &ipvs->rs_table[hash]); 524 dest->in_rs_table = 1; 525} 526 527/* Unhash ip_vs_dest from rs_table. */ 528static void ip_vs_rs_unhash(struct ip_vs_dest *dest) 529{ 530 /* 531 * Remove it from the rs_table table. 532 */ 533 if (dest->in_rs_table) { 534 hlist_del_rcu(&dest->d_list); 535 dest->in_rs_table = 0; 536 } 537} 538 539/* Check if real service by <proto,addr,port> is present */ 540bool ip_vs_has_real_service(struct net *net, int af, __u16 protocol, 541 const union nf_inet_addr *daddr, __be16 dport) 542{ 543 struct netns_ipvs *ipvs = net_ipvs(net); 544 unsigned int hash; 545 struct ip_vs_dest *dest; 546 547 /* Check for "full" addressed entries */ 548 hash = ip_vs_rs_hashkey(af, daddr, dport); 549 550 rcu_read_lock(); 551 hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) { 552 if (dest->port == dport && 553 dest->af == af && 554 ip_vs_addr_equal(af, &dest->addr, daddr) && 555 (dest->protocol == protocol || dest->vfwmark)) { 556 /* HIT */ 557 rcu_read_unlock(); 558 return true; 559 } 560 } 561 rcu_read_unlock(); 562 563 return false; 564} 565 566/* Lookup destination by {addr,port} in the given service 567 * Called under RCU lock. 568 */ 569static struct ip_vs_dest * 570ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, 571 __be16 dport) 572{ 573 struct ip_vs_dest *dest; 574 575 /* 576 * Find the destination for the given service 577 */ 578 list_for_each_entry_rcu(dest, &svc->destinations, n_list) { 579 if ((dest->af == svc->af) 580 && ip_vs_addr_equal(svc->af, &dest->addr, daddr) 581 && (dest->port == dport)) { 582 /* HIT */ 583 return dest; 584 } 585 } 586 587 return NULL; 588} 589 590/* 591 * Find destination by {daddr,dport,vaddr,protocol} 592 * Created to be used in ip_vs_process_message() in 593 * the backup synchronization daemon. It finds the 594 * destination to be bound to the received connection 595 * on the backup. 596 * Called under RCU lock, no refcnt is returned. 597 */ 598struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af, 599 const union nf_inet_addr *daddr, 600 __be16 dport, 601 const union nf_inet_addr *vaddr, 602 __be16 vport, __u16 protocol, __u32 fwmark, 603 __u32 flags) 604{ 605 struct ip_vs_dest *dest; 606 struct ip_vs_service *svc; 607 __be16 port = dport; 608 609 svc = ip_vs_service_find(net, af, fwmark, protocol, vaddr, vport); 610 if (!svc) 611 return NULL; 612 if (fwmark && (flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) 613 port = 0; 614 dest = ip_vs_lookup_dest(svc, daddr, port); 615 if (!dest) 616 dest = ip_vs_lookup_dest(svc, daddr, port ^ dport); 617 return dest; 618} 619 620void ip_vs_dest_dst_rcu_free(struct rcu_head *head) 621{ 622 struct ip_vs_dest_dst *dest_dst = container_of(head, 623 struct ip_vs_dest_dst, 624 rcu_head); 625 626 dst_release(dest_dst->dst_cache); 627 kfree(dest_dst); 628} 629 630/* Release dest_dst and dst_cache for dest in user context */ 631static void __ip_vs_dst_cache_reset(struct ip_vs_dest *dest) 632{ 633 struct ip_vs_dest_dst *old; 634 635 old = rcu_dereference_protected(dest->dest_dst, 1); 636 if (old) { 637 RCU_INIT_POINTER(dest->dest_dst, NULL); 638 call_rcu(&old->rcu_head, ip_vs_dest_dst_rcu_free); 639 } 640} 641 642/* 643 * Lookup dest by {svc,addr,port} in the destination trash. 644 * The destination trash is used to hold the destinations that are removed 645 * from the service table but are still referenced by some conn entries. 646 * The reason to add the destination trash is when the dest is temporary 647 * down (either by administrator or by monitor program), the dest can be 648 * picked back from the trash, the remaining connections to the dest can 649 * continue, and the counting information of the dest is also useful for 650 * scheduling. 651 */ 652static struct ip_vs_dest * 653ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, 654 __be16 dport) 655{ 656 struct ip_vs_dest *dest; 657 struct netns_ipvs *ipvs = net_ipvs(svc->net); 658 659 /* 660 * Find the destination in trash 661 */ 662 spin_lock_bh(&ipvs->dest_trash_lock); 663 list_for_each_entry(dest, &ipvs->dest_trash, t_list) { 664 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, " 665 "dest->refcnt=%d\n", 666 dest->vfwmark, 667 IP_VS_DBG_ADDR(svc->af, &dest->addr), 668 ntohs(dest->port), 669 atomic_read(&dest->refcnt)); 670 /* We can not reuse dest while in grace period 671 * because conns still can use dest->svc 672 */ 673 if (test_bit(IP_VS_DEST_STATE_REMOVING, &dest->state)) 674 continue; 675 if (dest->af == svc->af && 676 ip_vs_addr_equal(svc->af, &dest->addr, daddr) && 677 dest->port == dport && 678 dest->vfwmark == svc->fwmark && 679 dest->protocol == svc->protocol && 680 (svc->fwmark || 681 (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) && 682 dest->vport == svc->port))) { 683 /* HIT */ 684 list_del(&dest->t_list); 685 ip_vs_dest_hold(dest); 686 goto out; 687 } 688 } 689 690 dest = NULL; 691 692out: 693 spin_unlock_bh(&ipvs->dest_trash_lock); 694 695 return dest; 696} 697 698static void ip_vs_dest_free(struct ip_vs_dest *dest) 699{ 700 __ip_vs_dst_cache_reset(dest); 701 __ip_vs_unbind_svc(dest); 702 free_percpu(dest->stats.cpustats); 703 kfree(dest); 704} 705 706/* 707 * Clean up all the destinations in the trash 708 * Called by the ip_vs_control_cleanup() 709 * 710 * When the ip_vs_control_clearup is activated by ipvs module exit, 711 * the service tables must have been flushed and all the connections 712 * are expired, and the refcnt of each destination in the trash must 713 * be 0, so we simply release them here. 714 */ 715static void ip_vs_trash_cleanup(struct net *net) 716{ 717 struct ip_vs_dest *dest, *nxt; 718 struct netns_ipvs *ipvs = net_ipvs(net); 719 720 del_timer_sync(&ipvs->dest_trash_timer); 721 /* No need to use dest_trash_lock */ 722 list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, t_list) { 723 list_del(&dest->t_list); 724 ip_vs_dest_free(dest); 725 } 726} 727 728static void 729ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src) 730{ 731#define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->ustats.c - src->ustats0.c 732 733 spin_lock_bh(&src->lock); 734 735 IP_VS_SHOW_STATS_COUNTER(conns); 736 IP_VS_SHOW_STATS_COUNTER(inpkts); 737 IP_VS_SHOW_STATS_COUNTER(outpkts); 738 IP_VS_SHOW_STATS_COUNTER(inbytes); 739 IP_VS_SHOW_STATS_COUNTER(outbytes); 740 741 ip_vs_read_estimator(dst, src); 742 743 spin_unlock_bh(&src->lock); 744} 745 746static void 747ip_vs_zero_stats(struct ip_vs_stats *stats) 748{ 749 spin_lock_bh(&stats->lock); 750 751 /* get current counters as zero point, rates are zeroed */ 752 753#define IP_VS_ZERO_STATS_COUNTER(c) stats->ustats0.c = stats->ustats.c 754 755 IP_VS_ZERO_STATS_COUNTER(conns); 756 IP_VS_ZERO_STATS_COUNTER(inpkts); 757 IP_VS_ZERO_STATS_COUNTER(outpkts); 758 IP_VS_ZERO_STATS_COUNTER(inbytes); 759 IP_VS_ZERO_STATS_COUNTER(outbytes); 760 761 ip_vs_zero_estimator(stats); 762 763 spin_unlock_bh(&stats->lock); 764} 765 766/* 767 * Update a destination in the given service 768 */ 769static void 770__ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, 771 struct ip_vs_dest_user_kern *udest, int add) 772{ 773 struct netns_ipvs *ipvs = net_ipvs(svc->net); 774 struct ip_vs_scheduler *sched; 775 int conn_flags; 776 777 /* set the weight and the flags */ 778 atomic_set(&dest->weight, udest->weight); 779 conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK; 780 conn_flags |= IP_VS_CONN_F_INACTIVE; 781 782 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */ 783 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) { 784 conn_flags |= IP_VS_CONN_F_NOOUTPUT; 785 } else { 786 /* 787 * Put the real service in rs_table if not present. 788 * For now only for NAT! 789 */ 790 ip_vs_rs_hash(ipvs, dest); 791 } 792 atomic_set(&dest->conn_flags, conn_flags); 793 794 /* bind the service */ 795 if (!dest->svc) { 796 __ip_vs_bind_svc(dest, svc); 797 } else { 798 if (dest->svc != svc) { 799 __ip_vs_unbind_svc(dest); 800 ip_vs_zero_stats(&dest->stats); 801 __ip_vs_bind_svc(dest, svc); 802 } 803 } 804 805 /* set the dest status flags */ 806 dest->flags |= IP_VS_DEST_F_AVAILABLE; 807 808 if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold) 809 dest->flags &= ~IP_VS_DEST_F_OVERLOAD; 810 dest->u_threshold = udest->u_threshold; 811 dest->l_threshold = udest->l_threshold; 812 813 spin_lock_bh(&dest->dst_lock); 814 __ip_vs_dst_cache_reset(dest); 815 spin_unlock_bh(&dest->dst_lock); 816 817 sched = rcu_dereference_protected(svc->scheduler, 1); 818 if (add) { 819 ip_vs_start_estimator(svc->net, &dest->stats); 820 list_add_rcu(&dest->n_list, &svc->destinations); 821 svc->num_dests++; 822 if (sched->add_dest) 823 sched->add_dest(svc, dest); 824 } else { 825 if (sched->upd_dest) 826 sched->upd_dest(svc, dest); 827 } 828} 829 830 831/* 832 * Create a destination for the given service 833 */ 834static int 835ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest, 836 struct ip_vs_dest **dest_p) 837{ 838 struct ip_vs_dest *dest; 839 unsigned int atype; 840 841 EnterFunction(2); 842 843#ifdef CONFIG_IP_VS_IPV6 844 if (svc->af == AF_INET6) { 845 atype = ipv6_addr_type(&udest->addr.in6); 846 if ((!(atype & IPV6_ADDR_UNICAST) || 847 atype & IPV6_ADDR_LINKLOCAL) && 848 !__ip_vs_addr_is_local_v6(svc->net, &udest->addr.in6)) 849 return -EINVAL; 850 } else 851#endif 852 { 853 atype = inet_addr_type(svc->net, udest->addr.ip); 854 if (atype != RTN_LOCAL && atype != RTN_UNICAST) 855 return -EINVAL; 856 } 857 858 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL); 859 if (dest == NULL) 860 return -ENOMEM; 861 862 dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats); 863 if (!dest->stats.cpustats) 864 goto err_alloc; 865 866 dest->af = svc->af; 867 dest->protocol = svc->protocol; 868 dest->vaddr = svc->addr; 869 dest->vport = svc->port; 870 dest->vfwmark = svc->fwmark; 871 ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr); 872 dest->port = udest->port; 873 874 atomic_set(&dest->activeconns, 0); 875 atomic_set(&dest->inactconns, 0); 876 atomic_set(&dest->persistconns, 0); 877 atomic_set(&dest->refcnt, 1); 878 879 INIT_HLIST_NODE(&dest->d_list); 880 spin_lock_init(&dest->dst_lock); 881 spin_lock_init(&dest->stats.lock); 882 __ip_vs_update_dest(svc, dest, udest, 1); 883 884 *dest_p = dest; 885 886 LeaveFunction(2); 887 return 0; 888 889err_alloc: 890 kfree(dest); 891 return -ENOMEM; 892} 893 894 895/* 896 * Add a destination into an existing service 897 */ 898static int 899ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) 900{ 901 struct ip_vs_dest *dest; 902 union nf_inet_addr daddr; 903 __be16 dport = udest->port; 904 int ret; 905 906 EnterFunction(2); 907 908 if (udest->weight < 0) { 909 pr_err("%s(): server weight less than zero\n", __func__); 910 return -ERANGE; 911 } 912 913 if (udest->l_threshold > udest->u_threshold) { 914 pr_err("%s(): lower threshold is higher than upper threshold\n", 915 __func__); 916 return -ERANGE; 917 } 918 919 ip_vs_addr_copy(svc->af, &daddr, &udest->addr); 920 921 /* We use function that requires RCU lock */ 922 rcu_read_lock(); 923 dest = ip_vs_lookup_dest(svc, &daddr, dport); 924 rcu_read_unlock(); 925 926 if (dest != NULL) { 927 IP_VS_DBG(1, "%s(): dest already exists\n", __func__); 928 return -EEXIST; 929 } 930 931 /* 932 * Check if the dest already exists in the trash and 933 * is from the same service 934 */ 935 dest = ip_vs_trash_get_dest(svc, &daddr, dport); 936 937 if (dest != NULL) { 938 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, " 939 "dest->refcnt=%d, service %u/%s:%u\n", 940 IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport), 941 atomic_read(&dest->refcnt), 942 dest->vfwmark, 943 IP_VS_DBG_ADDR(svc->af, &dest->vaddr), 944 ntohs(dest->vport)); 945 946 __ip_vs_update_dest(svc, dest, udest, 1); 947 ret = 0; 948 } else { 949 /* 950 * Allocate and initialize the dest structure 951 */ 952 ret = ip_vs_new_dest(svc, udest, &dest); 953 } 954 LeaveFunction(2); 955 956 return ret; 957} 958 959 960/* 961 * Edit a destination in the given service 962 */ 963static int 964ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) 965{ 966 struct ip_vs_dest *dest; 967 union nf_inet_addr daddr; 968 __be16 dport = udest->port; 969 970 EnterFunction(2); 971 972 if (udest->weight < 0) { 973 pr_err("%s(): server weight less than zero\n", __func__); 974 return -ERANGE; 975 } 976 977 if (udest->l_threshold > udest->u_threshold) { 978 pr_err("%s(): lower threshold is higher than upper threshold\n", 979 __func__); 980 return -ERANGE; 981 } 982 983 ip_vs_addr_copy(svc->af, &daddr, &udest->addr); 984 985 /* We use function that requires RCU lock */ 986 rcu_read_lock(); 987 dest = ip_vs_lookup_dest(svc, &daddr, dport); 988 rcu_read_unlock(); 989 990 if (dest == NULL) { 991 IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__); 992 return -ENOENT; 993 } 994 995 __ip_vs_update_dest(svc, dest, udest, 0); 996 LeaveFunction(2); 997 998 return 0; 999} 1000 1001static void ip_vs_dest_wait_readers(struct rcu_head *head) 1002{ 1003 struct ip_vs_dest *dest = container_of(head, struct ip_vs_dest, 1004 rcu_head); 1005 1006 /* End of grace period after unlinking */ 1007 clear_bit(IP_VS_DEST_STATE_REMOVING, &dest->state); 1008} 1009 1010 1011/* 1012 * Delete a destination (must be already unlinked from the service) 1013 */ 1014static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest, 1015 bool cleanup) 1016{ 1017 struct netns_ipvs *ipvs = net_ipvs(net); 1018 1019 ip_vs_stop_estimator(net, &dest->stats); 1020 1021 /* 1022 * Remove it from the d-linked list with the real services. 1023 */ 1024 ip_vs_rs_unhash(dest); 1025 1026 if (!cleanup) { 1027 set_bit(IP_VS_DEST_STATE_REMOVING, &dest->state); 1028 call_rcu(&dest->rcu_head, ip_vs_dest_wait_readers); 1029 } 1030 1031 spin_lock_bh(&ipvs->dest_trash_lock); 1032 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, dest->refcnt=%d\n", 1033 IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port), 1034 atomic_read(&dest->refcnt)); 1035 if (list_empty(&ipvs->dest_trash) && !cleanup) 1036 mod_timer(&ipvs->dest_trash_timer, 1037 jiffies + IP_VS_DEST_TRASH_PERIOD); 1038 /* dest lives in trash without reference */ 1039 list_add(&dest->t_list, &ipvs->dest_trash); 1040 spin_unlock_bh(&ipvs->dest_trash_lock); 1041 ip_vs_dest_put(dest); 1042} 1043 1044 1045/* 1046 * Unlink a destination from the given service 1047 */ 1048static void __ip_vs_unlink_dest(struct ip_vs_service *svc, 1049 struct ip_vs_dest *dest, 1050 int svcupd) 1051{ 1052 dest->flags &= ~IP_VS_DEST_F_AVAILABLE; 1053 1054 /* 1055 * Remove it from the d-linked destination list. 1056 */ 1057 list_del_rcu(&dest->n_list); 1058 svc->num_dests--; 1059 1060 if (svcupd) { 1061 struct ip_vs_scheduler *sched; 1062 1063 sched = rcu_dereference_protected(svc->scheduler, 1); 1064 if (sched->del_dest) 1065 sched->del_dest(svc, dest); 1066 } 1067} 1068 1069 1070/* 1071 * Delete a destination server in the given service 1072 */ 1073static int 1074ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) 1075{ 1076 struct ip_vs_dest *dest; 1077 __be16 dport = udest->port; 1078 1079 EnterFunction(2); 1080 1081 /* We use function that requires RCU lock */ 1082 rcu_read_lock(); 1083 dest = ip_vs_lookup_dest(svc, &udest->addr, dport); 1084 rcu_read_unlock(); 1085 1086 if (dest == NULL) { 1087 IP_VS_DBG(1, "%s(): destination not found!\n", __func__); 1088 return -ENOENT; 1089 } 1090 1091 /* 1092 * Unlink dest from the service 1093 */ 1094 __ip_vs_unlink_dest(svc, dest, 1); 1095 1096 /* 1097 * Delete the destination 1098 */ 1099 __ip_vs_del_dest(svc->net, dest, false); 1100 1101 LeaveFunction(2); 1102 1103 return 0; 1104} 1105 1106static void ip_vs_dest_trash_expire(unsigned long data) 1107{ 1108 struct net *net = (struct net *) data; 1109 struct netns_ipvs *ipvs = net_ipvs(net); 1110 struct ip_vs_dest *dest, *next; 1111 1112 spin_lock(&ipvs->dest_trash_lock); 1113 list_for_each_entry_safe(dest, next, &ipvs->dest_trash, t_list) { 1114 /* Skip if dest is in grace period */ 1115 if (test_bit(IP_VS_DEST_STATE_REMOVING, &dest->state)) 1116 continue; 1117 if (atomic_read(&dest->refcnt) > 0) 1118 continue; 1119 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u from trash\n", 1120 dest->vfwmark, 1121 IP_VS_DBG_ADDR(dest->svc->af, &dest->addr), 1122 ntohs(dest->port)); 1123 list_del(&dest->t_list); 1124 ip_vs_dest_free(dest); 1125 } 1126 if (!list_empty(&ipvs->dest_trash)) 1127 mod_timer(&ipvs->dest_trash_timer, 1128 jiffies + IP_VS_DEST_TRASH_PERIOD); 1129 spin_unlock(&ipvs->dest_trash_lock); 1130} 1131 1132/* 1133 * Add a service into the service hash table 1134 */ 1135static int 1136ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, 1137 struct ip_vs_service **svc_p) 1138{ 1139 int ret = 0; 1140 struct ip_vs_scheduler *sched = NULL; 1141 struct ip_vs_pe *pe = NULL; 1142 struct ip_vs_service *svc = NULL; 1143 struct netns_ipvs *ipvs = net_ipvs(net); 1144 1145 /* increase the module use count */ 1146 ip_vs_use_count_inc(); 1147 1148 /* Lookup the scheduler by 'u->sched_name' */ 1149 sched = ip_vs_scheduler_get(u->sched_name); 1150 if (sched == NULL) { 1151 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name); 1152 ret = -ENOENT; 1153 goto out_err; 1154 } 1155 1156 if (u->pe_name && *u->pe_name) { 1157 pe = ip_vs_pe_getbyname(u->pe_name); 1158 if (pe == NULL) { 1159 pr_info("persistence engine module ip_vs_pe_%s " 1160 "not found\n", u->pe_name); 1161 ret = -ENOENT; 1162 goto out_err; 1163 } 1164 } 1165 1166#ifdef CONFIG_IP_VS_IPV6 1167 if (u->af == AF_INET6) { 1168 __u32 plen = (__force __u32) u->netmask; 1169 1170 if (plen < 1 || plen > 128) { 1171 ret = -EINVAL; 1172 goto out_err; 1173 } 1174 } 1175#endif 1176 1177 svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL); 1178 if (svc == NULL) { 1179 IP_VS_DBG(1, "%s(): no memory\n", __func__); 1180 ret = -ENOMEM; 1181 goto out_err; 1182 } 1183 svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats); 1184 if (!svc->stats.cpustats) { 1185 ret = -ENOMEM; 1186 goto out_err; 1187 } 1188 1189 /* I'm the first user of the service */ 1190 atomic_set(&svc->refcnt, 0); 1191 1192 svc->af = u->af; 1193 svc->protocol = u->protocol; 1194 ip_vs_addr_copy(svc->af, &svc->addr, &u->addr); 1195 svc->port = u->port; 1196 svc->fwmark = u->fwmark; 1197 svc->flags = u->flags; 1198 svc->timeout = u->timeout * HZ; 1199 svc->netmask = u->netmask; 1200 svc->net = net; 1201 1202 INIT_LIST_HEAD(&svc->destinations); 1203 spin_lock_init(&svc->sched_lock); 1204 spin_lock_init(&svc->stats.lock); 1205 1206 /* Bind the scheduler */ 1207 ret = ip_vs_bind_scheduler(svc, sched); 1208 if (ret) 1209 goto out_err; 1210 sched = NULL; 1211 1212 /* Bind the ct retriever */ 1213 RCU_INIT_POINTER(svc->pe, pe); 1214 pe = NULL; 1215 1216 /* Update the virtual service counters */ 1217 if (svc->port == FTPPORT) 1218 atomic_inc(&ipvs->ftpsvc_counter); 1219 else if (svc->port == 0) 1220 atomic_inc(&ipvs->nullsvc_counter); 1221 1222 ip_vs_start_estimator(net, &svc->stats); 1223 1224 /* Count only IPv4 services for old get/setsockopt interface */ 1225 if (svc->af == AF_INET) 1226 ipvs->num_services++; 1227 1228 /* Hash the service into the service table */ 1229 ip_vs_svc_hash(svc); 1230 1231 *svc_p = svc; 1232 /* Now there is a service - full throttle */ 1233 ipvs->enable = 1; 1234 return 0; 1235 1236 1237 out_err: 1238 if (svc != NULL) { 1239 ip_vs_unbind_scheduler(svc, sched); 1240 ip_vs_service_free(svc); 1241 } 1242 ip_vs_scheduler_put(sched); 1243 ip_vs_pe_put(pe); 1244 1245 /* decrease the module use count */ 1246 ip_vs_use_count_dec(); 1247 1248 return ret; 1249} 1250 1251 1252/* 1253 * Edit a service and bind it with a new scheduler 1254 */ 1255static int 1256ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) 1257{ 1258 struct ip_vs_scheduler *sched, *old_sched; 1259 struct ip_vs_pe *pe = NULL, *old_pe = NULL; 1260 int ret = 0; 1261 1262 /* 1263 * Lookup the scheduler, by 'u->sched_name' 1264 */ 1265 sched = ip_vs_scheduler_get(u->sched_name); 1266 if (sched == NULL) { 1267 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name); 1268 return -ENOENT; 1269 } 1270 old_sched = sched; 1271 1272 if (u->pe_name && *u->pe_name) { 1273 pe = ip_vs_pe_getbyname(u->pe_name); 1274 if (pe == NULL) { 1275 pr_info("persistence engine module ip_vs_pe_%s " 1276 "not found\n", u->pe_name); 1277 ret = -ENOENT; 1278 goto out; 1279 } 1280 old_pe = pe; 1281 } 1282 1283#ifdef CONFIG_IP_VS_IPV6 1284 if (u->af == AF_INET6) { 1285 __u32 plen = (__force __u32) u->netmask; 1286 1287 if (plen < 1 || plen > 128) { 1288 ret = -EINVAL; 1289 goto out; 1290 } 1291 } 1292#endif 1293 1294 old_sched = rcu_dereference_protected(svc->scheduler, 1); 1295 if (sched != old_sched) { 1296 /* Bind the new scheduler */ 1297 ret = ip_vs_bind_scheduler(svc, sched); 1298 if (ret) { 1299 old_sched = sched; 1300 goto out; 1301 } 1302 /* Unbind the old scheduler on success */ 1303 ip_vs_unbind_scheduler(svc, old_sched); 1304 } 1305 1306 /* 1307 * Set the flags and timeout value 1308 */ 1309 svc->flags = u->flags | IP_VS_SVC_F_HASHED; 1310 svc->timeout = u->timeout * HZ; 1311 svc->netmask = u->netmask; 1312 1313 old_pe = rcu_dereference_protected(svc->pe, 1); 1314 if (pe != old_pe) 1315 rcu_assign_pointer(svc->pe, pe); 1316 1317out: 1318 ip_vs_scheduler_put(old_sched); 1319 ip_vs_pe_put(old_pe); 1320 return ret; 1321} 1322 1323static void ip_vs_service_rcu_free(struct rcu_head *head) 1324{ 1325 struct ip_vs_service *svc; 1326 1327 svc = container_of(head, struct ip_vs_service, rcu_head); 1328 ip_vs_service_free(svc); 1329} 1330 1331/* 1332 * Delete a service from the service list 1333 * - The service must be unlinked, unlocked and not referenced! 1334 * - We are called under _bh lock 1335 */ 1336static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup) 1337{ 1338 struct ip_vs_dest *dest, *nxt; 1339 struct ip_vs_scheduler *old_sched; 1340 struct ip_vs_pe *old_pe; 1341 struct netns_ipvs *ipvs = net_ipvs(svc->net); 1342 1343 pr_info("%s: enter\n", __func__); 1344 1345 /* Count only IPv4 services for old get/setsockopt interface */ 1346 if (svc->af == AF_INET) 1347 ipvs->num_services--; 1348 1349 ip_vs_stop_estimator(svc->net, &svc->stats); 1350 1351 /* Unbind scheduler */ 1352 old_sched = rcu_dereference_protected(svc->scheduler, 1); 1353 ip_vs_unbind_scheduler(svc, old_sched); 1354 ip_vs_scheduler_put(old_sched); 1355 1356 /* Unbind persistence engine, keep svc->pe */ 1357 old_pe = rcu_dereference_protected(svc->pe, 1); 1358 ip_vs_pe_put(old_pe); 1359 1360 /* 1361 * Unlink the whole destination list 1362 */ 1363 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) { 1364 __ip_vs_unlink_dest(svc, dest, 0); 1365 __ip_vs_del_dest(svc->net, dest, cleanup); 1366 } 1367 1368 /* 1369 * Update the virtual service counters 1370 */ 1371 if (svc->port == FTPPORT) 1372 atomic_dec(&ipvs->ftpsvc_counter); 1373 else if (svc->port == 0) 1374 atomic_dec(&ipvs->nullsvc_counter); 1375 1376 /* 1377 * Free the service if nobody refers to it 1378 */ 1379 if (atomic_dec_and_test(&svc->refcnt)) { 1380 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n", 1381 svc->fwmark, 1382 IP_VS_DBG_ADDR(svc->af, &svc->addr), 1383 ntohs(svc->port)); 1384 call_rcu(&svc->rcu_head, ip_vs_service_rcu_free); 1385 } 1386 1387 /* decrease the module use count */ 1388 ip_vs_use_count_dec(); 1389} 1390 1391/* 1392 * Unlink a service from list and try to delete it if its refcnt reached 0 1393 */ 1394static void ip_vs_unlink_service(struct ip_vs_service *svc, bool cleanup) 1395{ 1396 /* Hold svc to avoid double release from dest_trash */ 1397 atomic_inc(&svc->refcnt); 1398 /* 1399 * Unhash it from the service table 1400 */ 1401 ip_vs_svc_unhash(svc); 1402 1403 __ip_vs_del_service(svc, cleanup); 1404} 1405 1406/* 1407 * Delete a service from the service list 1408 */ 1409static int ip_vs_del_service(struct ip_vs_service *svc) 1410{ 1411 if (svc == NULL) 1412 return -EEXIST; 1413 ip_vs_unlink_service(svc, false); 1414 1415 return 0; 1416} 1417 1418 1419/* 1420 * Flush all the virtual services 1421 */ 1422static int ip_vs_flush(struct net *net, bool cleanup) 1423{ 1424 int idx; 1425 struct ip_vs_service *svc; 1426 struct hlist_node *n; 1427 1428 /* 1429 * Flush the service table hashed by <netns,protocol,addr,port> 1430 */ 1431 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1432 hlist_for_each_entry_safe(svc, n, &ip_vs_svc_table[idx], 1433 s_list) { 1434 if (net_eq(svc->net, net)) 1435 ip_vs_unlink_service(svc, cleanup); 1436 } 1437 } 1438 1439 /* 1440 * Flush the service table hashed by fwmark 1441 */ 1442 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1443 hlist_for_each_entry_safe(svc, n, &ip_vs_svc_fwm_table[idx], 1444 f_list) { 1445 if (net_eq(svc->net, net)) 1446 ip_vs_unlink_service(svc, cleanup); 1447 } 1448 } 1449 1450 return 0; 1451} 1452 1453/* 1454 * Delete service by {netns} in the service table. 1455 * Called by __ip_vs_cleanup() 1456 */ 1457void ip_vs_service_net_cleanup(struct net *net) 1458{ 1459 EnterFunction(2); 1460 /* Check for "full" addressed entries */ 1461 mutex_lock(&__ip_vs_mutex); 1462 ip_vs_flush(net, true); 1463 mutex_unlock(&__ip_vs_mutex); 1464 LeaveFunction(2); 1465} 1466 1467/* Put all references for device (dst_cache) */ 1468static inline void 1469ip_vs_forget_dev(struct ip_vs_dest *dest, struct net_device *dev) 1470{ 1471 struct ip_vs_dest_dst *dest_dst; 1472 1473 spin_lock_bh(&dest->dst_lock); 1474 dest_dst = rcu_dereference_protected(dest->dest_dst, 1); 1475 if (dest_dst && dest_dst->dst_cache->dev == dev) { 1476 IP_VS_DBG_BUF(3, "Reset dev:%s dest %s:%u ,dest->refcnt=%d\n", 1477 dev->name, 1478 IP_VS_DBG_ADDR(dest->af, &dest->addr), 1479 ntohs(dest->port), 1480 atomic_read(&dest->refcnt)); 1481 __ip_vs_dst_cache_reset(dest); 1482 } 1483 spin_unlock_bh(&dest->dst_lock); 1484 1485} 1486/* Netdev event receiver 1487 * Currently only NETDEV_DOWN is handled to release refs to cached dsts 1488 */ 1489static int ip_vs_dst_event(struct notifier_block *this, unsigned long event, 1490 void *ptr) 1491{ 1492 struct net_device *dev = ptr; 1493 struct net *net = dev_net(dev); 1494 struct netns_ipvs *ipvs = net_ipvs(net); 1495 struct ip_vs_service *svc; 1496 struct ip_vs_dest *dest; 1497 unsigned int idx; 1498 1499 if (event != NETDEV_DOWN || !ipvs) 1500 return NOTIFY_DONE; 1501 IP_VS_DBG(3, "%s() dev=%s\n", __func__, dev->name); 1502 EnterFunction(2); 1503 mutex_lock(&__ip_vs_mutex); 1504 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1505 hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { 1506 if (net_eq(svc->net, net)) { 1507 list_for_each_entry(dest, &svc->destinations, 1508 n_list) { 1509 ip_vs_forget_dev(dest, dev); 1510 } 1511 } 1512 } 1513 1514 hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { 1515 if (net_eq(svc->net, net)) { 1516 list_for_each_entry(dest, &svc->destinations, 1517 n_list) { 1518 ip_vs_forget_dev(dest, dev); 1519 } 1520 } 1521 1522 } 1523 } 1524 1525 spin_lock_bh(&ipvs->dest_trash_lock); 1526 list_for_each_entry(dest, &ipvs->dest_trash, t_list) { 1527 ip_vs_forget_dev(dest, dev); 1528 } 1529 spin_unlock_bh(&ipvs->dest_trash_lock); 1530 mutex_unlock(&__ip_vs_mutex); 1531 LeaveFunction(2); 1532 return NOTIFY_DONE; 1533} 1534 1535/* 1536 * Zero counters in a service or all services 1537 */ 1538static int ip_vs_zero_service(struct ip_vs_service *svc) 1539{ 1540 struct ip_vs_dest *dest; 1541 1542 list_for_each_entry(dest, &svc->destinations, n_list) { 1543 ip_vs_zero_stats(&dest->stats); 1544 } 1545 ip_vs_zero_stats(&svc->stats); 1546 return 0; 1547} 1548 1549static int ip_vs_zero_all(struct net *net) 1550{ 1551 int idx; 1552 struct ip_vs_service *svc; 1553 1554 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1555 hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { 1556 if (net_eq(svc->net, net)) 1557 ip_vs_zero_service(svc); 1558 } 1559 } 1560 1561 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1562 hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { 1563 if (net_eq(svc->net, net)) 1564 ip_vs_zero_service(svc); 1565 } 1566 } 1567 1568 ip_vs_zero_stats(&net_ipvs(net)->tot_stats); 1569 return 0; 1570} 1571 1572#ifdef CONFIG_SYSCTL 1573 1574static int zero; 1575static int three = 3; 1576 1577static int 1578proc_do_defense_mode(ctl_table *table, int write, 1579 void __user *buffer, size_t *lenp, loff_t *ppos) 1580{ 1581 struct net *net = current->nsproxy->net_ns; 1582 int *valp = table->data; 1583 int val = *valp; 1584 int rc; 1585 1586 rc = proc_dointvec(table, write, buffer, lenp, ppos); 1587 if (write && (*valp != val)) { 1588 if ((*valp < 0) || (*valp > 3)) { 1589 /* Restore the correct value */ 1590 *valp = val; 1591 } else { 1592 update_defense_level(net_ipvs(net)); 1593 } 1594 } 1595 return rc; 1596} 1597 1598static int 1599proc_do_sync_threshold(ctl_table *table, int write, 1600 void __user *buffer, size_t *lenp, loff_t *ppos) 1601{ 1602 int *valp = table->data; 1603 int val[2]; 1604 int rc; 1605 1606 /* backup the value first */ 1607 memcpy(val, valp, sizeof(val)); 1608 1609 rc = proc_dointvec(table, write, buffer, lenp, ppos); 1610 if (write && (valp[0] < 0 || valp[1] < 0 || 1611 (valp[0] >= valp[1] && valp[1]))) { 1612 /* Restore the correct value */ 1613 memcpy(valp, val, sizeof(val)); 1614 } 1615 return rc; 1616} 1617 1618static int 1619proc_do_sync_mode(ctl_table *table, int write, 1620 void __user *buffer, size_t *lenp, loff_t *ppos) 1621{ 1622 int *valp = table->data; 1623 int val = *valp; 1624 int rc; 1625 1626 rc = proc_dointvec(table, write, buffer, lenp, ppos); 1627 if (write && (*valp != val)) { 1628 if ((*valp < 0) || (*valp > 1)) { 1629 /* Restore the correct value */ 1630 *valp = val; 1631 } 1632 } 1633 return rc; 1634} 1635 1636static int 1637proc_do_sync_ports(ctl_table *table, int write, 1638 void __user *buffer, size_t *lenp, loff_t *ppos) 1639{ 1640 int *valp = table->data; 1641 int val = *valp; 1642 int rc; 1643 1644 rc = proc_dointvec(table, write, buffer, lenp, ppos); 1645 if (write && (*valp != val)) { 1646 if (*valp < 1 || !is_power_of_2(*valp)) { 1647 /* Restore the correct value */ 1648 *valp = val; 1649 } 1650 } 1651 return rc; 1652} 1653 1654/* 1655 * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/) 1656 * Do not change order or insert new entries without 1657 * align with netns init in ip_vs_control_net_init() 1658 */ 1659 1660static struct ctl_table vs_vars[] = { 1661 { 1662 .procname = "amemthresh", 1663 .maxlen = sizeof(int), 1664 .mode = 0644, 1665 .proc_handler = proc_dointvec, 1666 }, 1667 { 1668 .procname = "am_droprate", 1669 .maxlen = sizeof(int), 1670 .mode = 0644, 1671 .proc_handler = proc_dointvec, 1672 }, 1673 { 1674 .procname = "drop_entry", 1675 .maxlen = sizeof(int), 1676 .mode = 0644, 1677 .proc_handler = proc_do_defense_mode, 1678 }, 1679 { 1680 .procname = "drop_packet", 1681 .maxlen = sizeof(int), 1682 .mode = 0644, 1683 .proc_handler = proc_do_defense_mode, 1684 }, 1685#ifdef CONFIG_IP_VS_NFCT 1686 { 1687 .procname = "conntrack", 1688 .maxlen = sizeof(int), 1689 .mode = 0644, 1690 .proc_handler = &proc_dointvec, 1691 }, 1692#endif 1693 { 1694 .procname = "secure_tcp", 1695 .maxlen = sizeof(int), 1696 .mode = 0644, 1697 .proc_handler = proc_do_defense_mode, 1698 }, 1699 { 1700 .procname = "snat_reroute", 1701 .maxlen = sizeof(int), 1702 .mode = 0644, 1703 .proc_handler = &proc_dointvec, 1704 }, 1705 { 1706 .procname = "sync_version", 1707 .maxlen = sizeof(int), 1708 .mode = 0644, 1709 .proc_handler = &proc_do_sync_mode, 1710 }, 1711 { 1712 .procname = "sync_ports", 1713 .maxlen = sizeof(int), 1714 .mode = 0644, 1715 .proc_handler = &proc_do_sync_ports, 1716 }, 1717 { 1718 .procname = "sync_qlen_max", 1719 .maxlen = sizeof(int), 1720 .mode = 0644, 1721 .proc_handler = proc_dointvec, 1722 }, 1723 { 1724 .procname = "sync_sock_size", 1725 .maxlen = sizeof(int), 1726 .mode = 0644, 1727 .proc_handler = proc_dointvec, 1728 }, 1729 { 1730 .procname = "cache_bypass", 1731 .maxlen = sizeof(int), 1732 .mode = 0644, 1733 .proc_handler = proc_dointvec, 1734 }, 1735 { 1736 .procname = "expire_nodest_conn", 1737 .maxlen = sizeof(int), 1738 .mode = 0644, 1739 .proc_handler = proc_dointvec, 1740 }, 1741 { 1742 .procname = "expire_quiescent_template", 1743 .maxlen = sizeof(int), 1744 .mode = 0644, 1745 .proc_handler = proc_dointvec, 1746 }, 1747 { 1748 .procname = "sync_threshold", 1749 .maxlen = 1750 sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold), 1751 .mode = 0644, 1752 .proc_handler = proc_do_sync_threshold, 1753 }, 1754 { 1755 .procname = "sync_refresh_period", 1756 .maxlen = sizeof(int), 1757 .mode = 0644, 1758 .proc_handler = proc_dointvec_jiffies, 1759 }, 1760 { 1761 .procname = "sync_retries", 1762 .maxlen = sizeof(int), 1763 .mode = 0644, 1764 .proc_handler = proc_dointvec_minmax, 1765 .extra1 = &zero, 1766 .extra2 = &three, 1767 }, 1768 { 1769 .procname = "nat_icmp_send", 1770 .maxlen = sizeof(int), 1771 .mode = 0644, 1772 .proc_handler = proc_dointvec, 1773 }, 1774 { 1775 .procname = "pmtu_disc", 1776 .maxlen = sizeof(int), 1777 .mode = 0644, 1778 .proc_handler = proc_dointvec, 1779 }, 1780 { 1781 .procname = "backup_only", 1782 .maxlen = sizeof(int), 1783 .mode = 0644, 1784 .proc_handler = proc_dointvec, 1785 }, 1786#ifdef CONFIG_IP_VS_DEBUG 1787 { 1788 .procname = "debug_level", 1789 .data = &sysctl_ip_vs_debug_level, 1790 .maxlen = sizeof(int), 1791 .mode = 0644, 1792 .proc_handler = proc_dointvec, 1793 }, 1794#endif 1795#if 0 1796 { 1797 .procname = "timeout_established", 1798 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED], 1799 .maxlen = sizeof(int), 1800 .mode = 0644, 1801 .proc_handler = proc_dointvec_jiffies, 1802 }, 1803 { 1804 .procname = "timeout_synsent", 1805 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT], 1806 .maxlen = sizeof(int), 1807 .mode = 0644, 1808 .proc_handler = proc_dointvec_jiffies, 1809 }, 1810 { 1811 .procname = "timeout_synrecv", 1812 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV], 1813 .maxlen = sizeof(int), 1814 .mode = 0644, 1815 .proc_handler = proc_dointvec_jiffies, 1816 }, 1817 { 1818 .procname = "timeout_finwait", 1819 .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT], 1820 .maxlen = sizeof(int), 1821 .mode = 0644, 1822 .proc_handler = proc_dointvec_jiffies, 1823 }, 1824 { 1825 .procname = "timeout_timewait", 1826 .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT], 1827 .maxlen = sizeof(int), 1828 .mode = 0644, 1829 .proc_handler = proc_dointvec_jiffies, 1830 }, 1831 { 1832 .procname = "timeout_close", 1833 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE], 1834 .maxlen = sizeof(int), 1835 .mode = 0644, 1836 .proc_handler = proc_dointvec_jiffies, 1837 }, 1838 { 1839 .procname = "timeout_closewait", 1840 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT], 1841 .maxlen = sizeof(int), 1842 .mode = 0644, 1843 .proc_handler = proc_dointvec_jiffies, 1844 }, 1845 { 1846 .procname = "timeout_lastack", 1847 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK], 1848 .maxlen = sizeof(int), 1849 .mode = 0644, 1850 .proc_handler = proc_dointvec_jiffies, 1851 }, 1852 { 1853 .procname = "timeout_listen", 1854 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN], 1855 .maxlen = sizeof(int), 1856 .mode = 0644, 1857 .proc_handler = proc_dointvec_jiffies, 1858 }, 1859 { 1860 .procname = "timeout_synack", 1861 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK], 1862 .maxlen = sizeof(int), 1863 .mode = 0644, 1864 .proc_handler = proc_dointvec_jiffies, 1865 }, 1866 { 1867 .procname = "timeout_udp", 1868 .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP], 1869 .maxlen = sizeof(int), 1870 .mode = 0644, 1871 .proc_handler = proc_dointvec_jiffies, 1872 }, 1873 { 1874 .procname = "timeout_icmp", 1875 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP], 1876 .maxlen = sizeof(int), 1877 .mode = 0644, 1878 .proc_handler = proc_dointvec_jiffies, 1879 }, 1880#endif 1881 { } 1882}; 1883 1884#endif 1885 1886#ifdef CONFIG_PROC_FS 1887 1888struct ip_vs_iter { 1889 struct seq_net_private p; /* Do not move this, netns depends upon it*/ 1890 struct hlist_head *table; 1891 int bucket; 1892}; 1893 1894/* 1895 * Write the contents of the VS rule table to a PROCfs file. 1896 * (It is kept just for backward compatibility) 1897 */ 1898static inline const char *ip_vs_fwd_name(unsigned int flags) 1899{ 1900 switch (flags & IP_VS_CONN_F_FWD_MASK) { 1901 case IP_VS_CONN_F_LOCALNODE: 1902 return "Local"; 1903 case IP_VS_CONN_F_TUNNEL: 1904 return "Tunnel"; 1905 case IP_VS_CONN_F_DROUTE: 1906 return "Route"; 1907 default: 1908 return "Masq"; 1909 } 1910} 1911 1912 1913/* Get the Nth entry in the two lists */ 1914static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos) 1915{ 1916 struct net *net = seq_file_net(seq); 1917 struct ip_vs_iter *iter = seq->private; 1918 int idx; 1919 struct ip_vs_service *svc; 1920 1921 /* look in hash by protocol */ 1922 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1923 hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[idx], s_list) { 1924 if (net_eq(svc->net, net) && pos-- == 0) { 1925 iter->table = ip_vs_svc_table; 1926 iter->bucket = idx; 1927 return svc; 1928 } 1929 } 1930 } 1931 1932 /* keep looking in fwmark */ 1933 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1934 hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[idx], 1935 f_list) { 1936 if (net_eq(svc->net, net) && pos-- == 0) { 1937 iter->table = ip_vs_svc_fwm_table; 1938 iter->bucket = idx; 1939 return svc; 1940 } 1941 } 1942 } 1943 1944 return NULL; 1945} 1946 1947static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos) 1948 __acquires(RCU) 1949{ 1950 rcu_read_lock(); 1951 return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN; 1952} 1953 1954 1955static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos) 1956{ 1957 struct hlist_node *e; 1958 struct ip_vs_iter *iter; 1959 struct ip_vs_service *svc; 1960 1961 ++*pos; 1962 if (v == SEQ_START_TOKEN) 1963 return ip_vs_info_array(seq,0); 1964 1965 svc = v; 1966 iter = seq->private; 1967 1968 if (iter->table == ip_vs_svc_table) { 1969 /* next service in table hashed by protocol */ 1970 e = rcu_dereference(hlist_next_rcu(&svc->s_list)); 1971 if (e) 1972 return hlist_entry(e, struct ip_vs_service, s_list); 1973 1974 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) { 1975 hlist_for_each_entry_rcu(svc, 1976 &ip_vs_svc_table[iter->bucket], 1977 s_list) { 1978 return svc; 1979 } 1980 } 1981 1982 iter->table = ip_vs_svc_fwm_table; 1983 iter->bucket = -1; 1984 goto scan_fwmark; 1985 } 1986 1987 /* next service in hashed by fwmark */ 1988 e = rcu_dereference(hlist_next_rcu(&svc->f_list)); 1989 if (e) 1990 return hlist_entry(e, struct ip_vs_service, f_list); 1991 1992 scan_fwmark: 1993 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) { 1994 hlist_for_each_entry_rcu(svc, 1995 &ip_vs_svc_fwm_table[iter->bucket], 1996 f_list) 1997 return svc; 1998 } 1999 2000 return NULL; 2001} 2002 2003static void ip_vs_info_seq_stop(struct seq_file *seq, void *v) 2004 __releases(RCU) 2005{ 2006 rcu_read_unlock(); 2007} 2008 2009 2010static int ip_vs_info_seq_show(struct seq_file *seq, void *v) 2011{ 2012 if (v == SEQ_START_TOKEN) { 2013 seq_printf(seq, 2014 "IP Virtual Server version %d.%d.%d (size=%d)\n", 2015 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size); 2016 seq_puts(seq, 2017 "Prot LocalAddress:Port Scheduler Flags\n"); 2018 seq_puts(seq, 2019 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n"); 2020 } else { 2021 const struct ip_vs_service *svc = v; 2022 const struct ip_vs_iter *iter = seq->private; 2023 const struct ip_vs_dest *dest; 2024 struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler); 2025 2026 if (iter->table == ip_vs_svc_table) { 2027#ifdef CONFIG_IP_VS_IPV6 2028 if (svc->af == AF_INET6) 2029 seq_printf(seq, "%s [%pI6]:%04X %s ", 2030 ip_vs_proto_name(svc->protocol), 2031 &svc->addr.in6, 2032 ntohs(svc->port), 2033 sched->name); 2034 else 2035#endif 2036 seq_printf(seq, "%s %08X:%04X %s %s ", 2037 ip_vs_proto_name(svc->protocol), 2038 ntohl(svc->addr.ip), 2039 ntohs(svc->port), 2040 sched->name, 2041 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":""); 2042 } else { 2043 seq_printf(seq, "FWM %08X %s %s", 2044 svc->fwmark, sched->name, 2045 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":""); 2046 } 2047 2048 if (svc->flags & IP_VS_SVC_F_PERSISTENT) 2049 seq_printf(seq, "persistent %d %08X\n", 2050 svc->timeout, 2051 ntohl(svc->netmask)); 2052 else 2053 seq_putc(seq, '\n'); 2054 2055 list_for_each_entry_rcu(dest, &svc->destinations, n_list) { 2056#ifdef CONFIG_IP_VS_IPV6 2057 if (dest->af == AF_INET6) 2058 seq_printf(seq, 2059 " -> [%pI6]:%04X" 2060 " %-7s %-6d %-10d %-10d\n", 2061 &dest->addr.in6, 2062 ntohs(dest->port), 2063 ip_vs_fwd_name(atomic_read(&dest->conn_flags)), 2064 atomic_read(&dest->weight), 2065 atomic_read(&dest->activeconns), 2066 atomic_read(&dest->inactconns)); 2067 else 2068#endif 2069 seq_printf(seq, 2070 " -> %08X:%04X " 2071 "%-7s %-6d %-10d %-10d\n", 2072 ntohl(dest->addr.ip), 2073 ntohs(dest->port), 2074 ip_vs_fwd_name(atomic_read(&dest->conn_flags)), 2075 atomic_read(&dest->weight), 2076 atomic_read(&dest->activeconns), 2077 atomic_read(&dest->inactconns)); 2078 2079 } 2080 } 2081 return 0; 2082} 2083 2084static const struct seq_operations ip_vs_info_seq_ops = { 2085 .start = ip_vs_info_seq_start, 2086 .next = ip_vs_info_seq_next, 2087 .stop = ip_vs_info_seq_stop, 2088 .show = ip_vs_info_seq_show, 2089}; 2090 2091static int ip_vs_info_open(struct inode *inode, struct file *file) 2092{ 2093 return seq_open_net(inode, file, &ip_vs_info_seq_ops, 2094 sizeof(struct ip_vs_iter)); 2095} 2096 2097static const struct file_operations ip_vs_info_fops = { 2098 .owner = THIS_MODULE, 2099 .open = ip_vs_info_open, 2100 .read = seq_read, 2101 .llseek = seq_lseek, 2102 .release = seq_release_net, 2103}; 2104 2105static int ip_vs_stats_show(struct seq_file *seq, void *v) 2106{ 2107 struct net *net = seq_file_single_net(seq); 2108 struct ip_vs_stats_user show; 2109 2110/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ 2111 seq_puts(seq, 2112 " Total Incoming Outgoing Incoming Outgoing\n"); 2113 seq_printf(seq, 2114 " Conns Packets Packets Bytes Bytes\n"); 2115 2116 ip_vs_copy_stats(&show, &net_ipvs(net)->tot_stats); 2117 seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", show.conns, 2118 show.inpkts, show.outpkts, 2119 (unsigned long long) show.inbytes, 2120 (unsigned long long) show.outbytes); 2121 2122/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ 2123 seq_puts(seq, 2124 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n"); 2125 seq_printf(seq, "%8X %8X %8X %16X %16X\n", 2126 show.cps, show.inpps, show.outpps, 2127 show.inbps, show.outbps); 2128 2129 return 0; 2130} 2131 2132static int ip_vs_stats_seq_open(struct inode *inode, struct file *file) 2133{ 2134 return single_open_net(inode, file, ip_vs_stats_show); 2135} 2136 2137static const struct file_operations ip_vs_stats_fops = { 2138 .owner = THIS_MODULE, 2139 .open = ip_vs_stats_seq_open, 2140 .read = seq_read, 2141 .llseek = seq_lseek, 2142 .release = single_release_net, 2143}; 2144 2145static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v) 2146{ 2147 struct net *net = seq_file_single_net(seq); 2148 struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats; 2149 struct ip_vs_cpu_stats __percpu *cpustats = tot_stats->cpustats; 2150 struct ip_vs_stats_user rates; 2151 int i; 2152 2153/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ 2154 seq_puts(seq, 2155 " Total Incoming Outgoing Incoming Outgoing\n"); 2156 seq_printf(seq, 2157 "CPU Conns Packets Packets Bytes Bytes\n"); 2158 2159 for_each_possible_cpu(i) { 2160 struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i); 2161 unsigned int start; 2162 __u64 inbytes, outbytes; 2163 2164 do { 2165 start = u64_stats_fetch_begin_bh(&u->syncp); 2166 inbytes = u->ustats.inbytes; 2167 outbytes = u->ustats.outbytes; 2168 } while (u64_stats_fetch_retry_bh(&u->syncp, start)); 2169 2170 seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n", 2171 i, u->ustats.conns, u->ustats.inpkts, 2172 u->ustats.outpkts, (__u64)inbytes, 2173 (__u64)outbytes); 2174 } 2175 2176 spin_lock_bh(&tot_stats->lock); 2177 2178 seq_printf(seq, " ~ %8X %8X %8X %16LX %16LX\n\n", 2179 tot_stats->ustats.conns, tot_stats->ustats.inpkts, 2180 tot_stats->ustats.outpkts, 2181 (unsigned long long) tot_stats->ustats.inbytes, 2182 (unsigned long long) tot_stats->ustats.outbytes); 2183 2184 ip_vs_read_estimator(&rates, tot_stats); 2185 2186 spin_unlock_bh(&tot_stats->lock); 2187 2188/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ 2189 seq_puts(seq, 2190 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n"); 2191 seq_printf(seq, " %8X %8X %8X %16X %16X\n", 2192 rates.cps, 2193 rates.inpps, 2194 rates.outpps, 2195 rates.inbps, 2196 rates.outbps); 2197 2198 return 0; 2199} 2200 2201static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file) 2202{ 2203 return single_open_net(inode, file, ip_vs_stats_percpu_show); 2204} 2205 2206static const struct file_operations ip_vs_stats_percpu_fops = { 2207 .owner = THIS_MODULE, 2208 .open = ip_vs_stats_percpu_seq_open, 2209 .read = seq_read, 2210 .llseek = seq_lseek, 2211 .release = single_release_net, 2212}; 2213#endif 2214 2215/* 2216 * Set timeout values for tcp tcpfin udp in the timeout_table. 2217 */ 2218static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u) 2219{ 2220#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP) 2221 struct ip_vs_proto_data *pd; 2222#endif 2223 2224 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n", 2225 u->tcp_timeout, 2226 u->tcp_fin_timeout, 2227 u->udp_timeout); 2228 2229#ifdef CONFIG_IP_VS_PROTO_TCP 2230 if (u->tcp_timeout) { 2231 pd = ip_vs_proto_data_get(net, IPPROTO_TCP); 2232 pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] 2233 = u->tcp_timeout * HZ; 2234 } 2235 2236 if (u->tcp_fin_timeout) { 2237 pd = ip_vs_proto_data_get(net, IPPROTO_TCP); 2238 pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] 2239 = u->tcp_fin_timeout * HZ; 2240 } 2241#endif 2242 2243#ifdef CONFIG_IP_VS_PROTO_UDP 2244 if (u->udp_timeout) { 2245 pd = ip_vs_proto_data_get(net, IPPROTO_UDP); 2246 pd->timeout_table[IP_VS_UDP_S_NORMAL] 2247 = u->udp_timeout * HZ; 2248 } 2249#endif 2250 return 0; 2251} 2252 2253 2254#define SET_CMDID(cmd) (cmd - IP_VS_BASE_CTL) 2255#define SERVICE_ARG_LEN (sizeof(struct ip_vs_service_user)) 2256#define SVCDEST_ARG_LEN (sizeof(struct ip_vs_service_user) + \ 2257 sizeof(struct ip_vs_dest_user)) 2258#define TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user)) 2259#define DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user)) 2260#define MAX_ARG_LEN SVCDEST_ARG_LEN 2261 2262static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = { 2263 [SET_CMDID(IP_VS_SO_SET_ADD)] = SERVICE_ARG_LEN, 2264 [SET_CMDID(IP_VS_SO_SET_EDIT)] = SERVICE_ARG_LEN, 2265 [SET_CMDID(IP_VS_SO_SET_DEL)] = SERVICE_ARG_LEN, 2266 [SET_CMDID(IP_VS_SO_SET_FLUSH)] = 0, 2267 [SET_CMDID(IP_VS_SO_SET_ADDDEST)] = SVCDEST_ARG_LEN, 2268 [SET_CMDID(IP_VS_SO_SET_DELDEST)] = SVCDEST_ARG_LEN, 2269 [SET_CMDID(IP_VS_SO_SET_EDITDEST)] = SVCDEST_ARG_LEN, 2270 [SET_CMDID(IP_VS_SO_SET_TIMEOUT)] = TIMEOUT_ARG_LEN, 2271 [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)] = DAEMON_ARG_LEN, 2272 [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)] = DAEMON_ARG_LEN, 2273 [SET_CMDID(IP_VS_SO_SET_ZERO)] = SERVICE_ARG_LEN, 2274}; 2275 2276static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc, 2277 struct ip_vs_service_user *usvc_compat) 2278{ 2279 memset(usvc, 0, sizeof(*usvc)); 2280 2281 usvc->af = AF_INET; 2282 usvc->protocol = usvc_compat->protocol; 2283 usvc->addr.ip = usvc_compat->addr; 2284 usvc->port = usvc_compat->port; 2285 usvc->fwmark = usvc_compat->fwmark; 2286 2287 /* Deep copy of sched_name is not needed here */ 2288 usvc->sched_name = usvc_compat->sched_name; 2289 2290 usvc->flags = usvc_compat->flags; 2291 usvc->timeout = usvc_compat->timeout; 2292 usvc->netmask = usvc_compat->netmask; 2293} 2294 2295static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest, 2296 struct ip_vs_dest_user *udest_compat) 2297{ 2298 memset(udest, 0, sizeof(*udest)); 2299 2300 udest->addr.ip = udest_compat->addr; 2301 udest->port = udest_compat->port; 2302 udest->conn_flags = udest_compat->conn_flags; 2303 udest->weight = udest_compat->weight; 2304 udest->u_threshold = udest_compat->u_threshold; 2305 udest->l_threshold = udest_compat->l_threshold; 2306} 2307 2308static int 2309do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) 2310{ 2311 struct net *net = sock_net(sk); 2312 int ret; 2313 unsigned char arg[MAX_ARG_LEN]; 2314 struct ip_vs_service_user *usvc_compat; 2315 struct ip_vs_service_user_kern usvc; 2316 struct ip_vs_service *svc; 2317 struct ip_vs_dest_user *udest_compat; 2318 struct ip_vs_dest_user_kern udest; 2319 struct netns_ipvs *ipvs = net_ipvs(net); 2320 2321 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) 2322 return -EPERM; 2323 2324 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX) 2325 return -EINVAL; 2326 if (len < 0 || len > MAX_ARG_LEN) 2327 return -EINVAL; 2328 if (len != set_arglen[SET_CMDID(cmd)]) { 2329 pr_err("set_ctl: len %u != %u\n", 2330 len, set_arglen[SET_CMDID(cmd)]); 2331 return -EINVAL; 2332 } 2333 2334 if (copy_from_user(arg, user, len) != 0) 2335 return -EFAULT; 2336 2337 /* increase the module use count */ 2338 ip_vs_use_count_inc(); 2339 2340 /* Handle daemons since they have another lock */ 2341 if (cmd == IP_VS_SO_SET_STARTDAEMON || 2342 cmd == IP_VS_SO_SET_STOPDAEMON) { 2343 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg; 2344 2345 if (mutex_lock_interruptible(&ipvs->sync_mutex)) { 2346 ret = -ERESTARTSYS; 2347 goto out_dec; 2348 } 2349 if (cmd == IP_VS_SO_SET_STARTDAEMON) 2350 ret = start_sync_thread(net, dm->state, dm->mcast_ifn, 2351 dm->syncid); 2352 else 2353 ret = stop_sync_thread(net, dm->state); 2354 mutex_unlock(&ipvs->sync_mutex); 2355 goto out_dec; 2356 } 2357 2358 if (mutex_lock_interruptible(&__ip_vs_mutex)) { 2359 ret = -ERESTARTSYS; 2360 goto out_dec; 2361 } 2362 2363 if (cmd == IP_VS_SO_SET_FLUSH) { 2364 /* Flush the virtual service */ 2365 ret = ip_vs_flush(net, false); 2366 goto out_unlock; 2367 } else if (cmd == IP_VS_SO_SET_TIMEOUT) { 2368 /* Set timeout values for (tcp tcpfin udp) */ 2369 ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg); 2370 goto out_unlock; 2371 } 2372 2373 usvc_compat = (struct ip_vs_service_user *)arg; 2374 udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1); 2375 2376 /* We only use the new structs internally, so copy userspace compat 2377 * structs to extended internal versions */ 2378 ip_vs_copy_usvc_compat(&usvc, usvc_compat); 2379 ip_vs_copy_udest_compat(&udest, udest_compat); 2380 2381 if (cmd == IP_VS_SO_SET_ZERO) { 2382 /* if no service address is set, zero counters in all */ 2383 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) { 2384 ret = ip_vs_zero_all(net); 2385 goto out_unlock; 2386 } 2387 } 2388 2389 /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */ 2390 if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP && 2391 usvc.protocol != IPPROTO_SCTP) { 2392 pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n", 2393 usvc.protocol, &usvc.addr.ip, 2394 ntohs(usvc.port), usvc.sched_name); 2395 ret = -EFAULT; 2396 goto out_unlock; 2397 } 2398 2399 /* Lookup the exact service by <protocol, addr, port> or fwmark */ 2400 rcu_read_lock(); 2401 if (usvc.fwmark == 0) 2402 svc = __ip_vs_service_find(net, usvc.af, usvc.protocol, 2403 &usvc.addr, usvc.port); 2404 else 2405 svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark); 2406 rcu_read_unlock(); 2407 2408 if (cmd != IP_VS_SO_SET_ADD 2409 && (svc == NULL || svc->protocol != usvc.protocol)) { 2410 ret = -ESRCH; 2411 goto out_unlock; 2412 } 2413 2414 switch (cmd) { 2415 case IP_VS_SO_SET_ADD: 2416 if (svc != NULL) 2417 ret = -EEXIST; 2418 else 2419 ret = ip_vs_add_service(net, &usvc, &svc); 2420 break; 2421 case IP_VS_SO_SET_EDIT: 2422 ret = ip_vs_edit_service(svc, &usvc); 2423 break; 2424 case IP_VS_SO_SET_DEL: 2425 ret = ip_vs_del_service(svc); 2426 if (!ret) 2427 goto out_unlock; 2428 break; 2429 case IP_VS_SO_SET_ZERO: 2430 ret = ip_vs_zero_service(svc); 2431 break; 2432 case IP_VS_SO_SET_ADDDEST: 2433 ret = ip_vs_add_dest(svc, &udest); 2434 break; 2435 case IP_VS_SO_SET_EDITDEST: 2436 ret = ip_vs_edit_dest(svc, &udest); 2437 break; 2438 case IP_VS_SO_SET_DELDEST: 2439 ret = ip_vs_del_dest(svc, &udest); 2440 break; 2441 default: 2442 ret = -EINVAL; 2443 } 2444 2445 out_unlock: 2446 mutex_unlock(&__ip_vs_mutex); 2447 out_dec: 2448 /* decrease the module use count */ 2449 ip_vs_use_count_dec(); 2450 2451 return ret; 2452} 2453 2454 2455static void 2456ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src) 2457{ 2458 struct ip_vs_scheduler *sched; 2459 2460 sched = rcu_dereference_protected(src->scheduler, 1); 2461 dst->protocol = src->protocol; 2462 dst->addr = src->addr.ip; 2463 dst->port = src->port; 2464 dst->fwmark = src->fwmark; 2465 strlcpy(dst->sched_name, sched->name, sizeof(dst->sched_name)); 2466 dst->flags = src->flags; 2467 dst->timeout = src->timeout / HZ; 2468 dst->netmask = src->netmask; 2469 dst->num_dests = src->num_dests; 2470 ip_vs_copy_stats(&dst->stats, &src->stats); 2471} 2472 2473static inline int 2474__ip_vs_get_service_entries(struct net *net, 2475 const struct ip_vs_get_services *get, 2476 struct ip_vs_get_services __user *uptr) 2477{ 2478 int idx, count=0; 2479 struct ip_vs_service *svc; 2480 struct ip_vs_service_entry entry; 2481 int ret = 0; 2482 2483 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 2484 hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { 2485 /* Only expose IPv4 entries to old interface */ 2486 if (svc->af != AF_INET || !net_eq(svc->net, net)) 2487 continue; 2488 2489 if (count >= get->num_services) 2490 goto out; 2491 memset(&entry, 0, sizeof(entry)); 2492 ip_vs_copy_service(&entry, svc); 2493 if (copy_to_user(&uptr->entrytable[count], 2494 &entry, sizeof(entry))) { 2495 ret = -EFAULT; 2496 goto out; 2497 } 2498 count++; 2499 } 2500 } 2501 2502 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 2503 hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { 2504 /* Only expose IPv4 entries to old interface */ 2505 if (svc->af != AF_INET || !net_eq(svc->net, net)) 2506 continue; 2507 2508 if (count >= get->num_services) 2509 goto out; 2510 memset(&entry, 0, sizeof(entry)); 2511 ip_vs_copy_service(&entry, svc); 2512 if (copy_to_user(&uptr->entrytable[count], 2513 &entry, sizeof(entry))) { 2514 ret = -EFAULT; 2515 goto out; 2516 } 2517 count++; 2518 } 2519 } 2520out: 2521 return ret; 2522} 2523 2524static inline int 2525__ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get, 2526 struct ip_vs_get_dests __user *uptr) 2527{ 2528 struct ip_vs_service *svc; 2529 union nf_inet_addr addr = { .ip = get->addr }; 2530 int ret = 0; 2531 2532 rcu_read_lock(); 2533 if (get->fwmark) 2534 svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark); 2535 else 2536 svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr, 2537 get->port); 2538 rcu_read_unlock(); 2539 2540 if (svc) { 2541 int count = 0; 2542 struct ip_vs_dest *dest; 2543 struct ip_vs_dest_entry entry; 2544 2545 memset(&entry, 0, sizeof(entry)); 2546 list_for_each_entry(dest, &svc->destinations, n_list) { 2547 if (count >= get->num_dests) 2548 break; 2549 2550 entry.addr = dest->addr.ip; 2551 entry.port = dest->port; 2552 entry.conn_flags = atomic_read(&dest->conn_flags); 2553 entry.weight = atomic_read(&dest->weight); 2554 entry.u_threshold = dest->u_threshold; 2555 entry.l_threshold = dest->l_threshold; 2556 entry.activeconns = atomic_read(&dest->activeconns); 2557 entry.inactconns = atomic_read(&dest->inactconns); 2558 entry.persistconns = atomic_read(&dest->persistconns); 2559 ip_vs_copy_stats(&entry.stats, &dest->stats); 2560 if (copy_to_user(&uptr->entrytable[count], 2561 &entry, sizeof(entry))) { 2562 ret = -EFAULT; 2563 break; 2564 } 2565 count++; 2566 } 2567 } else 2568 ret = -ESRCH; 2569 return ret; 2570} 2571 2572static inline void 2573__ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u) 2574{ 2575#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP) 2576 struct ip_vs_proto_data *pd; 2577#endif 2578 2579 memset(u, 0, sizeof (*u)); 2580 2581#ifdef CONFIG_IP_VS_PROTO_TCP 2582 pd = ip_vs_proto_data_get(net, IPPROTO_TCP); 2583 u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ; 2584 u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ; 2585#endif 2586#ifdef CONFIG_IP_VS_PROTO_UDP 2587 pd = ip_vs_proto_data_get(net, IPPROTO_UDP); 2588 u->udp_timeout = 2589 pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ; 2590#endif 2591} 2592 2593 2594#define GET_CMDID(cmd) (cmd - IP_VS_BASE_CTL) 2595#define GET_INFO_ARG_LEN (sizeof(struct ip_vs_getinfo)) 2596#define GET_SERVICES_ARG_LEN (sizeof(struct ip_vs_get_services)) 2597#define GET_SERVICE_ARG_LEN (sizeof(struct ip_vs_service_entry)) 2598#define GET_DESTS_ARG_LEN (sizeof(struct ip_vs_get_dests)) 2599#define GET_TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user)) 2600#define GET_DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user) * 2) 2601 2602static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = { 2603 [GET_CMDID(IP_VS_SO_GET_VERSION)] = 64, 2604 [GET_CMDID(IP_VS_SO_GET_INFO)] = GET_INFO_ARG_LEN, 2605 [GET_CMDID(IP_VS_SO_GET_SERVICES)] = GET_SERVICES_ARG_LEN, 2606 [GET_CMDID(IP_VS_SO_GET_SERVICE)] = GET_SERVICE_ARG_LEN, 2607 [GET_CMDID(IP_VS_SO_GET_DESTS)] = GET_DESTS_ARG_LEN, 2608 [GET_CMDID(IP_VS_SO_GET_TIMEOUT)] = GET_TIMEOUT_ARG_LEN, 2609 [GET_CMDID(IP_VS_SO_GET_DAEMON)] = GET_DAEMON_ARG_LEN, 2610}; 2611 2612static int 2613do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) 2614{ 2615 unsigned char arg[128]; 2616 int ret = 0; 2617 unsigned int copylen; 2618 struct net *net = sock_net(sk); 2619 struct netns_ipvs *ipvs = net_ipvs(net); 2620 2621 BUG_ON(!net); 2622 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) 2623 return -EPERM; 2624 2625 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX) 2626 return -EINVAL; 2627 2628 if (*len < get_arglen[GET_CMDID(cmd)]) { 2629 pr_err("get_ctl: len %u < %u\n", 2630 *len, get_arglen[GET_CMDID(cmd)]); 2631 return -EINVAL; 2632 } 2633 2634 copylen = get_arglen[GET_CMDID(cmd)]; 2635 if (copylen > 128) 2636 return -EINVAL; 2637 2638 if (copy_from_user(arg, user, copylen) != 0) 2639 return -EFAULT; 2640 /* 2641 * Handle daemons first since it has its own locking 2642 */ 2643 if (cmd == IP_VS_SO_GET_DAEMON) { 2644 struct ip_vs_daemon_user d[2]; 2645 2646 memset(&d, 0, sizeof(d)); 2647 if (mutex_lock_interruptible(&ipvs->sync_mutex)) 2648 return -ERESTARTSYS; 2649 2650 if (ipvs->sync_state & IP_VS_STATE_MASTER) { 2651 d[0].state = IP_VS_STATE_MASTER; 2652 strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn, 2653 sizeof(d[0].mcast_ifn)); 2654 d[0].syncid = ipvs->master_syncid; 2655 } 2656 if (ipvs->sync_state & IP_VS_STATE_BACKUP) { 2657 d[1].state = IP_VS_STATE_BACKUP; 2658 strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn, 2659 sizeof(d[1].mcast_ifn)); 2660 d[1].syncid = ipvs->backup_syncid; 2661 } 2662 if (copy_to_user(user, &d, sizeof(d)) != 0) 2663 ret = -EFAULT; 2664 mutex_unlock(&ipvs->sync_mutex); 2665 return ret; 2666 } 2667 2668 if (mutex_lock_interruptible(&__ip_vs_mutex)) 2669 return -ERESTARTSYS; 2670 2671 switch (cmd) { 2672 case IP_VS_SO_GET_VERSION: 2673 { 2674 char buf[64]; 2675 2676 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)", 2677 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size); 2678 if (copy_to_user(user, buf, strlen(buf)+1) != 0) { 2679 ret = -EFAULT; 2680 goto out; 2681 } 2682 *len = strlen(buf)+1; 2683 } 2684 break; 2685 2686 case IP_VS_SO_GET_INFO: 2687 { 2688 struct ip_vs_getinfo info; 2689 info.version = IP_VS_VERSION_CODE; 2690 info.size = ip_vs_conn_tab_size; 2691 info.num_services = ipvs->num_services; 2692 if (copy_to_user(user, &info, sizeof(info)) != 0) 2693 ret = -EFAULT; 2694 } 2695 break; 2696 2697 case IP_VS_SO_GET_SERVICES: 2698 { 2699 struct ip_vs_get_services *get; 2700 int size; 2701 2702 get = (struct ip_vs_get_services *)arg; 2703 size = sizeof(*get) + 2704 sizeof(struct ip_vs_service_entry) * get->num_services; 2705 if (*len != size) { 2706 pr_err("length: %u != %u\n", *len, size); 2707 ret = -EINVAL; 2708 goto out; 2709 } 2710 ret = __ip_vs_get_service_entries(net, get, user); 2711 } 2712 break; 2713 2714 case IP_VS_SO_GET_SERVICE: 2715 { 2716 struct ip_vs_service_entry *entry; 2717 struct ip_vs_service *svc; 2718 union nf_inet_addr addr; 2719 2720 entry = (struct ip_vs_service_entry *)arg; 2721 addr.ip = entry->addr; 2722 rcu_read_lock(); 2723 if (entry->fwmark) 2724 svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark); 2725 else 2726 svc = __ip_vs_service_find(net, AF_INET, 2727 entry->protocol, &addr, 2728 entry->port); 2729 rcu_read_unlock(); 2730 if (svc) { 2731 ip_vs_copy_service(entry, svc); 2732 if (copy_to_user(user, entry, sizeof(*entry)) != 0) 2733 ret = -EFAULT; 2734 } else 2735 ret = -ESRCH; 2736 } 2737 break; 2738 2739 case IP_VS_SO_GET_DESTS: 2740 { 2741 struct ip_vs_get_dests *get; 2742 int size; 2743 2744 get = (struct ip_vs_get_dests *)arg; 2745 size = sizeof(*get) + 2746 sizeof(struct ip_vs_dest_entry) * get->num_dests; 2747 if (*len != size) { 2748 pr_err("length: %u != %u\n", *len, size); 2749 ret = -EINVAL; 2750 goto out; 2751 } 2752 ret = __ip_vs_get_dest_entries(net, get, user); 2753 } 2754 break; 2755 2756 case IP_VS_SO_GET_TIMEOUT: 2757 { 2758 struct ip_vs_timeout_user t; 2759 2760 __ip_vs_get_timeouts(net, &t); 2761 if (copy_to_user(user, &t, sizeof(t)) != 0) 2762 ret = -EFAULT; 2763 } 2764 break; 2765 2766 default: 2767 ret = -EINVAL; 2768 } 2769 2770out: 2771 mutex_unlock(&__ip_vs_mutex); 2772 return ret; 2773} 2774 2775 2776static struct nf_sockopt_ops ip_vs_sockopts = { 2777 .pf = PF_INET, 2778 .set_optmin = IP_VS_BASE_CTL, 2779 .set_optmax = IP_VS_SO_SET_MAX+1, 2780 .set = do_ip_vs_set_ctl, 2781 .get_optmin = IP_VS_BASE_CTL, 2782 .get_optmax = IP_VS_SO_GET_MAX+1, 2783 .get = do_ip_vs_get_ctl, 2784 .owner = THIS_MODULE, 2785}; 2786 2787/* 2788 * Generic Netlink interface 2789 */ 2790 2791/* IPVS genetlink family */ 2792static struct genl_family ip_vs_genl_family = { 2793 .id = GENL_ID_GENERATE, 2794 .hdrsize = 0, 2795 .name = IPVS_GENL_NAME, 2796 .version = IPVS_GENL_VERSION, 2797 .maxattr = IPVS_CMD_MAX, 2798 .netnsok = true, /* Make ipvsadm to work on netns */ 2799}; 2800 2801/* Policy used for first-level command attributes */ 2802static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = { 2803 [IPVS_CMD_ATTR_SERVICE] = { .type = NLA_NESTED }, 2804 [IPVS_CMD_ATTR_DEST] = { .type = NLA_NESTED }, 2805 [IPVS_CMD_ATTR_DAEMON] = { .type = NLA_NESTED }, 2806 [IPVS_CMD_ATTR_TIMEOUT_TCP] = { .type = NLA_U32 }, 2807 [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 }, 2808 [IPVS_CMD_ATTR_TIMEOUT_UDP] = { .type = NLA_U32 }, 2809}; 2810 2811/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */ 2812static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = { 2813 [IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 }, 2814 [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING, 2815 .len = IP_VS_IFNAME_MAXLEN }, 2816 [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 }, 2817}; 2818 2819/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */ 2820static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = { 2821 [IPVS_SVC_ATTR_AF] = { .type = NLA_U16 }, 2822 [IPVS_SVC_ATTR_PROTOCOL] = { .type = NLA_U16 }, 2823 [IPVS_SVC_ATTR_ADDR] = { .type = NLA_BINARY, 2824 .len = sizeof(union nf_inet_addr) }, 2825 [IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 }, 2826 [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 }, 2827 [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING, 2828 .len = IP_VS_SCHEDNAME_MAXLEN }, 2829 [IPVS_SVC_ATTR_PE_NAME] = { .type = NLA_NUL_STRING, 2830 .len = IP_VS_PENAME_MAXLEN }, 2831 [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY, 2832 .len = sizeof(struct ip_vs_flags) }, 2833 [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 }, 2834 [IPVS_SVC_ATTR_NETMASK] = { .type = NLA_U32 }, 2835 [IPVS_SVC_ATTR_STATS] = { .type = NLA_NESTED }, 2836}; 2837 2838/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */ 2839static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = { 2840 [IPVS_DEST_ATTR_ADDR] = { .type = NLA_BINARY, 2841 .len = sizeof(union nf_inet_addr) }, 2842 [IPVS_DEST_ATTR_PORT] = { .type = NLA_U16 }, 2843 [IPVS_DEST_ATTR_FWD_METHOD] = { .type = NLA_U32 }, 2844 [IPVS_DEST_ATTR_WEIGHT] = { .type = NLA_U32 }, 2845 [IPVS_DEST_ATTR_U_THRESH] = { .type = NLA_U32 }, 2846 [IPVS_DEST_ATTR_L_THRESH] = { .type = NLA_U32 }, 2847 [IPVS_DEST_ATTR_ACTIVE_CONNS] = { .type = NLA_U32 }, 2848 [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 }, 2849 [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 }, 2850 [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED }, 2851}; 2852 2853static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type, 2854 struct ip_vs_stats *stats) 2855{ 2856 struct ip_vs_stats_user ustats; 2857 struct nlattr *nl_stats = nla_nest_start(skb, container_type); 2858 if (!nl_stats) 2859 return -EMSGSIZE; 2860 2861 ip_vs_copy_stats(&ustats, stats); 2862 2863 if (nla_put_u32(skb, IPVS_STATS_ATTR_CONNS, ustats.conns) || 2864 nla_put_u32(skb, IPVS_STATS_ATTR_INPKTS, ustats.inpkts) || 2865 nla_put_u32(skb, IPVS_STATS_ATTR_OUTPKTS, ustats.outpkts) || 2866 nla_put_u64(skb, IPVS_STATS_ATTR_INBYTES, ustats.inbytes) || 2867 nla_put_u64(skb, IPVS_STATS_ATTR_OUTBYTES, ustats.outbytes) || 2868 nla_put_u32(skb, IPVS_STATS_ATTR_CPS, ustats.cps) || 2869 nla_put_u32(skb, IPVS_STATS_ATTR_INPPS, ustats.inpps) || 2870 nla_put_u32(skb, IPVS_STATS_ATTR_OUTPPS, ustats.outpps) || 2871 nla_put_u32(skb, IPVS_STATS_ATTR_INBPS, ustats.inbps) || 2872 nla_put_u32(skb, IPVS_STATS_ATTR_OUTBPS, ustats.outbps)) 2873 goto nla_put_failure; 2874 nla_nest_end(skb, nl_stats); 2875 2876 return 0; 2877 2878nla_put_failure: 2879 nla_nest_cancel(skb, nl_stats); 2880 return -EMSGSIZE; 2881} 2882 2883static int ip_vs_genl_fill_service(struct sk_buff *skb, 2884 struct ip_vs_service *svc) 2885{ 2886 struct ip_vs_scheduler *sched; 2887 struct ip_vs_pe *pe; 2888 struct nlattr *nl_service; 2889 struct ip_vs_flags flags = { .flags = svc->flags, 2890 .mask = ~0 }; 2891 2892 nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE); 2893 if (!nl_service) 2894 return -EMSGSIZE; 2895 2896 if (nla_put_u16(skb, IPVS_SVC_ATTR_AF, svc->af)) 2897 goto nla_put_failure; 2898 if (svc->fwmark) { 2899 if (nla_put_u32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark)) 2900 goto nla_put_failure; 2901 } else { 2902 if (nla_put_u16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol) || 2903 nla_put(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr) || 2904 nla_put_be16(skb, IPVS_SVC_ATTR_PORT, svc->port)) 2905 goto nla_put_failure; 2906 } 2907 2908 sched = rcu_dereference_protected(svc->scheduler, 1); 2909 pe = rcu_dereference_protected(svc->pe, 1); 2910 if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, sched->name) || 2911 (pe && nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, pe->name)) || 2912 nla_put(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags) || 2913 nla_put_u32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ) || 2914 nla_put_be32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask)) 2915 goto nla_put_failure; 2916 if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats)) 2917 goto nla_put_failure; 2918 2919 nla_nest_end(skb, nl_service); 2920 2921 return 0; 2922 2923nla_put_failure: 2924 nla_nest_cancel(skb, nl_service); 2925 return -EMSGSIZE; 2926} 2927 2928static int ip_vs_genl_dump_service(struct sk_buff *skb, 2929 struct ip_vs_service *svc, 2930 struct netlink_callback *cb) 2931{ 2932 void *hdr; 2933 2934 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, 2935 &ip_vs_genl_family, NLM_F_MULTI, 2936 IPVS_CMD_NEW_SERVICE); 2937 if (!hdr) 2938 return -EMSGSIZE; 2939 2940 if (ip_vs_genl_fill_service(skb, svc) < 0) 2941 goto nla_put_failure; 2942 2943 return genlmsg_end(skb, hdr); 2944 2945nla_put_failure: 2946 genlmsg_cancel(skb, hdr); 2947 return -EMSGSIZE; 2948} 2949 2950static int ip_vs_genl_dump_services(struct sk_buff *skb, 2951 struct netlink_callback *cb) 2952{ 2953 int idx = 0, i; 2954 int start = cb->args[0]; 2955 struct ip_vs_service *svc; 2956 struct net *net = skb_sknet(skb); 2957 2958 mutex_lock(&__ip_vs_mutex); 2959 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { 2960 hlist_for_each_entry(svc, &ip_vs_svc_table[i], s_list) { 2961 if (++idx <= start || !net_eq(svc->net, net)) 2962 continue; 2963 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { 2964 idx--; 2965 goto nla_put_failure; 2966 } 2967 } 2968 } 2969 2970 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { 2971 hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) { 2972 if (++idx <= start || !net_eq(svc->net, net)) 2973 continue; 2974 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { 2975 idx--; 2976 goto nla_put_failure; 2977 } 2978 } 2979 } 2980 2981nla_put_failure: 2982 mutex_unlock(&__ip_vs_mutex); 2983 cb->args[0] = idx; 2984 2985 return skb->len; 2986} 2987 2988static int ip_vs_genl_parse_service(struct net *net, 2989 struct ip_vs_service_user_kern *usvc, 2990 struct nlattr *nla, int full_entry, 2991 struct ip_vs_service **ret_svc) 2992{ 2993 struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1]; 2994 struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr; 2995 struct ip_vs_service *svc; 2996 2997 /* Parse mandatory identifying service fields first */ 2998 if (nla == NULL || 2999 nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy)) 3000 return -EINVAL; 3001 3002 nla_af = attrs[IPVS_SVC_ATTR_AF]; 3003 nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL]; 3004 nla_addr = attrs[IPVS_SVC_ATTR_ADDR]; 3005 nla_port = attrs[IPVS_SVC_ATTR_PORT]; 3006 nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK]; 3007 3008 if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr)))) 3009 return -EINVAL; 3010 3011 memset(usvc, 0, sizeof(*usvc)); 3012 3013 usvc->af = nla_get_u16(nla_af); 3014#ifdef CONFIG_IP_VS_IPV6 3015 if (usvc->af != AF_INET && usvc->af != AF_INET6) 3016#else 3017 if (usvc->af != AF_INET) 3018#endif 3019 return -EAFNOSUPPORT; 3020 3021 if (nla_fwmark) { 3022 usvc->protocol = IPPROTO_TCP; 3023 usvc->fwmark = nla_get_u32(nla_fwmark); 3024 } else { 3025 usvc->protocol = nla_get_u16(nla_protocol); 3026 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr)); 3027 usvc->port = nla_get_be16(nla_port); 3028 usvc->fwmark = 0; 3029 } 3030 3031 rcu_read_lock(); 3032 if (usvc->fwmark) 3033 svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark); 3034 else 3035 svc = __ip_vs_service_find(net, usvc->af, usvc->protocol, 3036 &usvc->addr, usvc->port); 3037 rcu_read_unlock(); 3038 *ret_svc = svc; 3039 3040 /* If a full entry was requested, check for the additional fields */ 3041 if (full_entry) { 3042 struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout, 3043 *nla_netmask; 3044 struct ip_vs_flags flags; 3045 3046 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME]; 3047 nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME]; 3048 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS]; 3049 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT]; 3050 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK]; 3051 3052 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask)) 3053 return -EINVAL; 3054 3055 nla_memcpy(&flags, nla_flags, sizeof(flags)); 3056 3057 /* prefill flags from service if it already exists */ 3058 if (svc) 3059 usvc->flags = svc->flags; 3060 3061 /* set new flags from userland */ 3062 usvc->flags = (usvc->flags & ~flags.mask) | 3063 (flags.flags & flags.mask); 3064 usvc->sched_name = nla_data(nla_sched); 3065 usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL; 3066 usvc->timeout = nla_get_u32(nla_timeout); 3067 usvc->netmask = nla_get_be32(nla_netmask); 3068 } 3069 3070 return 0; 3071} 3072 3073static struct ip_vs_service *ip_vs_genl_find_service(struct net *net, 3074 struct nlattr *nla) 3075{ 3076 struct ip_vs_service_user_kern usvc; 3077 struct ip_vs_service *svc; 3078 int ret; 3079 3080 ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc); 3081 return ret ? ERR_PTR(ret) : svc; 3082} 3083 3084static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest) 3085{ 3086 struct nlattr *nl_dest; 3087 3088 nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST); 3089 if (!nl_dest) 3090 return -EMSGSIZE; 3091 3092 if (nla_put(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr) || 3093 nla_put_be16(skb, IPVS_DEST_ATTR_PORT, dest->port) || 3094 nla_put_u32(skb, IPVS_DEST_ATTR_FWD_METHOD, 3095 (atomic_read(&dest->conn_flags) & 3096 IP_VS_CONN_F_FWD_MASK)) || 3097 nla_put_u32(skb, IPVS_DEST_ATTR_WEIGHT, 3098 atomic_read(&dest->weight)) || 3099 nla_put_u32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold) || 3100 nla_put_u32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold) || 3101 nla_put_u32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS, 3102 atomic_read(&dest->activeconns)) || 3103 nla_put_u32(skb, IPVS_DEST_ATTR_INACT_CONNS, 3104 atomic_read(&dest->inactconns)) || 3105 nla_put_u32(skb, IPVS_DEST_ATTR_PERSIST_CONNS, 3106 atomic_read(&dest->persistconns))) 3107 goto nla_put_failure; 3108 if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats)) 3109 goto nla_put_failure; 3110 3111 nla_nest_end(skb, nl_dest); 3112 3113 return 0; 3114 3115nla_put_failure: 3116 nla_nest_cancel(skb, nl_dest); 3117 return -EMSGSIZE; 3118} 3119 3120static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest, 3121 struct netlink_callback *cb) 3122{ 3123 void *hdr; 3124 3125 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, 3126 &ip_vs_genl_family, NLM_F_MULTI, 3127 IPVS_CMD_NEW_DEST); 3128 if (!hdr) 3129 return -EMSGSIZE; 3130 3131 if (ip_vs_genl_fill_dest(skb, dest) < 0) 3132 goto nla_put_failure; 3133 3134 return genlmsg_end(skb, hdr); 3135 3136nla_put_failure: 3137 genlmsg_cancel(skb, hdr); 3138 return -EMSGSIZE; 3139} 3140 3141static int ip_vs_genl_dump_dests(struct sk_buff *skb, 3142 struct netlink_callback *cb) 3143{ 3144 int idx = 0; 3145 int start = cb->args[0]; 3146 struct ip_vs_service *svc; 3147 struct ip_vs_dest *dest; 3148 struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1]; 3149 struct net *net = skb_sknet(skb); 3150 3151 mutex_lock(&__ip_vs_mutex); 3152 3153 /* Try to find the service for which to dump destinations */ 3154 if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs, 3155 IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy)) 3156 goto out_err; 3157 3158 3159 svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]); 3160 if (IS_ERR(svc) || svc == NULL) 3161 goto out_err; 3162 3163 /* Dump the destinations */ 3164 list_for_each_entry(dest, &svc->destinations, n_list) { 3165 if (++idx <= start) 3166 continue; 3167 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) { 3168 idx--; 3169 goto nla_put_failure; 3170 } 3171 } 3172 3173nla_put_failure: 3174 cb->args[0] = idx; 3175 3176out_err: 3177 mutex_unlock(&__ip_vs_mutex); 3178 3179 return skb->len; 3180} 3181 3182static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest, 3183 struct nlattr *nla, int full_entry) 3184{ 3185 struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1]; 3186 struct nlattr *nla_addr, *nla_port; 3187 3188 /* Parse mandatory identifying destination fields first */ 3189 if (nla == NULL || 3190 nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy)) 3191 return -EINVAL; 3192 3193 nla_addr = attrs[IPVS_DEST_ATTR_ADDR]; 3194 nla_port = attrs[IPVS_DEST_ATTR_PORT]; 3195 3196 if (!(nla_addr && nla_port)) 3197 return -EINVAL; 3198 3199 memset(udest, 0, sizeof(*udest)); 3200 3201 nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr)); 3202 udest->port = nla_get_be16(nla_port); 3203 3204 /* If a full entry was requested, check for the additional fields */ 3205 if (full_entry) { 3206 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh, 3207 *nla_l_thresh; 3208 3209 nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD]; 3210 nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT]; 3211 nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH]; 3212 nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH]; 3213 3214 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh)) 3215 return -EINVAL; 3216 3217 udest->conn_flags = nla_get_u32(nla_fwd) 3218 & IP_VS_CONN_F_FWD_MASK; 3219 udest->weight = nla_get_u32(nla_weight); 3220 udest->u_threshold = nla_get_u32(nla_u_thresh); 3221 udest->l_threshold = nla_get_u32(nla_l_thresh); 3222 } 3223 3224 return 0; 3225} 3226 3227static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __u32 state, 3228 const char *mcast_ifn, __u32 syncid) 3229{ 3230 struct nlattr *nl_daemon; 3231 3232 nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON); 3233 if (!nl_daemon) 3234 return -EMSGSIZE; 3235 3236 if (nla_put_u32(skb, IPVS_DAEMON_ATTR_STATE, state) || 3237 nla_put_string(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn) || 3238 nla_put_u32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid)) 3239 goto nla_put_failure; 3240 nla_nest_end(skb, nl_daemon); 3241 3242 return 0; 3243 3244nla_put_failure: 3245 nla_nest_cancel(skb, nl_daemon); 3246 return -EMSGSIZE; 3247} 3248 3249static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __u32 state, 3250 const char *mcast_ifn, __u32 syncid, 3251 struct netlink_callback *cb) 3252{ 3253 void *hdr; 3254 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, 3255 &ip_vs_genl_family, NLM_F_MULTI, 3256 IPVS_CMD_NEW_DAEMON); 3257 if (!hdr) 3258 return -EMSGSIZE; 3259 3260 if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid)) 3261 goto nla_put_failure; 3262 3263 return genlmsg_end(skb, hdr); 3264 3265nla_put_failure: 3266 genlmsg_cancel(skb, hdr); 3267 return -EMSGSIZE; 3268} 3269 3270static int ip_vs_genl_dump_daemons(struct sk_buff *skb, 3271 struct netlink_callback *cb) 3272{ 3273 struct net *net = skb_sknet(skb); 3274 struct netns_ipvs *ipvs = net_ipvs(net); 3275 3276 mutex_lock(&ipvs->sync_mutex); 3277 if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) { 3278 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER, 3279 ipvs->master_mcast_ifn, 3280 ipvs->master_syncid, cb) < 0) 3281 goto nla_put_failure; 3282 3283 cb->args[0] = 1; 3284 } 3285 3286 if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) { 3287 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP, 3288 ipvs->backup_mcast_ifn, 3289 ipvs->backup_syncid, cb) < 0) 3290 goto nla_put_failure; 3291 3292 cb->args[1] = 1; 3293 } 3294 3295nla_put_failure: 3296 mutex_unlock(&ipvs->sync_mutex); 3297 3298 return skb->len; 3299} 3300 3301static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs) 3302{ 3303 if (!(attrs[IPVS_DAEMON_ATTR_STATE] && 3304 attrs[IPVS_DAEMON_ATTR_MCAST_IFN] && 3305 attrs[IPVS_DAEMON_ATTR_SYNC_ID])) 3306 return -EINVAL; 3307 3308 return start_sync_thread(net, 3309 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]), 3310 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]), 3311 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID])); 3312} 3313 3314static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs) 3315{ 3316 if (!attrs[IPVS_DAEMON_ATTR_STATE]) 3317 return -EINVAL; 3318 3319 return stop_sync_thread(net, 3320 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE])); 3321} 3322 3323static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs) 3324{ 3325 struct ip_vs_timeout_user t; 3326 3327 __ip_vs_get_timeouts(net, &t); 3328 3329 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]) 3330 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]); 3331 3332 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]) 3333 t.tcp_fin_timeout = 3334 nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]); 3335 3336 if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]) 3337 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]); 3338 3339 return ip_vs_set_timeout(net, &t); 3340} 3341 3342static int ip_vs_genl_set_daemon(struct sk_buff *skb, struct genl_info *info) 3343{ 3344 int ret = 0, cmd; 3345 struct net *net; 3346 struct netns_ipvs *ipvs; 3347 3348 net = skb_sknet(skb); 3349 ipvs = net_ipvs(net); 3350 cmd = info->genlhdr->cmd; 3351 3352 if (cmd == IPVS_CMD_NEW_DAEMON || cmd == IPVS_CMD_DEL_DAEMON) { 3353 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1]; 3354 3355 mutex_lock(&ipvs->sync_mutex); 3356 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] || 3357 nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX, 3358 info->attrs[IPVS_CMD_ATTR_DAEMON], 3359 ip_vs_daemon_policy)) { 3360 ret = -EINVAL; 3361 goto out; 3362 } 3363 3364 if (cmd == IPVS_CMD_NEW_DAEMON) 3365 ret = ip_vs_genl_new_daemon(net, daemon_attrs); 3366 else 3367 ret = ip_vs_genl_del_daemon(net, daemon_attrs); 3368out: 3369 mutex_unlock(&ipvs->sync_mutex); 3370 } 3371 return ret; 3372} 3373 3374static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) 3375{ 3376 struct ip_vs_service *svc = NULL; 3377 struct ip_vs_service_user_kern usvc; 3378 struct ip_vs_dest_user_kern udest; 3379 int ret = 0, cmd; 3380 int need_full_svc = 0, need_full_dest = 0; 3381 struct net *net; 3382 3383 net = skb_sknet(skb); 3384 cmd = info->genlhdr->cmd; 3385 3386 mutex_lock(&__ip_vs_mutex); 3387 3388 if (cmd == IPVS_CMD_FLUSH) { 3389 ret = ip_vs_flush(net, false); 3390 goto out; 3391 } else if (cmd == IPVS_CMD_SET_CONFIG) { 3392 ret = ip_vs_genl_set_config(net, info->attrs); 3393 goto out; 3394 } else if (cmd == IPVS_CMD_ZERO && 3395 !info->attrs[IPVS_CMD_ATTR_SERVICE]) { 3396 ret = ip_vs_zero_all(net); 3397 goto out; 3398 } 3399 3400 /* All following commands require a service argument, so check if we 3401 * received a valid one. We need a full service specification when 3402 * adding / editing a service. Only identifying members otherwise. */ 3403 if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE) 3404 need_full_svc = 1; 3405 3406 ret = ip_vs_genl_parse_service(net, &usvc, 3407 info->attrs[IPVS_CMD_ATTR_SERVICE], 3408 need_full_svc, &svc); 3409 if (ret) 3410 goto out; 3411 3412 /* Unless we're adding a new service, the service must already exist */ 3413 if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) { 3414 ret = -ESRCH; 3415 goto out; 3416 } 3417 3418 /* Destination commands require a valid destination argument. For 3419 * adding / editing a destination, we need a full destination 3420 * specification. */ 3421 if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST || 3422 cmd == IPVS_CMD_DEL_DEST) { 3423 if (cmd != IPVS_CMD_DEL_DEST) 3424 need_full_dest = 1; 3425 3426 ret = ip_vs_genl_parse_dest(&udest, 3427 info->attrs[IPVS_CMD_ATTR_DEST], 3428 need_full_dest); 3429 if (ret) 3430 goto out; 3431 } 3432 3433 switch (cmd) { 3434 case IPVS_CMD_NEW_SERVICE: 3435 if (svc == NULL) 3436 ret = ip_vs_add_service(net, &usvc, &svc); 3437 else 3438 ret = -EEXIST; 3439 break; 3440 case IPVS_CMD_SET_SERVICE: 3441 ret = ip_vs_edit_service(svc, &usvc); 3442 break; 3443 case IPVS_CMD_DEL_SERVICE: 3444 ret = ip_vs_del_service(svc); 3445 /* do not use svc, it can be freed */ 3446 break; 3447 case IPVS_CMD_NEW_DEST: 3448 ret = ip_vs_add_dest(svc, &udest); 3449 break; 3450 case IPVS_CMD_SET_DEST: 3451 ret = ip_vs_edit_dest(svc, &udest); 3452 break; 3453 case IPVS_CMD_DEL_DEST: 3454 ret = ip_vs_del_dest(svc, &udest); 3455 break; 3456 case IPVS_CMD_ZERO: 3457 ret = ip_vs_zero_service(svc); 3458 break; 3459 default: 3460 ret = -EINVAL; 3461 } 3462 3463out: 3464 mutex_unlock(&__ip_vs_mutex); 3465 3466 return ret; 3467} 3468 3469static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info) 3470{ 3471 struct sk_buff *msg; 3472 void *reply; 3473 int ret, cmd, reply_cmd; 3474 struct net *net; 3475 3476 net = skb_sknet(skb); 3477 cmd = info->genlhdr->cmd; 3478 3479 if (cmd == IPVS_CMD_GET_SERVICE) 3480 reply_cmd = IPVS_CMD_NEW_SERVICE; 3481 else if (cmd == IPVS_CMD_GET_INFO) 3482 reply_cmd = IPVS_CMD_SET_INFO; 3483 else if (cmd == IPVS_CMD_GET_CONFIG) 3484 reply_cmd = IPVS_CMD_SET_CONFIG; 3485 else { 3486 pr_err("unknown Generic Netlink command\n"); 3487 return -EINVAL; 3488 } 3489 3490 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 3491 if (!msg) 3492 return -ENOMEM; 3493 3494 mutex_lock(&__ip_vs_mutex); 3495 3496 reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd); 3497 if (reply == NULL) 3498 goto nla_put_failure; 3499 3500 switch (cmd) { 3501 case IPVS_CMD_GET_SERVICE: 3502 { 3503 struct ip_vs_service *svc; 3504 3505 svc = ip_vs_genl_find_service(net, 3506 info->attrs[IPVS_CMD_ATTR_SERVICE]); 3507 if (IS_ERR(svc)) { 3508 ret = PTR_ERR(svc); 3509 goto out_err; 3510 } else if (svc) { 3511 ret = ip_vs_genl_fill_service(msg, svc); 3512 if (ret) 3513 goto nla_put_failure; 3514 } else { 3515 ret = -ESRCH; 3516 goto out_err; 3517 } 3518 3519 break; 3520 } 3521 3522 case IPVS_CMD_GET_CONFIG: 3523 { 3524 struct ip_vs_timeout_user t; 3525 3526 __ip_vs_get_timeouts(net, &t); 3527#ifdef CONFIG_IP_VS_PROTO_TCP 3528 if (nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, 3529 t.tcp_timeout) || 3530 nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN, 3531 t.tcp_fin_timeout)) 3532 goto nla_put_failure; 3533#endif 3534#ifdef CONFIG_IP_VS_PROTO_UDP 3535 if (nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout)) 3536 goto nla_put_failure; 3537#endif 3538 3539 break; 3540 } 3541 3542 case IPVS_CMD_GET_INFO: 3543 if (nla_put_u32(msg, IPVS_INFO_ATTR_VERSION, 3544 IP_VS_VERSION_CODE) || 3545 nla_put_u32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE, 3546 ip_vs_conn_tab_size)) 3547 goto nla_put_failure; 3548 break; 3549 } 3550 3551 genlmsg_end(msg, reply); 3552 ret = genlmsg_reply(msg, info); 3553 goto out; 3554 3555nla_put_failure: 3556 pr_err("not enough space in Netlink message\n"); 3557 ret = -EMSGSIZE; 3558 3559out_err: 3560 nlmsg_free(msg); 3561out: 3562 mutex_unlock(&__ip_vs_mutex); 3563 3564 return ret; 3565} 3566 3567 3568static struct genl_ops ip_vs_genl_ops[] __read_mostly = { 3569 { 3570 .cmd = IPVS_CMD_NEW_SERVICE, 3571 .flags = GENL_ADMIN_PERM, 3572 .policy = ip_vs_cmd_policy, 3573 .doit = ip_vs_genl_set_cmd, 3574 }, 3575 { 3576 .cmd = IPVS_CMD_SET_SERVICE, 3577 .flags = GENL_ADMIN_PERM, 3578 .policy = ip_vs_cmd_policy, 3579 .doit = ip_vs_genl_set_cmd, 3580 }, 3581 { 3582 .cmd = IPVS_CMD_DEL_SERVICE, 3583 .flags = GENL_ADMIN_PERM, 3584 .policy = ip_vs_cmd_policy, 3585 .doit = ip_vs_genl_set_cmd, 3586 }, 3587 { 3588 .cmd = IPVS_CMD_GET_SERVICE, 3589 .flags = GENL_ADMIN_PERM, 3590 .doit = ip_vs_genl_get_cmd, 3591 .dumpit = ip_vs_genl_dump_services, 3592 .policy = ip_vs_cmd_policy, 3593 }, 3594 { 3595 .cmd = IPVS_CMD_NEW_DEST, 3596 .flags = GENL_ADMIN_PERM, 3597 .policy = ip_vs_cmd_policy, 3598 .doit = ip_vs_genl_set_cmd, 3599 }, 3600 { 3601 .cmd = IPVS_CMD_SET_DEST, 3602 .flags = GENL_ADMIN_PERM, 3603 .policy = ip_vs_cmd_policy, 3604 .doit = ip_vs_genl_set_cmd, 3605 }, 3606 { 3607 .cmd = IPVS_CMD_DEL_DEST, 3608 .flags = GENL_ADMIN_PERM, 3609 .policy = ip_vs_cmd_policy, 3610 .doit = ip_vs_genl_set_cmd, 3611 }, 3612 { 3613 .cmd = IPVS_CMD_GET_DEST, 3614 .flags = GENL_ADMIN_PERM, 3615 .policy = ip_vs_cmd_policy, 3616 .dumpit = ip_vs_genl_dump_dests, 3617 }, 3618 { 3619 .cmd = IPVS_CMD_NEW_DAEMON, 3620 .flags = GENL_ADMIN_PERM, 3621 .policy = ip_vs_cmd_policy, 3622 .doit = ip_vs_genl_set_daemon, 3623 }, 3624 { 3625 .cmd = IPVS_CMD_DEL_DAEMON, 3626 .flags = GENL_ADMIN_PERM, 3627 .policy = ip_vs_cmd_policy, 3628 .doit = ip_vs_genl_set_daemon, 3629 }, 3630 { 3631 .cmd = IPVS_CMD_GET_DAEMON, 3632 .flags = GENL_ADMIN_PERM, 3633 .dumpit = ip_vs_genl_dump_daemons, 3634 }, 3635 { 3636 .cmd = IPVS_CMD_SET_CONFIG, 3637 .flags = GENL_ADMIN_PERM, 3638 .policy = ip_vs_cmd_policy, 3639 .doit = ip_vs_genl_set_cmd, 3640 }, 3641 { 3642 .cmd = IPVS_CMD_GET_CONFIG, 3643 .flags = GENL_ADMIN_PERM, 3644 .doit = ip_vs_genl_get_cmd, 3645 }, 3646 { 3647 .cmd = IPVS_CMD_GET_INFO, 3648 .flags = GENL_ADMIN_PERM, 3649 .doit = ip_vs_genl_get_cmd, 3650 }, 3651 { 3652 .cmd = IPVS_CMD_ZERO, 3653 .flags = GENL_ADMIN_PERM, 3654 .policy = ip_vs_cmd_policy, 3655 .doit = ip_vs_genl_set_cmd, 3656 }, 3657 { 3658 .cmd = IPVS_CMD_FLUSH, 3659 .flags = GENL_ADMIN_PERM, 3660 .doit = ip_vs_genl_set_cmd, 3661 }, 3662}; 3663 3664static int __init ip_vs_genl_register(void) 3665{ 3666 return genl_register_family_with_ops(&ip_vs_genl_family, 3667 ip_vs_genl_ops, ARRAY_SIZE(ip_vs_genl_ops)); 3668} 3669 3670static void ip_vs_genl_unregister(void) 3671{ 3672 genl_unregister_family(&ip_vs_genl_family); 3673} 3674 3675/* End of Generic Netlink interface definitions */ 3676 3677/* 3678 * per netns intit/exit func. 3679 */ 3680#ifdef CONFIG_SYSCTL 3681static int __net_init ip_vs_control_net_init_sysctl(struct net *net) 3682{ 3683 int idx; 3684 struct netns_ipvs *ipvs = net_ipvs(net); 3685 struct ctl_table *tbl; 3686 3687 atomic_set(&ipvs->dropentry, 0); 3688 spin_lock_init(&ipvs->dropentry_lock); 3689 spin_lock_init(&ipvs->droppacket_lock); 3690 spin_lock_init(&ipvs->securetcp_lock); 3691 3692 if (!net_eq(net, &init_net)) { 3693 tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL); 3694 if (tbl == NULL) 3695 return -ENOMEM; 3696 3697 /* Don't export sysctls to unprivileged users */ 3698 if (net->user_ns != &init_user_ns) 3699 tbl[0].procname = NULL; 3700 } else 3701 tbl = vs_vars; 3702 /* Initialize sysctl defaults */ 3703 idx = 0; 3704 ipvs->sysctl_amemthresh = 1024; 3705 tbl[idx++].data = &ipvs->sysctl_amemthresh; 3706 ipvs->sysctl_am_droprate = 10; 3707 tbl[idx++].data = &ipvs->sysctl_am_droprate; 3708 tbl[idx++].data = &ipvs->sysctl_drop_entry; 3709 tbl[idx++].data = &ipvs->sysctl_drop_packet; 3710#ifdef CONFIG_IP_VS_NFCT 3711 tbl[idx++].data = &ipvs->sysctl_conntrack; 3712#endif 3713 tbl[idx++].data = &ipvs->sysctl_secure_tcp; 3714 ipvs->sysctl_snat_reroute = 1; 3715 tbl[idx++].data = &ipvs->sysctl_snat_reroute; 3716 ipvs->sysctl_sync_ver = 1; 3717 tbl[idx++].data = &ipvs->sysctl_sync_ver; 3718 ipvs->sysctl_sync_ports = 1; 3719 tbl[idx++].data = &ipvs->sysctl_sync_ports; 3720 ipvs->sysctl_sync_qlen_max = nr_free_buffer_pages() / 32; 3721 tbl[idx++].data = &ipvs->sysctl_sync_qlen_max; 3722 ipvs->sysctl_sync_sock_size = 0; 3723 tbl[idx++].data = &ipvs->sysctl_sync_sock_size; 3724 tbl[idx++].data = &ipvs->sysctl_cache_bypass; 3725 tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn; 3726 tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template; 3727 ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD; 3728 ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD; 3729 tbl[idx].data = &ipvs->sysctl_sync_threshold; 3730 tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold); 3731 ipvs->sysctl_sync_refresh_period = DEFAULT_SYNC_REFRESH_PERIOD; 3732 tbl[idx++].data = &ipvs->sysctl_sync_refresh_period; 3733 ipvs->sysctl_sync_retries = clamp_t(int, DEFAULT_SYNC_RETRIES, 0, 3); 3734 tbl[idx++].data = &ipvs->sysctl_sync_retries; 3735 tbl[idx++].data = &ipvs->sysctl_nat_icmp_send; 3736 ipvs->sysctl_pmtu_disc = 1; 3737 tbl[idx++].data = &ipvs->sysctl_pmtu_disc; 3738 tbl[idx++].data = &ipvs->sysctl_backup_only; 3739 3740 3741 ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl); 3742 if (ipvs->sysctl_hdr == NULL) { 3743 if (!net_eq(net, &init_net)) 3744 kfree(tbl); 3745 return -ENOMEM; 3746 } 3747 ip_vs_start_estimator(net, &ipvs->tot_stats); 3748 ipvs->sysctl_tbl = tbl; 3749 /* Schedule defense work */ 3750 INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler); 3751 schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD); 3752 3753 return 0; 3754} 3755 3756static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) 3757{ 3758 struct netns_ipvs *ipvs = net_ipvs(net); 3759 3760 cancel_delayed_work_sync(&ipvs->defense_work); 3761 cancel_work_sync(&ipvs->defense_work.work); 3762 unregister_net_sysctl_table(ipvs->sysctl_hdr); 3763} 3764 3765#else 3766 3767static int __net_init ip_vs_control_net_init_sysctl(struct net *net) { return 0; } 3768static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) { } 3769 3770#endif 3771 3772static struct notifier_block ip_vs_dst_notifier = { 3773 .notifier_call = ip_vs_dst_event, 3774}; 3775 3776int __net_init ip_vs_control_net_init(struct net *net) 3777{ 3778 int idx; 3779 struct netns_ipvs *ipvs = net_ipvs(net); 3780 3781 /* Initialize rs_table */ 3782 for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++) 3783 INIT_HLIST_HEAD(&ipvs->rs_table[idx]); 3784 3785 INIT_LIST_HEAD(&ipvs->dest_trash); 3786 spin_lock_init(&ipvs->dest_trash_lock); 3787 setup_timer(&ipvs->dest_trash_timer, ip_vs_dest_trash_expire, 3788 (unsigned long) net); 3789 atomic_set(&ipvs->ftpsvc_counter, 0); 3790 atomic_set(&ipvs->nullsvc_counter, 0); 3791 3792 /* procfs stats */ 3793 ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats); 3794 if (!ipvs->tot_stats.cpustats) 3795 return -ENOMEM; 3796 3797 spin_lock_init(&ipvs->tot_stats.lock); 3798 3799 proc_create("ip_vs", 0, net->proc_net, &ip_vs_info_fops); 3800 proc_create("ip_vs_stats", 0, net->proc_net, &ip_vs_stats_fops); 3801 proc_create("ip_vs_stats_percpu", 0, net->proc_net, 3802 &ip_vs_stats_percpu_fops); 3803 3804 if (ip_vs_control_net_init_sysctl(net)) 3805 goto err; 3806 3807 return 0; 3808 3809err: 3810 free_percpu(ipvs->tot_stats.cpustats); 3811 return -ENOMEM; 3812} 3813 3814void __net_exit ip_vs_control_net_cleanup(struct net *net) 3815{ 3816 struct netns_ipvs *ipvs = net_ipvs(net); 3817 3818 /* Some dest can be in grace period even before cleanup, we have to 3819 * defer ip_vs_trash_cleanup until ip_vs_dest_wait_readers is called. 3820 */ 3821 rcu_barrier(); 3822 ip_vs_trash_cleanup(net); 3823 ip_vs_stop_estimator(net, &ipvs->tot_stats); 3824 ip_vs_control_net_cleanup_sysctl(net); 3825 remove_proc_entry("ip_vs_stats_percpu", net->proc_net); 3826 remove_proc_entry("ip_vs_stats", net->proc_net); 3827 remove_proc_entry("ip_vs", net->proc_net); 3828 free_percpu(ipvs->tot_stats.cpustats); 3829} 3830 3831int __init ip_vs_register_nl_ioctl(void) 3832{ 3833 int ret; 3834 3835 ret = nf_register_sockopt(&ip_vs_sockopts); 3836 if (ret) { 3837 pr_err("cannot register sockopt.\n"); 3838 goto err_sock; 3839 } 3840 3841 ret = ip_vs_genl_register(); 3842 if (ret) { 3843 pr_err("cannot register Generic Netlink interface.\n"); 3844 goto err_genl; 3845 } 3846 return 0; 3847 3848err_genl: 3849 nf_unregister_sockopt(&ip_vs_sockopts); 3850err_sock: 3851 return ret; 3852} 3853 3854void ip_vs_unregister_nl_ioctl(void) 3855{ 3856 ip_vs_genl_unregister(); 3857 nf_unregister_sockopt(&ip_vs_sockopts); 3858} 3859 3860int __init ip_vs_control_init(void) 3861{ 3862 int idx; 3863 int ret; 3864 3865 EnterFunction(2); 3866 3867 /* Initialize svc_table, ip_vs_svc_fwm_table */ 3868 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 3869 INIT_HLIST_HEAD(&ip_vs_svc_table[idx]); 3870 INIT_HLIST_HEAD(&ip_vs_svc_fwm_table[idx]); 3871 } 3872 3873 smp_wmb(); /* Do we really need it now ? */ 3874 3875 ret = register_netdevice_notifier(&ip_vs_dst_notifier); 3876 if (ret < 0) 3877 return ret; 3878 3879 LeaveFunction(2); 3880 return 0; 3881} 3882 3883 3884void ip_vs_control_cleanup(void) 3885{ 3886 EnterFunction(2); 3887 unregister_netdevice_notifier(&ip_vs_dst_notifier); 3888 LeaveFunction(2); 3889} 3890