1/* 2 * IPVS An implementation of the IP virtual server support for the 3 * LINUX operating system. IPVS is now implemented as a module 4 * over the NetFilter framework. IPVS can be used to build a 5 * high-performance and highly available server based on a 6 * cluster of servers. 7 * 8 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> 9 * Peter Kese <peter.kese@ijs.si> 10 * Julian Anastasov <ja@ssi.bg> 11 * 12 * This program is free software; you can redistribute it and/or 13 * modify it under the terms of the GNU General Public License 14 * as published by the Free Software Foundation; either version 15 * 2 of the License, or (at your option) any later version. 16 * 17 * Changes: 18 * 19 */ 20 21#define KMSG_COMPONENT "IPVS" 22#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 23 24#include <linux/module.h> 25#include <linux/init.h> 26#include <linux/types.h> 27#include <linux/capability.h> 28#include <linux/fs.h> 29#include <linux/sysctl.h> 30#include <linux/proc_fs.h> 31#include <linux/workqueue.h> 32#include <linux/swap.h> 33#include <linux/seq_file.h> 34#include <linux/slab.h> 35 36#include <linux/netfilter.h> 37#include <linux/netfilter_ipv4.h> 38#include <linux/mutex.h> 39 40#include <net/net_namespace.h> 41#include <linux/nsproxy.h> 42#include <net/ip.h> 43#ifdef CONFIG_IP_VS_IPV6 44#include <net/ipv6.h> 45#include <net/ip6_route.h> 46#endif 47#include <net/route.h> 48#include <net/sock.h> 49#include <net/genetlink.h> 50 51#include <asm/uaccess.h> 52 53#include <net/ip_vs.h> 54 55/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. 
*/ 56static DEFINE_MUTEX(__ip_vs_mutex); 57 58/* sysctl variables */ 59 60#ifdef CONFIG_IP_VS_DEBUG 61static int sysctl_ip_vs_debug_level = 0; 62 63int ip_vs_get_debug_level(void) 64{ 65 return sysctl_ip_vs_debug_level; 66} 67#endif 68 69 70/* Protos */ 71static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup); 72 73 74#ifdef CONFIG_IP_VS_IPV6 75/* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */ 76static bool __ip_vs_addr_is_local_v6(struct net *net, 77 const struct in6_addr *addr) 78{ 79 struct flowi6 fl6 = { 80 .daddr = *addr, 81 }; 82 struct dst_entry *dst = ip6_route_output(net, NULL, &fl6); 83 bool is_local; 84 85 is_local = !dst->error && dst->dev && (dst->dev->flags & IFF_LOOPBACK); 86 87 dst_release(dst); 88 return is_local; 89} 90#endif 91 92#ifdef CONFIG_SYSCTL 93/* 94 * update_defense_level is called from keventd and from sysctl, 95 * so it needs to protect itself from softirqs 96 */ 97static void update_defense_level(struct netns_ipvs *ipvs) 98{ 99 struct sysinfo i; 100 static int old_secure_tcp = 0; 101 int availmem; 102 int nomem; 103 int to_change = -1; 104 105 /* we only count free and buffered memory (in pages) */ 106 si_meminfo(&i); 107 availmem = i.freeram + i.bufferram; 108 /* however in linux 2.5 the i.bufferram is total page cache size, 109 we need adjust it */ 110 /* si_swapinfo(&i); */ 111 /* availmem = availmem - (i.totalswap - i.freeswap); */ 112 113 nomem = (availmem < ipvs->sysctl_amemthresh); 114 115 local_bh_disable(); 116 117 /* drop_entry */ 118 spin_lock(&ipvs->dropentry_lock); 119 switch (ipvs->sysctl_drop_entry) { 120 case 0: 121 atomic_set(&ipvs->dropentry, 0); 122 break; 123 case 1: 124 if (nomem) { 125 atomic_set(&ipvs->dropentry, 1); 126 ipvs->sysctl_drop_entry = 2; 127 } else { 128 atomic_set(&ipvs->dropentry, 0); 129 } 130 break; 131 case 2: 132 if (nomem) { 133 atomic_set(&ipvs->dropentry, 1); 134 } else { 135 atomic_set(&ipvs->dropentry, 0); 136 ipvs->sysctl_drop_entry = 1; 137 }; 
138 break; 139 case 3: 140 atomic_set(&ipvs->dropentry, 1); 141 break; 142 } 143 spin_unlock(&ipvs->dropentry_lock); 144 145 /* drop_packet */ 146 spin_lock(&ipvs->droppacket_lock); 147 switch (ipvs->sysctl_drop_packet) { 148 case 0: 149 ipvs->drop_rate = 0; 150 break; 151 case 1: 152 if (nomem) { 153 ipvs->drop_rate = ipvs->drop_counter 154 = ipvs->sysctl_amemthresh / 155 (ipvs->sysctl_amemthresh-availmem); 156 ipvs->sysctl_drop_packet = 2; 157 } else { 158 ipvs->drop_rate = 0; 159 } 160 break; 161 case 2: 162 if (nomem) { 163 ipvs->drop_rate = ipvs->drop_counter 164 = ipvs->sysctl_amemthresh / 165 (ipvs->sysctl_amemthresh-availmem); 166 } else { 167 ipvs->drop_rate = 0; 168 ipvs->sysctl_drop_packet = 1; 169 } 170 break; 171 case 3: 172 ipvs->drop_rate = ipvs->sysctl_am_droprate; 173 break; 174 } 175 spin_unlock(&ipvs->droppacket_lock); 176 177 /* secure_tcp */ 178 spin_lock(&ipvs->securetcp_lock); 179 switch (ipvs->sysctl_secure_tcp) { 180 case 0: 181 if (old_secure_tcp >= 2) 182 to_change = 0; 183 break; 184 case 1: 185 if (nomem) { 186 if (old_secure_tcp < 2) 187 to_change = 1; 188 ipvs->sysctl_secure_tcp = 2; 189 } else { 190 if (old_secure_tcp >= 2) 191 to_change = 0; 192 } 193 break; 194 case 2: 195 if (nomem) { 196 if (old_secure_tcp < 2) 197 to_change = 1; 198 } else { 199 if (old_secure_tcp >= 2) 200 to_change = 0; 201 ipvs->sysctl_secure_tcp = 1; 202 } 203 break; 204 case 3: 205 if (old_secure_tcp < 2) 206 to_change = 1; 207 break; 208 } 209 old_secure_tcp = ipvs->sysctl_secure_tcp; 210 if (to_change >= 0) 211 ip_vs_protocol_timeout_change(ipvs, 212 ipvs->sysctl_secure_tcp > 1); 213 spin_unlock(&ipvs->securetcp_lock); 214 215 local_bh_enable(); 216} 217 218 219/* 220 * Timer for checking the defense 221 */ 222#define DEFENSE_TIMER_PERIOD 1*HZ 223 224static void defense_work_handler(struct work_struct *work) 225{ 226 struct netns_ipvs *ipvs = 227 container_of(work, struct netns_ipvs, defense_work.work); 228 229 update_defense_level(ipvs); 230 if 
(atomic_read(&ipvs->dropentry)) 231 ip_vs_random_dropentry(ipvs->net); 232 schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD); 233} 234#endif 235 236int 237ip_vs_use_count_inc(void) 238{ 239 return try_module_get(THIS_MODULE); 240} 241 242void 243ip_vs_use_count_dec(void) 244{ 245 module_put(THIS_MODULE); 246} 247 248 249/* 250 * Hash table: for virtual service lookups 251 */ 252#define IP_VS_SVC_TAB_BITS 8 253#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS) 254#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1) 255 256/* the service table hashed by <protocol, addr, port> */ 257static struct hlist_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE]; 258/* the service table hashed by fwmark */ 259static struct hlist_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE]; 260 261 262/* 263 * Returns hash value for virtual service 264 */ 265static inline unsigned int 266ip_vs_svc_hashkey(struct net *net, int af, unsigned int proto, 267 const union nf_inet_addr *addr, __be16 port) 268{ 269 register unsigned int porth = ntohs(port); 270 __be32 addr_fold = addr->ip; 271 __u32 ahash; 272 273#ifdef CONFIG_IP_VS_IPV6 274 if (af == AF_INET6) 275 addr_fold = addr->ip6[0]^addr->ip6[1]^ 276 addr->ip6[2]^addr->ip6[3]; 277#endif 278 ahash = ntohl(addr_fold); 279 ahash ^= ((size_t) net >> 8); 280 281 return (proto ^ ahash ^ (porth >> IP_VS_SVC_TAB_BITS) ^ porth) & 282 IP_VS_SVC_TAB_MASK; 283} 284 285/* 286 * Returns hash value of fwmark for virtual service lookup 287 */ 288static inline unsigned int ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark) 289{ 290 return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK; 291} 292 293/* 294 * Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port> 295 * or in the ip_vs_svc_fwm_table by fwmark. 296 * Should be called with locked tables. 
297 */ 298static int ip_vs_svc_hash(struct ip_vs_service *svc) 299{ 300 unsigned int hash; 301 302 if (svc->flags & IP_VS_SVC_F_HASHED) { 303 pr_err("%s(): request for already hashed, called from %pF\n", 304 __func__, __builtin_return_address(0)); 305 return 0; 306 } 307 308 if (svc->fwmark == 0) { 309 /* 310 * Hash it by <netns,protocol,addr,port> in ip_vs_svc_table 311 */ 312 hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol, 313 &svc->addr, svc->port); 314 hlist_add_head_rcu(&svc->s_list, &ip_vs_svc_table[hash]); 315 } else { 316 /* 317 * Hash it by fwmark in svc_fwm_table 318 */ 319 hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark); 320 hlist_add_head_rcu(&svc->f_list, &ip_vs_svc_fwm_table[hash]); 321 } 322 323 svc->flags |= IP_VS_SVC_F_HASHED; 324 /* increase its refcnt because it is referenced by the svc table */ 325 atomic_inc(&svc->refcnt); 326 return 1; 327} 328 329 330/* 331 * Unhashes a service from svc_table / svc_fwm_table. 332 * Should be called with locked tables. 333 */ 334static int ip_vs_svc_unhash(struct ip_vs_service *svc) 335{ 336 if (!(svc->flags & IP_VS_SVC_F_HASHED)) { 337 pr_err("%s(): request for unhash flagged, called from %pF\n", 338 __func__, __builtin_return_address(0)); 339 return 0; 340 } 341 342 if (svc->fwmark == 0) { 343 /* Remove it from the svc_table table */ 344 hlist_del_rcu(&svc->s_list); 345 } else { 346 /* Remove it from the svc_fwm_table table */ 347 hlist_del_rcu(&svc->f_list); 348 } 349 350 svc->flags &= ~IP_VS_SVC_F_HASHED; 351 atomic_dec(&svc->refcnt); 352 return 1; 353} 354 355 356/* 357 * Get service by {netns, proto,addr,port} in the service table. 
358 */ 359static inline struct ip_vs_service * 360__ip_vs_service_find(struct net *net, int af, __u16 protocol, 361 const union nf_inet_addr *vaddr, __be16 vport) 362{ 363 unsigned int hash; 364 struct ip_vs_service *svc; 365 366 /* Check for "full" addressed entries */ 367 hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport); 368 369 hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[hash], s_list) { 370 if ((svc->af == af) 371 && ip_vs_addr_equal(af, &svc->addr, vaddr) 372 && (svc->port == vport) 373 && (svc->protocol == protocol) 374 && net_eq(svc->net, net)) { 375 /* HIT */ 376 return svc; 377 } 378 } 379 380 return NULL; 381} 382 383 384/* 385 * Get service by {fwmark} in the service table. 386 */ 387static inline struct ip_vs_service * 388__ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark) 389{ 390 unsigned int hash; 391 struct ip_vs_service *svc; 392 393 /* Check for fwmark addressed entries */ 394 hash = ip_vs_svc_fwm_hashkey(net, fwmark); 395 396 hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[hash], f_list) { 397 if (svc->fwmark == fwmark && svc->af == af 398 && net_eq(svc->net, net)) { 399 /* HIT */ 400 return svc; 401 } 402 } 403 404 return NULL; 405} 406 407/* Find service, called under RCU lock */ 408struct ip_vs_service * 409ip_vs_service_find(struct net *net, int af, __u32 fwmark, __u16 protocol, 410 const union nf_inet_addr *vaddr, __be16 vport) 411{ 412 struct ip_vs_service *svc; 413 struct netns_ipvs *ipvs = net_ipvs(net); 414 415 /* 416 * Check the table hashed by fwmark first 417 */ 418 if (fwmark) { 419 svc = __ip_vs_svc_fwm_find(net, af, fwmark); 420 if (svc) 421 goto out; 422 } 423 424 /* 425 * Check the table hashed by <protocol,addr,port> 426 * for "full" addressed entries 427 */ 428 svc = __ip_vs_service_find(net, af, protocol, vaddr, vport); 429 430 if (svc == NULL 431 && protocol == IPPROTO_TCP 432 && atomic_read(&ipvs->ftpsvc_counter) 433 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) { 434 /* 435 * Check if ftp 
service entry exists, the packet 436 * might belong to FTP data connections. 437 */ 438 svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT); 439 } 440 441 if (svc == NULL 442 && atomic_read(&ipvs->nullsvc_counter)) { 443 /* 444 * Check if the catch-all port (port zero) exists 445 */ 446 svc = __ip_vs_service_find(net, af, protocol, vaddr, 0); 447 } 448 449 out: 450 IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n", 451 fwmark, ip_vs_proto_name(protocol), 452 IP_VS_DBG_ADDR(af, vaddr), ntohs(vport), 453 svc ? "hit" : "not hit"); 454 455 return svc; 456} 457 458 459static inline void 460__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc) 461{ 462 atomic_inc(&svc->refcnt); 463 rcu_assign_pointer(dest->svc, svc); 464} 465 466static void ip_vs_service_free(struct ip_vs_service *svc) 467{ 468 if (svc->stats.cpustats) 469 free_percpu(svc->stats.cpustats); 470 kfree(svc); 471} 472 473static void ip_vs_service_rcu_free(struct rcu_head *head) 474{ 475 struct ip_vs_service *svc; 476 477 svc = container_of(head, struct ip_vs_service, rcu_head); 478 ip_vs_service_free(svc); 479} 480 481static void __ip_vs_svc_put(struct ip_vs_service *svc, bool do_delay) 482{ 483 if (atomic_dec_and_test(&svc->refcnt)) { 484 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n", 485 svc->fwmark, 486 IP_VS_DBG_ADDR(svc->af, &svc->addr), 487 ntohs(svc->port)); 488 if (do_delay) 489 call_rcu(&svc->rcu_head, ip_vs_service_rcu_free); 490 else 491 ip_vs_service_free(svc); 492 } 493} 494 495 496/* 497 * Returns hash value for real service 498 */ 499static inline unsigned int ip_vs_rs_hashkey(int af, 500 const union nf_inet_addr *addr, 501 __be16 port) 502{ 503 register unsigned int porth = ntohs(port); 504 __be32 addr_fold = addr->ip; 505 506#ifdef CONFIG_IP_VS_IPV6 507 if (af == AF_INET6) 508 addr_fold = addr->ip6[0]^addr->ip6[1]^ 509 addr->ip6[2]^addr->ip6[3]; 510#endif 511 512 return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth) 513 & IP_VS_RTAB_MASK; 514} 515 516/* 
Hash ip_vs_dest in rs_table by <proto,addr,port>. */ 517static void ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest) 518{ 519 unsigned int hash; 520 521 if (dest->in_rs_table) 522 return; 523 524 /* 525 * Hash by proto,addr,port, 526 * which are the parameters of the real service. 527 */ 528 hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port); 529 530 hlist_add_head_rcu(&dest->d_list, &ipvs->rs_table[hash]); 531 dest->in_rs_table = 1; 532} 533 534/* Unhash ip_vs_dest from rs_table. */ 535static void ip_vs_rs_unhash(struct ip_vs_dest *dest) 536{ 537 /* 538 * Remove it from the rs_table table. 539 */ 540 if (dest->in_rs_table) { 541 hlist_del_rcu(&dest->d_list); 542 dest->in_rs_table = 0; 543 } 544} 545 546/* Check if real service by <proto,addr,port> is present */ 547bool ip_vs_has_real_service(struct net *net, int af, __u16 protocol, 548 const union nf_inet_addr *daddr, __be16 dport) 549{ 550 struct netns_ipvs *ipvs = net_ipvs(net); 551 unsigned int hash; 552 struct ip_vs_dest *dest; 553 554 /* Check for "full" addressed entries */ 555 hash = ip_vs_rs_hashkey(af, daddr, dport); 556 557 rcu_read_lock(); 558 hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) { 559 if (dest->port == dport && 560 dest->af == af && 561 ip_vs_addr_equal(af, &dest->addr, daddr) && 562 (dest->protocol == protocol || dest->vfwmark)) { 563 /* HIT */ 564 rcu_read_unlock(); 565 return true; 566 } 567 } 568 rcu_read_unlock(); 569 570 return false; 571} 572 573/* Lookup destination by {addr,port} in the given service 574 * Called under RCU lock. 
575 */ 576static struct ip_vs_dest * 577ip_vs_lookup_dest(struct ip_vs_service *svc, int dest_af, 578 const union nf_inet_addr *daddr, __be16 dport) 579{ 580 struct ip_vs_dest *dest; 581 582 /* 583 * Find the destination for the given service 584 */ 585 list_for_each_entry_rcu(dest, &svc->destinations, n_list) { 586 if ((dest->af == dest_af) && 587 ip_vs_addr_equal(dest_af, &dest->addr, daddr) && 588 (dest->port == dport)) { 589 /* HIT */ 590 return dest; 591 } 592 } 593 594 return NULL; 595} 596 597/* 598 * Find destination by {daddr,dport,vaddr,protocol} 599 * Created to be used in ip_vs_process_message() in 600 * the backup synchronization daemon. It finds the 601 * destination to be bound to the received connection 602 * on the backup. 603 * Called under RCU lock, no refcnt is returned. 604 */ 605struct ip_vs_dest *ip_vs_find_dest(struct net *net, int svc_af, int dest_af, 606 const union nf_inet_addr *daddr, 607 __be16 dport, 608 const union nf_inet_addr *vaddr, 609 __be16 vport, __u16 protocol, __u32 fwmark, 610 __u32 flags) 611{ 612 struct ip_vs_dest *dest; 613 struct ip_vs_service *svc; 614 __be16 port = dport; 615 616 svc = ip_vs_service_find(net, svc_af, fwmark, protocol, vaddr, vport); 617 if (!svc) 618 return NULL; 619 if (fwmark && (flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) 620 port = 0; 621 dest = ip_vs_lookup_dest(svc, dest_af, daddr, port); 622 if (!dest) 623 dest = ip_vs_lookup_dest(svc, dest_af, daddr, port ^ dport); 624 return dest; 625} 626 627void ip_vs_dest_dst_rcu_free(struct rcu_head *head) 628{ 629 struct ip_vs_dest_dst *dest_dst = container_of(head, 630 struct ip_vs_dest_dst, 631 rcu_head); 632 633 dst_release(dest_dst->dst_cache); 634 kfree(dest_dst); 635} 636 637/* Release dest_dst and dst_cache for dest in user context */ 638static void __ip_vs_dst_cache_reset(struct ip_vs_dest *dest) 639{ 640 struct ip_vs_dest_dst *old; 641 642 old = rcu_dereference_protected(dest->dest_dst, 1); 643 if (old) { 644 
RCU_INIT_POINTER(dest->dest_dst, NULL); 645 call_rcu(&old->rcu_head, ip_vs_dest_dst_rcu_free); 646 } 647} 648 649/* 650 * Lookup dest by {svc,addr,port} in the destination trash. 651 * The destination trash is used to hold the destinations that are removed 652 * from the service table but are still referenced by some conn entries. 653 * The reason to add the destination trash is when the dest is temporary 654 * down (either by administrator or by monitor program), the dest can be 655 * picked back from the trash, the remaining connections to the dest can 656 * continue, and the counting information of the dest is also useful for 657 * scheduling. 658 */ 659static struct ip_vs_dest * 660ip_vs_trash_get_dest(struct ip_vs_service *svc, int dest_af, 661 const union nf_inet_addr *daddr, __be16 dport) 662{ 663 struct ip_vs_dest *dest; 664 struct netns_ipvs *ipvs = net_ipvs(svc->net); 665 666 /* 667 * Find the destination in trash 668 */ 669 spin_lock_bh(&ipvs->dest_trash_lock); 670 list_for_each_entry(dest, &ipvs->dest_trash, t_list) { 671 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, " 672 "dest->refcnt=%d\n", 673 dest->vfwmark, 674 IP_VS_DBG_ADDR(dest->af, &dest->addr), 675 ntohs(dest->port), 676 atomic_read(&dest->refcnt)); 677 if (dest->af == dest_af && 678 ip_vs_addr_equal(dest_af, &dest->addr, daddr) && 679 dest->port == dport && 680 dest->vfwmark == svc->fwmark && 681 dest->protocol == svc->protocol && 682 (svc->fwmark || 683 (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) && 684 dest->vport == svc->port))) { 685 /* HIT */ 686 list_del(&dest->t_list); 687 ip_vs_dest_hold(dest); 688 goto out; 689 } 690 } 691 692 dest = NULL; 693 694out: 695 spin_unlock_bh(&ipvs->dest_trash_lock); 696 697 return dest; 698} 699 700static void ip_vs_dest_free(struct ip_vs_dest *dest) 701{ 702 struct ip_vs_service *svc = rcu_dereference_protected(dest->svc, 1); 703 704 __ip_vs_dst_cache_reset(dest); 705 __ip_vs_svc_put(svc, false); 706 free_percpu(dest->stats.cpustats); 
707 ip_vs_dest_put_and_free(dest); 708} 709 710/* 711 * Clean up all the destinations in the trash 712 * Called by the ip_vs_control_cleanup() 713 * 714 * When the ip_vs_control_clearup is activated by ipvs module exit, 715 * the service tables must have been flushed and all the connections 716 * are expired, and the refcnt of each destination in the trash must 717 * be 0, so we simply release them here. 718 */ 719static void ip_vs_trash_cleanup(struct net *net) 720{ 721 struct ip_vs_dest *dest, *nxt; 722 struct netns_ipvs *ipvs = net_ipvs(net); 723 724 del_timer_sync(&ipvs->dest_trash_timer); 725 /* No need to use dest_trash_lock */ 726 list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, t_list) { 727 list_del(&dest->t_list); 728 ip_vs_dest_free(dest); 729 } 730} 731 732static void 733ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src) 734{ 735#define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->ustats.c - src->ustats0.c 736 737 spin_lock_bh(&src->lock); 738 739 IP_VS_SHOW_STATS_COUNTER(conns); 740 IP_VS_SHOW_STATS_COUNTER(inpkts); 741 IP_VS_SHOW_STATS_COUNTER(outpkts); 742 IP_VS_SHOW_STATS_COUNTER(inbytes); 743 IP_VS_SHOW_STATS_COUNTER(outbytes); 744 745 ip_vs_read_estimator(dst, src); 746 747 spin_unlock_bh(&src->lock); 748} 749 750static void 751ip_vs_zero_stats(struct ip_vs_stats *stats) 752{ 753 spin_lock_bh(&stats->lock); 754 755 /* get current counters as zero point, rates are zeroed */ 756 757#define IP_VS_ZERO_STATS_COUNTER(c) stats->ustats0.c = stats->ustats.c 758 759 IP_VS_ZERO_STATS_COUNTER(conns); 760 IP_VS_ZERO_STATS_COUNTER(inpkts); 761 IP_VS_ZERO_STATS_COUNTER(outpkts); 762 IP_VS_ZERO_STATS_COUNTER(inbytes); 763 IP_VS_ZERO_STATS_COUNTER(outbytes); 764 765 ip_vs_zero_estimator(stats); 766 767 spin_unlock_bh(&stats->lock); 768} 769 770/* 771 * Update a destination in the given service 772 */ 773static void 774__ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, 775 struct ip_vs_dest_user_kern *udest, int add) 
776{ 777 struct netns_ipvs *ipvs = net_ipvs(svc->net); 778 struct ip_vs_service *old_svc; 779 struct ip_vs_scheduler *sched; 780 int conn_flags; 781 782 /* We cannot modify an address and change the address family */ 783 BUG_ON(!add && udest->af != dest->af); 784 785 if (add && udest->af != svc->af) 786 ipvs->mixed_address_family_dests++; 787 788 /* set the weight and the flags */ 789 atomic_set(&dest->weight, udest->weight); 790 conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK; 791 conn_flags |= IP_VS_CONN_F_INACTIVE; 792 793 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */ 794 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) { 795 conn_flags |= IP_VS_CONN_F_NOOUTPUT; 796 } else { 797 /* 798 * Put the real service in rs_table if not present. 799 * For now only for NAT! 800 */ 801 ip_vs_rs_hash(ipvs, dest); 802 } 803 atomic_set(&dest->conn_flags, conn_flags); 804 805 /* bind the service */ 806 old_svc = rcu_dereference_protected(dest->svc, 1); 807 if (!old_svc) { 808 __ip_vs_bind_svc(dest, svc); 809 } else { 810 if (old_svc != svc) { 811 ip_vs_zero_stats(&dest->stats); 812 __ip_vs_bind_svc(dest, svc); 813 __ip_vs_svc_put(old_svc, true); 814 } 815 } 816 817 /* set the dest status flags */ 818 dest->flags |= IP_VS_DEST_F_AVAILABLE; 819 820 if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold) 821 dest->flags &= ~IP_VS_DEST_F_OVERLOAD; 822 dest->u_threshold = udest->u_threshold; 823 dest->l_threshold = udest->l_threshold; 824 825 dest->af = udest->af; 826 827 spin_lock_bh(&dest->dst_lock); 828 __ip_vs_dst_cache_reset(dest); 829 spin_unlock_bh(&dest->dst_lock); 830 831 sched = rcu_dereference_protected(svc->scheduler, 1); 832 if (add) { 833 ip_vs_start_estimator(svc->net, &dest->stats); 834 list_add_rcu(&dest->n_list, &svc->destinations); 835 svc->num_dests++; 836 if (sched->add_dest) 837 sched->add_dest(svc, dest); 838 } else { 839 if (sched->upd_dest) 840 sched->upd_dest(svc, dest); 841 } 842} 843 844 845/* 846 * 
Create a destination for the given service 847 */ 848static int 849ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest, 850 struct ip_vs_dest **dest_p) 851{ 852 struct ip_vs_dest *dest; 853 unsigned int atype, i; 854 855 EnterFunction(2); 856 857#ifdef CONFIG_IP_VS_IPV6 858 if (udest->af == AF_INET6) { 859 atype = ipv6_addr_type(&udest->addr.in6); 860 if ((!(atype & IPV6_ADDR_UNICAST) || 861 atype & IPV6_ADDR_LINKLOCAL) && 862 !__ip_vs_addr_is_local_v6(svc->net, &udest->addr.in6)) 863 return -EINVAL; 864 } else 865#endif 866 { 867 atype = inet_addr_type(svc->net, udest->addr.ip); 868 if (atype != RTN_LOCAL && atype != RTN_UNICAST) 869 return -EINVAL; 870 } 871 872 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL); 873 if (dest == NULL) 874 return -ENOMEM; 875 876 dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats); 877 if (!dest->stats.cpustats) 878 goto err_alloc; 879 880 for_each_possible_cpu(i) { 881 struct ip_vs_cpu_stats *ip_vs_dest_stats; 882 ip_vs_dest_stats = per_cpu_ptr(dest->stats.cpustats, i); 883 u64_stats_init(&ip_vs_dest_stats->syncp); 884 } 885 886 dest->af = udest->af; 887 dest->protocol = svc->protocol; 888 dest->vaddr = svc->addr; 889 dest->vport = svc->port; 890 dest->vfwmark = svc->fwmark; 891 ip_vs_addr_copy(udest->af, &dest->addr, &udest->addr); 892 dest->port = udest->port; 893 894 atomic_set(&dest->activeconns, 0); 895 atomic_set(&dest->inactconns, 0); 896 atomic_set(&dest->persistconns, 0); 897 atomic_set(&dest->refcnt, 1); 898 899 INIT_HLIST_NODE(&dest->d_list); 900 spin_lock_init(&dest->dst_lock); 901 spin_lock_init(&dest->stats.lock); 902 __ip_vs_update_dest(svc, dest, udest, 1); 903 904 *dest_p = dest; 905 906 LeaveFunction(2); 907 return 0; 908 909err_alloc: 910 kfree(dest); 911 return -ENOMEM; 912} 913 914 915/* 916 * Add a destination into an existing service 917 */ 918static int 919ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) 920{ 921 struct ip_vs_dest *dest; 922 
union nf_inet_addr daddr; 923 __be16 dport = udest->port; 924 int ret; 925 926 EnterFunction(2); 927 928 if (udest->weight < 0) { 929 pr_err("%s(): server weight less than zero\n", __func__); 930 return -ERANGE; 931 } 932 933 if (udest->l_threshold > udest->u_threshold) { 934 pr_err("%s(): lower threshold is higher than upper threshold\n", 935 __func__); 936 return -ERANGE; 937 } 938 939 ip_vs_addr_copy(udest->af, &daddr, &udest->addr); 940 941 /* We use function that requires RCU lock */ 942 rcu_read_lock(); 943 dest = ip_vs_lookup_dest(svc, udest->af, &daddr, dport); 944 rcu_read_unlock(); 945 946 if (dest != NULL) { 947 IP_VS_DBG(1, "%s(): dest already exists\n", __func__); 948 return -EEXIST; 949 } 950 951 /* 952 * Check if the dest already exists in the trash and 953 * is from the same service 954 */ 955 dest = ip_vs_trash_get_dest(svc, udest->af, &daddr, dport); 956 957 if (dest != NULL) { 958 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, " 959 "dest->refcnt=%d, service %u/%s:%u\n", 960 IP_VS_DBG_ADDR(udest->af, &daddr), ntohs(dport), 961 atomic_read(&dest->refcnt), 962 dest->vfwmark, 963 IP_VS_DBG_ADDR(svc->af, &dest->vaddr), 964 ntohs(dest->vport)); 965 966 __ip_vs_update_dest(svc, dest, udest, 1); 967 ret = 0; 968 } else { 969 /* 970 * Allocate and initialize the dest structure 971 */ 972 ret = ip_vs_new_dest(svc, udest, &dest); 973 } 974 LeaveFunction(2); 975 976 return ret; 977} 978 979 980/* 981 * Edit a destination in the given service 982 */ 983static int 984ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) 985{ 986 struct ip_vs_dest *dest; 987 union nf_inet_addr daddr; 988 __be16 dport = udest->port; 989 990 EnterFunction(2); 991 992 if (udest->weight < 0) { 993 pr_err("%s(): server weight less than zero\n", __func__); 994 return -ERANGE; 995 } 996 997 if (udest->l_threshold > udest->u_threshold) { 998 pr_err("%s(): lower threshold is higher than upper threshold\n", 999 __func__); 1000 return -ERANGE; 1001 } 1002 
1003 ip_vs_addr_copy(udest->af, &daddr, &udest->addr); 1004 1005 /* We use function that requires RCU lock */ 1006 rcu_read_lock(); 1007 dest = ip_vs_lookup_dest(svc, udest->af, &daddr, dport); 1008 rcu_read_unlock(); 1009 1010 if (dest == NULL) { 1011 IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__); 1012 return -ENOENT; 1013 } 1014 1015 __ip_vs_update_dest(svc, dest, udest, 0); 1016 LeaveFunction(2); 1017 1018 return 0; 1019} 1020 1021/* 1022 * Delete a destination (must be already unlinked from the service) 1023 */ 1024static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest, 1025 bool cleanup) 1026{ 1027 struct netns_ipvs *ipvs = net_ipvs(net); 1028 1029 ip_vs_stop_estimator(net, &dest->stats); 1030 1031 /* 1032 * Remove it from the d-linked list with the real services. 1033 */ 1034 ip_vs_rs_unhash(dest); 1035 1036 spin_lock_bh(&ipvs->dest_trash_lock); 1037 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, dest->refcnt=%d\n", 1038 IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port), 1039 atomic_read(&dest->refcnt)); 1040 if (list_empty(&ipvs->dest_trash) && !cleanup) 1041 mod_timer(&ipvs->dest_trash_timer, 1042 jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1)); 1043 /* dest lives in trash without reference */ 1044 list_add(&dest->t_list, &ipvs->dest_trash); 1045 dest->idle_start = 0; 1046 spin_unlock_bh(&ipvs->dest_trash_lock); 1047 ip_vs_dest_put(dest); 1048} 1049 1050 1051/* 1052 * Unlink a destination from the given service 1053 */ 1054static void __ip_vs_unlink_dest(struct ip_vs_service *svc, 1055 struct ip_vs_dest *dest, 1056 int svcupd) 1057{ 1058 dest->flags &= ~IP_VS_DEST_F_AVAILABLE; 1059 1060 /* 1061 * Remove it from the d-linked destination list. 
1062 */ 1063 list_del_rcu(&dest->n_list); 1064 svc->num_dests--; 1065 1066 if (dest->af != svc->af) 1067 net_ipvs(svc->net)->mixed_address_family_dests--; 1068 1069 if (svcupd) { 1070 struct ip_vs_scheduler *sched; 1071 1072 sched = rcu_dereference_protected(svc->scheduler, 1); 1073 if (sched->del_dest) 1074 sched->del_dest(svc, dest); 1075 } 1076} 1077 1078 1079/* 1080 * Delete a destination server in the given service 1081 */ 1082static int 1083ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) 1084{ 1085 struct ip_vs_dest *dest; 1086 __be16 dport = udest->port; 1087 1088 EnterFunction(2); 1089 1090 /* We use function that requires RCU lock */ 1091 rcu_read_lock(); 1092 dest = ip_vs_lookup_dest(svc, udest->af, &udest->addr, dport); 1093 rcu_read_unlock(); 1094 1095 if (dest == NULL) { 1096 IP_VS_DBG(1, "%s(): destination not found!\n", __func__); 1097 return -ENOENT; 1098 } 1099 1100 /* 1101 * Unlink dest from the service 1102 */ 1103 __ip_vs_unlink_dest(svc, dest, 1); 1104 1105 /* 1106 * Delete the destination 1107 */ 1108 __ip_vs_del_dest(svc->net, dest, false); 1109 1110 LeaveFunction(2); 1111 1112 return 0; 1113} 1114 1115static void ip_vs_dest_trash_expire(unsigned long data) 1116{ 1117 struct net *net = (struct net *) data; 1118 struct netns_ipvs *ipvs = net_ipvs(net); 1119 struct ip_vs_dest *dest, *next; 1120 unsigned long now = jiffies; 1121 1122 spin_lock(&ipvs->dest_trash_lock); 1123 list_for_each_entry_safe(dest, next, &ipvs->dest_trash, t_list) { 1124 if (atomic_read(&dest->refcnt) > 0) 1125 continue; 1126 if (dest->idle_start) { 1127 if (time_before(now, dest->idle_start + 1128 IP_VS_DEST_TRASH_PERIOD)) 1129 continue; 1130 } else { 1131 dest->idle_start = max(1UL, now); 1132 continue; 1133 } 1134 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u from trash\n", 1135 dest->vfwmark, 1136 IP_VS_DBG_ADDR(dest->af, &dest->addr), 1137 ntohs(dest->port)); 1138 list_del(&dest->t_list); 1139 ip_vs_dest_free(dest); 1140 } 1141 if 
		(!list_empty(&ipvs->dest_trash))
		mod_timer(&ipvs->dest_trash_timer,
			  jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1));
	spin_unlock(&ipvs->dest_trash_lock);
}

/*
 *	Add a service into the service hash table
 *
 *	On success the new service is stored in *svc_p and 0 is returned.
 *	On failure a negative errno is returned (-ENOENT for an unknown
 *	scheduler or persistence engine, -EINVAL for a bad IPv6 prefix
 *	length, -ENOMEM on allocation failure, or the scheduler bind error)
 *	and every reference taken here is dropped again.
 */
static int
ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
		  struct ip_vs_service **svc_p)
{
	int ret = 0, i;
	struct ip_vs_scheduler *sched = NULL;
	struct ip_vs_pe *pe = NULL;
	struct ip_vs_service *svc = NULL;
	struct netns_ipvs *ipvs = net_ipvs(net);

	/* increase the module use count */
	ip_vs_use_count_inc();

	/* Lookup the scheduler by 'u->sched_name' */
	sched = ip_vs_scheduler_get(u->sched_name);
	if (sched == NULL) {
		pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
		ret = -ENOENT;
		goto out_err;
	}

	/* The persistence engine is optional: only looked up when named */
	if (u->pe_name && *u->pe_name) {
		pe = ip_vs_pe_getbyname(u->pe_name);
		if (pe == NULL) {
			pr_info("persistence engine module ip_vs_pe_%s "
				"not found\n", u->pe_name);
			ret = -ENOENT;
			goto out_err;
		}
	}

#ifdef CONFIG_IP_VS_IPV6
	if (u->af == AF_INET6) {
		/* for IPv6 the netmask field is checked as a prefix length */
		__u32 plen = (__force __u32) u->netmask;

		if (plen < 1 || plen > 128) {
			ret = -EINVAL;
			goto out_err;
		}
	}
#endif

	svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
	if (svc == NULL) {
		IP_VS_DBG(1, "%s(): no memory\n", __func__);
		ret = -ENOMEM;
		goto out_err;
	}
	svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
	if (!svc->stats.cpustats) {
		ret = -ENOMEM;
		goto out_err;
	}

	for_each_possible_cpu(i) {
		struct ip_vs_cpu_stats *ip_vs_stats;
		ip_vs_stats = per_cpu_ptr(svc->stats.cpustats, i);
		u64_stats_init(&ip_vs_stats->syncp);
	}


	/* I'm the first user of the service */
	atomic_set(&svc->refcnt, 0);

	svc->af = u->af;
	svc->protocol = u->protocol;
	ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
	svc->port = u->port;
	svc->fwmark = u->fwmark;
	svc->flags = u->flags;
	svc->timeout = u->timeout * HZ;
	svc->netmask = u->netmask;
	svc->net = net;

	INIT_LIST_HEAD(&svc->destinations);
	spin_lock_init(&svc->sched_lock);
	spin_lock_init(&svc->stats.lock);

	/* Bind the scheduler */
	ret = ip_vs_bind_scheduler(svc, sched);
	if (ret)
		goto out_err;
	/* the service holds the scheduler now: skip the put in out_err */
	sched = NULL;

	/* Bind the ct retriever */
	RCU_INIT_POINTER(svc->pe, pe);
	/* likewise, the pe reference now belongs to the service */
	pe = NULL;

	/* Update the virtual service counters */
	if (svc->port == FTPPORT)
		atomic_inc(&ipvs->ftpsvc_counter);
	else if (svc->port == 0)
		atomic_inc(&ipvs->nullsvc_counter);

	ip_vs_start_estimator(net, &svc->stats);

	/* Count only IPv4 services for old get/setsockopt interface */
	if (svc->af == AF_INET)
		ipvs->num_services++;

	/* Hash the service into the service table */
	ip_vs_svc_hash(svc);

	*svc_p = svc;
	/* Now there is a service - full throttle */
	ipvs->enable = 1;
	return 0;


 out_err:
	if (svc != NULL) {
		ip_vs_unbind_scheduler(svc, sched);
		ip_vs_service_free(svc);
	}
	ip_vs_scheduler_put(sched);
	ip_vs_pe_put(pe);

	/* decrease the module use count */
	ip_vs_use_count_dec();

	return ret;
}


/*
 *	Edit a service and bind it with a new scheduler
 *
 *	Returns 0 on success or a negative errno.  'old_sched' and 'old_pe'
 *	always name the reference that must be dropped at 'out': the newly
 *	looked-up object while it is unused, the previously installed one
 *	once it has been replaced.
 */
static int
ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
{
	struct ip_vs_scheduler *sched, *old_sched;
	struct ip_vs_pe *pe = NULL, *old_pe = NULL;
	int ret = 0;

	/*
	 * Lookup the scheduler, by 'u->sched_name'
	 */
	sched = ip_vs_scheduler_get(u->sched_name);
	if (sched == NULL) {
		pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
		return -ENOENT;
	}
	/* until the swap below, the reference to drop is the new one */
	old_sched = sched;

	if (u->pe_name && *u->pe_name) {
		pe = ip_vs_pe_getbyname(u->pe_name);
		if (pe == NULL) {
			pr_info("persistence engine module ip_vs_pe_%s "
				"not found\n", u->pe_name);
			ret = -ENOENT;
			goto out;
		}
		old_pe = pe;
	}

#ifdef CONFIG_IP_VS_IPV6
	if (u->af == AF_INET6) {
		__u32 plen = (__force __u32) u->netmask;

		if (plen < 1 || plen > 128) {
			ret = -EINVAL;
			goto out;
		}
	}
#endif

	old_sched = rcu_dereference_protected(svc->scheduler, 1);
	if (sched != old_sched) {
		/* Bind the new scheduler */
		ret = ip_vs_bind_scheduler(svc, sched);
		if (ret) {
			/* bind failed: release the new scheduler instead */
			old_sched = sched;
			goto out;
		}
		/* Unbind the old scheduler on success */
		ip_vs_unbind_scheduler(svc, old_sched);
	}

	/*
	 * Set the flags and timeout value
	 */
	svc->flags = u->flags | IP_VS_SVC_F_HASHED;
	svc->timeout = u->timeout * HZ;
	svc->netmask = u->netmask;

	old_pe = rcu_dereference_protected(svc->pe, 1);
	if (pe != old_pe)
		rcu_assign_pointer(svc->pe, pe);

out:
	ip_vs_scheduler_put(old_sched);
	ip_vs_pe_put(old_pe);
	return ret;
}

/*
 *	Delete a service from the service list
 *	- The service must be unlinked, unlocked and not referenced!
 *	- We are called under _bh lock
 */
static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup)
{
	struct ip_vs_dest *dest, *nxt;
	struct ip_vs_scheduler *old_sched;
	struct ip_vs_pe *old_pe;
	struct netns_ipvs *ipvs = net_ipvs(svc->net);

	pr_info("%s: enter\n", __func__);

	/* Count only IPv4 services for old get/setsockopt interface */
	if (svc->af == AF_INET)
		ipvs->num_services--;

	ip_vs_stop_estimator(svc->net, &svc->stats);

	/* Unbind scheduler */
	old_sched = rcu_dereference_protected(svc->scheduler, 1);
	ip_vs_unbind_scheduler(svc, old_sched);
	ip_vs_scheduler_put(old_sched);

	/* Unbind persistence engine, keep svc->pe */
	old_pe = rcu_dereference_protected(svc->pe, 1);
	ip_vs_pe_put(old_pe);

	/*
	 *    Unlink the whole destination list
	 *    (svcupd == 0: the scheduler was already unbound above)
	 */
	list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
		__ip_vs_unlink_dest(svc, dest, 0);
		__ip_vs_del_dest(svc->net, dest, cleanup);
	}

	/*
	 *    Update the virtual service counters
	 */
	if (svc->port == FTPPORT)
		atomic_dec(&ipvs->ftpsvc_counter);
	else if (svc->port == 0)
		atomic_dec(&ipvs->nullsvc_counter);

	/*
	 *    Free the service if nobody refers to it
	 */
	__ip_vs_svc_put(svc, true);

	/* decrease the module use count */
	ip_vs_use_count_dec();
}

/*
 * Unlink a service from list and try to delete it if its refcnt reached 0
 */
static void ip_vs_unlink_service(struct ip_vs_service *svc, bool cleanup)
{
	/* Hold svc to avoid double release from dest_trash */
	atomic_inc(&svc->refcnt);
	/*
	 * Unhash it from the service table
	 */
	ip_vs_svc_unhash(svc);

	__ip_vs_del_service(svc, cleanup);
}

/*
 *	Delete a service from the service list
 *
 *	Returns 0 on success, or -EEXIST when svc is NULL.
 */
static int ip_vs_del_service(struct ip_vs_service *svc)
{
	if (svc == NULL)
		return -EEXIST;
	ip_vs_unlink_service(svc, false);

	return 0;
}


/*
 *	Flush all the virtual services
 *
 *	Both hash tables are walked; only services whose svc->net equals
 *	'net' are removed.  'cleanup' is passed through to the destination
 *	deletion.  Always returns 0.
 */
static int ip_vs_flush(struct net *net, bool cleanup)
{
	int idx;
	struct ip_vs_service *svc;
	struct hlist_node *n;

	/*
	 * Flush the service table hashed by <netns,protocol,addr,port>
	 */
	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		hlist_for_each_entry_safe(svc, n, &ip_vs_svc_table[idx],
					  s_list) {
			if (net_eq(svc->net, net))
				ip_vs_unlink_service(svc, cleanup);
		}
	}

	/*
	 * Flush the service table hashed by fwmark
	 */
	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		hlist_for_each_entry_safe(svc, n, &ip_vs_svc_fwm_table[idx],
					  f_list) {
			if (net_eq(svc->net, net))
				ip_vs_unlink_service(svc, cleanup);
		}
	}

	return 0;
}

/*
 *	Delete service by {netns} in the service table.
 *	Called by __ip_vs_cleanup()
 */
void ip_vs_service_net_cleanup(struct net *net)
{
	EnterFunction(2);
	/* Check for "full" addressed entries */
	mutex_lock(&__ip_vs_mutex);
	ip_vs_flush(net, true);
	mutex_unlock(&__ip_vs_mutex);
	LeaveFunction(2);
}

/* Put all references for device (dst_cache) */
static inline void
ip_vs_forget_dev(struct ip_vs_dest *dest, struct net_device *dev)
{
	struct ip_vs_dest_dst *dest_dst;

	spin_lock_bh(&dest->dst_lock);
	dest_dst = rcu_dereference_protected(dest->dest_dst, 1);
	/* only reset the cached route when it points at this device */
	if (dest_dst && dest_dst->dst_cache->dev == dev) {
		IP_VS_DBG_BUF(3, "Reset dev:%s dest %s:%u ,dest->refcnt=%d\n",
			      dev->name,
			      IP_VS_DBG_ADDR(dest->af, &dest->addr),
			      ntohs(dest->port),
			      atomic_read(&dest->refcnt));
		__ip_vs_dst_cache_reset(dest);
	}
	spin_unlock_bh(&dest->dst_lock);

}
/* Netdev event receiver
 * Currently only NETDEV_DOWN is handled to release
 * refs to cached dsts
 */
static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,
			   void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct net *net = dev_net(dev);
	struct netns_ipvs *ipvs = net_ipvs(net);
	struct ip_vs_service *svc;
	struct ip_vs_dest *dest;
	unsigned int idx;

	/* nothing to do for other events or when net has no IPVS state */
	if (event != NETDEV_DOWN || !ipvs)
		return NOTIFY_DONE;
	IP_VS_DBG(3, "%s() dev=%s\n", __func__, dev->name);
	EnterFunction(2);
	mutex_lock(&__ip_vs_mutex);
	/* both service tables are walked bucket by bucket in one pass */
	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
			if (net_eq(svc->net, net)) {
				list_for_each_entry(dest, &svc->destinations,
						    n_list) {
					ip_vs_forget_dev(dest, dev);
				}
			}
		}

		hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
			if (net_eq(svc->net, net)) {
				list_for_each_entry(dest, &svc->destinations,
						    n_list) {
					ip_vs_forget_dev(dest, dev);
				}
			}

		}
	}

	/* destinations waiting in the trash may still cache a route too */
	spin_lock_bh(&ipvs->dest_trash_lock);
	list_for_each_entry(dest, &ipvs->dest_trash, t_list) {
		ip_vs_forget_dev(dest, dev);
	}
	spin_unlock_bh(&ipvs->dest_trash_lock);
	mutex_unlock(&__ip_vs_mutex);
	LeaveFunction(2);
	return NOTIFY_DONE;
}

/*
 *	Zero counters in a service or all services
 */
static int ip_vs_zero_service(struct ip_vs_service *svc)
{
	struct ip_vs_dest *dest;

	list_for_each_entry(dest, &svc->destinations, n_list) {
		ip_vs_zero_stats(&dest->stats);
	}
	ip_vs_zero_stats(&svc->stats);
	return 0;
}

/*
 *	Zero the counters of every service that belongs to 'net', then the
 *	namespace-wide totals.  Always returns 0.
 */
static int ip_vs_zero_all(struct net *net)
{
	int idx;
	struct ip_vs_service *svc;

	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
			if (net_eq(svc->net, net))
				ip_vs_zero_service(svc);
		}
	}

	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
			if (net_eq(svc->net, net))
				ip_vs_zero_service(svc);
		}
	}

	ip_vs_zero_stats(&net_ipvs(net)->tot_stats);
	return 0;
}

#ifdef CONFIG_SYSCTL

/* bounds referenced through .extra1/.extra2 in the sysctl table */
static int zero;
static int three = 3;

/*
 * sysctl handler for the defense-mode knobs.
 * A written value outside 0..3 is rolled back to the previous value;
 * an accepted change triggers update_defense_level().
 * Returns the result of proc_dointvec().
 */
static int
proc_do_defense_mode(struct ctl_table *table, int write,
		     void __user *buffer, size_t *lenp, loff_t *ppos)
{
	struct net *net = current->nsproxy->net_ns;
	int *valp = table->data;
	int val = *valp;
	int rc;

	rc = proc_dointvec(table, write, buffer, lenp, ppos);
	if (write && (*valp != val)) {
		if ((*valp < 0) || (*valp > 3)) {
			/* Restore the correct value */
			*valp = val;
		} else {
			update_defense_level(net_ipvs(net));
		}
	}
	return rc;
}

/*
 * sysctl handler for a pair of ints.
 * A write is rolled back when either value is negative, or when the
 * second value is non-zero and the first is not smaller than it.
 * Returns the result of proc_dointvec().
 */
static int
proc_do_sync_threshold(struct ctl_table *table, int write,
		       void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int *valp = table->data;
	int val[2];
	int rc;

	/* backup the value first */
	memcpy(val, valp, sizeof(val));

	rc = proc_dointvec(table, write, buffer, lenp, ppos);
	if (write && (valp[0] < 0 || valp[1] < 0 ||
	    (valp[0] >= valp[1] && valp[1]))) {
		/* Restore the correct value */
		memcpy(valp, val, sizeof(val));
	}
	return rc;
}

/*
 * sysctl handler that only accepts 0 or 1; any other written value is
 * rolled back.  Returns the result of proc_dointvec().
 */
static int
proc_do_sync_mode(struct ctl_table *table, int write,
		     void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int *valp = table->data;
	int val = *valp;
	int rc;

	rc = proc_dointvec(table, write, buffer, lenp, ppos);
	if (write && (*valp != val)) {
		if ((*valp < 0) || (*valp > 1)) {
			/* Restore the correct value */
			*valp = val;
		}
	}
	return rc;
}

static int
proc_do_sync_ports(struct ctl_table *table, int write,
		   void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int
		*valp = table->data;
	int val = *valp;
	int rc;

	rc = proc_dointvec(table, write, buffer, lenp, ppos);
	if (write && (*valp != val)) {
		/* only positive powers of two are accepted */
		if (*valp < 1 || !is_power_of_2(*valp)) {
			/* Restore the correct value */
			*valp = val;
		}
	}
	return rc;
}

/*
 *	IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
 *	Do not change order or insert new entries without
 *	align with netns init in ip_vs_control_net_init()
 */

static struct ctl_table vs_vars[] = {
	{
		.procname	= "amemthresh",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "am_droprate",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "drop_entry",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_do_defense_mode,
	},
	{
		.procname	= "drop_packet",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_do_defense_mode,
	},
#ifdef CONFIG_IP_VS_NFCT
	{
		.procname	= "conntrack",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
#endif
	{
		.procname	= "secure_tcp",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_do_defense_mode,
	},
	{
		.procname	= "snat_reroute",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	{
		.procname	= "sync_version",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_do_sync_mode,
	},
	{
		.procname	= "sync_ports",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_do_sync_ports,
	},
	{
		.procname	= "sync_persist_mode",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		/* the only unsigned long entry in the table */
		.procname	= "sync_qlen_max",
		.maxlen		= sizeof(unsigned long),
		.mode		= 0644,
		.proc_handler	= proc_doulongvec_minmax,
	},
	{
		.procname	= "sync_sock_size",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "cache_bypass",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "expire_nodest_conn",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "sloppy_tcp",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "sloppy_sctp",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "expire_quiescent_template",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		/* sized from the netns_ipvs field it is backed by */
		.procname	= "sync_threshold",
		.maxlen		=
			sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold),
		.mode		= 0644,
		.proc_handler	= proc_do_sync_threshold,
	},
	{
		.procname	= "sync_refresh_period",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		/* clamped to [zero, three] by proc_dointvec_minmax */
		.procname	= "sync_retries",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &zero,
		.extra2		= &three,
	},
	{
		.procname	= "nat_icmp_send",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "pmtu_disc",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "backup_only",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
#ifdef CONFIG_IP_VS_DEBUG
	{
		/* the only entry with .data set statically in this table */
		.procname	= "debug_level",
		.data		= &sysctl_ip_vs_debug_level,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
#endif
	{
} 1822}; 1823 1824#endif 1825 1826#ifdef CONFIG_PROC_FS 1827 1828struct ip_vs_iter { 1829 struct seq_net_private p; /* Do not move this, netns depends upon it*/ 1830 struct hlist_head *table; 1831 int bucket; 1832}; 1833 1834/* 1835 * Write the contents of the VS rule table to a PROCfs file. 1836 * (It is kept just for backward compatibility) 1837 */ 1838static inline const char *ip_vs_fwd_name(unsigned int flags) 1839{ 1840 switch (flags & IP_VS_CONN_F_FWD_MASK) { 1841 case IP_VS_CONN_F_LOCALNODE: 1842 return "Local"; 1843 case IP_VS_CONN_F_TUNNEL: 1844 return "Tunnel"; 1845 case IP_VS_CONN_F_DROUTE: 1846 return "Route"; 1847 default: 1848 return "Masq"; 1849 } 1850} 1851 1852 1853/* Get the Nth entry in the two lists */ 1854static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos) 1855{ 1856 struct net *net = seq_file_net(seq); 1857 struct ip_vs_iter *iter = seq->private; 1858 int idx; 1859 struct ip_vs_service *svc; 1860 1861 /* look in hash by protocol */ 1862 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1863 hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[idx], s_list) { 1864 if (net_eq(svc->net, net) && pos-- == 0) { 1865 iter->table = ip_vs_svc_table; 1866 iter->bucket = idx; 1867 return svc; 1868 } 1869 } 1870 } 1871 1872 /* keep looking in fwmark */ 1873 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1874 hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[idx], 1875 f_list) { 1876 if (net_eq(svc->net, net) && pos-- == 0) { 1877 iter->table = ip_vs_svc_fwm_table; 1878 iter->bucket = idx; 1879 return svc; 1880 } 1881 } 1882 } 1883 1884 return NULL; 1885} 1886 1887static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos) 1888 __acquires(RCU) 1889{ 1890 rcu_read_lock(); 1891 return *pos ? 
ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN; 1892} 1893 1894 1895static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos) 1896{ 1897 struct hlist_node *e; 1898 struct ip_vs_iter *iter; 1899 struct ip_vs_service *svc; 1900 1901 ++*pos; 1902 if (v == SEQ_START_TOKEN) 1903 return ip_vs_info_array(seq,0); 1904 1905 svc = v; 1906 iter = seq->private; 1907 1908 if (iter->table == ip_vs_svc_table) { 1909 /* next service in table hashed by protocol */ 1910 e = rcu_dereference(hlist_next_rcu(&svc->s_list)); 1911 if (e) 1912 return hlist_entry(e, struct ip_vs_service, s_list); 1913 1914 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) { 1915 hlist_for_each_entry_rcu(svc, 1916 &ip_vs_svc_table[iter->bucket], 1917 s_list) { 1918 return svc; 1919 } 1920 } 1921 1922 iter->table = ip_vs_svc_fwm_table; 1923 iter->bucket = -1; 1924 goto scan_fwmark; 1925 } 1926 1927 /* next service in hashed by fwmark */ 1928 e = rcu_dereference(hlist_next_rcu(&svc->f_list)); 1929 if (e) 1930 return hlist_entry(e, struct ip_vs_service, f_list); 1931 1932 scan_fwmark: 1933 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) { 1934 hlist_for_each_entry_rcu(svc, 1935 &ip_vs_svc_fwm_table[iter->bucket], 1936 f_list) 1937 return svc; 1938 } 1939 1940 return NULL; 1941} 1942 1943static void ip_vs_info_seq_stop(struct seq_file *seq, void *v) 1944 __releases(RCU) 1945{ 1946 rcu_read_unlock(); 1947} 1948 1949 1950static int ip_vs_info_seq_show(struct seq_file *seq, void *v) 1951{ 1952 if (v == SEQ_START_TOKEN) { 1953 seq_printf(seq, 1954 "IP Virtual Server version %d.%d.%d (size=%d)\n", 1955 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size); 1956 seq_puts(seq, 1957 "Prot LocalAddress:Port Scheduler Flags\n"); 1958 seq_puts(seq, 1959 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n"); 1960 } else { 1961 const struct ip_vs_service *svc = v; 1962 const struct ip_vs_iter *iter = seq->private; 1963 const struct ip_vs_dest *dest; 1964 struct ip_vs_scheduler *sched = 
rcu_dereference(svc->scheduler); 1965 1966 if (iter->table == ip_vs_svc_table) { 1967#ifdef CONFIG_IP_VS_IPV6 1968 if (svc->af == AF_INET6) 1969 seq_printf(seq, "%s [%pI6]:%04X %s ", 1970 ip_vs_proto_name(svc->protocol), 1971 &svc->addr.in6, 1972 ntohs(svc->port), 1973 sched->name); 1974 else 1975#endif 1976 seq_printf(seq, "%s %08X:%04X %s %s ", 1977 ip_vs_proto_name(svc->protocol), 1978 ntohl(svc->addr.ip), 1979 ntohs(svc->port), 1980 sched->name, 1981 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":""); 1982 } else { 1983 seq_printf(seq, "FWM %08X %s %s", 1984 svc->fwmark, sched->name, 1985 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":""); 1986 } 1987 1988 if (svc->flags & IP_VS_SVC_F_PERSISTENT) 1989 seq_printf(seq, "persistent %d %08X\n", 1990 svc->timeout, 1991 ntohl(svc->netmask)); 1992 else 1993 seq_putc(seq, '\n'); 1994 1995 list_for_each_entry_rcu(dest, &svc->destinations, n_list) { 1996#ifdef CONFIG_IP_VS_IPV6 1997 if (dest->af == AF_INET6) 1998 seq_printf(seq, 1999 " -> [%pI6]:%04X" 2000 " %-7s %-6d %-10d %-10d\n", 2001 &dest->addr.in6, 2002 ntohs(dest->port), 2003 ip_vs_fwd_name(atomic_read(&dest->conn_flags)), 2004 atomic_read(&dest->weight), 2005 atomic_read(&dest->activeconns), 2006 atomic_read(&dest->inactconns)); 2007 else 2008#endif 2009 seq_printf(seq, 2010 " -> %08X:%04X " 2011 "%-7s %-6d %-10d %-10d\n", 2012 ntohl(dest->addr.ip), 2013 ntohs(dest->port), 2014 ip_vs_fwd_name(atomic_read(&dest->conn_flags)), 2015 atomic_read(&dest->weight), 2016 atomic_read(&dest->activeconns), 2017 atomic_read(&dest->inactconns)); 2018 2019 } 2020 } 2021 return 0; 2022} 2023 2024static const struct seq_operations ip_vs_info_seq_ops = { 2025 .start = ip_vs_info_seq_start, 2026 .next = ip_vs_info_seq_next, 2027 .stop = ip_vs_info_seq_stop, 2028 .show = ip_vs_info_seq_show, 2029}; 2030 2031static int ip_vs_info_open(struct inode *inode, struct file *file) 2032{ 2033 return seq_open_net(inode, file, &ip_vs_info_seq_ops, 2034 sizeof(struct ip_vs_iter)); 2035} 2036 
2037static const struct file_operations ip_vs_info_fops = { 2038 .owner = THIS_MODULE, 2039 .open = ip_vs_info_open, 2040 .read = seq_read, 2041 .llseek = seq_lseek, 2042 .release = seq_release_net, 2043}; 2044 2045static int ip_vs_stats_show(struct seq_file *seq, void *v) 2046{ 2047 struct net *net = seq_file_single_net(seq); 2048 struct ip_vs_stats_user show; 2049 2050/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ 2051 seq_puts(seq, 2052 " Total Incoming Outgoing Incoming Outgoing\n"); 2053 seq_printf(seq, 2054 " Conns Packets Packets Bytes Bytes\n"); 2055 2056 ip_vs_copy_stats(&show, &net_ipvs(net)->tot_stats); 2057 seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", show.conns, 2058 show.inpkts, show.outpkts, 2059 (unsigned long long) show.inbytes, 2060 (unsigned long long) show.outbytes); 2061 2062/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ 2063 seq_puts(seq, 2064 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n"); 2065 seq_printf(seq, "%8X %8X %8X %16X %16X\n", 2066 show.cps, show.inpps, show.outpps, 2067 show.inbps, show.outbps); 2068 2069 return 0; 2070} 2071 2072static int ip_vs_stats_seq_open(struct inode *inode, struct file *file) 2073{ 2074 return single_open_net(inode, file, ip_vs_stats_show); 2075} 2076 2077static const struct file_operations ip_vs_stats_fops = { 2078 .owner = THIS_MODULE, 2079 .open = ip_vs_stats_seq_open, 2080 .read = seq_read, 2081 .llseek = seq_lseek, 2082 .release = single_release_net, 2083}; 2084 2085static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v) 2086{ 2087 struct net *net = seq_file_single_net(seq); 2088 struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats; 2089 struct ip_vs_cpu_stats __percpu *cpustats = tot_stats->cpustats; 2090 struct ip_vs_stats_user rates; 2091 int i; 2092 2093/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ 2094 seq_puts(seq, 2095 " Total Incoming Outgoing Incoming Outgoing\n"); 2096 seq_printf(seq, 2097 "CPU Conns Packets Packets 
Bytes Bytes\n"); 2098 2099 for_each_possible_cpu(i) { 2100 struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i); 2101 unsigned int start; 2102 __u64 inbytes, outbytes; 2103 2104 do { 2105 start = u64_stats_fetch_begin_irq(&u->syncp); 2106 inbytes = u->ustats.inbytes; 2107 outbytes = u->ustats.outbytes; 2108 } while (u64_stats_fetch_retry_irq(&u->syncp, start)); 2109 2110 seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n", 2111 i, u->ustats.conns, u->ustats.inpkts, 2112 u->ustats.outpkts, (__u64)inbytes, 2113 (__u64)outbytes); 2114 } 2115 2116 spin_lock_bh(&tot_stats->lock); 2117 2118 seq_printf(seq, " ~ %8X %8X %8X %16LX %16LX\n\n", 2119 tot_stats->ustats.conns, tot_stats->ustats.inpkts, 2120 tot_stats->ustats.outpkts, 2121 (unsigned long long) tot_stats->ustats.inbytes, 2122 (unsigned long long) tot_stats->ustats.outbytes); 2123 2124 ip_vs_read_estimator(&rates, tot_stats); 2125 2126 spin_unlock_bh(&tot_stats->lock); 2127 2128/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ 2129 seq_puts(seq, 2130 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n"); 2131 seq_printf(seq, " %8X %8X %8X %16X %16X\n", 2132 rates.cps, 2133 rates.inpps, 2134 rates.outpps, 2135 rates.inbps, 2136 rates.outbps); 2137 2138 return 0; 2139} 2140 2141static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file) 2142{ 2143 return single_open_net(inode, file, ip_vs_stats_percpu_show); 2144} 2145 2146static const struct file_operations ip_vs_stats_percpu_fops = { 2147 .owner = THIS_MODULE, 2148 .open = ip_vs_stats_percpu_seq_open, 2149 .read = seq_read, 2150 .llseek = seq_lseek, 2151 .release = single_release_net, 2152}; 2153#endif 2154 2155/* 2156 * Set timeout values for tcp tcpfin udp in the timeout_table. 
2157 */ 2158static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u) 2159{ 2160#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP) 2161 struct ip_vs_proto_data *pd; 2162#endif 2163 2164 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n", 2165 u->tcp_timeout, 2166 u->tcp_fin_timeout, 2167 u->udp_timeout); 2168 2169#ifdef CONFIG_IP_VS_PROTO_TCP 2170 if (u->tcp_timeout) { 2171 pd = ip_vs_proto_data_get(net, IPPROTO_TCP); 2172 pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] 2173 = u->tcp_timeout * HZ; 2174 } 2175 2176 if (u->tcp_fin_timeout) { 2177 pd = ip_vs_proto_data_get(net, IPPROTO_TCP); 2178 pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] 2179 = u->tcp_fin_timeout * HZ; 2180 } 2181#endif 2182 2183#ifdef CONFIG_IP_VS_PROTO_UDP 2184 if (u->udp_timeout) { 2185 pd = ip_vs_proto_data_get(net, IPPROTO_UDP); 2186 pd->timeout_table[IP_VS_UDP_S_NORMAL] 2187 = u->udp_timeout * HZ; 2188 } 2189#endif 2190 return 0; 2191} 2192 2193#define CMDID(cmd) (cmd - IP_VS_BASE_CTL) 2194 2195struct ip_vs_svcdest_user { 2196 struct ip_vs_service_user s; 2197 struct ip_vs_dest_user d; 2198}; 2199 2200static const unsigned char set_arglen[CMDID(IP_VS_SO_SET_MAX) + 1] = { 2201 [CMDID(IP_VS_SO_SET_ADD)] = sizeof(struct ip_vs_service_user), 2202 [CMDID(IP_VS_SO_SET_EDIT)] = sizeof(struct ip_vs_service_user), 2203 [CMDID(IP_VS_SO_SET_DEL)] = sizeof(struct ip_vs_service_user), 2204 [CMDID(IP_VS_SO_SET_ADDDEST)] = sizeof(struct ip_vs_svcdest_user), 2205 [CMDID(IP_VS_SO_SET_DELDEST)] = sizeof(struct ip_vs_svcdest_user), 2206 [CMDID(IP_VS_SO_SET_EDITDEST)] = sizeof(struct ip_vs_svcdest_user), 2207 [CMDID(IP_VS_SO_SET_TIMEOUT)] = sizeof(struct ip_vs_timeout_user), 2208 [CMDID(IP_VS_SO_SET_STARTDAEMON)] = sizeof(struct ip_vs_daemon_user), 2209 [CMDID(IP_VS_SO_SET_STOPDAEMON)] = sizeof(struct ip_vs_daemon_user), 2210 [CMDID(IP_VS_SO_SET_ZERO)] = sizeof(struct ip_vs_service_user), 2211}; 2212 2213union ip_vs_set_arglen { 2214 struct ip_vs_service_user 
field_IP_VS_SO_SET_ADD; 2215 struct ip_vs_service_user field_IP_VS_SO_SET_EDIT; 2216 struct ip_vs_service_user field_IP_VS_SO_SET_DEL; 2217 struct ip_vs_svcdest_user field_IP_VS_SO_SET_ADDDEST; 2218 struct ip_vs_svcdest_user field_IP_VS_SO_SET_DELDEST; 2219 struct ip_vs_svcdest_user field_IP_VS_SO_SET_EDITDEST; 2220 struct ip_vs_timeout_user field_IP_VS_SO_SET_TIMEOUT; 2221 struct ip_vs_daemon_user field_IP_VS_SO_SET_STARTDAEMON; 2222 struct ip_vs_daemon_user field_IP_VS_SO_SET_STOPDAEMON; 2223 struct ip_vs_service_user field_IP_VS_SO_SET_ZERO; 2224}; 2225 2226#define MAX_SET_ARGLEN sizeof(union ip_vs_set_arglen) 2227 2228static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc, 2229 struct ip_vs_service_user *usvc_compat) 2230{ 2231 memset(usvc, 0, sizeof(*usvc)); 2232 2233 usvc->af = AF_INET; 2234 usvc->protocol = usvc_compat->protocol; 2235 usvc->addr.ip = usvc_compat->addr; 2236 usvc->port = usvc_compat->port; 2237 usvc->fwmark = usvc_compat->fwmark; 2238 2239 /* Deep copy of sched_name is not needed here */ 2240 usvc->sched_name = usvc_compat->sched_name; 2241 2242 usvc->flags = usvc_compat->flags; 2243 usvc->timeout = usvc_compat->timeout; 2244 usvc->netmask = usvc_compat->netmask; 2245} 2246 2247static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest, 2248 struct ip_vs_dest_user *udest_compat) 2249{ 2250 memset(udest, 0, sizeof(*udest)); 2251 2252 udest->addr.ip = udest_compat->addr; 2253 udest->port = udest_compat->port; 2254 udest->conn_flags = udest_compat->conn_flags; 2255 udest->weight = udest_compat->weight; 2256 udest->u_threshold = udest_compat->u_threshold; 2257 udest->l_threshold = udest_compat->l_threshold; 2258 udest->af = AF_INET; 2259} 2260 2261static int 2262do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) 2263{ 2264 struct net *net = sock_net(sk); 2265 int ret; 2266 unsigned char arg[MAX_SET_ARGLEN]; 2267 struct ip_vs_service_user *usvc_compat; 2268 struct ip_vs_service_user_kern 
usvc; 2269 struct ip_vs_service *svc; 2270 struct ip_vs_dest_user *udest_compat; 2271 struct ip_vs_dest_user_kern udest; 2272 struct netns_ipvs *ipvs = net_ipvs(net); 2273 2274 BUILD_BUG_ON(sizeof(arg) > 255); 2275 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) 2276 return -EPERM; 2277 2278 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX) 2279 return -EINVAL; 2280 if (len != set_arglen[CMDID(cmd)]) { 2281 IP_VS_DBG(1, "set_ctl: len %u != %u\n", 2282 len, set_arglen[CMDID(cmd)]); 2283 return -EINVAL; 2284 } 2285 2286 if (copy_from_user(arg, user, len) != 0) 2287 return -EFAULT; 2288 2289 /* increase the module use count */ 2290 ip_vs_use_count_inc(); 2291 2292 /* Handle daemons since they have another lock */ 2293 if (cmd == IP_VS_SO_SET_STARTDAEMON || 2294 cmd == IP_VS_SO_SET_STOPDAEMON) { 2295 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg; 2296 2297 mutex_lock(&ipvs->sync_mutex); 2298 if (cmd == IP_VS_SO_SET_STARTDAEMON) 2299 ret = start_sync_thread(net, dm->state, dm->mcast_ifn, 2300 dm->syncid); 2301 else 2302 ret = stop_sync_thread(net, dm->state); 2303 mutex_unlock(&ipvs->sync_mutex); 2304 goto out_dec; 2305 } 2306 2307 mutex_lock(&__ip_vs_mutex); 2308 if (cmd == IP_VS_SO_SET_FLUSH) { 2309 /* Flush the virtual service */ 2310 ret = ip_vs_flush(net, false); 2311 goto out_unlock; 2312 } else if (cmd == IP_VS_SO_SET_TIMEOUT) { 2313 /* Set timeout values for (tcp tcpfin udp) */ 2314 ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg); 2315 goto out_unlock; 2316 } 2317 2318 usvc_compat = (struct ip_vs_service_user *)arg; 2319 udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1); 2320 2321 /* We only use the new structs internally, so copy userspace compat 2322 * structs to extended internal versions */ 2323 ip_vs_copy_usvc_compat(&usvc, usvc_compat); 2324 ip_vs_copy_udest_compat(&udest, udest_compat); 2325 2326 if (cmd == IP_VS_SO_SET_ZERO) { 2327 /* if no service address is set, zero counters in all */ 2328 if 
(!usvc.fwmark && !usvc.addr.ip && !usvc.port) { 2329 ret = ip_vs_zero_all(net); 2330 goto out_unlock; 2331 } 2332 } 2333 2334 /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */ 2335 if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP && 2336 usvc.protocol != IPPROTO_SCTP) { 2337 pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n", 2338 usvc.protocol, &usvc.addr.ip, 2339 ntohs(usvc.port), usvc.sched_name); 2340 ret = -EFAULT; 2341 goto out_unlock; 2342 } 2343 2344 /* Lookup the exact service by <protocol, addr, port> or fwmark */ 2345 rcu_read_lock(); 2346 if (usvc.fwmark == 0) 2347 svc = __ip_vs_service_find(net, usvc.af, usvc.protocol, 2348 &usvc.addr, usvc.port); 2349 else 2350 svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark); 2351 rcu_read_unlock(); 2352 2353 if (cmd != IP_VS_SO_SET_ADD 2354 && (svc == NULL || svc->protocol != usvc.protocol)) { 2355 ret = -ESRCH; 2356 goto out_unlock; 2357 } 2358 2359 switch (cmd) { 2360 case IP_VS_SO_SET_ADD: 2361 if (svc != NULL) 2362 ret = -EEXIST; 2363 else 2364 ret = ip_vs_add_service(net, &usvc, &svc); 2365 break; 2366 case IP_VS_SO_SET_EDIT: 2367 ret = ip_vs_edit_service(svc, &usvc); 2368 break; 2369 case IP_VS_SO_SET_DEL: 2370 ret = ip_vs_del_service(svc); 2371 if (!ret) 2372 goto out_unlock; 2373 break; 2374 case IP_VS_SO_SET_ZERO: 2375 ret = ip_vs_zero_service(svc); 2376 break; 2377 case IP_VS_SO_SET_ADDDEST: 2378 ret = ip_vs_add_dest(svc, &udest); 2379 break; 2380 case IP_VS_SO_SET_EDITDEST: 2381 ret = ip_vs_edit_dest(svc, &udest); 2382 break; 2383 case IP_VS_SO_SET_DELDEST: 2384 ret = ip_vs_del_dest(svc, &udest); 2385 break; 2386 default: 2387 ret = -EINVAL; 2388 } 2389 2390 out_unlock: 2391 mutex_unlock(&__ip_vs_mutex); 2392 out_dec: 2393 /* decrease the module use count */ 2394 ip_vs_use_count_dec(); 2395 2396 return ret; 2397} 2398 2399 2400static void 2401ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src) 2402{ 2403 struct ip_vs_scheduler *sched; 
2404 2405 sched = rcu_dereference_protected(src->scheduler, 1); 2406 dst->protocol = src->protocol; 2407 dst->addr = src->addr.ip; 2408 dst->port = src->port; 2409 dst->fwmark = src->fwmark; 2410 strlcpy(dst->sched_name, sched->name, sizeof(dst->sched_name)); 2411 dst->flags = src->flags; 2412 dst->timeout = src->timeout / HZ; 2413 dst->netmask = src->netmask; 2414 dst->num_dests = src->num_dests; 2415 ip_vs_copy_stats(&dst->stats, &src->stats); 2416} 2417 2418static inline int 2419__ip_vs_get_service_entries(struct net *net, 2420 const struct ip_vs_get_services *get, 2421 struct ip_vs_get_services __user *uptr) 2422{ 2423 int idx, count=0; 2424 struct ip_vs_service *svc; 2425 struct ip_vs_service_entry entry; 2426 int ret = 0; 2427 2428 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 2429 hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { 2430 /* Only expose IPv4 entries to old interface */ 2431 if (svc->af != AF_INET || !net_eq(svc->net, net)) 2432 continue; 2433 2434 if (count >= get->num_services) 2435 goto out; 2436 memset(&entry, 0, sizeof(entry)); 2437 ip_vs_copy_service(&entry, svc); 2438 if (copy_to_user(&uptr->entrytable[count], 2439 &entry, sizeof(entry))) { 2440 ret = -EFAULT; 2441 goto out; 2442 } 2443 count++; 2444 } 2445 } 2446 2447 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 2448 hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { 2449 /* Only expose IPv4 entries to old interface */ 2450 if (svc->af != AF_INET || !net_eq(svc->net, net)) 2451 continue; 2452 2453 if (count >= get->num_services) 2454 goto out; 2455 memset(&entry, 0, sizeof(entry)); 2456 ip_vs_copy_service(&entry, svc); 2457 if (copy_to_user(&uptr->entrytable[count], 2458 &entry, sizeof(entry))) { 2459 ret = -EFAULT; 2460 goto out; 2461 } 2462 count++; 2463 } 2464 } 2465out: 2466 return ret; 2467} 2468 2469static inline int 2470__ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get, 2471 struct ip_vs_get_dests __user *uptr) 2472{ 2473 struct 
ip_vs_service *svc; 2474 union nf_inet_addr addr = { .ip = get->addr }; 2475 int ret = 0; 2476 2477 rcu_read_lock(); 2478 if (get->fwmark) 2479 svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark); 2480 else 2481 svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr, 2482 get->port); 2483 rcu_read_unlock(); 2484 2485 if (svc) { 2486 int count = 0; 2487 struct ip_vs_dest *dest; 2488 struct ip_vs_dest_entry entry; 2489 2490 memset(&entry, 0, sizeof(entry)); 2491 list_for_each_entry(dest, &svc->destinations, n_list) { 2492 if (count >= get->num_dests) 2493 break; 2494 2495 /* Cannot expose heterogeneous members via sockopt 2496 * interface 2497 */ 2498 if (dest->af != svc->af) 2499 continue; 2500 2501 entry.addr = dest->addr.ip; 2502 entry.port = dest->port; 2503 entry.conn_flags = atomic_read(&dest->conn_flags); 2504 entry.weight = atomic_read(&dest->weight); 2505 entry.u_threshold = dest->u_threshold; 2506 entry.l_threshold = dest->l_threshold; 2507 entry.activeconns = atomic_read(&dest->activeconns); 2508 entry.inactconns = atomic_read(&dest->inactconns); 2509 entry.persistconns = atomic_read(&dest->persistconns); 2510 ip_vs_copy_stats(&entry.stats, &dest->stats); 2511 if (copy_to_user(&uptr->entrytable[count], 2512 &entry, sizeof(entry))) { 2513 ret = -EFAULT; 2514 break; 2515 } 2516 count++; 2517 } 2518 } else 2519 ret = -ESRCH; 2520 return ret; 2521} 2522 2523static inline void 2524__ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u) 2525{ 2526#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP) 2527 struct ip_vs_proto_data *pd; 2528#endif 2529 2530 memset(u, 0, sizeof (*u)); 2531 2532#ifdef CONFIG_IP_VS_PROTO_TCP 2533 pd = ip_vs_proto_data_get(net, IPPROTO_TCP); 2534 u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ; 2535 u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ; 2536#endif 2537#ifdef CONFIG_IP_VS_PROTO_UDP 2538 pd = ip_vs_proto_data_get(net, IPPROTO_UDP); 2539 u->udp_timeout = 
2540 pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ; 2541#endif 2542} 2543 2544static const unsigned char get_arglen[CMDID(IP_VS_SO_GET_MAX) + 1] = { 2545 [CMDID(IP_VS_SO_GET_VERSION)] = 64, 2546 [CMDID(IP_VS_SO_GET_INFO)] = sizeof(struct ip_vs_getinfo), 2547 [CMDID(IP_VS_SO_GET_SERVICES)] = sizeof(struct ip_vs_get_services), 2548 [CMDID(IP_VS_SO_GET_SERVICE)] = sizeof(struct ip_vs_service_entry), 2549 [CMDID(IP_VS_SO_GET_DESTS)] = sizeof(struct ip_vs_get_dests), 2550 [CMDID(IP_VS_SO_GET_TIMEOUT)] = sizeof(struct ip_vs_timeout_user), 2551 [CMDID(IP_VS_SO_GET_DAEMON)] = 2 * sizeof(struct ip_vs_daemon_user), 2552}; 2553 2554union ip_vs_get_arglen { 2555 char field_IP_VS_SO_GET_VERSION[64]; 2556 struct ip_vs_getinfo field_IP_VS_SO_GET_INFO; 2557 struct ip_vs_get_services field_IP_VS_SO_GET_SERVICES; 2558 struct ip_vs_service_entry field_IP_VS_SO_GET_SERVICE; 2559 struct ip_vs_get_dests field_IP_VS_SO_GET_DESTS; 2560 struct ip_vs_timeout_user field_IP_VS_SO_GET_TIMEOUT; 2561 struct ip_vs_daemon_user field_IP_VS_SO_GET_DAEMON[2]; 2562}; 2563 2564#define MAX_GET_ARGLEN sizeof(union ip_vs_get_arglen) 2565 2566static int 2567do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) 2568{ 2569 unsigned char arg[MAX_GET_ARGLEN]; 2570 int ret = 0; 2571 unsigned int copylen; 2572 struct net *net = sock_net(sk); 2573 struct netns_ipvs *ipvs = net_ipvs(net); 2574 2575 BUG_ON(!net); 2576 BUILD_BUG_ON(sizeof(arg) > 255); 2577 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) 2578 return -EPERM; 2579 2580 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX) 2581 return -EINVAL; 2582 2583 copylen = get_arglen[CMDID(cmd)]; 2584 if (*len < (int) copylen) { 2585 IP_VS_DBG(1, "get_ctl: len %d < %u\n", *len, copylen); 2586 return -EINVAL; 2587 } 2588 2589 if (copy_from_user(arg, user, copylen) != 0) 2590 return -EFAULT; 2591 /* 2592 * Handle daemons first since it has its own locking 2593 */ 2594 if (cmd == IP_VS_SO_GET_DAEMON) { 2595 struct ip_vs_daemon_user d[2]; 2596 
2597 memset(&d, 0, sizeof(d)); 2598 mutex_lock(&ipvs->sync_mutex); 2599 if (ipvs->sync_state & IP_VS_STATE_MASTER) { 2600 d[0].state = IP_VS_STATE_MASTER; 2601 strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn, 2602 sizeof(d[0].mcast_ifn)); 2603 d[0].syncid = ipvs->master_syncid; 2604 } 2605 if (ipvs->sync_state & IP_VS_STATE_BACKUP) { 2606 d[1].state = IP_VS_STATE_BACKUP; 2607 strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn, 2608 sizeof(d[1].mcast_ifn)); 2609 d[1].syncid = ipvs->backup_syncid; 2610 } 2611 if (copy_to_user(user, &d, sizeof(d)) != 0) 2612 ret = -EFAULT; 2613 mutex_unlock(&ipvs->sync_mutex); 2614 return ret; 2615 } 2616 2617 mutex_lock(&__ip_vs_mutex); 2618 switch (cmd) { 2619 case IP_VS_SO_GET_VERSION: 2620 { 2621 char buf[64]; 2622 2623 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)", 2624 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size); 2625 if (copy_to_user(user, buf, strlen(buf)+1) != 0) { 2626 ret = -EFAULT; 2627 goto out; 2628 } 2629 *len = strlen(buf)+1; 2630 } 2631 break; 2632 2633 case IP_VS_SO_GET_INFO: 2634 { 2635 struct ip_vs_getinfo info; 2636 info.version = IP_VS_VERSION_CODE; 2637 info.size = ip_vs_conn_tab_size; 2638 info.num_services = ipvs->num_services; 2639 if (copy_to_user(user, &info, sizeof(info)) != 0) 2640 ret = -EFAULT; 2641 } 2642 break; 2643 2644 case IP_VS_SO_GET_SERVICES: 2645 { 2646 struct ip_vs_get_services *get; 2647 int size; 2648 2649 get = (struct ip_vs_get_services *)arg; 2650 size = sizeof(*get) + 2651 sizeof(struct ip_vs_service_entry) * get->num_services; 2652 if (*len != size) { 2653 pr_err("length: %u != %u\n", *len, size); 2654 ret = -EINVAL; 2655 goto out; 2656 } 2657 ret = __ip_vs_get_service_entries(net, get, user); 2658 } 2659 break; 2660 2661 case IP_VS_SO_GET_SERVICE: 2662 { 2663 struct ip_vs_service_entry *entry; 2664 struct ip_vs_service *svc; 2665 union nf_inet_addr addr; 2666 2667 entry = (struct ip_vs_service_entry *)arg; 2668 addr.ip = entry->addr; 2669 rcu_read_lock(); 2670 if 
(entry->fwmark) 2671 svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark); 2672 else 2673 svc = __ip_vs_service_find(net, AF_INET, 2674 entry->protocol, &addr, 2675 entry->port); 2676 rcu_read_unlock(); 2677 if (svc) { 2678 ip_vs_copy_service(entry, svc); 2679 if (copy_to_user(user, entry, sizeof(*entry)) != 0) 2680 ret = -EFAULT; 2681 } else 2682 ret = -ESRCH; 2683 } 2684 break; 2685 2686 case IP_VS_SO_GET_DESTS: 2687 { 2688 struct ip_vs_get_dests *get; 2689 int size; 2690 2691 get = (struct ip_vs_get_dests *)arg; 2692 size = sizeof(*get) + 2693 sizeof(struct ip_vs_dest_entry) * get->num_dests; 2694 if (*len != size) { 2695 pr_err("length: %u != %u\n", *len, size); 2696 ret = -EINVAL; 2697 goto out; 2698 } 2699 ret = __ip_vs_get_dest_entries(net, get, user); 2700 } 2701 break; 2702 2703 case IP_VS_SO_GET_TIMEOUT: 2704 { 2705 struct ip_vs_timeout_user t; 2706 2707 __ip_vs_get_timeouts(net, &t); 2708 if (copy_to_user(user, &t, sizeof(t)) != 0) 2709 ret = -EFAULT; 2710 } 2711 break; 2712 2713 default: 2714 ret = -EINVAL; 2715 } 2716 2717out: 2718 mutex_unlock(&__ip_vs_mutex); 2719 return ret; 2720} 2721 2722 2723static struct nf_sockopt_ops ip_vs_sockopts = { 2724 .pf = PF_INET, 2725 .set_optmin = IP_VS_BASE_CTL, 2726 .set_optmax = IP_VS_SO_SET_MAX+1, 2727 .set = do_ip_vs_set_ctl, 2728 .get_optmin = IP_VS_BASE_CTL, 2729 .get_optmax = IP_VS_SO_GET_MAX+1, 2730 .get = do_ip_vs_get_ctl, 2731 .owner = THIS_MODULE, 2732}; 2733 2734/* 2735 * Generic Netlink interface 2736 */ 2737 2738/* IPVS genetlink family */ 2739static struct genl_family ip_vs_genl_family = { 2740 .id = GENL_ID_GENERATE, 2741 .hdrsize = 0, 2742 .name = IPVS_GENL_NAME, 2743 .version = IPVS_GENL_VERSION, 2744 .maxattr = IPVS_CMD_MAX, 2745 .netnsok = true, /* Make ipvsadm to work on netns */ 2746}; 2747 2748/* Policy used for first-level command attributes */ 2749static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = { 2750 [IPVS_CMD_ATTR_SERVICE] = { .type = NLA_NESTED }, 2751 
[IPVS_CMD_ATTR_DEST] = { .type = NLA_NESTED }, 2752 [IPVS_CMD_ATTR_DAEMON] = { .type = NLA_NESTED }, 2753 [IPVS_CMD_ATTR_TIMEOUT_TCP] = { .type = NLA_U32 }, 2754 [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 }, 2755 [IPVS_CMD_ATTR_TIMEOUT_UDP] = { .type = NLA_U32 }, 2756}; 2757 2758/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */ 2759static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = { 2760 [IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 }, 2761 [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING, 2762 .len = IP_VS_IFNAME_MAXLEN }, 2763 [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 }, 2764}; 2765 2766/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */ 2767static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = { 2768 [IPVS_SVC_ATTR_AF] = { .type = NLA_U16 }, 2769 [IPVS_SVC_ATTR_PROTOCOL] = { .type = NLA_U16 }, 2770 [IPVS_SVC_ATTR_ADDR] = { .type = NLA_BINARY, 2771 .len = sizeof(union nf_inet_addr) }, 2772 [IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 }, 2773 [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 }, 2774 [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING, 2775 .len = IP_VS_SCHEDNAME_MAXLEN }, 2776 [IPVS_SVC_ATTR_PE_NAME] = { .type = NLA_NUL_STRING, 2777 .len = IP_VS_PENAME_MAXLEN }, 2778 [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY, 2779 .len = sizeof(struct ip_vs_flags) }, 2780 [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 }, 2781 [IPVS_SVC_ATTR_NETMASK] = { .type = NLA_U32 }, 2782 [IPVS_SVC_ATTR_STATS] = { .type = NLA_NESTED }, 2783}; 2784 2785/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */ 2786static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = { 2787 [IPVS_DEST_ATTR_ADDR] = { .type = NLA_BINARY, 2788 .len = sizeof(union nf_inet_addr) }, 2789 [IPVS_DEST_ATTR_PORT] = { .type = NLA_U16 }, 2790 [IPVS_DEST_ATTR_FWD_METHOD] = { .type = NLA_U32 }, 2791 [IPVS_DEST_ATTR_WEIGHT] = { .type = NLA_U32 }, 2792 
[IPVS_DEST_ATTR_U_THRESH] = { .type = NLA_U32 }, 2793 [IPVS_DEST_ATTR_L_THRESH] = { .type = NLA_U32 }, 2794 [IPVS_DEST_ATTR_ACTIVE_CONNS] = { .type = NLA_U32 }, 2795 [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 }, 2796 [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 }, 2797 [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED }, 2798 [IPVS_DEST_ATTR_ADDR_FAMILY] = { .type = NLA_U16 }, 2799}; 2800 2801static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type, 2802 struct ip_vs_stats *stats) 2803{ 2804 struct ip_vs_stats_user ustats; 2805 struct nlattr *nl_stats = nla_nest_start(skb, container_type); 2806 if (!nl_stats) 2807 return -EMSGSIZE; 2808 2809 ip_vs_copy_stats(&ustats, stats); 2810 2811 if (nla_put_u32(skb, IPVS_STATS_ATTR_CONNS, ustats.conns) || 2812 nla_put_u32(skb, IPVS_STATS_ATTR_INPKTS, ustats.inpkts) || 2813 nla_put_u32(skb, IPVS_STATS_ATTR_OUTPKTS, ustats.outpkts) || 2814 nla_put_u64(skb, IPVS_STATS_ATTR_INBYTES, ustats.inbytes) || 2815 nla_put_u64(skb, IPVS_STATS_ATTR_OUTBYTES, ustats.outbytes) || 2816 nla_put_u32(skb, IPVS_STATS_ATTR_CPS, ustats.cps) || 2817 nla_put_u32(skb, IPVS_STATS_ATTR_INPPS, ustats.inpps) || 2818 nla_put_u32(skb, IPVS_STATS_ATTR_OUTPPS, ustats.outpps) || 2819 nla_put_u32(skb, IPVS_STATS_ATTR_INBPS, ustats.inbps) || 2820 nla_put_u32(skb, IPVS_STATS_ATTR_OUTBPS, ustats.outbps)) 2821 goto nla_put_failure; 2822 nla_nest_end(skb, nl_stats); 2823 2824 return 0; 2825 2826nla_put_failure: 2827 nla_nest_cancel(skb, nl_stats); 2828 return -EMSGSIZE; 2829} 2830 2831static int ip_vs_genl_fill_service(struct sk_buff *skb, 2832 struct ip_vs_service *svc) 2833{ 2834 struct ip_vs_scheduler *sched; 2835 struct ip_vs_pe *pe; 2836 struct nlattr *nl_service; 2837 struct ip_vs_flags flags = { .flags = svc->flags, 2838 .mask = ~0 }; 2839 2840 nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE); 2841 if (!nl_service) 2842 return -EMSGSIZE; 2843 2844 if (nla_put_u16(skb, IPVS_SVC_ATTR_AF, svc->af)) 2845 goto nla_put_failure; 2846 
if (svc->fwmark) { 2847 if (nla_put_u32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark)) 2848 goto nla_put_failure; 2849 } else { 2850 if (nla_put_u16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol) || 2851 nla_put(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr) || 2852 nla_put_be16(skb, IPVS_SVC_ATTR_PORT, svc->port)) 2853 goto nla_put_failure; 2854 } 2855 2856 sched = rcu_dereference_protected(svc->scheduler, 1); 2857 pe = rcu_dereference_protected(svc->pe, 1); 2858 if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, sched->name) || 2859 (pe && nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, pe->name)) || 2860 nla_put(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags) || 2861 nla_put_u32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ) || 2862 nla_put_be32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask)) 2863 goto nla_put_failure; 2864 if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats)) 2865 goto nla_put_failure; 2866 2867 nla_nest_end(skb, nl_service); 2868 2869 return 0; 2870 2871nla_put_failure: 2872 nla_nest_cancel(skb, nl_service); 2873 return -EMSGSIZE; 2874} 2875 2876static int ip_vs_genl_dump_service(struct sk_buff *skb, 2877 struct ip_vs_service *svc, 2878 struct netlink_callback *cb) 2879{ 2880 void *hdr; 2881 2882 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, 2883 &ip_vs_genl_family, NLM_F_MULTI, 2884 IPVS_CMD_NEW_SERVICE); 2885 if (!hdr) 2886 return -EMSGSIZE; 2887 2888 if (ip_vs_genl_fill_service(skb, svc) < 0) 2889 goto nla_put_failure; 2890 2891 return genlmsg_end(skb, hdr); 2892 2893nla_put_failure: 2894 genlmsg_cancel(skb, hdr); 2895 return -EMSGSIZE; 2896} 2897 2898static int ip_vs_genl_dump_services(struct sk_buff *skb, 2899 struct netlink_callback *cb) 2900{ 2901 int idx = 0, i; 2902 int start = cb->args[0]; 2903 struct ip_vs_service *svc; 2904 struct net *net = skb_sknet(skb); 2905 2906 mutex_lock(&__ip_vs_mutex); 2907 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { 2908 hlist_for_each_entry(svc, &ip_vs_svc_table[i], s_list) { 
2909 if (++idx <= start || !net_eq(svc->net, net)) 2910 continue; 2911 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { 2912 idx--; 2913 goto nla_put_failure; 2914 } 2915 } 2916 } 2917 2918 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { 2919 hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) { 2920 if (++idx <= start || !net_eq(svc->net, net)) 2921 continue; 2922 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { 2923 idx--; 2924 goto nla_put_failure; 2925 } 2926 } 2927 } 2928 2929nla_put_failure: 2930 mutex_unlock(&__ip_vs_mutex); 2931 cb->args[0] = idx; 2932 2933 return skb->len; 2934} 2935 2936static int ip_vs_genl_parse_service(struct net *net, 2937 struct ip_vs_service_user_kern *usvc, 2938 struct nlattr *nla, int full_entry, 2939 struct ip_vs_service **ret_svc) 2940{ 2941 struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1]; 2942 struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr; 2943 struct ip_vs_service *svc; 2944 2945 /* Parse mandatory identifying service fields first */ 2946 if (nla == NULL || 2947 nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy)) 2948 return -EINVAL; 2949 2950 nla_af = attrs[IPVS_SVC_ATTR_AF]; 2951 nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL]; 2952 nla_addr = attrs[IPVS_SVC_ATTR_ADDR]; 2953 nla_port = attrs[IPVS_SVC_ATTR_PORT]; 2954 nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK]; 2955 2956 if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr)))) 2957 return -EINVAL; 2958 2959 memset(usvc, 0, sizeof(*usvc)); 2960 2961 usvc->af = nla_get_u16(nla_af); 2962#ifdef CONFIG_IP_VS_IPV6 2963 if (usvc->af != AF_INET && usvc->af != AF_INET6) 2964#else 2965 if (usvc->af != AF_INET) 2966#endif 2967 return -EAFNOSUPPORT; 2968 2969 if (nla_fwmark) { 2970 usvc->protocol = IPPROTO_TCP; 2971 usvc->fwmark = nla_get_u32(nla_fwmark); 2972 } else { 2973 usvc->protocol = nla_get_u16(nla_protocol); 2974 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr)); 2975 usvc->port = nla_get_be16(nla_port); 2976 usvc->fwmark = 
0; 2977 } 2978 2979 rcu_read_lock(); 2980 if (usvc->fwmark) 2981 svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark); 2982 else 2983 svc = __ip_vs_service_find(net, usvc->af, usvc->protocol, 2984 &usvc->addr, usvc->port); 2985 rcu_read_unlock(); 2986 *ret_svc = svc; 2987 2988 /* If a full entry was requested, check for the additional fields */ 2989 if (full_entry) { 2990 struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout, 2991 *nla_netmask; 2992 struct ip_vs_flags flags; 2993 2994 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME]; 2995 nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME]; 2996 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS]; 2997 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT]; 2998 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK]; 2999 3000 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask)) 3001 return -EINVAL; 3002 3003 nla_memcpy(&flags, nla_flags, sizeof(flags)); 3004 3005 /* prefill flags from service if it already exists */ 3006 if (svc) 3007 usvc->flags = svc->flags; 3008 3009 /* set new flags from userland */ 3010 usvc->flags = (usvc->flags & ~flags.mask) | 3011 (flags.flags & flags.mask); 3012 usvc->sched_name = nla_data(nla_sched); 3013 usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL; 3014 usvc->timeout = nla_get_u32(nla_timeout); 3015 usvc->netmask = nla_get_be32(nla_netmask); 3016 } 3017 3018 return 0; 3019} 3020 3021static struct ip_vs_service *ip_vs_genl_find_service(struct net *net, 3022 struct nlattr *nla) 3023{ 3024 struct ip_vs_service_user_kern usvc; 3025 struct ip_vs_service *svc; 3026 int ret; 3027 3028 ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc); 3029 return ret ? 
ERR_PTR(ret) : svc; 3030} 3031 3032static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest) 3033{ 3034 struct nlattr *nl_dest; 3035 3036 nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST); 3037 if (!nl_dest) 3038 return -EMSGSIZE; 3039 3040 if (nla_put(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr) || 3041 nla_put_be16(skb, IPVS_DEST_ATTR_PORT, dest->port) || 3042 nla_put_u32(skb, IPVS_DEST_ATTR_FWD_METHOD, 3043 (atomic_read(&dest->conn_flags) & 3044 IP_VS_CONN_F_FWD_MASK)) || 3045 nla_put_u32(skb, IPVS_DEST_ATTR_WEIGHT, 3046 atomic_read(&dest->weight)) || 3047 nla_put_u32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold) || 3048 nla_put_u32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold) || 3049 nla_put_u32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS, 3050 atomic_read(&dest->activeconns)) || 3051 nla_put_u32(skb, IPVS_DEST_ATTR_INACT_CONNS, 3052 atomic_read(&dest->inactconns)) || 3053 nla_put_u32(skb, IPVS_DEST_ATTR_PERSIST_CONNS, 3054 atomic_read(&dest->persistconns)) || 3055 nla_put_u16(skb, IPVS_DEST_ATTR_ADDR_FAMILY, dest->af)) 3056 goto nla_put_failure; 3057 if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats)) 3058 goto nla_put_failure; 3059 3060 nla_nest_end(skb, nl_dest); 3061 3062 return 0; 3063 3064nla_put_failure: 3065 nla_nest_cancel(skb, nl_dest); 3066 return -EMSGSIZE; 3067} 3068 3069static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest, 3070 struct netlink_callback *cb) 3071{ 3072 void *hdr; 3073 3074 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, 3075 &ip_vs_genl_family, NLM_F_MULTI, 3076 IPVS_CMD_NEW_DEST); 3077 if (!hdr) 3078 return -EMSGSIZE; 3079 3080 if (ip_vs_genl_fill_dest(skb, dest) < 0) 3081 goto nla_put_failure; 3082 3083 return genlmsg_end(skb, hdr); 3084 3085nla_put_failure: 3086 genlmsg_cancel(skb, hdr); 3087 return -EMSGSIZE; 3088} 3089 3090static int ip_vs_genl_dump_dests(struct sk_buff *skb, 3091 struct netlink_callback *cb) 3092{ 3093 int idx = 0; 
3094 int start = cb->args[0]; 3095 struct ip_vs_service *svc; 3096 struct ip_vs_dest *dest; 3097 struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1]; 3098 struct net *net = skb_sknet(skb); 3099 3100 mutex_lock(&__ip_vs_mutex); 3101 3102 /* Try to find the service for which to dump destinations */ 3103 if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs, 3104 IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy)) 3105 goto out_err; 3106 3107 3108 svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]); 3109 if (IS_ERR(svc) || svc == NULL) 3110 goto out_err; 3111 3112 /* Dump the destinations */ 3113 list_for_each_entry(dest, &svc->destinations, n_list) { 3114 if (++idx <= start) 3115 continue; 3116 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) { 3117 idx--; 3118 goto nla_put_failure; 3119 } 3120 } 3121 3122nla_put_failure: 3123 cb->args[0] = idx; 3124 3125out_err: 3126 mutex_unlock(&__ip_vs_mutex); 3127 3128 return skb->len; 3129} 3130 3131static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest, 3132 struct nlattr *nla, int full_entry) 3133{ 3134 struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1]; 3135 struct nlattr *nla_addr, *nla_port; 3136 struct nlattr *nla_addr_family; 3137 3138 /* Parse mandatory identifying destination fields first */ 3139 if (nla == NULL || 3140 nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy)) 3141 return -EINVAL; 3142 3143 nla_addr = attrs[IPVS_DEST_ATTR_ADDR]; 3144 nla_port = attrs[IPVS_DEST_ATTR_PORT]; 3145 nla_addr_family = attrs[IPVS_DEST_ATTR_ADDR_FAMILY]; 3146 3147 if (!(nla_addr && nla_port)) 3148 return -EINVAL; 3149 3150 memset(udest, 0, sizeof(*udest)); 3151 3152 nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr)); 3153 udest->port = nla_get_be16(nla_port); 3154 3155 if (nla_addr_family) 3156 udest->af = nla_get_u16(nla_addr_family); 3157 else 3158 udest->af = 0; 3159 3160 /* If a full entry was requested, check for the additional fields */ 3161 if (full_entry) { 3162 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh, 3163 
*nla_l_thresh; 3164 3165 nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD]; 3166 nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT]; 3167 nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH]; 3168 nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH]; 3169 3170 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh)) 3171 return -EINVAL; 3172 3173 udest->conn_flags = nla_get_u32(nla_fwd) 3174 & IP_VS_CONN_F_FWD_MASK; 3175 udest->weight = nla_get_u32(nla_weight); 3176 udest->u_threshold = nla_get_u32(nla_u_thresh); 3177 udest->l_threshold = nla_get_u32(nla_l_thresh); 3178 } 3179 3180 return 0; 3181} 3182 3183static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __u32 state, 3184 const char *mcast_ifn, __u32 syncid) 3185{ 3186 struct nlattr *nl_daemon; 3187 3188 nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON); 3189 if (!nl_daemon) 3190 return -EMSGSIZE; 3191 3192 if (nla_put_u32(skb, IPVS_DAEMON_ATTR_STATE, state) || 3193 nla_put_string(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn) || 3194 nla_put_u32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid)) 3195 goto nla_put_failure; 3196 nla_nest_end(skb, nl_daemon); 3197 3198 return 0; 3199 3200nla_put_failure: 3201 nla_nest_cancel(skb, nl_daemon); 3202 return -EMSGSIZE; 3203} 3204 3205static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __u32 state, 3206 const char *mcast_ifn, __u32 syncid, 3207 struct netlink_callback *cb) 3208{ 3209 void *hdr; 3210 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, 3211 &ip_vs_genl_family, NLM_F_MULTI, 3212 IPVS_CMD_NEW_DAEMON); 3213 if (!hdr) 3214 return -EMSGSIZE; 3215 3216 if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid)) 3217 goto nla_put_failure; 3218 3219 return genlmsg_end(skb, hdr); 3220 3221nla_put_failure: 3222 genlmsg_cancel(skb, hdr); 3223 return -EMSGSIZE; 3224} 3225 3226static int ip_vs_genl_dump_daemons(struct sk_buff *skb, 3227 struct netlink_callback *cb) 3228{ 3229 struct net *net = skb_sknet(skb); 3230 struct netns_ipvs *ipvs = net_ipvs(net); 3231 3232 
mutex_lock(&ipvs->sync_mutex); 3233 if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) { 3234 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER, 3235 ipvs->master_mcast_ifn, 3236 ipvs->master_syncid, cb) < 0) 3237 goto nla_put_failure; 3238 3239 cb->args[0] = 1; 3240 } 3241 3242 if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) { 3243 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP, 3244 ipvs->backup_mcast_ifn, 3245 ipvs->backup_syncid, cb) < 0) 3246 goto nla_put_failure; 3247 3248 cb->args[1] = 1; 3249 } 3250 3251nla_put_failure: 3252 mutex_unlock(&ipvs->sync_mutex); 3253 3254 return skb->len; 3255} 3256 3257static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs) 3258{ 3259 if (!(attrs[IPVS_DAEMON_ATTR_STATE] && 3260 attrs[IPVS_DAEMON_ATTR_MCAST_IFN] && 3261 attrs[IPVS_DAEMON_ATTR_SYNC_ID])) 3262 return -EINVAL; 3263 3264 /* The synchronization protocol is incompatible with mixed family 3265 * services 3266 */ 3267 if (net_ipvs(net)->mixed_address_family_dests > 0) 3268 return -EINVAL; 3269 3270 return start_sync_thread(net, 3271 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]), 3272 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]), 3273 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID])); 3274} 3275 3276static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs) 3277{ 3278 if (!attrs[IPVS_DAEMON_ATTR_STATE]) 3279 return -EINVAL; 3280 3281 return stop_sync_thread(net, 3282 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE])); 3283} 3284 3285static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs) 3286{ 3287 struct ip_vs_timeout_user t; 3288 3289 __ip_vs_get_timeouts(net, &t); 3290 3291 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]) 3292 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]); 3293 3294 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]) 3295 t.tcp_fin_timeout = 3296 nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]); 3297 3298 if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]) 3299 t.udp_timeout = 
nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]); 3300 3301 return ip_vs_set_timeout(net, &t); 3302} 3303 3304static int ip_vs_genl_set_daemon(struct sk_buff *skb, struct genl_info *info) 3305{ 3306 int ret = 0, cmd; 3307 struct net *net; 3308 struct netns_ipvs *ipvs; 3309 3310 net = skb_sknet(skb); 3311 ipvs = net_ipvs(net); 3312 cmd = info->genlhdr->cmd; 3313 3314 if (cmd == IPVS_CMD_NEW_DAEMON || cmd == IPVS_CMD_DEL_DAEMON) { 3315 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1]; 3316 3317 mutex_lock(&ipvs->sync_mutex); 3318 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] || 3319 nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX, 3320 info->attrs[IPVS_CMD_ATTR_DAEMON], 3321 ip_vs_daemon_policy)) { 3322 ret = -EINVAL; 3323 goto out; 3324 } 3325 3326 if (cmd == IPVS_CMD_NEW_DAEMON) 3327 ret = ip_vs_genl_new_daemon(net, daemon_attrs); 3328 else 3329 ret = ip_vs_genl_del_daemon(net, daemon_attrs); 3330out: 3331 mutex_unlock(&ipvs->sync_mutex); 3332 } 3333 return ret; 3334} 3335 3336static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) 3337{ 3338 struct ip_vs_service *svc = NULL; 3339 struct ip_vs_service_user_kern usvc; 3340 struct ip_vs_dest_user_kern udest; 3341 int ret = 0, cmd; 3342 int need_full_svc = 0, need_full_dest = 0; 3343 struct net *net; 3344 3345 net = skb_sknet(skb); 3346 cmd = info->genlhdr->cmd; 3347 3348 mutex_lock(&__ip_vs_mutex); 3349 3350 if (cmd == IPVS_CMD_FLUSH) { 3351 ret = ip_vs_flush(net, false); 3352 goto out; 3353 } else if (cmd == IPVS_CMD_SET_CONFIG) { 3354 ret = ip_vs_genl_set_config(net, info->attrs); 3355 goto out; 3356 } else if (cmd == IPVS_CMD_ZERO && 3357 !info->attrs[IPVS_CMD_ATTR_SERVICE]) { 3358 ret = ip_vs_zero_all(net); 3359 goto out; 3360 } 3361 3362 /* All following commands require a service argument, so check if we 3363 * received a valid one. We need a full service specification when 3364 * adding / editing a service. Only identifying members otherwise. 
*/ 3365 if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE) 3366 need_full_svc = 1; 3367 3368 ret = ip_vs_genl_parse_service(net, &usvc, 3369 info->attrs[IPVS_CMD_ATTR_SERVICE], 3370 need_full_svc, &svc); 3371 if (ret) 3372 goto out; 3373 3374 /* Unless we're adding a new service, the service must already exist */ 3375 if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) { 3376 ret = -ESRCH; 3377 goto out; 3378 } 3379 3380 /* Destination commands require a valid destination argument. For 3381 * adding / editing a destination, we need a full destination 3382 * specification. */ 3383 if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST || 3384 cmd == IPVS_CMD_DEL_DEST) { 3385 if (cmd != IPVS_CMD_DEL_DEST) 3386 need_full_dest = 1; 3387 3388 ret = ip_vs_genl_parse_dest(&udest, 3389 info->attrs[IPVS_CMD_ATTR_DEST], 3390 need_full_dest); 3391 if (ret) 3392 goto out; 3393 3394 /* Old protocols did not allow the user to specify address 3395 * family, so we set it to zero instead. We also didn't 3396 * allow heterogeneous pools in the old code, so it's safe 3397 * to assume that this will have the same address family as 3398 * the service. 3399 */ 3400 if (udest.af == 0) 3401 udest.af = svc->af; 3402 3403 if (udest.af != svc->af) { 3404 /* The synchronization protocol is incompatible 3405 * with mixed family services 3406 */ 3407 if (net_ipvs(net)->sync_state) { 3408 ret = -EINVAL; 3409 goto out; 3410 } 3411 3412 /* Which connection types do we support? 
*/ 3413 switch (udest.conn_flags) { 3414 case IP_VS_CONN_F_TUNNEL: 3415 /* We are able to forward this */ 3416 break; 3417 default: 3418 ret = -EINVAL; 3419 goto out; 3420 } 3421 } 3422 } 3423 3424 switch (cmd) { 3425 case IPVS_CMD_NEW_SERVICE: 3426 if (svc == NULL) 3427 ret = ip_vs_add_service(net, &usvc, &svc); 3428 else 3429 ret = -EEXIST; 3430 break; 3431 case IPVS_CMD_SET_SERVICE: 3432 ret = ip_vs_edit_service(svc, &usvc); 3433 break; 3434 case IPVS_CMD_DEL_SERVICE: 3435 ret = ip_vs_del_service(svc); 3436 /* do not use svc, it can be freed */ 3437 break; 3438 case IPVS_CMD_NEW_DEST: 3439 ret = ip_vs_add_dest(svc, &udest); 3440 break; 3441 case IPVS_CMD_SET_DEST: 3442 ret = ip_vs_edit_dest(svc, &udest); 3443 break; 3444 case IPVS_CMD_DEL_DEST: 3445 ret = ip_vs_del_dest(svc, &udest); 3446 break; 3447 case IPVS_CMD_ZERO: 3448 ret = ip_vs_zero_service(svc); 3449 break; 3450 default: 3451 ret = -EINVAL; 3452 } 3453 3454out: 3455 mutex_unlock(&__ip_vs_mutex); 3456 3457 return ret; 3458} 3459 3460static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info) 3461{ 3462 struct sk_buff *msg; 3463 void *reply; 3464 int ret, cmd, reply_cmd; 3465 struct net *net; 3466 3467 net = skb_sknet(skb); 3468 cmd = info->genlhdr->cmd; 3469 3470 if (cmd == IPVS_CMD_GET_SERVICE) 3471 reply_cmd = IPVS_CMD_NEW_SERVICE; 3472 else if (cmd == IPVS_CMD_GET_INFO) 3473 reply_cmd = IPVS_CMD_SET_INFO; 3474 else if (cmd == IPVS_CMD_GET_CONFIG) 3475 reply_cmd = IPVS_CMD_SET_CONFIG; 3476 else { 3477 pr_err("unknown Generic Netlink command\n"); 3478 return -EINVAL; 3479 } 3480 3481 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 3482 if (!msg) 3483 return -ENOMEM; 3484 3485 mutex_lock(&__ip_vs_mutex); 3486 3487 reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd); 3488 if (reply == NULL) 3489 goto nla_put_failure; 3490 3491 switch (cmd) { 3492 case IPVS_CMD_GET_SERVICE: 3493 { 3494 struct ip_vs_service *svc; 3495 3496 svc = ip_vs_genl_find_service(net, 3497 
info->attrs[IPVS_CMD_ATTR_SERVICE]); 3498 if (IS_ERR(svc)) { 3499 ret = PTR_ERR(svc); 3500 goto out_err; 3501 } else if (svc) { 3502 ret = ip_vs_genl_fill_service(msg, svc); 3503 if (ret) 3504 goto nla_put_failure; 3505 } else { 3506 ret = -ESRCH; 3507 goto out_err; 3508 } 3509 3510 break; 3511 } 3512 3513 case IPVS_CMD_GET_CONFIG: 3514 { 3515 struct ip_vs_timeout_user t; 3516 3517 __ip_vs_get_timeouts(net, &t); 3518#ifdef CONFIG_IP_VS_PROTO_TCP 3519 if (nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, 3520 t.tcp_timeout) || 3521 nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN, 3522 t.tcp_fin_timeout)) 3523 goto nla_put_failure; 3524#endif 3525#ifdef CONFIG_IP_VS_PROTO_UDP 3526 if (nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout)) 3527 goto nla_put_failure; 3528#endif 3529 3530 break; 3531 } 3532 3533 case IPVS_CMD_GET_INFO: 3534 if (nla_put_u32(msg, IPVS_INFO_ATTR_VERSION, 3535 IP_VS_VERSION_CODE) || 3536 nla_put_u32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE, 3537 ip_vs_conn_tab_size)) 3538 goto nla_put_failure; 3539 break; 3540 } 3541 3542 genlmsg_end(msg, reply); 3543 ret = genlmsg_reply(msg, info); 3544 goto out; 3545 3546nla_put_failure: 3547 pr_err("not enough space in Netlink message\n"); 3548 ret = -EMSGSIZE; 3549 3550out_err: 3551 nlmsg_free(msg); 3552out: 3553 mutex_unlock(&__ip_vs_mutex); 3554 3555 return ret; 3556} 3557 3558 3559static const struct genl_ops ip_vs_genl_ops[] = { 3560 { 3561 .cmd = IPVS_CMD_NEW_SERVICE, 3562 .flags = GENL_ADMIN_PERM, 3563 .policy = ip_vs_cmd_policy, 3564 .doit = ip_vs_genl_set_cmd, 3565 }, 3566 { 3567 .cmd = IPVS_CMD_SET_SERVICE, 3568 .flags = GENL_ADMIN_PERM, 3569 .policy = ip_vs_cmd_policy, 3570 .doit = ip_vs_genl_set_cmd, 3571 }, 3572 { 3573 .cmd = IPVS_CMD_DEL_SERVICE, 3574 .flags = GENL_ADMIN_PERM, 3575 .policy = ip_vs_cmd_policy, 3576 .doit = ip_vs_genl_set_cmd, 3577 }, 3578 { 3579 .cmd = IPVS_CMD_GET_SERVICE, 3580 .flags = GENL_ADMIN_PERM, 3581 .doit = ip_vs_genl_get_cmd, 3582 .dumpit = ip_vs_genl_dump_services, 3583 
.policy = ip_vs_cmd_policy, 3584 }, 3585 { 3586 .cmd = IPVS_CMD_NEW_DEST, 3587 .flags = GENL_ADMIN_PERM, 3588 .policy = ip_vs_cmd_policy, 3589 .doit = ip_vs_genl_set_cmd, 3590 }, 3591 { 3592 .cmd = IPVS_CMD_SET_DEST, 3593 .flags = GENL_ADMIN_PERM, 3594 .policy = ip_vs_cmd_policy, 3595 .doit = ip_vs_genl_set_cmd, 3596 }, 3597 { 3598 .cmd = IPVS_CMD_DEL_DEST, 3599 .flags = GENL_ADMIN_PERM, 3600 .policy = ip_vs_cmd_policy, 3601 .doit = ip_vs_genl_set_cmd, 3602 }, 3603 { 3604 .cmd = IPVS_CMD_GET_DEST, 3605 .flags = GENL_ADMIN_PERM, 3606 .policy = ip_vs_cmd_policy, 3607 .dumpit = ip_vs_genl_dump_dests, 3608 }, 3609 { 3610 .cmd = IPVS_CMD_NEW_DAEMON, 3611 .flags = GENL_ADMIN_PERM, 3612 .policy = ip_vs_cmd_policy, 3613 .doit = ip_vs_genl_set_daemon, 3614 }, 3615 { 3616 .cmd = IPVS_CMD_DEL_DAEMON, 3617 .flags = GENL_ADMIN_PERM, 3618 .policy = ip_vs_cmd_policy, 3619 .doit = ip_vs_genl_set_daemon, 3620 }, 3621 { 3622 .cmd = IPVS_CMD_GET_DAEMON, 3623 .flags = GENL_ADMIN_PERM, 3624 .dumpit = ip_vs_genl_dump_daemons, 3625 }, 3626 { 3627 .cmd = IPVS_CMD_SET_CONFIG, 3628 .flags = GENL_ADMIN_PERM, 3629 .policy = ip_vs_cmd_policy, 3630 .doit = ip_vs_genl_set_cmd, 3631 }, 3632 { 3633 .cmd = IPVS_CMD_GET_CONFIG, 3634 .flags = GENL_ADMIN_PERM, 3635 .doit = ip_vs_genl_get_cmd, 3636 }, 3637 { 3638 .cmd = IPVS_CMD_GET_INFO, 3639 .flags = GENL_ADMIN_PERM, 3640 .doit = ip_vs_genl_get_cmd, 3641 }, 3642 { 3643 .cmd = IPVS_CMD_ZERO, 3644 .flags = GENL_ADMIN_PERM, 3645 .policy = ip_vs_cmd_policy, 3646 .doit = ip_vs_genl_set_cmd, 3647 }, 3648 { 3649 .cmd = IPVS_CMD_FLUSH, 3650 .flags = GENL_ADMIN_PERM, 3651 .doit = ip_vs_genl_set_cmd, 3652 }, 3653}; 3654 3655static int __init ip_vs_genl_register(void) 3656{ 3657 return genl_register_family_with_ops(&ip_vs_genl_family, 3658 ip_vs_genl_ops); 3659} 3660 3661static void ip_vs_genl_unregister(void) 3662{ 3663 genl_unregister_family(&ip_vs_genl_family); 3664} 3665 3666/* End of Generic Netlink interface definitions */ 3667 3668/* 3669 * per netns 
intit/exit func. 3670 */ 3671#ifdef CONFIG_SYSCTL 3672static int __net_init ip_vs_control_net_init_sysctl(struct net *net) 3673{ 3674 int idx; 3675 struct netns_ipvs *ipvs = net_ipvs(net); 3676 struct ctl_table *tbl; 3677 3678 atomic_set(&ipvs->dropentry, 0); 3679 spin_lock_init(&ipvs->dropentry_lock); 3680 spin_lock_init(&ipvs->droppacket_lock); 3681 spin_lock_init(&ipvs->securetcp_lock); 3682 3683 if (!net_eq(net, &init_net)) { 3684 tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL); 3685 if (tbl == NULL) 3686 return -ENOMEM; 3687 3688 /* Don't export sysctls to unprivileged users */ 3689 if (net->user_ns != &init_user_ns) 3690 tbl[0].procname = NULL; 3691 } else 3692 tbl = vs_vars; 3693 /* Initialize sysctl defaults */ 3694 idx = 0; 3695 ipvs->sysctl_amemthresh = 1024; 3696 tbl[idx++].data = &ipvs->sysctl_amemthresh; 3697 ipvs->sysctl_am_droprate = 10; 3698 tbl[idx++].data = &ipvs->sysctl_am_droprate; 3699 tbl[idx++].data = &ipvs->sysctl_drop_entry; 3700 tbl[idx++].data = &ipvs->sysctl_drop_packet; 3701#ifdef CONFIG_IP_VS_NFCT 3702 tbl[idx++].data = &ipvs->sysctl_conntrack; 3703#endif 3704 tbl[idx++].data = &ipvs->sysctl_secure_tcp; 3705 ipvs->sysctl_snat_reroute = 1; 3706 tbl[idx++].data = &ipvs->sysctl_snat_reroute; 3707 ipvs->sysctl_sync_ver = 1; 3708 tbl[idx++].data = &ipvs->sysctl_sync_ver; 3709 ipvs->sysctl_sync_ports = 1; 3710 tbl[idx++].data = &ipvs->sysctl_sync_ports; 3711 tbl[idx++].data = &ipvs->sysctl_sync_persist_mode; 3712 ipvs->sysctl_sync_qlen_max = nr_free_buffer_pages() / 32; 3713 tbl[idx++].data = &ipvs->sysctl_sync_qlen_max; 3714 ipvs->sysctl_sync_sock_size = 0; 3715 tbl[idx++].data = &ipvs->sysctl_sync_sock_size; 3716 tbl[idx++].data = &ipvs->sysctl_cache_bypass; 3717 tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn; 3718 tbl[idx++].data = &ipvs->sysctl_sloppy_tcp; 3719 tbl[idx++].data = &ipvs->sysctl_sloppy_sctp; 3720 tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template; 3721 ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD; 
3722 ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD; 3723 tbl[idx].data = &ipvs->sysctl_sync_threshold; 3724 tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold); 3725 ipvs->sysctl_sync_refresh_period = DEFAULT_SYNC_REFRESH_PERIOD; 3726 tbl[idx++].data = &ipvs->sysctl_sync_refresh_period; 3727 ipvs->sysctl_sync_retries = clamp_t(int, DEFAULT_SYNC_RETRIES, 0, 3); 3728 tbl[idx++].data = &ipvs->sysctl_sync_retries; 3729 tbl[idx++].data = &ipvs->sysctl_nat_icmp_send; 3730 ipvs->sysctl_pmtu_disc = 1; 3731 tbl[idx++].data = &ipvs->sysctl_pmtu_disc; 3732 tbl[idx++].data = &ipvs->sysctl_backup_only; 3733 3734 3735 ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl); 3736 if (ipvs->sysctl_hdr == NULL) { 3737 if (!net_eq(net, &init_net)) 3738 kfree(tbl); 3739 return -ENOMEM; 3740 } 3741 ip_vs_start_estimator(net, &ipvs->tot_stats); 3742 ipvs->sysctl_tbl = tbl; 3743 /* Schedule defense work */ 3744 INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler); 3745 schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD); 3746 3747 return 0; 3748} 3749 3750static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) 3751{ 3752 struct netns_ipvs *ipvs = net_ipvs(net); 3753 3754 cancel_delayed_work_sync(&ipvs->defense_work); 3755 cancel_work_sync(&ipvs->defense_work.work); 3756 unregister_net_sysctl_table(ipvs->sysctl_hdr); 3757 ip_vs_stop_estimator(net, &ipvs->tot_stats); 3758} 3759 3760#else 3761 3762static int __net_init ip_vs_control_net_init_sysctl(struct net *net) { return 0; } 3763static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) { } 3764 3765#endif 3766 3767static struct notifier_block ip_vs_dst_notifier = { 3768 .notifier_call = ip_vs_dst_event, 3769}; 3770 3771int __net_init ip_vs_control_net_init(struct net *net) 3772{ 3773 int i, idx; 3774 struct netns_ipvs *ipvs = net_ipvs(net); 3775 3776 /* Initialize rs_table */ 3777 for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++) 3778 
INIT_HLIST_HEAD(&ipvs->rs_table[idx]); 3779 3780 INIT_LIST_HEAD(&ipvs->dest_trash); 3781 spin_lock_init(&ipvs->dest_trash_lock); 3782 setup_timer(&ipvs->dest_trash_timer, ip_vs_dest_trash_expire, 3783 (unsigned long) net); 3784 atomic_set(&ipvs->ftpsvc_counter, 0); 3785 atomic_set(&ipvs->nullsvc_counter, 0); 3786 3787 /* procfs stats */ 3788 ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats); 3789 if (!ipvs->tot_stats.cpustats) 3790 return -ENOMEM; 3791 3792 for_each_possible_cpu(i) { 3793 struct ip_vs_cpu_stats *ipvs_tot_stats; 3794 ipvs_tot_stats = per_cpu_ptr(ipvs->tot_stats.cpustats, i); 3795 u64_stats_init(&ipvs_tot_stats->syncp); 3796 } 3797 3798 spin_lock_init(&ipvs->tot_stats.lock); 3799 3800 proc_create("ip_vs", 0, net->proc_net, &ip_vs_info_fops); 3801 proc_create("ip_vs_stats", 0, net->proc_net, &ip_vs_stats_fops); 3802 proc_create("ip_vs_stats_percpu", 0, net->proc_net, 3803 &ip_vs_stats_percpu_fops); 3804 3805 if (ip_vs_control_net_init_sysctl(net)) 3806 goto err; 3807 3808 return 0; 3809 3810err: 3811 free_percpu(ipvs->tot_stats.cpustats); 3812 return -ENOMEM; 3813} 3814 3815void __net_exit ip_vs_control_net_cleanup(struct net *net) 3816{ 3817 struct netns_ipvs *ipvs = net_ipvs(net); 3818 3819 ip_vs_trash_cleanup(net); 3820 ip_vs_control_net_cleanup_sysctl(net); 3821 remove_proc_entry("ip_vs_stats_percpu", net->proc_net); 3822 remove_proc_entry("ip_vs_stats", net->proc_net); 3823 remove_proc_entry("ip_vs", net->proc_net); 3824 free_percpu(ipvs->tot_stats.cpustats); 3825} 3826 3827int __init ip_vs_register_nl_ioctl(void) 3828{ 3829 int ret; 3830 3831 ret = nf_register_sockopt(&ip_vs_sockopts); 3832 if (ret) { 3833 pr_err("cannot register sockopt.\n"); 3834 goto err_sock; 3835 } 3836 3837 ret = ip_vs_genl_register(); 3838 if (ret) { 3839 pr_err("cannot register Generic Netlink interface.\n"); 3840 goto err_genl; 3841 } 3842 return 0; 3843 3844err_genl: 3845 nf_unregister_sockopt(&ip_vs_sockopts); 3846err_sock: 3847 return ret; 3848} 3849 
3850void ip_vs_unregister_nl_ioctl(void) 3851{ 3852 ip_vs_genl_unregister(); 3853 nf_unregister_sockopt(&ip_vs_sockopts); 3854} 3855 3856int __init ip_vs_control_init(void) 3857{ 3858 int idx; 3859 int ret; 3860 3861 EnterFunction(2); 3862 3863 /* Initialize svc_table, ip_vs_svc_fwm_table */ 3864 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 3865 INIT_HLIST_HEAD(&ip_vs_svc_table[idx]); 3866 INIT_HLIST_HEAD(&ip_vs_svc_fwm_table[idx]); 3867 } 3868 3869 smp_wmb(); /* Do we really need it now ? */ 3870 3871 ret = register_netdevice_notifier(&ip_vs_dst_notifier); 3872 if (ret < 0) 3873 return ret; 3874 3875 LeaveFunction(2); 3876 return 0; 3877} 3878 3879 3880void ip_vs_control_cleanup(void) 3881{ 3882 EnterFunction(2); 3883 unregister_netdevice_notifier(&ip_vs_dst_notifier); 3884 LeaveFunction(2); 3885} 3886