1/*
2 * IPVS         An implementation of the IP virtual server support for the
3 *              LINUX operating system.  IPVS is now implemented as a module
4 *              over the NetFilter framework. IPVS can be used to build a
5 *              high-performance and highly available server based on a
6 *              cluster of servers.
7 *
8 * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
9 *              Peter Kese <peter.kese@ijs.si>
10 *              Julian Anastasov <ja@ssi.bg>
11 *
12 *              This program is free software; you can redistribute it and/or
13 *              modify it under the terms of the GNU General Public License
14 *              as published by the Free Software Foundation; either version
15 *              2 of the License, or (at your option) any later version.
16 *
17 * Changes:
18 *
19 */
20
21#define KMSG_COMPONENT "IPVS"
22#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
23
24#include <linux/module.h>
25#include <linux/init.h>
26#include <linux/types.h>
27#include <linux/capability.h>
28#include <linux/fs.h>
29#include <linux/sysctl.h>
30#include <linux/proc_fs.h>
31#include <linux/workqueue.h>
32#include <linux/swap.h>
33#include <linux/seq_file.h>
34#include <linux/slab.h>
35
36#include <linux/netfilter.h>
37#include <linux/netfilter_ipv4.h>
38#include <linux/mutex.h>
39
40#include <net/net_namespace.h>
41#include <linux/nsproxy.h>
42#include <net/ip.h>
43#ifdef CONFIG_IP_VS_IPV6
44#include <net/ipv6.h>
45#include <net/ip6_route.h>
46#endif
47#include <net/route.h>
48#include <net/sock.h>
49#include <net/genetlink.h>
50
51#include <asm/uaccess.h>
52
53#include <net/ip_vs.h>
54
55/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
56static DEFINE_MUTEX(__ip_vs_mutex);
57
58/* sysctl variables */
59
60#ifdef CONFIG_IP_VS_DEBUG
static int sysctl_ip_vs_debug_level = 0;

/* Return the current IPVS debug verbosity (0 = quiet). */
int ip_vs_get_debug_level(void)
{
	return sysctl_ip_vs_debug_level;
}
67#endif
68
69
70/*  Protos */
71static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup);
72
73
74#ifdef CONFIG_IP_VS_IPV6
/* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
/* Return true if @addr is a local IPv6 address in netns @net, decided by
 * whether the route lookup for it resolves to a loopback device.
 */
static bool __ip_vs_addr_is_local_v6(struct net *net,
				     const struct in6_addr *addr)
{
	struct flowi6 fl6 = {
		.daddr = *addr,
	};
	struct dst_entry *dst = ip6_route_output(net, NULL, &fl6);
	bool is_local;

	/* a route whose output device is loopback means the address is ours */
	is_local = !dst->error && dst->dev && (dst->dev->flags & IFF_LOOPBACK);

	dst_release(dst);
	return is_local;
}
90#endif
91
92#ifdef CONFIG_SYSCTL
93/*
94 *	update_defense_level is called from keventd and from sysctl,
95 *	so it needs to protect itself from softirqs
96 */
97static void update_defense_level(struct netns_ipvs *ipvs)
98{
99	struct sysinfo i;
100	static int old_secure_tcp = 0;
101	int availmem;
102	int nomem;
103	int to_change = -1;
104
105	/* we only count free and buffered memory (in pages) */
106	si_meminfo(&i);
107	availmem = i.freeram + i.bufferram;
108	/* however in linux 2.5 the i.bufferram is total page cache size,
109	   we need adjust it */
110	/* si_swapinfo(&i); */
111	/* availmem = availmem - (i.totalswap - i.freeswap); */
112
113	nomem = (availmem < ipvs->sysctl_amemthresh);
114
115	local_bh_disable();
116
117	/* drop_entry */
118	spin_lock(&ipvs->dropentry_lock);
119	switch (ipvs->sysctl_drop_entry) {
120	case 0:
121		atomic_set(&ipvs->dropentry, 0);
122		break;
123	case 1:
124		if (nomem) {
125			atomic_set(&ipvs->dropentry, 1);
126			ipvs->sysctl_drop_entry = 2;
127		} else {
128			atomic_set(&ipvs->dropentry, 0);
129		}
130		break;
131	case 2:
132		if (nomem) {
133			atomic_set(&ipvs->dropentry, 1);
134		} else {
135			atomic_set(&ipvs->dropentry, 0);
136			ipvs->sysctl_drop_entry = 1;
137		};
138		break;
139	case 3:
140		atomic_set(&ipvs->dropentry, 1);
141		break;
142	}
143	spin_unlock(&ipvs->dropentry_lock);
144
145	/* drop_packet */
146	spin_lock(&ipvs->droppacket_lock);
147	switch (ipvs->sysctl_drop_packet) {
148	case 0:
149		ipvs->drop_rate = 0;
150		break;
151	case 1:
152		if (nomem) {
153			ipvs->drop_rate = ipvs->drop_counter
154				= ipvs->sysctl_amemthresh /
155				(ipvs->sysctl_amemthresh-availmem);
156			ipvs->sysctl_drop_packet = 2;
157		} else {
158			ipvs->drop_rate = 0;
159		}
160		break;
161	case 2:
162		if (nomem) {
163			ipvs->drop_rate = ipvs->drop_counter
164				= ipvs->sysctl_amemthresh /
165				(ipvs->sysctl_amemthresh-availmem);
166		} else {
167			ipvs->drop_rate = 0;
168			ipvs->sysctl_drop_packet = 1;
169		}
170		break;
171	case 3:
172		ipvs->drop_rate = ipvs->sysctl_am_droprate;
173		break;
174	}
175	spin_unlock(&ipvs->droppacket_lock);
176
177	/* secure_tcp */
178	spin_lock(&ipvs->securetcp_lock);
179	switch (ipvs->sysctl_secure_tcp) {
180	case 0:
181		if (old_secure_tcp >= 2)
182			to_change = 0;
183		break;
184	case 1:
185		if (nomem) {
186			if (old_secure_tcp < 2)
187				to_change = 1;
188			ipvs->sysctl_secure_tcp = 2;
189		} else {
190			if (old_secure_tcp >= 2)
191				to_change = 0;
192		}
193		break;
194	case 2:
195		if (nomem) {
196			if (old_secure_tcp < 2)
197				to_change = 1;
198		} else {
199			if (old_secure_tcp >= 2)
200				to_change = 0;
201			ipvs->sysctl_secure_tcp = 1;
202		}
203		break;
204	case 3:
205		if (old_secure_tcp < 2)
206			to_change = 1;
207		break;
208	}
209	old_secure_tcp = ipvs->sysctl_secure_tcp;
210	if (to_change >= 0)
211		ip_vs_protocol_timeout_change(ipvs,
212					      ipvs->sysctl_secure_tcp > 1);
213	spin_unlock(&ipvs->securetcp_lock);
214
215	local_bh_enable();
216}
217
218
219/*
220 *	Timer for checking the defense
221 */
222#define DEFENSE_TIMER_PERIOD	1*HZ
223
/* Periodic defense check: re-evaluate the defense level and, when entry
 * dropping is active, randomly expire connection entries.  Reschedules
 * itself every DEFENSE_TIMER_PERIOD.
 */
static void defense_work_handler(struct work_struct *work)
{
	struct netns_ipvs *ipvs =
		container_of(work, struct netns_ipvs, defense_work.work);

	update_defense_level(ipvs);
	if (atomic_read(&ipvs->dropentry))
		ip_vs_random_dropentry(ipvs->net);
	schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
}
234#endif
235
/* Take a reference on the IPVS module; returns the try_module_get()
 * result (non-zero on success, 0 if the module is being unloaded).
 */
int
ip_vs_use_count_inc(void)
{
	return try_module_get(THIS_MODULE);
}
241
/* Release a module reference taken with ip_vs_use_count_inc(). */
void
ip_vs_use_count_dec(void)
{
	module_put(THIS_MODULE);
}
247
248
249/*
250 *	Hash table: for virtual service lookups
251 */
252#define IP_VS_SVC_TAB_BITS 8
253#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
254#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
255
256/* the service table hashed by <protocol, addr, port> */
257static struct hlist_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
258/* the service table hashed by fwmark */
259static struct hlist_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
260
261
262/*
263 *	Returns hash value for virtual service
264 */
265static inline unsigned int
266ip_vs_svc_hashkey(struct net *net, int af, unsigned int proto,
267		  const union nf_inet_addr *addr, __be16 port)
268{
269	register unsigned int porth = ntohs(port);
270	__be32 addr_fold = addr->ip;
271	__u32 ahash;
272
273#ifdef CONFIG_IP_VS_IPV6
274	if (af == AF_INET6)
275		addr_fold = addr->ip6[0]^addr->ip6[1]^
276			    addr->ip6[2]^addr->ip6[3];
277#endif
278	ahash = ntohl(addr_fold);
279	ahash ^= ((size_t) net >> 8);
280
281	return (proto ^ ahash ^ (porth >> IP_VS_SVC_TAB_BITS) ^ porth) &
282	       IP_VS_SVC_TAB_MASK;
283}
284
285/*
286 *	Returns hash value of fwmark for virtual service lookup
287 */
288static inline unsigned int ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark)
289{
290	return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
291}
292
293/*
294 *	Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port>
295 *	or in the ip_vs_svc_fwm_table by fwmark.
296 *	Should be called with locked tables.
297 */
298static int ip_vs_svc_hash(struct ip_vs_service *svc)
299{
300	unsigned int hash;
301
302	if (svc->flags & IP_VS_SVC_F_HASHED) {
303		pr_err("%s(): request for already hashed, called from %pF\n",
304		       __func__, __builtin_return_address(0));
305		return 0;
306	}
307
308	if (svc->fwmark == 0) {
309		/*
310		 *  Hash it by <netns,protocol,addr,port> in ip_vs_svc_table
311		 */
312		hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol,
313					 &svc->addr, svc->port);
314		hlist_add_head_rcu(&svc->s_list, &ip_vs_svc_table[hash]);
315	} else {
316		/*
317		 *  Hash it by fwmark in svc_fwm_table
318		 */
319		hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark);
320		hlist_add_head_rcu(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
321	}
322
323	svc->flags |= IP_VS_SVC_F_HASHED;
324	/* increase its refcnt because it is referenced by the svc table */
325	atomic_inc(&svc->refcnt);
326	return 1;
327}
328
329
330/*
331 *	Unhashes a service from svc_table / svc_fwm_table.
332 *	Should be called with locked tables.
333 */
334static int ip_vs_svc_unhash(struct ip_vs_service *svc)
335{
336	if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
337		pr_err("%s(): request for unhash flagged, called from %pF\n",
338		       __func__, __builtin_return_address(0));
339		return 0;
340	}
341
342	if (svc->fwmark == 0) {
343		/* Remove it from the svc_table table */
344		hlist_del_rcu(&svc->s_list);
345	} else {
346		/* Remove it from the svc_fwm_table table */
347		hlist_del_rcu(&svc->f_list);
348	}
349
350	svc->flags &= ~IP_VS_SVC_F_HASHED;
351	atomic_dec(&svc->refcnt);
352	return 1;
353}
354
355
356/*
357 *	Get service by {netns, proto,addr,port} in the service table.
358 */
359static inline struct ip_vs_service *
360__ip_vs_service_find(struct net *net, int af, __u16 protocol,
361		     const union nf_inet_addr *vaddr, __be16 vport)
362{
363	unsigned int hash;
364	struct ip_vs_service *svc;
365
366	/* Check for "full" addressed entries */
367	hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport);
368
369	hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[hash], s_list) {
370		if ((svc->af == af)
371		    && ip_vs_addr_equal(af, &svc->addr, vaddr)
372		    && (svc->port == vport)
373		    && (svc->protocol == protocol)
374		    && net_eq(svc->net, net)) {
375			/* HIT */
376			return svc;
377		}
378	}
379
380	return NULL;
381}
382
383
384/*
385 *	Get service by {fwmark} in the service table.
386 */
387static inline struct ip_vs_service *
388__ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
389{
390	unsigned int hash;
391	struct ip_vs_service *svc;
392
393	/* Check for fwmark addressed entries */
394	hash = ip_vs_svc_fwm_hashkey(net, fwmark);
395
396	hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[hash], f_list) {
397		if (svc->fwmark == fwmark && svc->af == af
398		    && net_eq(svc->net, net)) {
399			/* HIT */
400			return svc;
401		}
402	}
403
404	return NULL;
405}
406
/* Find service, called under RCU lock */
/* Resolution order: fwmark table first, then the full <proto,addr,port>
 * tuple, then two fallbacks — the FTP control service (for FTP data
 * connections) and the catch-all port-zero service.
 */
struct ip_vs_service *
ip_vs_service_find(struct net *net, int af, __u32 fwmark, __u16 protocol,
		   const union nf_inet_addr *vaddr, __be16 vport)
{
	struct ip_vs_service *svc;
	struct netns_ipvs *ipvs = net_ipvs(net);

	/*
	 *	Check the table hashed by fwmark first
	 */
	if (fwmark) {
		svc = __ip_vs_svc_fwm_find(net, af, fwmark);
		if (svc)
			goto out;
	}

	/*
	 *	Check the table hashed by <protocol,addr,port>
	 *	for "full" addressed entries
	 */
	svc = __ip_vs_service_find(net, af, protocol, vaddr, vport);

	/* only consult the FTP service when one is actually configured
	 * (ftpsvc_counter) and the port looks like FTP data traffic */
	if (svc == NULL
	    && protocol == IPPROTO_TCP
	    && atomic_read(&ipvs->ftpsvc_counter)
	    && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
		/*
		 * Check if ftp service entry exists, the packet
		 * might belong to FTP data connections.
		 */
		svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT);
	}

	if (svc == NULL
	    && atomic_read(&ipvs->nullsvc_counter)) {
		/*
		 * Check if the catch-all port (port zero) exists
		 */
		svc = __ip_vs_service_find(net, af, protocol, vaddr, 0);
	}

  out:
	IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
		      fwmark, ip_vs_proto_name(protocol),
		      IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
		      svc ? "hit" : "not hit");

	return svc;
}
457
458
/* Bind @dest to @svc: take a service reference and publish the pointer
 * for RCU readers.
 */
static inline void
__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
{
	atomic_inc(&svc->refcnt);
	rcu_assign_pointer(dest->svc, svc);
}
465
466static void ip_vs_service_free(struct ip_vs_service *svc)
467{
468	if (svc->stats.cpustats)
469		free_percpu(svc->stats.cpustats);
470	kfree(svc);
471}
472
473static void ip_vs_service_rcu_free(struct rcu_head *head)
474{
475	struct ip_vs_service *svc;
476
477	svc = container_of(head, struct ip_vs_service, rcu_head);
478	ip_vs_service_free(svc);
479}
480
/* Drop one reference to @svc and free it when the count reaches zero.
 * With @do_delay the free is deferred via call_rcu() so concurrent RCU
 * readers can finish with the structure first.
 */
static void __ip_vs_svc_put(struct ip_vs_service *svc, bool do_delay)
{
	if (atomic_dec_and_test(&svc->refcnt)) {
		IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n",
			      svc->fwmark,
			      IP_VS_DBG_ADDR(svc->af, &svc->addr),
			      ntohs(svc->port));
		if (do_delay)
			call_rcu(&svc->rcu_head, ip_vs_service_rcu_free);
		else
			ip_vs_service_free(svc);
	}
}
494
495
496/*
497 *	Returns hash value for real service
498 */
499static inline unsigned int ip_vs_rs_hashkey(int af,
500					    const union nf_inet_addr *addr,
501					    __be16 port)
502{
503	register unsigned int porth = ntohs(port);
504	__be32 addr_fold = addr->ip;
505
506#ifdef CONFIG_IP_VS_IPV6
507	if (af == AF_INET6)
508		addr_fold = addr->ip6[0]^addr->ip6[1]^
509			    addr->ip6[2]^addr->ip6[3];
510#endif
511
512	return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
513		& IP_VS_RTAB_MASK;
514}
515
516/* Hash ip_vs_dest in rs_table by <proto,addr,port>. */
517static void ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
518{
519	unsigned int hash;
520
521	if (dest->in_rs_table)
522		return;
523
524	/*
525	 *	Hash by proto,addr,port,
526	 *	which are the parameters of the real service.
527	 */
528	hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
529
530	hlist_add_head_rcu(&dest->d_list, &ipvs->rs_table[hash]);
531	dest->in_rs_table = 1;
532}
533
534/* Unhash ip_vs_dest from rs_table. */
535static void ip_vs_rs_unhash(struct ip_vs_dest *dest)
536{
537	/*
538	 * Remove it from the rs_table table.
539	 */
540	if (dest->in_rs_table) {
541		hlist_del_rcu(&dest->d_list);
542		dest->in_rs_table = 0;
543	}
544}
545
546/* Check if real service by <proto,addr,port> is present */
547bool ip_vs_has_real_service(struct net *net, int af, __u16 protocol,
548			    const union nf_inet_addr *daddr, __be16 dport)
549{
550	struct netns_ipvs *ipvs = net_ipvs(net);
551	unsigned int hash;
552	struct ip_vs_dest *dest;
553
554	/* Check for "full" addressed entries */
555	hash = ip_vs_rs_hashkey(af, daddr, dport);
556
557	rcu_read_lock();
558	hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) {
559		if (dest->port == dport &&
560		    dest->af == af &&
561		    ip_vs_addr_equal(af, &dest->addr, daddr) &&
562		    (dest->protocol == protocol || dest->vfwmark)) {
563			/* HIT */
564			rcu_read_unlock();
565			return true;
566		}
567	}
568	rcu_read_unlock();
569
570	return false;
571}
572
573/* Lookup destination by {addr,port} in the given service
574 * Called under RCU lock.
575 */
576static struct ip_vs_dest *
577ip_vs_lookup_dest(struct ip_vs_service *svc, int dest_af,
578		  const union nf_inet_addr *daddr, __be16 dport)
579{
580	struct ip_vs_dest *dest;
581
582	/*
583	 * Find the destination for the given service
584	 */
585	list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
586		if ((dest->af == dest_af) &&
587		    ip_vs_addr_equal(dest_af, &dest->addr, daddr) &&
588		    (dest->port == dport)) {
589			/* HIT */
590			return dest;
591		}
592	}
593
594	return NULL;
595}
596
597/*
598 * Find destination by {daddr,dport,vaddr,protocol}
599 * Created to be used in ip_vs_process_message() in
600 * the backup synchronization daemon. It finds the
601 * destination to be bound to the received connection
602 * on the backup.
603 * Called under RCU lock, no refcnt is returned.
604 */
605struct ip_vs_dest *ip_vs_find_dest(struct net  *net, int svc_af, int dest_af,
606				   const union nf_inet_addr *daddr,
607				   __be16 dport,
608				   const union nf_inet_addr *vaddr,
609				   __be16 vport, __u16 protocol, __u32 fwmark,
610				   __u32 flags)
611{
612	struct ip_vs_dest *dest;
613	struct ip_vs_service *svc;
614	__be16 port = dport;
615
616	svc = ip_vs_service_find(net, svc_af, fwmark, protocol, vaddr, vport);
617	if (!svc)
618		return NULL;
619	if (fwmark && (flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ)
620		port = 0;
621	dest = ip_vs_lookup_dest(svc, dest_af, daddr, port);
622	if (!dest)
623		dest = ip_vs_lookup_dest(svc, dest_af, daddr, port ^ dport);
624	return dest;
625}
626
627void ip_vs_dest_dst_rcu_free(struct rcu_head *head)
628{
629	struct ip_vs_dest_dst *dest_dst = container_of(head,
630						       struct ip_vs_dest_dst,
631						       rcu_head);
632
633	dst_release(dest_dst->dst_cache);
634	kfree(dest_dst);
635}
636
/* Release dest_dst and dst_cache for dest in user context */
/* Clears dest->dest_dst and defers the actual release past an RCU grace
 * period so packet-path readers that still hold the old pointer are safe.
 */
static void __ip_vs_dst_cache_reset(struct ip_vs_dest *dest)
{
	struct ip_vs_dest_dst *old;

	old = rcu_dereference_protected(dest->dest_dst, 1);
	if (old) {
		RCU_INIT_POINTER(dest->dest_dst, NULL);
		call_rcu(&old->rcu_head, ip_vs_dest_dst_rcu_free);
	}
}
648
649/*
650 *  Lookup dest by {svc,addr,port} in the destination trash.
651 *  The destination trash is used to hold the destinations that are removed
652 *  from the service table but are still referenced by some conn entries.
653 *  The reason to add the destination trash is when the dest is temporary
654 *  down (either by administrator or by monitor program), the dest can be
655 *  picked back from the trash, the remaining connections to the dest can
656 *  continue, and the counting information of the dest is also useful for
657 *  scheduling.
658 */
659static struct ip_vs_dest *
660ip_vs_trash_get_dest(struct ip_vs_service *svc, int dest_af,
661		     const union nf_inet_addr *daddr, __be16 dport)
662{
663	struct ip_vs_dest *dest;
664	struct netns_ipvs *ipvs = net_ipvs(svc->net);
665
666	/*
667	 * Find the destination in trash
668	 */
669	spin_lock_bh(&ipvs->dest_trash_lock);
670	list_for_each_entry(dest, &ipvs->dest_trash, t_list) {
671		IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
672			      "dest->refcnt=%d\n",
673			      dest->vfwmark,
674			      IP_VS_DBG_ADDR(dest->af, &dest->addr),
675			      ntohs(dest->port),
676			      atomic_read(&dest->refcnt));
677		if (dest->af == dest_af &&
678		    ip_vs_addr_equal(dest_af, &dest->addr, daddr) &&
679		    dest->port == dport &&
680		    dest->vfwmark == svc->fwmark &&
681		    dest->protocol == svc->protocol &&
682		    (svc->fwmark ||
683		     (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
684		      dest->vport == svc->port))) {
685			/* HIT */
686			list_del(&dest->t_list);
687			ip_vs_dest_hold(dest);
688			goto out;
689		}
690	}
691
692	dest = NULL;
693
694out:
695	spin_unlock_bh(&ipvs->dest_trash_lock);
696
697	return dest;
698}
699
/* Final teardown of a destination: drop its cached route, release the
 * service reference it holds, free the per-cpu stats and the dest itself.
 */
static void ip_vs_dest_free(struct ip_vs_dest *dest)
{
	struct ip_vs_service *svc = rcu_dereference_protected(dest->svc, 1);

	__ip_vs_dst_cache_reset(dest);
	__ip_vs_svc_put(svc, false);
	free_percpu(dest->stats.cpustats);
	ip_vs_dest_put_and_free(dest);
}
709
710/*
711 *  Clean up all the destinations in the trash
712 *  Called by the ip_vs_control_cleanup()
713 *
714 *  When the ip_vs_control_clearup is activated by ipvs module exit,
715 *  the service tables must have been flushed and all the connections
716 *  are expired, and the refcnt of each destination in the trash must
717 *  be 0, so we simply release them here.
718 */
719static void ip_vs_trash_cleanup(struct net *net)
720{
721	struct ip_vs_dest *dest, *nxt;
722	struct netns_ipvs *ipvs = net_ipvs(net);
723
724	del_timer_sync(&ipvs->dest_trash_timer);
725	/* No need to use dest_trash_lock */
726	list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, t_list) {
727		list_del(&dest->t_list);
728		ip_vs_dest_free(dest);
729	}
730}
731
/* Snapshot @src's counters into the user-visible @dst, reporting each
 * counter relative to its last zeroing point (ustats0).
 */
static void
ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
{
#define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->ustats.c - src->ustats0.c

	spin_lock_bh(&src->lock);

	IP_VS_SHOW_STATS_COUNTER(conns);
	IP_VS_SHOW_STATS_COUNTER(inpkts);
	IP_VS_SHOW_STATS_COUNTER(outpkts);
	IP_VS_SHOW_STATS_COUNTER(inbytes);
	IP_VS_SHOW_STATS_COUNTER(outbytes);

	ip_vs_read_estimator(dst, src);

	spin_unlock_bh(&src->lock);
}
749
/* Zero @stats by recording the current counters as the new baseline
 * (the raw counters keep running; only the reported delta resets).
 */
static void
ip_vs_zero_stats(struct ip_vs_stats *stats)
{
	spin_lock_bh(&stats->lock);

	/* get current counters as zero point, rates are zeroed */

#define IP_VS_ZERO_STATS_COUNTER(c) stats->ustats0.c = stats->ustats.c

	IP_VS_ZERO_STATS_COUNTER(conns);
	IP_VS_ZERO_STATS_COUNTER(inpkts);
	IP_VS_ZERO_STATS_COUNTER(outpkts);
	IP_VS_ZERO_STATS_COUNTER(inbytes);
	IP_VS_ZERO_STATS_COUNTER(outbytes);

	ip_vs_zero_estimator(stats);

	spin_unlock_bh(&stats->lock);
}
769
770/*
771 *	Update a destination in the given service
772 */
773static void
774__ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
775		    struct ip_vs_dest_user_kern *udest, int add)
776{
777	struct netns_ipvs *ipvs = net_ipvs(svc->net);
778	struct ip_vs_service *old_svc;
779	struct ip_vs_scheduler *sched;
780	int conn_flags;
781
782	/* We cannot modify an address and change the address family */
783	BUG_ON(!add && udest->af != dest->af);
784
785	if (add && udest->af != svc->af)
786		ipvs->mixed_address_family_dests++;
787
788	/* set the weight and the flags */
789	atomic_set(&dest->weight, udest->weight);
790	conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
791	conn_flags |= IP_VS_CONN_F_INACTIVE;
792
793	/* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
794	if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
795		conn_flags |= IP_VS_CONN_F_NOOUTPUT;
796	} else {
797		/*
798		 *    Put the real service in rs_table if not present.
799		 *    For now only for NAT!
800		 */
801		ip_vs_rs_hash(ipvs, dest);
802	}
803	atomic_set(&dest->conn_flags, conn_flags);
804
805	/* bind the service */
806	old_svc = rcu_dereference_protected(dest->svc, 1);
807	if (!old_svc) {
808		__ip_vs_bind_svc(dest, svc);
809	} else {
810		if (old_svc != svc) {
811			ip_vs_zero_stats(&dest->stats);
812			__ip_vs_bind_svc(dest, svc);
813			__ip_vs_svc_put(old_svc, true);
814		}
815	}
816
817	/* set the dest status flags */
818	dest->flags |= IP_VS_DEST_F_AVAILABLE;
819
820	if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
821		dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
822	dest->u_threshold = udest->u_threshold;
823	dest->l_threshold = udest->l_threshold;
824
825	dest->af = udest->af;
826
827	spin_lock_bh(&dest->dst_lock);
828	__ip_vs_dst_cache_reset(dest);
829	spin_unlock_bh(&dest->dst_lock);
830
831	sched = rcu_dereference_protected(svc->scheduler, 1);
832	if (add) {
833		ip_vs_start_estimator(svc->net, &dest->stats);
834		list_add_rcu(&dest->n_list, &svc->destinations);
835		svc->num_dests++;
836		if (sched->add_dest)
837			sched->add_dest(svc, dest);
838	} else {
839		if (sched->upd_dest)
840			sched->upd_dest(svc, dest);
841	}
842}
843
844
845/*
846 *	Create a destination for the given service
847 */
848static int
849ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
850	       struct ip_vs_dest **dest_p)
851{
852	struct ip_vs_dest *dest;
853	unsigned int atype, i;
854
855	EnterFunction(2);
856
857#ifdef CONFIG_IP_VS_IPV6
858	if (udest->af == AF_INET6) {
859		atype = ipv6_addr_type(&udest->addr.in6);
860		if ((!(atype & IPV6_ADDR_UNICAST) ||
861			atype & IPV6_ADDR_LINKLOCAL) &&
862			!__ip_vs_addr_is_local_v6(svc->net, &udest->addr.in6))
863			return -EINVAL;
864	} else
865#endif
866	{
867		atype = inet_addr_type(svc->net, udest->addr.ip);
868		if (atype != RTN_LOCAL && atype != RTN_UNICAST)
869			return -EINVAL;
870	}
871
872	dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
873	if (dest == NULL)
874		return -ENOMEM;
875
876	dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
877	if (!dest->stats.cpustats)
878		goto err_alloc;
879
880	for_each_possible_cpu(i) {
881		struct ip_vs_cpu_stats *ip_vs_dest_stats;
882		ip_vs_dest_stats = per_cpu_ptr(dest->stats.cpustats, i);
883		u64_stats_init(&ip_vs_dest_stats->syncp);
884	}
885
886	dest->af = udest->af;
887	dest->protocol = svc->protocol;
888	dest->vaddr = svc->addr;
889	dest->vport = svc->port;
890	dest->vfwmark = svc->fwmark;
891	ip_vs_addr_copy(udest->af, &dest->addr, &udest->addr);
892	dest->port = udest->port;
893
894	atomic_set(&dest->activeconns, 0);
895	atomic_set(&dest->inactconns, 0);
896	atomic_set(&dest->persistconns, 0);
897	atomic_set(&dest->refcnt, 1);
898
899	INIT_HLIST_NODE(&dest->d_list);
900	spin_lock_init(&dest->dst_lock);
901	spin_lock_init(&dest->stats.lock);
902	__ip_vs_update_dest(svc, dest, udest, 1);
903
904	*dest_p = dest;
905
906	LeaveFunction(2);
907	return 0;
908
909err_alloc:
910	kfree(dest);
911	return -ENOMEM;
912}
913
914
915/*
916 *	Add a destination into an existing service
917 */
918static int
919ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
920{
921	struct ip_vs_dest *dest;
922	union nf_inet_addr daddr;
923	__be16 dport = udest->port;
924	int ret;
925
926	EnterFunction(2);
927
928	if (udest->weight < 0) {
929		pr_err("%s(): server weight less than zero\n", __func__);
930		return -ERANGE;
931	}
932
933	if (udest->l_threshold > udest->u_threshold) {
934		pr_err("%s(): lower threshold is higher than upper threshold\n",
935			__func__);
936		return -ERANGE;
937	}
938
939	ip_vs_addr_copy(udest->af, &daddr, &udest->addr);
940
941	/* We use function that requires RCU lock */
942	rcu_read_lock();
943	dest = ip_vs_lookup_dest(svc, udest->af, &daddr, dport);
944	rcu_read_unlock();
945
946	if (dest != NULL) {
947		IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
948		return -EEXIST;
949	}
950
951	/*
952	 * Check if the dest already exists in the trash and
953	 * is from the same service
954	 */
955	dest = ip_vs_trash_get_dest(svc, udest->af, &daddr, dport);
956
957	if (dest != NULL) {
958		IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
959			      "dest->refcnt=%d, service %u/%s:%u\n",
960			      IP_VS_DBG_ADDR(udest->af, &daddr), ntohs(dport),
961			      atomic_read(&dest->refcnt),
962			      dest->vfwmark,
963			      IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
964			      ntohs(dest->vport));
965
966		__ip_vs_update_dest(svc, dest, udest, 1);
967		ret = 0;
968	} else {
969		/*
970		 * Allocate and initialize the dest structure
971		 */
972		ret = ip_vs_new_dest(svc, udest, &dest);
973	}
974	LeaveFunction(2);
975
976	return ret;
977}
978
979
980/*
981 *	Edit a destination in the given service
982 */
983static int
984ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
985{
986	struct ip_vs_dest *dest;
987	union nf_inet_addr daddr;
988	__be16 dport = udest->port;
989
990	EnterFunction(2);
991
992	if (udest->weight < 0) {
993		pr_err("%s(): server weight less than zero\n", __func__);
994		return -ERANGE;
995	}
996
997	if (udest->l_threshold > udest->u_threshold) {
998		pr_err("%s(): lower threshold is higher than upper threshold\n",
999			__func__);
1000		return -ERANGE;
1001	}
1002
1003	ip_vs_addr_copy(udest->af, &daddr, &udest->addr);
1004
1005	/* We use function that requires RCU lock */
1006	rcu_read_lock();
1007	dest = ip_vs_lookup_dest(svc, udest->af, &daddr, dport);
1008	rcu_read_unlock();
1009
1010	if (dest == NULL) {
1011		IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
1012		return -ENOENT;
1013	}
1014
1015	__ip_vs_update_dest(svc, dest, udest, 0);
1016	LeaveFunction(2);
1017
1018	return 0;
1019}
1020
1021/*
1022 *	Delete a destination (must be already unlinked from the service)
1023 */
1024static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest,
1025			     bool cleanup)
1026{
1027	struct netns_ipvs *ipvs = net_ipvs(net);
1028
1029	ip_vs_stop_estimator(net, &dest->stats);
1030
1031	/*
1032	 *  Remove it from the d-linked list with the real services.
1033	 */
1034	ip_vs_rs_unhash(dest);
1035
1036	spin_lock_bh(&ipvs->dest_trash_lock);
1037	IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, dest->refcnt=%d\n",
1038		      IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port),
1039		      atomic_read(&dest->refcnt));
1040	if (list_empty(&ipvs->dest_trash) && !cleanup)
1041		mod_timer(&ipvs->dest_trash_timer,
1042			  jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1));
1043	/* dest lives in trash without reference */
1044	list_add(&dest->t_list, &ipvs->dest_trash);
1045	dest->idle_start = 0;
1046	spin_unlock_bh(&ipvs->dest_trash_lock);
1047	ip_vs_dest_put(dest);
1048}
1049
1050
1051/*
1052 *	Unlink a destination from the given service
1053 */
1054static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1055				struct ip_vs_dest *dest,
1056				int svcupd)
1057{
1058	dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1059
1060	/*
1061	 *  Remove it from the d-linked destination list.
1062	 */
1063	list_del_rcu(&dest->n_list);
1064	svc->num_dests--;
1065
1066	if (dest->af != svc->af)
1067		net_ipvs(svc->net)->mixed_address_family_dests--;
1068
1069	if (svcupd) {
1070		struct ip_vs_scheduler *sched;
1071
1072		sched = rcu_dereference_protected(svc->scheduler, 1);
1073		if (sched->del_dest)
1074			sched->del_dest(svc, dest);
1075	}
1076}
1077
1078
1079/*
1080 *	Delete a destination server in the given service
1081 */
1082static int
1083ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1084{
1085	struct ip_vs_dest *dest;
1086	__be16 dport = udest->port;
1087
1088	EnterFunction(2);
1089
1090	/* We use function that requires RCU lock */
1091	rcu_read_lock();
1092	dest = ip_vs_lookup_dest(svc, udest->af, &udest->addr, dport);
1093	rcu_read_unlock();
1094
1095	if (dest == NULL) {
1096		IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
1097		return -ENOENT;
1098	}
1099
1100	/*
1101	 *	Unlink dest from the service
1102	 */
1103	__ip_vs_unlink_dest(svc, dest, 1);
1104
1105	/*
1106	 *	Delete the destination
1107	 */
1108	__ip_vs_del_dest(svc->net, dest, false);
1109
1110	LeaveFunction(2);
1111
1112	return 0;
1113}
1114
/* Trash reaper timer: free dests that have sat unreferenced in the trash
 * for a full IP_VS_DEST_TRASH_PERIOD; rearms itself while the trash is
 * non-empty.  @data is the struct net pointer armed with the timer.
 */
static void ip_vs_dest_trash_expire(unsigned long data)
{
	struct net *net = (struct net *) data;
	struct netns_ipvs *ipvs = net_ipvs(net);
	struct ip_vs_dest *dest, *next;
	unsigned long now = jiffies;

	spin_lock(&ipvs->dest_trash_lock);
	list_for_each_entry_safe(dest, next, &ipvs->dest_trash, t_list) {
		/* still referenced by connections: not reapable yet */
		if (atomic_read(&dest->refcnt) > 0)
			continue;
		if (dest->idle_start) {
			/* wait out the grace period before freeing */
			if (time_before(now, dest->idle_start +
					     IP_VS_DEST_TRASH_PERIOD))
				continue;
		} else {
			/* first pass with refcnt 0: start the idle clock
			 * (max with 1 so the timestamp is never 0, which
			 * means "not idle") */
			dest->idle_start = max(1UL, now);
			continue;
		}
		IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u from trash\n",
			      dest->vfwmark,
			      IP_VS_DBG_ADDR(dest->af, &dest->addr),
			      ntohs(dest->port));
		list_del(&dest->t_list);
		ip_vs_dest_free(dest);
	}
	if (!list_empty(&ipvs->dest_trash))
		mod_timer(&ipvs->dest_trash_timer,
			  jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1));
	spin_unlock(&ipvs->dest_trash_lock);
}
1146
1147/*
1148 *	Add a service into the service hash table
1149 */
1150static int
1151ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
1152		  struct ip_vs_service **svc_p)
1153{
1154	int ret = 0, i;
1155	struct ip_vs_scheduler *sched = NULL;
1156	struct ip_vs_pe *pe = NULL;
1157	struct ip_vs_service *svc = NULL;
1158	struct netns_ipvs *ipvs = net_ipvs(net);
1159
1160	/* increase the module use count */
1161	ip_vs_use_count_inc();
1162
1163	/* Lookup the scheduler by 'u->sched_name' */
1164	sched = ip_vs_scheduler_get(u->sched_name);
1165	if (sched == NULL) {
1166		pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1167		ret = -ENOENT;
1168		goto out_err;
1169	}
1170
1171	if (u->pe_name && *u->pe_name) {
1172		pe = ip_vs_pe_getbyname(u->pe_name);
1173		if (pe == NULL) {
1174			pr_info("persistence engine module ip_vs_pe_%s "
1175				"not found\n", u->pe_name);
1176			ret = -ENOENT;
1177			goto out_err;
1178		}
1179	}
1180
1181#ifdef CONFIG_IP_VS_IPV6
1182	if (u->af == AF_INET6) {
1183		__u32 plen = (__force __u32) u->netmask;
1184
1185		if (plen < 1 || plen > 128) {
1186			ret = -EINVAL;
1187			goto out_err;
1188		}
1189	}
1190#endif
1191
1192	svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
1193	if (svc == NULL) {
1194		IP_VS_DBG(1, "%s(): no memory\n", __func__);
1195		ret = -ENOMEM;
1196		goto out_err;
1197	}
1198	svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
1199	if (!svc->stats.cpustats) {
1200		ret = -ENOMEM;
1201		goto out_err;
1202	}
1203
1204	for_each_possible_cpu(i) {
1205		struct ip_vs_cpu_stats *ip_vs_stats;
1206		ip_vs_stats = per_cpu_ptr(svc->stats.cpustats, i);
1207		u64_stats_init(&ip_vs_stats->syncp);
1208	}
1209
1210
1211	/* I'm the first user of the service */
1212	atomic_set(&svc->refcnt, 0);
1213
1214	svc->af = u->af;
1215	svc->protocol = u->protocol;
1216	ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
1217	svc->port = u->port;
1218	svc->fwmark = u->fwmark;
1219	svc->flags = u->flags;
1220	svc->timeout = u->timeout * HZ;
1221	svc->netmask = u->netmask;
1222	svc->net = net;
1223
1224	INIT_LIST_HEAD(&svc->destinations);
1225	spin_lock_init(&svc->sched_lock);
1226	spin_lock_init(&svc->stats.lock);
1227
1228	/* Bind the scheduler */
1229	ret = ip_vs_bind_scheduler(svc, sched);
1230	if (ret)
1231		goto out_err;
1232	sched = NULL;
1233
1234	/* Bind the ct retriever */
1235	RCU_INIT_POINTER(svc->pe, pe);
1236	pe = NULL;
1237
1238	/* Update the virtual service counters */
1239	if (svc->port == FTPPORT)
1240		atomic_inc(&ipvs->ftpsvc_counter);
1241	else if (svc->port == 0)
1242		atomic_inc(&ipvs->nullsvc_counter);
1243
1244	ip_vs_start_estimator(net, &svc->stats);
1245
1246	/* Count only IPv4 services for old get/setsockopt interface */
1247	if (svc->af == AF_INET)
1248		ipvs->num_services++;
1249
1250	/* Hash the service into the service table */
1251	ip_vs_svc_hash(svc);
1252
1253	*svc_p = svc;
1254	/* Now there is a service - full throttle */
1255	ipvs->enable = 1;
1256	return 0;
1257
1258
1259 out_err:
1260	if (svc != NULL) {
1261		ip_vs_unbind_scheduler(svc, sched);
1262		ip_vs_service_free(svc);
1263	}
1264	ip_vs_scheduler_put(sched);
1265	ip_vs_pe_put(pe);
1266
1267	/* decrease the module use count */
1268	ip_vs_use_count_dec();
1269
1270	return ret;
1271}
1272
1273
1274/*
1275 *	Edit a service and bind it with a new scheduler
1276 */
1277static int
1278ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1279{
1280	struct ip_vs_scheduler *sched, *old_sched;
1281	struct ip_vs_pe *pe = NULL, *old_pe = NULL;
1282	int ret = 0;
1283
1284	/*
1285	 * Lookup the scheduler, by 'u->sched_name'
1286	 */
1287	sched = ip_vs_scheduler_get(u->sched_name);
1288	if (sched == NULL) {
1289		pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1290		return -ENOENT;
1291	}
1292	old_sched = sched;
1293
1294	if (u->pe_name && *u->pe_name) {
1295		pe = ip_vs_pe_getbyname(u->pe_name);
1296		if (pe == NULL) {
1297			pr_info("persistence engine module ip_vs_pe_%s "
1298				"not found\n", u->pe_name);
1299			ret = -ENOENT;
1300			goto out;
1301		}
1302		old_pe = pe;
1303	}
1304
1305#ifdef CONFIG_IP_VS_IPV6
1306	if (u->af == AF_INET6) {
1307		__u32 plen = (__force __u32) u->netmask;
1308
1309		if (plen < 1 || plen > 128) {
1310			ret = -EINVAL;
1311			goto out;
1312		}
1313	}
1314#endif
1315
1316	old_sched = rcu_dereference_protected(svc->scheduler, 1);
1317	if (sched != old_sched) {
1318		/* Bind the new scheduler */
1319		ret = ip_vs_bind_scheduler(svc, sched);
1320		if (ret) {
1321			old_sched = sched;
1322			goto out;
1323		}
1324		/* Unbind the old scheduler on success */
1325		ip_vs_unbind_scheduler(svc, old_sched);
1326	}
1327
1328	/*
1329	 * Set the flags and timeout value
1330	 */
1331	svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1332	svc->timeout = u->timeout * HZ;
1333	svc->netmask = u->netmask;
1334
1335	old_pe = rcu_dereference_protected(svc->pe, 1);
1336	if (pe != old_pe)
1337		rcu_assign_pointer(svc->pe, pe);
1338
1339out:
1340	ip_vs_scheduler_put(old_sched);
1341	ip_vs_pe_put(old_pe);
1342	return ret;
1343}
1344
1345/*
1346 *	Delete a service from the service list
1347 *	- The service must be unlinked, unlocked and not referenced!
1348 *	- We are called under _bh lock
1349 */
1350static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup)
1351{
1352	struct ip_vs_dest *dest, *nxt;
1353	struct ip_vs_scheduler *old_sched;
1354	struct ip_vs_pe *old_pe;
1355	struct netns_ipvs *ipvs = net_ipvs(svc->net);
1356
1357	pr_info("%s: enter\n", __func__);
1358
1359	/* Count only IPv4 services for old get/setsockopt interface */
1360	if (svc->af == AF_INET)
1361		ipvs->num_services--;
1362
1363	ip_vs_stop_estimator(svc->net, &svc->stats);
1364
1365	/* Unbind scheduler */
1366	old_sched = rcu_dereference_protected(svc->scheduler, 1);
1367	ip_vs_unbind_scheduler(svc, old_sched);
1368	ip_vs_scheduler_put(old_sched);
1369
1370	/* Unbind persistence engine, keep svc->pe */
1371	old_pe = rcu_dereference_protected(svc->pe, 1);
1372	ip_vs_pe_put(old_pe);
1373
1374	/*
1375	 *    Unlink the whole destination list
1376	 */
1377	list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1378		__ip_vs_unlink_dest(svc, dest, 0);
1379		__ip_vs_del_dest(svc->net, dest, cleanup);
1380	}
1381
1382	/*
1383	 *    Update the virtual service counters
1384	 */
1385	if (svc->port == FTPPORT)
1386		atomic_dec(&ipvs->ftpsvc_counter);
1387	else if (svc->port == 0)
1388		atomic_dec(&ipvs->nullsvc_counter);
1389
1390	/*
1391	 *    Free the service if nobody refers to it
1392	 */
1393	__ip_vs_svc_put(svc, true);
1394
1395	/* decrease the module use count */
1396	ip_vs_use_count_dec();
1397}
1398
1399/*
1400 * Unlink a service from list and try to delete it if its refcnt reached 0
1401 */
1402static void ip_vs_unlink_service(struct ip_vs_service *svc, bool cleanup)
1403{
1404	/* Hold svc to avoid double release from dest_trash */
1405	atomic_inc(&svc->refcnt);
1406	/*
1407	 * Unhash it from the service table
1408	 */
1409	ip_vs_svc_unhash(svc);
1410
1411	__ip_vs_del_service(svc, cleanup);
1412}
1413
1414/*
1415 *	Delete a service from the service list
1416 */
1417static int ip_vs_del_service(struct ip_vs_service *svc)
1418{
1419	if (svc == NULL)
1420		return -EEXIST;
1421	ip_vs_unlink_service(svc, false);
1422
1423	return 0;
1424}
1425
1426
1427/*
1428 *	Flush all the virtual services
1429 */
1430static int ip_vs_flush(struct net *net, bool cleanup)
1431{
1432	int idx;
1433	struct ip_vs_service *svc;
1434	struct hlist_node *n;
1435
1436	/*
1437	 * Flush the service table hashed by <netns,protocol,addr,port>
1438	 */
1439	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1440		hlist_for_each_entry_safe(svc, n, &ip_vs_svc_table[idx],
1441					  s_list) {
1442			if (net_eq(svc->net, net))
1443				ip_vs_unlink_service(svc, cleanup);
1444		}
1445	}
1446
1447	/*
1448	 * Flush the service table hashed by fwmark
1449	 */
1450	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1451		hlist_for_each_entry_safe(svc, n, &ip_vs_svc_fwm_table[idx],
1452					  f_list) {
1453			if (net_eq(svc->net, net))
1454				ip_vs_unlink_service(svc, cleanup);
1455		}
1456	}
1457
1458	return 0;
1459}
1460
1461/*
1462 *	Delete service by {netns} in the service table.
1463 *	Called by __ip_vs_cleanup()
1464 */
1465void ip_vs_service_net_cleanup(struct net *net)
1466{
1467	EnterFunction(2);
1468	/* Check for "full" addressed entries */
1469	mutex_lock(&__ip_vs_mutex);
1470	ip_vs_flush(net, true);
1471	mutex_unlock(&__ip_vs_mutex);
1472	LeaveFunction(2);
1473}
1474
1475/* Put all references for device (dst_cache) */
1476static inline void
1477ip_vs_forget_dev(struct ip_vs_dest *dest, struct net_device *dev)
1478{
1479	struct ip_vs_dest_dst *dest_dst;
1480
1481	spin_lock_bh(&dest->dst_lock);
1482	dest_dst = rcu_dereference_protected(dest->dest_dst, 1);
1483	if (dest_dst && dest_dst->dst_cache->dev == dev) {
1484		IP_VS_DBG_BUF(3, "Reset dev:%s dest %s:%u ,dest->refcnt=%d\n",
1485			      dev->name,
1486			      IP_VS_DBG_ADDR(dest->af, &dest->addr),
1487			      ntohs(dest->port),
1488			      atomic_read(&dest->refcnt));
1489		__ip_vs_dst_cache_reset(dest);
1490	}
1491	spin_unlock_bh(&dest->dst_lock);
1492
1493}
1494/* Netdev event receiver
1495 * Currently only NETDEV_DOWN is handled to release refs to cached dsts
1496 */
1497static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,
1498			   void *ptr)
1499{
1500	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1501	struct net *net = dev_net(dev);
1502	struct netns_ipvs *ipvs = net_ipvs(net);
1503	struct ip_vs_service *svc;
1504	struct ip_vs_dest *dest;
1505	unsigned int idx;
1506
1507	if (event != NETDEV_DOWN || !ipvs)
1508		return NOTIFY_DONE;
1509	IP_VS_DBG(3, "%s() dev=%s\n", __func__, dev->name);
1510	EnterFunction(2);
1511	mutex_lock(&__ip_vs_mutex);
1512	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1513		hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1514			if (net_eq(svc->net, net)) {
1515				list_for_each_entry(dest, &svc->destinations,
1516						    n_list) {
1517					ip_vs_forget_dev(dest, dev);
1518				}
1519			}
1520		}
1521
1522		hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1523			if (net_eq(svc->net, net)) {
1524				list_for_each_entry(dest, &svc->destinations,
1525						    n_list) {
1526					ip_vs_forget_dev(dest, dev);
1527				}
1528			}
1529
1530		}
1531	}
1532
1533	spin_lock_bh(&ipvs->dest_trash_lock);
1534	list_for_each_entry(dest, &ipvs->dest_trash, t_list) {
1535		ip_vs_forget_dev(dest, dev);
1536	}
1537	spin_unlock_bh(&ipvs->dest_trash_lock);
1538	mutex_unlock(&__ip_vs_mutex);
1539	LeaveFunction(2);
1540	return NOTIFY_DONE;
1541}
1542
1543/*
1544 *	Zero counters in a service or all services
1545 */
1546static int ip_vs_zero_service(struct ip_vs_service *svc)
1547{
1548	struct ip_vs_dest *dest;
1549
1550	list_for_each_entry(dest, &svc->destinations, n_list) {
1551		ip_vs_zero_stats(&dest->stats);
1552	}
1553	ip_vs_zero_stats(&svc->stats);
1554	return 0;
1555}
1556
1557static int ip_vs_zero_all(struct net *net)
1558{
1559	int idx;
1560	struct ip_vs_service *svc;
1561
1562	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1563		hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1564			if (net_eq(svc->net, net))
1565				ip_vs_zero_service(svc);
1566		}
1567	}
1568
1569	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1570		hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1571			if (net_eq(svc->net, net))
1572				ip_vs_zero_service(svc);
1573		}
1574	}
1575
1576	ip_vs_zero_stats(&net_ipvs(net)->tot_stats);
1577	return 0;
1578}
1579
1580#ifdef CONFIG_SYSCTL
1581
/* Min/max bounds for proc_dointvec_minmax entries (sync_retries: 0..3) */
static int zero;
static int three = 3;
1584
1585static int
1586proc_do_defense_mode(struct ctl_table *table, int write,
1587		     void __user *buffer, size_t *lenp, loff_t *ppos)
1588{
1589	struct net *net = current->nsproxy->net_ns;
1590	int *valp = table->data;
1591	int val = *valp;
1592	int rc;
1593
1594	rc = proc_dointvec(table, write, buffer, lenp, ppos);
1595	if (write && (*valp != val)) {
1596		if ((*valp < 0) || (*valp > 3)) {
1597			/* Restore the correct value */
1598			*valp = val;
1599		} else {
1600			update_defense_level(net_ipvs(net));
1601		}
1602	}
1603	return rc;
1604}
1605
1606static int
1607proc_do_sync_threshold(struct ctl_table *table, int write,
1608		       void __user *buffer, size_t *lenp, loff_t *ppos)
1609{
1610	int *valp = table->data;
1611	int val[2];
1612	int rc;
1613
1614	/* backup the value first */
1615	memcpy(val, valp, sizeof(val));
1616
1617	rc = proc_dointvec(table, write, buffer, lenp, ppos);
1618	if (write && (valp[0] < 0 || valp[1] < 0 ||
1619	    (valp[0] >= valp[1] && valp[1]))) {
1620		/* Restore the correct value */
1621		memcpy(valp, val, sizeof(val));
1622	}
1623	return rc;
1624}
1625
1626static int
1627proc_do_sync_mode(struct ctl_table *table, int write,
1628		     void __user *buffer, size_t *lenp, loff_t *ppos)
1629{
1630	int *valp = table->data;
1631	int val = *valp;
1632	int rc;
1633
1634	rc = proc_dointvec(table, write, buffer, lenp, ppos);
1635	if (write && (*valp != val)) {
1636		if ((*valp < 0) || (*valp > 1)) {
1637			/* Restore the correct value */
1638			*valp = val;
1639		}
1640	}
1641	return rc;
1642}
1643
1644static int
1645proc_do_sync_ports(struct ctl_table *table, int write,
1646		   void __user *buffer, size_t *lenp, loff_t *ppos)
1647{
1648	int *valp = table->data;
1649	int val = *valp;
1650	int rc;
1651
1652	rc = proc_dointvec(table, write, buffer, lenp, ppos);
1653	if (write && (*valp != val)) {
1654		if (*valp < 1 || !is_power_of_2(*valp)) {
1655			/* Restore the correct value */
1656			*valp = val;
1657		}
1658	}
1659	return rc;
1660}
1661
1662/*
1663 *	IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1664 *	Do not change order or insert new entries without
1665 *	align with netns init in ip_vs_control_net_init()
1666 */
1667
1668static struct ctl_table vs_vars[] = {
1669	{
1670		.procname	= "amemthresh",
1671		.maxlen		= sizeof(int),
1672		.mode		= 0644,
1673		.proc_handler	= proc_dointvec,
1674	},
1675	{
1676		.procname	= "am_droprate",
1677		.maxlen		= sizeof(int),
1678		.mode		= 0644,
1679		.proc_handler	= proc_dointvec,
1680	},
1681	{
1682		.procname	= "drop_entry",
1683		.maxlen		= sizeof(int),
1684		.mode		= 0644,
1685		.proc_handler	= proc_do_defense_mode,
1686	},
1687	{
1688		.procname	= "drop_packet",
1689		.maxlen		= sizeof(int),
1690		.mode		= 0644,
1691		.proc_handler	= proc_do_defense_mode,
1692	},
1693#ifdef CONFIG_IP_VS_NFCT
1694	{
1695		.procname	= "conntrack",
1696		.maxlen		= sizeof(int),
1697		.mode		= 0644,
1698		.proc_handler	= &proc_dointvec,
1699	},
1700#endif
1701	{
1702		.procname	= "secure_tcp",
1703		.maxlen		= sizeof(int),
1704		.mode		= 0644,
1705		.proc_handler	= proc_do_defense_mode,
1706	},
1707	{
1708		.procname	= "snat_reroute",
1709		.maxlen		= sizeof(int),
1710		.mode		= 0644,
1711		.proc_handler	= &proc_dointvec,
1712	},
1713	{
1714		.procname	= "sync_version",
1715		.maxlen		= sizeof(int),
1716		.mode		= 0644,
1717		.proc_handler	= &proc_do_sync_mode,
1718	},
1719	{
1720		.procname	= "sync_ports",
1721		.maxlen		= sizeof(int),
1722		.mode		= 0644,
1723		.proc_handler	= &proc_do_sync_ports,
1724	},
1725	{
1726		.procname	= "sync_persist_mode",
1727		.maxlen		= sizeof(int),
1728		.mode		= 0644,
1729		.proc_handler	= proc_dointvec,
1730	},
1731	{
1732		.procname	= "sync_qlen_max",
1733		.maxlen		= sizeof(unsigned long),
1734		.mode		= 0644,
1735		.proc_handler	= proc_doulongvec_minmax,
1736	},
1737	{
1738		.procname	= "sync_sock_size",
1739		.maxlen		= sizeof(int),
1740		.mode		= 0644,
1741		.proc_handler	= proc_dointvec,
1742	},
1743	{
1744		.procname	= "cache_bypass",
1745		.maxlen		= sizeof(int),
1746		.mode		= 0644,
1747		.proc_handler	= proc_dointvec,
1748	},
1749	{
1750		.procname	= "expire_nodest_conn",
1751		.maxlen		= sizeof(int),
1752		.mode		= 0644,
1753		.proc_handler	= proc_dointvec,
1754	},
1755	{
1756		.procname	= "sloppy_tcp",
1757		.maxlen		= sizeof(int),
1758		.mode		= 0644,
1759		.proc_handler	= proc_dointvec,
1760	},
1761	{
1762		.procname	= "sloppy_sctp",
1763		.maxlen		= sizeof(int),
1764		.mode		= 0644,
1765		.proc_handler	= proc_dointvec,
1766	},
1767	{
1768		.procname	= "expire_quiescent_template",
1769		.maxlen		= sizeof(int),
1770		.mode		= 0644,
1771		.proc_handler	= proc_dointvec,
1772	},
1773	{
1774		.procname	= "sync_threshold",
1775		.maxlen		=
1776			sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold),
1777		.mode		= 0644,
1778		.proc_handler	= proc_do_sync_threshold,
1779	},
1780	{
1781		.procname	= "sync_refresh_period",
1782		.maxlen		= sizeof(int),
1783		.mode		= 0644,
1784		.proc_handler	= proc_dointvec_jiffies,
1785	},
1786	{
1787		.procname	= "sync_retries",
1788		.maxlen		= sizeof(int),
1789		.mode		= 0644,
1790		.proc_handler	= proc_dointvec_minmax,
1791		.extra1		= &zero,
1792		.extra2		= &three,
1793	},
1794	{
1795		.procname	= "nat_icmp_send",
1796		.maxlen		= sizeof(int),
1797		.mode		= 0644,
1798		.proc_handler	= proc_dointvec,
1799	},
1800	{
1801		.procname	= "pmtu_disc",
1802		.maxlen		= sizeof(int),
1803		.mode		= 0644,
1804		.proc_handler	= proc_dointvec,
1805	},
1806	{
1807		.procname	= "backup_only",
1808		.maxlen		= sizeof(int),
1809		.mode		= 0644,
1810		.proc_handler	= proc_dointvec,
1811	},
1812#ifdef CONFIG_IP_VS_DEBUG
1813	{
1814		.procname	= "debug_level",
1815		.data		= &sysctl_ip_vs_debug_level,
1816		.maxlen		= sizeof(int),
1817		.mode		= 0644,
1818		.proc_handler	= proc_dointvec,
1819	},
1820#endif
1821	{ }
1822};
1823
1824#endif
1825
1826#ifdef CONFIG_PROC_FS
1827
/* Iterator state for the /proc/net/ip_vs seq_file walk */
struct ip_vs_iter {
	struct seq_net_private p;  /* Do not move this, netns depends upon it*/
	struct hlist_head *table;  /* table currently walked: svc or fwm */
	int bucket;                /* current bucket index in that table */
};
1833
1834/*
1835 *	Write the contents of the VS rule table to a PROCfs file.
1836 *	(It is kept just for backward compatibility)
1837 */
1838static inline const char *ip_vs_fwd_name(unsigned int flags)
1839{
1840	switch (flags & IP_VS_CONN_F_FWD_MASK) {
1841	case IP_VS_CONN_F_LOCALNODE:
1842		return "Local";
1843	case IP_VS_CONN_F_TUNNEL:
1844		return "Tunnel";
1845	case IP_VS_CONN_F_DROUTE:
1846		return "Route";
1847	default:
1848		return "Masq";
1849	}
1850}
1851
1852
/* Get the Nth entry in the two lists (Nth service of this netns,
 * counting across the protocol table first, then the fwmark table).
 * Records the table/bucket reached in the iterator so that _next can
 * resume from there.  Returns NULL when pos is past the last entry.
 */
static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
{
	struct net *net = seq_file_net(seq);
	struct ip_vs_iter *iter = seq->private;
	int idx;
	struct ip_vs_service *svc;

	/* look in hash by protocol; pos-- == 0 hits on the pos'th match */
	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[idx], s_list) {
			if (net_eq(svc->net, net) && pos-- == 0) {
				iter->table = ip_vs_svc_table;
				iter->bucket = idx;
				return svc;
			}
		}
	}

	/* keep looking in fwmark, continuing the same countdown */
	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[idx],
					 f_list) {
			if (net_eq(svc->net, net) && pos-- == 0) {
				iter->table = ip_vs_svc_fwm_table;
				iter->bucket = idx;
				return svc;
			}
		}
	}

	return NULL;
}
1886
1887static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
1888	__acquires(RCU)
1889{
1890	rcu_read_lock();
1891	return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1892}
1893
1894
/* seq_file next: advance to the following service.  Continues along the
 * current hash chain, then through the remaining buckets of the current
 * table, then falls through from the protocol table to the fwmark table.
 * Note: the hlist_for_each_entry_rcu loops below return on the FIRST
 * entry of a non-empty bucket - they are "find first" idioms, not walks.
 */
static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct hlist_node *e;
	struct ip_vs_iter *iter;
	struct ip_vs_service *svc;

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ip_vs_info_array(seq,0);

	svc = v;
	iter = seq->private;

	if (iter->table == ip_vs_svc_table) {
		/* next service in table hashed by protocol */
		e = rcu_dereference(hlist_next_rcu(&svc->s_list));
		if (e)
			return hlist_entry(e, struct ip_vs_service, s_list);

		/* chain exhausted: first entry of the next non-empty bucket */
		while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
			hlist_for_each_entry_rcu(svc,
						 &ip_vs_svc_table[iter->bucket],
						 s_list) {
				return svc;
			}
		}

		/* protocol table done: switch over to the fwmark table */
		iter->table = ip_vs_svc_fwm_table;
		iter->bucket = -1;
		goto scan_fwmark;
	}

	/* next service in hashed by fwmark */
	e = rcu_dereference(hlist_next_rcu(&svc->f_list));
	if (e)
		return hlist_entry(e, struct ip_vs_service, f_list);

 scan_fwmark:
	while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
		hlist_for_each_entry_rcu(svc,
					 &ip_vs_svc_fwm_table[iter->bucket],
					 f_list)
			return svc;
	}

	return NULL;
}
1942
1943static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
1944	__releases(RCU)
1945{
1946	rcu_read_unlock();
1947}
1948
1949
/* seq_file show: print one service and all its destinations, or the
 * header lines for the SEQ_START_TOKEN.  Runs under rcu_read_lock
 * taken in ip_vs_info_seq_start().
 */
static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN) {
		seq_printf(seq,
			"IP Virtual Server version %d.%d.%d (size=%d)\n",
			NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
		seq_puts(seq,
			 "Prot LocalAddress:Port Scheduler Flags\n");
		seq_puts(seq,
			 "  -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
	} else {
		const struct ip_vs_service *svc = v;
		const struct ip_vs_iter *iter = seq->private;
		const struct ip_vs_dest *dest;
		struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler);

		if (iter->table == ip_vs_svc_table) {
#ifdef CONFIG_IP_VS_IPV6
			/* NOTE(review): unlike the IPv4/FWM branches, the
			 * IPv6 format omits the "ops " one-packet flag -
			 * confirm whether this asymmetry is intentional */
			if (svc->af == AF_INET6)
				seq_printf(seq, "%s  [%pI6]:%04X %s ",
					   ip_vs_proto_name(svc->protocol),
					   &svc->addr.in6,
					   ntohs(svc->port),
					   sched->name);
			else
#endif
				seq_printf(seq, "%s  %08X:%04X %s %s ",
					   ip_vs_proto_name(svc->protocol),
					   ntohl(svc->addr.ip),
					   ntohs(svc->port),
					   sched->name,
					   (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
		} else {
			/* Service hashed by firewall mark */
			seq_printf(seq, "FWM  %08X %s %s",
				   svc->fwmark, sched->name,
				   (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
		}

		if (svc->flags & IP_VS_SVC_F_PERSISTENT)
			seq_printf(seq, "persistent %d %08X\n",
				svc->timeout,
				ntohl(svc->netmask));
		else
			seq_putc(seq, '\n');

		/* One line per real server of this service */
		list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
#ifdef CONFIG_IP_VS_IPV6
			if (dest->af == AF_INET6)
				seq_printf(seq,
					   "  -> [%pI6]:%04X"
					   "      %-7s %-6d %-10d %-10d\n",
					   &dest->addr.in6,
					   ntohs(dest->port),
					   ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
					   atomic_read(&dest->weight),
					   atomic_read(&dest->activeconns),
					   atomic_read(&dest->inactconns));
			else
#endif
				seq_printf(seq,
					   "  -> %08X:%04X      "
					   "%-7s %-6d %-10d %-10d\n",
					   ntohl(dest->addr.ip),
					   ntohs(dest->port),
					   ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
					   atomic_read(&dest->weight),
					   atomic_read(&dest->activeconns),
					   atomic_read(&dest->inactconns));

		}
	}
	return 0;
}
2023
/* seq_file operations for /proc/net/ip_vs */
static const struct seq_operations ip_vs_info_seq_ops = {
	.start = ip_vs_info_seq_start,
	.next  = ip_vs_info_seq_next,
	.stop  = ip_vs_info_seq_stop,
	.show  = ip_vs_info_seq_show,
};
2030
2031static int ip_vs_info_open(struct inode *inode, struct file *file)
2032{
2033	return seq_open_net(inode, file, &ip_vs_info_seq_ops,
2034			sizeof(struct ip_vs_iter));
2035}
2036
/* file_operations for /proc/net/ip_vs */
static const struct file_operations ip_vs_info_fops = {
	.owner	 = THIS_MODULE,
	.open    = ip_vs_info_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};
2044
2045static int ip_vs_stats_show(struct seq_file *seq, void *v)
2046{
2047	struct net *net = seq_file_single_net(seq);
2048	struct ip_vs_stats_user show;
2049
2050/*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
2051	seq_puts(seq,
2052		 "   Total Incoming Outgoing         Incoming         Outgoing\n");
2053	seq_printf(seq,
2054		   "   Conns  Packets  Packets            Bytes            Bytes\n");
2055
2056	ip_vs_copy_stats(&show, &net_ipvs(net)->tot_stats);
2057	seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", show.conns,
2058		   show.inpkts, show.outpkts,
2059		   (unsigned long long) show.inbytes,
2060		   (unsigned long long) show.outbytes);
2061
2062/*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2063	seq_puts(seq,
2064		   " Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
2065	seq_printf(seq, "%8X %8X %8X %16X %16X\n",
2066			show.cps, show.inpps, show.outpps,
2067			show.inbps, show.outbps);
2068
2069	return 0;
2070}
2071
2072static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
2073{
2074	return single_open_net(inode, file, ip_vs_stats_show);
2075}
2076
/* file_operations for /proc/net/ip_vs_stats */
static const struct file_operations ip_vs_stats_fops = {
	.owner = THIS_MODULE,
	.open = ip_vs_stats_seq_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release_net,
};
2084
2085static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
2086{
2087	struct net *net = seq_file_single_net(seq);
2088	struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats;
2089	struct ip_vs_cpu_stats __percpu *cpustats = tot_stats->cpustats;
2090	struct ip_vs_stats_user rates;
2091	int i;
2092
2093/*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
2094	seq_puts(seq,
2095		 "       Total Incoming Outgoing         Incoming         Outgoing\n");
2096	seq_printf(seq,
2097		   "CPU    Conns  Packets  Packets            Bytes            Bytes\n");
2098
2099	for_each_possible_cpu(i) {
2100		struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i);
2101		unsigned int start;
2102		__u64 inbytes, outbytes;
2103
2104		do {
2105			start = u64_stats_fetch_begin_irq(&u->syncp);
2106			inbytes = u->ustats.inbytes;
2107			outbytes = u->ustats.outbytes;
2108		} while (u64_stats_fetch_retry_irq(&u->syncp, start));
2109
2110		seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n",
2111			   i, u->ustats.conns, u->ustats.inpkts,
2112			   u->ustats.outpkts, (__u64)inbytes,
2113			   (__u64)outbytes);
2114	}
2115
2116	spin_lock_bh(&tot_stats->lock);
2117
2118	seq_printf(seq, "  ~ %8X %8X %8X %16LX %16LX\n\n",
2119		   tot_stats->ustats.conns, tot_stats->ustats.inpkts,
2120		   tot_stats->ustats.outpkts,
2121		   (unsigned long long) tot_stats->ustats.inbytes,
2122		   (unsigned long long) tot_stats->ustats.outbytes);
2123
2124	ip_vs_read_estimator(&rates, tot_stats);
2125
2126	spin_unlock_bh(&tot_stats->lock);
2127
2128/*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2129	seq_puts(seq,
2130		   "     Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
2131	seq_printf(seq, "    %8X %8X %8X %16X %16X\n",
2132			rates.cps,
2133			rates.inpps,
2134			rates.outpps,
2135			rates.inbps,
2136			rates.outbps);
2137
2138	return 0;
2139}
2140
2141static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file)
2142{
2143	return single_open_net(inode, file, ip_vs_stats_percpu_show);
2144}
2145
/* file_operations for /proc/net/ip_vs_stats_percpu */
static const struct file_operations ip_vs_stats_percpu_fops = {
	.owner = THIS_MODULE,
	.open = ip_vs_stats_percpu_seq_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release_net,
};
2153#endif
2154
2155/*
2156 *	Set timeout values for tcp tcpfin udp in the timeout_table.
2157 */
2158static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
2159{
2160#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
2161	struct ip_vs_proto_data *pd;
2162#endif
2163
2164	IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2165		  u->tcp_timeout,
2166		  u->tcp_fin_timeout,
2167		  u->udp_timeout);
2168
2169#ifdef CONFIG_IP_VS_PROTO_TCP
2170	if (u->tcp_timeout) {
2171		pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2172		pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
2173			= u->tcp_timeout * HZ;
2174	}
2175
2176	if (u->tcp_fin_timeout) {
2177		pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2178		pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
2179			= u->tcp_fin_timeout * HZ;
2180	}
2181#endif
2182
2183#ifdef CONFIG_IP_VS_PROTO_UDP
2184	if (u->udp_timeout) {
2185		pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
2186		pd->timeout_table[IP_VS_UDP_S_NORMAL]
2187			= u->udp_timeout * HZ;
2188	}
2189#endif
2190	return 0;
2191}
2192
/* Map a sockopt command to a zero-based index into the arglen tables */
#define CMDID(cmd)		(cmd - IP_VS_BASE_CTL)

/* Layout of the ADDDEST/DELDEST/EDITDEST sockopt payload:
 * a service selector immediately followed by a destination */
struct ip_vs_svcdest_user {
	struct ip_vs_service_user	s;
	struct ip_vs_dest_user		d;
};
2199
/* Exact payload length expected for each set-sockopt command;
 * unlisted commands (e.g. FLUSH) get 0 from static initialization */
static const unsigned char set_arglen[CMDID(IP_VS_SO_SET_MAX) + 1] = {
	[CMDID(IP_VS_SO_SET_ADD)]         = sizeof(struct ip_vs_service_user),
	[CMDID(IP_VS_SO_SET_EDIT)]        = sizeof(struct ip_vs_service_user),
	[CMDID(IP_VS_SO_SET_DEL)]         = sizeof(struct ip_vs_service_user),
	[CMDID(IP_VS_SO_SET_ADDDEST)]     = sizeof(struct ip_vs_svcdest_user),
	[CMDID(IP_VS_SO_SET_DELDEST)]     = sizeof(struct ip_vs_svcdest_user),
	[CMDID(IP_VS_SO_SET_EDITDEST)]    = sizeof(struct ip_vs_svcdest_user),
	[CMDID(IP_VS_SO_SET_TIMEOUT)]     = sizeof(struct ip_vs_timeout_user),
	[CMDID(IP_VS_SO_SET_STARTDAEMON)] = sizeof(struct ip_vs_daemon_user),
	[CMDID(IP_VS_SO_SET_STOPDAEMON)]  = sizeof(struct ip_vs_daemon_user),
	[CMDID(IP_VS_SO_SET_ZERO)]        = sizeof(struct ip_vs_service_user),
};
2212
/* Union of all set-sockopt payloads: its sizeof gives the largest
 * possible argument, used to size the on-stack copy buffer */
union ip_vs_set_arglen {
	struct ip_vs_service_user	field_IP_VS_SO_SET_ADD;
	struct ip_vs_service_user	field_IP_VS_SO_SET_EDIT;
	struct ip_vs_service_user	field_IP_VS_SO_SET_DEL;
	struct ip_vs_svcdest_user	field_IP_VS_SO_SET_ADDDEST;
	struct ip_vs_svcdest_user	field_IP_VS_SO_SET_DELDEST;
	struct ip_vs_svcdest_user	field_IP_VS_SO_SET_EDITDEST;
	struct ip_vs_timeout_user	field_IP_VS_SO_SET_TIMEOUT;
	struct ip_vs_daemon_user	field_IP_VS_SO_SET_STARTDAEMON;
	struct ip_vs_daemon_user	field_IP_VS_SO_SET_STOPDAEMON;
	struct ip_vs_service_user	field_IP_VS_SO_SET_ZERO;
};

#define MAX_SET_ARGLEN	sizeof(union ip_vs_set_arglen)
2227
2228static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2229				  struct ip_vs_service_user *usvc_compat)
2230{
2231	memset(usvc, 0, sizeof(*usvc));
2232
2233	usvc->af		= AF_INET;
2234	usvc->protocol		= usvc_compat->protocol;
2235	usvc->addr.ip		= usvc_compat->addr;
2236	usvc->port		= usvc_compat->port;
2237	usvc->fwmark		= usvc_compat->fwmark;
2238
2239	/* Deep copy of sched_name is not needed here */
2240	usvc->sched_name	= usvc_compat->sched_name;
2241
2242	usvc->flags		= usvc_compat->flags;
2243	usvc->timeout		= usvc_compat->timeout;
2244	usvc->netmask		= usvc_compat->netmask;
2245}
2246
2247static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2248				   struct ip_vs_dest_user *udest_compat)
2249{
2250	memset(udest, 0, sizeof(*udest));
2251
2252	udest->addr.ip		= udest_compat->addr;
2253	udest->port		= udest_compat->port;
2254	udest->conn_flags	= udest_compat->conn_flags;
2255	udest->weight		= udest_compat->weight;
2256	udest->u_threshold	= udest_compat->u_threshold;
2257	udest->l_threshold	= udest_compat->l_threshold;
2258	udest->af		= AF_INET;
2259}
2260
/*
 * do_ip_vs_set_ctl - "set" handler for the legacy IPVS sockopt interface.
 *
 * Validates the command and argument length, copies the argument from
 * userspace into a fixed on-stack buffer, then dispatches:
 *   - START/STOPDAEMON run under ipvs->sync_mutex (the sync threads have
 *     their own lock and must not take __ip_vs_mutex);
 *   - everything else runs under the global __ip_vs_mutex.
 *
 * Returns 0 on success or a negative errno.  The module use count is
 * held for the duration of the call.
 */
static int
do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
{
	struct net *net = sock_net(sk);
	int ret;
	unsigned char arg[MAX_SET_ARGLEN];
	struct ip_vs_service_user *usvc_compat;
	struct ip_vs_service_user_kern usvc;
	struct ip_vs_service *svc;
	struct ip_vs_dest_user *udest_compat;
	struct ip_vs_dest_user_kern udest;
	struct netns_ipvs *ipvs = net_ipvs(net);

	/* set_arglen entries are unsigned char, so no argument may exceed 255 */
	BUILD_BUG_ON(sizeof(arg) > 255);
	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
		return -EINVAL;
	/* the argument length must match the command's struct exactly */
	if (len != set_arglen[CMDID(cmd)]) {
		IP_VS_DBG(1, "set_ctl: len %u != %u\n",
			  len, set_arglen[CMDID(cmd)]);
		return -EINVAL;
	}

	if (copy_from_user(arg, user, len) != 0)
		return -EFAULT;

	/* increase the module use count */
	ip_vs_use_count_inc();

	/* Handle daemons since they have another lock */
	if (cmd == IP_VS_SO_SET_STARTDAEMON ||
	    cmd == IP_VS_SO_SET_STOPDAEMON) {
		struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;

		mutex_lock(&ipvs->sync_mutex);
		if (cmd == IP_VS_SO_SET_STARTDAEMON)
			ret = start_sync_thread(net, dm->state, dm->mcast_ifn,
						dm->syncid);
		else
			ret = stop_sync_thread(net, dm->state);
		mutex_unlock(&ipvs->sync_mutex);
		goto out_dec;
	}

	mutex_lock(&__ip_vs_mutex);
	if (cmd == IP_VS_SO_SET_FLUSH) {
		/* Flush the virtual service */
		ret = ip_vs_flush(net, false);
		goto out_unlock;
	} else if (cmd == IP_VS_SO_SET_TIMEOUT) {
		/* Set timeout values for (tcp tcpfin udp) */
		ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg);
		goto out_unlock;
	}

	/* All remaining commands carry a service spec, optionally followed
	 * by a destination spec, packed back to back in the argument.
	 */
	usvc_compat = (struct ip_vs_service_user *)arg;
	udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);

	/* We only use the new structs internally, so copy userspace compat
	 * structs to extended internal versions */
	ip_vs_copy_usvc_compat(&usvc, usvc_compat);
	ip_vs_copy_udest_compat(&udest, udest_compat);

	if (cmd == IP_VS_SO_SET_ZERO) {
		/* if no service address is set, zero counters in all */
		if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
			ret = ip_vs_zero_all(net);
			goto out_unlock;
		}
	}

	/* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
	if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
	    usvc.protocol != IPPROTO_SCTP) {
		/* NOTE(review): -EFAULT is unusual for a validation failure
		 * (-EINVAL would be conventional), but it is kept as-is since
		 * userspace may depend on the existing errno. */
		pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
		       usvc.protocol, &usvc.addr.ip,
		       ntohs(usvc.port), usvc.sched_name);
		ret = -EFAULT;
		goto out_unlock;
	}

	/* Lookup the exact service by <protocol, addr, port> or fwmark */
	rcu_read_lock();
	if (usvc.fwmark == 0)
		svc = __ip_vs_service_find(net, usvc.af, usvc.protocol,
					   &usvc.addr, usvc.port);
	else
		svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark);
	rcu_read_unlock();

	/* Only ADD may operate on a non-existing service */
	if (cmd != IP_VS_SO_SET_ADD
	    && (svc == NULL || svc->protocol != usvc.protocol)) {
		ret = -ESRCH;
		goto out_unlock;
	}

	switch (cmd) {
	case IP_VS_SO_SET_ADD:
		if (svc != NULL)
			ret = -EEXIST;
		else
			ret = ip_vs_add_service(net, &usvc, &svc);
		break;
	case IP_VS_SO_SET_EDIT:
		ret = ip_vs_edit_service(svc, &usvc);
		break;
	case IP_VS_SO_SET_DEL:
		ret = ip_vs_del_service(svc);
		if (!ret)
			goto out_unlock;
		break;
	case IP_VS_SO_SET_ZERO:
		ret = ip_vs_zero_service(svc);
		break;
	case IP_VS_SO_SET_ADDDEST:
		ret = ip_vs_add_dest(svc, &udest);
		break;
	case IP_VS_SO_SET_EDITDEST:
		ret = ip_vs_edit_dest(svc, &udest);
		break;
	case IP_VS_SO_SET_DELDEST:
		ret = ip_vs_del_dest(svc, &udest);
		break;
	default:
		ret = -EINVAL;
	}

  out_unlock:
	mutex_unlock(&__ip_vs_mutex);
  out_dec:
	/* decrease the module use count */
	ip_vs_use_count_dec();

	return ret;
}
2398
2399
2400static void
2401ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2402{
2403	struct ip_vs_scheduler *sched;
2404
2405	sched = rcu_dereference_protected(src->scheduler, 1);
2406	dst->protocol = src->protocol;
2407	dst->addr = src->addr.ip;
2408	dst->port = src->port;
2409	dst->fwmark = src->fwmark;
2410	strlcpy(dst->sched_name, sched->name, sizeof(dst->sched_name));
2411	dst->flags = src->flags;
2412	dst->timeout = src->timeout / HZ;
2413	dst->netmask = src->netmask;
2414	dst->num_dests = src->num_dests;
2415	ip_vs_copy_stats(&dst->stats, &src->stats);
2416}
2417
/*
 * __ip_vs_get_service_entries - copy all IPv4 services of @net into the
 * userspace array at @uptr->entrytable.
 *
 * Walks both hash tables (address-based and fwmark-based), skipping
 * non-IPv4 services and services belonging to other network namespaces.
 * At most get->num_services entries are written.  Returns 0 or -EFAULT
 * if a copy_to_user() fails.  Caller is expected to hold __ip_vs_mutex
 * (the tables are walked without RCU protection here).
 */
static inline int
__ip_vs_get_service_entries(struct net *net,
			    const struct ip_vs_get_services *get,
			    struct ip_vs_get_services __user *uptr)
{
	int idx, count=0;
	struct ip_vs_service *svc;
	struct ip_vs_service_entry entry;
	int ret = 0;

	/* pass 1: services hashed by <protocol, addr, port> */
	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
			/* Only expose IPv4 entries to old interface */
			if (svc->af != AF_INET || !net_eq(svc->net, net))
				continue;

			if (count >= get->num_services)
				goto out;
			memset(&entry, 0, sizeof(entry));
			ip_vs_copy_service(&entry, svc);
			if (copy_to_user(&uptr->entrytable[count],
					 &entry, sizeof(entry))) {
				ret = -EFAULT;
				goto out;
			}
			count++;
		}
	}

	/* pass 2: services hashed by firewall mark */
	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
			/* Only expose IPv4 entries to old interface */
			if (svc->af != AF_INET || !net_eq(svc->net, net))
				continue;

			if (count >= get->num_services)
				goto out;
			memset(&entry, 0, sizeof(entry));
			ip_vs_copy_service(&entry, svc);
			if (copy_to_user(&uptr->entrytable[count],
					 &entry, sizeof(entry))) {
				ret = -EFAULT;
				goto out;
			}
			count++;
		}
	}
out:
	return ret;
}
2468
/*
 * __ip_vs_get_dest_entries - copy the destinations of one service into
 * the userspace array at @uptr->entrytable.
 *
 * The service is looked up by fwmark or by <protocol, addr, port>
 * (IPv4 only) under RCU.  Destinations with an address family different
 * from the service's are skipped, since the legacy sockopt entry format
 * cannot represent heterogeneous members.  At most get->num_dests
 * entries are written.  Returns 0, -ESRCH if the service does not
 * exist, or -EFAULT on a failed copy_to_user().
 */
static inline int
__ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
			 struct ip_vs_get_dests __user *uptr)
{
	struct ip_vs_service *svc;
	union nf_inet_addr addr = { .ip = get->addr };
	int ret = 0;

	rcu_read_lock();
	if (get->fwmark)
		svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark);
	else
		svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr,
					   get->port);
	rcu_read_unlock();

	if (svc) {
		int count = 0;
		struct ip_vs_dest *dest;
		struct ip_vs_dest_entry entry;

		/* zeroed once; every named field is reassigned per
		 * iteration, so padding stays clear across copies */
		memset(&entry, 0, sizeof(entry));
		list_for_each_entry(dest, &svc->destinations, n_list) {
			if (count >= get->num_dests)
				break;

			/* Cannot expose heterogeneous members via sockopt
			 * interface
			 */
			if (dest->af != svc->af)
				continue;

			entry.addr = dest->addr.ip;
			entry.port = dest->port;
			entry.conn_flags = atomic_read(&dest->conn_flags);
			entry.weight = atomic_read(&dest->weight);
			entry.u_threshold = dest->u_threshold;
			entry.l_threshold = dest->l_threshold;
			entry.activeconns = atomic_read(&dest->activeconns);
			entry.inactconns = atomic_read(&dest->inactconns);
			entry.persistconns = atomic_read(&dest->persistconns);
			ip_vs_copy_stats(&entry.stats, &dest->stats);
			if (copy_to_user(&uptr->entrytable[count],
					 &entry, sizeof(entry))) {
				ret = -EFAULT;
				break;
			}
			count++;
		}
	} else
		ret = -ESRCH;
	return ret;
}
2522
2523static inline void
2524__ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u)
2525{
2526#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
2527	struct ip_vs_proto_data *pd;
2528#endif
2529
2530	memset(u, 0, sizeof (*u));
2531
2532#ifdef CONFIG_IP_VS_PROTO_TCP
2533	pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2534	u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2535	u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2536#endif
2537#ifdef CONFIG_IP_VS_PROTO_UDP
2538	pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
2539	u->udp_timeout =
2540			pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
2541#endif
2542}
2543
/* Minimum argument length for each GET sockopt command, indexed by
 * CMDID().  do_ip_vs_get_ctl() rejects shorter user buffers.
 */
static const unsigned char get_arglen[CMDID(IP_VS_SO_GET_MAX) + 1] = {
	[CMDID(IP_VS_SO_GET_VERSION)]  = 64,
	[CMDID(IP_VS_SO_GET_INFO)]     = sizeof(struct ip_vs_getinfo),
	[CMDID(IP_VS_SO_GET_SERVICES)] = sizeof(struct ip_vs_get_services),
	[CMDID(IP_VS_SO_GET_SERVICE)]  = sizeof(struct ip_vs_service_entry),
	[CMDID(IP_VS_SO_GET_DESTS)]    = sizeof(struct ip_vs_get_dests),
	[CMDID(IP_VS_SO_GET_TIMEOUT)]  = sizeof(struct ip_vs_timeout_user),
	[CMDID(IP_VS_SO_GET_DAEMON)]   = 2 * sizeof(struct ip_vs_daemon_user),
};

/* Union sized to the largest GET argument; bounds the on-stack copy
 * buffer in do_ip_vs_get_ctl().  One member per command above.
 */
union ip_vs_get_arglen {
	char				field_IP_VS_SO_GET_VERSION[64];
	struct ip_vs_getinfo		field_IP_VS_SO_GET_INFO;
	struct ip_vs_get_services	field_IP_VS_SO_GET_SERVICES;
	struct ip_vs_service_entry	field_IP_VS_SO_GET_SERVICE;
	struct ip_vs_get_dests		field_IP_VS_SO_GET_DESTS;
	struct ip_vs_timeout_user	field_IP_VS_SO_GET_TIMEOUT;
	struct ip_vs_daemon_user	field_IP_VS_SO_GET_DAEMON[2];
};

#define MAX_GET_ARGLEN	sizeof(union ip_vs_get_arglen)
2565
/*
 * do_ip_vs_get_ctl - "get" handler for the legacy IPVS sockopt interface.
 *
 * Copies up to get_arglen[CMDID(cmd)] bytes of argument from userspace,
 * then answers the query.  GET_DAEMON is served first under
 * ipvs->sync_mutex; all other commands run under __ip_vs_mutex.
 * *len may be updated (GET_VERSION) to the number of bytes written.
 * Returns 0 or a negative errno.
 */
static int
do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
{
	unsigned char arg[MAX_GET_ARGLEN];
	int ret = 0;
	unsigned int copylen;
	struct net *net = sock_net(sk);
	struct netns_ipvs *ipvs = net_ipvs(net);

	BUG_ON(!net);
	/* get_arglen entries are unsigned char, so no argument may exceed 255 */
	BUILD_BUG_ON(sizeof(arg) > 255);
	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
		return -EINVAL;

	/* the user buffer must be at least as large as the fixed header */
	copylen = get_arglen[CMDID(cmd)];
	if (*len < (int) copylen) {
		IP_VS_DBG(1, "get_ctl: len %d < %u\n", *len, copylen);
		return -EINVAL;
	}

	if (copy_from_user(arg, user, copylen) != 0)
		return -EFAULT;
	/*
	 * Handle daemons first since it has its own locking
	 */
	if (cmd == IP_VS_SO_GET_DAEMON) {
		/* d[0] describes the master daemon, d[1] the backup; an
		 * inactive daemon slot is reported as all-zeroes */
		struct ip_vs_daemon_user d[2];

		memset(&d, 0, sizeof(d));
		mutex_lock(&ipvs->sync_mutex);
		if (ipvs->sync_state & IP_VS_STATE_MASTER) {
			d[0].state = IP_VS_STATE_MASTER;
			strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn,
				sizeof(d[0].mcast_ifn));
			d[0].syncid = ipvs->master_syncid;
		}
		if (ipvs->sync_state & IP_VS_STATE_BACKUP) {
			d[1].state = IP_VS_STATE_BACKUP;
			strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn,
				sizeof(d[1].mcast_ifn));
			d[1].syncid = ipvs->backup_syncid;
		}
		if (copy_to_user(user, &d, sizeof(d)) != 0)
			ret = -EFAULT;
		mutex_unlock(&ipvs->sync_mutex);
		return ret;
	}

	mutex_lock(&__ip_vs_mutex);
	switch (cmd) {
	case IP_VS_SO_GET_VERSION:
	{
		/* format output is bounded well below 64 bytes */
		char buf[64];

		sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
			NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
		if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
			ret = -EFAULT;
			goto out;
		}
		*len = strlen(buf)+1;
	}
	break;

	case IP_VS_SO_GET_INFO:
	{
		struct ip_vs_getinfo info;
		info.version = IP_VS_VERSION_CODE;
		info.size = ip_vs_conn_tab_size;
		info.num_services = ipvs->num_services;
		if (copy_to_user(user, &info, sizeof(info)) != 0)
			ret = -EFAULT;
	}
	break;

	case IP_VS_SO_GET_SERVICES:
	{
		struct ip_vs_get_services *get;
		int size;

		/* user must size the buffer for exactly num_services entries */
		get = (struct ip_vs_get_services *)arg;
		size = sizeof(*get) +
			sizeof(struct ip_vs_service_entry) * get->num_services;
		if (*len != size) {
			pr_err("length: %u != %u\n", *len, size);
			ret = -EINVAL;
			goto out;
		}
		ret = __ip_vs_get_service_entries(net, get, user);
	}
	break;

	case IP_VS_SO_GET_SERVICE:
	{
		struct ip_vs_service_entry *entry;
		struct ip_vs_service *svc;
		union nf_inet_addr addr;

		/* lookup by fwmark or <protocol, addr, port>, IPv4 only */
		entry = (struct ip_vs_service_entry *)arg;
		addr.ip = entry->addr;
		rcu_read_lock();
		if (entry->fwmark)
			svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark);
		else
			svc = __ip_vs_service_find(net, AF_INET,
						   entry->protocol, &addr,
						   entry->port);
		rcu_read_unlock();
		if (svc) {
			ip_vs_copy_service(entry, svc);
			if (copy_to_user(user, entry, sizeof(*entry)) != 0)
				ret = -EFAULT;
		} else
			ret = -ESRCH;
	}
	break;

	case IP_VS_SO_GET_DESTS:
	{
		struct ip_vs_get_dests *get;
		int size;

		/* user must size the buffer for exactly num_dests entries */
		get = (struct ip_vs_get_dests *)arg;
		size = sizeof(*get) +
			sizeof(struct ip_vs_dest_entry) * get->num_dests;
		if (*len != size) {
			pr_err("length: %u != %u\n", *len, size);
			ret = -EINVAL;
			goto out;
		}
		ret = __ip_vs_get_dest_entries(net, get, user);
	}
	break;

	case IP_VS_SO_GET_TIMEOUT:
	{
		struct ip_vs_timeout_user t;

		__ip_vs_get_timeouts(net, &t);
		if (copy_to_user(user, &t, sizeof(t)) != 0)
			ret = -EFAULT;
	}
	break;

	default:
		ret = -EINVAL;
	}

out:
	mutex_unlock(&__ip_vs_mutex);
	return ret;
}
2721
2722
/* Registration record for the legacy [gs]etsockopt interface; covers the
 * whole IP_VS_BASE_CTL..IP_VS_SO_{SET,GET}_MAX command ranges.
 */
static struct nf_sockopt_ops ip_vs_sockopts = {
	.pf		= PF_INET,
	.set_optmin	= IP_VS_BASE_CTL,
	.set_optmax	= IP_VS_SO_SET_MAX+1,
	.set		= do_ip_vs_set_ctl,
	.get_optmin	= IP_VS_BASE_CTL,
	.get_optmax	= IP_VS_SO_GET_MAX+1,
	.get		= do_ip_vs_get_ctl,
	.owner		= THIS_MODULE,
};
2733
2734/*
2735 * Generic Netlink interface
2736 */
2737
2738/* IPVS genetlink family */
/* IPVS genetlink family */
static struct genl_family ip_vs_genl_family = {
	.id		= GENL_ID_GENERATE,
	.hdrsize	= 0,
	.name		= IPVS_GENL_NAME,
	.version	= IPVS_GENL_VERSION,
	.maxattr	= IPVS_CMD_MAX,
	.netnsok        = true,         /* Make ipvsadm to work on netns */
};

/* Policy used for first-level command attributes */
static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
	[IPVS_CMD_ATTR_SERVICE]		= { .type = NLA_NESTED },
	[IPVS_CMD_ATTR_DEST]		= { .type = NLA_NESTED },
	[IPVS_CMD_ATTR_DAEMON]		= { .type = NLA_NESTED },
	[IPVS_CMD_ATTR_TIMEOUT_TCP]	= { .type = NLA_U32 },
	[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]	= { .type = NLA_U32 },
	[IPVS_CMD_ATTR_TIMEOUT_UDP]	= { .type = NLA_U32 },
};

/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
	[IPVS_DAEMON_ATTR_STATE]	= { .type = NLA_U32 },
	[IPVS_DAEMON_ATTR_MCAST_IFN]	= { .type = NLA_NUL_STRING,
					    .len = IP_VS_IFNAME_MAXLEN },
	[IPVS_DAEMON_ATTR_SYNC_ID]	= { .type = NLA_U32 },
};

/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
	[IPVS_SVC_ATTR_AF]		= { .type = NLA_U16 },
	[IPVS_SVC_ATTR_PROTOCOL]	= { .type = NLA_U16 },
	[IPVS_SVC_ATTR_ADDR]		= { .type = NLA_BINARY,
					    .len = sizeof(union nf_inet_addr) },
	[IPVS_SVC_ATTR_PORT]		= { .type = NLA_U16 },
	[IPVS_SVC_ATTR_FWMARK]		= { .type = NLA_U32 },
	[IPVS_SVC_ATTR_SCHED_NAME]	= { .type = NLA_NUL_STRING,
					    .len = IP_VS_SCHEDNAME_MAXLEN },
	[IPVS_SVC_ATTR_PE_NAME]		= { .type = NLA_NUL_STRING,
					    .len = IP_VS_PENAME_MAXLEN },
	[IPVS_SVC_ATTR_FLAGS]		= { .type = NLA_BINARY,
					    .len = sizeof(struct ip_vs_flags) },
	[IPVS_SVC_ATTR_TIMEOUT]		= { .type = NLA_U32 },
	[IPVS_SVC_ATTR_NETMASK]		= { .type = NLA_U32 },
	[IPVS_SVC_ATTR_STATS]		= { .type = NLA_NESTED },
};

/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
	[IPVS_DEST_ATTR_ADDR]		= { .type = NLA_BINARY,
					    .len = sizeof(union nf_inet_addr) },
	[IPVS_DEST_ATTR_PORT]		= { .type = NLA_U16 },
	[IPVS_DEST_ATTR_FWD_METHOD]	= { .type = NLA_U32 },
	[IPVS_DEST_ATTR_WEIGHT]		= { .type = NLA_U32 },
	[IPVS_DEST_ATTR_U_THRESH]	= { .type = NLA_U32 },
	[IPVS_DEST_ATTR_L_THRESH]	= { .type = NLA_U32 },
	[IPVS_DEST_ATTR_ACTIVE_CONNS]	= { .type = NLA_U32 },
	[IPVS_DEST_ATTR_INACT_CONNS]	= { .type = NLA_U32 },
	[IPVS_DEST_ATTR_PERSIST_CONNS]	= { .type = NLA_U32 },
	[IPVS_DEST_ATTR_STATS]		= { .type = NLA_NESTED },
	[IPVS_DEST_ATTR_ADDR_FAMILY]	= { .type = NLA_U16 },
};
2800
2801static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2802				 struct ip_vs_stats *stats)
2803{
2804	struct ip_vs_stats_user ustats;
2805	struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2806	if (!nl_stats)
2807		return -EMSGSIZE;
2808
2809	ip_vs_copy_stats(&ustats, stats);
2810
2811	if (nla_put_u32(skb, IPVS_STATS_ATTR_CONNS, ustats.conns) ||
2812	    nla_put_u32(skb, IPVS_STATS_ATTR_INPKTS, ustats.inpkts) ||
2813	    nla_put_u32(skb, IPVS_STATS_ATTR_OUTPKTS, ustats.outpkts) ||
2814	    nla_put_u64(skb, IPVS_STATS_ATTR_INBYTES, ustats.inbytes) ||
2815	    nla_put_u64(skb, IPVS_STATS_ATTR_OUTBYTES, ustats.outbytes) ||
2816	    nla_put_u32(skb, IPVS_STATS_ATTR_CPS, ustats.cps) ||
2817	    nla_put_u32(skb, IPVS_STATS_ATTR_INPPS, ustats.inpps) ||
2818	    nla_put_u32(skb, IPVS_STATS_ATTR_OUTPPS, ustats.outpps) ||
2819	    nla_put_u32(skb, IPVS_STATS_ATTR_INBPS, ustats.inbps) ||
2820	    nla_put_u32(skb, IPVS_STATS_ATTR_OUTBPS, ustats.outbps))
2821		goto nla_put_failure;
2822	nla_nest_end(skb, nl_stats);
2823
2824	return 0;
2825
2826nla_put_failure:
2827	nla_nest_cancel(skb, nl_stats);
2828	return -EMSGSIZE;
2829}
2830
2831static int ip_vs_genl_fill_service(struct sk_buff *skb,
2832				   struct ip_vs_service *svc)
2833{
2834	struct ip_vs_scheduler *sched;
2835	struct ip_vs_pe *pe;
2836	struct nlattr *nl_service;
2837	struct ip_vs_flags flags = { .flags = svc->flags,
2838				     .mask = ~0 };
2839
2840	nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2841	if (!nl_service)
2842		return -EMSGSIZE;
2843
2844	if (nla_put_u16(skb, IPVS_SVC_ATTR_AF, svc->af))
2845		goto nla_put_failure;
2846	if (svc->fwmark) {
2847		if (nla_put_u32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark))
2848			goto nla_put_failure;
2849	} else {
2850		if (nla_put_u16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol) ||
2851		    nla_put(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr) ||
2852		    nla_put_be16(skb, IPVS_SVC_ATTR_PORT, svc->port))
2853			goto nla_put_failure;
2854	}
2855
2856	sched = rcu_dereference_protected(svc->scheduler, 1);
2857	pe = rcu_dereference_protected(svc->pe, 1);
2858	if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, sched->name) ||
2859	    (pe && nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, pe->name)) ||
2860	    nla_put(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags) ||
2861	    nla_put_u32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ) ||
2862	    nla_put_be32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask))
2863		goto nla_put_failure;
2864	if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2865		goto nla_put_failure;
2866
2867	nla_nest_end(skb, nl_service);
2868
2869	return 0;
2870
2871nla_put_failure:
2872	nla_nest_cancel(skb, nl_service);
2873	return -EMSGSIZE;
2874}
2875
2876static int ip_vs_genl_dump_service(struct sk_buff *skb,
2877				   struct ip_vs_service *svc,
2878				   struct netlink_callback *cb)
2879{
2880	void *hdr;
2881
2882	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
2883			  &ip_vs_genl_family, NLM_F_MULTI,
2884			  IPVS_CMD_NEW_SERVICE);
2885	if (!hdr)
2886		return -EMSGSIZE;
2887
2888	if (ip_vs_genl_fill_service(skb, svc) < 0)
2889		goto nla_put_failure;
2890
2891	return genlmsg_end(skb, hdr);
2892
2893nla_put_failure:
2894	genlmsg_cancel(skb, hdr);
2895	return -EMSGSIZE;
2896}
2897
/*
 * ip_vs_genl_dump_services - netlink dump callback for GET_SERVICE.
 *
 * Walks both service hash tables under __ip_vs_mutex, skipping entries
 * from other network namespaces and the first cb->args[0] entries
 * already emitted by previous dump passes.  When the skb fills up, idx
 * is rewound by one and saved in cb->args[0] so the next call resumes
 * at the failed entry.  Always returns skb->len, per dump convention.
 */
static int ip_vs_genl_dump_services(struct sk_buff *skb,
				    struct netlink_callback *cb)
{
	int idx = 0, i;
	int start = cb->args[0];
	struct ip_vs_service *svc;
	struct net *net = skb_sknet(skb);

	mutex_lock(&__ip_vs_mutex);
	/* pass 1: address-based services */
	for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
		hlist_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
			if (++idx <= start || !net_eq(svc->net, net))
				continue;
			if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
				idx--;
				goto nla_put_failure;
			}
		}
	}

	/* pass 2: fwmark-based services */
	for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
		hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
			if (++idx <= start || !net_eq(svc->net, net))
				continue;
			if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
				idx--;
				goto nla_put_failure;
			}
		}
	}

nla_put_failure:
	mutex_unlock(&__ip_vs_mutex);
	cb->args[0] = idx;

	return skb->len;
}
2935
/*
 * ip_vs_genl_parse_service - parse a nested IPVS_CMD_ATTR_SERVICE
 * attribute into @usvc and look up the matching service.
 *
 * The identifying fields (af plus either fwmark or protocol/addr/port)
 * are mandatory.  When @full_entry is set, the scheduler name, flags,
 * timeout and netmask attributes are required as well (pe_name stays
 * optional).  *ret_svc is set to the existing service or NULL.
 *
 * Returns 0, -EINVAL on missing/malformed attributes, or -EAFNOSUPPORT
 * for an unsupported address family.
 *
 * Note: usvc->sched_name/pe_name point into the netlink message, so
 * @usvc must not outlive the request being processed.
 */
static int ip_vs_genl_parse_service(struct net *net,
				    struct ip_vs_service_user_kern *usvc,
				    struct nlattr *nla, int full_entry,
				    struct ip_vs_service **ret_svc)
{
	struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
	struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
	struct ip_vs_service *svc;

	/* Parse mandatory identifying service fields first */
	if (nla == NULL ||
	    nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
		return -EINVAL;

	nla_af		= attrs[IPVS_SVC_ATTR_AF];
	nla_protocol	= attrs[IPVS_SVC_ATTR_PROTOCOL];
	nla_addr	= attrs[IPVS_SVC_ATTR_ADDR];
	nla_port	= attrs[IPVS_SVC_ATTR_PORT];
	nla_fwmark	= attrs[IPVS_SVC_ATTR_FWMARK];

	/* need af plus either a fwmark or a full address triple */
	if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
		return -EINVAL;

	memset(usvc, 0, sizeof(*usvc));

	usvc->af = nla_get_u16(nla_af);
#ifdef CONFIG_IP_VS_IPV6
	if (usvc->af != AF_INET && usvc->af != AF_INET6)
#else
	if (usvc->af != AF_INET)
#endif
		return -EAFNOSUPPORT;

	if (nla_fwmark) {
		/* fwmark services are keyed on the mark; the protocol is a
		 * placeholder value */
		usvc->protocol = IPPROTO_TCP;
		usvc->fwmark = nla_get_u32(nla_fwmark);
	} else {
		usvc->protocol = nla_get_u16(nla_protocol);
		nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
		usvc->port = nla_get_be16(nla_port);
		usvc->fwmark = 0;
	}

	rcu_read_lock();
	if (usvc->fwmark)
		svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark);
	else
		svc = __ip_vs_service_find(net, usvc->af, usvc->protocol,
					   &usvc->addr, usvc->port);
	rcu_read_unlock();
	*ret_svc = svc;

	/* If a full entry was requested, check for the additional fields */
	if (full_entry) {
		struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout,
			      *nla_netmask;
		struct ip_vs_flags flags;

		nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
		nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME];
		nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
		nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
		nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];

		if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
			return -EINVAL;

		nla_memcpy(&flags, nla_flags, sizeof(flags));

		/* prefill flags from service if it already exists */
		if (svc)
			usvc->flags = svc->flags;

		/* set new flags from userland */
		usvc->flags = (usvc->flags & ~flags.mask) |
			      (flags.flags & flags.mask);
		usvc->sched_name = nla_data(nla_sched);
		usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
		usvc->timeout = nla_get_u32(nla_timeout);
		usvc->netmask = nla_get_be32(nla_netmask);
	}

	return 0;
}
3020
3021static struct ip_vs_service *ip_vs_genl_find_service(struct net *net,
3022						     struct nlattr *nla)
3023{
3024	struct ip_vs_service_user_kern usvc;
3025	struct ip_vs_service *svc;
3026	int ret;
3027
3028	ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc);
3029	return ret ? ERR_PTR(ret) : svc;
3030}
3031
3032static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
3033{
3034	struct nlattr *nl_dest;
3035
3036	nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
3037	if (!nl_dest)
3038		return -EMSGSIZE;
3039
3040	if (nla_put(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr) ||
3041	    nla_put_be16(skb, IPVS_DEST_ATTR_PORT, dest->port) ||
3042	    nla_put_u32(skb, IPVS_DEST_ATTR_FWD_METHOD,
3043			(atomic_read(&dest->conn_flags) &
3044			 IP_VS_CONN_F_FWD_MASK)) ||
3045	    nla_put_u32(skb, IPVS_DEST_ATTR_WEIGHT,
3046			atomic_read(&dest->weight)) ||
3047	    nla_put_u32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold) ||
3048	    nla_put_u32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold) ||
3049	    nla_put_u32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
3050			atomic_read(&dest->activeconns)) ||
3051	    nla_put_u32(skb, IPVS_DEST_ATTR_INACT_CONNS,
3052			atomic_read(&dest->inactconns)) ||
3053	    nla_put_u32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
3054			atomic_read(&dest->persistconns)) ||
3055	    nla_put_u16(skb, IPVS_DEST_ATTR_ADDR_FAMILY, dest->af))
3056		goto nla_put_failure;
3057	if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
3058		goto nla_put_failure;
3059
3060	nla_nest_end(skb, nl_dest);
3061
3062	return 0;
3063
3064nla_put_failure:
3065	nla_nest_cancel(skb, nl_dest);
3066	return -EMSGSIZE;
3067}
3068
3069static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
3070				struct netlink_callback *cb)
3071{
3072	void *hdr;
3073
3074	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
3075			  &ip_vs_genl_family, NLM_F_MULTI,
3076			  IPVS_CMD_NEW_DEST);
3077	if (!hdr)
3078		return -EMSGSIZE;
3079
3080	if (ip_vs_genl_fill_dest(skb, dest) < 0)
3081		goto nla_put_failure;
3082
3083	return genlmsg_end(skb, hdr);
3084
3085nla_put_failure:
3086	genlmsg_cancel(skb, hdr);
3087	return -EMSGSIZE;
3088}
3089
/*
 * ip_vs_genl_dump_dests - netlink dump callback for GET_DEST.
 *
 * Re-parses the request's service attribute on every dump pass, then
 * emits destinations, skipping the first cb->args[0] entries already
 * sent.  On a full skb, idx is rewound and stored in cb->args[0] so the
 * next pass resumes at the failed destination.  A missing or unknown
 * service simply ends the dump.  Always returns skb->len.
 */
static int ip_vs_genl_dump_dests(struct sk_buff *skb,
				 struct netlink_callback *cb)
{
	int idx = 0;
	int start = cb->args[0];
	struct ip_vs_service *svc;
	struct ip_vs_dest *dest;
	struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
	struct net *net = skb_sknet(skb);

	mutex_lock(&__ip_vs_mutex);

	/* Try to find the service for which to dump destinations */
	if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
			IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
		goto out_err;


	svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]);
	if (IS_ERR(svc) || svc == NULL)
		goto out_err;

	/* Dump the destinations */
	list_for_each_entry(dest, &svc->destinations, n_list) {
		if (++idx <= start)
			continue;
		if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
			idx--;
			goto nla_put_failure;
		}
	}

nla_put_failure:
	cb->args[0] = idx;

out_err:
	mutex_unlock(&__ip_vs_mutex);

	return skb->len;
}
3130
/*
 * ip_vs_genl_parse_dest - parse a nested IPVS_CMD_ATTR_DEST attribute
 * into @udest.
 *
 * Address and port are always mandatory.  An absent address-family
 * attribute leaves udest->af == 0 (caller decides the fallback).  When
 * @full_entry is set, forwarding method, weight and both thresholds
 * are required as well.  Returns 0 or -EINVAL.
 */
static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
				 struct nlattr *nla, int full_entry)
{
	struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
	struct nlattr *nla_addr, *nla_port;
	struct nlattr *nla_addr_family;

	/* Parse mandatory identifying destination fields first */
	if (nla == NULL ||
	    nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
		return -EINVAL;

	nla_addr	= attrs[IPVS_DEST_ATTR_ADDR];
	nla_port	= attrs[IPVS_DEST_ATTR_PORT];
	nla_addr_family	= attrs[IPVS_DEST_ATTR_ADDR_FAMILY];

	if (!(nla_addr && nla_port))
		return -EINVAL;

	memset(udest, 0, sizeof(*udest));

	nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
	udest->port = nla_get_be16(nla_port);

	/* af == 0 signals "not specified" to the caller */
	if (nla_addr_family)
		udest->af = nla_get_u16(nla_addr_family);
	else
		udest->af = 0;

	/* If a full entry was requested, check for the additional fields */
	if (full_entry) {
		struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
			      *nla_l_thresh;

		nla_fwd		= attrs[IPVS_DEST_ATTR_FWD_METHOD];
		nla_weight	= attrs[IPVS_DEST_ATTR_WEIGHT];
		nla_u_thresh	= attrs[IPVS_DEST_ATTR_U_THRESH];
		nla_l_thresh	= attrs[IPVS_DEST_ATTR_L_THRESH];

		if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
			return -EINVAL;

		/* only the forwarding-method bits are accepted */
		udest->conn_flags = nla_get_u32(nla_fwd)
				    & IP_VS_CONN_F_FWD_MASK;
		udest->weight = nla_get_u32(nla_weight);
		udest->u_threshold = nla_get_u32(nla_u_thresh);
		udest->l_threshold = nla_get_u32(nla_l_thresh);
	}

	return 0;
}
3182
3183static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __u32 state,
3184				  const char *mcast_ifn, __u32 syncid)
3185{
3186	struct nlattr *nl_daemon;
3187
3188	nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
3189	if (!nl_daemon)
3190		return -EMSGSIZE;
3191
3192	if (nla_put_u32(skb, IPVS_DAEMON_ATTR_STATE, state) ||
3193	    nla_put_string(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn) ||
3194	    nla_put_u32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid))
3195		goto nla_put_failure;
3196	nla_nest_end(skb, nl_daemon);
3197
3198	return 0;
3199
3200nla_put_failure:
3201	nla_nest_cancel(skb, nl_daemon);
3202	return -EMSGSIZE;
3203}
3204
3205static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __u32 state,
3206				  const char *mcast_ifn, __u32 syncid,
3207				  struct netlink_callback *cb)
3208{
3209	void *hdr;
3210	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
3211			  &ip_vs_genl_family, NLM_F_MULTI,
3212			  IPVS_CMD_NEW_DAEMON);
3213	if (!hdr)
3214		return -EMSGSIZE;
3215
3216	if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
3217		goto nla_put_failure;
3218
3219	return genlmsg_end(skb, hdr);
3220
3221nla_put_failure:
3222	genlmsg_cancel(skb, hdr);
3223	return -EMSGSIZE;
3224}
3225
/*
 * ip_vs_genl_dump_daemons - netlink dump callback for GET_DAEMON.
 *
 * Emits at most two messages (master, then backup) under
 * ipvs->sync_mutex.  cb->args[0] and cb->args[1] record which of the
 * two has already been sent, so a resumed dump does not repeat them.
 * Always returns skb->len.
 */
static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
				   struct netlink_callback *cb)
{
	struct net *net = skb_sknet(skb);
	struct netns_ipvs *ipvs = net_ipvs(net);

	mutex_lock(&ipvs->sync_mutex);
	if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
		if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
					   ipvs->master_mcast_ifn,
					   ipvs->master_syncid, cb) < 0)
			goto nla_put_failure;

		cb->args[0] = 1;
	}

	if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
		if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
					   ipvs->backup_mcast_ifn,
					   ipvs->backup_syncid, cb) < 0)
			goto nla_put_failure;

		cb->args[1] = 1;
	}

nla_put_failure:
	mutex_unlock(&ipvs->sync_mutex);

	return skb->len;
}
3256
3257static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)
3258{
3259	if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3260	      attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3261	      attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3262		return -EINVAL;
3263
3264	/* The synchronization protocol is incompatible with mixed family
3265	 * services
3266	 */
3267	if (net_ipvs(net)->mixed_address_family_dests > 0)
3268		return -EINVAL;
3269
3270	return start_sync_thread(net,
3271				 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
3272				 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3273				 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
3274}
3275
3276static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs)
3277{
3278	if (!attrs[IPVS_DAEMON_ATTR_STATE])
3279		return -EINVAL;
3280
3281	return stop_sync_thread(net,
3282				nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
3283}
3284
3285static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
3286{
3287	struct ip_vs_timeout_user t;
3288
3289	__ip_vs_get_timeouts(net, &t);
3290
3291	if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3292		t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3293
3294	if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3295		t.tcp_fin_timeout =
3296			nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3297
3298	if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3299		t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3300
3301	return ip_vs_set_timeout(net, &t);
3302}
3303
/*
 * ip_vs_genl_set_daemon - genetlink handler for NEW_DAEMON/DEL_DAEMON.
 *
 * Parses the nested daemon attribute and starts or stops the sync
 * thread under ipvs->sync_mutex (the daemon path deliberately avoids
 * __ip_vs_mutex).  Other commands fall through and return 0.
 */
static int ip_vs_genl_set_daemon(struct sk_buff *skb, struct genl_info *info)
{
	int ret = 0, cmd;
	struct net *net;
	struct netns_ipvs *ipvs;

	net = skb_sknet(skb);
	ipvs = net_ipvs(net);
	cmd = info->genlhdr->cmd;

	if (cmd == IPVS_CMD_NEW_DAEMON || cmd == IPVS_CMD_DEL_DAEMON) {
		struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];

		mutex_lock(&ipvs->sync_mutex);
		if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
		    nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
				     info->attrs[IPVS_CMD_ATTR_DAEMON],
				     ip_vs_daemon_policy)) {
			ret = -EINVAL;
			goto out;
		}

		if (cmd == IPVS_CMD_NEW_DAEMON)
			ret = ip_vs_genl_new_daemon(net, daemon_attrs);
		else
			ret = ip_vs_genl_del_daemon(net, daemon_attrs);
out:
		mutex_unlock(&ipvs->sync_mutex);
	}
	return ret;
}
3335
/* Generic Netlink doit handler for all state-changing service/dest
 * commands (NEW/SET/DEL service, NEW/SET/DEL dest, ZERO, FLUSH,
 * SET_CONFIG).  Dispatches on info->genlhdr->cmd under __ip_vs_mutex.
 * Returns 0 on success or a negative errno.
 */
static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
{
	struct ip_vs_service *svc = NULL;
	struct ip_vs_service_user_kern usvc;
	struct ip_vs_dest_user_kern udest;
	int ret = 0, cmd;
	int need_full_svc = 0, need_full_dest = 0;
	struct net *net;

	net = skb_sknet(skb);
	cmd = info->genlhdr->cmd;

	mutex_lock(&__ip_vs_mutex);

	/* Commands that do not target a single service are handled up
	 * front and jump straight to the unlock path.
	 */
	if (cmd == IPVS_CMD_FLUSH) {
		ret = ip_vs_flush(net, false);
		goto out;
	} else if (cmd == IPVS_CMD_SET_CONFIG) {
		ret = ip_vs_genl_set_config(net, info->attrs);
		goto out;
	} else if (cmd == IPVS_CMD_ZERO &&
		   !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
		/* ZERO without a service attribute zeroes all services */
		ret = ip_vs_zero_all(net);
		goto out;
	}

	/* All following commands require a service argument, so check if we
	 * received a valid one. We need a full service specification when
	 * adding / editing a service. Only identifying members otherwise. */
	if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
		need_full_svc = 1;

	ret = ip_vs_genl_parse_service(net, &usvc,
				       info->attrs[IPVS_CMD_ATTR_SERVICE],
				       need_full_svc, &svc);
	if (ret)
		goto out;

	/* Unless we're adding a new service, the service must already exist */
	if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
		ret = -ESRCH;
		goto out;
	}

	/* Destination commands require a valid destination argument. For
	 * adding / editing a destination, we need a full destination
	 * specification. */
	if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
	    cmd == IPVS_CMD_DEL_DEST) {
		if (cmd != IPVS_CMD_DEL_DEST)
			need_full_dest = 1;

		ret = ip_vs_genl_parse_dest(&udest,
					    info->attrs[IPVS_CMD_ATTR_DEST],
					    need_full_dest);
		if (ret)
			goto out;

		/* Old protocols did not allow the user to specify address
		 * family, so we set it to zero instead.  We also didn't
		 * allow heterogeneous pools in the old code, so it's safe
		 * to assume that this will have the same address family as
		 * the service.
		 */
		if (udest.af == 0)
			udest.af = svc->af;

		if (udest.af != svc->af) {
			/* The synchronization protocol is incompatible
			 * with mixed family services
			 */
			if (net_ipvs(net)->sync_state) {
				ret = -EINVAL;
				goto out;
			}

			/* Which connection types do we support? */
			switch (udest.conn_flags) {
			case IP_VS_CONN_F_TUNNEL:
				/* We are able to forward this */
				break;
			default:
				ret = -EINVAL;
				goto out;
			}
		}
	}

	/* Arguments validated; perform the requested operation */
	switch (cmd) {
	case IPVS_CMD_NEW_SERVICE:
		if (svc == NULL)
			ret = ip_vs_add_service(net, &usvc, &svc);
		else
			ret = -EEXIST;
		break;
	case IPVS_CMD_SET_SERVICE:
		ret = ip_vs_edit_service(svc, &usvc);
		break;
	case IPVS_CMD_DEL_SERVICE:
		ret = ip_vs_del_service(svc);
		/* do not use svc, it can be freed */
		break;
	case IPVS_CMD_NEW_DEST:
		ret = ip_vs_add_dest(svc, &udest);
		break;
	case IPVS_CMD_SET_DEST:
		ret = ip_vs_edit_dest(svc, &udest);
		break;
	case IPVS_CMD_DEL_DEST:
		ret = ip_vs_del_dest(svc, &udest);
		break;
	case IPVS_CMD_ZERO:
		ret = ip_vs_zero_service(svc);
		break;
	default:
		ret = -EINVAL;
	}

out:
	mutex_unlock(&__ip_vs_mutex);

	return ret;
}
3459
/* Generic Netlink doit handler for single-message GET commands
 * (GET_SERVICE, GET_INFO, GET_CONFIG).  Builds a reply message under
 * __ip_vs_mutex and sends it with genlmsg_reply().  Returns 0 or a
 * negative errno; -EMSGSIZE if the reply did not fit.
 */
static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
{
	struct sk_buff *msg;
	void *reply;
	int ret, cmd, reply_cmd;
	struct net *net;

	net = skb_sknet(skb);
	cmd = info->genlhdr->cmd;

	/* Each GET command is answered with its corresponding SET/NEW
	 * command code in the reply header.
	 */
	if (cmd == IPVS_CMD_GET_SERVICE)
		reply_cmd = IPVS_CMD_NEW_SERVICE;
	else if (cmd == IPVS_CMD_GET_INFO)
		reply_cmd = IPVS_CMD_SET_INFO;
	else if (cmd == IPVS_CMD_GET_CONFIG)
		reply_cmd = IPVS_CMD_SET_CONFIG;
	else {
		pr_err("unknown Generic Netlink command\n");
		return -EINVAL;
	}

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	mutex_lock(&__ip_vs_mutex);

	reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
	if (reply == NULL)
		goto nla_put_failure;

	switch (cmd) {
	case IPVS_CMD_GET_SERVICE:
	{
		struct ip_vs_service *svc;

		svc = ip_vs_genl_find_service(net,
					      info->attrs[IPVS_CMD_ATTR_SERVICE]);
		if (IS_ERR(svc)) {
			ret = PTR_ERR(svc);
			goto out_err;
		} else if (svc) {
			ret = ip_vs_genl_fill_service(msg, svc);
			if (ret)
				goto nla_put_failure;
		} else {
			/* no matching service found */
			ret = -ESRCH;
			goto out_err;
		}

		break;
	}

	case IPVS_CMD_GET_CONFIG:
	{
		struct ip_vs_timeout_user t;

		__ip_vs_get_timeouts(net, &t);
#ifdef CONFIG_IP_VS_PROTO_TCP
		if (nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP,
				t.tcp_timeout) ||
		    nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
				t.tcp_fin_timeout))
			goto nla_put_failure;
#endif
#ifdef CONFIG_IP_VS_PROTO_UDP
		if (nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout))
			goto nla_put_failure;
#endif

		break;
	}

	case IPVS_CMD_GET_INFO:
		if (nla_put_u32(msg, IPVS_INFO_ATTR_VERSION,
				IP_VS_VERSION_CODE) ||
		    nla_put_u32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
				ip_vs_conn_tab_size))
			goto nla_put_failure;
		break;
	}

	genlmsg_end(msg, reply);
	ret = genlmsg_reply(msg, info);
	goto out;

nla_put_failure:
	pr_err("not enough space in Netlink message\n");
	ret = -EMSGSIZE;

out_err:
	nlmsg_free(msg);
out:
	mutex_unlock(&__ip_vs_mutex);

	return ret;
}
3557
3558
/* Generic Netlink operation table for the IPVS family.  Set-type
 * commands share ip_vs_genl_set_cmd (daemon commands use
 * ip_vs_genl_set_daemon); single-reply GETs use ip_vs_genl_get_cmd and
 * list-type GETs use the dump callbacks.  All commands require
 * GENL_ADMIN_PERM (CAP_NET_ADMIN).
 */
static const struct genl_ops ip_vs_genl_ops[] = {
	{
		.cmd	= IPVS_CMD_NEW_SERVICE,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.doit	= ip_vs_genl_set_cmd,
	},
	{
		.cmd	= IPVS_CMD_SET_SERVICE,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.doit	= ip_vs_genl_set_cmd,
	},
	{
		.cmd	= IPVS_CMD_DEL_SERVICE,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.doit	= ip_vs_genl_set_cmd,
	},
	{
		.cmd	= IPVS_CMD_GET_SERVICE,
		.flags	= GENL_ADMIN_PERM,
		.doit	= ip_vs_genl_get_cmd,
		.dumpit	= ip_vs_genl_dump_services,
		.policy	= ip_vs_cmd_policy,
	},
	{
		.cmd	= IPVS_CMD_NEW_DEST,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.doit	= ip_vs_genl_set_cmd,
	},
	{
		.cmd	= IPVS_CMD_SET_DEST,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.doit	= ip_vs_genl_set_cmd,
	},
	{
		.cmd	= IPVS_CMD_DEL_DEST,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.doit	= ip_vs_genl_set_cmd,
	},
	{
		.cmd	= IPVS_CMD_GET_DEST,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.dumpit	= ip_vs_genl_dump_dests,
	},
	{
		.cmd	= IPVS_CMD_NEW_DAEMON,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.doit	= ip_vs_genl_set_daemon,
	},
	{
		.cmd	= IPVS_CMD_DEL_DAEMON,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.doit	= ip_vs_genl_set_daemon,
	},
	{
		.cmd	= IPVS_CMD_GET_DAEMON,
		.flags	= GENL_ADMIN_PERM,
		.dumpit	= ip_vs_genl_dump_daemons,
	},
	{
		.cmd	= IPVS_CMD_SET_CONFIG,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.doit	= ip_vs_genl_set_cmd,
	},
	{
		.cmd	= IPVS_CMD_GET_CONFIG,
		.flags	= GENL_ADMIN_PERM,
		.doit	= ip_vs_genl_get_cmd,
	},
	{
		.cmd	= IPVS_CMD_GET_INFO,
		.flags	= GENL_ADMIN_PERM,
		.doit	= ip_vs_genl_get_cmd,
	},
	{
		.cmd	= IPVS_CMD_ZERO,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.doit	= ip_vs_genl_set_cmd,
	},
	{
		.cmd	= IPVS_CMD_FLUSH,
		.flags	= GENL_ADMIN_PERM,
		.doit	= ip_vs_genl_set_cmd,
	},
};
3654
/* Register the IPVS Generic Netlink family together with its
 * operation table.  Returns 0 or a negative errno.
 */
static int __init ip_vs_genl_register(void)
{
	return genl_register_family_with_ops(&ip_vs_genl_family,
					     ip_vs_genl_ops);
}
3660
/* Unregister the IPVS Generic Netlink family (ops go with it). */
static void ip_vs_genl_unregister(void)
{
	genl_unregister_family(&ip_vs_genl_family);
}
3665
3666/* End of Generic Netlink interface definitions */
3667
3668/*
3669 * per netns intit/exit func.
3670 */
3671#ifdef CONFIG_SYSCTL
/* Per-netns sysctl setup: duplicate the vs_vars template for non-init
 * namespaces, point each entry's .data at this namespace's fields, set
 * defaults, and register the table under net/ipv4/vs.  Also starts the
 * stats estimator and schedules the periodic defense work.
 * Returns 0 on success or -ENOMEM.
 *
 * NOTE: the sequential tbl[idx++] assignments below must stay in the
 * same order as the entries in vs_vars[].
 */
static int __net_init ip_vs_control_net_init_sysctl(struct net *net)
{
	int idx;
	struct netns_ipvs *ipvs = net_ipvs(net);
	struct ctl_table *tbl;

	atomic_set(&ipvs->dropentry, 0);
	spin_lock_init(&ipvs->dropentry_lock);
	spin_lock_init(&ipvs->droppacket_lock);
	spin_lock_init(&ipvs->securetcp_lock);

	if (!net_eq(net, &init_net)) {
		/* Child netns gets its own writable copy of the table */
		tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
		if (tbl == NULL)
			return -ENOMEM;

		/* Don't export sysctls to unprivileged users */
		if (net->user_ns != &init_user_ns)
			tbl[0].procname = NULL;
	} else
		tbl = vs_vars;
	/* Initialize sysctl defaults */
	idx = 0;
	ipvs->sysctl_amemthresh = 1024;
	tbl[idx++].data = &ipvs->sysctl_amemthresh;
	ipvs->sysctl_am_droprate = 10;
	tbl[idx++].data = &ipvs->sysctl_am_droprate;
	tbl[idx++].data = &ipvs->sysctl_drop_entry;
	tbl[idx++].data = &ipvs->sysctl_drop_packet;
#ifdef CONFIG_IP_VS_NFCT
	tbl[idx++].data = &ipvs->sysctl_conntrack;
#endif
	tbl[idx++].data = &ipvs->sysctl_secure_tcp;
	ipvs->sysctl_snat_reroute = 1;
	tbl[idx++].data = &ipvs->sysctl_snat_reroute;
	ipvs->sysctl_sync_ver = 1;
	tbl[idx++].data = &ipvs->sysctl_sync_ver;
	ipvs->sysctl_sync_ports = 1;
	tbl[idx++].data = &ipvs->sysctl_sync_ports;
	tbl[idx++].data = &ipvs->sysctl_sync_persist_mode;
	ipvs->sysctl_sync_qlen_max = nr_free_buffer_pages() / 32;
	tbl[idx++].data = &ipvs->sysctl_sync_qlen_max;
	ipvs->sysctl_sync_sock_size = 0;
	tbl[idx++].data = &ipvs->sysctl_sync_sock_size;
	tbl[idx++].data = &ipvs->sysctl_cache_bypass;
	tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
	tbl[idx++].data = &ipvs->sysctl_sloppy_tcp;
	tbl[idx++].data = &ipvs->sysctl_sloppy_sctp;
	tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
	ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD;
	ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD;
	/* sync_threshold is a two-element array; set maxlen explicitly */
	tbl[idx].data = &ipvs->sysctl_sync_threshold;
	tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
	ipvs->sysctl_sync_refresh_period = DEFAULT_SYNC_REFRESH_PERIOD;
	tbl[idx++].data = &ipvs->sysctl_sync_refresh_period;
	ipvs->sysctl_sync_retries = clamp_t(int, DEFAULT_SYNC_RETRIES, 0, 3);
	tbl[idx++].data = &ipvs->sysctl_sync_retries;
	tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
	ipvs->sysctl_pmtu_disc = 1;
	tbl[idx++].data = &ipvs->sysctl_pmtu_disc;
	tbl[idx++].data = &ipvs->sysctl_backup_only;


	ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl);
	if (ipvs->sysctl_hdr == NULL) {
		/* Only the duplicated table is ours to free */
		if (!net_eq(net, &init_net))
			kfree(tbl);
		return -ENOMEM;
	}
	ip_vs_start_estimator(net, &ipvs->tot_stats);
	ipvs->sysctl_tbl = tbl;
	/* Schedule defense work */
	INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
	schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);

	return 0;
}
3749
/* Per-netns sysctl teardown: stop the defense work before the sysctl
 * table it may reference is unregistered, then stop the estimator.
 * Mirrors ip_vs_control_net_init_sysctl() in reverse order.
 */
static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net)
{
	struct netns_ipvs *ipvs = net_ipvs(net);

	cancel_delayed_work_sync(&ipvs->defense_work);
	cancel_work_sync(&ipvs->defense_work.work);
	unregister_net_sysctl_table(ipvs->sysctl_hdr);
	ip_vs_stop_estimator(net, &ipvs->tot_stats);
}
3759
3760#else
3761
/* CONFIG_SYSCTL disabled: the per-netns sysctl hooks become no-ops */
static int __net_init ip_vs_control_net_init_sysctl(struct net *net) { return 0; }
static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) { }
3764
3765#endif
3766
/* Netdevice notifier: ip_vs_dst_event() reacts to device events
 * (registered in ip_vs_control_init() below).
 */
static struct notifier_block ip_vs_dst_notifier = {
	.notifier_call = ip_vs_dst_event,
};
3770
3771int __net_init ip_vs_control_net_init(struct net *net)
3772{
3773	int i, idx;
3774	struct netns_ipvs *ipvs = net_ipvs(net);
3775
3776	/* Initialize rs_table */
3777	for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
3778		INIT_HLIST_HEAD(&ipvs->rs_table[idx]);
3779
3780	INIT_LIST_HEAD(&ipvs->dest_trash);
3781	spin_lock_init(&ipvs->dest_trash_lock);
3782	setup_timer(&ipvs->dest_trash_timer, ip_vs_dest_trash_expire,
3783		    (unsigned long) net);
3784	atomic_set(&ipvs->ftpsvc_counter, 0);
3785	atomic_set(&ipvs->nullsvc_counter, 0);
3786
3787	/* procfs stats */
3788	ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
3789	if (!ipvs->tot_stats.cpustats)
3790		return -ENOMEM;
3791
3792	for_each_possible_cpu(i) {
3793		struct ip_vs_cpu_stats *ipvs_tot_stats;
3794		ipvs_tot_stats = per_cpu_ptr(ipvs->tot_stats.cpustats, i);
3795		u64_stats_init(&ipvs_tot_stats->syncp);
3796	}
3797
3798	spin_lock_init(&ipvs->tot_stats.lock);
3799
3800	proc_create("ip_vs", 0, net->proc_net, &ip_vs_info_fops);
3801	proc_create("ip_vs_stats", 0, net->proc_net, &ip_vs_stats_fops);
3802	proc_create("ip_vs_stats_percpu", 0, net->proc_net,
3803		    &ip_vs_stats_percpu_fops);
3804
3805	if (ip_vs_control_net_init_sysctl(net))
3806		goto err;
3807
3808	return 0;
3809
3810err:
3811	free_percpu(ipvs->tot_stats.cpustats);
3812	return -ENOMEM;
3813}
3814
/* Per-netns control teardown, mirroring ip_vs_control_net_init():
 * flush the dest trash, undo sysctl setup, remove the proc entries and
 * finally free the per-cpu stats.
 */
void __net_exit ip_vs_control_net_cleanup(struct net *net)
{
	struct netns_ipvs *ipvs = net_ipvs(net);

	ip_vs_trash_cleanup(net);
	ip_vs_control_net_cleanup_sysctl(net);
	remove_proc_entry("ip_vs_stats_percpu", net->proc_net);
	remove_proc_entry("ip_vs_stats", net->proc_net);
	remove_proc_entry("ip_vs", net->proc_net);
	free_percpu(ipvs->tot_stats.cpustats);
}
3826
3827int __init ip_vs_register_nl_ioctl(void)
3828{
3829	int ret;
3830
3831	ret = nf_register_sockopt(&ip_vs_sockopts);
3832	if (ret) {
3833		pr_err("cannot register sockopt.\n");
3834		goto err_sock;
3835	}
3836
3837	ret = ip_vs_genl_register();
3838	if (ret) {
3839		pr_err("cannot register Generic Netlink interface.\n");
3840		goto err_genl;
3841	}
3842	return 0;
3843
3844err_genl:
3845	nf_unregister_sockopt(&ip_vs_sockopts);
3846err_sock:
3847	return ret;
3848}
3849
/* Unregister both user-space interfaces, in reverse order of
 * registration.
 */
void ip_vs_unregister_nl_ioctl(void)
{
	ip_vs_genl_unregister();
	nf_unregister_sockopt(&ip_vs_sockopts);
}
3855
3856int __init ip_vs_control_init(void)
3857{
3858	int idx;
3859	int ret;
3860
3861	EnterFunction(2);
3862
3863	/* Initialize svc_table, ip_vs_svc_fwm_table */
3864	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3865		INIT_HLIST_HEAD(&ip_vs_svc_table[idx]);
3866		INIT_HLIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3867	}
3868
3869	smp_wmb();	/* Do we really need it now ? */
3870
3871	ret = register_netdevice_notifier(&ip_vs_dst_notifier);
3872	if (ret < 0)
3873		return ret;
3874
3875	LeaveFunction(2);
3876	return 0;
3877}
3878
3879
/* Module-wide control cleanup: detach the netdevice notifier
 * registered in ip_vs_control_init().
 */
void ip_vs_control_cleanup(void)
{
	EnterFunction(2);
	unregister_netdevice_notifier(&ip_vs_dst_notifier);
	LeaveFunction(2);
}
3886