sysctl_net_ipv4.c revision 6ba8a3b19e764b6a65e4030ab0999be50c291e6c
1/*
2 * sysctl_net_ipv4.c: sysctl interface to net IPV4 subsystem.
3 *
4 * Begun April 1, 1996, Mike Shaver.
5 * Added /proc/sys/net/ipv4 directory entry (empty =) ). [MS]
6 */
7
8#include <linux/mm.h>
9#include <linux/module.h>
10#include <linux/sysctl.h>
11#include <linux/igmp.h>
12#include <linux/inetdevice.h>
13#include <linux/seqlock.h>
14#include <linux/init.h>
15#include <linux/slab.h>
16#include <linux/nsproxy.h>
17#include <linux/swap.h>
18#include <net/snmp.h>
19#include <net/icmp.h>
20#include <net/ip.h>
21#include <net/route.h>
22#include <net/tcp.h>
23#include <net/udp.h>
24#include <net/cipso_ipv4.h>
25#include <net/inet_frag.h>
26#include <net/ping.h>
27#include <net/tcp_memcontrol.h>
28
29static int zero;
30static int one = 1;
31static int four = 4;
32static int tcp_retr1_max = 255;
33static int ip_local_port_range_min[] = { 1, 1 };
34static int ip_local_port_range_max[] = { 65535, 65535 };
35static int tcp_adv_win_scale_min = -31;
36static int tcp_adv_win_scale_max = 31;
37static int ip_ttl_min = 1;
38static int ip_ttl_max = 255;
39static int ip_ping_group_range_min[] = { 0, 0 };
40static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX };
41
42/* Update system visible IP port range */
43static void set_local_port_range(int range[2])
44{
45	write_seqlock(&sysctl_local_ports.lock);
46	sysctl_local_ports.range[0] = range[0];
47	sysctl_local_ports.range[1] = range[1];
48	write_sequnlock(&sysctl_local_ports.lock);
49}
50
51/* Validate changes from /proc interface. */
52static int ipv4_local_port_range(ctl_table *table, int write,
53				 void __user *buffer,
54				 size_t *lenp, loff_t *ppos)
55{
56	int ret;
57	int range[2];
58	ctl_table tmp = {
59		.data = &range,
60		.maxlen = sizeof(range),
61		.mode = table->mode,
62		.extra1 = &ip_local_port_range_min,
63		.extra2 = &ip_local_port_range_max,
64	};
65
66	inet_get_local_port_range(range, range + 1);
67	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
68
69	if (write && ret == 0) {
70		if (range[1] < range[0])
71			ret = -EINVAL;
72		else
73			set_local_port_range(range);
74	}
75
76	return ret;
77}
78
79
80static void inet_get_ping_group_range_table(struct ctl_table *table, kgid_t *low, kgid_t *high)
81{
82	kgid_t *data = table->data;
83	unsigned int seq;
84	do {
85		seq = read_seqbegin(&sysctl_local_ports.lock);
86
87		*low = data[0];
88		*high = data[1];
89	} while (read_seqretry(&sysctl_local_ports.lock, seq));
90}
91
92/* Update system visible IP port range */
93static void set_ping_group_range(struct ctl_table *table, kgid_t low, kgid_t high)
94{
95	kgid_t *data = table->data;
96	write_seqlock(&sysctl_local_ports.lock);
97	data[0] = low;
98	data[1] = high;
99	write_sequnlock(&sysctl_local_ports.lock);
100}
101
102/* Validate changes from /proc interface. */
103static int ipv4_ping_group_range(ctl_table *table, int write,
104				 void __user *buffer,
105				 size_t *lenp, loff_t *ppos)
106{
107	struct user_namespace *user_ns = current_user_ns();
108	int ret;
109	gid_t urange[2];
110	kgid_t low, high;
111	ctl_table tmp = {
112		.data = &urange,
113		.maxlen = sizeof(urange),
114		.mode = table->mode,
115		.extra1 = &ip_ping_group_range_min,
116		.extra2 = &ip_ping_group_range_max,
117	};
118
119	inet_get_ping_group_range_table(table, &low, &high);
120	urange[0] = from_kgid_munged(user_ns, low);
121	urange[1] = from_kgid_munged(user_ns, high);
122	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
123
124	if (write && ret == 0) {
125		low = make_kgid(user_ns, urange[0]);
126		high = make_kgid(user_ns, urange[1]);
127		if (!gid_valid(low) || !gid_valid(high) ||
128		    (urange[1] < urange[0]) || gid_lt(high, low)) {
129			low = make_kgid(&init_user_ns, 1);
130			high = make_kgid(&init_user_ns, 0);
131		}
132		set_ping_group_range(table, low, high);
133	}
134
135	return ret;
136}
137
138static int proc_tcp_congestion_control(ctl_table *ctl, int write,
139				       void __user *buffer, size_t *lenp, loff_t *ppos)
140{
141	char val[TCP_CA_NAME_MAX];
142	ctl_table tbl = {
143		.data = val,
144		.maxlen = TCP_CA_NAME_MAX,
145	};
146	int ret;
147
148	tcp_get_default_congestion_control(val);
149
150	ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
151	if (write && ret == 0)
152		ret = tcp_set_default_congestion_control(val);
153	return ret;
154}
155
156static int proc_tcp_available_congestion_control(ctl_table *ctl,
157						 int write,
158						 void __user *buffer, size_t *lenp,
159						 loff_t *ppos)
160{
161	ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX, };
162	int ret;
163
164	tbl.data = kmalloc(tbl.maxlen, GFP_USER);
165	if (!tbl.data)
166		return -ENOMEM;
167	tcp_get_available_congestion_control(tbl.data, TCP_CA_BUF_MAX);
168	ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
169	kfree(tbl.data);
170	return ret;
171}
172
173static int proc_allowed_congestion_control(ctl_table *ctl,
174					   int write,
175					   void __user *buffer, size_t *lenp,
176					   loff_t *ppos)
177{
178	ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX };
179	int ret;
180
181	tbl.data = kmalloc(tbl.maxlen, GFP_USER);
182	if (!tbl.data)
183		return -ENOMEM;
184
185	tcp_get_allowed_congestion_control(tbl.data, tbl.maxlen);
186	ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
187	if (write && ret == 0)
188		ret = tcp_set_allowed_congestion_control(tbl.data);
189	kfree(tbl.data);
190	return ret;
191}
192
193static int ipv4_tcp_mem(ctl_table *ctl, int write,
194			   void __user *buffer, size_t *lenp,
195			   loff_t *ppos)
196{
197	int ret;
198	unsigned long vec[3];
199	struct net *net = current->nsproxy->net_ns;
200#ifdef CONFIG_MEMCG_KMEM
201	struct mem_cgroup *memcg;
202#endif
203
204	ctl_table tmp = {
205		.data = &vec,
206		.maxlen = sizeof(vec),
207		.mode = ctl->mode,
208	};
209
210	if (!write) {
211		ctl->data = &net->ipv4.sysctl_tcp_mem;
212		return proc_doulongvec_minmax(ctl, write, buffer, lenp, ppos);
213	}
214
215	ret = proc_doulongvec_minmax(&tmp, write, buffer, lenp, ppos);
216	if (ret)
217		return ret;
218
219#ifdef CONFIG_MEMCG_KMEM
220	rcu_read_lock();
221	memcg = mem_cgroup_from_task(current);
222
223	tcp_prot_mem(memcg, vec[0], 0);
224	tcp_prot_mem(memcg, vec[1], 1);
225	tcp_prot_mem(memcg, vec[2], 2);
226	rcu_read_unlock();
227#endif
228
229	net->ipv4.sysctl_tcp_mem[0] = vec[0];
230	net->ipv4.sysctl_tcp_mem[1] = vec[1];
231	net->ipv4.sysctl_tcp_mem[2] = vec[2];
232
233	return 0;
234}
235
236static int proc_tcp_fastopen_key(ctl_table *ctl, int write, void __user *buffer,
237				 size_t *lenp, loff_t *ppos)
238{
239	ctl_table tbl = { .maxlen = (TCP_FASTOPEN_KEY_LENGTH * 2 + 10) };
240	struct tcp_fastopen_context *ctxt;
241	int ret;
242	u32  user_key[4]; /* 16 bytes, matching TCP_FASTOPEN_KEY_LENGTH */
243
244	tbl.data = kmalloc(tbl.maxlen, GFP_KERNEL);
245	if (!tbl.data)
246		return -ENOMEM;
247
248	rcu_read_lock();
249	ctxt = rcu_dereference(tcp_fastopen_ctx);
250	if (ctxt)
251		memcpy(user_key, ctxt->key, TCP_FASTOPEN_KEY_LENGTH);
252	else
253		memset(user_key, 0, sizeof(user_key));
254	rcu_read_unlock();
255
256	snprintf(tbl.data, tbl.maxlen, "%08x-%08x-%08x-%08x",
257		user_key[0], user_key[1], user_key[2], user_key[3]);
258	ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
259
260	if (write && ret == 0) {
261		if (sscanf(tbl.data, "%x-%x-%x-%x", user_key, user_key + 1,
262			   user_key + 2, user_key + 3) != 4) {
263			ret = -EINVAL;
264			goto bad_key;
265		}
266		tcp_fastopen_reset_cipher(user_key, TCP_FASTOPEN_KEY_LENGTH);
267	}
268
269bad_key:
270	pr_debug("proc FO key set 0x%x-%x-%x-%x <- 0x%s: %u\n",
271	       user_key[0], user_key[1], user_key[2], user_key[3],
272	       (char *)tbl.data, ret);
273	kfree(tbl.data);
274	return ret;
275}
276
277static struct ctl_table ipv4_table[] = {
278	{
279		.procname	= "tcp_timestamps",
280		.data		= &sysctl_tcp_timestamps,
281		.maxlen		= sizeof(int),
282		.mode		= 0644,
283		.proc_handler	= proc_dointvec
284	},
285	{
286		.procname	= "tcp_window_scaling",
287		.data		= &sysctl_tcp_window_scaling,
288		.maxlen		= sizeof(int),
289		.mode		= 0644,
290		.proc_handler	= proc_dointvec
291	},
292	{
293		.procname	= "tcp_sack",
294		.data		= &sysctl_tcp_sack,
295		.maxlen		= sizeof(int),
296		.mode		= 0644,
297		.proc_handler	= proc_dointvec
298	},
299	{
300		.procname	= "tcp_retrans_collapse",
301		.data		= &sysctl_tcp_retrans_collapse,
302		.maxlen		= sizeof(int),
303		.mode		= 0644,
304		.proc_handler	= proc_dointvec
305	},
306	{
307		.procname	= "ip_default_ttl",
308		.data		= &sysctl_ip_default_ttl,
309		.maxlen		= sizeof(int),
310		.mode		= 0644,
311		.proc_handler	= proc_dointvec_minmax,
312		.extra1		= &ip_ttl_min,
313		.extra2		= &ip_ttl_max,
314	},
315	{
316		.procname	= "ip_no_pmtu_disc",
317		.data		= &ipv4_config.no_pmtu_disc,
318		.maxlen		= sizeof(int),
319		.mode		= 0644,
320		.proc_handler	= proc_dointvec
321	},
322	{
323		.procname	= "ip_nonlocal_bind",
324		.data		= &sysctl_ip_nonlocal_bind,
325		.maxlen		= sizeof(int),
326		.mode		= 0644,
327		.proc_handler	= proc_dointvec
328	},
329	{
330		.procname	= "tcp_syn_retries",
331		.data		= &sysctl_tcp_syn_retries,
332		.maxlen		= sizeof(int),
333		.mode		= 0644,
334		.proc_handler	= proc_dointvec
335	},
336	{
337		.procname	= "tcp_synack_retries",
338		.data		= &sysctl_tcp_synack_retries,
339		.maxlen		= sizeof(int),
340		.mode		= 0644,
341		.proc_handler	= proc_dointvec
342	},
343	{
344		.procname	= "tcp_max_orphans",
345		.data		= &sysctl_tcp_max_orphans,
346		.maxlen		= sizeof(int),
347		.mode		= 0644,
348		.proc_handler	= proc_dointvec
349	},
350	{
351		.procname	= "tcp_max_tw_buckets",
352		.data		= &tcp_death_row.sysctl_max_tw_buckets,
353		.maxlen		= sizeof(int),
354		.mode		= 0644,
355		.proc_handler	= proc_dointvec
356	},
357	{
358		.procname	= "ip_early_demux",
359		.data		= &sysctl_ip_early_demux,
360		.maxlen		= sizeof(int),
361		.mode		= 0644,
362		.proc_handler	= proc_dointvec
363	},
364	{
365		.procname	= "ip_dynaddr",
366		.data		= &sysctl_ip_dynaddr,
367		.maxlen		= sizeof(int),
368		.mode		= 0644,
369		.proc_handler	= proc_dointvec
370	},
371	{
372		.procname	= "tcp_keepalive_time",
373		.data		= &sysctl_tcp_keepalive_time,
374		.maxlen		= sizeof(int),
375		.mode		= 0644,
376		.proc_handler	= proc_dointvec_jiffies,
377	},
378	{
379		.procname	= "tcp_keepalive_probes",
380		.data		= &sysctl_tcp_keepalive_probes,
381		.maxlen		= sizeof(int),
382		.mode		= 0644,
383		.proc_handler	= proc_dointvec
384	},
385	{
386		.procname	= "tcp_keepalive_intvl",
387		.data		= &sysctl_tcp_keepalive_intvl,
388		.maxlen		= sizeof(int),
389		.mode		= 0644,
390		.proc_handler	= proc_dointvec_jiffies,
391	},
392	{
393		.procname	= "tcp_retries1",
394		.data		= &sysctl_tcp_retries1,
395		.maxlen		= sizeof(int),
396		.mode		= 0644,
397		.proc_handler	= proc_dointvec_minmax,
398		.extra2		= &tcp_retr1_max
399	},
400	{
401		.procname	= "tcp_retries2",
402		.data		= &sysctl_tcp_retries2,
403		.maxlen		= sizeof(int),
404		.mode		= 0644,
405		.proc_handler	= proc_dointvec
406	},
407	{
408		.procname	= "tcp_fin_timeout",
409		.data		= &sysctl_tcp_fin_timeout,
410		.maxlen		= sizeof(int),
411		.mode		= 0644,
412		.proc_handler	= proc_dointvec_jiffies,
413	},
414#ifdef CONFIG_SYN_COOKIES
415	{
416		.procname	= "tcp_syncookies",
417		.data		= &sysctl_tcp_syncookies,
418		.maxlen		= sizeof(int),
419		.mode		= 0644,
420		.proc_handler	= proc_dointvec
421	},
422#endif
423	{
424		.procname	= "tcp_fastopen",
425		.data		= &sysctl_tcp_fastopen,
426		.maxlen		= sizeof(int),
427		.mode		= 0644,
428		.proc_handler	= proc_dointvec,
429	},
430	{
431		.procname	= "tcp_fastopen_key",
432		.mode		= 0600,
433		.maxlen		= ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10),
434		.proc_handler	= proc_tcp_fastopen_key,
435	},
436	{
437		.procname	= "tcp_tw_recycle",
438		.data		= &tcp_death_row.sysctl_tw_recycle,
439		.maxlen		= sizeof(int),
440		.mode		= 0644,
441		.proc_handler	= proc_dointvec
442	},
443	{
444		.procname	= "tcp_abort_on_overflow",
445		.data		= &sysctl_tcp_abort_on_overflow,
446		.maxlen		= sizeof(int),
447		.mode		= 0644,
448		.proc_handler	= proc_dointvec
449	},
450	{
451		.procname	= "tcp_stdurg",
452		.data		= &sysctl_tcp_stdurg,
453		.maxlen		= sizeof(int),
454		.mode		= 0644,
455		.proc_handler	= proc_dointvec
456	},
457	{
458		.procname	= "tcp_rfc1337",
459		.data		= &sysctl_tcp_rfc1337,
460		.maxlen		= sizeof(int),
461		.mode		= 0644,
462		.proc_handler	= proc_dointvec
463	},
464	{
465		.procname	= "tcp_max_syn_backlog",
466		.data		= &sysctl_max_syn_backlog,
467		.maxlen		= sizeof(int),
468		.mode		= 0644,
469		.proc_handler	= proc_dointvec
470	},
471	{
472		.procname	= "ip_local_port_range",
473		.data		= &sysctl_local_ports.range,
474		.maxlen		= sizeof(sysctl_local_ports.range),
475		.mode		= 0644,
476		.proc_handler	= ipv4_local_port_range,
477	},
478	{
479		.procname	= "ip_local_reserved_ports",
480		.data		= NULL, /* initialized in sysctl_ipv4_init */
481		.maxlen		= 65536,
482		.mode		= 0644,
483		.proc_handler	= proc_do_large_bitmap,
484	},
485	{
486		.procname	= "igmp_max_memberships",
487		.data		= &sysctl_igmp_max_memberships,
488		.maxlen		= sizeof(int),
489		.mode		= 0644,
490		.proc_handler	= proc_dointvec
491	},
492	{
493		.procname	= "igmp_max_msf",
494		.data		= &sysctl_igmp_max_msf,
495		.maxlen		= sizeof(int),
496		.mode		= 0644,
497		.proc_handler	= proc_dointvec
498	},
499	{
500		.procname	= "inet_peer_threshold",
501		.data		= &inet_peer_threshold,
502		.maxlen		= sizeof(int),
503		.mode		= 0644,
504		.proc_handler	= proc_dointvec
505	},
506	{
507		.procname	= "inet_peer_minttl",
508		.data		= &inet_peer_minttl,
509		.maxlen		= sizeof(int),
510		.mode		= 0644,
511		.proc_handler	= proc_dointvec_jiffies,
512	},
513	{
514		.procname	= "inet_peer_maxttl",
515		.data		= &inet_peer_maxttl,
516		.maxlen		= sizeof(int),
517		.mode		= 0644,
518		.proc_handler	= proc_dointvec_jiffies,
519	},
520	{
521		.procname	= "tcp_orphan_retries",
522		.data		= &sysctl_tcp_orphan_retries,
523		.maxlen		= sizeof(int),
524		.mode		= 0644,
525		.proc_handler	= proc_dointvec
526	},
527	{
528		.procname	= "tcp_fack",
529		.data		= &sysctl_tcp_fack,
530		.maxlen		= sizeof(int),
531		.mode		= 0644,
532		.proc_handler	= proc_dointvec
533	},
534	{
535		.procname	= "tcp_reordering",
536		.data		= &sysctl_tcp_reordering,
537		.maxlen		= sizeof(int),
538		.mode		= 0644,
539		.proc_handler	= proc_dointvec
540	},
541	{
542		.procname	= "tcp_dsack",
543		.data		= &sysctl_tcp_dsack,
544		.maxlen		= sizeof(int),
545		.mode		= 0644,
546		.proc_handler	= proc_dointvec
547	},
548	{
549		.procname	= "tcp_wmem",
550		.data		= &sysctl_tcp_wmem,
551		.maxlen		= sizeof(sysctl_tcp_wmem),
552		.mode		= 0644,
553		.proc_handler	= proc_dointvec_minmax,
554		.extra1		= &one,
555	},
556	{
557		.procname	= "tcp_rmem",
558		.data		= &sysctl_tcp_rmem,
559		.maxlen		= sizeof(sysctl_tcp_rmem),
560		.mode		= 0644,
561		.proc_handler	= proc_dointvec_minmax,
562		.extra1		= &one,
563	},
564	{
565		.procname	= "tcp_app_win",
566		.data		= &sysctl_tcp_app_win,
567		.maxlen		= sizeof(int),
568		.mode		= 0644,
569		.proc_handler	= proc_dointvec
570	},
571	{
572		.procname	= "tcp_adv_win_scale",
573		.data		= &sysctl_tcp_adv_win_scale,
574		.maxlen		= sizeof(int),
575		.mode		= 0644,
576		.proc_handler	= proc_dointvec_minmax,
577		.extra1		= &tcp_adv_win_scale_min,
578		.extra2		= &tcp_adv_win_scale_max,
579	},
580	{
581		.procname	= "tcp_tw_reuse",
582		.data		= &sysctl_tcp_tw_reuse,
583		.maxlen		= sizeof(int),
584		.mode		= 0644,
585		.proc_handler	= proc_dointvec
586	},
587	{
588		.procname	= "tcp_frto",
589		.data		= &sysctl_tcp_frto,
590		.maxlen		= sizeof(int),
591		.mode		= 0644,
592		.proc_handler	= proc_dointvec
593	},
594	{
595		.procname	= "tcp_frto_response",
596		.data		= &sysctl_tcp_frto_response,
597		.maxlen		= sizeof(int),
598		.mode		= 0644,
599		.proc_handler	= proc_dointvec
600	},
601	{
602		.procname	= "tcp_low_latency",
603		.data		= &sysctl_tcp_low_latency,
604		.maxlen		= sizeof(int),
605		.mode		= 0644,
606		.proc_handler	= proc_dointvec
607	},
608	{
609		.procname	= "tcp_no_metrics_save",
610		.data		= &sysctl_tcp_nometrics_save,
611		.maxlen		= sizeof(int),
612		.mode		= 0644,
613		.proc_handler	= proc_dointvec,
614	},
615	{
616		.procname	= "tcp_moderate_rcvbuf",
617		.data		= &sysctl_tcp_moderate_rcvbuf,
618		.maxlen		= sizeof(int),
619		.mode		= 0644,
620		.proc_handler	= proc_dointvec,
621	},
622	{
623		.procname	= "tcp_tso_win_divisor",
624		.data		= &sysctl_tcp_tso_win_divisor,
625		.maxlen		= sizeof(int),
626		.mode		= 0644,
627		.proc_handler	= proc_dointvec,
628	},
629	{
630		.procname	= "tcp_congestion_control",
631		.mode		= 0644,
632		.maxlen		= TCP_CA_NAME_MAX,
633		.proc_handler	= proc_tcp_congestion_control,
634	},
635	{
636		.procname	= "tcp_mtu_probing",
637		.data		= &sysctl_tcp_mtu_probing,
638		.maxlen		= sizeof(int),
639		.mode		= 0644,
640		.proc_handler	= proc_dointvec,
641	},
642	{
643		.procname	= "tcp_base_mss",
644		.data		= &sysctl_tcp_base_mss,
645		.maxlen		= sizeof(int),
646		.mode		= 0644,
647		.proc_handler	= proc_dointvec,
648	},
649	{
650		.procname	= "tcp_workaround_signed_windows",
651		.data		= &sysctl_tcp_workaround_signed_windows,
652		.maxlen		= sizeof(int),
653		.mode		= 0644,
654		.proc_handler	= proc_dointvec
655	},
656	{
657		.procname	= "tcp_limit_output_bytes",
658		.data		= &sysctl_tcp_limit_output_bytes,
659		.maxlen		= sizeof(int),
660		.mode		= 0644,
661		.proc_handler	= proc_dointvec
662	},
663	{
664		.procname	= "tcp_challenge_ack_limit",
665		.data		= &sysctl_tcp_challenge_ack_limit,
666		.maxlen		= sizeof(int),
667		.mode		= 0644,
668		.proc_handler	= proc_dointvec
669	},
670#ifdef CONFIG_NET_DMA
671	{
672		.procname	= "tcp_dma_copybreak",
673		.data		= &sysctl_tcp_dma_copybreak,
674		.maxlen		= sizeof(int),
675		.mode		= 0644,
676		.proc_handler	= proc_dointvec
677	},
678#endif
679	{
680		.procname	= "tcp_slow_start_after_idle",
681		.data		= &sysctl_tcp_slow_start_after_idle,
682		.maxlen		= sizeof(int),
683		.mode		= 0644,
684		.proc_handler	= proc_dointvec
685	},
686#ifdef CONFIG_NETLABEL
687	{
688		.procname	= "cipso_cache_enable",
689		.data		= &cipso_v4_cache_enabled,
690		.maxlen		= sizeof(int),
691		.mode		= 0644,
692		.proc_handler	= proc_dointvec,
693	},
694	{
695		.procname	= "cipso_cache_bucket_size",
696		.data		= &cipso_v4_cache_bucketsize,
697		.maxlen		= sizeof(int),
698		.mode		= 0644,
699		.proc_handler	= proc_dointvec,
700	},
701	{
702		.procname	= "cipso_rbm_optfmt",
703		.data		= &cipso_v4_rbm_optfmt,
704		.maxlen		= sizeof(int),
705		.mode		= 0644,
706		.proc_handler	= proc_dointvec,
707	},
708	{
709		.procname	= "cipso_rbm_strictvalid",
710		.data		= &cipso_v4_rbm_strictvalid,
711		.maxlen		= sizeof(int),
712		.mode		= 0644,
713		.proc_handler	= proc_dointvec,
714	},
715#endif /* CONFIG_NETLABEL */
716	{
717		.procname	= "tcp_available_congestion_control",
718		.maxlen		= TCP_CA_BUF_MAX,
719		.mode		= 0444,
720		.proc_handler   = proc_tcp_available_congestion_control,
721	},
722	{
723		.procname	= "tcp_allowed_congestion_control",
724		.maxlen		= TCP_CA_BUF_MAX,
725		.mode		= 0644,
726		.proc_handler   = proc_allowed_congestion_control,
727	},
728	{
729		.procname	= "tcp_max_ssthresh",
730		.data		= &sysctl_tcp_max_ssthresh,
731		.maxlen		= sizeof(int),
732		.mode		= 0644,
733		.proc_handler	= proc_dointvec,
734	},
735	{
736		.procname	= "tcp_cookie_size",
737		.data		= &sysctl_tcp_cookie_size,
738		.maxlen		= sizeof(int),
739		.mode		= 0644,
740		.proc_handler	= proc_dointvec
741	},
742	{
743		.procname       = "tcp_thin_linear_timeouts",
744		.data           = &sysctl_tcp_thin_linear_timeouts,
745		.maxlen         = sizeof(int),
746		.mode           = 0644,
747		.proc_handler   = proc_dointvec
748	},
749        {
750		.procname       = "tcp_thin_dupack",
751		.data           = &sysctl_tcp_thin_dupack,
752		.maxlen         = sizeof(int),
753		.mode           = 0644,
754		.proc_handler   = proc_dointvec
755	},
756	{
757		.procname	= "tcp_early_retrans",
758		.data		= &sysctl_tcp_early_retrans,
759		.maxlen		= sizeof(int),
760		.mode		= 0644,
761		.proc_handler	= proc_dointvec_minmax,
762		.extra1		= &zero,
763		.extra2		= &four,
764	},
765	{
766		.procname	= "udp_mem",
767		.data		= &sysctl_udp_mem,
768		.maxlen		= sizeof(sysctl_udp_mem),
769		.mode		= 0644,
770		.proc_handler	= proc_doulongvec_minmax,
771	},
772	{
773		.procname	= "udp_rmem_min",
774		.data		= &sysctl_udp_rmem_min,
775		.maxlen		= sizeof(sysctl_udp_rmem_min),
776		.mode		= 0644,
777		.proc_handler	= proc_dointvec_minmax,
778		.extra1		= &one
779	},
780	{
781		.procname	= "udp_wmem_min",
782		.data		= &sysctl_udp_wmem_min,
783		.maxlen		= sizeof(sysctl_udp_wmem_min),
784		.mode		= 0644,
785		.proc_handler	= proc_dointvec_minmax,
786		.extra1		= &one
787	},
788	{ }
789};
790
791static struct ctl_table ipv4_net_table[] = {
792	{
793		.procname	= "icmp_echo_ignore_all",
794		.data		= &init_net.ipv4.sysctl_icmp_echo_ignore_all,
795		.maxlen		= sizeof(int),
796		.mode		= 0644,
797		.proc_handler	= proc_dointvec
798	},
799	{
800		.procname	= "icmp_echo_ignore_broadcasts",
801		.data		= &init_net.ipv4.sysctl_icmp_echo_ignore_broadcasts,
802		.maxlen		= sizeof(int),
803		.mode		= 0644,
804		.proc_handler	= proc_dointvec
805	},
806	{
807		.procname	= "icmp_ignore_bogus_error_responses",
808		.data		= &init_net.ipv4.sysctl_icmp_ignore_bogus_error_responses,
809		.maxlen		= sizeof(int),
810		.mode		= 0644,
811		.proc_handler	= proc_dointvec
812	},
813	{
814		.procname	= "icmp_errors_use_inbound_ifaddr",
815		.data		= &init_net.ipv4.sysctl_icmp_errors_use_inbound_ifaddr,
816		.maxlen		= sizeof(int),
817		.mode		= 0644,
818		.proc_handler	= proc_dointvec
819	},
820	{
821		.procname	= "icmp_ratelimit",
822		.data		= &init_net.ipv4.sysctl_icmp_ratelimit,
823		.maxlen		= sizeof(int),
824		.mode		= 0644,
825		.proc_handler	= proc_dointvec_ms_jiffies,
826	},
827	{
828		.procname	= "icmp_ratemask",
829		.data		= &init_net.ipv4.sysctl_icmp_ratemask,
830		.maxlen		= sizeof(int),
831		.mode		= 0644,
832		.proc_handler	= proc_dointvec
833	},
834	{
835		.procname	= "ping_group_range",
836		.data		= &init_net.ipv4.sysctl_ping_group_range,
837		.maxlen		= sizeof(gid_t)*2,
838		.mode		= 0644,
839		.proc_handler	= ipv4_ping_group_range,
840	},
841	{
842		.procname	= "tcp_ecn",
843		.data		= &init_net.ipv4.sysctl_tcp_ecn,
844		.maxlen		= sizeof(int),
845		.mode		= 0644,
846		.proc_handler	= proc_dointvec
847	},
848	{
849		.procname	= "tcp_mem",
850		.maxlen		= sizeof(init_net.ipv4.sysctl_tcp_mem),
851		.mode		= 0644,
852		.proc_handler	= ipv4_tcp_mem,
853	},
854	{ }
855};
856
857static __net_init int ipv4_sysctl_init_net(struct net *net)
858{
859	struct ctl_table *table;
860
861	table = ipv4_net_table;
862	if (!net_eq(net, &init_net)) {
863		table = kmemdup(table, sizeof(ipv4_net_table), GFP_KERNEL);
864		if (table == NULL)
865			goto err_alloc;
866
867		table[0].data =
868			&net->ipv4.sysctl_icmp_echo_ignore_all;
869		table[1].data =
870			&net->ipv4.sysctl_icmp_echo_ignore_broadcasts;
871		table[2].data =
872			&net->ipv4.sysctl_icmp_ignore_bogus_error_responses;
873		table[3].data =
874			&net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr;
875		table[4].data =
876			&net->ipv4.sysctl_icmp_ratelimit;
877		table[5].data =
878			&net->ipv4.sysctl_icmp_ratemask;
879		table[6].data =
880			&net->ipv4.sysctl_ping_group_range;
881		table[7].data =
882			&net->ipv4.sysctl_tcp_ecn;
883
884		/* Don't export sysctls to unprivileged users */
885		if (net->user_ns != &init_user_ns)
886			table[0].procname = NULL;
887	}
888
889	/*
890	 * Sane defaults - nobody may create ping sockets.
891	 * Boot scripts should set this to distro-specific group.
892	 */
893	net->ipv4.sysctl_ping_group_range[0] = make_kgid(&init_user_ns, 1);
894	net->ipv4.sysctl_ping_group_range[1] = make_kgid(&init_user_ns, 0);
895
896	tcp_init_mem(net);
897
898	net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table);
899	if (net->ipv4.ipv4_hdr == NULL)
900		goto err_reg;
901
902	return 0;
903
904err_reg:
905	if (!net_eq(net, &init_net))
906		kfree(table);
907err_alloc:
908	return -ENOMEM;
909}
910
911static __net_exit void ipv4_sysctl_exit_net(struct net *net)
912{
913	struct ctl_table *table;
914
915	table = net->ipv4.ipv4_hdr->ctl_table_arg;
916	unregister_net_sysctl_table(net->ipv4.ipv4_hdr);
917	kfree(table);
918}
919
920static __net_initdata struct pernet_operations ipv4_sysctl_ops = {
921	.init = ipv4_sysctl_init_net,
922	.exit = ipv4_sysctl_exit_net,
923};
924
925static __init int sysctl_ipv4_init(void)
926{
927	struct ctl_table_header *hdr;
928	struct ctl_table *i;
929
930	for (i = ipv4_table; i->procname; i++) {
931		if (strcmp(i->procname, "ip_local_reserved_ports") == 0) {
932			i->data = sysctl_local_reserved_ports;
933			break;
934		}
935	}
936	if (!i->procname)
937		return -EINVAL;
938
939	hdr = register_net_sysctl(&init_net, "net/ipv4", ipv4_table);
940	if (hdr == NULL)
941		return -ENOMEM;
942
943	if (register_pernet_subsys(&ipv4_sysctl_ops)) {
944		unregister_net_sysctl_table(hdr);
945		return -ENOMEM;
946	}
947
948	return 0;
949}
950
951__initcall(sysctl_ipv4_init);
952