sock.c revision 9958089a43ae8a9af07402461c0b2b7548c7341e
1/*
2 * INET		An implementation of the TCP/IP protocol suite for the LINUX
3 *		operating system.  INET is implemented using the  BSD Socket
4 *		interface as the means of communication with the user level.
5 *
6 *		Generic socket support routines. Memory allocators, socket lock/release
7 *		handler for protocols to use and generic option handler.
8 *
9 *
10 * Version:	$Id: sock.c,v 1.117 2002/02/01 22:01:03 davem Exp $
11 *
12 * Authors:	Ross Biro
13 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
14 *		Florian La Roche, <flla@stud.uni-sb.de>
15 *		Alan Cox, <A.Cox@swansea.ac.uk>
16 *
17 * Fixes:
18 *		Alan Cox	: 	Numerous verify_area() problems
19 *		Alan Cox	:	Connecting on a connecting socket
20 *					now returns an error for tcp.
 *		Alan Cox	:	sock->protocol is set correctly,
22 *					and is not sometimes left as 0.
23 *		Alan Cox	:	connect handles icmp errors on a
24 *					connect properly. Unfortunately there
25 *					is a restart syscall nasty there. I
26 *					can't match BSD without hacking the C
27 *					library. Ideas urgently sought!
28 *		Alan Cox	:	Disallow bind() to addresses that are
29 *					not ours - especially broadcast ones!!
30 *		Alan Cox	:	Socket 1024 _IS_ ok for users. (fencepost)
31 *		Alan Cox	:	sock_wfree/sock_rfree don't destroy sockets,
32 *					instead they leave that for the DESTROY timer.
33 *		Alan Cox	:	Clean up error flag in accept
34 *		Alan Cox	:	TCP ack handling is buggy, the DESTROY timer
35 *					was buggy. Put a remove_sock() in the handler
36 *					for memory when we hit 0. Also altered the timer
37 *					code. The ACK stuff can wait and needs major
38 *					TCP layer surgery.
39 *		Alan Cox	:	Fixed TCP ack bug, removed remove sock
40 *					and fixed timer/inet_bh race.
41 *		Alan Cox	:	Added zapped flag for TCP
42 *		Alan Cox	:	Move kfree_skb into skbuff.c and tidied up surplus code
43 *		Alan Cox	:	for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
44 *		Alan Cox	:	kfree_s calls now are kfree_skbmem so we can track skb resources
45 *		Alan Cox	:	Supports socket option broadcast now as does udp. Packet and raw need fixing.
46 *		Alan Cox	:	Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
47 *		Rick Sladkey	:	Relaxed UDP rules for matching packets.
48 *		C.E.Hawkins	:	IFF_PROMISC/SIOCGHWADDR support
49 *	Pauline Middelink	:	identd support
50 *		Alan Cox	:	Fixed connect() taking signals I think.
51 *		Alan Cox	:	SO_LINGER supported
52 *		Alan Cox	:	Error reporting fixes
53 *		Anonymous	:	inet_create tidied up (sk->reuse setting)
54 *		Alan Cox	:	inet sockets don't set sk->type!
55 *		Alan Cox	:	Split socket option code
56 *		Alan Cox	:	Callbacks
57 *		Alan Cox	:	Nagle flag for Charles & Johannes stuff
58 *		Alex		:	Removed restriction on inet fioctl
59 *		Alan Cox	:	Splitting INET from NET core
60 *		Alan Cox	:	Fixed bogus SO_TYPE handling in getsockopt()
61 *		Adam Caldwell	:	Missing return in SO_DONTROUTE/SO_DEBUG code
62 *		Alan Cox	:	Split IP from generic code
63 *		Alan Cox	:	New kfree_skbmem()
64 *		Alan Cox	:	Make SO_DEBUG superuser only.
65 *		Alan Cox	:	Allow anyone to clear SO_DEBUG
66 *					(compatibility fix)
67 *		Alan Cox	:	Added optimistic memory grabbing for AF_UNIX throughput.
68 *		Alan Cox	:	Allocator for a socket is settable.
69 *		Alan Cox	:	SO_ERROR includes soft errors.
70 *		Alan Cox	:	Allow NULL arguments on some SO_ opts
71 *		Alan Cox	: 	Generic socket allocation to make hooks
72 *					easier (suggested by Craig Metz).
73 *		Michael Pall	:	SO_ERROR returns positive errno again
 *	Steve Whitehouse:	Added default destructor to free
 *					protocol private data.
 *	Steve Whitehouse:	Added various other default routines
 *					common to several socket families.
 *		Chris Evans	:	Call suser() check last on F_SETOWN
79 *		Jay Schulist	:	Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
80 *		Andi Kleen	:	Add sock_kmalloc()/sock_kfree_s()
81 *		Andi Kleen	:	Fix write_space callback
82 *		Chris Evans	:	Security fixes - signedness again
83 *		Arnaldo C. Melo :       cleanups, use skb_queue_purge
84 *
85 * To Fix:
86 *
87 *
88 *		This program is free software; you can redistribute it and/or
89 *		modify it under the terms of the GNU General Public License
90 *		as published by the Free Software Foundation; either version
91 *		2 of the License, or (at your option) any later version.
92 */
93
94#include <linux/capability.h>
95#include <linux/errno.h>
96#include <linux/types.h>
97#include <linux/socket.h>
98#include <linux/in.h>
99#include <linux/kernel.h>
100#include <linux/module.h>
101#include <linux/proc_fs.h>
102#include <linux/seq_file.h>
103#include <linux/sched.h>
104#include <linux/timer.h>
105#include <linux/string.h>
106#include <linux/sockios.h>
107#include <linux/net.h>
108#include <linux/mm.h>
109#include <linux/slab.h>
110#include <linux/interrupt.h>
111#include <linux/poll.h>
112#include <linux/tcp.h>
113#include <linux/init.h>
114#include <linux/highmem.h>
115
116#include <asm/uaccess.h>
117#include <asm/system.h>
118
119#include <linux/netdevice.h>
120#include <net/protocol.h>
121#include <linux/skbuff.h>
122#include <net/request_sock.h>
123#include <net/sock.h>
124#include <net/xfrm.h>
125#include <linux/ipsec.h>
126
127#include <linux/filter.h>
128
129#ifdef CONFIG_INET
130#include <net/tcp.h>
131#endif
132
133/*
134 * Each address family might have different locking rules, so we have
135 * one slock key per address family:
136 */
137static struct lock_class_key af_family_keys[AF_MAX];
138static struct lock_class_key af_family_slock_keys[AF_MAX];
139
140#ifdef CONFIG_DEBUG_LOCK_ALLOC
141/*
 * Make the lock validator output more readable. (We pre-construct these
 * strings at build time so that runtime initialization of socket
 * locks is fast.)
145 */
146static const char *af_family_key_strings[AF_MAX+1] = {
147  "sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX"     , "sk_lock-AF_INET"     ,
148  "sk_lock-AF_AX25"  , "sk_lock-AF_IPX"      , "sk_lock-AF_APPLETALK",
149  "sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE"   , "sk_lock-AF_ATMPVC"   ,
150  "sk_lock-AF_X25"   , "sk_lock-AF_INET6"    , "sk_lock-AF_ROSE"     ,
151  "sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI"  , "sk_lock-AF_SECURITY" ,
152  "sk_lock-AF_KEY"   , "sk_lock-AF_NETLINK"  , "sk_lock-AF_PACKET"   ,
153  "sk_lock-AF_ASH"   , "sk_lock-AF_ECONET"   , "sk_lock-AF_ATMSVC"   ,
154  "sk_lock-21"       , "sk_lock-AF_SNA"      , "sk_lock-AF_IRDA"     ,
155  "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE"  , "sk_lock-AF_LLC"      ,
156  "sk_lock-27"       , "sk_lock-28"          , "sk_lock-29"          ,
157  "sk_lock-AF_TIPC"  , "sk_lock-AF_BLUETOOTH", "sk_lock-AF_MAX"
158};
159static const char *af_family_slock_key_strings[AF_MAX+1] = {
160  "slock-AF_UNSPEC", "slock-AF_UNIX"     , "slock-AF_INET"     ,
161  "slock-AF_AX25"  , "slock-AF_IPX"      , "slock-AF_APPLETALK",
162  "slock-AF_NETROM", "slock-AF_BRIDGE"   , "slock-AF_ATMPVC"   ,
163  "slock-AF_X25"   , "slock-AF_INET6"    , "slock-AF_ROSE"     ,
164  "slock-AF_DECnet", "slock-AF_NETBEUI"  , "slock-AF_SECURITY" ,
165  "slock-AF_KEY"   , "slock-AF_NETLINK"  , "slock-AF_PACKET"   ,
166  "slock-AF_ASH"   , "slock-AF_ECONET"   , "slock-AF_ATMSVC"   ,
167  "slock-21"       , "slock-AF_SNA"      , "slock-AF_IRDA"     ,
168  "slock-AF_PPPOX" , "slock-AF_WANPIPE"  , "slock-AF_LLC"      ,
169  "slock-27"       , "slock-28"          , "slock-29"          ,
170  "slock-AF_TIPC"  , "slock-AF_BLUETOOTH", "slock-AF_MAX"
171};
172#endif
173
174/*
175 * sk_callback_lock locking rules are per-address-family,
176 * so split the lock classes by using a per-AF key:
177 */
178static struct lock_class_key af_callback_keys[AF_MAX];
179
/* These values account for the size of the struct sk_buff overhead,
 * which is not constant across platforms.  That keeps socket queueing
 * behavior and performance independent of such differences.
 */
185#define _SK_MEM_PACKETS		256
186#define _SK_MEM_OVERHEAD	(sizeof(struct sk_buff) + 256)
187#define SK_WMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
188#define SK_RMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
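
/* Rough ballpark, assuming sizeof(struct sk_buff) is around 240 bytes on
 * a 64-bit build: _SK_MEM_OVERHEAD comes to roughly 500 bytes per packet,
 * so SK_WMEM_MAX and SK_RMEM_MAX end up near 125 kB for 256 packets.
 */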
189
190/* Run time adjustable parameters. */
191__u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
192__u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
193__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
194__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
195
/* Maximal space eaten by iovec or ancillary data plus some space */
197int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
198
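/* Convert a user supplied struct timeval into a timeout in jiffies.
 * A value of {0, 0} means "no timeout" and yields MAX_SCHEDULE_TIMEOUT;
 * sub-jiffy microsecond values are rounded up to whole jiffies.
 */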
199static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
200{
201	struct timeval tv;
202
203	if (optlen < sizeof(tv))
204		return -EINVAL;
205	if (copy_from_user(&tv, optval, sizeof(tv)))
206		return -EFAULT;
207
208	*timeo_p = MAX_SCHEDULE_TIMEOUT;
209	if (tv.tv_sec == 0 && tv.tv_usec == 0)
210		return 0;
211	if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
212		*timeo_p = tv.tv_sec*HZ + (tv.tv_usec+(1000000/HZ-1))/(1000000/HZ);
213	return 0;
214}
215
216static void sock_warn_obsolete_bsdism(const char *name)
217{
218	static int warned;
219	static char warncomm[TASK_COMM_LEN];
220	if (strcmp(warncomm, current->comm) && warned < 5) {
221		strcpy(warncomm,  current->comm);
222		printk(KERN_WARNING "process `%s' is using obsolete "
223		       "%s SO_BSDCOMPAT\n", warncomm, name);
224		warned++;
225	}
226}
227
228static void sock_disable_timestamp(struct sock *sk)
229{
230	if (sock_flag(sk, SOCK_TIMESTAMP)) {
231		sock_reset_flag(sk, SOCK_TIMESTAMP);
232		net_disable_timestamp();
233	}
234}
235
236
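/* Charge an skb to the socket's receive buffer and append it to
 * sk_receive_queue.  Returns -ENOMEM when the receive buffer is full,
 * or the error from sk_filter() when the socket filter rejects the
 * packet; on success the skb is queued and sk_data_ready() is called
 * unless the socket is already dead.
 */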
237int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
238{
239	int err = 0;
240	int skb_len;
241
	/* Cast sk->sk_rcvbuf to unsigned... It's pointless, but reduces
	   the number of warnings when compiling with -W --ANK
	 */
245	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
246	    (unsigned)sk->sk_rcvbuf) {
247		err = -ENOMEM;
248		goto out;
249	}
250
251	err = sk_filter(sk, skb);
252	if (err)
253		goto out;
254
255	skb->dev = NULL;
256	skb_set_owner_r(skb, sk);
257
258	/* Cache the SKB length before we tack it onto the receive
259	 * queue.  Once it is added it no longer belongs to us and
260	 * may be freed by other threads of control pulling packets
261	 * from the queue.
262	 */
263	skb_len = skb->len;
264
265	skb_queue_tail(&sk->sk_receive_queue, skb);
266
267	if (!sock_flag(sk, SOCK_DEAD))
268		sk->sk_data_ready(sk, skb_len);
269out:
270	return err;
271}
272EXPORT_SYMBOL(sock_queue_rcv_skb);
273
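/* Filter and deliver an skb to a socket: if no user context holds the
 * socket lock the packet goes straight to sk_backlog_rcv(), otherwise it
 * is queued on the backlog for release_sock() to process later.  The
 * reference the caller holds on the socket is dropped via sock_put().
 */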
274int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
275{
276	int rc = NET_RX_SUCCESS;
277
278	if (sk_filter(sk, skb))
279		goto discard_and_relse;
280
281	skb->dev = NULL;
282
283	if (nested)
284		bh_lock_sock_nested(sk);
285	else
286		bh_lock_sock(sk);
287	if (!sock_owned_by_user(sk)) {
288		/*
289		 * trylock + unlock semantics:
290		 */
291		mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);
292
293		rc = sk->sk_backlog_rcv(sk, skb);
294
295		mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
296	} else
297		sk_add_backlog(sk, skb);
298	bh_unlock_sock(sk);
299out:
300	sock_put(sk);
301	return rc;
302discard_and_relse:
303	kfree_skb(skb);
304	goto out;
305}
306EXPORT_SYMBOL(sk_receive_skb);
307
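/* Validate the socket's cached route.  If the dst is marked obsolete and
 * its ->check() method reports it unusable, drop it and return NULL so
 * the caller re-routes; otherwise return the cached entry.
 * __sk_dst_check() reads sk_dst_cache directly and expects the caller to
 * serialize access; sk_dst_check() takes its own reference via sk_dst_get().
 */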
308struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
309{
310	struct dst_entry *dst = sk->sk_dst_cache;
311
312	if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
313		sk->sk_dst_cache = NULL;
314		dst_release(dst);
315		return NULL;
316	}
317
318	return dst;
319}
320EXPORT_SYMBOL(__sk_dst_check);
321
322struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
323{
324	struct dst_entry *dst = sk_dst_get(sk);
325
326	if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
327		sk_dst_reset(sk);
328		dst_release(dst);
329		return NULL;
330	}
331
332	return dst;
333}
334EXPORT_SYMBOL(sk_dst_check);
335
336/*
337 *	This is meant for all protocols to use and covers goings on
338 *	at the socket level. Everything here is generic.
339 */
340
341int sock_setsockopt(struct socket *sock, int level, int optname,
342		    char __user *optval, int optlen)
343{
344	struct sock *sk=sock->sk;
345	struct sk_filter *filter;
346	int val;
347	int valbool;
348	struct linger ling;
349	int ret = 0;
350
351	/*
352	 *	Options without arguments
353	 */
354
355#ifdef SO_DONTLINGER		/* Compatibility item... */
356	if (optname == SO_DONTLINGER) {
357		lock_sock(sk);
358		sock_reset_flag(sk, SOCK_LINGER);
359		release_sock(sk);
360		return 0;
361	}
362#endif
363
364	if (optlen < sizeof(int))
365		return -EINVAL;
366
367	if (get_user(val, (int __user *)optval))
368		return -EFAULT;
369
370	valbool = val?1:0;
371
372	lock_sock(sk);
373
374	switch(optname) {
375	case SO_DEBUG:
376		if (val && !capable(CAP_NET_ADMIN)) {
377			ret = -EACCES;
378		}
379		else if (valbool)
380			sock_set_flag(sk, SOCK_DBG);
381		else
382			sock_reset_flag(sk, SOCK_DBG);
383		break;
384	case SO_REUSEADDR:
385		sk->sk_reuse = valbool;
386		break;
387	case SO_TYPE:
388	case SO_ERROR:
389		ret = -ENOPROTOOPT;
390		break;
391	case SO_DONTROUTE:
392		if (valbool)
393			sock_set_flag(sk, SOCK_LOCALROUTE);
394		else
395			sock_reset_flag(sk, SOCK_LOCALROUTE);
396		break;
397	case SO_BROADCAST:
398		sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
399		break;
400	case SO_SNDBUF:
		/* Don't error on this; BSD doesn't, and if you think
		   about it this is right. Otherwise apps have to
		   play 'guess the biggest size' games. RCVBUF/SNDBUF
		   are treated in BSD as hints. */
405
406		if (val > sysctl_wmem_max)
407			val = sysctl_wmem_max;
408set_sndbuf:
409		sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
410		if ((val * 2) < SOCK_MIN_SNDBUF)
411			sk->sk_sndbuf = SOCK_MIN_SNDBUF;
412		else
413			sk->sk_sndbuf = val * 2;
414
415		/*
416		 *	Wake up sending tasks if we
417		 *	upped the value.
418		 */
419		sk->sk_write_space(sk);
420		break;
421
422	case SO_SNDBUFFORCE:
423		if (!capable(CAP_NET_ADMIN)) {
424			ret = -EPERM;
425			break;
426		}
427		goto set_sndbuf;
428
429	case SO_RCVBUF:
		/* Don't error on this; BSD doesn't, and if you think
		   about it this is right. Otherwise apps have to
		   play 'guess the biggest size' games. RCVBUF/SNDBUF
		   are treated in BSD as hints. */
434
435		if (val > sysctl_rmem_max)
436			val = sysctl_rmem_max;
437set_rcvbuf:
438		sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
439		/*
440		 * We double it on the way in to account for
441		 * "struct sk_buff" etc. overhead.   Applications
442		 * assume that the SO_RCVBUF setting they make will
443		 * allow that much actual data to be received on that
444		 * socket.
445		 *
446		 * Applications are unaware that "struct sk_buff" and
447		 * other overheads allocate from the receive buffer
448		 * during socket buffer allocation.
449		 *
450		 * And after considering the possible alternatives,
451		 * returning the value we actually used in getsockopt
452		 * is the most desirable behavior.
453		 */
454		if ((val * 2) < SOCK_MIN_RCVBUF)
455			sk->sk_rcvbuf = SOCK_MIN_RCVBUF;
456		else
457			sk->sk_rcvbuf = val * 2;
458		break;
459
460	case SO_RCVBUFFORCE:
461		if (!capable(CAP_NET_ADMIN)) {
462			ret = -EPERM;
463			break;
464		}
465		goto set_rcvbuf;
466
467	case SO_KEEPALIVE:
468#ifdef CONFIG_INET
469		if (sk->sk_protocol == IPPROTO_TCP)
470			tcp_set_keepalive(sk, valbool);
471#endif
472		sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
473		break;
474
475	case SO_OOBINLINE:
476		sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
477		break;
478
479	case SO_NO_CHECK:
480		sk->sk_no_check = valbool;
481		break;
482
483	case SO_PRIORITY:
484		if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN))
485			sk->sk_priority = val;
486		else
487			ret = -EPERM;
488		break;
489
490	case SO_LINGER:
491		if (optlen < sizeof(ling)) {
492			ret = -EINVAL;	/* 1003.1g */
493			break;
494		}
495		if (copy_from_user(&ling,optval,sizeof(ling))) {
496			ret = -EFAULT;
497			break;
498		}
499		if (!ling.l_onoff)
500			sock_reset_flag(sk, SOCK_LINGER);
501		else {
502#if (BITS_PER_LONG == 32)
503			if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
504				sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
505			else
506#endif
507				sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
508			sock_set_flag(sk, SOCK_LINGER);
509		}
510		break;
511
512	case SO_BSDCOMPAT:
513		sock_warn_obsolete_bsdism("setsockopt");
514		break;
515
516	case SO_PASSCRED:
517		if (valbool)
518			set_bit(SOCK_PASSCRED, &sock->flags);
519		else
520			clear_bit(SOCK_PASSCRED, &sock->flags);
521		break;
522
523	case SO_TIMESTAMP:
524	case SO_TIMESTAMPNS:
525		if (valbool)  {
526			if (optname == SO_TIMESTAMP)
527				sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
528			else
529				sock_set_flag(sk, SOCK_RCVTSTAMPNS);
530			sock_set_flag(sk, SOCK_RCVTSTAMP);
531			sock_enable_timestamp(sk);
532		} else {
533			sock_reset_flag(sk, SOCK_RCVTSTAMP);
534			sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
535		}
536		break;
537
538	case SO_RCVLOWAT:
539		if (val < 0)
540			val = INT_MAX;
541		sk->sk_rcvlowat = val ? : 1;
542		break;
543
544	case SO_RCVTIMEO:
545		ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
546		break;
547
548	case SO_SNDTIMEO:
549		ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
550		break;
551
552#ifdef CONFIG_NETDEVICES
553	case SO_BINDTODEVICE:
554	{
555		char devname[IFNAMSIZ];
556
557		/* Sorry... */
558		if (!capable(CAP_NET_RAW)) {
559			ret = -EPERM;
560			break;
561		}
562
563		/* Bind this socket to a particular device like "eth0",
564		 * as specified in the passed interface name. If the
565		 * name is "" or the option length is zero the socket
566		 * is not bound.
567		 */
568
569		if (!valbool) {
570			sk->sk_bound_dev_if = 0;
571		} else {
572			if (optlen > IFNAMSIZ - 1)
573				optlen = IFNAMSIZ - 1;
574			memset(devname, 0, sizeof(devname));
575			if (copy_from_user(devname, optval, optlen)) {
576				ret = -EFAULT;
577				break;
578			}
579
580			/* Remove any cached route for this socket. */
581			sk_dst_reset(sk);
582
583			if (devname[0] == '\0') {
584				sk->sk_bound_dev_if = 0;
585			} else {
586				struct net_device *dev = dev_get_by_name(devname);
587				if (!dev) {
588					ret = -ENODEV;
589					break;
590				}
591				sk->sk_bound_dev_if = dev->ifindex;
592				dev_put(dev);
593			}
594		}
595		break;
596	}
597#endif
598
599
600	case SO_ATTACH_FILTER:
601		ret = -EINVAL;
602		if (optlen == sizeof(struct sock_fprog)) {
603			struct sock_fprog fprog;
604
605			ret = -EFAULT;
606			if (copy_from_user(&fprog, optval, sizeof(fprog)))
607				break;
608
609			ret = sk_attach_filter(&fprog, sk);
610		}
611		break;
612
613	case SO_DETACH_FILTER:
614		rcu_read_lock_bh();
615		filter = rcu_dereference(sk->sk_filter);
616		if (filter) {
617			rcu_assign_pointer(sk->sk_filter, NULL);
618			sk_filter_release(sk, filter);
619			rcu_read_unlock_bh();
620			break;
621		}
622		rcu_read_unlock_bh();
623		ret = -ENONET;
624		break;
625
626	case SO_PASSSEC:
627		if (valbool)
628			set_bit(SOCK_PASSSEC, &sock->flags);
629		else
630			clear_bit(SOCK_PASSSEC, &sock->flags);
631		break;
632
		/* We implement SO_SNDLOWAT etc. as not settable
		   (1003.1g 5.3). */
635	default:
636		ret = -ENOPROTOOPT;
637		break;
638	}
639	release_sock(sk);
640	return ret;
641}
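
/* Note that SO_SNDBUF and SO_RCVBUF store twice the requested value to
 * leave room for struct sk_buff and other bookkeeping overhead, so a
 * getsockopt() right after a setsockopt() returns the doubled number.
 * Illustrative userspace sketch (assuming the value fits under
 * sysctl_rmem_max):
 *
 *	int val = 65536, out;
 *	socklen_t len = sizeof(out);
 *	setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &val, sizeof(val));
 *	getsockopt(fd, SOL_SOCKET, SO_RCVBUF, &out, &len);
 *	// out is now 131072
 */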
642
643
644int sock_getsockopt(struct socket *sock, int level, int optname,
645		    char __user *optval, int __user *optlen)
646{
647	struct sock *sk = sock->sk;
648
649	union {
650		int val;
651		struct linger ling;
652		struct timeval tm;
653	} v;
654
655	unsigned int lv = sizeof(int);
656	int len;
657
658	if (get_user(len, optlen))
659		return -EFAULT;
660	if (len < 0)
661		return -EINVAL;
662
663	switch(optname) {
664	case SO_DEBUG:
665		v.val = sock_flag(sk, SOCK_DBG);
666		break;
667
668	case SO_DONTROUTE:
669		v.val = sock_flag(sk, SOCK_LOCALROUTE);
670		break;
671
672	case SO_BROADCAST:
673		v.val = !!sock_flag(sk, SOCK_BROADCAST);
674		break;
675
676	case SO_SNDBUF:
677		v.val = sk->sk_sndbuf;
678		break;
679
680	case SO_RCVBUF:
681		v.val = sk->sk_rcvbuf;
682		break;
683
684	case SO_REUSEADDR:
685		v.val = sk->sk_reuse;
686		break;
687
688	case SO_KEEPALIVE:
689		v.val = !!sock_flag(sk, SOCK_KEEPOPEN);
690		break;
691
692	case SO_TYPE:
693		v.val = sk->sk_type;
694		break;
695
696	case SO_ERROR:
697		v.val = -sock_error(sk);
698		if (v.val==0)
699			v.val = xchg(&sk->sk_err_soft, 0);
700		break;
701
702	case SO_OOBINLINE:
703		v.val = !!sock_flag(sk, SOCK_URGINLINE);
704		break;
705
706	case SO_NO_CHECK:
707		v.val = sk->sk_no_check;
708		break;
709
710	case SO_PRIORITY:
711		v.val = sk->sk_priority;
712		break;
713
714	case SO_LINGER:
715		lv		= sizeof(v.ling);
716		v.ling.l_onoff	= !!sock_flag(sk, SOCK_LINGER);
717		v.ling.l_linger	= sk->sk_lingertime / HZ;
718		break;
719
720	case SO_BSDCOMPAT:
721		sock_warn_obsolete_bsdism("getsockopt");
722		break;
723
724	case SO_TIMESTAMP:
725		v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
726				!sock_flag(sk, SOCK_RCVTSTAMPNS);
727		break;
728
729	case SO_TIMESTAMPNS:
730		v.val = sock_flag(sk, SOCK_RCVTSTAMPNS);
731		break;
732
733	case SO_RCVTIMEO:
734		lv=sizeof(struct timeval);
735		if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
736			v.tm.tv_sec = 0;
737			v.tm.tv_usec = 0;
738		} else {
739			v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
740			v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ;
741		}
742		break;
743
744	case SO_SNDTIMEO:
745		lv=sizeof(struct timeval);
746		if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
747			v.tm.tv_sec = 0;
748			v.tm.tv_usec = 0;
749		} else {
750			v.tm.tv_sec = sk->sk_sndtimeo / HZ;
751			v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ;
752		}
753		break;
754
755	case SO_RCVLOWAT:
756		v.val = sk->sk_rcvlowat;
757		break;
758
759	case SO_SNDLOWAT:
760		v.val=1;
761		break;
762
763	case SO_PASSCRED:
764		v.val = test_bit(SOCK_PASSCRED, &sock->flags) ? 1 : 0;
765		break;
766
767	case SO_PEERCRED:
768		if (len > sizeof(sk->sk_peercred))
769			len = sizeof(sk->sk_peercred);
770		if (copy_to_user(optval, &sk->sk_peercred, len))
771			return -EFAULT;
772		goto lenout;
773
774	case SO_PEERNAME:
775	{
776		char address[128];
777
778		if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
779			return -ENOTCONN;
780		if (lv < len)
781			return -EINVAL;
782		if (copy_to_user(optval, address, len))
783			return -EFAULT;
784		goto lenout;
785	}
786
787	/* Dubious BSD thing... Probably nobody even uses it, but
788	 * the UNIX standard wants it for whatever reason... -DaveM
789	 */
790	case SO_ACCEPTCONN:
791		v.val = sk->sk_state == TCP_LISTEN;
792		break;
793
794	case SO_PASSSEC:
795		v.val = test_bit(SOCK_PASSSEC, &sock->flags) ? 1 : 0;
796		break;
797
798	case SO_PEERSEC:
799		return security_socket_getpeersec_stream(sock, optval, optlen, len);
800
801	default:
802		return -ENOPROTOOPT;
803	}
804
805	if (len > lv)
806		len = lv;
807	if (copy_to_user(optval, &v, len))
808		return -EFAULT;
809lenout:
810	if (put_user(len, optlen))
811		return -EFAULT;
812	return 0;
813}
814
815/*
816 * Initialize an sk_lock.
817 *
818 * (We also register the sk_lock with the lock validator.)
819 */
820static inline void sock_lock_init(struct sock *sk)
821{
822	sock_lock_init_class_and_name(sk,
823			af_family_slock_key_strings[sk->sk_family],
824			af_family_slock_keys + sk->sk_family,
825			af_family_key_strings[sk->sk_family],
826			af_family_keys + sk->sk_family);
827}
828
829/**
830 *	sk_alloc - All socket objects are allocated here
831 *	@family: protocol family
832 *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
833 *	@prot: struct proto associated with this new sock instance
834 *	@zero_it: if we should zero the newly allocated sock
835 */
836struct sock *sk_alloc(int family, gfp_t priority,
837		      struct proto *prot, int zero_it)
838{
839	struct sock *sk = NULL;
840	struct kmem_cache *slab = prot->slab;
841
842	if (slab != NULL)
843		sk = kmem_cache_alloc(slab, priority);
844	else
845		sk = kmalloc(prot->obj_size, priority);
846
847	if (sk) {
848		if (zero_it) {
849			memset(sk, 0, prot->obj_size);
850			sk->sk_family = family;
851			/*
852			 * See comment in struct sock definition to understand
853			 * why we need sk_prot_creator -acme
854			 */
855			sk->sk_prot = sk->sk_prot_creator = prot;
856			sock_lock_init(sk);
857		}
858
859		if (security_sk_alloc(sk, family, priority))
860			goto out_free;
861
862		if (!try_module_get(prot->owner))
863			goto out_free;
864	}
865	return sk;
866
867out_free:
868	if (slab != NULL)
869		kmem_cache_free(slab, sk);
870	else
871		kfree(sk);
872	return NULL;
873}
874
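/* Final teardown of a socket: run the destructor, drop any attached
 * filter, disable timestamping, warn if option memory is still charged,
 * then return the memory to the creating protocol's slab (or kfree it)
 * and release the module reference held on that protocol.
 */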
875void sk_free(struct sock *sk)
876{
877	struct sk_filter *filter;
878	struct module *owner = sk->sk_prot_creator->owner;
879
880	if (sk->sk_destruct)
881		sk->sk_destruct(sk);
882
883	filter = rcu_dereference(sk->sk_filter);
884	if (filter) {
885		sk_filter_release(sk, filter);
886		rcu_assign_pointer(sk->sk_filter, NULL);
887	}
888
889	sock_disable_timestamp(sk);
890
891	if (atomic_read(&sk->sk_omem_alloc))
892		printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n",
893		       __FUNCTION__, atomic_read(&sk->sk_omem_alloc));
894
895	security_sk_free(sk);
896	if (sk->sk_prot_creator->slab != NULL)
897		kmem_cache_free(sk->sk_prot_creator->slab, sk);
898	else
899		kfree(sk);
900	module_put(owner);
901}
902
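/* Duplicate an existing socket (typically when a connection is accepted):
 * the parent is copied wholesale, then everything that must be private to
 * the child (lock state, queues, memory accounting, dst cache) is
 * reinitialised, the inherited filter is charged and the xfrm policy is
 * cloned.  The new socket starts with a reference count of 2.
 */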
903struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
904{
905	struct sock *newsk = sk_alloc(sk->sk_family, priority, sk->sk_prot, 0);
906
907	if (newsk != NULL) {
908		struct sk_filter *filter;
909
910		sock_copy(newsk, sk);
911
912		/* SANITY */
913		sk_node_init(&newsk->sk_node);
914		sock_lock_init(newsk);
915		bh_lock_sock(newsk);
916		newsk->sk_backlog.head	= newsk->sk_backlog.tail = NULL;
917
918		atomic_set(&newsk->sk_rmem_alloc, 0);
919		atomic_set(&newsk->sk_wmem_alloc, 0);
920		atomic_set(&newsk->sk_omem_alloc, 0);
921		skb_queue_head_init(&newsk->sk_receive_queue);
922		skb_queue_head_init(&newsk->sk_write_queue);
923#ifdef CONFIG_NET_DMA
924		skb_queue_head_init(&newsk->sk_async_wait_queue);
925#endif
926
927		rwlock_init(&newsk->sk_dst_lock);
928		rwlock_init(&newsk->sk_callback_lock);
929		lockdep_set_class(&newsk->sk_callback_lock,
930				   af_callback_keys + newsk->sk_family);
931
932		newsk->sk_dst_cache	= NULL;
933		newsk->sk_wmem_queued	= 0;
934		newsk->sk_forward_alloc = 0;
935		newsk->sk_send_head	= NULL;
936		newsk->sk_userlocks	= sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
937
938		sock_reset_flag(newsk, SOCK_DONE);
939		skb_queue_head_init(&newsk->sk_error_queue);
940
941		filter = newsk->sk_filter;
942		if (filter != NULL)
943			sk_filter_charge(newsk, filter);
944
945		if (unlikely(xfrm_sk_clone_policy(newsk))) {
			/* It is still a raw copy of the parent, so invalidate
			 * the destructor and do a plain sk_free(). */
948			newsk->sk_destruct = NULL;
949			sk_free(newsk);
950			newsk = NULL;
951			goto out;
952		}
953
954		newsk->sk_err	   = 0;
955		newsk->sk_priority = 0;
956		atomic_set(&newsk->sk_refcnt, 2);
957
958		/*
959		 * Increment the counter in the same struct proto as the master
960		 * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
961		 * is the same as sk->sk_prot->socks, as this field was copied
962		 * with memcpy).
963		 *
964		 * This _changes_ the previous behaviour, where
		 * tcp_create_openreq_child always incremented the
		 * equivalent of tcp_prot->socks (inet_sock_nr), so this has
		 * to be taken into account in all callers. -acme
968		 */
969		sk_refcnt_debug_inc(newsk);
970		newsk->sk_socket = NULL;
971		newsk->sk_sleep	 = NULL;
972
973		if (newsk->sk_prot->sockets_allocated)
974			atomic_inc(newsk->sk_prot->sockets_allocated);
975	}
976out:
977	return newsk;
978}
979
980EXPORT_SYMBOL_GPL(sk_clone);
981
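/* Attach a route to the socket and derive the socket's offload
 * capabilities from the output device: a GSO-capable device gets the
 * full GSO feature mask plus scatter/gather and hardware checksumming,
 * unless the dst requires extra header space (dst->header_len), in which
 * case GSO is turned off again.
 */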
982void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
983{
984	__sk_dst_set(sk, dst);
985	sk->sk_route_caps = dst->dev->features;
986	if (sk->sk_route_caps & NETIF_F_GSO)
987		sk->sk_route_caps |= NETIF_F_GSO_MASK;
988	if (sk_can_gso(sk)) {
989		if (dst->header_len)
990			sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
991		else
992			sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
993	}
994}
995EXPORT_SYMBOL_GPL(sk_setup_caps);
996
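/* Scale the default buffer sysctls to the amount of memory in the box:
 * with 4 KiB pages, machines with 16 MB or less get small (32 KB)
 * defaults and maxima, while machines with 512 MB or more get larger
 * maxima (131071 bytes).
 */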
997void __init sk_init(void)
998{
999	if (num_physpages <= 4096) {
1000		sysctl_wmem_max = 32767;
1001		sysctl_rmem_max = 32767;
1002		sysctl_wmem_default = 32767;
1003		sysctl_rmem_default = 32767;
1004	} else if (num_physpages >= 131072) {
1005		sysctl_wmem_max = 131071;
1006		sysctl_rmem_max = 131071;
1007	}
1008}
1009
1010/*
1011 *	Simple resource managers for sockets.
1012 */
1013
1014
1015/*
1016 * Write buffer destructor automatically called from kfree_skb.
1017 */
1018void sock_wfree(struct sk_buff *skb)
1019{
1020	struct sock *sk = skb->sk;
1021
1022	/* In case it might be waiting for more memory. */
1023	atomic_sub(skb->truesize, &sk->sk_wmem_alloc);
1024	if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE))
1025		sk->sk_write_space(sk);
1026	sock_put(sk);
1027}
1028
1029/*
1030 * Read buffer destructor automatically called from kfree_skb.
1031 */
1032void sock_rfree(struct sk_buff *skb)
1033{
1034	struct sock *sk = skb->sk;
1035
1036	atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
1037}
1038
1039
1040int sock_i_uid(struct sock *sk)
1041{
1042	int uid;
1043
1044	read_lock(&sk->sk_callback_lock);
1045	uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : 0;
1046	read_unlock(&sk->sk_callback_lock);
1047	return uid;
1048}
1049
1050unsigned long sock_i_ino(struct sock *sk)
1051{
1052	unsigned long ino;
1053
1054	read_lock(&sk->sk_callback_lock);
1055	ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
1056	read_unlock(&sk->sk_callback_lock);
1057	return ino;
1058}
1059
1060/*
1061 * Allocate a skb from the socket's send buffer.
1062 */
1063struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
1064			     gfp_t priority)
1065{
1066	if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
1067		struct sk_buff * skb = alloc_skb(size, priority);
1068		if (skb) {
1069			skb_set_owner_w(skb, sk);
1070			return skb;
1071		}
1072	}
1073	return NULL;
1074}
1075
1076/*
1077 * Allocate a skb from the socket's receive buffer.
1078 */
1079struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force,
1080			     gfp_t priority)
1081{
1082	if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
1083		struct sk_buff *skb = alloc_skb(size, priority);
1084		if (skb) {
1085			skb_set_owner_r(skb, sk);
1086			return skb;
1087		}
1088	}
1089	return NULL;
1090}
1091
1092/*
1093 * Allocate a memory block from the socket's option memory buffer.
1094 */
1095void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
1096{
1097	if ((unsigned)size <= sysctl_optmem_max &&
1098	    atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
1099		void *mem;
		/* Do the add first, so that a concurrent allocation cannot
		 * race past the limit while kmalloc might sleep.
		 */
1103		atomic_add(size, &sk->sk_omem_alloc);
1104		mem = kmalloc(size, priority);
1105		if (mem)
1106			return mem;
1107		atomic_sub(size, &sk->sk_omem_alloc);
1108	}
1109	return NULL;
1110}
1111
1112/*
1113 * Free an option memory block.
1114 */
1115void sock_kfree_s(struct sock *sk, void *mem, int size)
1116{
1117	kfree(mem);
1118	atomic_sub(size, &sk->sk_omem_alloc);
1119}
1120
1121/* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
   I think these locks should be removed for datagram sockets.
1123 */
1124static long sock_wait_for_wmem(struct sock * sk, long timeo)
1125{
1126	DEFINE_WAIT(wait);
1127
1128	clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
1129	for (;;) {
1130		if (!timeo)
1131			break;
1132		if (signal_pending(current))
1133			break;
1134		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1135		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
1136		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
1137			break;
1138		if (sk->sk_shutdown & SEND_SHUTDOWN)
1139			break;
1140		if (sk->sk_err)
1141			break;
1142		timeo = schedule_timeout(timeo);
1143	}
1144	finish_wait(sk->sk_sleep, &wait);
1145	return timeo;
1146}
1147
1148
1149/*
1150 *	Generic send/receive buffer handlers
1151 */
1152
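/* Allocate a send buffer, waiting for sndbuf space when necessary.
 * header_len bytes go in the linear part of the skb and data_len bytes
 * are spread over page fragments; the skb is charged to the socket via
 * skb_set_owner_w() before it is returned.
 */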
1153static struct sk_buff *sock_alloc_send_pskb(struct sock *sk,
1154					    unsigned long header_len,
1155					    unsigned long data_len,
1156					    int noblock, int *errcode)
1157{
1158	struct sk_buff *skb;
1159	gfp_t gfp_mask;
1160	long timeo;
1161	int err;
1162
1163	gfp_mask = sk->sk_allocation;
1164	if (gfp_mask & __GFP_WAIT)
1165		gfp_mask |= __GFP_REPEAT;
1166
1167	timeo = sock_sndtimeo(sk, noblock);
1168	while (1) {
1169		err = sock_error(sk);
1170		if (err != 0)
1171			goto failure;
1172
1173		err = -EPIPE;
1174		if (sk->sk_shutdown & SEND_SHUTDOWN)
1175			goto failure;
1176
1177		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
1178			skb = alloc_skb(header_len, gfp_mask);
1179			if (skb) {
1180				int npages;
1181				int i;
1182
1183				/* No pages, we're done... */
1184				if (!data_len)
1185					break;
1186
1187				npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
1188				skb->truesize += data_len;
1189				skb_shinfo(skb)->nr_frags = npages;
1190				for (i = 0; i < npages; i++) {
1191					struct page *page;
1192					skb_frag_t *frag;
1193
1194					page = alloc_pages(sk->sk_allocation, 0);
1195					if (!page) {
1196						err = -ENOBUFS;
1197						skb_shinfo(skb)->nr_frags = i;
1198						kfree_skb(skb);
1199						goto failure;
1200					}
1201
1202					frag = &skb_shinfo(skb)->frags[i];
1203					frag->page = page;
1204					frag->page_offset = 0;
1205					frag->size = (data_len >= PAGE_SIZE ?
1206						      PAGE_SIZE :
1207						      data_len);
1208					data_len -= PAGE_SIZE;
1209				}
1210
1211				/* Full success... */
1212				break;
1213			}
1214			err = -ENOBUFS;
1215			goto failure;
1216		}
1217		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
1218		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1219		err = -EAGAIN;
1220		if (!timeo)
1221			goto failure;
1222		if (signal_pending(current))
1223			goto interrupted;
1224		timeo = sock_wait_for_wmem(sk, timeo);
1225	}
1226
1227	skb_set_owner_w(skb, sk);
1228	return skb;
1229
1230interrupted:
1231	err = sock_intr_errno(timeo);
1232failure:
1233	*errcode = err;
1234	return NULL;
1235}
1236
1237struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
1238				    int noblock, int *errcode)
1239{
1240	return sock_alloc_send_pskb(sk, size, 0, noblock, errcode);
1241}
1242
1243static void __lock_sock(struct sock *sk)
1244{
1245	DEFINE_WAIT(wait);
1246
1247	for (;;) {
1248		prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
1249					TASK_UNINTERRUPTIBLE);
1250		spin_unlock_bh(&sk->sk_lock.slock);
1251		schedule();
1252		spin_lock_bh(&sk->sk_lock.slock);
1253		if (!sock_owned_by_user(sk))
1254			break;
1255	}
1256	finish_wait(&sk->sk_lock.wq, &wait);
1257}
1258
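/* Process packets that were queued on the backlog while the socket was
 * owned by user context.  The backlog is detached under the spinlock and
 * then processed with the spinlock dropped, so new packets can keep
 * arriving on a fresh backlog while we work.
 */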
1259static void __release_sock(struct sock *sk)
1260{
1261	struct sk_buff *skb = sk->sk_backlog.head;
1262
1263	do {
1264		sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
1265		bh_unlock_sock(sk);
1266
1267		do {
1268			struct sk_buff *next = skb->next;
1269
1270			skb->next = NULL;
1271			sk->sk_backlog_rcv(sk, skb);
1272
1273			/*
1274			 * We are in process context here with softirqs
1275			 * disabled, use cond_resched_softirq() to preempt.
1276			 * This is safe to do because we've taken the backlog
1277			 * queue private:
1278			 */
1279			cond_resched_softirq();
1280
1281			skb = next;
1282		} while (skb != NULL);
1283
1284		bh_lock_sock(sk);
1285	} while ((skb = sk->sk_backlog.head) != NULL);
1286}
1287
1288/**
1289 * sk_wait_data - wait for data to arrive at sk_receive_queue
1290 * @sk:    sock to wait on
1291 * @timeo: for how long
1292 *
 * Socket state, including sk->sk_err, is now changed only under the lock,
 * hence we may omit checks after joining the wait queue.
 * We check the receive queue before schedule() only as an optimization;
1296 * it is very likely that release_sock() added new data.
1297 */
1298int sk_wait_data(struct sock *sk, long *timeo)
1299{
1300	int rc;
1301	DEFINE_WAIT(wait);
1302
1303	prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
1304	set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1305	rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
1306	clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1307	finish_wait(sk->sk_sleep, &wait);
1308	return rc;
1309}
1310
1311EXPORT_SYMBOL(sk_wait_data);
1312
1313/*
1314 * Set of default routines for initialising struct proto_ops when
1315 * the protocol does not support a particular function. In certain
1316 * cases where it makes no sense for a protocol to have a "do nothing"
1317 * function, some default processing is provided.
1318 */
1319
1320int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
1321{
1322	return -EOPNOTSUPP;
1323}
1324
1325int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
1326		    int len, int flags)
1327{
1328	return -EOPNOTSUPP;
1329}
1330
1331int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
1332{
1333	return -EOPNOTSUPP;
1334}
1335
1336int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
1337{
1338	return -EOPNOTSUPP;
1339}
1340
1341int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
1342		    int *len, int peer)
1343{
1344	return -EOPNOTSUPP;
1345}
1346
1347unsigned int sock_no_poll(struct file * file, struct socket *sock, poll_table *pt)
1348{
1349	return 0;
1350}
1351
1352int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1353{
1354	return -EOPNOTSUPP;
1355}
1356
1357int sock_no_listen(struct socket *sock, int backlog)
1358{
1359	return -EOPNOTSUPP;
1360}
1361
1362int sock_no_shutdown(struct socket *sock, int how)
1363{
1364	return -EOPNOTSUPP;
1365}
1366
1367int sock_no_setsockopt(struct socket *sock, int level, int optname,
1368		    char __user *optval, int optlen)
1369{
1370	return -EOPNOTSUPP;
1371}
1372
1373int sock_no_getsockopt(struct socket *sock, int level, int optname,
1374		    char __user *optval, int __user *optlen)
1375{
1376	return -EOPNOTSUPP;
1377}
1378
1379int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
1380		    size_t len)
1381{
1382	return -EOPNOTSUPP;
1383}
1384
1385int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
1386		    size_t len, int flags)
1387{
1388	return -EOPNOTSUPP;
1389}
1390
1391int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
1392{
1393	/* Mirror missing mmap method error code */
1394	return -ENODEV;
1395}
1396
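/* Fallback ->sendpage() for protocols without a zero-copy implementation:
 * map the page into the kernel and push it through kernel_sendmsg() as
 * ordinary data.
 */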
1397ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
1398{
1399	ssize_t res;
1400	struct msghdr msg = {.msg_flags = flags};
1401	struct kvec iov;
1402	char *kaddr = kmap(page);
1403	iov.iov_base = kaddr + offset;
1404	iov.iov_len = size;
1405	res = kernel_sendmsg(sock, &msg, &iov, 1, size);
1406	kunmap(page);
1407	return res;
1408}
1409
1410/*
1411 *	Default Socket Callbacks
1412 */
1413
1414static void sock_def_wakeup(struct sock *sk)
1415{
1416	read_lock(&sk->sk_callback_lock);
1417	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1418		wake_up_interruptible_all(sk->sk_sleep);
1419	read_unlock(&sk->sk_callback_lock);
1420}
1421
1422static void sock_def_error_report(struct sock *sk)
1423{
1424	read_lock(&sk->sk_callback_lock);
1425	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1426		wake_up_interruptible(sk->sk_sleep);
1427	sk_wake_async(sk,0,POLL_ERR);
1428	read_unlock(&sk->sk_callback_lock);
1429}
1430
1431static void sock_def_readable(struct sock *sk, int len)
1432{
1433	read_lock(&sk->sk_callback_lock);
1434	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1435		wake_up_interruptible(sk->sk_sleep);
1436	sk_wake_async(sk,1,POLL_IN);
1437	read_unlock(&sk->sk_callback_lock);
1438}
1439
1440static void sock_def_write_space(struct sock *sk)
1441{
1442	read_lock(&sk->sk_callback_lock);
1443
1444	/* Do not wake up a writer until he can make "significant"
1445	 * progress.  --DaveM
1446	 */
1447	if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
1448		if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1449			wake_up_interruptible(sk->sk_sleep);
1450
1451		/* Should agree with poll, otherwise some programs break */
1452		if (sock_writeable(sk))
1453			sk_wake_async(sk, 2, POLL_OUT);
1454	}
1455
1456	read_unlock(&sk->sk_callback_lock);
1457}
1458
1459static void sock_def_destruct(struct sock *sk)
1460{
1461	kfree(sk->sk_protinfo);
1462}
1463
1464void sk_send_sigurg(struct sock *sk)
1465{
1466	if (sk->sk_socket && sk->sk_socket->file)
1467		if (send_sigurg(&sk->sk_socket->file->f_owner))
1468			sk_wake_async(sk, 3, POLL_PRI);
1469}
1470
1471void sk_reset_timer(struct sock *sk, struct timer_list* timer,
1472		    unsigned long expires)
1473{
1474	if (!mod_timer(timer, expires))
1475		sock_hold(sk);
1476}
1477
1478EXPORT_SYMBOL(sk_reset_timer);
1479
1480void sk_stop_timer(struct sock *sk, struct timer_list* timer)
1481{
1482	if (timer_pending(timer) && del_timer(timer))
1483		__sock_put(sk);
1484}
1485
1486EXPORT_SYMBOL(sk_stop_timer);
1487
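/* Initialise the generic fields of a newly allocated sock: empty queues,
 * default buffer sizes taken from the rmem/wmem sysctls, the default
 * callbacks above, infinite send/receive timeouts and a reference count
 * of one.  The sock is attached to @sock when one is supplied.
 */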
1488void sock_init_data(struct socket *sock, struct sock *sk)
1489{
1490	skb_queue_head_init(&sk->sk_receive_queue);
1491	skb_queue_head_init(&sk->sk_write_queue);
1492	skb_queue_head_init(&sk->sk_error_queue);
1493#ifdef CONFIG_NET_DMA
1494	skb_queue_head_init(&sk->sk_async_wait_queue);
1495#endif
1496
1497	sk->sk_send_head	=	NULL;
1498
1499	init_timer(&sk->sk_timer);
1500
1501	sk->sk_allocation	=	GFP_KERNEL;
1502	sk->sk_rcvbuf		=	sysctl_rmem_default;
1503	sk->sk_sndbuf		=	sysctl_wmem_default;
1504	sk->sk_state		=	TCP_CLOSE;
1505	sk->sk_socket		=	sock;
1506
1507	sock_set_flag(sk, SOCK_ZAPPED);
1508
1509	if (sock) {
1510		sk->sk_type	=	sock->type;
1511		sk->sk_sleep	=	&sock->wait;
1512		sock->sk	=	sk;
1513	} else
1514		sk->sk_sleep	=	NULL;
1515
1516	rwlock_init(&sk->sk_dst_lock);
1517	rwlock_init(&sk->sk_callback_lock);
1518	lockdep_set_class(&sk->sk_callback_lock,
1519			   af_callback_keys + sk->sk_family);
1520
1521	sk->sk_state_change	=	sock_def_wakeup;
1522	sk->sk_data_ready	=	sock_def_readable;
1523	sk->sk_write_space	=	sock_def_write_space;
1524	sk->sk_error_report	=	sock_def_error_report;
1525	sk->sk_destruct		=	sock_def_destruct;
1526
1527	sk->sk_sndmsg_page	=	NULL;
1528	sk->sk_sndmsg_off	=	0;
1529
1530	sk->sk_peercred.pid 	=	0;
1531	sk->sk_peercred.uid	=	-1;
1532	sk->sk_peercred.gid	=	-1;
1533	sk->sk_write_pending	=	0;
1534	sk->sk_rcvlowat		=	1;
1535	sk->sk_rcvtimeo		=	MAX_SCHEDULE_TIMEOUT;
1536	sk->sk_sndtimeo		=	MAX_SCHEDULE_TIMEOUT;
1537
1538	sk->sk_stamp = ktime_set(-1L, -1L);
1539
1540	atomic_set(&sk->sk_refcnt, 1);
1541}
1542
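/* Acquire the socket lock from process context.  The slock spinlock is
 * only held long enough to mark the socket as owned (sleeping in
 * __lock_sock() if another owner exists); afterwards softirqs may run
 * again and deliveries go to the backlog, which gives sk_lock mutex-like
 * semantics for the caller.
 */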
1543void fastcall lock_sock_nested(struct sock *sk, int subclass)
1544{
1545	might_sleep();
1546	spin_lock_bh(&sk->sk_lock.slock);
1547	if (sk->sk_lock.owner)
1548		__lock_sock(sk);
1549	sk->sk_lock.owner = (void *)1;
1550	spin_unlock(&sk->sk_lock.slock);
1551	/*
1552	 * The sk_lock has mutex_lock() semantics here:
1553	 */
1554	mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
1555	local_bh_enable();
1556}
1557
1558EXPORT_SYMBOL(lock_sock_nested);
1559
1560void fastcall release_sock(struct sock *sk)
1561{
1562	/*
1563	 * The sk_lock has mutex_unlock() semantics:
1564	 */
1565	mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
1566
1567	spin_lock_bh(&sk->sk_lock.slock);
1568	if (sk->sk_backlog.tail)
1569		__release_sock(sk);
1570	sk->sk_lock.owner = NULL;
1571	if (waitqueue_active(&sk->sk_lock.wq))
1572		wake_up(&sk->sk_lock.wq);
1573	spin_unlock_bh(&sk->sk_lock.slock);
1574}
1575EXPORT_SYMBOL(release_sock);
1576
1577int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
1578{
1579	struct timeval tv;
1580	if (!sock_flag(sk, SOCK_TIMESTAMP))
1581		sock_enable_timestamp(sk);
1582	tv = ktime_to_timeval(sk->sk_stamp);
1583	if (tv.tv_sec == -1)
1584		return -ENOENT;
1585	if (tv.tv_sec == 0) {
1586		sk->sk_stamp = ktime_get_real();
1587		tv = ktime_to_timeval(sk->sk_stamp);
1588	}
1589	return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0;
1590}
1591EXPORT_SYMBOL(sock_get_timestamp);
1592
1593int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
1594{
1595	struct timespec ts;
1596	if (!sock_flag(sk, SOCK_TIMESTAMP))
1597		sock_enable_timestamp(sk);
1598	ts = ktime_to_timespec(sk->sk_stamp);
1599	if (ts.tv_sec == -1)
1600		return -ENOENT;
1601	if (ts.tv_sec == 0) {
1602		sk->sk_stamp = ktime_get_real();
1603		ts = ktime_to_timespec(sk->sk_stamp);
1604	}
1605	return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0;
1606}
1607EXPORT_SYMBOL(sock_get_timestampns);
1608
1609void sock_enable_timestamp(struct sock *sk)
1610{
1611	if (!sock_flag(sk, SOCK_TIMESTAMP)) {
1612		sock_set_flag(sk, SOCK_TIMESTAMP);
1613		net_enable_timestamp();
1614	}
1615}
1616EXPORT_SYMBOL(sock_enable_timestamp);
1617
1618/*
 *	Get a socket option on a socket.
1620 *
1621 *	FIX: POSIX 1003.1g is very ambiguous here. It states that
1622 *	asynchronous errors should be reported by getsockopt. We assume
 *	this means if you specify SO_ERROR (otherwise what's the point of it).
1624 */
1625int sock_common_getsockopt(struct socket *sock, int level, int optname,
1626			   char __user *optval, int __user *optlen)
1627{
1628	struct sock *sk = sock->sk;
1629
1630	return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
1631}
1632
1633EXPORT_SYMBOL(sock_common_getsockopt);
1634
1635#ifdef CONFIG_COMPAT
1636int compat_sock_common_getsockopt(struct socket *sock, int level, int optname,
1637				  char __user *optval, int __user *optlen)
1638{
1639	struct sock *sk = sock->sk;
1640
1641	if (sk->sk_prot->compat_getsockopt != NULL)
1642		return sk->sk_prot->compat_getsockopt(sk, level, optname,
1643						      optval, optlen);
1644	return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
1645}
1646EXPORT_SYMBOL(compat_sock_common_getsockopt);
1647#endif
1648
1649int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock,
1650			struct msghdr *msg, size_t size, int flags)
1651{
1652	struct sock *sk = sock->sk;
1653	int addr_len = 0;
1654	int err;
1655
1656	err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
1657				   flags & ~MSG_DONTWAIT, &addr_len);
1658	if (err >= 0)
1659		msg->msg_namelen = addr_len;
1660	return err;
1661}
1662
1663EXPORT_SYMBOL(sock_common_recvmsg);
1664
1665/*
 *	Set socket options on a socket.
1667 */
1668int sock_common_setsockopt(struct socket *sock, int level, int optname,
1669			   char __user *optval, int optlen)
1670{
1671	struct sock *sk = sock->sk;
1672
1673	return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
1674}
1675
1676EXPORT_SYMBOL(sock_common_setsockopt);
1677
1678#ifdef CONFIG_COMPAT
1679int compat_sock_common_setsockopt(struct socket *sock, int level, int optname,
1680				  char __user *optval, int optlen)
1681{
1682	struct sock *sk = sock->sk;
1683
1684	if (sk->sk_prot->compat_setsockopt != NULL)
1685		return sk->sk_prot->compat_setsockopt(sk, level, optname,
1686						      optval, optlen);
1687	return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
1688}
1689EXPORT_SYMBOL(compat_sock_common_setsockopt);
1690#endif
1691
1692void sk_common_release(struct sock *sk)
1693{
1694	if (sk->sk_prot->destroy)
1695		sk->sk_prot->destroy(sk);
1696
1697	/*
	 * Observation: when sk_common_release is called, processes no
	 * longer have access to the socket, but the network stack still does.
1700	 * Step one, detach it from networking:
1701	 *
1702	 * A. Remove from hash tables.
1703	 */
1704
1705	sk->sk_prot->unhash(sk);
1706
1707	/*
	 * At this point the socket cannot receive new packets, but it is
	 * possible that some packets are still in flight, because another CPU
	 * may have run the receiver and done its hash table lookup before we
	 * unhashed the socket.  Those packets will reach the receive queue
	 * and be purged by the socket destructor.
	 *
	 * We also still have packets pending on the receive queue, and
	 * probably our own packets waiting in device queues.  sock_destroy
	 * will drain the receive queue, but transmitted packets will delay
	 * socket destruction until the last reference is released.
1717	 */
1718
1719	sock_orphan(sk);
1720
1721	xfrm_sk_free_policy(sk);
1722
1723	sk_refcnt_debug_release(sk);
1724	sock_put(sk);
1725}
1726
1727EXPORT_SYMBOL(sk_common_release);
1728
1729static DEFINE_RWLOCK(proto_list_lock);
1730static LIST_HEAD(proto_list);
1731
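/* Register a protocol with the socket layer.  When @alloc_slab is set,
 * slab caches are created for the protocol's sockets and, if present,
 * for its request_sock and timewait_sock variants; the protocol is then
 * added to proto_list, which backs /proc/net/protocols.
 */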
1732int proto_register(struct proto *prot, int alloc_slab)
1733{
1734	char *request_sock_slab_name = NULL;
1735	char *timewait_sock_slab_name;
1736	int rc = -ENOBUFS;
1737
1738	if (alloc_slab) {
1739		prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
1740					       SLAB_HWCACHE_ALIGN, NULL, NULL);
1741
1742		if (prot->slab == NULL) {
1743			printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n",
1744			       prot->name);
1745			goto out;
1746		}
1747
1748		if (prot->rsk_prot != NULL) {
1749			static const char mask[] = "request_sock_%s";
1750
1751			request_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
1752			if (request_sock_slab_name == NULL)
1753				goto out_free_sock_slab;
1754
1755			sprintf(request_sock_slab_name, mask, prot->name);
1756			prot->rsk_prot->slab = kmem_cache_create(request_sock_slab_name,
1757								 prot->rsk_prot->obj_size, 0,
1758								 SLAB_HWCACHE_ALIGN, NULL, NULL);
1759
1760			if (prot->rsk_prot->slab == NULL) {
1761				printk(KERN_CRIT "%s: Can't create request sock SLAB cache!\n",
1762				       prot->name);
1763				goto out_free_request_sock_slab_name;
1764			}
1765		}
1766
1767		if (prot->twsk_prot != NULL) {
1768			static const char mask[] = "tw_sock_%s";
1769
1770			timewait_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
1771
1772			if (timewait_sock_slab_name == NULL)
1773				goto out_free_request_sock_slab;
1774
1775			sprintf(timewait_sock_slab_name, mask, prot->name);
1776			prot->twsk_prot->twsk_slab =
1777				kmem_cache_create(timewait_sock_slab_name,
1778						  prot->twsk_prot->twsk_obj_size,
1779						  0, SLAB_HWCACHE_ALIGN,
1780						  NULL, NULL);
1781			if (prot->twsk_prot->twsk_slab == NULL)
1782				goto out_free_timewait_sock_slab_name;
1783		}
1784	}
1785
1786	write_lock(&proto_list_lock);
1787	list_add(&prot->node, &proto_list);
1788	write_unlock(&proto_list_lock);
1789	rc = 0;
1790out:
1791	return rc;
1792out_free_timewait_sock_slab_name:
1793	kfree(timewait_sock_slab_name);
1794out_free_request_sock_slab:
1795	if (prot->rsk_prot && prot->rsk_prot->slab) {
1796		kmem_cache_destroy(prot->rsk_prot->slab);
1797		prot->rsk_prot->slab = NULL;
1798	}
1799out_free_request_sock_slab_name:
1800	kfree(request_sock_slab_name);
1801out_free_sock_slab:
1802	kmem_cache_destroy(prot->slab);
1803	prot->slab = NULL;
1804	goto out;
1805}
1806
1807EXPORT_SYMBOL(proto_register);
1808
1809void proto_unregister(struct proto *prot)
1810{
1811	write_lock(&proto_list_lock);
1812	list_del(&prot->node);
1813	write_unlock(&proto_list_lock);
1814
1815	if (prot->slab != NULL) {
1816		kmem_cache_destroy(prot->slab);
1817		prot->slab = NULL;
1818	}
1819
1820	if (prot->rsk_prot != NULL && prot->rsk_prot->slab != NULL) {
1821		const char *name = kmem_cache_name(prot->rsk_prot->slab);
1822
1823		kmem_cache_destroy(prot->rsk_prot->slab);
1824		kfree(name);
1825		prot->rsk_prot->slab = NULL;
1826	}
1827
1828	if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) {
1829		const char *name = kmem_cache_name(prot->twsk_prot->twsk_slab);
1830
1831		kmem_cache_destroy(prot->twsk_prot->twsk_slab);
1832		kfree(name);
1833		prot->twsk_prot->twsk_slab = NULL;
1834	}
1835}
1836
1837EXPORT_SYMBOL(proto_unregister);
1838
1839#ifdef CONFIG_PROC_FS
1840static inline struct proto *__proto_head(void)
1841{
1842	return list_entry(proto_list.next, struct proto, node);
1843}
1844
1845static inline struct proto *proto_head(void)
1846{
1847	return list_empty(&proto_list) ? NULL : __proto_head();
1848}
1849
1850static inline struct proto *proto_next(struct proto *proto)
1851{
1852	return proto->node.next == &proto_list ? NULL :
1853		list_entry(proto->node.next, struct proto, node);
1854}
1855
1856static inline struct proto *proto_get_idx(loff_t pos)
1857{
1858	struct proto *proto;
1859	loff_t i = 0;
1860
1861	list_for_each_entry(proto, &proto_list, node)
1862		if (i++ == pos)
1863			goto out;
1864
1865	proto = NULL;
1866out:
1867	return proto;
1868}
1869
1870static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
1871{
1872	read_lock(&proto_list_lock);
1873	return *pos ? proto_get_idx(*pos - 1) : SEQ_START_TOKEN;
1874}
1875
1876static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1877{
1878	++*pos;
1879	return v == SEQ_START_TOKEN ? proto_head() : proto_next(v);
1880}
1881
1882static void proto_seq_stop(struct seq_file *seq, void *v)
1883{
1884	read_unlock(&proto_list_lock);
1885}
1886
1887static char proto_method_implemented(const void *method)
1888{
1889	return method == NULL ? 'n' : 'y';
1890}
1891
1892static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
1893{
1894	seq_printf(seq, "%-9s %4u %6d  %6d   %-3s %6u   %-3s  %-10s "
1895			"%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
1896		   proto->name,
1897		   proto->obj_size,
1898		   proto->sockets_allocated != NULL ? atomic_read(proto->sockets_allocated) : -1,
1899		   proto->memory_allocated != NULL ? atomic_read(proto->memory_allocated) : -1,
1900		   proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI",
1901		   proto->max_header,
1902		   proto->slab == NULL ? "no" : "yes",
1903		   module_name(proto->owner),
1904		   proto_method_implemented(proto->close),
1905		   proto_method_implemented(proto->connect),
1906		   proto_method_implemented(proto->disconnect),
1907		   proto_method_implemented(proto->accept),
1908		   proto_method_implemented(proto->ioctl),
1909		   proto_method_implemented(proto->init),
1910		   proto_method_implemented(proto->destroy),
1911		   proto_method_implemented(proto->shutdown),
1912		   proto_method_implemented(proto->setsockopt),
1913		   proto_method_implemented(proto->getsockopt),
1914		   proto_method_implemented(proto->sendmsg),
1915		   proto_method_implemented(proto->recvmsg),
1916		   proto_method_implemented(proto->sendpage),
1917		   proto_method_implemented(proto->bind),
1918		   proto_method_implemented(proto->backlog_rcv),
1919		   proto_method_implemented(proto->hash),
1920		   proto_method_implemented(proto->unhash),
1921		   proto_method_implemented(proto->get_port),
1922		   proto_method_implemented(proto->enter_memory_pressure));
1923}
1924
1925static int proto_seq_show(struct seq_file *seq, void *v)
1926{
1927	if (v == SEQ_START_TOKEN)
1928		seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
1929			   "protocol",
1930			   "size",
1931			   "sockets",
1932			   "memory",
1933			   "press",
1934			   "maxhdr",
1935			   "slab",
1936			   "module",
1937			   "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
1938	else
1939		proto_seq_printf(seq, v);
1940	return 0;
1941}
1942
1943static const struct seq_operations proto_seq_ops = {
1944	.start  = proto_seq_start,
1945	.next   = proto_seq_next,
1946	.stop   = proto_seq_stop,
1947	.show   = proto_seq_show,
1948};
1949
1950static int proto_seq_open(struct inode *inode, struct file *file)
1951{
1952	return seq_open(file, &proto_seq_ops);
1953}
1954
1955static const struct file_operations proto_seq_fops = {
1956	.owner		= THIS_MODULE,
1957	.open		= proto_seq_open,
1958	.read		= seq_read,
1959	.llseek		= seq_lseek,
1960	.release	= seq_release,
1961};
1962
1963static int __init proto_init(void)
1964{
1965	/* register /proc/net/protocols */
1966	return proc_net_fops_create("protocols", S_IRUGO, &proto_seq_fops) == NULL ? -ENOBUFS : 0;
1967}
1968
1969subsys_initcall(proto_init);
1970
#endif /* CONFIG_PROC_FS */
1972
1973EXPORT_SYMBOL(sk_alloc);
1974EXPORT_SYMBOL(sk_free);
1975EXPORT_SYMBOL(sk_send_sigurg);
1976EXPORT_SYMBOL(sock_alloc_send_skb);
1977EXPORT_SYMBOL(sock_init_data);
1978EXPORT_SYMBOL(sock_kfree_s);
1979EXPORT_SYMBOL(sock_kmalloc);
1980EXPORT_SYMBOL(sock_no_accept);
1981EXPORT_SYMBOL(sock_no_bind);
1982EXPORT_SYMBOL(sock_no_connect);
1983EXPORT_SYMBOL(sock_no_getname);
1984EXPORT_SYMBOL(sock_no_getsockopt);
1985EXPORT_SYMBOL(sock_no_ioctl);
1986EXPORT_SYMBOL(sock_no_listen);
1987EXPORT_SYMBOL(sock_no_mmap);
1988EXPORT_SYMBOL(sock_no_poll);
1989EXPORT_SYMBOL(sock_no_recvmsg);
1990EXPORT_SYMBOL(sock_no_sendmsg);
1991EXPORT_SYMBOL(sock_no_sendpage);
1992EXPORT_SYMBOL(sock_no_setsockopt);
1993EXPORT_SYMBOL(sock_no_shutdown);
1994EXPORT_SYMBOL(sock_no_socketpair);
1995EXPORT_SYMBOL(sock_rfree);
1996EXPORT_SYMBOL(sock_setsockopt);
1997EXPORT_SYMBOL(sock_wfree);
1998EXPORT_SYMBOL(sock_wmalloc);
1999EXPORT_SYMBOL(sock_i_uid);
2000EXPORT_SYMBOL(sock_i_ino);
2001EXPORT_SYMBOL(sysctl_optmem_max);
2002#ifdef CONFIG_SYSCTL
2003EXPORT_SYMBOL(sysctl_rmem_max);
2004EXPORT_SYMBOL(sysctl_wmem_max);
2005#endif
2006