sock.c revision 6257ff2177ff02d7f260a7a501876aa41cb9a9f6
1/*
2 * INET		An implementation of the TCP/IP protocol suite for the LINUX
3 *		operating system.  INET is implemented using the  BSD Socket
4 *		interface as the means of communication with the user level.
5 *
6 *		Generic socket support routines. Memory allocators, socket lock/release
7 *		handler for protocols to use and generic option handler.
8 *
9 *
10 * Version:	$Id: sock.c,v 1.117 2002/02/01 22:01:03 davem Exp $
11 *
12 * Authors:	Ross Biro
13 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
14 *		Florian La Roche, <flla@stud.uni-sb.de>
15 *		Alan Cox, <A.Cox@swansea.ac.uk>
16 *
17 * Fixes:
18 *		Alan Cox	: 	Numerous verify_area() problems
19 *		Alan Cox	:	Connecting on a connecting socket
20 *					now returns an error for tcp.
21 *		Alan Cox	:	sock->protocol is set correctly
22 *					and is not sometimes left as 0.
23 *		Alan Cox	:	connect handles icmp errors on a
24 *					connect properly. Unfortunately there
25 *					is a restart syscall nasty there. I
26 *					can't match BSD without hacking the C
27 *					library. Ideas urgently sought!
28 *		Alan Cox	:	Disallow bind() to addresses that are
29 *					not ours - especially broadcast ones!!
30 *		Alan Cox	:	Socket 1024 _IS_ ok for users. (fencepost)
31 *		Alan Cox	:	sock_wfree/sock_rfree don't destroy sockets,
32 *					instead they leave that for the DESTROY timer.
33 *		Alan Cox	:	Clean up error flag in accept
34 *		Alan Cox	:	TCP ack handling is buggy, the DESTROY timer
35 *					was buggy. Put a remove_sock() in the handler
36 *					for memory when we hit 0. Also altered the timer
37 *					code. The ACK stuff can wait and needs major
38 *					TCP layer surgery.
39 *		Alan Cox	:	Fixed TCP ack bug, removed remove sock
40 *					and fixed timer/inet_bh race.
41 *		Alan Cox	:	Added zapped flag for TCP
42 *		Alan Cox	:	Move kfree_skb into skbuff.c and tidied up surplus code
43 *		Alan Cox	:	for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
44 *		Alan Cox	:	kfree_s calls now are kfree_skbmem so we can track skb resources
45 *		Alan Cox	:	Supports socket option broadcast now as does udp. Packet and raw need fixing.
46 *		Alan Cox	:	Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
47 *		Rick Sladkey	:	Relaxed UDP rules for matching packets.
48 *		C.E.Hawkins	:	IFF_PROMISC/SIOCGHWADDR support
49 *	Pauline Middelink	:	identd support
50 *		Alan Cox	:	Fixed connect() taking signals I think.
51 *		Alan Cox	:	SO_LINGER supported
52 *		Alan Cox	:	Error reporting fixes
53 *		Anonymous	:	inet_create tidied up (sk->reuse setting)
54 *		Alan Cox	:	inet sockets don't set sk->type!
55 *		Alan Cox	:	Split socket option code
56 *		Alan Cox	:	Callbacks
57 *		Alan Cox	:	Nagle flag for Charles & Johannes stuff
58 *		Alex		:	Removed restriction on inet fioctl
59 *		Alan Cox	:	Splitting INET from NET core
60 *		Alan Cox	:	Fixed bogus SO_TYPE handling in getsockopt()
61 *		Adam Caldwell	:	Missing return in SO_DONTROUTE/SO_DEBUG code
62 *		Alan Cox	:	Split IP from generic code
63 *		Alan Cox	:	New kfree_skbmem()
64 *		Alan Cox	:	Make SO_DEBUG superuser only.
65 *		Alan Cox	:	Allow anyone to clear SO_DEBUG
66 *					(compatibility fix)
67 *		Alan Cox	:	Added optimistic memory grabbing for AF_UNIX throughput.
68 *		Alan Cox	:	Allocator for a socket is settable.
69 *		Alan Cox	:	SO_ERROR includes soft errors.
70 *		Alan Cox	:	Allow NULL arguments on some SO_ opts
71 *		Alan Cox	: 	Generic socket allocation to make hooks
72 *					easier (suggested by Craig Metz).
73 *		Michael Pall	:	SO_ERROR returns positive errno again
74 *              Steve Whitehouse:       Added default destructor to free
75 *                                      protocol private data.
76 *              Steve Whitehouse:       Added various other default routines
77 *                                      common to several socket families.
78 *              Chris Evans     :       Call suser() check last on F_SETOWN
79 *		Jay Schulist	:	Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
80 *		Andi Kleen	:	Add sock_kmalloc()/sock_kfree_s()
81 *		Andi Kleen	:	Fix write_space callback
82 *		Chris Evans	:	Security fixes - signedness again
83 *		Arnaldo C. Melo :       cleanups, use skb_queue_purge
84 *
85 * To Fix:
86 *
87 *
88 *		This program is free software; you can redistribute it and/or
89 *		modify it under the terms of the GNU General Public License
90 *		as published by the Free Software Foundation; either version
91 *		2 of the License, or (at your option) any later version.
92 */
93
94#include <linux/capability.h>
95#include <linux/errno.h>
96#include <linux/types.h>
97#include <linux/socket.h>
98#include <linux/in.h>
99#include <linux/kernel.h>
100#include <linux/module.h>
101#include <linux/proc_fs.h>
102#include <linux/seq_file.h>
103#include <linux/sched.h>
104#include <linux/timer.h>
105#include <linux/string.h>
106#include <linux/sockios.h>
107#include <linux/net.h>
108#include <linux/mm.h>
109#include <linux/slab.h>
110#include <linux/interrupt.h>
111#include <linux/poll.h>
112#include <linux/tcp.h>
113#include <linux/init.h>
114#include <linux/highmem.h>
115
116#include <asm/uaccess.h>
117#include <asm/system.h>
118
119#include <linux/netdevice.h>
120#include <net/protocol.h>
121#include <linux/skbuff.h>
122#include <net/net_namespace.h>
123#include <net/request_sock.h>
124#include <net/sock.h>
125#include <net/xfrm.h>
126#include <linux/ipsec.h>
127
128#include <linux/filter.h>
129
130#ifdef CONFIG_INET
131#include <net/tcp.h>
132#endif
133
134/*
135 * Each address family might have different locking rules, so we have
136 * one slock key per address family:
137 */
138static struct lock_class_key af_family_keys[AF_MAX];
139static struct lock_class_key af_family_slock_keys[AF_MAX];
140
141#ifdef CONFIG_DEBUG_LOCK_ALLOC
142/*
143 * Make lock validator output more readable. (we pre-construct these
144 * strings at build time, so that runtime initialization of socket
145 * locks is fast):
146 */
147static const char *af_family_key_strings[AF_MAX+1] = {
148  "sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX"     , "sk_lock-AF_INET"     ,
149  "sk_lock-AF_AX25"  , "sk_lock-AF_IPX"      , "sk_lock-AF_APPLETALK",
150  "sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE"   , "sk_lock-AF_ATMPVC"   ,
151  "sk_lock-AF_X25"   , "sk_lock-AF_INET6"    , "sk_lock-AF_ROSE"     ,
152  "sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI"  , "sk_lock-AF_SECURITY" ,
153  "sk_lock-AF_KEY"   , "sk_lock-AF_NETLINK"  , "sk_lock-AF_PACKET"   ,
154  "sk_lock-AF_ASH"   , "sk_lock-AF_ECONET"   , "sk_lock-AF_ATMSVC"   ,
155  "sk_lock-21"       , "sk_lock-AF_SNA"      , "sk_lock-AF_IRDA"     ,
156  "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE"  , "sk_lock-AF_LLC"      ,
157  "sk_lock-27"       , "sk_lock-28"          , "sk_lock-29"          ,
158  "sk_lock-AF_TIPC"  , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV"        ,
159  "sk_lock-AF_RXRPC" , "sk_lock-AF_MAX"
160};
161static const char *af_family_slock_key_strings[AF_MAX+1] = {
162  "slock-AF_UNSPEC", "slock-AF_UNIX"     , "slock-AF_INET"     ,
163  "slock-AF_AX25"  , "slock-AF_IPX"      , "slock-AF_APPLETALK",
164  "slock-AF_NETROM", "slock-AF_BRIDGE"   , "slock-AF_ATMPVC"   ,
165  "slock-AF_X25"   , "slock-AF_INET6"    , "slock-AF_ROSE"     ,
166  "slock-AF_DECnet", "slock-AF_NETBEUI"  , "slock-AF_SECURITY" ,
167  "slock-AF_KEY"   , "slock-AF_NETLINK"  , "slock-AF_PACKET"   ,
168  "slock-AF_ASH"   , "slock-AF_ECONET"   , "slock-AF_ATMSVC"   ,
169  "slock-21"       , "slock-AF_SNA"      , "slock-AF_IRDA"     ,
170  "slock-AF_PPPOX" , "slock-AF_WANPIPE"  , "slock-AF_LLC"      ,
171  "slock-27"       , "slock-28"          , "slock-29"          ,
172  "slock-AF_TIPC"  , "slock-AF_BLUETOOTH", "slock-AF_IUCV"     ,
173  "slock-AF_RXRPC" , "slock-AF_MAX"
174};
175static const char *af_family_clock_key_strings[AF_MAX+1] = {
176  "clock-AF_UNSPEC", "clock-AF_UNIX"     , "clock-AF_INET"     ,
177  "clock-AF_AX25"  , "clock-AF_IPX"      , "clock-AF_APPLETALK",
178  "clock-AF_NETROM", "clock-AF_BRIDGE"   , "clock-AF_ATMPVC"   ,
179  "clock-AF_X25"   , "clock-AF_INET6"    , "clock-AF_ROSE"     ,
180  "clock-AF_DECnet", "clock-AF_NETBEUI"  , "clock-AF_SECURITY" ,
181  "clock-AF_KEY"   , "clock-AF_NETLINK"  , "clock-AF_PACKET"   ,
182  "clock-AF_ASH"   , "clock-AF_ECONET"   , "clock-AF_ATMSVC"   ,
183  "clock-21"       , "clock-AF_SNA"      , "clock-AF_IRDA"     ,
184  "clock-AF_PPPOX" , "clock-AF_WANPIPE"  , "clock-AF_LLC"      ,
185  "clock-27"       , "clock-28"          , "clock-29"          ,
186  "clock-AF_TIPC"  , "clock-AF_BLUETOOTH", "clock-AF_IUCV"     ,
187  "clock-AF_RXRPC" , "clock-AF_MAX"
188};
189#endif
190
191/*
192 * sk_callback_lock locking rules are per-address-family,
193 * so split the lock classes by using a per-AF key:
194 */
195static struct lock_class_key af_callback_keys[AF_MAX];
196
197/* Take into consideration the size of the struct sk_buff overhead in the
198 * determination of these values, since that is non-constant across
199 * platforms.  This makes socket queueing behavior and performance
200 * not depend upon such differences.
201 */
202#define _SK_MEM_PACKETS		256
203#define _SK_MEM_OVERHEAD	(sizeof(struct sk_buff) + 256)
204#define SK_WMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
205#define SK_RMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
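
/*
 * Worked example (illustrative only): assuming sizeof(struct sk_buff)
 * is roughly 240 bytes on a 64-bit build, _SK_MEM_OVERHEAD comes to
 * about 496 bytes, so SK_WMEM_MAX and SK_RMEM_MAX default to roughly
 * 496 * 256 = 126976 bytes, i.e. about 124 KB per socket.
 */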
206
207/* Run time adjustable parameters. */
208__u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
209__u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
210__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
211__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
212
213/* Maximal space eaten by iovec or ancillary data plus some space */
214int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
215
216static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
217{
218	struct timeval tv;
219
220	if (optlen < sizeof(tv))
221		return -EINVAL;
222	if (copy_from_user(&tv, optval, sizeof(tv)))
223		return -EFAULT;
224	if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC)
225		return -EDOM;
226
227	if (tv.tv_sec < 0) {
228		static int warned __read_mostly;
229
230		*timeo_p = 0;
231		if (warned < 10 && net_ratelimit()) {
232			warned++;
233			printk(KERN_INFO "sock_set_timeout: `%s' (pid %d) "
234			       "tries to set negative timeout\n", current->comm, task_pid_nr(current));
235		}
236		return 0;
237	}
238	*timeo_p = MAX_SCHEDULE_TIMEOUT;
239	if (tv.tv_sec == 0 && tv.tv_usec == 0)
240		return 0;
241	if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
242		*timeo_p = tv.tv_sec*HZ + (tv.tv_usec+(1000000/HZ-1))/(1000000/HZ);
243	return 0;
244}
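
/*
 * Illustrative sketch (not part of this file): sock_set_timeout() is
 * what SO_RCVTIMEO/SO_SNDTIMEO setsockopt calls end up in, so a user
 * space receive timeout of 2.5 seconds (arbitrary example value) is
 * set as:
 *
 *	struct timeval tv = { .tv_sec = 2, .tv_usec = 500000 };
 *
 *	if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)) < 0)
 *		perror("setsockopt SO_RCVTIMEO");
 *
 * The timeval is converted to jiffies above, rounding the microsecond
 * part up to the next tick.
 */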
245
246static void sock_warn_obsolete_bsdism(const char *name)
247{
248	static int warned;
249	static char warncomm[TASK_COMM_LEN];
250	if (strcmp(warncomm, current->comm) && warned < 5) {
251		strcpy(warncomm,  current->comm);
252		printk(KERN_WARNING "process `%s' is using obsolete "
253		       "%s SO_BSDCOMPAT\n", warncomm, name);
254		warned++;
255	}
256}
257
258static void sock_disable_timestamp(struct sock *sk)
259{
260	if (sock_flag(sk, SOCK_TIMESTAMP)) {
261		sock_reset_flag(sk, SOCK_TIMESTAMP);
262		net_disable_timestamp();
263	}
264}
265
266
267int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
268{
269	int err = 0;
270	int skb_len;
271
272	/* Cast sk->rcvbuf to unsigned... It's pointless, but reduces
273	   number of warnings when compiling with -W --ANK
274	 */
275	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
276	    (unsigned)sk->sk_rcvbuf) {
277		err = -ENOMEM;
278		goto out;
279	}
280
281	err = sk_filter(sk, skb);
282	if (err)
283		goto out;
284
285	skb->dev = NULL;
286	skb_set_owner_r(skb, sk);
287
288	/* Cache the SKB length before we tack it onto the receive
289	 * queue.  Once it is added it no longer belongs to us and
290	 * may be freed by other threads of control pulling packets
291	 * from the queue.
292	 */
293	skb_len = skb->len;
294
295	skb_queue_tail(&sk->sk_receive_queue, skb);
296
297	if (!sock_flag(sk, SOCK_DEAD))
298		sk->sk_data_ready(sk, skb_len);
299out:
300	return err;
301}
302EXPORT_SYMBOL(sock_queue_rcv_skb);
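
/*
 * Illustrative sketch (not part of this file): a datagram protocol's
 * input path typically hands a packet to the owning socket like this,
 * dropping it itself when queueing fails (the helper does not free the
 * skb on error):
 *
 *	if (sock_queue_rcv_skb(sk, skb) < 0) {
 *		kfree_skb(skb);
 *		return -1;
 *	}
 *	return 0;
 */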
303
304int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
305{
306	int rc = NET_RX_SUCCESS;
307
308	if (sk_filter(sk, skb))
309		goto discard_and_relse;
310
311	skb->dev = NULL;
312
313	if (nested)
314		bh_lock_sock_nested(sk);
315	else
316		bh_lock_sock(sk);
317	if (!sock_owned_by_user(sk)) {
318		/*
319		 * trylock + unlock semantics:
320		 */
321		mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);
322
323		rc = sk->sk_backlog_rcv(sk, skb);
324
325		mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
326	} else
327		sk_add_backlog(sk, skb);
328	bh_unlock_sock(sk);
329out:
330	sock_put(sk);
331	return rc;
332discard_and_relse:
333	kfree_skb(skb);
334	goto out;
335}
336EXPORT_SYMBOL(sk_receive_skb);
337
338struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
339{
340	struct dst_entry *dst = sk->sk_dst_cache;
341
342	if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
343		sk->sk_dst_cache = NULL;
344		dst_release(dst);
345		return NULL;
346	}
347
348	return dst;
349}
350EXPORT_SYMBOL(__sk_dst_check);
351
352struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
353{
354	struct dst_entry *dst = sk_dst_get(sk);
355
356	if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
357		sk_dst_reset(sk);
358		dst_release(dst);
359		return NULL;
360	}
361
362	return dst;
363}
364EXPORT_SYMBOL(sk_dst_check);
365
366static int sock_bindtodevice(struct sock *sk, char __user *optval, int optlen)
367{
368	int ret = -ENOPROTOOPT;
369#ifdef CONFIG_NETDEVICES
370	struct net *net = sk->sk_net;
371	char devname[IFNAMSIZ];
372	int index;
373
374	/* Sorry... */
375	ret = -EPERM;
376	if (!capable(CAP_NET_RAW))
377		goto out;
378
379	ret = -EINVAL;
380	if (optlen < 0)
381		goto out;
382
383	/* Bind this socket to a particular device like "eth0",
384	 * as specified in the passed interface name. If the
385	 * name is "" or the option length is zero the socket
386	 * is not bound.
387	 */
388	if (optlen > IFNAMSIZ - 1)
389		optlen = IFNAMSIZ - 1;
390	memset(devname, 0, sizeof(devname));
391
392	ret = -EFAULT;
393	if (copy_from_user(devname, optval, optlen))
394		goto out;
395
396	if (devname[0] == '\0') {
397		index = 0;
398	} else {
399		struct net_device *dev = dev_get_by_name(net, devname);
400
401		ret = -ENODEV;
402		if (!dev)
403			goto out;
404
405		index = dev->ifindex;
406		dev_put(dev);
407	}
408
409	lock_sock(sk);
410	sk->sk_bound_dev_if = index;
411	sk_dst_reset(sk);
412	release_sock(sk);
413
414	ret = 0;
415
416out:
417#endif
418
419	return ret;
420}
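
/*
 * Illustrative sketch (not part of this file): user-space use of the
 * SO_BINDTODEVICE option handled above; "eth0" is just an example
 * interface name, the caller needs CAP_NET_RAW, and passing an empty
 * string removes the binding:
 *
 *	if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE,
 *		       "eth0", sizeof("eth0")) < 0)
 *		perror("SO_BINDTODEVICE");
 */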
421
422/*
423 *	This is meant for all protocols to use and covers goings on
424 *	at the socket level. Everything here is generic.
425 */
426
427int sock_setsockopt(struct socket *sock, int level, int optname,
428		    char __user *optval, int optlen)
429{
430	struct sock *sk=sock->sk;
431	int val;
432	int valbool;
433	struct linger ling;
434	int ret = 0;
435
436	/*
437	 *	Options without arguments
438	 */
439
440#ifdef SO_DONTLINGER		/* Compatibility item... */
441	if (optname == SO_DONTLINGER) {
442		lock_sock(sk);
443		sock_reset_flag(sk, SOCK_LINGER);
444		release_sock(sk);
445		return 0;
446	}
447#endif
448
449	if (optname == SO_BINDTODEVICE)
450		return sock_bindtodevice(sk, optval, optlen);
451
452	if (optlen < sizeof(int))
453		return -EINVAL;
454
455	if (get_user(val, (int __user *)optval))
456		return -EFAULT;
457
458	valbool = val?1:0;
459
460	lock_sock(sk);
461
462	switch(optname) {
463	case SO_DEBUG:
464		if (val && !capable(CAP_NET_ADMIN)) {
465			ret = -EACCES;
466		}
467		else if (valbool)
468			sock_set_flag(sk, SOCK_DBG);
469		else
470			sock_reset_flag(sk, SOCK_DBG);
471		break;
472	case SO_REUSEADDR:
473		sk->sk_reuse = valbool;
474		break;
475	case SO_TYPE:
476	case SO_ERROR:
477		ret = -ENOPROTOOPT;
478		break;
479	case SO_DONTROUTE:
480		if (valbool)
481			sock_set_flag(sk, SOCK_LOCALROUTE);
482		else
483			sock_reset_flag(sk, SOCK_LOCALROUTE);
484		break;
485	case SO_BROADCAST:
486		sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
487		break;
488	case SO_SNDBUF:
489		/* Don't error on this. BSD doesn't, and if you think
490		   about it, this is right. Otherwise apps have to
491		   play 'guess the biggest size' games. RCVBUF/SNDBUF
492		   are treated in BSD as hints */
493
494		if (val > sysctl_wmem_max)
495			val = sysctl_wmem_max;
496set_sndbuf:
497		sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
498		if ((val * 2) < SOCK_MIN_SNDBUF)
499			sk->sk_sndbuf = SOCK_MIN_SNDBUF;
500		else
501			sk->sk_sndbuf = val * 2;
502
503		/*
504		 *	Wake up sending tasks if we
505		 *	upped the value.
506		 */
507		sk->sk_write_space(sk);
508		break;
509
510	case SO_SNDBUFFORCE:
511		if (!capable(CAP_NET_ADMIN)) {
512			ret = -EPERM;
513			break;
514		}
515		goto set_sndbuf;
516
517	case SO_RCVBUF:
518		/* Don't error on this. BSD doesn't, and if you think
519		   about it, this is right. Otherwise apps have to
520		   play 'guess the biggest size' games. RCVBUF/SNDBUF
521		   are treated in BSD as hints */
522
523		if (val > sysctl_rmem_max)
524			val = sysctl_rmem_max;
525set_rcvbuf:
526		sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
527		/*
528		 * We double it on the way in to account for
529		 * "struct sk_buff" etc. overhead.   Applications
530		 * assume that the SO_RCVBUF setting they make will
531		 * allow that much actual data to be received on that
532		 * socket.
533		 *
534		 * Applications are unaware that "struct sk_buff" and
535		 * other overheads allocate from the receive buffer
536		 * during socket buffer allocation.
537		 *
538		 * And after considering the possible alternatives,
539		 * returning the value we actually used in getsockopt
540		 * is the most desirable behavior.
541		 */
542		if ((val * 2) < SOCK_MIN_RCVBUF)
543			sk->sk_rcvbuf = SOCK_MIN_RCVBUF;
544		else
545			sk->sk_rcvbuf = val * 2;
546		break;
547
548	case SO_RCVBUFFORCE:
549		if (!capable(CAP_NET_ADMIN)) {
550			ret = -EPERM;
551			break;
552		}
553		goto set_rcvbuf;
554
555	case SO_KEEPALIVE:
556#ifdef CONFIG_INET
557		if (sk->sk_protocol == IPPROTO_TCP)
558			tcp_set_keepalive(sk, valbool);
559#endif
560		sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
561		break;
562
563	case SO_OOBINLINE:
564		sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
565		break;
566
567	case SO_NO_CHECK:
568		sk->sk_no_check = valbool;
569		break;
570
571	case SO_PRIORITY:
572		if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN))
573			sk->sk_priority = val;
574		else
575			ret = -EPERM;
576		break;
577
578	case SO_LINGER:
579		if (optlen < sizeof(ling)) {
580			ret = -EINVAL;	/* 1003.1g */
581			break;
582		}
583		if (copy_from_user(&ling,optval,sizeof(ling))) {
584			ret = -EFAULT;
585			break;
586		}
587		if (!ling.l_onoff)
588			sock_reset_flag(sk, SOCK_LINGER);
589		else {
590#if (BITS_PER_LONG == 32)
591			if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
592				sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
593			else
594#endif
595				sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
596			sock_set_flag(sk, SOCK_LINGER);
597		}
598		break;
599
600	case SO_BSDCOMPAT:
601		sock_warn_obsolete_bsdism("setsockopt");
602		break;
603
604	case SO_PASSCRED:
605		if (valbool)
606			set_bit(SOCK_PASSCRED, &sock->flags);
607		else
608			clear_bit(SOCK_PASSCRED, &sock->flags);
609		break;
610
611	case SO_TIMESTAMP:
612	case SO_TIMESTAMPNS:
613		if (valbool)  {
614			if (optname == SO_TIMESTAMP)
615				sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
616			else
617				sock_set_flag(sk, SOCK_RCVTSTAMPNS);
618			sock_set_flag(sk, SOCK_RCVTSTAMP);
619			sock_enable_timestamp(sk);
620		} else {
621			sock_reset_flag(sk, SOCK_RCVTSTAMP);
622			sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
623		}
624		break;
625
626	case SO_RCVLOWAT:
627		if (val < 0)
628			val = INT_MAX;
629		sk->sk_rcvlowat = val ? : 1;
630		break;
631
632	case SO_RCVTIMEO:
633		ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
634		break;
635
636	case SO_SNDTIMEO:
637		ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
638		break;
639
640	case SO_ATTACH_FILTER:
641		ret = -EINVAL;
642		if (optlen == sizeof(struct sock_fprog)) {
643			struct sock_fprog fprog;
644
645			ret = -EFAULT;
646			if (copy_from_user(&fprog, optval, sizeof(fprog)))
647				break;
648
649			ret = sk_attach_filter(&fprog, sk);
650		}
651		break;
652
653	case SO_DETACH_FILTER:
654		ret = sk_detach_filter(sk);
655		break;
656
657	case SO_PASSSEC:
658		if (valbool)
659			set_bit(SOCK_PASSSEC, &sock->flags);
660		else
661			clear_bit(SOCK_PASSSEC, &sock->flags);
662		break;
663
664		/* We implement SO_SNDLOWAT etc. to
665		   not be settable (1003.1g 5.3) */
666	default:
667		ret = -ENOPROTOOPT;
668		break;
669	}
670	release_sock(sk);
671	return ret;
672}
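
/*
 * Illustrative sketch (not part of this file): the doubling performed
 * for SO_SNDBUF/SO_RCVBUF above is visible from user space; 65536 is
 * an arbitrary example request:
 *
 *	int req = 65536, eff;
 *	socklen_t len = sizeof(eff);
 *
 *	setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &req, sizeof(req));
 *	getsockopt(fd, SOL_SOCKET, SO_RCVBUF, &eff, &len);
 *
 * eff will normally read back as 2 * req (the request is clamped to
 * sysctl_rmem_max before doubling).
 */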
673
674
675int sock_getsockopt(struct socket *sock, int level, int optname,
676		    char __user *optval, int __user *optlen)
677{
678	struct sock *sk = sock->sk;
679
680	union {
681		int val;
682		struct linger ling;
683		struct timeval tm;
684	} v;
685
686	unsigned int lv = sizeof(int);
687	int len;
688
689	if (get_user(len, optlen))
690		return -EFAULT;
691	if (len < 0)
692		return -EINVAL;
693
694	switch(optname) {
695	case SO_DEBUG:
696		v.val = sock_flag(sk, SOCK_DBG);
697		break;
698
699	case SO_DONTROUTE:
700		v.val = sock_flag(sk, SOCK_LOCALROUTE);
701		break;
702
703	case SO_BROADCAST:
704		v.val = !!sock_flag(sk, SOCK_BROADCAST);
705		break;
706
707	case SO_SNDBUF:
708		v.val = sk->sk_sndbuf;
709		break;
710
711	case SO_RCVBUF:
712		v.val = sk->sk_rcvbuf;
713		break;
714
715	case SO_REUSEADDR:
716		v.val = sk->sk_reuse;
717		break;
718
719	case SO_KEEPALIVE:
720		v.val = !!sock_flag(sk, SOCK_KEEPOPEN);
721		break;
722
723	case SO_TYPE:
724		v.val = sk->sk_type;
725		break;
726
727	case SO_ERROR:
728		v.val = -sock_error(sk);
729		if (v.val==0)
730			v.val = xchg(&sk->sk_err_soft, 0);
731		break;
732
733	case SO_OOBINLINE:
734		v.val = !!sock_flag(sk, SOCK_URGINLINE);
735		break;
736
737	case SO_NO_CHECK:
738		v.val = sk->sk_no_check;
739		break;
740
741	case SO_PRIORITY:
742		v.val = sk->sk_priority;
743		break;
744
745	case SO_LINGER:
746		lv		= sizeof(v.ling);
747		v.ling.l_onoff	= !!sock_flag(sk, SOCK_LINGER);
748		v.ling.l_linger	= sk->sk_lingertime / HZ;
749		break;
750
751	case SO_BSDCOMPAT:
752		sock_warn_obsolete_bsdism("getsockopt");
753		break;
754
755	case SO_TIMESTAMP:
756		v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
757				!sock_flag(sk, SOCK_RCVTSTAMPNS);
758		break;
759
760	case SO_TIMESTAMPNS:
761		v.val = sock_flag(sk, SOCK_RCVTSTAMPNS);
762		break;
763
764	case SO_RCVTIMEO:
765		lv=sizeof(struct timeval);
766		if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
767			v.tm.tv_sec = 0;
768			v.tm.tv_usec = 0;
769		} else {
770			v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
771			v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ;
772		}
773		break;
774
775	case SO_SNDTIMEO:
776		lv=sizeof(struct timeval);
777		if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
778			v.tm.tv_sec = 0;
779			v.tm.tv_usec = 0;
780		} else {
781			v.tm.tv_sec = sk->sk_sndtimeo / HZ;
782			v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ;
783		}
784		break;
785
786	case SO_RCVLOWAT:
787		v.val = sk->sk_rcvlowat;
788		break;
789
790	case SO_SNDLOWAT:
791		v.val=1;
792		break;
793
794	case SO_PASSCRED:
795		v.val = test_bit(SOCK_PASSCRED, &sock->flags) ? 1 : 0;
796		break;
797
798	case SO_PEERCRED:
799		if (len > sizeof(sk->sk_peercred))
800			len = sizeof(sk->sk_peercred);
801		if (copy_to_user(optval, &sk->sk_peercred, len))
802			return -EFAULT;
803		goto lenout;
804
805	case SO_PEERNAME:
806	{
807		char address[128];
808
809		if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
810			return -ENOTCONN;
811		if (lv < len)
812			return -EINVAL;
813		if (copy_to_user(optval, address, len))
814			return -EFAULT;
815		goto lenout;
816	}
817
818	/* Dubious BSD thing... Probably nobody even uses it, but
819	 * the UNIX standard wants it for whatever reason... -DaveM
820	 */
821	case SO_ACCEPTCONN:
822		v.val = sk->sk_state == TCP_LISTEN;
823		break;
824
825	case SO_PASSSEC:
826		v.val = test_bit(SOCK_PASSSEC, &sock->flags) ? 1 : 0;
827		break;
828
829	case SO_PEERSEC:
830		return security_socket_getpeersec_stream(sock, optval, optlen, len);
831
832	default:
833		return -ENOPROTOOPT;
834	}
835
836	if (len > lv)
837		len = lv;
838	if (copy_to_user(optval, &v, len))
839		return -EFAULT;
840lenout:
841	if (put_user(len, optlen))
842		return -EFAULT;
843	return 0;
844}
845
846/*
847 * Initialize an sk_lock.
848 *
849 * (We also register the sk_lock with the lock validator.)
850 */
851static inline void sock_lock_init(struct sock *sk)
852{
853	sock_lock_init_class_and_name(sk,
854			af_family_slock_key_strings[sk->sk_family],
855			af_family_slock_keys + sk->sk_family,
856			af_family_key_strings[sk->sk_family],
857			af_family_keys + sk->sk_family);
858}
859
860static void sock_copy(struct sock *nsk, const struct sock *osk)
861{
862#ifdef CONFIG_SECURITY_NETWORK
863	void *sptr = nsk->sk_security;
864#endif
865
866	memcpy(nsk, osk, osk->sk_prot->obj_size);
867#ifdef CONFIG_SECURITY_NETWORK
868	nsk->sk_security = sptr;
869	security_sk_clone(osk, nsk);
870#endif
871}
872
873static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
874		int family)
875{
876	struct sock *sk;
877	struct kmem_cache *slab;
878
879	slab = prot->slab;
880	if (slab != NULL)
881		sk = kmem_cache_alloc(slab, priority);
882	else
883		sk = kmalloc(prot->obj_size, priority);
884
885	if (sk != NULL) {
886		if (security_sk_alloc(sk, family, priority))
887			goto out_free;
888
889		if (!try_module_get(prot->owner))
890			goto out_free_sec;
891	}
892
893	return sk;
894
895out_free_sec:
896	security_sk_free(sk);
897out_free:
898	if (slab != NULL)
899		kmem_cache_free(slab, sk);
900	else
901		kfree(sk);
902	return NULL;
903}
904
905static void sk_prot_free(struct proto *prot, struct sock *sk)
906{
907	struct kmem_cache *slab;
908	struct module *owner;
909
910	owner = prot->owner;
911	slab = prot->slab;
912
913	security_sk_free(sk);
914	if (slab != NULL)
915		kmem_cache_free(slab, sk);
916	else
917		kfree(sk);
918	module_put(owner);
919}
920
921/**
922 *	sk_alloc - All socket objects are allocated here
923 *	@net: the applicable net namespace
924 *	@family: protocol family
925 *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
926 *	@prot: struct proto associated with this new sock instance
927 *	@zero_it: if we should zero the newly allocated sock
928 */
929struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
930		      struct proto *prot)
931{
932	struct sock *sk;
933
934	sk = sk_prot_alloc(prot, priority | __GFP_ZERO, family);
935	if (sk) {
936		sk->sk_family = family;
937		/*
938		 * See comment in struct sock definition to understand
939		 * why we need sk_prot_creator -acme
940		 */
941		sk->sk_prot = sk->sk_prot_creator = prot;
942		sock_lock_init(sk);
943		sk->sk_net = get_net(net);
944	}
945
946	return sk;
947}
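
/*
 * Illustrative sketch (not part of this file): an address family's
 * create() hook typically pairs sk_alloc() with sock_init_data();
 * PF_FOO and foo_prot are hypothetical names:
 *
 *	sk = sk_alloc(net, PF_FOO, GFP_KERNEL, &foo_prot);
 *	if (!sk)
 *		return -ENOBUFS;
 *	sock_init_data(sock, sk);
 */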
948
949void sk_free(struct sock *sk)
950{
951	struct sk_filter *filter;
952
953	if (sk->sk_destruct)
954		sk->sk_destruct(sk);
955
956	filter = rcu_dereference(sk->sk_filter);
957	if (filter) {
958		sk_filter_uncharge(sk, filter);
959		rcu_assign_pointer(sk->sk_filter, NULL);
960	}
961
962	sock_disable_timestamp(sk);
963
964	if (atomic_read(&sk->sk_omem_alloc))
965		printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n",
966		       __FUNCTION__, atomic_read(&sk->sk_omem_alloc));
967
968	put_net(sk->sk_net);
969	sk_prot_free(sk->sk_prot_creator, sk);
970}
971
972struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
973{
974	struct sock *newsk;
975
976	newsk = sk_prot_alloc(sk->sk_prot, priority, sk->sk_family);
977	if (newsk != NULL) {
978		struct sk_filter *filter;
979
980		sock_copy(newsk, sk);
981
982		/* SANITY */
983		get_net(newsk->sk_net);
984		sk_node_init(&newsk->sk_node);
985		sock_lock_init(newsk);
986		bh_lock_sock(newsk);
987		newsk->sk_backlog.head	= newsk->sk_backlog.tail = NULL;
988
989		atomic_set(&newsk->sk_rmem_alloc, 0);
990		atomic_set(&newsk->sk_wmem_alloc, 0);
991		atomic_set(&newsk->sk_omem_alloc, 0);
992		skb_queue_head_init(&newsk->sk_receive_queue);
993		skb_queue_head_init(&newsk->sk_write_queue);
994#ifdef CONFIG_NET_DMA
995		skb_queue_head_init(&newsk->sk_async_wait_queue);
996#endif
997
998		rwlock_init(&newsk->sk_dst_lock);
999		rwlock_init(&newsk->sk_callback_lock);
1000		lockdep_set_class_and_name(&newsk->sk_callback_lock,
1001				af_callback_keys + newsk->sk_family,
1002				af_family_clock_key_strings[newsk->sk_family]);
1003
1004		newsk->sk_dst_cache	= NULL;
1005		newsk->sk_wmem_queued	= 0;
1006		newsk->sk_forward_alloc = 0;
1007		newsk->sk_send_head	= NULL;
1008		newsk->sk_userlocks	= sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
1009
1010		sock_reset_flag(newsk, SOCK_DONE);
1011		skb_queue_head_init(&newsk->sk_error_queue);
1012
1013		filter = newsk->sk_filter;
1014		if (filter != NULL)
1015			sk_filter_charge(newsk, filter);
1016
1017		if (unlikely(xfrm_sk_clone_policy(newsk))) {
1018			/* It is still a raw copy of the parent, so invalidate
1019			 * the destructor and do a plain sk_free() */
1020			newsk->sk_destruct = NULL;
1021			sk_free(newsk);
1022			newsk = NULL;
1023			goto out;
1024		}
1025
1026		newsk->sk_err	   = 0;
1027		newsk->sk_priority = 0;
1028		atomic_set(&newsk->sk_refcnt, 2);
1029
1030		/*
1031		 * Increment the counter in the same struct proto as the master
1032		 * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
1033		 * is the same as sk->sk_prot->socks, as this field was copied
1034		 * with memcpy).
1035		 *
1036		 * This _changes_ the previous behaviour, where
1037		 * tcp_create_openreq_child always was incrementing the
1038		 * equivalent to tcp_prot->socks (inet_sock_nr), so this has
1039		 * to be taken into account in all callers. -acme
1040		 */
1041		sk_refcnt_debug_inc(newsk);
1042		newsk->sk_socket = NULL;
1043		newsk->sk_sleep	 = NULL;
1044
1045		if (newsk->sk_prot->sockets_allocated)
1046			atomic_inc(newsk->sk_prot->sockets_allocated);
1047	}
1048out:
1049	return newsk;
1050}
1051
1052EXPORT_SYMBOL_GPL(sk_clone);
1053
1054void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
1055{
1056	__sk_dst_set(sk, dst);
1057	sk->sk_route_caps = dst->dev->features;
1058	if (sk->sk_route_caps & NETIF_F_GSO)
1059		sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
1060	if (sk_can_gso(sk)) {
1061		if (dst->header_len)
1062			sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
1063		else
1064			sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
1065	}
1066}
1067EXPORT_SYMBOL_GPL(sk_setup_caps);
1068
1069void __init sk_init(void)
1070{
1071	if (num_physpages <= 4096) {
1072		sysctl_wmem_max = 32767;
1073		sysctl_rmem_max = 32767;
1074		sysctl_wmem_default = 32767;
1075		sysctl_rmem_default = 32767;
1076	} else if (num_physpages >= 131072) {
1077		sysctl_wmem_max = 131071;
1078		sysctl_rmem_max = 131071;
1079	}
1080}
1081
1082/*
1083 *	Simple resource managers for sockets.
1084 */
1085
1086
1087/*
1088 * Write buffer destructor automatically called from kfree_skb.
1089 */
1090void sock_wfree(struct sk_buff *skb)
1091{
1092	struct sock *sk = skb->sk;
1093
1094	/* In case it might be waiting for more memory. */
1095	atomic_sub(skb->truesize, &sk->sk_wmem_alloc);
1096	if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE))
1097		sk->sk_write_space(sk);
1098	sock_put(sk);
1099}
1100
1101/*
1102 * Read buffer destructor automatically called from kfree_skb.
1103 */
1104void sock_rfree(struct sk_buff *skb)
1105{
1106	struct sock *sk = skb->sk;
1107
1108	atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
1109}
1110
1111
1112int sock_i_uid(struct sock *sk)
1113{
1114	int uid;
1115
1116	read_lock(&sk->sk_callback_lock);
1117	uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : 0;
1118	read_unlock(&sk->sk_callback_lock);
1119	return uid;
1120}
1121
1122unsigned long sock_i_ino(struct sock *sk)
1123{
1124	unsigned long ino;
1125
1126	read_lock(&sk->sk_callback_lock);
1127	ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
1128	read_unlock(&sk->sk_callback_lock);
1129	return ino;
1130}
1131
1132/*
1133 * Allocate a skb from the socket's send buffer.
1134 */
1135struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
1136			     gfp_t priority)
1137{
1138	if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
1139		struct sk_buff * skb = alloc_skb(size, priority);
1140		if (skb) {
1141			skb_set_owner_w(skb, sk);
1142			return skb;
1143		}
1144	}
1145	return NULL;
1146}
1147
1148/*
1149 * Allocate a skb from the socket's receive buffer.
1150 */
1151struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force,
1152			     gfp_t priority)
1153{
1154	if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
1155		struct sk_buff *skb = alloc_skb(size, priority);
1156		if (skb) {
1157			skb_set_owner_r(skb, sk);
1158			return skb;
1159		}
1160	}
1161	return NULL;
1162}
1163
1164/*
1165 * Allocate a memory block from the socket's option memory buffer.
1166 */
1167void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
1168{
1169	if ((unsigned)size <= sysctl_optmem_max &&
1170	    atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
1171		void *mem;
1172		/* First do the add, to avoid the race if kmalloc
1173		 * might sleep.
1174		 */
1175		atomic_add(size, &sk->sk_omem_alloc);
1176		mem = kmalloc(size, priority);
1177		if (mem)
1178			return mem;
1179		atomic_sub(size, &sk->sk_omem_alloc);
1180	}
1181	return NULL;
1182}
1183
1184/*
1185 * Free an option memory block.
1186 */
1187void sock_kfree_s(struct sock *sk, void *mem, int size)
1188{
1189	kfree(mem);
1190	atomic_sub(size, &sk->sk_omem_alloc);
1191}
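
/*
 * Illustrative sketch (not part of this file): sock_kmalloc() and
 * sock_kfree_s() are used as a pair so that the accounting in
 * sk_omem_alloc stays balanced; struct foo_opt is a hypothetical
 * per-socket option structure:
 *
 *	struct foo_opt *opt = sock_kmalloc(sk, sizeof(*opt), GFP_KERNEL);
 *	if (opt == NULL)
 *		return -ENOBUFS;
 *	...
 *	sock_kfree_s(sk, opt, sizeof(*opt));
 */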
1192
1193/* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
1194   I think these locks should be removed for datagram sockets.
1195 */
1196static long sock_wait_for_wmem(struct sock * sk, long timeo)
1197{
1198	DEFINE_WAIT(wait);
1199
1200	clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
1201	for (;;) {
1202		if (!timeo)
1203			break;
1204		if (signal_pending(current))
1205			break;
1206		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1207		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
1208		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
1209			break;
1210		if (sk->sk_shutdown & SEND_SHUTDOWN)
1211			break;
1212		if (sk->sk_err)
1213			break;
1214		timeo = schedule_timeout(timeo);
1215	}
1216	finish_wait(sk->sk_sleep, &wait);
1217	return timeo;
1218}
1219
1220
1221/*
1222 *	Generic send/receive buffer handlers
1223 */
1224
1225static struct sk_buff *sock_alloc_send_pskb(struct sock *sk,
1226					    unsigned long header_len,
1227					    unsigned long data_len,
1228					    int noblock, int *errcode)
1229{
1230	struct sk_buff *skb;
1231	gfp_t gfp_mask;
1232	long timeo;
1233	int err;
1234
1235	gfp_mask = sk->sk_allocation;
1236	if (gfp_mask & __GFP_WAIT)
1237		gfp_mask |= __GFP_REPEAT;
1238
1239	timeo = sock_sndtimeo(sk, noblock);
1240	while (1) {
1241		err = sock_error(sk);
1242		if (err != 0)
1243			goto failure;
1244
1245		err = -EPIPE;
1246		if (sk->sk_shutdown & SEND_SHUTDOWN)
1247			goto failure;
1248
1249		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
1250			skb = alloc_skb(header_len, gfp_mask);
1251			if (skb) {
1252				int npages;
1253				int i;
1254
1255				/* No pages, we're done... */
1256				if (!data_len)
1257					break;
1258
1259				npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
1260				skb->truesize += data_len;
1261				skb_shinfo(skb)->nr_frags = npages;
1262				for (i = 0; i < npages; i++) {
1263					struct page *page;
1264					skb_frag_t *frag;
1265
1266					page = alloc_pages(sk->sk_allocation, 0);
1267					if (!page) {
1268						err = -ENOBUFS;
1269						skb_shinfo(skb)->nr_frags = i;
1270						kfree_skb(skb);
1271						goto failure;
1272					}
1273
1274					frag = &skb_shinfo(skb)->frags[i];
1275					frag->page = page;
1276					frag->page_offset = 0;
1277					frag->size = (data_len >= PAGE_SIZE ?
1278						      PAGE_SIZE :
1279						      data_len);
1280					data_len -= PAGE_SIZE;
1281				}
1282
1283				/* Full success... */
1284				break;
1285			}
1286			err = -ENOBUFS;
1287			goto failure;
1288		}
1289		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
1290		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1291		err = -EAGAIN;
1292		if (!timeo)
1293			goto failure;
1294		if (signal_pending(current))
1295			goto interrupted;
1296		timeo = sock_wait_for_wmem(sk, timeo);
1297	}
1298
1299	skb_set_owner_w(skb, sk);
1300	return skb;
1301
1302interrupted:
1303	err = sock_intr_errno(timeo);
1304failure:
1305	*errcode = err;
1306	return NULL;
1307}
1308
1309struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
1310				    int noblock, int *errcode)
1311{
1312	return sock_alloc_send_pskb(sk, size, 0, noblock, errcode);
1313}
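
/*
 * Illustrative sketch (not part of this file): a datagram sendmsg
 * implementation typically allocates its buffer through the helper
 * above; hlen stands for a protocol-specific header reserve:
 *
 *	skb = sock_alloc_send_skb(sk, len + hlen,
 *				  msg->msg_flags & MSG_DONTWAIT, &err);
 *	if (skb == NULL)
 *		goto out_err;
 */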
1314
1315static void __lock_sock(struct sock *sk)
1316{
1317	DEFINE_WAIT(wait);
1318
1319	for (;;) {
1320		prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
1321					TASK_UNINTERRUPTIBLE);
1322		spin_unlock_bh(&sk->sk_lock.slock);
1323		schedule();
1324		spin_lock_bh(&sk->sk_lock.slock);
1325		if (!sock_owned_by_user(sk))
1326			break;
1327	}
1328	finish_wait(&sk->sk_lock.wq, &wait);
1329}
1330
1331static void __release_sock(struct sock *sk)
1332{
1333	struct sk_buff *skb = sk->sk_backlog.head;
1334
1335	do {
1336		sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
1337		bh_unlock_sock(sk);
1338
1339		do {
1340			struct sk_buff *next = skb->next;
1341
1342			skb->next = NULL;
1343			sk->sk_backlog_rcv(sk, skb);
1344
1345			/*
1346			 * We are in process context here with softirqs
1347			 * disabled, use cond_resched_softirq() to preempt.
1348			 * This is safe to do because we've taken the backlog
1349			 * queue private:
1350			 */
1351			cond_resched_softirq();
1352
1353			skb = next;
1354		} while (skb != NULL);
1355
1356		bh_lock_sock(sk);
1357	} while ((skb = sk->sk_backlog.head) != NULL);
1358}
1359
1360/**
1361 * sk_wait_data - wait for data to arrive at sk_receive_queue
1362 * @sk:    sock to wait on
1363 * @timeo: for how long
1364 *
1365 * Now socket state including sk->sk_err is changed only under the lock,
1366 * hence we may omit checks after joining the wait queue.
1367 * We check the receive queue before schedule() only as an optimization;
1368 * it is very likely that release_sock() added new data.
1369 */
1370int sk_wait_data(struct sock *sk, long *timeo)
1371{
1372	int rc;
1373	DEFINE_WAIT(wait);
1374
1375	prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
1376	set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1377	rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
1378	clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1379	finish_wait(sk->sk_sleep, &wait);
1380	return rc;
1381}
1382
1383EXPORT_SYMBOL(sk_wait_data);
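
/*
 * Illustrative sketch (not part of this file): a protocol's recvmsg
 * path, already running under lock_sock(), typically waits for data
 * like this (timeo taken from sock_rcvtimeo()):
 *
 *	while (skb_queue_empty(&sk->sk_receive_queue) &&
 *	       timeo && !signal_pending(current))
 *		sk_wait_data(sk, &timeo);
 *
 * sk_wait_data() releases the socket lock while sleeping and retakes
 * it before returning, via sk_wait_event().
 */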
1384
1385/*
1386 * Set of default routines for initialising struct proto_ops when
1387 * the protocol does not support a particular function. In certain
1388 * cases where it makes no sense for a protocol to have a "do nothing"
1389 * function, some default processing is provided.
1390 */
1391
1392int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
1393{
1394	return -EOPNOTSUPP;
1395}
1396
1397int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
1398		    int len, int flags)
1399{
1400	return -EOPNOTSUPP;
1401}
1402
1403int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
1404{
1405	return -EOPNOTSUPP;
1406}
1407
1408int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
1409{
1410	return -EOPNOTSUPP;
1411}
1412
1413int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
1414		    int *len, int peer)
1415{
1416	return -EOPNOTSUPP;
1417}
1418
1419unsigned int sock_no_poll(struct file * file, struct socket *sock, poll_table *pt)
1420{
1421	return 0;
1422}
1423
1424int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1425{
1426	return -EOPNOTSUPP;
1427}
1428
1429int sock_no_listen(struct socket *sock, int backlog)
1430{
1431	return -EOPNOTSUPP;
1432}
1433
1434int sock_no_shutdown(struct socket *sock, int how)
1435{
1436	return -EOPNOTSUPP;
1437}
1438
1439int sock_no_setsockopt(struct socket *sock, int level, int optname,
1440		    char __user *optval, int optlen)
1441{
1442	return -EOPNOTSUPP;
1443}
1444
1445int sock_no_getsockopt(struct socket *sock, int level, int optname,
1446		    char __user *optval, int __user *optlen)
1447{
1448	return -EOPNOTSUPP;
1449}
1450
1451int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
1452		    size_t len)
1453{
1454	return -EOPNOTSUPP;
1455}
1456
1457int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
1458		    size_t len, int flags)
1459{
1460	return -EOPNOTSUPP;
1461}
1462
1463int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
1464{
1465	/* Mirror missing mmap method error code */
1466	return -ENODEV;
1467}
1468
1469ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
1470{
1471	ssize_t res;
1472	struct msghdr msg = {.msg_flags = flags};
1473	struct kvec iov;
1474	char *kaddr = kmap(page);
1475	iov.iov_base = kaddr + offset;
1476	iov.iov_len = size;
1477	res = kernel_sendmsg(sock, &msg, &iov, 1, size);
1478	kunmap(page);
1479	return res;
1480}
1481
1482/*
1483 *	Default Socket Callbacks
1484 */
1485
1486static void sock_def_wakeup(struct sock *sk)
1487{
1488	read_lock(&sk->sk_callback_lock);
1489	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1490		wake_up_interruptible_all(sk->sk_sleep);
1491	read_unlock(&sk->sk_callback_lock);
1492}
1493
1494static void sock_def_error_report(struct sock *sk)
1495{
1496	read_lock(&sk->sk_callback_lock);
1497	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1498		wake_up_interruptible(sk->sk_sleep);
1499	sk_wake_async(sk,0,POLL_ERR);
1500	read_unlock(&sk->sk_callback_lock);
1501}
1502
1503static void sock_def_readable(struct sock *sk, int len)
1504{
1505	read_lock(&sk->sk_callback_lock);
1506	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1507		wake_up_interruptible(sk->sk_sleep);
1508	sk_wake_async(sk,1,POLL_IN);
1509	read_unlock(&sk->sk_callback_lock);
1510}
1511
1512static void sock_def_write_space(struct sock *sk)
1513{
1514	read_lock(&sk->sk_callback_lock);
1515
1516	/* Do not wake up a writer until he can make "significant"
1517	 * progress.  --DaveM
1518	 */
1519	if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
1520		if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1521			wake_up_interruptible(sk->sk_sleep);
1522
1523		/* Should agree with poll, otherwise some programs break */
1524		if (sock_writeable(sk))
1525			sk_wake_async(sk, 2, POLL_OUT);
1526	}
1527
1528	read_unlock(&sk->sk_callback_lock);
1529}
1530
1531static void sock_def_destruct(struct sock *sk)
1532{
1533	kfree(sk->sk_protinfo);
1534}
1535
1536void sk_send_sigurg(struct sock *sk)
1537{
1538	if (sk->sk_socket && sk->sk_socket->file)
1539		if (send_sigurg(&sk->sk_socket->file->f_owner))
1540			sk_wake_async(sk, 3, POLL_PRI);
1541}
1542
1543void sk_reset_timer(struct sock *sk, struct timer_list* timer,
1544		    unsigned long expires)
1545{
1546	if (!mod_timer(timer, expires))
1547		sock_hold(sk);
1548}
1549
1550EXPORT_SYMBOL(sk_reset_timer);
1551
1552void sk_stop_timer(struct sock *sk, struct timer_list* timer)
1553{
1554	if (timer_pending(timer) && del_timer(timer))
1555		__sock_put(sk);
1556}
1557
1558EXPORT_SYMBOL(sk_stop_timer);
1559
1560void sock_init_data(struct socket *sock, struct sock *sk)
1561{
1562	skb_queue_head_init(&sk->sk_receive_queue);
1563	skb_queue_head_init(&sk->sk_write_queue);
1564	skb_queue_head_init(&sk->sk_error_queue);
1565#ifdef CONFIG_NET_DMA
1566	skb_queue_head_init(&sk->sk_async_wait_queue);
1567#endif
1568
1569	sk->sk_send_head	=	NULL;
1570
1571	init_timer(&sk->sk_timer);
1572
1573	sk->sk_allocation	=	GFP_KERNEL;
1574	sk->sk_rcvbuf		=	sysctl_rmem_default;
1575	sk->sk_sndbuf		=	sysctl_wmem_default;
1576	sk->sk_state		=	TCP_CLOSE;
1577	sk->sk_socket		=	sock;
1578
1579	sock_set_flag(sk, SOCK_ZAPPED);
1580
1581	if (sock) {
1582		sk->sk_type	=	sock->type;
1583		sk->sk_sleep	=	&sock->wait;
1584		sock->sk	=	sk;
1585	} else
1586		sk->sk_sleep	=	NULL;
1587
1588	rwlock_init(&sk->sk_dst_lock);
1589	rwlock_init(&sk->sk_callback_lock);
1590	lockdep_set_class_and_name(&sk->sk_callback_lock,
1591			af_callback_keys + sk->sk_family,
1592			af_family_clock_key_strings[sk->sk_family]);
1593
1594	sk->sk_state_change	=	sock_def_wakeup;
1595	sk->sk_data_ready	=	sock_def_readable;
1596	sk->sk_write_space	=	sock_def_write_space;
1597	sk->sk_error_report	=	sock_def_error_report;
1598	sk->sk_destruct		=	sock_def_destruct;
1599
1600	sk->sk_sndmsg_page	=	NULL;
1601	sk->sk_sndmsg_off	=	0;
1602
1603	sk->sk_peercred.pid 	=	0;
1604	sk->sk_peercred.uid	=	-1;
1605	sk->sk_peercred.gid	=	-1;
1606	sk->sk_write_pending	=	0;
1607	sk->sk_rcvlowat		=	1;
1608	sk->sk_rcvtimeo		=	MAX_SCHEDULE_TIMEOUT;
1609	sk->sk_sndtimeo		=	MAX_SCHEDULE_TIMEOUT;
1610
1611	sk->sk_stamp = ktime_set(-1L, -1L);
1612
1613	atomic_set(&sk->sk_refcnt, 1);
1614}
1615
1616void fastcall lock_sock_nested(struct sock *sk, int subclass)
1617{
1618	might_sleep();
1619	spin_lock_bh(&sk->sk_lock.slock);
1620	if (sk->sk_lock.owned)
1621		__lock_sock(sk);
1622	sk->sk_lock.owned = 1;
1623	spin_unlock(&sk->sk_lock.slock);
1624	/*
1625	 * The sk_lock has mutex_lock() semantics here:
1626	 */
1627	mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
1628	local_bh_enable();
1629}
1630
1631EXPORT_SYMBOL(lock_sock_nested);
1632
1633void fastcall release_sock(struct sock *sk)
1634{
1635	/*
1636	 * The sk_lock has mutex_unlock() semantics:
1637	 */
1638	mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
1639
1640	spin_lock_bh(&sk->sk_lock.slock);
1641	if (sk->sk_backlog.tail)
1642		__release_sock(sk);
1643	sk->sk_lock.owned = 0;
1644	if (waitqueue_active(&sk->sk_lock.wq))
1645		wake_up(&sk->sk_lock.wq);
1646	spin_unlock_bh(&sk->sk_lock.slock);
1647}
1648EXPORT_SYMBOL(release_sock);
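
/*
 * Illustrative sketch (not part of this file): process-context code
 * that touches socket state brackets it with lock_sock()/release_sock(),
 * e.g. in a protocol's setsockopt handler:
 *
 *	lock_sock(sk);
 *	sk->sk_no_check = valbool;	(example state change)
 *	release_sock(sk);
 *
 * release_sock() also processes the backlog that softirq context queued
 * with sk_add_backlog() while the lock was held (see __release_sock()).
 */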
1649
1650int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
1651{
1652	struct timeval tv;
1653	if (!sock_flag(sk, SOCK_TIMESTAMP))
1654		sock_enable_timestamp(sk);
1655	tv = ktime_to_timeval(sk->sk_stamp);
1656	if (tv.tv_sec == -1)
1657		return -ENOENT;
1658	if (tv.tv_sec == 0) {
1659		sk->sk_stamp = ktime_get_real();
1660		tv = ktime_to_timeval(sk->sk_stamp);
1661	}
1662	return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0;
1663}
1664EXPORT_SYMBOL(sock_get_timestamp);
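
/*
 * Illustrative sketch (not part of this file): sock_get_timestamp()
 * backs the SIOCGSTAMP ioctl in the protocols that use it, so user
 * space reads the last packet's receive time as:
 *
 *	struct timeval tv;
 *
 *	if (ioctl(fd, SIOCGSTAMP, &tv) < 0)
 *		perror("SIOCGSTAMP");
 */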
1665
1666int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
1667{
1668	struct timespec ts;
1669	if (!sock_flag(sk, SOCK_TIMESTAMP))
1670		sock_enable_timestamp(sk);
1671	ts = ktime_to_timespec(sk->sk_stamp);
1672	if (ts.tv_sec == -1)
1673		return -ENOENT;
1674	if (ts.tv_sec == 0) {
1675		sk->sk_stamp = ktime_get_real();
1676		ts = ktime_to_timespec(sk->sk_stamp);
1677	}
1678	return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0;
1679}
1680EXPORT_SYMBOL(sock_get_timestampns);
1681
1682void sock_enable_timestamp(struct sock *sk)
1683{
1684	if (!sock_flag(sk, SOCK_TIMESTAMP)) {
1685		sock_set_flag(sk, SOCK_TIMESTAMP);
1686		net_enable_timestamp();
1687	}
1688}
1689
1690/*
1691 *	Get a socket option on a socket.
1692 *
1693 *	FIX: POSIX 1003.1g is very ambiguous here. It states that
1694 *	asynchronous errors should be reported by getsockopt. We assume
1695 *	this means if you specify SO_ERROR (otherwise what's the point of it).
1696 */
1697int sock_common_getsockopt(struct socket *sock, int level, int optname,
1698			   char __user *optval, int __user *optlen)
1699{
1700	struct sock *sk = sock->sk;
1701
1702	return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
1703}
1704
1705EXPORT_SYMBOL(sock_common_getsockopt);
1706
1707#ifdef CONFIG_COMPAT
1708int compat_sock_common_getsockopt(struct socket *sock, int level, int optname,
1709				  char __user *optval, int __user *optlen)
1710{
1711	struct sock *sk = sock->sk;
1712
1713	if (sk->sk_prot->compat_getsockopt != NULL)
1714		return sk->sk_prot->compat_getsockopt(sk, level, optname,
1715						      optval, optlen);
1716	return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
1717}
1718EXPORT_SYMBOL(compat_sock_common_getsockopt);
1719#endif
1720
1721int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock,
1722			struct msghdr *msg, size_t size, int flags)
1723{
1724	struct sock *sk = sock->sk;
1725	int addr_len = 0;
1726	int err;
1727
1728	err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
1729				   flags & ~MSG_DONTWAIT, &addr_len);
1730	if (err >= 0)
1731		msg->msg_namelen = addr_len;
1732	return err;
1733}
1734
1735EXPORT_SYMBOL(sock_common_recvmsg);
1736
1737/*
1738 *	Set socket options on an inet socket.
1739 */
1740int sock_common_setsockopt(struct socket *sock, int level, int optname,
1741			   char __user *optval, int optlen)
1742{
1743	struct sock *sk = sock->sk;
1744
1745	return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
1746}
1747
1748EXPORT_SYMBOL(sock_common_setsockopt);
1749
1750#ifdef CONFIG_COMPAT
1751int compat_sock_common_setsockopt(struct socket *sock, int level, int optname,
1752				  char __user *optval, int optlen)
1753{
1754	struct sock *sk = sock->sk;
1755
1756	if (sk->sk_prot->compat_setsockopt != NULL)
1757		return sk->sk_prot->compat_setsockopt(sk, level, optname,
1758						      optval, optlen);
1759	return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
1760}
1761EXPORT_SYMBOL(compat_sock_common_setsockopt);
1762#endif
1763
1764void sk_common_release(struct sock *sk)
1765{
1766	if (sk->sk_prot->destroy)
1767		sk->sk_prot->destroy(sk);
1768
1769	/*
1770	 * Observation: when sk_common_release() is called, processes have
1771	 * no access to the socket, but the network stack still does.
1772	 * Step one, detach it from networking:
1773	 *
1774	 * A. Remove from hash tables.
1775	 */
1776
1777	sk->sk_prot->unhash(sk);
1778
1779	/*
1780	 * At this point the socket cannot receive new packets, but it is
1781	 * possible that some packets are in flight because some CPU runs the
1782	 * receiver and did a hash table lookup before we unhashed the socket.
1783	 * They will reach the receive queue and be purged by the socket destructor.
1784	 *
1785	 * Also we still have packets pending on the receive queue and probably
1786	 * our own packets waiting in device queues. sock_destroy will drain the
1787	 * receive queue, but transmitted packets will delay socket destruction
1788	 * until the last reference is released.
1789	 */
1790
1791	sock_orphan(sk);
1792
1793	xfrm_sk_free_policy(sk);
1794
1795	sk_refcnt_debug_release(sk);
1796	sock_put(sk);
1797}
1798
1799EXPORT_SYMBOL(sk_common_release);
1800
1801static DEFINE_RWLOCK(proto_list_lock);
1802static LIST_HEAD(proto_list);
1803
1804int proto_register(struct proto *prot, int alloc_slab)
1805{
1806	char *request_sock_slab_name = NULL;
1807	char *timewait_sock_slab_name;
1808	int rc = -ENOBUFS;
1809
1810	if (alloc_slab) {
1811		prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
1812					       SLAB_HWCACHE_ALIGN, NULL);
1813
1814		if (prot->slab == NULL) {
1815			printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n",
1816			       prot->name);
1817			goto out;
1818		}
1819
1820		if (prot->rsk_prot != NULL) {
1821			static const char mask[] = "request_sock_%s";
1822
1823			request_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
1824			if (request_sock_slab_name == NULL)
1825				goto out_free_sock_slab;
1826
1827			sprintf(request_sock_slab_name, mask, prot->name);
1828			prot->rsk_prot->slab = kmem_cache_create(request_sock_slab_name,
1829								 prot->rsk_prot->obj_size, 0,
1830								 SLAB_HWCACHE_ALIGN, NULL);
1831
1832			if (prot->rsk_prot->slab == NULL) {
1833				printk(KERN_CRIT "%s: Can't create request sock SLAB cache!\n",
1834				       prot->name);
1835				goto out_free_request_sock_slab_name;
1836			}
1837		}
1838
1839		if (prot->twsk_prot != NULL) {
1840			static const char mask[] = "tw_sock_%s";
1841
1842			timewait_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
1843
1844			if (timewait_sock_slab_name == NULL)
1845				goto out_free_request_sock_slab;
1846
1847			sprintf(timewait_sock_slab_name, mask, prot->name);
1848			prot->twsk_prot->twsk_slab =
1849				kmem_cache_create(timewait_sock_slab_name,
1850						  prot->twsk_prot->twsk_obj_size,
1851						  0, SLAB_HWCACHE_ALIGN,
1852						  NULL);
1853			if (prot->twsk_prot->twsk_slab == NULL)
1854				goto out_free_timewait_sock_slab_name;
1855		}
1856	}
1857
1858	write_lock(&proto_list_lock);
1859	list_add(&prot->node, &proto_list);
1860	write_unlock(&proto_list_lock);
1861	rc = 0;
1862out:
1863	return rc;
1864out_free_timewait_sock_slab_name:
1865	kfree(timewait_sock_slab_name);
1866out_free_request_sock_slab:
1867	if (prot->rsk_prot && prot->rsk_prot->slab) {
1868		kmem_cache_destroy(prot->rsk_prot->slab);
1869		prot->rsk_prot->slab = NULL;
1870	}
1871out_free_request_sock_slab_name:
1872	kfree(request_sock_slab_name);
1873out_free_sock_slab:
1874	kmem_cache_destroy(prot->slab);
1875	prot->slab = NULL;
1876	goto out;
1877}
1878
1879EXPORT_SYMBOL(proto_register);
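
/*
 * Illustrative sketch (not part of this file): a protocol module
 * registers its struct proto roughly like this; FOO, foo_prot and
 * struct foo_sock are hypothetical:
 *
 *	static struct proto foo_prot = {
 *		.name	  = "FOO",
 *		.owner	  = THIS_MODULE,
 *		.obj_size = sizeof(struct foo_sock),
 *	};
 *
 *	err = proto_register(&foo_prot, 1);
 *	...
 *	proto_unregister(&foo_prot);
 */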
1880
1881void proto_unregister(struct proto *prot)
1882{
1883	write_lock(&proto_list_lock);
1884	list_del(&prot->node);
1885	write_unlock(&proto_list_lock);
1886
1887	if (prot->slab != NULL) {
1888		kmem_cache_destroy(prot->slab);
1889		prot->slab = NULL;
1890	}
1891
1892	if (prot->rsk_prot != NULL && prot->rsk_prot->slab != NULL) {
1893		const char *name = kmem_cache_name(prot->rsk_prot->slab);
1894
1895		kmem_cache_destroy(prot->rsk_prot->slab);
1896		kfree(name);
1897		prot->rsk_prot->slab = NULL;
1898	}
1899
1900	if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) {
1901		const char *name = kmem_cache_name(prot->twsk_prot->twsk_slab);
1902
1903		kmem_cache_destroy(prot->twsk_prot->twsk_slab);
1904		kfree(name);
1905		prot->twsk_prot->twsk_slab = NULL;
1906	}
1907}
1908
1909EXPORT_SYMBOL(proto_unregister);
1910
1911#ifdef CONFIG_PROC_FS
1912static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
1913{
1914	read_lock(&proto_list_lock);
1915	return seq_list_start_head(&proto_list, *pos);
1916}
1917
1918static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1919{
1920	return seq_list_next(v, &proto_list, pos);
1921}
1922
1923static void proto_seq_stop(struct seq_file *seq, void *v)
1924{
1925	read_unlock(&proto_list_lock);
1926}
1927
1928static char proto_method_implemented(const void *method)
1929{
1930	return method == NULL ? 'n' : 'y';
1931}
1932
1933static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
1934{
1935	seq_printf(seq, "%-9s %4u %6d  %6d   %-3s %6u   %-3s  %-10s "
1936			"%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
1937		   proto->name,
1938		   proto->obj_size,
1939		   proto->sockets_allocated != NULL ? atomic_read(proto->sockets_allocated) : -1,
1940		   proto->memory_allocated != NULL ? atomic_read(proto->memory_allocated) : -1,
1941		   proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI",
1942		   proto->max_header,
1943		   proto->slab == NULL ? "no" : "yes",
1944		   module_name(proto->owner),
1945		   proto_method_implemented(proto->close),
1946		   proto_method_implemented(proto->connect),
1947		   proto_method_implemented(proto->disconnect),
1948		   proto_method_implemented(proto->accept),
1949		   proto_method_implemented(proto->ioctl),
1950		   proto_method_implemented(proto->init),
1951		   proto_method_implemented(proto->destroy),
1952		   proto_method_implemented(proto->shutdown),
1953		   proto_method_implemented(proto->setsockopt),
1954		   proto_method_implemented(proto->getsockopt),
1955		   proto_method_implemented(proto->sendmsg),
1956		   proto_method_implemented(proto->recvmsg),
1957		   proto_method_implemented(proto->sendpage),
1958		   proto_method_implemented(proto->bind),
1959		   proto_method_implemented(proto->backlog_rcv),
1960		   proto_method_implemented(proto->hash),
1961		   proto_method_implemented(proto->unhash),
1962		   proto_method_implemented(proto->get_port),
1963		   proto_method_implemented(proto->enter_memory_pressure));
1964}
1965
1966static int proto_seq_show(struct seq_file *seq, void *v)
1967{
1968	if (v == &proto_list)
1969		seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
1970			   "protocol",
1971			   "size",
1972			   "sockets",
1973			   "memory",
1974			   "press",
1975			   "maxhdr",
1976			   "slab",
1977			   "module",
1978			   "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
1979	else
1980		proto_seq_printf(seq, list_entry(v, struct proto, node));
1981	return 0;
1982}
1983
1984static const struct seq_operations proto_seq_ops = {
1985	.start  = proto_seq_start,
1986	.next   = proto_seq_next,
1987	.stop   = proto_seq_stop,
1988	.show   = proto_seq_show,
1989};
1990
1991static int proto_seq_open(struct inode *inode, struct file *file)
1992{
1993	return seq_open(file, &proto_seq_ops);
1994}
1995
1996static const struct file_operations proto_seq_fops = {
1997	.owner		= THIS_MODULE,
1998	.open		= proto_seq_open,
1999	.read		= seq_read,
2000	.llseek		= seq_lseek,
2001	.release	= seq_release,
2002};
2003
2004static int __init proto_init(void)
2005{
2006	/* register /proc/net/protocols */
2007	return proc_net_fops_create(&init_net, "protocols", S_IRUGO, &proto_seq_fops) == NULL ? -ENOBUFS : 0;
2008}
2009
2010subsys_initcall(proto_init);
2011
2012#endif /* PROC_FS */
2013
2014EXPORT_SYMBOL(sk_alloc);
2015EXPORT_SYMBOL(sk_free);
2016EXPORT_SYMBOL(sk_send_sigurg);
2017EXPORT_SYMBOL(sock_alloc_send_skb);
2018EXPORT_SYMBOL(sock_init_data);
2019EXPORT_SYMBOL(sock_kfree_s);
2020EXPORT_SYMBOL(sock_kmalloc);
2021EXPORT_SYMBOL(sock_no_accept);
2022EXPORT_SYMBOL(sock_no_bind);
2023EXPORT_SYMBOL(sock_no_connect);
2024EXPORT_SYMBOL(sock_no_getname);
2025EXPORT_SYMBOL(sock_no_getsockopt);
2026EXPORT_SYMBOL(sock_no_ioctl);
2027EXPORT_SYMBOL(sock_no_listen);
2028EXPORT_SYMBOL(sock_no_mmap);
2029EXPORT_SYMBOL(sock_no_poll);
2030EXPORT_SYMBOL(sock_no_recvmsg);
2031EXPORT_SYMBOL(sock_no_sendmsg);
2032EXPORT_SYMBOL(sock_no_sendpage);
2033EXPORT_SYMBOL(sock_no_setsockopt);
2034EXPORT_SYMBOL(sock_no_shutdown);
2035EXPORT_SYMBOL(sock_no_socketpair);
2036EXPORT_SYMBOL(sock_rfree);
2037EXPORT_SYMBOL(sock_setsockopt);
2038EXPORT_SYMBOL(sock_wfree);
2039EXPORT_SYMBOL(sock_wmalloc);
2040EXPORT_SYMBOL(sock_i_uid);
2041EXPORT_SYMBOL(sock_i_ino);
2042EXPORT_SYMBOL(sysctl_optmem_max);
2043#ifdef CONFIG_SYSCTL
2044EXPORT_SYMBOL(sysctl_rmem_max);
2045EXPORT_SYMBOL(sysctl_wmem_max);
2046#endif
2047