sock.c revision c0ef877b2c9f543e9fb7953bfe1a0cd3a4eae362
1/*
2 * INET		An implementation of the TCP/IP protocol suite for the LINUX
3 *		operating system.  INET is implemented using the  BSD Socket
4 *		interface as the means of communication with the user level.
5 *
6 *		Generic socket support routines. Memory allocators, socket lock/release
7 *		handler for protocols to use and generic option handler.
8 *
9 *
10 * Version:	$Id: sock.c,v 1.117 2002/02/01 22:01:03 davem Exp $
11 *
12 * Authors:	Ross Biro
13 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
14 *		Florian La Roche, <flla@stud.uni-sb.de>
15 *		Alan Cox, <A.Cox@swansea.ac.uk>
16 *
17 * Fixes:
18 *		Alan Cox	: 	Numerous verify_area() problems
19 *		Alan Cox	:	Connecting on a connecting socket
20 *					now returns an error for tcp.
21 *		Alan Cox	:	sock->protocol is set correctly.
22 *					and is not sometimes left as 0.
23 *		Alan Cox	:	connect handles icmp errors on a
24 *					connect properly. Unfortunately there
25 *					is a restart syscall nasty there. I
26 *					can't match BSD without hacking the C
27 *					library. Ideas urgently sought!
28 *		Alan Cox	:	Disallow bind() to addresses that are
29 *					not ours - especially broadcast ones!!
30 *		Alan Cox	:	Socket 1024 _IS_ ok for users. (fencepost)
31 *		Alan Cox	:	sock_wfree/sock_rfree don't destroy sockets,
32 *					instead they leave that for the DESTROY timer.
33 *		Alan Cox	:	Clean up error flag in accept
34 *		Alan Cox	:	TCP ack handling is buggy, the DESTROY timer
35 *					was buggy. Put a remove_sock() in the handler
36 *					for memory when we hit 0. Also altered the timer
37 *					code. The ACK stuff can wait and needs major
38 *					TCP layer surgery.
39 *		Alan Cox	:	Fixed TCP ack bug, removed remove sock
40 *					and fixed timer/inet_bh race.
41 *		Alan Cox	:	Added zapped flag for TCP
42 *		Alan Cox	:	Move kfree_skb into skbuff.c and tidied up surplus code
43 *		Alan Cox	:	for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
44 *		Alan Cox	:	kfree_s calls now are kfree_skbmem so we can track skb resources
45 *		Alan Cox	:	Supports socket option broadcast now as does udp. Packet and raw need fixing.
46 *		Alan Cox	:	Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
47 *		Rick Sladkey	:	Relaxed UDP rules for matching packets.
48 *		C.E.Hawkins	:	IFF_PROMISC/SIOCGHWADDR support
49 *	Pauline Middelink	:	identd support
50 *		Alan Cox	:	Fixed connect() taking signals I think.
51 *		Alan Cox	:	SO_LINGER supported
52 *		Alan Cox	:	Error reporting fixes
53 *		Anonymous	:	inet_create tidied up (sk->reuse setting)
54 *		Alan Cox	:	inet sockets don't set sk->type!
55 *		Alan Cox	:	Split socket option code
56 *		Alan Cox	:	Callbacks
57 *		Alan Cox	:	Nagle flag for Charles & Johannes stuff
58 *		Alex		:	Removed restriction on inet fioctl
59 *		Alan Cox	:	Splitting INET from NET core
60 *		Alan Cox	:	Fixed bogus SO_TYPE handling in getsockopt()
61 *		Adam Caldwell	:	Missing return in SO_DONTROUTE/SO_DEBUG code
62 *		Alan Cox	:	Split IP from generic code
63 *		Alan Cox	:	New kfree_skbmem()
64 *		Alan Cox	:	Make SO_DEBUG superuser only.
65 *		Alan Cox	:	Allow anyone to clear SO_DEBUG
66 *					(compatibility fix)
67 *		Alan Cox	:	Added optimistic memory grabbing for AF_UNIX throughput.
68 *		Alan Cox	:	Allocator for a socket is settable.
69 *		Alan Cox	:	SO_ERROR includes soft errors.
70 *		Alan Cox	:	Allow NULL arguments on some SO_ opts
71 *		Alan Cox	: 	Generic socket allocation to make hooks
72 *					easier (suggested by Craig Metz).
73 *		Michael Pall	:	SO_ERROR returns positive errno again
74 *              Steve Whitehouse:       Added default destructor to free
75 *                                      protocol private data.
76 *              Steve Whitehouse:       Added various other default routines
77 *                                      common to several socket families.
78 *              Chris Evans     :       Call suser() check last on F_SETOWN
79 *		Jay Schulist	:	Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
80 *		Andi Kleen	:	Add sock_kmalloc()/sock_kfree_s()
81 *		Andi Kleen	:	Fix write_space callback
82 *		Chris Evans	:	Security fixes - signedness again
83 *		Arnaldo C. Melo :       cleanups, use skb_queue_purge
84 *
85 * To Fix:
86 *
87 *
88 *		This program is free software; you can redistribute it and/or
89 *		modify it under the terms of the GNU General Public License
90 *		as published by the Free Software Foundation; either version
91 *		2 of the License, or (at your option) any later version.
92 */
93
94#include <linux/capability.h>
95#include <linux/errno.h>
96#include <linux/types.h>
97#include <linux/socket.h>
98#include <linux/in.h>
99#include <linux/kernel.h>
100#include <linux/module.h>
101#include <linux/proc_fs.h>
102#include <linux/seq_file.h>
103#include <linux/sched.h>
104#include <linux/timer.h>
105#include <linux/string.h>
106#include <linux/sockios.h>
107#include <linux/net.h>
108#include <linux/mm.h>
109#include <linux/slab.h>
110#include <linux/interrupt.h>
111#include <linux/poll.h>
112#include <linux/tcp.h>
113#include <linux/init.h>
114#include <linux/highmem.h>
115
116#include <asm/uaccess.h>
117#include <asm/system.h>
118
119#include <linux/netdevice.h>
120#include <net/protocol.h>
121#include <linux/skbuff.h>
122#include <net/net_namespace.h>
123#include <net/request_sock.h>
124#include <net/sock.h>
125#include <net/xfrm.h>
126#include <linux/ipsec.h>
127
128#include <linux/filter.h>
129
130#ifdef CONFIG_INET
131#include <net/tcp.h>
132#endif
133
134/*
135 * Each address family might have different locking rules, so we have
136 * one slock key per address family:
137 */
138static struct lock_class_key af_family_keys[AF_MAX];
139static struct lock_class_key af_family_slock_keys[AF_MAX];
140
141#ifdef CONFIG_DEBUG_LOCK_ALLOC
142/*
143 * Make lock validator output more readable. (we pre-construct these
144 * strings build-time, so that runtime initialization of socket
145 * locks is fast):
146 */
147static const char *af_family_key_strings[AF_MAX+1] = {
148  "sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX"     , "sk_lock-AF_INET"     ,
149  "sk_lock-AF_AX25"  , "sk_lock-AF_IPX"      , "sk_lock-AF_APPLETALK",
150  "sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE"   , "sk_lock-AF_ATMPVC"   ,
151  "sk_lock-AF_X25"   , "sk_lock-AF_INET6"    , "sk_lock-AF_ROSE"     ,
152  "sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI"  , "sk_lock-AF_SECURITY" ,
153  "sk_lock-AF_KEY"   , "sk_lock-AF_NETLINK"  , "sk_lock-AF_PACKET"   ,
154  "sk_lock-AF_ASH"   , "sk_lock-AF_ECONET"   , "sk_lock-AF_ATMSVC"   ,
155  "sk_lock-21"       , "sk_lock-AF_SNA"      , "sk_lock-AF_IRDA"     ,
156  "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE"  , "sk_lock-AF_LLC"      ,
157  "sk_lock-27"       , "sk_lock-28"          , "sk_lock-29"          ,
158  "sk_lock-AF_TIPC"  , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV"        ,
159  "sk_lock-AF_RXRPC" , "sk_lock-AF_MAX"
160};
161static const char *af_family_slock_key_strings[AF_MAX+1] = {
162  "slock-AF_UNSPEC", "slock-AF_UNIX"     , "slock-AF_INET"     ,
163  "slock-AF_AX25"  , "slock-AF_IPX"      , "slock-AF_APPLETALK",
164  "slock-AF_NETROM", "slock-AF_BRIDGE"   , "slock-AF_ATMPVC"   ,
165  "slock-AF_X25"   , "slock-AF_INET6"    , "slock-AF_ROSE"     ,
166  "slock-AF_DECnet", "slock-AF_NETBEUI"  , "slock-AF_SECURITY" ,
167  "slock-AF_KEY"   , "slock-AF_NETLINK"  , "slock-AF_PACKET"   ,
168  "slock-AF_ASH"   , "slock-AF_ECONET"   , "slock-AF_ATMSVC"   ,
169  "slock-21"       , "slock-AF_SNA"      , "slock-AF_IRDA"     ,
170  "slock-AF_PPPOX" , "slock-AF_WANPIPE"  , "slock-AF_LLC"      ,
171  "slock-27"       , "slock-28"          , "slock-29"          ,
172  "slock-AF_TIPC"  , "slock-AF_BLUETOOTH", "slock-AF_IUCV"     ,
173  "slock-AF_RXRPC" , "slock-AF_MAX"
174};
175static const char *af_family_clock_key_strings[AF_MAX+1] = {
176  "clock-AF_UNSPEC", "clock-AF_UNIX"     , "clock-AF_INET"     ,
177  "clock-AF_AX25"  , "clock-AF_IPX"      , "clock-AF_APPLETALK",
178  "clock-AF_NETROM", "clock-AF_BRIDGE"   , "clock-AF_ATMPVC"   ,
179  "clock-AF_X25"   , "clock-AF_INET6"    , "clock-AF_ROSE"     ,
180  "clock-AF_DECnet", "clock-AF_NETBEUI"  , "clock-AF_SECURITY" ,
181  "clock-AF_KEY"   , "clock-AF_NETLINK"  , "clock-AF_PACKET"   ,
182  "clock-AF_ASH"   , "clock-AF_ECONET"   , "clock-AF_ATMSVC"   ,
183  "clock-21"       , "clock-AF_SNA"      , "clock-AF_IRDA"     ,
184  "clock-AF_PPPOX" , "clock-AF_WANPIPE"  , "clock-AF_LLC"      ,
185  "clock-27"       , "clock-28"          , "clock-29"          ,
186  "clock-AF_TIPC"  , "clock-AF_BLUETOOTH", "clock-AF_IUCV"     ,
187  "clock-AF_RXRPC" , "clock-AF_MAX"
188};
189#endif
190
191/*
192 * sk_callback_lock locking rules are per-address-family,
193 * so split the lock classes by using a per-AF key:
194 */
195static struct lock_class_key af_callback_keys[AF_MAX];
196
197/* Take into consideration the size of the struct sk_buff overhead in the
198 * determination of these values, since that is non-constant across
199 * platforms.  This makes socket queueing behavior and performance
200 * not depend upon such differences.
201 */
202#define _SK_MEM_PACKETS		256
203#define _SK_MEM_OVERHEAD	(sizeof(struct sk_buff) + 256)
204#define SK_WMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
205#define SK_RMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
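/*
 * Illustrative arithmetic only (sizeof(struct sk_buff) varies by
 * architecture and config): if an sk_buff were 256 bytes, the per-packet
 * overhead would be 512 bytes and SK_WMEM_MAX / SK_RMEM_MAX would default
 * to 512 * 256 = 128 KiB each.
 */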
206
207/* Run time adjustable parameters. */
208__u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
209__u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
210__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
211__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
212
/* Maximal space eaten by iovec or ancillary data plus some space */
214int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
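/*
 * Worked example (assuming 64-bit longs and UIO_MAXIOV == 1024, both of
 * which are configuration dependent): 8 * (2 * 1024 + 512) = 20480 bytes.
 */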
215
216static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
217{
218	struct timeval tv;
219
220	if (optlen < sizeof(tv))
221		return -EINVAL;
222	if (copy_from_user(&tv, optval, sizeof(tv)))
223		return -EFAULT;
224	if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC)
225		return -EDOM;
226
227	if (tv.tv_sec < 0) {
228		static int warned __read_mostly;
229
230		*timeo_p = 0;
		if (warned < 10 && net_ratelimit()) {
			warned++;
			printk(KERN_INFO "sock_set_timeout: `%s' (pid %d) "
			       "tries to set negative timeout\n",
				current->comm, task_pid_nr(current));
		}
236		return 0;
237	}
238	*timeo_p = MAX_SCHEDULE_TIMEOUT;
239	if (tv.tv_sec == 0 && tv.tv_usec == 0)
240		return 0;
241	if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
242		*timeo_p = tv.tv_sec*HZ + (tv.tv_usec+(1000000/HZ-1))/(1000000/HZ);
243	return 0;
244}
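/*
 * Conversion sketch, assuming HZ == 1000: a timeval of { .tv_sec = 2,
 * .tv_usec = 500000 } becomes 2 * 1000 + (500000 + 999) / 1000 = 2500
 * jiffies; sub-tick microseconds are rounded up to the next tick.
 */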
245
246static void sock_warn_obsolete_bsdism(const char *name)
247{
248	static int warned;
249	static char warncomm[TASK_COMM_LEN];
250	if (strcmp(warncomm, current->comm) && warned < 5) {
251		strcpy(warncomm,  current->comm);
252		printk(KERN_WARNING "process `%s' is using obsolete "
253		       "%s SO_BSDCOMPAT\n", warncomm, name);
254		warned++;
255	}
256}
257
258static void sock_disable_timestamp(struct sock *sk)
259{
260	if (sock_flag(sk, SOCK_TIMESTAMP)) {
261		sock_reset_flag(sk, SOCK_TIMESTAMP);
262		net_disable_timestamp();
263	}
264}
265
266
267int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
268{
269	int err = 0;
270	int skb_len;
271
	/* Cast sk->sk_rcvbuf to unsigned... It's pointless, but reduces
	   the number of warnings when compiling with -W --ANK
	 */
275	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
276	    (unsigned)sk->sk_rcvbuf) {
277		err = -ENOMEM;
278		goto out;
279	}
280
281	err = sk_filter(sk, skb);
282	if (err)
283		goto out;
284
285	skb->dev = NULL;
286	skb_set_owner_r(skb, sk);
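	/*
	 * skb_set_owner_r() charges skb->truesize to sk_rmem_alloc and sets
	 * sock_rfree() as the destructor, so the rcvbuf check above and the
	 * uncharge at kfree_skb() time use the same accounting unit.
	 */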
287
288	/* Cache the SKB length before we tack it onto the receive
289	 * queue.  Once it is added it no longer belongs to us and
290	 * may be freed by other threads of control pulling packets
291	 * from the queue.
292	 */
293	skb_len = skb->len;
294
295	skb_queue_tail(&sk->sk_receive_queue, skb);
296
297	if (!sock_flag(sk, SOCK_DEAD))
298		sk->sk_data_ready(sk, skb_len);
299out:
300	return err;
301}
302EXPORT_SYMBOL(sock_queue_rcv_skb);
303
304int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
305{
306	int rc = NET_RX_SUCCESS;
307
308	if (sk_filter(sk, skb))
309		goto discard_and_relse;
310
311	skb->dev = NULL;
312
313	if (nested)
314		bh_lock_sock_nested(sk);
315	else
316		bh_lock_sock(sk);
317	if (!sock_owned_by_user(sk)) {
318		/*
319		 * trylock + unlock semantics:
320		 */
321		mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);
322
323		rc = sk->sk_backlog_rcv(sk, skb);
324
325		mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
326	} else
327		sk_add_backlog(sk, skb);
328	bh_unlock_sock(sk);
329out:
330	sock_put(sk);
331	return rc;
332discard_and_relse:
333	kfree_skb(skb);
334	goto out;
335}
336EXPORT_SYMBOL(sk_receive_skb);
337
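/*
 * Route cache validation helpers.  __sk_dst_check() reads sk_dst_cache
 * directly (callers typically hold the socket lock) and returns the dst
 * without taking an extra reference, while sk_dst_check() below goes
 * through sk_dst_get() and hands back a referenced dst that the caller
 * must dst_release().
 */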
338struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
339{
340	struct dst_entry *dst = sk->sk_dst_cache;
341
342	if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
343		sk->sk_dst_cache = NULL;
344		dst_release(dst);
345		return NULL;
346	}
347
348	return dst;
349}
350EXPORT_SYMBOL(__sk_dst_check);
351
352struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
353{
354	struct dst_entry *dst = sk_dst_get(sk);
355
356	if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
357		sk_dst_reset(sk);
358		dst_release(dst);
359		return NULL;
360	}
361
362	return dst;
363}
364EXPORT_SYMBOL(sk_dst_check);
365
366static int sock_bindtodevice(struct sock *sk, char __user *optval, int optlen)
367{
368	int ret = -ENOPROTOOPT;
369#ifdef CONFIG_NETDEVICES
370	struct net *net = sk->sk_net;
371	char devname[IFNAMSIZ];
372	int index;
373
374	/* Sorry... */
375	ret = -EPERM;
376	if (!capable(CAP_NET_RAW))
377		goto out;
378
379	ret = -EINVAL;
380	if (optlen < 0)
381		goto out;
382
383	/* Bind this socket to a particular device like "eth0",
384	 * as specified in the passed interface name. If the
385	 * name is "" or the option length is zero the socket
386	 * is not bound.
387	 */
388	if (optlen > IFNAMSIZ - 1)
389		optlen = IFNAMSIZ - 1;
390	memset(devname, 0, sizeof(devname));
391
392	ret = -EFAULT;
393	if (copy_from_user(devname, optval, optlen))
394		goto out;
395
396	if (devname[0] == '\0') {
397		index = 0;
398	} else {
399		struct net_device *dev = dev_get_by_name(net, devname);
400
401		ret = -ENODEV;
402		if (!dev)
403			goto out;
404
405		index = dev->ifindex;
406		dev_put(dev);
407	}
408
409	lock_sock(sk);
410	sk->sk_bound_dev_if = index;
411	sk_dst_reset(sk);
412	release_sock(sk);
413
414	ret = 0;
415
416out:
417#endif
418
419	return ret;
420}
421
422static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool)
423{
424	if (valbool)
425		sock_set_flag(sk, bit);
426	else
427		sock_reset_flag(sk, bit);
428}
429
430/*
431 *	This is meant for all protocols to use and covers goings on
432 *	at the socket level. Everything here is generic.
433 */
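/*
 * Entry path sketch: a userspace call such as
 *
 *	int one = 1;
 *	setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &one, sizeof(one));
 *
 * reaches this function via sys_setsockopt() whenever level is SOL_SOCKET,
 * independent of the socket's protocol family.
 */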
434
435int sock_setsockopt(struct socket *sock, int level, int optname,
436		    char __user *optval, int optlen)
437{
438	struct sock *sk=sock->sk;
439	int val;
440	int valbool;
441	struct linger ling;
442	int ret = 0;
443
444	/*
445	 *	Options without arguments
446	 */
447
448#ifdef SO_DONTLINGER		/* Compatibility item... */
449	if (optname == SO_DONTLINGER) {
450		lock_sock(sk);
451		sock_reset_flag(sk, SOCK_LINGER);
452		release_sock(sk);
453		return 0;
454	}
455#endif
456
457	if (optname == SO_BINDTODEVICE)
458		return sock_bindtodevice(sk, optval, optlen);
459
460	if (optlen < sizeof(int))
461		return -EINVAL;
462
463	if (get_user(val, (int __user *)optval))
464		return -EFAULT;
465
466	valbool = val?1:0;
467
468	lock_sock(sk);
469
470	switch(optname) {
471	case SO_DEBUG:
472		if (val && !capable(CAP_NET_ADMIN)) {
473			ret = -EACCES;
474		} else
475			sock_valbool_flag(sk, SOCK_DBG, valbool);
476		break;
477	case SO_REUSEADDR:
478		sk->sk_reuse = valbool;
479		break;
480	case SO_TYPE:
481	case SO_ERROR:
482		ret = -ENOPROTOOPT;
483		break;
484	case SO_DONTROUTE:
485		sock_valbool_flag(sk, SOCK_LOCALROUTE, valbool);
486		break;
487	case SO_BROADCAST:
488		sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
489		break;
490	case SO_SNDBUF:
		/* Don't return an error here; BSD doesn't, and if you think
		   about it this is right. Otherwise apps would have to
		   play 'guess the biggest size' games. RCVBUF/SNDBUF
		   are treated as hints in BSD. */
495
496		if (val > sysctl_wmem_max)
497			val = sysctl_wmem_max;
498set_sndbuf:
499		sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
500		if ((val * 2) < SOCK_MIN_SNDBUF)
501			sk->sk_sndbuf = SOCK_MIN_SNDBUF;
502		else
503			sk->sk_sndbuf = val * 2;
504
505		/*
506		 *	Wake up sending tasks if we
507		 *	upped the value.
508		 */
509		sk->sk_write_space(sk);
510		break;
511
512	case SO_SNDBUFFORCE:
513		if (!capable(CAP_NET_ADMIN)) {
514			ret = -EPERM;
515			break;
516		}
517		goto set_sndbuf;
518
519	case SO_RCVBUF:
		/* Don't return an error here; BSD doesn't, and if you think
		   about it this is right. Otherwise apps would have to
		   play 'guess the biggest size' games. RCVBUF/SNDBUF
		   are treated as hints in BSD. */
524
525		if (val > sysctl_rmem_max)
526			val = sysctl_rmem_max;
527set_rcvbuf:
528		sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
529		/*
530		 * We double it on the way in to account for
531		 * "struct sk_buff" etc. overhead.   Applications
532		 * assume that the SO_RCVBUF setting they make will
533		 * allow that much actual data to be received on that
534		 * socket.
535		 *
536		 * Applications are unaware that "struct sk_buff" and
537		 * other overheads allocate from the receive buffer
538		 * during socket buffer allocation.
539		 *
540		 * And after considering the possible alternatives,
541		 * returning the value we actually used in getsockopt
542		 * is the most desirable behavior.
543		 */
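		/*
		 * Worked example of the doubling (illustrative): a
		 * setsockopt(SO_RCVBUF) of 64 KiB stores sk_rcvbuf = 128 KiB,
		 * and a later getsockopt(SO_RCVBUF) reports that doubled value.
		 */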
544		if ((val * 2) < SOCK_MIN_RCVBUF)
545			sk->sk_rcvbuf = SOCK_MIN_RCVBUF;
546		else
547			sk->sk_rcvbuf = val * 2;
548		break;
549
550	case SO_RCVBUFFORCE:
551		if (!capable(CAP_NET_ADMIN)) {
552			ret = -EPERM;
553			break;
554		}
555		goto set_rcvbuf;
556
557	case SO_KEEPALIVE:
558#ifdef CONFIG_INET
559		if (sk->sk_protocol == IPPROTO_TCP)
560			tcp_set_keepalive(sk, valbool);
561#endif
562		sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
563		break;
564
565	case SO_OOBINLINE:
566		sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
567		break;
568
569	case SO_NO_CHECK:
570		sk->sk_no_check = valbool;
571		break;
572
573	case SO_PRIORITY:
574		if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN))
575			sk->sk_priority = val;
576		else
577			ret = -EPERM;
578		break;
579
580	case SO_LINGER:
581		if (optlen < sizeof(ling)) {
582			ret = -EINVAL;	/* 1003.1g */
583			break;
584		}
585		if (copy_from_user(&ling,optval,sizeof(ling))) {
586			ret = -EFAULT;
587			break;
588		}
589		if (!ling.l_onoff)
590			sock_reset_flag(sk, SOCK_LINGER);
591		else {
592#if (BITS_PER_LONG == 32)
593			if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
594				sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
595			else
596#endif
597				sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
598			sock_set_flag(sk, SOCK_LINGER);
599		}
600		break;
601
602	case SO_BSDCOMPAT:
603		sock_warn_obsolete_bsdism("setsockopt");
604		break;
605
606	case SO_PASSCRED:
607		if (valbool)
608			set_bit(SOCK_PASSCRED, &sock->flags);
609		else
610			clear_bit(SOCK_PASSCRED, &sock->flags);
611		break;
612
613	case SO_TIMESTAMP:
614	case SO_TIMESTAMPNS:
615		if (valbool)  {
616			if (optname == SO_TIMESTAMP)
617				sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
618			else
619				sock_set_flag(sk, SOCK_RCVTSTAMPNS);
620			sock_set_flag(sk, SOCK_RCVTSTAMP);
621			sock_enable_timestamp(sk);
622		} else {
623			sock_reset_flag(sk, SOCK_RCVTSTAMP);
624			sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
625		}
626		break;
627
628	case SO_RCVLOWAT:
629		if (val < 0)
630			val = INT_MAX;
631		sk->sk_rcvlowat = val ? : 1;
632		break;
633
634	case SO_RCVTIMEO:
635		ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
636		break;
637
638	case SO_SNDTIMEO:
639		ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
640		break;
641
642	case SO_ATTACH_FILTER:
643		ret = -EINVAL;
644		if (optlen == sizeof(struct sock_fprog)) {
645			struct sock_fprog fprog;
646
647			ret = -EFAULT;
648			if (copy_from_user(&fprog, optval, sizeof(fprog)))
649				break;
650
651			ret = sk_attach_filter(&fprog, sk);
652		}
653		break;
654
655	case SO_DETACH_FILTER:
656		ret = sk_detach_filter(sk);
657		break;
658
659	case SO_PASSSEC:
660		if (valbool)
661			set_bit(SOCK_PASSSEC, &sock->flags);
662		else
663			clear_bit(SOCK_PASSSEC, &sock->flags);
664		break;
665
666		/* We implement the SO_SNDLOWAT etc to
667		   not be settable (1003.1g 5.3) */
668	default:
669		ret = -ENOPROTOOPT;
670		break;
671	}
672	release_sock(sk);
673	return ret;
674}
675
676
677int sock_getsockopt(struct socket *sock, int level, int optname,
678		    char __user *optval, int __user *optlen)
679{
680	struct sock *sk = sock->sk;
681
682	union {
683		int val;
684		struct linger ling;
685		struct timeval tm;
686	} v;
687
688	unsigned int lv = sizeof(int);
689	int len;
690
691	if (get_user(len, optlen))
692		return -EFAULT;
693	if (len < 0)
694		return -EINVAL;
695
696	switch(optname) {
697	case SO_DEBUG:
698		v.val = sock_flag(sk, SOCK_DBG);
699		break;
700
701	case SO_DONTROUTE:
702		v.val = sock_flag(sk, SOCK_LOCALROUTE);
703		break;
704
705	case SO_BROADCAST:
706		v.val = !!sock_flag(sk, SOCK_BROADCAST);
707		break;
708
709	case SO_SNDBUF:
710		v.val = sk->sk_sndbuf;
711		break;
712
713	case SO_RCVBUF:
714		v.val = sk->sk_rcvbuf;
715		break;
716
717	case SO_REUSEADDR:
718		v.val = sk->sk_reuse;
719		break;
720
721	case SO_KEEPALIVE:
722		v.val = !!sock_flag(sk, SOCK_KEEPOPEN);
723		break;
724
725	case SO_TYPE:
726		v.val = sk->sk_type;
727		break;
728
729	case SO_ERROR:
730		v.val = -sock_error(sk);
731		if (v.val==0)
732			v.val = xchg(&sk->sk_err_soft, 0);
733		break;
734
735	case SO_OOBINLINE:
736		v.val = !!sock_flag(sk, SOCK_URGINLINE);
737		break;
738
739	case SO_NO_CHECK:
740		v.val = sk->sk_no_check;
741		break;
742
743	case SO_PRIORITY:
744		v.val = sk->sk_priority;
745		break;
746
747	case SO_LINGER:
748		lv		= sizeof(v.ling);
749		v.ling.l_onoff	= !!sock_flag(sk, SOCK_LINGER);
750		v.ling.l_linger	= sk->sk_lingertime / HZ;
751		break;
752
753	case SO_BSDCOMPAT:
754		sock_warn_obsolete_bsdism("getsockopt");
755		break;
756
757	case SO_TIMESTAMP:
758		v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
759				!sock_flag(sk, SOCK_RCVTSTAMPNS);
760		break;
761
762	case SO_TIMESTAMPNS:
763		v.val = sock_flag(sk, SOCK_RCVTSTAMPNS);
764		break;
765
766	case SO_RCVTIMEO:
767		lv=sizeof(struct timeval);
768		if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
769			v.tm.tv_sec = 0;
770			v.tm.tv_usec = 0;
771		} else {
772			v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
773			v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ;
774		}
775		break;
776
777	case SO_SNDTIMEO:
778		lv=sizeof(struct timeval);
779		if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
780			v.tm.tv_sec = 0;
781			v.tm.tv_usec = 0;
782		} else {
783			v.tm.tv_sec = sk->sk_sndtimeo / HZ;
784			v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ;
785		}
786		break;
787
788	case SO_RCVLOWAT:
789		v.val = sk->sk_rcvlowat;
790		break;
791
792	case SO_SNDLOWAT:
793		v.val=1;
794		break;
795
796	case SO_PASSCRED:
797		v.val = test_bit(SOCK_PASSCRED, &sock->flags) ? 1 : 0;
798		break;
799
800	case SO_PEERCRED:
801		if (len > sizeof(sk->sk_peercred))
802			len = sizeof(sk->sk_peercred);
803		if (copy_to_user(optval, &sk->sk_peercred, len))
804			return -EFAULT;
805		goto lenout;
806
807	case SO_PEERNAME:
808	{
809		char address[128];
810
811		if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
812			return -ENOTCONN;
813		if (lv < len)
814			return -EINVAL;
815		if (copy_to_user(optval, address, len))
816			return -EFAULT;
817		goto lenout;
818	}
819
820	/* Dubious BSD thing... Probably nobody even uses it, but
821	 * the UNIX standard wants it for whatever reason... -DaveM
822	 */
823	case SO_ACCEPTCONN:
824		v.val = sk->sk_state == TCP_LISTEN;
825		break;
826
827	case SO_PASSSEC:
828		v.val = test_bit(SOCK_PASSSEC, &sock->flags) ? 1 : 0;
829		break;
830
831	case SO_PEERSEC:
832		return security_socket_getpeersec_stream(sock, optval, optlen, len);
833
834	default:
835		return -ENOPROTOOPT;
836	}
837
838	if (len > lv)
839		len = lv;
840	if (copy_to_user(optval, &v, len))
841		return -EFAULT;
842lenout:
843	if (put_user(len, optlen))
844		return -EFAULT;
845	return 0;
846}
847
848/*
849 * Initialize an sk_lock.
850 *
851 * (We also register the sk_lock with the lock validator.)
852 */
853static inline void sock_lock_init(struct sock *sk)
854{
855	sock_lock_init_class_and_name(sk,
856			af_family_slock_key_strings[sk->sk_family],
857			af_family_slock_keys + sk->sk_family,
858			af_family_key_strings[sk->sk_family],
859			af_family_keys + sk->sk_family);
860}
861
862static void sock_copy(struct sock *nsk, const struct sock *osk)
863{
864#ifdef CONFIG_SECURITY_NETWORK
865	void *sptr = nsk->sk_security;
866#endif
867
868	memcpy(nsk, osk, osk->sk_prot->obj_size);
869#ifdef CONFIG_SECURITY_NETWORK
870	nsk->sk_security = sptr;
871	security_sk_clone(osk, nsk);
872#endif
873}
874
875static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
876		int family)
877{
878	struct sock *sk;
879	struct kmem_cache *slab;
880
881	slab = prot->slab;
882	if (slab != NULL)
883		sk = kmem_cache_alloc(slab, priority);
884	else
885		sk = kmalloc(prot->obj_size, priority);
886
887	if (sk != NULL) {
888		if (security_sk_alloc(sk, family, priority))
889			goto out_free;
890
891		if (!try_module_get(prot->owner))
892			goto out_free_sec;
893	}
894
895	return sk;
896
897out_free_sec:
898	security_sk_free(sk);
899out_free:
900	if (slab != NULL)
901		kmem_cache_free(slab, sk);
902	else
903		kfree(sk);
904	return NULL;
905}
906
907static void sk_prot_free(struct proto *prot, struct sock *sk)
908{
909	struct kmem_cache *slab;
910	struct module *owner;
911
912	owner = prot->owner;
913	slab = prot->slab;
914
915	security_sk_free(sk);
916	if (slab != NULL)
917		kmem_cache_free(slab, sk);
918	else
919		kfree(sk);
920	module_put(owner);
921}
922
923/**
924 *	sk_alloc - All socket objects are allocated here
925 *	@net: the applicable net namespace
926 *	@family: protocol family
927 *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
928 *	@prot: struct proto associated with this new sock instance
930 */
931struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
932		      struct proto *prot)
933{
934	struct sock *sk;
935
936	sk = sk_prot_alloc(prot, priority | __GFP_ZERO, family);
937	if (sk) {
938		sk->sk_family = family;
939		/*
940		 * See comment in struct sock definition to understand
941		 * why we need sk_prot_creator -acme
942		 */
943		sk->sk_prot = sk->sk_prot_creator = prot;
944		sock_lock_init(sk);
945		sk->sk_net = get_net(net);
946	}
947
948	return sk;
949}
950
951void sk_free(struct sock *sk)
952{
953	struct sk_filter *filter;
954
955	if (sk->sk_destruct)
956		sk->sk_destruct(sk);
957
958	filter = rcu_dereference(sk->sk_filter);
959	if (filter) {
960		sk_filter_uncharge(sk, filter);
961		rcu_assign_pointer(sk->sk_filter, NULL);
962	}
963
964	sock_disable_timestamp(sk);
965
966	if (atomic_read(&sk->sk_omem_alloc))
967		printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n",
968		       __FUNCTION__, atomic_read(&sk->sk_omem_alloc));
969
970	put_net(sk->sk_net);
971	sk_prot_free(sk->sk_prot_creator, sk);
972}
973
974struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
975{
976	struct sock *newsk;
977
978	newsk = sk_prot_alloc(sk->sk_prot, priority, sk->sk_family);
979	if (newsk != NULL) {
980		struct sk_filter *filter;
981
982		sock_copy(newsk, sk);
983
984		/* SANITY */
985		get_net(newsk->sk_net);
986		sk_node_init(&newsk->sk_node);
987		sock_lock_init(newsk);
988		bh_lock_sock(newsk);
989		newsk->sk_backlog.head	= newsk->sk_backlog.tail = NULL;
990
991		atomic_set(&newsk->sk_rmem_alloc, 0);
992		atomic_set(&newsk->sk_wmem_alloc, 0);
993		atomic_set(&newsk->sk_omem_alloc, 0);
994		skb_queue_head_init(&newsk->sk_receive_queue);
995		skb_queue_head_init(&newsk->sk_write_queue);
996#ifdef CONFIG_NET_DMA
997		skb_queue_head_init(&newsk->sk_async_wait_queue);
998#endif
999
1000		rwlock_init(&newsk->sk_dst_lock);
1001		rwlock_init(&newsk->sk_callback_lock);
1002		lockdep_set_class_and_name(&newsk->sk_callback_lock,
1003				af_callback_keys + newsk->sk_family,
1004				af_family_clock_key_strings[newsk->sk_family]);
1005
1006		newsk->sk_dst_cache	= NULL;
1007		newsk->sk_wmem_queued	= 0;
1008		newsk->sk_forward_alloc = 0;
1009		newsk->sk_send_head	= NULL;
1010		newsk->sk_userlocks	= sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
1011
1012		sock_reset_flag(newsk, SOCK_DONE);
1013		skb_queue_head_init(&newsk->sk_error_queue);
1014
1015		filter = newsk->sk_filter;
1016		if (filter != NULL)
1017			sk_filter_charge(newsk, filter);
1018
1019		if (unlikely(xfrm_sk_clone_policy(newsk))) {
			/* It is still a raw copy of the parent, so invalidate
			 * the destructor and do a plain sk_free() */
1022			newsk->sk_destruct = NULL;
1023			sk_free(newsk);
1024			newsk = NULL;
1025			goto out;
1026		}
1027
1028		newsk->sk_err	   = 0;
1029		newsk->sk_priority = 0;
1030		atomic_set(&newsk->sk_refcnt, 2);
1031
1032		/*
1033		 * Increment the counter in the same struct proto as the master
1034		 * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
1035		 * is the same as sk->sk_prot->socks, as this field was copied
1036		 * with memcpy).
1037		 *
1038		 * This _changes_ the previous behaviour, where
1039		 * tcp_create_openreq_child always was incrementing the
		 * equivalent to tcp_prot->socks (inet_sock_nr), so this has
1041		 * to be taken into account in all callers. -acme
1042		 */
1043		sk_refcnt_debug_inc(newsk);
1044		newsk->sk_socket = NULL;
1045		newsk->sk_sleep	 = NULL;
1046
1047		if (newsk->sk_prot->sockets_allocated)
1048			atomic_inc(newsk->sk_prot->sockets_allocated);
1049	}
1050out:
1051	return newsk;
1052}
1053
1054EXPORT_SYMBOL_GPL(sk_clone);
1055
1056void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
1057{
1058	__sk_dst_set(sk, dst);
1059	sk->sk_route_caps = dst->dev->features;
1060	if (sk->sk_route_caps & NETIF_F_GSO)
1061		sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
1062	if (sk_can_gso(sk)) {
1063		if (dst->header_len)
1064			sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
1065		else
1066			sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
1067	}
1068}
1069EXPORT_SYMBOL_GPL(sk_setup_caps);
1070
1071void __init sk_init(void)
1072{
1073	if (num_physpages <= 4096) {
1074		sysctl_wmem_max = 32767;
1075		sysctl_rmem_max = 32767;
1076		sysctl_wmem_default = 32767;
1077		sysctl_rmem_default = 32767;
1078	} else if (num_physpages >= 131072) {
1079		sysctl_wmem_max = 131071;
1080		sysctl_rmem_max = 131071;
1081	}
1082}
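/*
 * For scale (assuming 4 KiB pages): 4096 physical pages is a 16 MiB
 * machine and 131072 pages is 512 MiB, so the buffer defaults shrink on
 * tiny systems while only the hard caps grow on larger ones.
 */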
1083
1084/*
1085 *	Simple resource managers for sockets.
1086 */
1087
1088
1089/*
1090 * Write buffer destructor automatically called from kfree_skb.
1091 */
1092void sock_wfree(struct sk_buff *skb)
1093{
1094	struct sock *sk = skb->sk;
1095
1096	/* In case it might be waiting for more memory. */
1097	atomic_sub(skb->truesize, &sk->sk_wmem_alloc);
1098	if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE))
1099		sk->sk_write_space(sk);
1100	sock_put(sk);
1101}
1102
1103/*
1104 * Read buffer destructor automatically called from kfree_skb.
1105 */
1106void sock_rfree(struct sk_buff *skb)
1107{
1108	struct sock *sk = skb->sk;
1109
1110	atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
1111}
1112
1113
1114int sock_i_uid(struct sock *sk)
1115{
1116	int uid;
1117
1118	read_lock(&sk->sk_callback_lock);
1119	uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : 0;
1120	read_unlock(&sk->sk_callback_lock);
1121	return uid;
1122}
1123
1124unsigned long sock_i_ino(struct sock *sk)
1125{
1126	unsigned long ino;
1127
1128	read_lock(&sk->sk_callback_lock);
1129	ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
1130	read_unlock(&sk->sk_callback_lock);
1131	return ino;
1132}
1133
1134/*
1135 * Allocate a skb from the socket's send buffer.
1136 */
1137struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
1138			     gfp_t priority)
1139{
1140	if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
1141		struct sk_buff * skb = alloc_skb(size, priority);
1142		if (skb) {
1143			skb_set_owner_w(skb, sk);
1144			return skb;
1145		}
1146	}
1147	return NULL;
1148}
1149
1150/*
1151 * Allocate a skb from the socket's receive buffer.
1152 */
1153struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force,
1154			     gfp_t priority)
1155{
1156	if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
1157		struct sk_buff *skb = alloc_skb(size, priority);
1158		if (skb) {
1159			skb_set_owner_r(skb, sk);
1160			return skb;
1161		}
1162	}
1163	return NULL;
1164}
1165
1166/*
1167 * Allocate a memory block from the socket's option memory buffer.
1168 */
1169void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
1170{
1171	if ((unsigned)size <= sysctl_optmem_max &&
1172	    atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
1173		void *mem;
1174		/* First do the add, to avoid the race if kmalloc
1175		 * might sleep.
1176		 */
1177		atomic_add(size, &sk->sk_omem_alloc);
1178		mem = kmalloc(size, priority);
1179		if (mem)
1180			return mem;
1181		atomic_sub(size, &sk->sk_omem_alloc);
1182	}
1183	return NULL;
1184}
1185
1186/*
1187 * Free an option memory block.
1188 */
1189void sock_kfree_s(struct sock *sk, void *mem, int size)
1190{
1191	kfree(mem);
1192	atomic_sub(size, &sk->sk_omem_alloc);
1193}
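/*
 * Usage sketch: option-memory allocations are charged to sk_omem_alloc
 * and must be released with the same size so the accounting against
 * sysctl_optmem_max stays balanced, e.g.
 *
 *	buf = sock_kmalloc(sk, len, GFP_KERNEL);
 *	...
 *	sock_kfree_s(sk, buf, len);
 */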
1194
1195/* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
   I think these locks should be removed for datagram sockets.
1197 */
1198static long sock_wait_for_wmem(struct sock * sk, long timeo)
1199{
1200	DEFINE_WAIT(wait);
1201
1202	clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
1203	for (;;) {
1204		if (!timeo)
1205			break;
1206		if (signal_pending(current))
1207			break;
1208		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1209		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
1210		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
1211			break;
1212		if (sk->sk_shutdown & SEND_SHUTDOWN)
1213			break;
1214		if (sk->sk_err)
1215			break;
1216		timeo = schedule_timeout(timeo);
1217	}
1218	finish_wait(sk->sk_sleep, &wait);
1219	return timeo;
1220}
1221
1222
1223/*
1224 *	Generic send/receive buffer handlers
1225 */
1226
1227static struct sk_buff *sock_alloc_send_pskb(struct sock *sk,
1228					    unsigned long header_len,
1229					    unsigned long data_len,
1230					    int noblock, int *errcode)
1231{
1232	struct sk_buff *skb;
1233	gfp_t gfp_mask;
1234	long timeo;
1235	int err;
1236
1237	gfp_mask = sk->sk_allocation;
1238	if (gfp_mask & __GFP_WAIT)
1239		gfp_mask |= __GFP_REPEAT;
1240
1241	timeo = sock_sndtimeo(sk, noblock);
1242	while (1) {
1243		err = sock_error(sk);
1244		if (err != 0)
1245			goto failure;
1246
1247		err = -EPIPE;
1248		if (sk->sk_shutdown & SEND_SHUTDOWN)
1249			goto failure;
1250
1251		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
1252			skb = alloc_skb(header_len, gfp_mask);
1253			if (skb) {
1254				int npages;
1255				int i;
1256
1257				/* No pages, we're done... */
1258				if (!data_len)
1259					break;
1260
1261				npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
1262				skb->truesize += data_len;
1263				skb_shinfo(skb)->nr_frags = npages;
1264				for (i = 0; i < npages; i++) {
1265					struct page *page;
1266					skb_frag_t *frag;
1267
1268					page = alloc_pages(sk->sk_allocation, 0);
1269					if (!page) {
1270						err = -ENOBUFS;
1271						skb_shinfo(skb)->nr_frags = i;
1272						kfree_skb(skb);
1273						goto failure;
1274					}
1275
1276					frag = &skb_shinfo(skb)->frags[i];
1277					frag->page = page;
1278					frag->page_offset = 0;
1279					frag->size = (data_len >= PAGE_SIZE ?
1280						      PAGE_SIZE :
1281						      data_len);
1282					data_len -= PAGE_SIZE;
1283				}
1284
1285				/* Full success... */
1286				break;
1287			}
1288			err = -ENOBUFS;
1289			goto failure;
1290		}
1291		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
1292		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1293		err = -EAGAIN;
1294		if (!timeo)
1295			goto failure;
1296		if (signal_pending(current))
1297			goto interrupted;
1298		timeo = sock_wait_for_wmem(sk, timeo);
1299	}
1300
1301	skb_set_owner_w(skb, sk);
1302	return skb;
1303
1304interrupted:
1305	err = sock_intr_errno(timeo);
1306failure:
1307	*errcode = err;
1308	return NULL;
1309}
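/*
 * Layout example (illustrative): with 4 KiB pages, a request for
 * header_len = 128 and data_len = 6000 yields a 128-byte linear area plus
 * two page frags, the first of PAGE_SIZE bytes and the second holding the
 * remaining 6000 - 4096 = 1904 bytes.
 */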
1310
1311struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
1312				    int noblock, int *errcode)
1313{
1314	return sock_alloc_send_pskb(sk, size, 0, noblock, errcode);
1315}
1316
1317static void __lock_sock(struct sock *sk)
1318{
1319	DEFINE_WAIT(wait);
1320
1321	for (;;) {
1322		prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
1323					TASK_UNINTERRUPTIBLE);
1324		spin_unlock_bh(&sk->sk_lock.slock);
1325		schedule();
1326		spin_lock_bh(&sk->sk_lock.slock);
1327		if (!sock_owned_by_user(sk))
1328			break;
1329	}
1330	finish_wait(&sk->sk_lock.wq, &wait);
1331}
1332
1333static void __release_sock(struct sock *sk)
1334{
1335	struct sk_buff *skb = sk->sk_backlog.head;
1336
1337	do {
1338		sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
1339		bh_unlock_sock(sk);
1340
1341		do {
1342			struct sk_buff *next = skb->next;
1343
1344			skb->next = NULL;
1345			sk->sk_backlog_rcv(sk, skb);
1346
1347			/*
1348			 * We are in process context here with softirqs
1349			 * disabled, use cond_resched_softirq() to preempt.
1350			 * This is safe to do because we've taken the backlog
1351			 * queue private:
1352			 */
1353			cond_resched_softirq();
1354
1355			skb = next;
1356		} while (skb != NULL);
1357
1358		bh_lock_sock(sk);
1359	} while ((skb = sk->sk_backlog.head) != NULL);
1360}
1361
1362/**
1363 * sk_wait_data - wait for data to arrive at sk_receive_queue
1364 * @sk:    sock to wait on
1365 * @timeo: for how long
1366 *
1367 * Now socket state including sk->sk_err is changed only under lock,
1368 * hence we may omit checks after joining wait queue.
1369 * We check receive queue before schedule() only as optimization;
1370 * it is very likely that release_sock() added new data.
1371 */
1372int sk_wait_data(struct sock *sk, long *timeo)
1373{
1374	int rc;
1375	DEFINE_WAIT(wait);
1376
1377	prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
1378	set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1379	rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
1380	clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1381	finish_wait(sk->sk_sleep, &wait);
1382	return rc;
1383}
1384
1385EXPORT_SYMBOL(sk_wait_data);
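/*
 * Caller sketch (illustrative, not lifted from any one protocol): receive
 * paths typically hold the socket lock and loop along the lines of
 *
 *	while (skb_queue_empty(&sk->sk_receive_queue) && timeo)
 *		sk_wait_data(sk, &timeo);
 *
 * re-checking the queue and the remaining timeout after every wake-up.
 */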
1386
1387/*
1388 * Set of default routines for initialising struct proto_ops when
1389 * the protocol does not support a particular function. In certain
1390 * cases where it makes no sense for a protocol to have a "do nothing"
1391 * function, some default processing is provided.
1392 */
1393
1394int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
1395{
1396	return -EOPNOTSUPP;
1397}
1398
1399int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
1400		    int len, int flags)
1401{
1402	return -EOPNOTSUPP;
1403}
1404
1405int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
1406{
1407	return -EOPNOTSUPP;
1408}
1409
1410int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
1411{
1412	return -EOPNOTSUPP;
1413}
1414
1415int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
1416		    int *len, int peer)
1417{
1418	return -EOPNOTSUPP;
1419}
1420
1421unsigned int sock_no_poll(struct file * file, struct socket *sock, poll_table *pt)
1422{
1423	return 0;
1424}
1425
1426int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1427{
1428	return -EOPNOTSUPP;
1429}
1430
1431int sock_no_listen(struct socket *sock, int backlog)
1432{
1433	return -EOPNOTSUPP;
1434}
1435
1436int sock_no_shutdown(struct socket *sock, int how)
1437{
1438	return -EOPNOTSUPP;
1439}
1440
1441int sock_no_setsockopt(struct socket *sock, int level, int optname,
1442		    char __user *optval, int optlen)
1443{
1444	return -EOPNOTSUPP;
1445}
1446
1447int sock_no_getsockopt(struct socket *sock, int level, int optname,
1448		    char __user *optval, int __user *optlen)
1449{
1450	return -EOPNOTSUPP;
1451}
1452
1453int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
1454		    size_t len)
1455{
1456	return -EOPNOTSUPP;
1457}
1458
1459int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
1460		    size_t len, int flags)
1461{
1462	return -EOPNOTSUPP;
1463}
1464
1465int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
1466{
1467	/* Mirror missing mmap method error code */
1468	return -ENODEV;
1469}
1470
1471ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
1472{
1473	ssize_t res;
1474	struct msghdr msg = {.msg_flags = flags};
1475	struct kvec iov;
1476	char *kaddr = kmap(page);
1477	iov.iov_base = kaddr + offset;
1478	iov.iov_len = size;
1479	res = kernel_sendmsg(sock, &msg, &iov, 1, size);
1480	kunmap(page);
1481	return res;
1482}
1483
1484/*
1485 *	Default Socket Callbacks
1486 */
1487
1488static void sock_def_wakeup(struct sock *sk)
1489{
1490	read_lock(&sk->sk_callback_lock);
1491	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1492		wake_up_interruptible_all(sk->sk_sleep);
1493	read_unlock(&sk->sk_callback_lock);
1494}
1495
1496static void sock_def_error_report(struct sock *sk)
1497{
1498	read_lock(&sk->sk_callback_lock);
1499	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1500		wake_up_interruptible(sk->sk_sleep);
1501	sk_wake_async(sk,0,POLL_ERR);
1502	read_unlock(&sk->sk_callback_lock);
1503}
1504
1505static void sock_def_readable(struct sock *sk, int len)
1506{
1507	read_lock(&sk->sk_callback_lock);
1508	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1509		wake_up_interruptible(sk->sk_sleep);
1510	sk_wake_async(sk,1,POLL_IN);
1511	read_unlock(&sk->sk_callback_lock);
1512}
1513
1514static void sock_def_write_space(struct sock *sk)
1515{
1516	read_lock(&sk->sk_callback_lock);
1517
1518	/* Do not wake up a writer until he can make "significant"
1519	 * progress.  --DaveM
1520	 */
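	/* i.e. wake only once no more than half of sk_sndbuf is in use. */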
1521	if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
1522		if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1523			wake_up_interruptible(sk->sk_sleep);
1524
1525		/* Should agree with poll, otherwise some programs break */
1526		if (sock_writeable(sk))
1527			sk_wake_async(sk, 2, POLL_OUT);
1528	}
1529
1530	read_unlock(&sk->sk_callback_lock);
1531}
1532
1533static void sock_def_destruct(struct sock *sk)
1534{
1535	kfree(sk->sk_protinfo);
1536}
1537
1538void sk_send_sigurg(struct sock *sk)
1539{
1540	if (sk->sk_socket && sk->sk_socket->file)
1541		if (send_sigurg(&sk->sk_socket->file->f_owner))
1542			sk_wake_async(sk, 3, POLL_PRI);
1543}
1544
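/*
 * Timer helpers: a pending timer holds a reference on the socket.
 * mod_timer() returns 0 when the timer was not already pending, so
 * sk_reset_timer() takes a reference only when it actually arms the
 * timer, and sk_stop_timer() drops one only when it deletes a timer
 * that was still pending.
 */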
1545void sk_reset_timer(struct sock *sk, struct timer_list* timer,
1546		    unsigned long expires)
1547{
1548	if (!mod_timer(timer, expires))
1549		sock_hold(sk);
1550}
1551
1552EXPORT_SYMBOL(sk_reset_timer);
1553
1554void sk_stop_timer(struct sock *sk, struct timer_list* timer)
1555{
1556	if (timer_pending(timer) && del_timer(timer))
1557		__sock_put(sk);
1558}
1559
1560EXPORT_SYMBOL(sk_stop_timer);
1561
1562void sock_init_data(struct socket *sock, struct sock *sk)
1563{
1564	skb_queue_head_init(&sk->sk_receive_queue);
1565	skb_queue_head_init(&sk->sk_write_queue);
1566	skb_queue_head_init(&sk->sk_error_queue);
1567#ifdef CONFIG_NET_DMA
1568	skb_queue_head_init(&sk->sk_async_wait_queue);
1569#endif
1570
1571	sk->sk_send_head	=	NULL;
1572
1573	init_timer(&sk->sk_timer);
1574
1575	sk->sk_allocation	=	GFP_KERNEL;
1576	sk->sk_rcvbuf		=	sysctl_rmem_default;
1577	sk->sk_sndbuf		=	sysctl_wmem_default;
1578	sk->sk_state		=	TCP_CLOSE;
1579	sk->sk_socket		=	sock;
1580
1581	sock_set_flag(sk, SOCK_ZAPPED);
1582
1583	if (sock) {
1584		sk->sk_type	=	sock->type;
1585		sk->sk_sleep	=	&sock->wait;
1586		sock->sk	=	sk;
1587	} else
1588		sk->sk_sleep	=	NULL;
1589
1590	rwlock_init(&sk->sk_dst_lock);
1591	rwlock_init(&sk->sk_callback_lock);
1592	lockdep_set_class_and_name(&sk->sk_callback_lock,
1593			af_callback_keys + sk->sk_family,
1594			af_family_clock_key_strings[sk->sk_family]);
1595
1596	sk->sk_state_change	=	sock_def_wakeup;
1597	sk->sk_data_ready	=	sock_def_readable;
1598	sk->sk_write_space	=	sock_def_write_space;
1599	sk->sk_error_report	=	sock_def_error_report;
1600	sk->sk_destruct		=	sock_def_destruct;
1601
1602	sk->sk_sndmsg_page	=	NULL;
1603	sk->sk_sndmsg_off	=	0;
1604
1605	sk->sk_peercred.pid 	=	0;
1606	sk->sk_peercred.uid	=	-1;
1607	sk->sk_peercred.gid	=	-1;
1608	sk->sk_write_pending	=	0;
1609	sk->sk_rcvlowat		=	1;
1610	sk->sk_rcvtimeo		=	MAX_SCHEDULE_TIMEOUT;
1611	sk->sk_sndtimeo		=	MAX_SCHEDULE_TIMEOUT;
1612
1613	sk->sk_stamp = ktime_set(-1L, -1L);
1614
1615	atomic_set(&sk->sk_refcnt, 1);
1616	atomic_set(&sk->sk_drops, 0);
1617}
1618
1619void fastcall lock_sock_nested(struct sock *sk, int subclass)
1620{
1621	might_sleep();
1622	spin_lock_bh(&sk->sk_lock.slock);
1623	if (sk->sk_lock.owned)
1624		__lock_sock(sk);
1625	sk->sk_lock.owned = 1;
1626	spin_unlock(&sk->sk_lock.slock);
1627	/*
1628	 * The sk_lock has mutex_lock() semantics here:
1629	 */
1630	mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
1631	local_bh_enable();
1632}
1633
1634EXPORT_SYMBOL(lock_sock_nested);
1635
1636void fastcall release_sock(struct sock *sk)
1637{
1638	/*
1639	 * The sk_lock has mutex_unlock() semantics:
1640	 */
1641	mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
1642
1643	spin_lock_bh(&sk->sk_lock.slock);
1644	if (sk->sk_backlog.tail)
1645		__release_sock(sk);
1646	sk->sk_lock.owned = 0;
1647	if (waitqueue_active(&sk->sk_lock.wq))
1648		wake_up(&sk->sk_lock.wq);
1649	spin_unlock_bh(&sk->sk_lock.slock);
1650}
1651EXPORT_SYMBOL(release_sock);
1652
1653int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
1654{
1655	struct timeval tv;
1656	if (!sock_flag(sk, SOCK_TIMESTAMP))
1657		sock_enable_timestamp(sk);
1658	tv = ktime_to_timeval(sk->sk_stamp);
1659	if (tv.tv_sec == -1)
1660		return -ENOENT;
1661	if (tv.tv_sec == 0) {
1662		sk->sk_stamp = ktime_get_real();
1663		tv = ktime_to_timeval(sk->sk_stamp);
1664	}
1665	return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0;
1666}
1667EXPORT_SYMBOL(sock_get_timestamp);
1668
1669int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
1670{
1671	struct timespec ts;
1672	if (!sock_flag(sk, SOCK_TIMESTAMP))
1673		sock_enable_timestamp(sk);
1674	ts = ktime_to_timespec(sk->sk_stamp);
1675	if (ts.tv_sec == -1)
1676		return -ENOENT;
1677	if (ts.tv_sec == 0) {
1678		sk->sk_stamp = ktime_get_real();
1679		ts = ktime_to_timespec(sk->sk_stamp);
1680	}
1681	return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0;
1682}
1683EXPORT_SYMBOL(sock_get_timestampns);
1684
1685void sock_enable_timestamp(struct sock *sk)
1686{
1687	if (!sock_flag(sk, SOCK_TIMESTAMP)) {
1688		sock_set_flag(sk, SOCK_TIMESTAMP);
1689		net_enable_timestamp();
1690	}
1691}
1692
/*
 *	Get a socket option on a socket.
 *
 *	FIX: POSIX 1003.1g is very ambiguous here. It states that
 *	asynchronous errors should be reported by getsockopt. We assume
 *	this means if you specify SO_ERROR (otherwise what's the point of it?).
 */
1700int sock_common_getsockopt(struct socket *sock, int level, int optname,
1701			   char __user *optval, int __user *optlen)
1702{
1703	struct sock *sk = sock->sk;
1704
1705	return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
1706}
1707
1708EXPORT_SYMBOL(sock_common_getsockopt);
1709
1710#ifdef CONFIG_COMPAT
1711int compat_sock_common_getsockopt(struct socket *sock, int level, int optname,
1712				  char __user *optval, int __user *optlen)
1713{
1714	struct sock *sk = sock->sk;
1715
1716	if (sk->sk_prot->compat_getsockopt != NULL)
1717		return sk->sk_prot->compat_getsockopt(sk, level, optname,
1718						      optval, optlen);
1719	return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
1720}
1721EXPORT_SYMBOL(compat_sock_common_getsockopt);
1722#endif
1723
1724int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock,
1725			struct msghdr *msg, size_t size, int flags)
1726{
1727	struct sock *sk = sock->sk;
1728	int addr_len = 0;
1729	int err;
1730
1731	err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
1732				   flags & ~MSG_DONTWAIT, &addr_len);
1733	if (err >= 0)
1734		msg->msg_namelen = addr_len;
1735	return err;
1736}
1737
1738EXPORT_SYMBOL(sock_common_recvmsg);
1739
1740/*
1741 *	Set socket options on an inet socket.
1742 */
1743int sock_common_setsockopt(struct socket *sock, int level, int optname,
1744			   char __user *optval, int optlen)
1745{
1746	struct sock *sk = sock->sk;
1747
1748	return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
1749}
1750
1751EXPORT_SYMBOL(sock_common_setsockopt);
1752
1753#ifdef CONFIG_COMPAT
1754int compat_sock_common_setsockopt(struct socket *sock, int level, int optname,
1755				  char __user *optval, int optlen)
1756{
1757	struct sock *sk = sock->sk;
1758
1759	if (sk->sk_prot->compat_setsockopt != NULL)
1760		return sk->sk_prot->compat_setsockopt(sk, level, optname,
1761						      optval, optlen);
1762	return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
1763}
1764EXPORT_SYMBOL(compat_sock_common_setsockopt);
1765#endif
1766
1767void sk_common_release(struct sock *sk)
1768{
1769	if (sk->sk_prot->destroy)
1770		sk->sk_prot->destroy(sk);
1771
	/*
	 * Observation: when sk_common_release is called, processes have
	 * no access to the socket, but the network stack still does.
	 * Step one, detach it from networking:
	 *
	 * A. Remove it from the hash tables.
	 */

	sk->sk_prot->unhash(sk);

	/*
	 * At this point the socket cannot receive new packets, but it is
	 * possible that some packets are still in flight because another CPU
	 * ran the receive path and did its hash-table lookup before we
	 * unhashed the socket. Those packets will reach the receive queue
	 * and be purged by the socket destructor.
	 *
	 * We may also still have packets pending on the receive queue and,
	 * probably, our own packets waiting in device queues. sock_destroy
	 * will drain the receive queue, but transmitted packets will delay
	 * socket destruction until the last reference is released.
	 */
1793
1794	sock_orphan(sk);
1795
1796	xfrm_sk_free_policy(sk);
1797
1798	sk_refcnt_debug_release(sk);
1799	sock_put(sk);
1800}
1801
1802EXPORT_SYMBOL(sk_common_release);
1803
1804static DEFINE_RWLOCK(proto_list_lock);
1805static LIST_HEAD(proto_list);
1806
1807#ifdef CONFIG_SMP
1808/*
1809 * Define default functions to keep track of inuse sockets per protocol
1810 * Note that often used protocols use dedicated functions to get a speed increase.
1811 * (see DEFINE_PROTO_INUSE/REF_PROTO_INUSE)
1812 */
1813static void inuse_add(struct proto *prot, int inc)
1814{
1815	per_cpu_ptr(prot->inuse_ptr, smp_processor_id())[0] += inc;
1816}
1817
1818static int inuse_get(const struct proto *prot)
1819{
1820	int res = 0, cpu;
1821	for_each_possible_cpu(cpu)
1822		res += per_cpu_ptr(prot->inuse_ptr, cpu)[0];
1823	return res;
1824}
1825
1826static int inuse_init(struct proto *prot)
1827{
1828	if (!prot->inuse_getval || !prot->inuse_add) {
1829		prot->inuse_ptr = alloc_percpu(int);
1830		if (prot->inuse_ptr == NULL)
1831			return -ENOBUFS;
1832
1833		prot->inuse_getval = inuse_get;
1834		prot->inuse_add = inuse_add;
1835	}
1836	return 0;
1837}
1838
1839static void inuse_fini(struct proto *prot)
1840{
1841	if (prot->inuse_ptr != NULL) {
1842		free_percpu(prot->inuse_ptr);
1843		prot->inuse_ptr = NULL;
1844		prot->inuse_getval = NULL;
1845		prot->inuse_add = NULL;
1846	}
1847}
1848#else
1849static inline int inuse_init(struct proto *prot)
1850{
1851	return 0;
1852}
1853
1854static inline void inuse_fini(struct proto *prot)
1855{
1856}
1857#endif
1858
1859int proto_register(struct proto *prot, int alloc_slab)
1860{
1861	char *request_sock_slab_name = NULL;
1862	char *timewait_sock_slab_name;
1863
1864	if (inuse_init(prot))
1865		goto out;
1866
1867	if (alloc_slab) {
1868		prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
1869					       SLAB_HWCACHE_ALIGN, NULL);
1870
1871		if (prot->slab == NULL) {
1872			printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n",
1873			       prot->name);
1874			goto out_free_inuse;
1875		}
1876
1877		if (prot->rsk_prot != NULL) {
1878			static const char mask[] = "request_sock_%s";
1879
1880			request_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
1881			if (request_sock_slab_name == NULL)
1882				goto out_free_sock_slab;
1883
1884			sprintf(request_sock_slab_name, mask, prot->name);
1885			prot->rsk_prot->slab = kmem_cache_create(request_sock_slab_name,
1886								 prot->rsk_prot->obj_size, 0,
1887								 SLAB_HWCACHE_ALIGN, NULL);
1888
1889			if (prot->rsk_prot->slab == NULL) {
1890				printk(KERN_CRIT "%s: Can't create request sock SLAB cache!\n",
1891				       prot->name);
1892				goto out_free_request_sock_slab_name;
1893			}
1894		}
1895
1896		if (prot->twsk_prot != NULL) {
1897			static const char mask[] = "tw_sock_%s";
1898
1899			timewait_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
1900
1901			if (timewait_sock_slab_name == NULL)
1902				goto out_free_request_sock_slab;
1903
1904			sprintf(timewait_sock_slab_name, mask, prot->name);
1905			prot->twsk_prot->twsk_slab =
1906				kmem_cache_create(timewait_sock_slab_name,
1907						  prot->twsk_prot->twsk_obj_size,
1908						  0, SLAB_HWCACHE_ALIGN,
1909						  NULL);
1910			if (prot->twsk_prot->twsk_slab == NULL)
1911				goto out_free_timewait_sock_slab_name;
1912		}
1913	}
1914
1915	write_lock(&proto_list_lock);
1916	list_add(&prot->node, &proto_list);
1917	write_unlock(&proto_list_lock);
1918	return 0;
1919
1920out_free_timewait_sock_slab_name:
1921	kfree(timewait_sock_slab_name);
1922out_free_request_sock_slab:
1923	if (prot->rsk_prot && prot->rsk_prot->slab) {
1924		kmem_cache_destroy(prot->rsk_prot->slab);
1925		prot->rsk_prot->slab = NULL;
1926	}
1927out_free_request_sock_slab_name:
1928	kfree(request_sock_slab_name);
1929out_free_sock_slab:
1930	kmem_cache_destroy(prot->slab);
1931	prot->slab = NULL;
1932out_free_inuse:
1933	inuse_fini(prot);
1934out:
1935	return -ENOBUFS;
1936}
1937
1938EXPORT_SYMBOL(proto_register);
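/*
 * Registration sketch (hypothetical protocol; only the fields needed by
 * proto_register() are shown):
 *
 *	static struct proto foo_prot = {
 *		.name	  = "FOO",
 *		.owner	  = THIS_MODULE,
 *		.obj_size = sizeof(struct foo_sock),
 *	};
 *
 *	err = proto_register(&foo_prot, 1);	(alloc_slab == 1)
 *
 * proto_unregister(&foo_prot) undoes this at module unload time.
 */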
1939
1940void proto_unregister(struct proto *prot)
1941{
1942	write_lock(&proto_list_lock);
1943	list_del(&prot->node);
1944	write_unlock(&proto_list_lock);
1945
1946	inuse_fini(prot);
1947	if (prot->slab != NULL) {
1948		kmem_cache_destroy(prot->slab);
1949		prot->slab = NULL;
1950	}
1951
1952	if (prot->rsk_prot != NULL && prot->rsk_prot->slab != NULL) {
1953		const char *name = kmem_cache_name(prot->rsk_prot->slab);
1954
1955		kmem_cache_destroy(prot->rsk_prot->slab);
1956		kfree(name);
1957		prot->rsk_prot->slab = NULL;
1958	}
1959
1960	if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) {
1961		const char *name = kmem_cache_name(prot->twsk_prot->twsk_slab);
1962
1963		kmem_cache_destroy(prot->twsk_prot->twsk_slab);
1964		kfree(name);
1965		prot->twsk_prot->twsk_slab = NULL;
1966	}
1967}
1968
1969EXPORT_SYMBOL(proto_unregister);
1970
1971#ifdef CONFIG_PROC_FS
1972static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
1973{
1974	read_lock(&proto_list_lock);
1975	return seq_list_start_head(&proto_list, *pos);
1976}
1977
1978static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1979{
1980	return seq_list_next(v, &proto_list, pos);
1981}
1982
1983static void proto_seq_stop(struct seq_file *seq, void *v)
1984{
1985	read_unlock(&proto_list_lock);
1986}
1987
1988static char proto_method_implemented(const void *method)
1989{
1990	return method == NULL ? 'n' : 'y';
1991}
1992
1993static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
1994{
1995	seq_printf(seq, "%-9s %4u %6d  %6d   %-3s %6u   %-3s  %-10s "
1996			"%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
1997		   proto->name,
1998		   proto->obj_size,
1999		   proto->sockets_allocated != NULL ? atomic_read(proto->sockets_allocated) : -1,
2000		   proto->memory_allocated != NULL ? atomic_read(proto->memory_allocated) : -1,
2001		   proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI",
2002		   proto->max_header,
2003		   proto->slab == NULL ? "no" : "yes",
2004		   module_name(proto->owner),
2005		   proto_method_implemented(proto->close),
2006		   proto_method_implemented(proto->connect),
2007		   proto_method_implemented(proto->disconnect),
2008		   proto_method_implemented(proto->accept),
2009		   proto_method_implemented(proto->ioctl),
2010		   proto_method_implemented(proto->init),
2011		   proto_method_implemented(proto->destroy),
2012		   proto_method_implemented(proto->shutdown),
2013		   proto_method_implemented(proto->setsockopt),
2014		   proto_method_implemented(proto->getsockopt),
2015		   proto_method_implemented(proto->sendmsg),
2016		   proto_method_implemented(proto->recvmsg),
2017		   proto_method_implemented(proto->sendpage),
2018		   proto_method_implemented(proto->bind),
2019		   proto_method_implemented(proto->backlog_rcv),
2020		   proto_method_implemented(proto->hash),
2021		   proto_method_implemented(proto->unhash),
2022		   proto_method_implemented(proto->get_port),
2023		   proto_method_implemented(proto->enter_memory_pressure));
2024}
2025
2026static int proto_seq_show(struct seq_file *seq, void *v)
2027{
2028	if (v == &proto_list)
2029		seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
2030			   "protocol",
2031			   "size",
2032			   "sockets",
2033			   "memory",
2034			   "press",
2035			   "maxhdr",
2036			   "slab",
2037			   "module",
2038			   "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
2039	else
2040		proto_seq_printf(seq, list_entry(v, struct proto, node));
2041	return 0;
2042}
2043
2044static const struct seq_operations proto_seq_ops = {
2045	.start  = proto_seq_start,
2046	.next   = proto_seq_next,
2047	.stop   = proto_seq_stop,
2048	.show   = proto_seq_show,
2049};
2050
2051static int proto_seq_open(struct inode *inode, struct file *file)
2052{
2053	return seq_open(file, &proto_seq_ops);
2054}
2055
2056static const struct file_operations proto_seq_fops = {
2057	.owner		= THIS_MODULE,
2058	.open		= proto_seq_open,
2059	.read		= seq_read,
2060	.llseek		= seq_lseek,
2061	.release	= seq_release,
2062};
2063
2064static int __init proto_init(void)
2065{
2066	/* register /proc/net/protocols */
2067	return proc_net_fops_create(&init_net, "protocols", S_IRUGO, &proto_seq_fops) == NULL ? -ENOBUFS : 0;
2068}
2069
2070subsys_initcall(proto_init);
2071
2072#endif /* PROC_FS */
2073
2074EXPORT_SYMBOL(sk_alloc);
2075EXPORT_SYMBOL(sk_free);
2076EXPORT_SYMBOL(sk_send_sigurg);
2077EXPORT_SYMBOL(sock_alloc_send_skb);
2078EXPORT_SYMBOL(sock_init_data);
2079EXPORT_SYMBOL(sock_kfree_s);
2080EXPORT_SYMBOL(sock_kmalloc);
2081EXPORT_SYMBOL(sock_no_accept);
2082EXPORT_SYMBOL(sock_no_bind);
2083EXPORT_SYMBOL(sock_no_connect);
2084EXPORT_SYMBOL(sock_no_getname);
2085EXPORT_SYMBOL(sock_no_getsockopt);
2086EXPORT_SYMBOL(sock_no_ioctl);
2087EXPORT_SYMBOL(sock_no_listen);
2088EXPORT_SYMBOL(sock_no_mmap);
2089EXPORT_SYMBOL(sock_no_poll);
2090EXPORT_SYMBOL(sock_no_recvmsg);
2091EXPORT_SYMBOL(sock_no_sendmsg);
2092EXPORT_SYMBOL(sock_no_sendpage);
2093EXPORT_SYMBOL(sock_no_setsockopt);
2094EXPORT_SYMBOL(sock_no_shutdown);
2095EXPORT_SYMBOL(sock_no_socketpair);
2096EXPORT_SYMBOL(sock_rfree);
2097EXPORT_SYMBOL(sock_setsockopt);
2098EXPORT_SYMBOL(sock_wfree);
2099EXPORT_SYMBOL(sock_wmalloc);
2100EXPORT_SYMBOL(sock_i_uid);
2101EXPORT_SYMBOL(sock_i_ino);
2102EXPORT_SYMBOL(sysctl_optmem_max);
2103