sock.c revision 476e19cfa131e2b6eedc4017b627cdc4ca419ffb
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Generic socket support routines. Memory allocators, socket lock/release
 *		handler for protocols to use and generic option handler.
 *
 *
 * Version:	$Id: sock.c,v 1.117 2002/02/01 22:01:03 davem Exp $
 *
 * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Florian La Roche, <flla@stud.uni-sb.de>
 *		Alan Cox, <A.Cox@swansea.ac.uk>
 *
 * Fixes:
 *		Alan Cox	: 	Numerous verify_area() problems
 *		Alan Cox	:	Connecting on a connecting socket
 *					now returns an error for tcp.
 *		Alan Cox	:	sock->protocol is set correctly.
 *					and is not sometimes left as 0.
 *		Alan Cox	:	connect handles icmp errors on a
 *					connect properly. Unfortunately there
 *					is a restart syscall nasty there. I
 *					can't match BSD without hacking the C
 *					library. Ideas urgently sought!
 *		Alan Cox	:	Disallow bind() to addresses that are
 *					not ours - especially broadcast ones!!
 *		Alan Cox	:	Socket 1024 _IS_ ok for users. (fencepost)
 *		Alan Cox	:	sock_wfree/sock_rfree don't destroy sockets,
 *					instead they leave that for the DESTROY timer.
 *		Alan Cox	:	Clean up error flag in accept
 *		Alan Cox	:	TCP ack handling is buggy, the DESTROY timer
 *					was buggy. Put a remove_sock() in the handler
 *					for memory when we hit 0. Also altered the timer
 *					code. The ACK stuff can wait and needs major
 *					TCP layer surgery.
 *		Alan Cox	:	Fixed TCP ack bug, removed remove sock
 *					and fixed timer/inet_bh race.
 *		Alan Cox	:	Added zapped flag for TCP
 *		Alan Cox	:	Move kfree_skb into skbuff.c and tidied up surplus code
 *		Alan Cox	:	for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
 *		Alan Cox	:	kfree_s calls now are kfree_skbmem so we can track skb resources
 *		Alan Cox	:	Supports socket option broadcast now as does udp. Packet and raw need fixing.
 *		Alan Cox	:	Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
 *		Rick Sladkey	:	Relaxed UDP rules for matching packets.
 *		C.E.Hawkins	:	IFF_PROMISC/SIOCGHWADDR support
 *	Pauline Middelink	:	identd support
 *		Alan Cox	:	Fixed connect() taking signals I think.
 *		Alan Cox	:	SO_LINGER supported
 *		Alan Cox	:	Error reporting fixes
 *		Anonymous	:	inet_create tidied up (sk->reuse setting)
 *		Alan Cox	:	inet sockets don't set sk->type!
 *		Alan Cox	:	Split socket option code
 *		Alan Cox	:	Callbacks
 *		Alan Cox	:	Nagle flag for Charles & Johannes stuff
 *		Alex		:	Removed restriction on inet fioctl
 *		Alan Cox	:	Splitting INET from NET core
 *		Alan Cox	:	Fixed bogus SO_TYPE handling in getsockopt()
 *		Adam Caldwell	:	Missing return in SO_DONTROUTE/SO_DEBUG code
 *		Alan Cox	:	Split IP from generic code
 *		Alan Cox	:	New kfree_skbmem()
 *		Alan Cox	:	Make SO_DEBUG superuser only.
 *		Alan Cox	:	Allow anyone to clear SO_DEBUG
 *					(compatibility fix)
 *		Alan Cox	:	Added optimistic memory grabbing for AF_UNIX throughput.
 *		Alan Cox	:	Allocator for a socket is settable.
 *		Alan Cox	:	SO_ERROR includes soft errors.
 *		Alan Cox	:	Allow NULL arguments on some SO_ opts
 *		Alan Cox	: 	Generic socket allocation to make hooks
 *					easier (suggested by Craig Metz).
 *		Michael Pall	:	SO_ERROR returns positive errno again
 *              Steve Whitehouse:       Added default destructor to free
 *                                      protocol private data.
 *              Steve Whitehouse:       Added various other default routines
 *                                      common to several socket families.
 *              Chris Evans     :       Call suser() check last on F_SETOWN
 *		Jay Schulist	:	Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
 *		Andi Kleen	:	Add sock_kmalloc()/sock_kfree_s()
 *		Andi Kleen	:	Fix write_space callback
 *		Chris Evans	:	Security fixes - signedness again
 *		Arnaldo C. Melo :       cleanups, use skb_queue_purge
 *
 * To Fix:
 *
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */

#include <linux/config.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/string.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/poll.h>
#include <linux/tcp.h>
#include <linux/init.h>

#include <asm/uaccess.h>
#include <asm/system.h>

#include <linux/netdevice.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/xfrm.h>
#include <linux/ipsec.h>

#include <linux/filter.h>

#ifdef CONFIG_INET
#include <net/tcp.h>
#endif

/* Take the size of the struct sk_buff overhead into consideration when
 * determining these values, since it is not constant across platforms.
 * This keeps socket queueing behavior and performance independent of
 * such differences.
 */
#define _SK_MEM_PACKETS		256
#define _SK_MEM_OVERHEAD	(sizeof(struct sk_buff) + 256)
#define SK_WMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
#define SK_RMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
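
/*
 * Illustrative arithmetic (sizeof(struct sk_buff) varies by platform and
 * config): if it were 160 bytes, each packet would be charged
 * 160 + 256 = 416 bytes, making the default limits
 * 416 * 256 = 106496 bytes (about 104KB).
 */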

/* Run time adjustable parameters. */
__u32 sysctl_wmem_max = SK_WMEM_MAX;
__u32 sysctl_rmem_max = SK_RMEM_MAX;
__u32 sysctl_wmem_default = SK_WMEM_MAX;
__u32 sysctl_rmem_default = SK_RMEM_MAX;

/* Maximal space eaten by iovec or ancillary data plus some space */
int sysctl_optmem_max = sizeof(unsigned long)*(2*UIO_MAXIOV + 512);

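/*
 * Example (illustrative): with HZ == 100, sock_set_timeout() below turns
 * a timeval of {2, 500000} into 2*100 + 500000/10000 = 250 jiffies,
 * rounding sub-tick remainders up; {0, 0} means MAX_SCHEDULE_TIMEOUT,
 * i.e. "block forever".
 */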
static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
{
	struct timeval tv;

	if (optlen < sizeof(tv))
		return -EINVAL;
	if (copy_from_user(&tv, optval, sizeof(tv)))
		return -EFAULT;

	*timeo_p = MAX_SCHEDULE_TIMEOUT;
	if (tv.tv_sec == 0 && tv.tv_usec == 0)
		return 0;
	if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
		*timeo_p = tv.tv_sec*HZ + (tv.tv_usec+(1000000/HZ-1))/(1000000/HZ);
	return 0;
}

static void sock_warn_obsolete_bsdism(const char *name)
{
	static int warned;
	static char warncomm[TASK_COMM_LEN];
	if (strcmp(warncomm, current->comm) && warned < 5) {
		strcpy(warncomm,  current->comm);
		printk(KERN_WARNING "process `%s' is using obsolete "
		       "%s SO_BSDCOMPAT\n", warncomm, name);
		warned++;
	}
}

static void sock_disable_timestamp(struct sock *sk)
{
	if (sock_flag(sk, SOCK_TIMESTAMP)) {
		sock_reset_flag(sk, SOCK_TIMESTAMP);
		net_disable_timestamp();
	}
}


/*
 *	This is meant for all protocols to use and covers goings on
 *	at the socket level. Everything here is generic.
 */

int sock_setsockopt(struct socket *sock, int level, int optname,
		    char __user *optval, int optlen)
{
	struct sock *sk=sock->sk;
	struct sk_filter *filter;
	int val;
	int valbool;
	struct linger ling;
	int ret = 0;

	/*
	 *	Options without arguments
	 */

#ifdef SO_DONTLINGER		/* Compatibility item... */
	switch (optname) {
		case SO_DONTLINGER:
			sock_reset_flag(sk, SOCK_LINGER);
			return 0;
	}
#endif

  	if(optlen<sizeof(int))
  		return(-EINVAL);

	if (get_user(val, (int __user *)optval))
		return -EFAULT;

  	valbool = val?1:0;

	lock_sock(sk);

  	switch(optname)
  	{
		case SO_DEBUG:
			if(val && !capable(CAP_NET_ADMIN))
			{
				ret = -EACCES;
			}
			else if (valbool)
				sock_set_flag(sk, SOCK_DBG);
			else
				sock_reset_flag(sk, SOCK_DBG);
			break;
		case SO_REUSEADDR:
			sk->sk_reuse = valbool;
			break;
		case SO_TYPE:
		case SO_ERROR:
			ret = -ENOPROTOOPT;
		  	break;
		case SO_DONTROUTE:
			if (valbool)
				sock_set_flag(sk, SOCK_LOCALROUTE);
			else
				sock_reset_flag(sk, SOCK_LOCALROUTE);
			break;
		case SO_BROADCAST:
			sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
			break;
		case SO_SNDBUF:
			/* Don't return an error on this: BSD doesn't, and
			   if you think about it, this is right. Otherwise
			   apps would have to play 'guess the biggest size'
			   games. RCVBUF/SNDBUF are treated as hints in BSD */

			if (val > sysctl_wmem_max)
				val = sysctl_wmem_max;

			sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
			if ((val * 2) < SOCK_MIN_SNDBUF)
				sk->sk_sndbuf = SOCK_MIN_SNDBUF;
			else
				sk->sk_sndbuf = val * 2;

			/*
			 *	Wake up sending tasks if we
			 *	upped the value.
			 */
			sk->sk_write_space(sk);
			break;

		case SO_RCVBUF:
			/* Don't return an error on this: BSD doesn't, and
			   if you think about it, this is right. Otherwise
			   apps would have to play 'guess the biggest size'
			   games. RCVBUF/SNDBUF are treated as hints in BSD */

			if (val > sysctl_rmem_max)
				val = sysctl_rmem_max;

			sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
			/* FIXME: is this lower bound the right one? */
			if ((val * 2) < SOCK_MIN_RCVBUF)
				sk->sk_rcvbuf = SOCK_MIN_RCVBUF;
			else
				sk->sk_rcvbuf = val * 2;
			break;
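
		/* The doubling above presumably leaves headroom for struct
		 * sk_buff and bookkeeping overhead (cf. _SK_MEM_OVERHEAD), so
		 * the usable payload capacity roughly matches the size the
		 * application asked for.
		 */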

		case SO_KEEPALIVE:
#ifdef CONFIG_INET
			if (sk->sk_protocol == IPPROTO_TCP)
				tcp_set_keepalive(sk, valbool);
#endif
			sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
			break;

	 	case SO_OOBINLINE:
			sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
			break;

	 	case SO_NO_CHECK:
			sk->sk_no_check = valbool;
			break;

		case SO_PRIORITY:
			if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN))
				sk->sk_priority = val;
			else
				ret = -EPERM;
			break;

		case SO_LINGER:
			if(optlen<sizeof(ling)) {
				ret = -EINVAL;	/* 1003.1g */
				break;
			}
			if (copy_from_user(&ling,optval,sizeof(ling))) {
				ret = -EFAULT;
				break;
			}
			if (!ling.l_onoff)
				sock_reset_flag(sk, SOCK_LINGER);
			else {
#if (BITS_PER_LONG == 32)
				if (ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
					sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
				else
#endif
					sk->sk_lingertime = ling.l_linger * HZ;
				sock_set_flag(sk, SOCK_LINGER);
			}
			break;

		case SO_BSDCOMPAT:
			sock_warn_obsolete_bsdism("setsockopt");
			break;

		case SO_PASSCRED:
			if (valbool)
				set_bit(SOCK_PASSCRED, &sock->flags);
			else
				clear_bit(SOCK_PASSCRED, &sock->flags);
			break;

		case SO_TIMESTAMP:
			if (valbool)  {
				sock_set_flag(sk, SOCK_RCVTSTAMP);
				sock_enable_timestamp(sk);
			} else
				sock_reset_flag(sk, SOCK_RCVTSTAMP);
			break;

		case SO_RCVLOWAT:
			if (val < 0)
				val = INT_MAX;
			sk->sk_rcvlowat = val ? : 1;
			break;

		case SO_RCVTIMEO:
			ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
			break;

		case SO_SNDTIMEO:
			ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
			break;

#ifdef CONFIG_NETDEVICES
		case SO_BINDTODEVICE:
		{
			char devname[IFNAMSIZ];

			/* Sorry... */
			if (!capable(CAP_NET_RAW)) {
				ret = -EPERM;
				break;
			}

			/* Bind this socket to a particular device like "eth0",
			 * as specified in the passed interface name. If the
			 * name is "" or the option length is zero the socket
			 * is not bound.
			 */

			if (!valbool) {
				sk->sk_bound_dev_if = 0;
			} else {
				if (optlen > IFNAMSIZ)
					optlen = IFNAMSIZ;
				if (copy_from_user(devname, optval, optlen)) {
					ret = -EFAULT;
					break;
				}

				/* Remove any cached route for this socket. */
				sk_dst_reset(sk);

				if (devname[0] == '\0') {
					sk->sk_bound_dev_if = 0;
				} else {
					struct net_device *dev = dev_get_by_name(devname);
					if (!dev) {
						ret = -ENODEV;
						break;
					}
					sk->sk_bound_dev_if = dev->ifindex;
					dev_put(dev);
				}
			}
			break;
		}
#endif


		case SO_ATTACH_FILTER:
			ret = -EINVAL;
			if (optlen == sizeof(struct sock_fprog)) {
				struct sock_fprog fprog;

				ret = -EFAULT;
				if (copy_from_user(&fprog, optval, sizeof(fprog)))
					break;

				ret = sk_attach_filter(&fprog, sk);
			}
			break;

		case SO_DETACH_FILTER:
			spin_lock_bh(&sk->sk_lock.slock);
			filter = sk->sk_filter;
			if (filter) {
				sk->sk_filter = NULL;
				spin_unlock_bh(&sk->sk_lock.slock);
				sk_filter_release(sk, filter);
				break;
			}
			spin_unlock_bh(&sk->sk_lock.slock);
			ret = -ENONET;
			break;

		/* We implement SO_SNDLOWAT etc. as not settable
		   (1003.1g 5.3) */
		default:
		  	ret = -ENOPROTOOPT;
			break;
  	}
	release_sock(sk);
	return ret;
}


int sock_getsockopt(struct socket *sock, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;

	union
	{
  		int val;
  		struct linger ling;
		struct timeval tm;
	} v;

	unsigned int lv = sizeof(int);
	int len;

  	if(get_user(len,optlen))
  		return -EFAULT;
	if(len < 0)
		return -EINVAL;

  	switch(optname)
  	{
		case SO_DEBUG:
			v.val = sock_flag(sk, SOCK_DBG);
			break;

		case SO_DONTROUTE:
			v.val = sock_flag(sk, SOCK_LOCALROUTE);
			break;

		case SO_BROADCAST:
			v.val = !!sock_flag(sk, SOCK_BROADCAST);
			break;

		case SO_SNDBUF:
			v.val = sk->sk_sndbuf;
			break;

		case SO_RCVBUF:
			v.val = sk->sk_rcvbuf;
			break;

		case SO_REUSEADDR:
			v.val = sk->sk_reuse;
			break;

		case SO_KEEPALIVE:
			v.val = !!sock_flag(sk, SOCK_KEEPOPEN);
			break;

		case SO_TYPE:
			v.val = sk->sk_type;
			break;

		case SO_ERROR:
			v.val = -sock_error(sk);
			if(v.val==0)
				v.val = xchg(&sk->sk_err_soft, 0);
			break;

		case SO_OOBINLINE:
			v.val = !!sock_flag(sk, SOCK_URGINLINE);
			break;

		case SO_NO_CHECK:
			v.val = sk->sk_no_check;
			break;

		case SO_PRIORITY:
			v.val = sk->sk_priority;
			break;

		case SO_LINGER:
			lv		= sizeof(v.ling);
			v.ling.l_onoff	= !!sock_flag(sk, SOCK_LINGER);
			v.ling.l_linger	= sk->sk_lingertime / HZ;
			break;

		case SO_BSDCOMPAT:
			sock_warn_obsolete_bsdism("getsockopt");
			break;

		case SO_TIMESTAMP:
			v.val = sock_flag(sk, SOCK_RCVTSTAMP);
			break;

		case SO_RCVTIMEO:
			lv=sizeof(struct timeval);
			if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
				v.tm.tv_sec = 0;
				v.tm.tv_usec = 0;
			} else {
				v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
				v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ;
			}
			break;

		case SO_SNDTIMEO:
			lv=sizeof(struct timeval);
			if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
				v.tm.tv_sec = 0;
				v.tm.tv_usec = 0;
			} else {
				v.tm.tv_sec = sk->sk_sndtimeo / HZ;
				v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ;
			}
			break;

		case SO_RCVLOWAT:
			v.val = sk->sk_rcvlowat;
			break;

		case SO_SNDLOWAT:
			v.val=1;
			break;

		case SO_PASSCRED:
			v.val = test_bit(SOCK_PASSCRED, &sock->flags) ? 1 : 0;
			break;

		case SO_PEERCRED:
			if (len > sizeof(sk->sk_peercred))
				len = sizeof(sk->sk_peercred);
			if (copy_to_user(optval, &sk->sk_peercred, len))
				return -EFAULT;
			goto lenout;

		case SO_PEERNAME:
		{
			char address[128];

			if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
				return -ENOTCONN;
			if (lv < len)
				return -EINVAL;
			if (copy_to_user(optval, address, len))
				return -EFAULT;
			goto lenout;
		}

		/* Dubious BSD thing... Probably nobody even uses it, but
		 * the UNIX standard wants it for whatever reason... -DaveM
		 */
		case SO_ACCEPTCONN:
			v.val = sk->sk_state == TCP_LISTEN;
			break;

		case SO_PEERSEC:
			return security_socket_getpeersec(sock, optval, optlen, len);

		default:
			return(-ENOPROTOOPT);
	}
	if (len > lv)
		len = lv;
	if (copy_to_user(optval, &v, len))
		return -EFAULT;
lenout:
  	if (put_user(len, optlen))
  		return -EFAULT;
  	return 0;
}

/**
 *	sk_alloc - All socket objects are allocated here
 *	@family: protocol family
 *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
 *	@prot: struct proto associated with this new sock instance
 *	@zero_it: if we should zero the newly allocated sock
 */
struct sock *sk_alloc(int family, int priority, struct proto *prot, int zero_it)
{
	struct sock *sk = NULL;
	kmem_cache_t *slab = prot->slab;

	if (slab != NULL)
		sk = kmem_cache_alloc(slab, priority);
	else
		sk = kmalloc(prot->obj_size, priority);

	if (sk) {
		if (zero_it) {
			memset(sk, 0, prot->obj_size);
			sk->sk_family = family;
			/*
			 * See comment in struct sock definition to understand
			 * why we need sk_prot_creator -acme
			 */
			sk->sk_prot = sk->sk_prot_creator = prot;
			sock_lock_init(sk);
		}

		if (security_sk_alloc(sk, family, priority)) {
			if (slab != NULL)
				kmem_cache_free(slab, sk);
			else
				kfree(sk);
			sk = NULL;
		} else
			__module_get(prot->owner);
	}
	return sk;
}

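/**
 *	sk_free - free a sock allocated with sk_alloc
 *	@sk: the sock to free
 *
 *	Runs the protocol destructor if any, releases an attached filter,
 *	returns the memory to the slab cache (or kfree()s it) and drops
 *	the owning module's reference.
 */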
void sk_free(struct sock *sk)
{
	struct sk_filter *filter;
	struct module *owner = sk->sk_prot_creator->owner;

	if (sk->sk_destruct)
		sk->sk_destruct(sk);

	filter = sk->sk_filter;
	if (filter) {
		sk_filter_release(sk, filter);
		sk->sk_filter = NULL;
	}

	sock_disable_timestamp(sk);

	if (atomic_read(&sk->sk_omem_alloc))
		printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n",
		       __FUNCTION__, atomic_read(&sk->sk_omem_alloc));

	security_sk_free(sk);
	if (sk->sk_prot_creator->slab != NULL)
		kmem_cache_free(sk->sk_prot_creator->slab, sk);
	else
		kfree(sk);
	module_put(owner);
}

void __init sk_init(void)
{
	if (num_physpages <= 4096) {
		sysctl_wmem_max = 32767;
		sysctl_rmem_max = 32767;
		sysctl_wmem_default = 32767;
		sysctl_rmem_default = 32767;
	} else if (num_physpages >= 131072) {
		sysctl_wmem_max = 131071;
		sysctl_rmem_max = 131071;
	}
}

/*
 *	Simple resource managers for sockets.
 */


/*
 * Write buffer destructor automatically called from kfree_skb.
 */
void sock_wfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;

	/* In case it might be waiting for more memory. */
	atomic_sub(skb->truesize, &sk->sk_wmem_alloc);
	if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE))
		sk->sk_write_space(sk);
	sock_put(sk);
}

/*
 * Read buffer destructor automatically called from kfree_skb.
 */
void sock_rfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;

	atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
}


int sock_i_uid(struct sock *sk)
{
	int uid;

	read_lock(&sk->sk_callback_lock);
	uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : 0;
	read_unlock(&sk->sk_callback_lock);
	return uid;
}

unsigned long sock_i_ino(struct sock *sk)
{
	unsigned long ino;

	read_lock(&sk->sk_callback_lock);
	ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
	read_unlock(&sk->sk_callback_lock);
	return ino;
}

/*
 * Allocate a skb from the socket's send buffer.
 */
struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, int priority)
{
	if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
		struct sk_buff * skb = alloc_skb(size, priority);
		if (skb) {
			skb_set_owner_w(skb, sk);
			return skb;
		}
	}
	return NULL;
}

/*
 * Allocate a skb from the socket's receive buffer.
 */
struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, int priority)
{
	if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
		struct sk_buff *skb = alloc_skb(size, priority);
		if (skb) {
			skb_set_owner_r(skb, sk);
			return skb;
		}
	}
	return NULL;
}

/*
 * Allocate a memory block from the socket's option memory buffer.
 */
void *sock_kmalloc(struct sock *sk, int size, int priority)
{
	if ((unsigned)size <= sysctl_optmem_max &&
	    atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
		void *mem;
		/* First do the add, to avoid the race if kmalloc
 		 * might sleep.
		 */
		atomic_add(size, &sk->sk_omem_alloc);
		mem = kmalloc(size, priority);
		if (mem)
			return mem;
		atomic_sub(size, &sk->sk_omem_alloc);
	}
	return NULL;
}

/*
 * Free an option memory block.
 */
void sock_kfree_s(struct sock *sk, void *mem, int size)
{
	kfree(mem);
	atomic_sub(size, &sk->sk_omem_alloc);
}
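
/*
 * Illustrative pairing (hypothetical variable names): memory obtained
 * from sock_kmalloc() must be returned through sock_kfree_s() with the
 * same size so that sk_omem_alloc stays balanced:
 *
 *	opt = sock_kmalloc(sk, optlen, GFP_KERNEL);
 *	...
 *	sock_kfree_s(sk, opt, optlen);
 */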
/* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
   I think these locks should be removed for datagram sockets.
 */
static long sock_wait_for_wmem(struct sock * sk, long timeo)
{
	DEFINE_WAIT(wait);

	clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
	for (;;) {
		if (!timeo)
			break;
		if (signal_pending(current))
			break;
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
			break;
		if (sk->sk_shutdown & SEND_SHUTDOWN)
			break;
		if (sk->sk_err)
			break;
		timeo = schedule_timeout(timeo);
	}
	finish_wait(sk->sk_sleep, &wait);
	return timeo;
}


/*
 *	Generic send/receive buffer handlers
 */

static struct sk_buff *sock_alloc_send_pskb(struct sock *sk,
					    unsigned long header_len,
					    unsigned long data_len,
					    int noblock, int *errcode)
{
	struct sk_buff *skb;
	unsigned int gfp_mask;
	long timeo;
	int err;

	gfp_mask = sk->sk_allocation;
	if (gfp_mask & __GFP_WAIT)
		gfp_mask |= __GFP_REPEAT;

	timeo = sock_sndtimeo(sk, noblock);
	while (1) {
		err = sock_error(sk);
		if (err != 0)
			goto failure;

		err = -EPIPE;
		if (sk->sk_shutdown & SEND_SHUTDOWN)
			goto failure;

		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
			skb = alloc_skb(header_len, sk->sk_allocation);
			if (skb) {
				int npages;
				int i;

				/* No pages, we're done... */
				if (!data_len)
					break;

				npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
				skb->truesize += data_len;
				skb_shinfo(skb)->nr_frags = npages;
				for (i = 0; i < npages; i++) {
					struct page *page;
					skb_frag_t *frag;

					page = alloc_pages(sk->sk_allocation, 0);
					if (!page) {
						err = -ENOBUFS;
						skb_shinfo(skb)->nr_frags = i;
						kfree_skb(skb);
						goto failure;
					}

					frag = &skb_shinfo(skb)->frags[i];
					frag->page = page;
					frag->page_offset = 0;
					frag->size = (data_len >= PAGE_SIZE ?
						      PAGE_SIZE :
						      data_len);
					data_len -= PAGE_SIZE;
				}

				/* Full success... */
				break;
			}
			err = -ENOBUFS;
			goto failure;
		}
		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
		err = -EAGAIN;
		if (!timeo)
			goto failure;
		if (signal_pending(current))
			goto interrupted;
		timeo = sock_wait_for_wmem(sk, timeo);
	}

	skb_set_owner_w(skb, sk);
	return skb;

interrupted:
	err = sock_intr_errno(timeo);
failure:
	*errcode = err;
	return NULL;
}

struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
				    int noblock, int *errcode)
{
	return sock_alloc_send_pskb(sk, size, 0, noblock, errcode);
}
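
/*
 * Typical call site (illustrative, not taken from this file): datagram
 * protocols allocate their transmit buffer with
 *
 *	skb = sock_alloc_send_skb(sk, len, flags & MSG_DONTWAIT, &err);
 *
 * which blocks against sk_sndbuf unless MSG_DONTWAIT is set.
 */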

static void __lock_sock(struct sock *sk)
{
	DEFINE_WAIT(wait);

	for(;;) {
		prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
					TASK_UNINTERRUPTIBLE);
		spin_unlock_bh(&sk->sk_lock.slock);
		schedule();
		spin_lock_bh(&sk->sk_lock.slock);
		if(!sock_owned_by_user(sk))
			break;
	}
	finish_wait(&sk->sk_lock.wq, &wait);
}

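/*
 * Process the backlog queued while the socket was owned by user context.
 * The queue is detached first, so packets arriving while sk_backlog_rcv()
 * runs land on a fresh list and are picked up by the outer loop.
 */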
static void __release_sock(struct sock *sk)
{
	struct sk_buff *skb = sk->sk_backlog.head;

	do {
		sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
		bh_unlock_sock(sk);

		do {
			struct sk_buff *next = skb->next;

			skb->next = NULL;
			sk->sk_backlog_rcv(sk, skb);

			/*
			 * We are in process context here with softirqs
			 * disabled, use cond_resched_softirq() to preempt.
			 * This is safe to do because we've taken the backlog
			 * queue private:
			 */
			cond_resched_softirq();

			skb = next;
		} while (skb != NULL);

		bh_lock_sock(sk);
	} while((skb = sk->sk_backlog.head) != NULL);
}

/**
 * sk_wait_data - wait for data to arrive at sk_receive_queue
 * @sk:    sock to wait on
 * @timeo: for how long
 *
 * Now socket state including sk->sk_err is changed only under lock,
 * hence we may omit checks after joining wait queue.
 * We check receive queue before schedule() only as optimization;
 * it is very likely that release_sock() added new data.
 */
int sk_wait_data(struct sock *sk, long *timeo)
{
	int rc;
	DEFINE_WAIT(wait);

	prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
	set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
	rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
	clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
	finish_wait(sk->sk_sleep, &wait);
	return rc;
}

EXPORT_SYMBOL(sk_wait_data);

/*
 * Set of default routines for initialising struct proto_ops when
 * the protocol does not support a particular function. In certain
 * cases where it makes no sense for a protocol to have a "do nothing"
 * function, some default processing is provided.
 */
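
/*
 * Illustrative use (assumed pattern): a proto_ops table plugs these
 * stubs in for operations the protocol does not support, e.g.
 *
 *	.mmap		= sock_no_mmap,
 *	.socketpair	= sock_no_socketpair,
 */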

int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
{
	return -EOPNOTSUPP;
}

int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
		    int len, int flags)
{
	return -EOPNOTSUPP;
}

int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
{
	return -EOPNOTSUPP;
}

int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
{
	return -EOPNOTSUPP;
}

int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
		    int *len, int peer)
{
	return -EOPNOTSUPP;
}

unsigned int sock_no_poll(struct file * file, struct socket *sock, poll_table *pt)
{
	return 0;
}

int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	return -EOPNOTSUPP;
}

int sock_no_listen(struct socket *sock, int backlog)
{
	return -EOPNOTSUPP;
}

int sock_no_shutdown(struct socket *sock, int how)
{
	return -EOPNOTSUPP;
}

int sock_no_setsockopt(struct socket *sock, int level, int optname,
		    char __user *optval, int optlen)
{
	return -EOPNOTSUPP;
}

int sock_no_getsockopt(struct socket *sock, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	return -EOPNOTSUPP;
}

int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
		    size_t len)
{
	return -EOPNOTSUPP;
}

int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
		    size_t len, int flags)
{
	return -EOPNOTSUPP;
}

int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
{
	/* Mirror missing mmap method error code */
	return -ENODEV;
}

ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
{
	ssize_t res;
	struct msghdr msg = {.msg_flags = flags};
	struct kvec iov;
	char *kaddr = kmap(page);
	iov.iov_base = kaddr + offset;
	iov.iov_len = size;
	res = kernel_sendmsg(sock, &msg, &iov, 1, size);
	kunmap(page);
	return res;
}

/*
 *	Default Socket Callbacks
 */

static void sock_def_wakeup(struct sock *sk)
{
	read_lock(&sk->sk_callback_lock);
	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
		wake_up_interruptible_all(sk->sk_sleep);
	read_unlock(&sk->sk_callback_lock);
}

static void sock_def_error_report(struct sock *sk)
{
	read_lock(&sk->sk_callback_lock);
	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
		wake_up_interruptible(sk->sk_sleep);
	sk_wake_async(sk,0,POLL_ERR);
	read_unlock(&sk->sk_callback_lock);
}

static void sock_def_readable(struct sock *sk, int len)
{
	read_lock(&sk->sk_callback_lock);
	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
		wake_up_interruptible(sk->sk_sleep);
	sk_wake_async(sk,1,POLL_IN);
	read_unlock(&sk->sk_callback_lock);
}

static void sock_def_write_space(struct sock *sk)
{
	read_lock(&sk->sk_callback_lock);

	/* Do not wake up a writer until he can make "significant"
	 * progress.  --DaveM
	 */
	if((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
		if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
			wake_up_interruptible(sk->sk_sleep);

		/* Should agree with poll, otherwise some programs break */
		if (sock_writeable(sk))
			sk_wake_async(sk, 2, POLL_OUT);
	}

	read_unlock(&sk->sk_callback_lock);
}

static void sock_def_destruct(struct sock *sk)
{
	if (sk->sk_protinfo)
		kfree(sk->sk_protinfo);
}

void sk_send_sigurg(struct sock *sk)
{
	if (sk->sk_socket && sk->sk_socket->file)
		if (send_sigurg(&sk->sk_socket->file->f_owner))
			sk_wake_async(sk, 3, POLL_PRI);
}

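/*
 * Timer helpers: a pending timer holds a reference on the sock.  The
 * reference is taken when the timer is first armed and dropped when it
 * is cancelled while still pending.
 */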
void sk_reset_timer(struct sock *sk, struct timer_list* timer,
		    unsigned long expires)
{
	if (!mod_timer(timer, expires))
		sock_hold(sk);
}

EXPORT_SYMBOL(sk_reset_timer);

void sk_stop_timer(struct sock *sk, struct timer_list* timer)
{
	if (timer_pending(timer) && del_timer(timer))
		__sock_put(sk);
}

EXPORT_SYMBOL(sk_stop_timer);

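/**
 *	sock_init_data - initialise a newly allocated sock
 *	@sock: the owning socket, or NULL for a kernel-internal sock
 *	@sk: the sock to initialise
 *
 *	Sets up the queues, default buffer sizes, timeouts and default
 *	callbacks; callers may override any of these afterwards.
 */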
void sock_init_data(struct socket *sock, struct sock *sk)
{
	skb_queue_head_init(&sk->sk_receive_queue);
	skb_queue_head_init(&sk->sk_write_queue);
	skb_queue_head_init(&sk->sk_error_queue);

	sk->sk_send_head	=	NULL;

	init_timer(&sk->sk_timer);

	sk->sk_allocation	=	GFP_KERNEL;
	sk->sk_rcvbuf		=	sysctl_rmem_default;
	sk->sk_sndbuf		=	sysctl_wmem_default;
	sk->sk_state		=	TCP_CLOSE;
	sk->sk_socket		=	sock;

	sock_set_flag(sk, SOCK_ZAPPED);

	if(sock)
	{
		sk->sk_type	=	sock->type;
		sk->sk_sleep	=	&sock->wait;
		sock->sk	=	sk;
	} else
		sk->sk_sleep	=	NULL;

	rwlock_init(&sk->sk_dst_lock);
	rwlock_init(&sk->sk_callback_lock);

	sk->sk_state_change	=	sock_def_wakeup;
	sk->sk_data_ready	=	sock_def_readable;
	sk->sk_write_space	=	sock_def_write_space;
	sk->sk_error_report	=	sock_def_error_report;
	sk->sk_destruct		=	sock_def_destruct;

	sk->sk_sndmsg_page	=	NULL;
	sk->sk_sndmsg_off	=	0;

	sk->sk_peercred.pid 	=	0;
	sk->sk_peercred.uid	=	-1;
	sk->sk_peercred.gid	=	-1;
	sk->sk_write_pending	=	0;
	sk->sk_rcvlowat		=	1;
	sk->sk_rcvtimeo		=	MAX_SCHEDULE_TIMEOUT;
	sk->sk_sndtimeo		=	MAX_SCHEDULE_TIMEOUT;

	sk->sk_stamp.tv_sec     = -1L;
	sk->sk_stamp.tv_usec    = -1L;

	atomic_set(&sk->sk_refcnt, 1);
}

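/*
 * The socket lock is two-level: sk_lock.slock is a spinlock guarding the
 * lock state itself, while "owner" marks exclusive use of the socket by
 * process context.  Softirq receive paths that find the socket owned
 * queue packets on the backlog, which release_sock() drains through
 * __release_sock().
 */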
void fastcall lock_sock(struct sock *sk)
{
	might_sleep();
	spin_lock_bh(&(sk->sk_lock.slock));
	if (sk->sk_lock.owner)
		__lock_sock(sk);
	sk->sk_lock.owner = (void *)1;
	spin_unlock_bh(&(sk->sk_lock.slock));
}

EXPORT_SYMBOL(lock_sock);

void fastcall release_sock(struct sock *sk)
{
	spin_lock_bh(&(sk->sk_lock.slock));
	if (sk->sk_backlog.tail)
		__release_sock(sk);
	sk->sk_lock.owner = NULL;
	if (waitqueue_active(&(sk->sk_lock.wq)))
		wake_up(&(sk->sk_lock.wq));
	spin_unlock_bh(&(sk->sk_lock.slock));
}
EXPORT_SYMBOL(release_sock);

int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
{
	if (!sock_flag(sk, SOCK_TIMESTAMP))
		sock_enable_timestamp(sk);
	if (sk->sk_stamp.tv_sec == -1)
		return -ENOENT;
	if (sk->sk_stamp.tv_sec == 0)
		do_gettimeofday(&sk->sk_stamp);
	return copy_to_user(userstamp, &sk->sk_stamp, sizeof(struct timeval)) ?
		-EFAULT : 0;
}
EXPORT_SYMBOL(sock_get_timestamp);

void sock_enable_timestamp(struct sock *sk)
{
	if (!sock_flag(sk, SOCK_TIMESTAMP)) {
		sock_set_flag(sk, SOCK_TIMESTAMP);
		net_enable_timestamp();
	}
}
EXPORT_SYMBOL(sock_enable_timestamp);

/*
 *	Get a socket option on a socket.
 *
 *	FIX: POSIX 1003.1g is very ambiguous here. It states that
 *	asynchronous errors should be reported by getsockopt. We assume
 *	this means if you specify SO_ERROR (otherwise what's the point of it).
 */
int sock_common_getsockopt(struct socket *sock, int level, int optname,
			   char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;

	return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL(sock_common_getsockopt);

int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock,
			struct msghdr *msg, size_t size, int flags)
{
	struct sock *sk = sock->sk;
	int addr_len = 0;
	int err;

	err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
				   flags & ~MSG_DONTWAIT, &addr_len);
	if (err >= 0)
		msg->msg_namelen = addr_len;
	return err;
}

EXPORT_SYMBOL(sock_common_recvmsg);

/*
 *	Set socket options on an inet socket.
 */
int sock_common_setsockopt(struct socket *sock, int level, int optname,
			   char __user *optval, int optlen)
{
	struct sock *sk = sock->sk;

	return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL(sock_common_setsockopt);

void sk_common_release(struct sock *sk)
{
	if (sk->sk_prot->destroy)
		sk->sk_prot->destroy(sk);

	/*
	 * Observation: when sk_common_release is called, processes have
	 * no access to the socket, but the network still does.
	 * Step one, detach it from networking:
	 *
	 * A. Remove from hash tables.
	 */

	sk->sk_prot->unhash(sk);

	/*
	 * At this point the socket cannot receive new packets, but it is
	 * possible that some packets are in flight, because some CPU runs
	 * the receiver and did a hash table lookup before we unhashed the
	 * socket. They will reach the receive queue and be purged by the
	 * socket destructor.
	 *
	 * Also we still have packets pending on the receive queue and
	 * probably our own packets waiting in device queues. sock_destroy
	 * will drain the receive queue, but transmitted packets will delay
	 * socket destruction until the last reference is released.
	 */

	sock_orphan(sk);

	xfrm_sk_free_policy(sk);

#ifdef INET_REFCNT_DEBUG
	if (atomic_read(&sk->sk_refcnt) != 1)
		printk(KERN_DEBUG "Destruction of the socket %p delayed, c=%d\n",
		       sk, atomic_read(&sk->sk_refcnt));
#endif
	sock_put(sk);
}

EXPORT_SYMBOL(sk_common_release);

static DEFINE_RWLOCK(proto_list_lock);
static LIST_HEAD(proto_list);

int proto_register(struct proto *prot, int alloc_slab)
{
	int rc = -ENOBUFS;

	if (alloc_slab) {
		prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
					       SLAB_HWCACHE_ALIGN, NULL, NULL);

		if (prot->slab == NULL) {
			printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n",
			       prot->name);
			goto out;
		}
	}

	write_lock(&proto_list_lock);
	list_add(&prot->node, &proto_list);
	write_unlock(&proto_list_lock);
	rc = 0;
out:
	return rc;
}

EXPORT_SYMBOL(proto_register);
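
/*
 * Illustrative registration from a protocol module (assumed pattern,
 * mirroring the in-tree protocols); a non-zero second argument selects
 * slab allocation of socks:
 *
 *	static struct proto foo_prot = {
 *		.name	  = "FOO",
 *		.owner	  = THIS_MODULE,
 *		.obj_size = sizeof(struct foo_sock),
 *	};
 *
 *	rc = proto_register(&foo_prot, 1);
 *	...
 *	proto_unregister(&foo_prot);
 */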

void proto_unregister(struct proto *prot)
{
	write_lock(&proto_list_lock);

	if (prot->slab != NULL) {
		kmem_cache_destroy(prot->slab);
		prot->slab = NULL;
	}

	list_del(&prot->node);
	write_unlock(&proto_list_lock);
}

EXPORT_SYMBOL(proto_unregister);

#ifdef CONFIG_PROC_FS
static inline struct proto *__proto_head(void)
{
	return list_entry(proto_list.next, struct proto, node);
}

static inline struct proto *proto_head(void)
{
	return list_empty(&proto_list) ? NULL : __proto_head();
}

static inline struct proto *proto_next(struct proto *proto)
{
	return proto->node.next == &proto_list ? NULL :
		list_entry(proto->node.next, struct proto, node);
}

static inline struct proto *proto_get_idx(loff_t pos)
{
	struct proto *proto;
	loff_t i = 0;

	list_for_each_entry(proto, &proto_list, node)
		if (i++ == pos)
			goto out;

	proto = NULL;
out:
	return proto;
}

static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
{
	read_lock(&proto_list_lock);
	return *pos ? proto_get_idx(*pos - 1) : SEQ_START_TOKEN;
}

static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	++*pos;
	return v == SEQ_START_TOKEN ? proto_head() : proto_next(v);
}

static void proto_seq_stop(struct seq_file *seq, void *v)
{
	read_unlock(&proto_list_lock);
}

static char proto_method_implemented(const void *method)
{
	return method == NULL ? 'n' : 'y';
}

static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
{
	seq_printf(seq, "%-9s %4u %6d  %6d   %-3s %6u   %-3s  %-10s "
			"%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
		   proto->name,
		   proto->obj_size,
		   proto->sockets_allocated != NULL ? atomic_read(proto->sockets_allocated) : -1,
		   proto->memory_allocated != NULL ? atomic_read(proto->memory_allocated) : -1,
		   proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI",
		   proto->max_header,
		   proto->slab == NULL ? "no" : "yes",
		   module_name(proto->owner),
		   proto_method_implemented(proto->close),
		   proto_method_implemented(proto->connect),
		   proto_method_implemented(proto->disconnect),
		   proto_method_implemented(proto->accept),
		   proto_method_implemented(proto->ioctl),
		   proto_method_implemented(proto->init),
		   proto_method_implemented(proto->destroy),
		   proto_method_implemented(proto->shutdown),
		   proto_method_implemented(proto->setsockopt),
		   proto_method_implemented(proto->getsockopt),
		   proto_method_implemented(proto->sendmsg),
		   proto_method_implemented(proto->recvmsg),
		   proto_method_implemented(proto->sendpage),
		   proto_method_implemented(proto->bind),
		   proto_method_implemented(proto->backlog_rcv),
		   proto_method_implemented(proto->hash),
		   proto_method_implemented(proto->unhash),
		   proto_method_implemented(proto->get_port),
		   proto_method_implemented(proto->enter_memory_pressure));
}

static int proto_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
			   "protocol",
			   "size",
			   "sockets",
			   "memory",
			   "press",
			   "maxhdr",
			   "slab",
			   "module",
			   "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
	else
		proto_seq_printf(seq, v);
	return 0;
}

static struct seq_operations proto_seq_ops = {
	.start  = proto_seq_start,
	.next   = proto_seq_next,
	.stop   = proto_seq_stop,
	.show   = proto_seq_show,
};

static int proto_seq_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &proto_seq_ops);
}

static struct file_operations proto_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= proto_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};

static int __init proto_init(void)
{
	/* register /proc/net/protocols */
	return proc_net_fops_create("protocols", S_IRUGO, &proto_seq_fops) == NULL ? -ENOBUFS : 0;
}

subsys_initcall(proto_init);

#endif /* PROC_FS */

EXPORT_SYMBOL(sk_alloc);
EXPORT_SYMBOL(sk_free);
EXPORT_SYMBOL(sk_send_sigurg);
EXPORT_SYMBOL(sock_alloc_send_skb);
EXPORT_SYMBOL(sock_init_data);
EXPORT_SYMBOL(sock_kfree_s);
EXPORT_SYMBOL(sock_kmalloc);
EXPORT_SYMBOL(sock_no_accept);
EXPORT_SYMBOL(sock_no_bind);
EXPORT_SYMBOL(sock_no_connect);
EXPORT_SYMBOL(sock_no_getname);
EXPORT_SYMBOL(sock_no_getsockopt);
EXPORT_SYMBOL(sock_no_ioctl);
EXPORT_SYMBOL(sock_no_listen);
EXPORT_SYMBOL(sock_no_mmap);
EXPORT_SYMBOL(sock_no_poll);
EXPORT_SYMBOL(sock_no_recvmsg);
EXPORT_SYMBOL(sock_no_sendmsg);
EXPORT_SYMBOL(sock_no_sendpage);
EXPORT_SYMBOL(sock_no_setsockopt);
EXPORT_SYMBOL(sock_no_shutdown);
EXPORT_SYMBOL(sock_no_socketpair);
EXPORT_SYMBOL(sock_rfree);
EXPORT_SYMBOL(sock_setsockopt);
EXPORT_SYMBOL(sock_wfree);
EXPORT_SYMBOL(sock_wmalloc);
EXPORT_SYMBOL(sock_i_uid);
EXPORT_SYMBOL(sock_i_ino);
#ifdef CONFIG_SYSCTL
EXPORT_SYMBOL(sysctl_optmem_max);
EXPORT_SYMBOL(sysctl_rmem_max);
EXPORT_SYMBOL(sysctl_wmem_max);
#endif
1571