sock.c revision 2e6599cb899ba4b133f42cbf9d2b1883d2dc583a
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Generic socket support routines. Memory allocators, socket lock/release
 *		handler for protocols to use and generic option handler.
 *
 *
 * Version:	$Id: sock.c,v 1.117 2002/02/01 22:01:03 davem Exp $
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Florian La Roche, <flla@stud.uni-sb.de>
 *		Alan Cox, <A.Cox@swansea.ac.uk>
 *
 * Fixes:
 *		Alan Cox	: 	Numerous verify_area() problems
 *		Alan Cox	:	Connecting on a connecting socket
 *					now returns an error for tcp.
 *		Alan Cox	:	sock->protocol is set correctly.
 *					and is not sometimes left as 0.
 *		Alan Cox	:	connect handles icmp errors on a
 *					connect properly. Unfortunately there
 *					is a restart syscall nasty there. I
 *					can't match BSD without hacking the C
 *					library. Ideas urgently sought!
 *		Alan Cox	:	Disallow bind() to addresses that are
 *					not ours - especially broadcast ones!!
 *		Alan Cox	:	Socket 1024 _IS_ ok for users. (fencepost)
 *		Alan Cox	:	sock_wfree/sock_rfree don't destroy sockets,
 *					instead they leave that for the DESTROY timer.
 *		Alan Cox	:	Clean up error flag in accept
 *		Alan Cox	:	TCP ack handling is buggy, the DESTROY timer
 *					was buggy. Put a remove_sock() in the handler
 *					for memory when we hit 0. Also altered the timer
 *					code. The ACK stuff can wait and needs major
 *					TCP layer surgery.
 *		Alan Cox	:	Fixed TCP ack bug, removed remove sock
 *					and fixed timer/inet_bh race.
 *		Alan Cox	:	Added zapped flag for TCP
 *		Alan Cox	:	Move kfree_skb into skbuff.c and tidied up surplus code
 *		Alan Cox	:	for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
 *		Alan Cox	:	kfree_s calls now are kfree_skbmem so we can track skb resources
 *		Alan Cox	:	Supports socket option broadcast now as does udp. Packet and raw need fixing.
 *		Alan Cox	:	Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
 *		Rick Sladkey	:	Relaxed UDP rules for matching packets.
 *		C.E.Hawkins	:	IFF_PROMISC/SIOCGHWADDR support
 *	Pauline Middelink	:	identd support
 *		Alan Cox	:	Fixed connect() taking signals I think.
 *		Alan Cox	:	SO_LINGER supported
 *		Alan Cox	:	Error reporting fixes
 *		Anonymous	:	inet_create tidied up (sk->reuse setting)
 *		Alan Cox	:	inet sockets don't set sk->type!
 *		Alan Cox	:	Split socket option code
 *		Alan Cox	:	Callbacks
 *		Alan Cox	:	Nagle flag for Charles & Johannes stuff
 *		Alex		:	Removed restriction on inet fioctl
 *		Alan Cox	:	Splitting INET from NET core
 *		Alan Cox	:	Fixed bogus SO_TYPE handling in getsockopt()
 *		Adam Caldwell	:	Missing return in SO_DONTROUTE/SO_DEBUG code
 *		Alan Cox	:	Split IP from generic code
 *		Alan Cox	:	New kfree_skbmem()
 *		Alan Cox	:	Make SO_DEBUG superuser only.
 *		Alan Cox	:	Allow anyone to clear SO_DEBUG
 *					(compatibility fix)
 *		Alan Cox	:	Added optimistic memory grabbing for AF_UNIX throughput.
 *		Alan Cox	:	Allocator for a socket is settable.
 *		Alan Cox	:	SO_ERROR includes soft errors.
 *		Alan Cox	:	Allow NULL arguments on some SO_ opts
 *		Alan Cox	: 	Generic socket allocation to make hooks
 *					easier (suggested by Craig Metz).
 *		Michael Pall	:	SO_ERROR returns positive errno again
 *              Steve Whitehouse:       Added default destructor to free
 *                                      protocol private data.
 *              Steve Whitehouse:       Added various other default routines
 *                                      common to several socket families.
 *              Chris Evans     :       Call suser() check last on F_SETOWN
 *		Jay Schulist	:	Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
 *		Andi Kleen	:	Add sock_kmalloc()/sock_kfree_s()
 *		Andi Kleen	:	Fix write_space callback
 *		Chris Evans	:	Security fixes - signedness again
 *		Arnaldo C. Melo :       cleanups, use skb_queue_purge
 *
 * To Fix:
 *
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */

#include <linux/config.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/string.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/poll.h>
#include <linux/tcp.h>
#include <linux/init.h>

#include <asm/uaccess.h>
#include <asm/system.h>

#include <linux/netdevice.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/request_sock.h>
#include <net/sock.h>
#include <net/xfrm.h>
#include <linux/ipsec.h>

#include <linux/filter.h>

#ifdef CONFIG_INET
#include <net/tcp.h>
#endif

/* Take into consideration the size of the struct sk_buff overhead in the
 * determination of these values, since that is non-constant across
 * platforms.  This makes socket queueing behavior and performance
 * not depend upon such differences.
 */
#define _SK_MEM_PACKETS		256
#define _SK_MEM_OVERHEAD	(sizeof(struct sk_buff) + 256)
#define SK_WMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
#define SK_RMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
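
/*
 * Worked example (illustrative only; the sk_buff size is an assumption):
 * on a build where sizeof(struct sk_buff) happens to be 256 bytes,
 * _SK_MEM_OVERHEAD is 256 + 256 = 512 bytes, so SK_WMEM_MAX and
 * SK_RMEM_MAX come out to 512 * 256 = 131072 bytes (128 KiB), i.e. room
 * for 256 such packets regardless of per-platform sk_buff padding.
 */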

/* Run-time adjustable parameters. */
__u32 sysctl_wmem_max = SK_WMEM_MAX;
__u32 sysctl_rmem_max = SK_RMEM_MAX;
__u32 sysctl_wmem_default = SK_WMEM_MAX;
__u32 sysctl_rmem_default = SK_RMEM_MAX;

/* Maximal space eaten by an iovec or ancillary data plus some space */
int sysctl_optmem_max = sizeof(unsigned long)*(2*UIO_MAXIOV + 512);

static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
{
	struct timeval tv;

	if (optlen < sizeof(tv))
		return -EINVAL;
	if (copy_from_user(&tv, optval, sizeof(tv)))
		return -EFAULT;

	*timeo_p = MAX_SCHEDULE_TIMEOUT;
	if (tv.tv_sec == 0 && tv.tv_usec == 0)
		return 0;
	if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
		*timeo_p = tv.tv_sec*HZ + (tv.tv_usec+(1000000/HZ-1))/(1000000/HZ);
	return 0;
}
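
/*
 * Illustrative sketch (user space, not part of this file): the timeout
 * above is set via setsockopt() with a struct timeval. A zero timeval
 * means "block forever", and sub-tick microsecond values are rounded up
 * to one jiffy by the conversion above.
 *
 *	struct timeval tv = { .tv_sec = 5, .tv_usec = 0 };
 *	setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
 */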

static void sock_warn_obsolete_bsdism(const char *name)
{
	static int warned;
	static char warncomm[TASK_COMM_LEN];
	if (strcmp(warncomm, current->comm) && warned < 5) {
		strcpy(warncomm,  current->comm);
		printk(KERN_WARNING "process `%s' is using obsolete "
		       "%s SO_BSDCOMPAT\n", warncomm, name);
		warned++;
	}
}

static void sock_disable_timestamp(struct sock *sk)
{
	if (sock_flag(sk, SOCK_TIMESTAMP)) {
		sock_reset_flag(sk, SOCK_TIMESTAMP);
		net_disable_timestamp();
	}
}


/*
 *	This is meant for all protocols to use and covers goings on
 *	at the socket level. Everything here is generic.
 */

int sock_setsockopt(struct socket *sock, int level, int optname,
		    char __user *optval, int optlen)
{
	struct sock *sk=sock->sk;
	struct sk_filter *filter;
	int val;
	int valbool;
	struct linger ling;
	int ret = 0;

	/*
	 *	Options without arguments
	 */

#ifdef SO_DONTLINGER		/* Compatibility item... */
	switch (optname) {
		case SO_DONTLINGER:
			sock_reset_flag(sk, SOCK_LINGER);
			return 0;
	}
#endif

	if(optlen<sizeof(int))
		return(-EINVAL);

	if (get_user(val, (int __user *)optval))
		return -EFAULT;

	valbool = val?1:0;

	lock_sock(sk);

	switch(optname)
	{
		case SO_DEBUG:
			if(val && !capable(CAP_NET_ADMIN))
			{
				ret = -EACCES;
			}
			else if (valbool)
				sock_set_flag(sk, SOCK_DBG);
			else
				sock_reset_flag(sk, SOCK_DBG);
			break;
		case SO_REUSEADDR:
			sk->sk_reuse = valbool;
			break;
		case SO_TYPE:
		case SO_ERROR:
			ret = -ENOPROTOOPT;
			break;
		case SO_DONTROUTE:
			if (valbool)
				sock_set_flag(sk, SOCK_LOCALROUTE);
			else
				sock_reset_flag(sk, SOCK_LOCALROUTE);
			break;
		case SO_BROADCAST:
			sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
			break;
		case SO_SNDBUF:
			/* Don't return an error on this; BSD doesn't, and
			   if you think about it, this is right. Otherwise
			   apps would have to play 'guess the biggest size'
			   games. RCVBUF/SNDBUF are treated as hints in BSD. */

			if (val > sysctl_wmem_max)
				val = sysctl_wmem_max;

			sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
			if ((val * 2) < SOCK_MIN_SNDBUF)
				sk->sk_sndbuf = SOCK_MIN_SNDBUF;
			else
				sk->sk_sndbuf = val * 2;

			/*
			 *	Wake up sending tasks if we
			 *	upped the value.
			 */
			sk->sk_write_space(sk);
			break;

		case SO_RCVBUF:
			/* Don't return an error on this; BSD doesn't, and
			   if you think about it, this is right. Otherwise
			   apps would have to play 'guess the biggest size'
			   games. RCVBUF/SNDBUF are treated as hints in BSD. */

			if (val > sysctl_rmem_max)
				val = sysctl_rmem_max;

			sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
			/* FIXME: is this lower bound the right one? */
			if ((val * 2) < SOCK_MIN_RCVBUF)
				sk->sk_rcvbuf = SOCK_MIN_RCVBUF;
			else
				sk->sk_rcvbuf = val * 2;
			break;

		case SO_KEEPALIVE:
#ifdef CONFIG_INET
			if (sk->sk_protocol == IPPROTO_TCP)
				tcp_set_keepalive(sk, valbool);
#endif
			sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
			break;

		case SO_OOBINLINE:
			sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
			break;

		case SO_NO_CHECK:
			sk->sk_no_check = valbool;
			break;

		case SO_PRIORITY:
			if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN))
				sk->sk_priority = val;
			else
				ret = -EPERM;
			break;

		case SO_LINGER:
			if(optlen<sizeof(ling)) {
				ret = -EINVAL;	/* 1003.1g */
				break;
			}
			if (copy_from_user(&ling,optval,sizeof(ling))) {
				ret = -EFAULT;
				break;
			}
			if (!ling.l_onoff)
				sock_reset_flag(sk, SOCK_LINGER);
			else {
#if (BITS_PER_LONG == 32)
				if (ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
					sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
				else
#endif
					sk->sk_lingertime = ling.l_linger * HZ;
				sock_set_flag(sk, SOCK_LINGER);
			}
			break;

		case SO_BSDCOMPAT:
			sock_warn_obsolete_bsdism("setsockopt");
			break;

		case SO_PASSCRED:
			if (valbool)
				set_bit(SOCK_PASSCRED, &sock->flags);
			else
				clear_bit(SOCK_PASSCRED, &sock->flags);
			break;

		case SO_TIMESTAMP:
			if (valbool)  {
				sock_set_flag(sk, SOCK_RCVTSTAMP);
				sock_enable_timestamp(sk);
			} else
				sock_reset_flag(sk, SOCK_RCVTSTAMP);
			break;

		case SO_RCVLOWAT:
			if (val < 0)
				val = INT_MAX;
			sk->sk_rcvlowat = val ? : 1;
			break;

		case SO_RCVTIMEO:
			ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
			break;

		case SO_SNDTIMEO:
			ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
			break;

#ifdef CONFIG_NETDEVICES
		case SO_BINDTODEVICE:
		{
			char devname[IFNAMSIZ];

			/* Sorry... */
			if (!capable(CAP_NET_RAW)) {
				ret = -EPERM;
				break;
			}

			/* Bind this socket to a particular device like "eth0",
			 * as specified in the passed interface name. If the
			 * name is "" or the option length is zero the socket
			 * is not bound.
			 */

			if (!valbool) {
				sk->sk_bound_dev_if = 0;
			} else {
				if (optlen > IFNAMSIZ)
					optlen = IFNAMSIZ;
				if (copy_from_user(devname, optval, optlen)) {
					ret = -EFAULT;
					break;
				}

				/* Remove any cached route for this socket. */
				sk_dst_reset(sk);

				if (devname[0] == '\0') {
					sk->sk_bound_dev_if = 0;
				} else {
					struct net_device *dev = dev_get_by_name(devname);
					if (!dev) {
						ret = -ENODEV;
						break;
					}
					sk->sk_bound_dev_if = dev->ifindex;
					dev_put(dev);
				}
			}
			break;
		}
#endif


		case SO_ATTACH_FILTER:
			ret = -EINVAL;
			if (optlen == sizeof(struct sock_fprog)) {
				struct sock_fprog fprog;

				ret = -EFAULT;
				if (copy_from_user(&fprog, optval, sizeof(fprog)))
					break;

				ret = sk_attach_filter(&fprog, sk);
			}
			break;

		case SO_DETACH_FILTER:
			spin_lock_bh(&sk->sk_lock.slock);
			filter = sk->sk_filter;
			if (filter) {
				sk->sk_filter = NULL;
				spin_unlock_bh(&sk->sk_lock.slock);
				sk_filter_release(sk, filter);
				break;
			}
			spin_unlock_bh(&sk->sk_lock.slock);
			ret = -ENONET;
			break;

		/* We implement SO_SNDLOWAT etc. as not settable
		   (1003.1g 5.3) */
		default:
			ret = -ENOPROTOOPT;
			break;
	}
	release_sock(sk);
	return ret;
}
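
/*
 * Illustrative sketch (user space, not part of this file): note above
 * that SO_SNDBUF/SO_RCVBUF store double the requested value to cover
 * bookkeeping overhead, so reading the option back returns roughly
 * twice what was set (after clamping to sysctl_wmem_max/rmem_max):
 *
 *	int val = 16384, out;
 *	socklen_t len = sizeof(out);
 *	setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &val, sizeof(val));
 *	getsockopt(fd, SOL_SOCKET, SO_SNDBUF, &out, &len);  out ~= 32768
 */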


int sock_getsockopt(struct socket *sock, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;

	union
	{
		int val;
		struct linger ling;
		struct timeval tm;
	} v;

	unsigned int lv = sizeof(int);
	int len;

	if(get_user(len,optlen))
		return -EFAULT;
	if(len < 0)
		return -EINVAL;

	switch(optname)
	{
		case SO_DEBUG:
			v.val = sock_flag(sk, SOCK_DBG);
			break;

		case SO_DONTROUTE:
			v.val = sock_flag(sk, SOCK_LOCALROUTE);
			break;

		case SO_BROADCAST:
			v.val = !!sock_flag(sk, SOCK_BROADCAST);
			break;

		case SO_SNDBUF:
			v.val = sk->sk_sndbuf;
			break;

		case SO_RCVBUF:
			v.val = sk->sk_rcvbuf;
			break;

		case SO_REUSEADDR:
			v.val = sk->sk_reuse;
			break;

		case SO_KEEPALIVE:
			v.val = !!sock_flag(sk, SOCK_KEEPOPEN);
			break;

		case SO_TYPE:
			v.val = sk->sk_type;
			break;

		case SO_ERROR:
			v.val = -sock_error(sk);
			if(v.val==0)
				v.val = xchg(&sk->sk_err_soft, 0);
			break;

		case SO_OOBINLINE:
			v.val = !!sock_flag(sk, SOCK_URGINLINE);
			break;

		case SO_NO_CHECK:
			v.val = sk->sk_no_check;
			break;

		case SO_PRIORITY:
			v.val = sk->sk_priority;
			break;

		case SO_LINGER:
			lv		= sizeof(v.ling);
			v.ling.l_onoff	= !!sock_flag(sk, SOCK_LINGER);
			v.ling.l_linger	= sk->sk_lingertime / HZ;
			break;

		case SO_BSDCOMPAT:
			sock_warn_obsolete_bsdism("getsockopt");
			break;

		case SO_TIMESTAMP:
			v.val = sock_flag(sk, SOCK_RCVTSTAMP);
			break;

		case SO_RCVTIMEO:
			lv=sizeof(struct timeval);
			if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
				v.tm.tv_sec = 0;
				v.tm.tv_usec = 0;
			} else {
				v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
				v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ;
			}
			break;

		case SO_SNDTIMEO:
			lv=sizeof(struct timeval);
			if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
				v.tm.tv_sec = 0;
				v.tm.tv_usec = 0;
			} else {
				v.tm.tv_sec = sk->sk_sndtimeo / HZ;
				v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ;
			}
			break;

		case SO_RCVLOWAT:
			v.val = sk->sk_rcvlowat;
			break;

		case SO_SNDLOWAT:
			v.val=1;
			break;

		case SO_PASSCRED:
			v.val = test_bit(SOCK_PASSCRED, &sock->flags) ? 1 : 0;
			break;

		case SO_PEERCRED:
			if (len > sizeof(sk->sk_peercred))
				len = sizeof(sk->sk_peercred);
			if (copy_to_user(optval, &sk->sk_peercred, len))
				return -EFAULT;
			goto lenout;

		case SO_PEERNAME:
		{
			char address[128];

			if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
				return -ENOTCONN;
			if (lv < len)
				return -EINVAL;
			if (copy_to_user(optval, address, len))
				return -EFAULT;
			goto lenout;
		}

		/* Dubious BSD thing... Probably nobody even uses it, but
		 * the UNIX standard wants it for whatever reason... -DaveM
		 */
		case SO_ACCEPTCONN:
			v.val = sk->sk_state == TCP_LISTEN;
			break;

		case SO_PEERSEC:
			return security_socket_getpeersec(sock, optval, optlen, len);

		default:
			return(-ENOPROTOOPT);
	}
	if (len > lv)
		len = lv;
	if (copy_to_user(optval, &v, len))
		return -EFAULT;
lenout:
	if (put_user(len, optlen))
		return -EFAULT;
	return 0;
}
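
/*
 * Illustrative sketch (user space, not part of this file): SO_ERROR is
 * how asynchronous errors are reported, e.g. after a non-blocking
 * connect() signals writability:
 *
 *	int err;
 *	socklen_t len = sizeof(err);
 *	getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &len);
 *	if (err)
 *		errno = err;	connect() failed with this errno
 */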

/**
 *	sk_alloc - All socket objects are allocated here
 *	@family: protocol family
 *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
 *	@prot: struct proto associated with this new sock instance
 *	@zero_it: if we should zero the newly allocated sock
 */
struct sock *sk_alloc(int family, int priority, struct proto *prot, int zero_it)
{
	struct sock *sk = NULL;
	kmem_cache_t *slab = prot->slab;

	if (slab != NULL)
		sk = kmem_cache_alloc(slab, priority);
	else
		sk = kmalloc(prot->obj_size, priority);

	if (sk) {
		if (zero_it) {
			memset(sk, 0, prot->obj_size);
			sk->sk_family = family;
			/*
			 * See comment in struct sock definition to understand
			 * why we need sk_prot_creator -acme
			 */
			sk->sk_prot = sk->sk_prot_creator = prot;
			sock_lock_init(sk);
		}

		if (security_sk_alloc(sk, family, priority)) {
			if (slab != NULL)
				kmem_cache_free(slab, sk);
			else
				kfree(sk);
			sk = NULL;
		} else
			__module_get(prot->owner);
	}
	return sk;
}
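
/*
 * Illustrative sketch (assumptions: a hypothetical "my_proto" struct
 * proto already registered via proto_register()): this is roughly how
 * an address family's create routine obtains and initialises a sock.
 *
 *	struct sock *sk = sk_alloc(PF_INET, GFP_KERNEL, &my_proto, 1);
 *	if (!sk)
 *		return -ENOBUFS;
 *	sock_init_data(sock, sk);
 */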

void sk_free(struct sock *sk)
{
	struct sk_filter *filter;
	struct module *owner = sk->sk_prot_creator->owner;

	if (sk->sk_destruct)
		sk->sk_destruct(sk);

	filter = sk->sk_filter;
	if (filter) {
		sk_filter_release(sk, filter);
		sk->sk_filter = NULL;
	}

	sock_disable_timestamp(sk);

	if (atomic_read(&sk->sk_omem_alloc))
		printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n",
		       __FUNCTION__, atomic_read(&sk->sk_omem_alloc));

	security_sk_free(sk);
	if (sk->sk_prot_creator->slab != NULL)
		kmem_cache_free(sk->sk_prot_creator->slab, sk);
	else
		kfree(sk);
	module_put(owner);
}

void __init sk_init(void)
{
	if (num_physpages <= 4096) {
		sysctl_wmem_max = 32767;
		sysctl_rmem_max = 32767;
		sysctl_wmem_default = 32767;
		sysctl_rmem_default = 32767;
	} else if (num_physpages >= 131072) {
		sysctl_wmem_max = 131071;
		sysctl_rmem_max = 131071;
	}
}

/*
 *	Simple resource managers for sockets.
 */


/*
 * Write buffer destructor automatically called from kfree_skb.
 */
void sock_wfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;

	/* In case it might be waiting for more memory. */
	atomic_sub(skb->truesize, &sk->sk_wmem_alloc);
	if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE))
		sk->sk_write_space(sk);
	sock_put(sk);
}

/*
 * Read buffer destructor automatically called from kfree_skb.
 */
void sock_rfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;

	atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
}


int sock_i_uid(struct sock *sk)
{
	int uid;

	read_lock(&sk->sk_callback_lock);
	uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : 0;
	read_unlock(&sk->sk_callback_lock);
	return uid;
}

unsigned long sock_i_ino(struct sock *sk)
{
	unsigned long ino;

	read_lock(&sk->sk_callback_lock);
	ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
	read_unlock(&sk->sk_callback_lock);
	return ino;
}

/*
 * Allocate a skb from the socket's send buffer.
 */
struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, int priority)
{
	if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
		struct sk_buff * skb = alloc_skb(size, priority);
		if (skb) {
			skb_set_owner_w(skb, sk);
			return skb;
		}
	}
	return NULL;
}

/*
 * Allocate a skb from the socket's receive buffer.
 */
struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, int priority)
{
	if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
		struct sk_buff *skb = alloc_skb(size, priority);
		if (skb) {
			skb_set_owner_r(skb, sk);
			return skb;
		}
	}
	return NULL;
}

/*
 * Allocate a memory block from the socket's option memory buffer.
 */
void *sock_kmalloc(struct sock *sk, int size, int priority)
{
	if ((unsigned)size <= sysctl_optmem_max &&
	    atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
		void *mem;
		/* First do the add, to avoid the race if kmalloc
		 * might sleep.
		 */
		atomic_add(size, &sk->sk_omem_alloc);
		mem = kmalloc(size, priority);
		if (mem)
			return mem;
		atomic_sub(size, &sk->sk_omem_alloc);
	}
	return NULL;
}

/*
 * Free an option memory block.
 */
void sock_kfree_s(struct sock *sk, void *mem, int size)
{
	kfree(mem);
	atomic_sub(size, &sk->sk_omem_alloc);
}
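
/*
 * Illustrative sketch: sock_kmalloc() charges the allocation against
 * sk->sk_omem_alloc, so a matching sock_kfree_s() with the same size
 * (a hypothetical option blob here) must be used to uncharge it:
 *
 *	struct my_opts *opts = sock_kmalloc(sk, sizeof(*opts), GFP_KERNEL);
 *	if (!opts)
 *		return -ENOBUFS;
 *	... use opts ...
 *	sock_kfree_s(sk, opts, sizeof(*opts));
 */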

/* This is almost wait_for_tcp_memory minus release_sock/lock_sock.
   I think these locks should be removed for datagram sockets.
 */
static long sock_wait_for_wmem(struct sock * sk, long timeo)
{
	DEFINE_WAIT(wait);

	clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
	for (;;) {
		if (!timeo)
			break;
		if (signal_pending(current))
			break;
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
			break;
		if (sk->sk_shutdown & SEND_SHUTDOWN)
			break;
		if (sk->sk_err)
			break;
		timeo = schedule_timeout(timeo);
	}
	finish_wait(sk->sk_sleep, &wait);
	return timeo;
}


/*
 *	Generic send/receive buffer handlers
 */

static struct sk_buff *sock_alloc_send_pskb(struct sock *sk,
					    unsigned long header_len,
					    unsigned long data_len,
					    int noblock, int *errcode)
{
	struct sk_buff *skb;
	unsigned int gfp_mask;
	long timeo;
	int err;

	gfp_mask = sk->sk_allocation;
	if (gfp_mask & __GFP_WAIT)
		gfp_mask |= __GFP_REPEAT;

	timeo = sock_sndtimeo(sk, noblock);
	while (1) {
		err = sock_error(sk);
		if (err != 0)
			goto failure;

		err = -EPIPE;
		if (sk->sk_shutdown & SEND_SHUTDOWN)
			goto failure;

		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
			skb = alloc_skb(header_len, sk->sk_allocation);
			if (skb) {
				int npages;
				int i;

				/* No pages, we're done... */
				if (!data_len)
					break;

				npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
				skb->truesize += data_len;
				skb_shinfo(skb)->nr_frags = npages;
				for (i = 0; i < npages; i++) {
					struct page *page;
					skb_frag_t *frag;

					page = alloc_pages(sk->sk_allocation, 0);
					if (!page) {
						err = -ENOBUFS;
						skb_shinfo(skb)->nr_frags = i;
						kfree_skb(skb);
						goto failure;
					}

					frag = &skb_shinfo(skb)->frags[i];
					frag->page = page;
					frag->page_offset = 0;
					frag->size = (data_len >= PAGE_SIZE ?
						      PAGE_SIZE :
						      data_len);
					data_len -= PAGE_SIZE;
				}

				/* Full success... */
				break;
			}
			err = -ENOBUFS;
			goto failure;
		}
		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
		err = -EAGAIN;
		if (!timeo)
			goto failure;
		if (signal_pending(current))
			goto interrupted;
		timeo = sock_wait_for_wmem(sk, timeo);
	}

	skb_set_owner_w(skb, sk);
	return skb;

interrupted:
	err = sock_intr_errno(timeo);
failure:
	*errcode = err;
	return NULL;
}

struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
				    int noblock, int *errcode)
{
	return sock_alloc_send_pskb(sk, size, 0, noblock, errcode);
}
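
/*
 * Illustrative sketch (a hypothetical datagram sendmsg path): callers
 * pass MSG_DONTWAIT through "noblock" and propagate *errcode on
 * failure; sock_alloc_send_skb() sleeps in sock_wait_for_wmem() until
 * write memory frees up, the send timeout expires, or a signal arrives.
 *
 *	skb = sock_alloc_send_skb(sk, len + hdr_len,
 *				  msg->msg_flags & MSG_DONTWAIT, &err);
 *	if (!skb)
 *		return err;
 */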

static void __lock_sock(struct sock *sk)
{
	DEFINE_WAIT(wait);

	for(;;) {
		prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
					TASK_UNINTERRUPTIBLE);
		spin_unlock_bh(&sk->sk_lock.slock);
		schedule();
		spin_lock_bh(&sk->sk_lock.slock);
		if(!sock_owned_by_user(sk))
			break;
	}
	finish_wait(&sk->sk_lock.wq, &wait);
}

static void __release_sock(struct sock *sk)
{
	struct sk_buff *skb = sk->sk_backlog.head;

	do {
		sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
		bh_unlock_sock(sk);

		do {
			struct sk_buff *next = skb->next;

			skb->next = NULL;
			sk->sk_backlog_rcv(sk, skb);

			/*
			 * We are in process context here with softirqs
			 * disabled, use cond_resched_softirq() to preempt.
			 * This is safe to do because we've taken the backlog
			 * queue private:
			 */
			cond_resched_softirq();

			skb = next;
		} while (skb != NULL);

		bh_lock_sock(sk);
	} while((skb = sk->sk_backlog.head) != NULL);
}

/**
 * sk_wait_data - wait for data to arrive at sk_receive_queue
 * @sk:    sock to wait on
 * @timeo: for how long
 *
 * Socket state (including sk->sk_err) is changed only under the socket
 * lock, hence we may omit checks after joining the wait queue.
 * We check the receive queue before schedule() only as an optimization;
 * it is very likely that release_sock() added new data.
 */
int sk_wait_data(struct sock *sk, long *timeo)
{
	int rc;
	DEFINE_WAIT(wait);

	prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
	set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
	rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
	clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
	finish_wait(sk->sk_sleep, &wait);
	return rc;
}

EXPORT_SYMBOL(sk_wait_data);
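
/*
 * Illustrative sketch (a hypothetical recvmsg loop): protocols call
 * sk_wait_data() with the socket locked; sk_wait_event() drops the lock
 * while sleeping and re-acquires it before returning.
 *
 *	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
 *	while (skb_queue_empty(&sk->sk_receive_queue)) {
 *		if (!timeo || signal_pending(current))
 *			goto out_err;
 *		sk_wait_data(sk, &timeo);
 *	}
 */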

/*
 * Set of default routines for initialising struct proto_ops when
 * the protocol does not support a particular function. In certain
 * cases where it makes no sense for a protocol to have a "do nothing"
 * function, some default processing is provided.
 */

int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
{
	return -EOPNOTSUPP;
}

int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
		    int len, int flags)
{
	return -EOPNOTSUPP;
}

int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
{
	return -EOPNOTSUPP;
}

int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
{
	return -EOPNOTSUPP;
}

int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
		    int *len, int peer)
{
	return -EOPNOTSUPP;
}

unsigned int sock_no_poll(struct file * file, struct socket *sock, poll_table *pt)
{
	return 0;
}

int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	return -EOPNOTSUPP;
}

int sock_no_listen(struct socket *sock, int backlog)
{
	return -EOPNOTSUPP;
}

int sock_no_shutdown(struct socket *sock, int how)
{
	return -EOPNOTSUPP;
}

int sock_no_setsockopt(struct socket *sock, int level, int optname,
		    char __user *optval, int optlen)
{
	return -EOPNOTSUPP;
}

int sock_no_getsockopt(struct socket *sock, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	return -EOPNOTSUPP;
}

int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
		    size_t len)
{
	return -EOPNOTSUPP;
}

int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
		    size_t len, int flags)
{
	return -EOPNOTSUPP;
}

int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
{
	/* Mirror missing mmap method error code */
	return -ENODEV;
}

ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
{
	ssize_t res;
	struct msghdr msg = {.msg_flags = flags};
	struct kvec iov;
	char *kaddr = kmap(page);
	iov.iov_base = kaddr + offset;
	iov.iov_len = size;
	res = kernel_sendmsg(sock, &msg, &iov, 1, size);
	kunmap(page);
	return res;
}
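
/*
 * Illustrative sketch (a hypothetical minimal family): protocols plug
 * the sock_no_* stubs into struct proto_ops for operations they do not
 * support, rather than leaving NULL pointers around:
 *
 *	static struct proto_ops my_proto_ops = {
 *		.family		= PF_INET,
 *		.owner		= THIS_MODULE,
 *		.bind		= sock_no_bind,
 *		.accept		= sock_no_accept,
 *		.sendpage	= sock_no_sendpage,
 *		...
 *	};
 */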

/*
 *	Default Socket Callbacks
 */

static void sock_def_wakeup(struct sock *sk)
{
	read_lock(&sk->sk_callback_lock);
	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
		wake_up_interruptible_all(sk->sk_sleep);
	read_unlock(&sk->sk_callback_lock);
}

static void sock_def_error_report(struct sock *sk)
{
	read_lock(&sk->sk_callback_lock);
	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
		wake_up_interruptible(sk->sk_sleep);
	sk_wake_async(sk,0,POLL_ERR);
	read_unlock(&sk->sk_callback_lock);
}

static void sock_def_readable(struct sock *sk, int len)
{
	read_lock(&sk->sk_callback_lock);
	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
		wake_up_interruptible(sk->sk_sleep);
	sk_wake_async(sk,1,POLL_IN);
	read_unlock(&sk->sk_callback_lock);
}

static void sock_def_write_space(struct sock *sk)
{
	read_lock(&sk->sk_callback_lock);

	/* Do not wake up a writer until he can make "significant"
	 * progress.  --DaveM
	 */
	if((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
		if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
			wake_up_interruptible(sk->sk_sleep);

		/* Should agree with poll, otherwise some programs break */
		if (sock_writeable(sk))
			sk_wake_async(sk, 2, POLL_OUT);
	}

	read_unlock(&sk->sk_callback_lock);
}

static void sock_def_destruct(struct sock *sk)
{
	if (sk->sk_protinfo)
		kfree(sk->sk_protinfo);
}

void sk_send_sigurg(struct sock *sk)
{
	if (sk->sk_socket && sk->sk_socket->file)
		if (send_sigurg(&sk->sk_socket->file->f_owner))
			sk_wake_async(sk, 3, POLL_PRI);
}

void sk_reset_timer(struct sock *sk, struct timer_list* timer,
		    unsigned long expires)
{
	if (!mod_timer(timer, expires))
		sock_hold(sk);
}

EXPORT_SYMBOL(sk_reset_timer);

void sk_stop_timer(struct sock *sk, struct timer_list* timer)
{
	if (timer_pending(timer) && del_timer(timer))
		__sock_put(sk);
}

EXPORT_SYMBOL(sk_stop_timer);
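
/*
 * Illustrative sketch: sk_reset_timer()/sk_stop_timer() keep a sock
 * reference for as long as the timer is pending (sock_hold() is taken
 * only when arming a previously inactive timer), so a protocol timer
 * handler conventionally drops that reference itself via sock_put():
 *
 *	sk_reset_timer(sk, &sk->sk_timer, jiffies + delay);   holds sk
 *	...
 *	sk_stop_timer(sk, &sk->sk_timer);                     releases it
 */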

void sock_init_data(struct socket *sock, struct sock *sk)
{
	skb_queue_head_init(&sk->sk_receive_queue);
	skb_queue_head_init(&sk->sk_write_queue);
	skb_queue_head_init(&sk->sk_error_queue);

	sk->sk_send_head	=	NULL;

	init_timer(&sk->sk_timer);

	sk->sk_allocation	=	GFP_KERNEL;
	sk->sk_rcvbuf		=	sysctl_rmem_default;
	sk->sk_sndbuf		=	sysctl_wmem_default;
	sk->sk_state		=	TCP_CLOSE;
	sk->sk_socket		=	sock;

	sock_set_flag(sk, SOCK_ZAPPED);

	if(sock)
	{
		sk->sk_type	=	sock->type;
		sk->sk_sleep	=	&sock->wait;
		sock->sk	=	sk;
	} else
		sk->sk_sleep	=	NULL;

	rwlock_init(&sk->sk_dst_lock);
	rwlock_init(&sk->sk_callback_lock);

	sk->sk_state_change	=	sock_def_wakeup;
	sk->sk_data_ready	=	sock_def_readable;
	sk->sk_write_space	=	sock_def_write_space;
	sk->sk_error_report	=	sock_def_error_report;
	sk->sk_destruct		=	sock_def_destruct;

	sk->sk_sndmsg_page	=	NULL;
	sk->sk_sndmsg_off	=	0;

	sk->sk_peercred.pid	=	0;
	sk->sk_peercred.uid	=	-1;
	sk->sk_peercred.gid	=	-1;
	sk->sk_write_pending	=	0;
	sk->sk_rcvlowat		=	1;
	sk->sk_rcvtimeo		=	MAX_SCHEDULE_TIMEOUT;
	sk->sk_sndtimeo		=	MAX_SCHEDULE_TIMEOUT;

	sk->sk_stamp.tv_sec     = -1L;
	sk->sk_stamp.tv_usec    = -1L;

	atomic_set(&sk->sk_refcnt, 1);
}

void fastcall lock_sock(struct sock *sk)
{
	might_sleep();
	spin_lock_bh(&(sk->sk_lock.slock));
	if (sk->sk_lock.owner)
		__lock_sock(sk);
	sk->sk_lock.owner = (void *)1;
	spin_unlock_bh(&(sk->sk_lock.slock));
}

EXPORT_SYMBOL(lock_sock);

void fastcall release_sock(struct sock *sk)
{
	spin_lock_bh(&(sk->sk_lock.slock));
	if (sk->sk_backlog.tail)
		__release_sock(sk);
	sk->sk_lock.owner = NULL;
	if (waitqueue_active(&(sk->sk_lock.wq)))
		wake_up(&(sk->sk_lock.wq));
	spin_unlock_bh(&(sk->sk_lock.slock));
}
EXPORT_SYMBOL(release_sock);
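
/*
 * Illustrative sketch (a hypothetical protocol entry point): the usual
 * pattern is lock_sock() to take ownership, do the work (packets queued
 * to the backlog meanwhile are run by release_sock() via
 * __release_sock()), then unlock:
 *
 *	lock_sock(sk);
 *	err = my_proto_do_something(sk);	hypothetical helper
 *	release_sock(sk);
 */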

int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
{
	if (!sock_flag(sk, SOCK_TIMESTAMP))
		sock_enable_timestamp(sk);
	if (sk->sk_stamp.tv_sec == -1)
		return -ENOENT;
	if (sk->sk_stamp.tv_sec == 0)
		do_gettimeofday(&sk->sk_stamp);
	return copy_to_user(userstamp, &sk->sk_stamp, sizeof(struct timeval)) ?
		-EFAULT : 0;
}
EXPORT_SYMBOL(sock_get_timestamp);

void sock_enable_timestamp(struct sock *sk)
{
	if (!sock_flag(sk, SOCK_TIMESTAMP)) {
		sock_set_flag(sk, SOCK_TIMESTAMP);
		net_enable_timestamp();
	}
}
EXPORT_SYMBOL(sock_enable_timestamp);

/*
 *	Get a socket option on a socket.
 *
 *	FIX: POSIX 1003.1g is very ambiguous here. It states that
 *	asynchronous errors should be reported by getsockopt. We assume
 *	this means if you specify SO_ERROR (otherwise what's the point of it).
 */
int sock_common_getsockopt(struct socket *sock, int level, int optname,
			   char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;

	return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL(sock_common_getsockopt);

int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock,
			struct msghdr *msg, size_t size, int flags)
{
	struct sock *sk = sock->sk;
	int addr_len = 0;
	int err;

	err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
				   flags & ~MSG_DONTWAIT, &addr_len);
	if (err >= 0)
		msg->msg_namelen = addr_len;
	return err;
}

EXPORT_SYMBOL(sock_common_recvmsg);

/*
 *	Set socket options on an inet socket.
 */
int sock_common_setsockopt(struct socket *sock, int level, int optname,
			   char __user *optval, int optlen)
{
	struct sock *sk = sock->sk;

	return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL(sock_common_setsockopt);

void sk_common_release(struct sock *sk)
{
	if (sk->sk_prot->destroy)
		sk->sk_prot->destroy(sk);

	/*
	 * Observation: when sk_common_release is called, processes have
	 * no access to the socket, but the network stack still does.
	 * Step one, detach it from networking:
	 *
	 * A. Remove from hash tables.
	 */

	sk->sk_prot->unhash(sk);

	/*
	 * At this point the socket cannot receive new packets, but it is
	 * possible that some packets are in flight because some CPU runs
	 * the receiver and did the hash table lookup before we unhashed
	 * the socket. They will reach the receive queue and will be purged
	 * by the socket destructor.
	 *
	 * We also still have packets pending on the receive queue and,
	 * probably, our own packets waiting in device queues. sock_destroy
	 * will drain the receive queue, but transmitted packets will delay
	 * socket destruction until the last reference is released.
	 */

	sock_orphan(sk);

	xfrm_sk_free_policy(sk);

#ifdef INET_REFCNT_DEBUG
	if (atomic_read(&sk->sk_refcnt) != 1)
		printk(KERN_DEBUG "Destruction of the socket %p delayed, c=%d\n",
		       sk, atomic_read(&sk->sk_refcnt));
#endif
	sock_put(sk);
}

EXPORT_SYMBOL(sk_common_release);

static DEFINE_RWLOCK(proto_list_lock);
static LIST_HEAD(proto_list);

int proto_register(struct proto *prot, int alloc_slab)
{
	char *request_sock_slab_name;
	int rc = -ENOBUFS;

	if (alloc_slab) {
		prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
					       SLAB_HWCACHE_ALIGN, NULL, NULL);

		if (prot->slab == NULL) {
			printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n",
			       prot->name);
			goto out;
		}

		if (prot->rsk_prot != NULL) {
			static const char mask[] = "request_sock_%s";

			request_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
			if (request_sock_slab_name == NULL)
				goto out_free_sock_slab;

			sprintf(request_sock_slab_name, mask, prot->name);
			prot->rsk_prot->slab = kmem_cache_create(request_sock_slab_name,
								 prot->rsk_prot->obj_size, 0,
								 SLAB_HWCACHE_ALIGN, NULL, NULL);

			if (prot->rsk_prot->slab == NULL) {
				printk(KERN_CRIT "%s: Can't create request sock SLAB cache!\n",
				       prot->name);
				goto out_free_request_sock_slab_name;
			}
		}
	}

	write_lock(&proto_list_lock);
	list_add(&prot->node, &proto_list);
	write_unlock(&proto_list_lock);
	rc = 0;
out:
	return rc;
out_free_request_sock_slab_name:
	kfree(request_sock_slab_name);
out_free_sock_slab:
	kmem_cache_destroy(prot->slab);
	prot->slab = NULL;
	goto out;
}

EXPORT_SYMBOL(proto_register);
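
/*
 * Illustrative sketch (a hypothetical module init): protocols register
 * their struct proto once at load time, asking for a private slab, and
 * call proto_unregister(&my_proto) from the module exit path:
 *
 *	static int __init my_proto_init(void)
 *	{
 *		return proto_register(&my_proto, 1);
 *	}
 */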

void proto_unregister(struct proto *prot)
{
	write_lock(&proto_list_lock);

	if (prot->slab != NULL) {
		kmem_cache_destroy(prot->slab);
		prot->slab = NULL;
	}

	if (prot->rsk_prot != NULL && prot->rsk_prot->slab != NULL) {
		const char *name = kmem_cache_name(prot->rsk_prot->slab);

		kmem_cache_destroy(prot->rsk_prot->slab);
		kfree(name);
		prot->rsk_prot->slab = NULL;
	}

	list_del(&prot->node);
	write_unlock(&proto_list_lock);
}

EXPORT_SYMBOL(proto_unregister);

#ifdef CONFIG_PROC_FS
static inline struct proto *__proto_head(void)
{
	return list_entry(proto_list.next, struct proto, node);
}

static inline struct proto *proto_head(void)
{
	return list_empty(&proto_list) ? NULL : __proto_head();
}

static inline struct proto *proto_next(struct proto *proto)
{
	return proto->node.next == &proto_list ? NULL :
		list_entry(proto->node.next, struct proto, node);
}

static inline struct proto *proto_get_idx(loff_t pos)
{
	struct proto *proto;
	loff_t i = 0;

	list_for_each_entry(proto, &proto_list, node)
		if (i++ == pos)
			goto out;

	proto = NULL;
out:
	return proto;
}

static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
{
	read_lock(&proto_list_lock);
	return *pos ? proto_get_idx(*pos - 1) : SEQ_START_TOKEN;
}

static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	++*pos;
	return v == SEQ_START_TOKEN ? proto_head() : proto_next(v);
}

static void proto_seq_stop(struct seq_file *seq, void *v)
{
	read_unlock(&proto_list_lock);
}

static char proto_method_implemented(const void *method)
{
	return method == NULL ? 'n' : 'y';
}

static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
{
	seq_printf(seq, "%-9s %4u %6d  %6d   %-3s %6u   %-3s  %-10s "
			"%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
		   proto->name,
		   proto->obj_size,
		   proto->sockets_allocated != NULL ? atomic_read(proto->sockets_allocated) : -1,
		   proto->memory_allocated != NULL ? atomic_read(proto->memory_allocated) : -1,
		   proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI",
		   proto->max_header,
		   proto->slab == NULL ? "no" : "yes",
		   module_name(proto->owner),
		   proto_method_implemented(proto->close),
		   proto_method_implemented(proto->connect),
		   proto_method_implemented(proto->disconnect),
		   proto_method_implemented(proto->accept),
		   proto_method_implemented(proto->ioctl),
		   proto_method_implemented(proto->init),
		   proto_method_implemented(proto->destroy),
		   proto_method_implemented(proto->shutdown),
		   proto_method_implemented(proto->setsockopt),
		   proto_method_implemented(proto->getsockopt),
		   proto_method_implemented(proto->sendmsg),
		   proto_method_implemented(proto->recvmsg),
		   proto_method_implemented(proto->sendpage),
		   proto_method_implemented(proto->bind),
		   proto_method_implemented(proto->backlog_rcv),
		   proto_method_implemented(proto->hash),
		   proto_method_implemented(proto->unhash),
		   proto_method_implemented(proto->get_port),
		   proto_method_implemented(proto->enter_memory_pressure));
}

static int proto_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
			   "protocol",
			   "size",
			   "sockets",
			   "memory",
			   "press",
			   "maxhdr",
			   "slab",
			   "module",
			   "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
	else
		proto_seq_printf(seq, v);
	return 0;
}

static struct seq_operations proto_seq_ops = {
	.start  = proto_seq_start,
	.next   = proto_seq_next,
	.stop   = proto_seq_stop,
	.show   = proto_seq_show,
};

static int proto_seq_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &proto_seq_ops);
}

static struct file_operations proto_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= proto_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};

static int __init proto_init(void)
{
	/* register /proc/net/protocols */
	return proc_net_fops_create("protocols", S_IRUGO, &proto_seq_fops) == NULL ? -ENOBUFS : 0;
}

subsys_initcall(proto_init);

#endif /* PROC_FS */

EXPORT_SYMBOL(sk_alloc);
EXPORT_SYMBOL(sk_free);
EXPORT_SYMBOL(sk_send_sigurg);
EXPORT_SYMBOL(sock_alloc_send_skb);
EXPORT_SYMBOL(sock_init_data);
EXPORT_SYMBOL(sock_kfree_s);
EXPORT_SYMBOL(sock_kmalloc);
EXPORT_SYMBOL(sock_no_accept);
EXPORT_SYMBOL(sock_no_bind);
EXPORT_SYMBOL(sock_no_connect);
EXPORT_SYMBOL(sock_no_getname);
EXPORT_SYMBOL(sock_no_getsockopt);
EXPORT_SYMBOL(sock_no_ioctl);
EXPORT_SYMBOL(sock_no_listen);
EXPORT_SYMBOL(sock_no_mmap);
EXPORT_SYMBOL(sock_no_poll);
EXPORT_SYMBOL(sock_no_recvmsg);
EXPORT_SYMBOL(sock_no_sendmsg);
EXPORT_SYMBOL(sock_no_sendpage);
EXPORT_SYMBOL(sock_no_setsockopt);
EXPORT_SYMBOL(sock_no_shutdown);
EXPORT_SYMBOL(sock_no_socketpair);
EXPORT_SYMBOL(sock_rfree);
EXPORT_SYMBOL(sock_setsockopt);
EXPORT_SYMBOL(sock_wfree);
EXPORT_SYMBOL(sock_wmalloc);
EXPORT_SYMBOL(sock_i_uid);
EXPORT_SYMBOL(sock_i_ino);
#ifdef CONFIG_SYSCTL
EXPORT_SYMBOL(sysctl_optmem_max);
EXPORT_SYMBOL(sysctl_rmem_max);
EXPORT_SYMBOL(sysctl_wmem_max);
#endif