af_unix.c revision 40d44446cf10d9d118e8f0132c94e1f25ea3be97
1/*
2 * NET4:	Implementation of BSD Unix domain sockets.
3 *
4 * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
5 *
6 *		This program is free software; you can redistribute it and/or
7 *		modify it under the terms of the GNU General Public License
8 *		as published by the Free Software Foundation; either version
9 *		2 of the License, or (at your option) any later version.
10 *
11 * Fixes:
12 *		Linus Torvalds	:	Assorted bug cures.
13 *		Niibe Yutaka	:	async I/O support.
14 *		Carsten Paeth	:	PF_UNIX check, address fixes.
15 *		Alan Cox	:	Limit size of allocated blocks.
16 *		Alan Cox	:	Fixed the stupid socketpair bug.
17 *		Alan Cox	:	BSD compatibility fine tuning.
18 *		Alan Cox	:	Fixed a bug in connect when interrupted.
19 *		Alan Cox	:	Sorted out a proper draft version of
20 *					file descriptor passing hacked up from
21 *					Mike Shaver's work.
22 *		Marty Leisner	:	Fixes to fd passing
23 *		Nick Nevin	:	recvmsg bugfix.
24 *		Alan Cox	:	Started proper garbage collector
25 *		Heiko Eißfeldt	:	Missing verify_area check
26 *		Alan Cox	:	Started POSIXisms
27 *		Andreas Schwab	:	Replace inode by dentry for proper
28 *					reference counting
29 *		Kirk Petersen	:	Made this a module
30 *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
31 *					Lots of bug fixes.
32 *	     Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
33 *					by the above two patches.
34 *	     Andrea Arcangeli	:	If possible we block in connect(2)
35 *					if the max backlog of the listen socket
36 *					has been reached. This won't break
37 *					old apps and it avoids huge amounts
38 *					of sockets being hashed (for unix_gc()
39 *					performance reasons).
40 *					Security fix that limits the max
41 *					number of sockets to 2*max_files and
42 *					the number of skbs queueable in the
43 *					dgram receiver.
44 *		Artur Skawina   :	Hash function optimizations
45 *	     Alexey Kuznetsov   :	Full scale SMP. Lot of bugs are introduced 8)
46 *	      Malcolm Beattie   :	Set peercred for socketpair
47 *	     Michal Ostrowski   :       Module initialization cleanup.
48 *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
49 *	     				the core infrastructure is doing that
50 *	     				for all net proto families now (2.5.69+)
51 *
52 *
53 * Known differences from reference BSD that was tested:
54 *
55 *	[TO FIX]
56 *	ECONNREFUSED is not returned from one end of a connected() socket to the
57 *		other the moment one end closes.
58 *	fstat() doesn't return st_dev=0, and gives the blksize as the high water
59 *		mark and a fake inode identifier (nor the BSD first socket fstat twice bug).
60 *	[NOT TO FIX]
61 *	accept() returns a path name even if the connecting socket has closed
62 *		in the meantime (BSD loses the path and gives up).
63 *	accept() returns 0 length path for an unbound connector. BSD returns 16
64 *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
65 *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
66 *	BSD af_unix connect apparently forgets to block properly.
67 *		(need to check this with the POSIX spec in detail)
68 *
69 * Differences from 2.0.0-11-... (ANK)
70 *	Bug fixes and improvements.
71 *		- client shutdown killed server socket.
72 *		- removed all useless cli/sti pairs.
73 *
74 *	Semantic changes/extensions.
75 *		- generic control message passing.
76 *		- SCM_CREDENTIALS control message.
77 *		- "Abstract" (not FS based) socket bindings.
78 *		  Abstract names are sequences of bytes (not zero terminated)
79 *		  starting with a 0 byte, so that this name space does not
80 *		  intersect with BSD names.
81 */
82
83#include <linux/module.h>
84#include <linux/kernel.h>
85#include <linux/signal.h>
86#include <linux/sched.h>
87#include <linux/errno.h>
88#include <linux/string.h>
89#include <linux/stat.h>
90#include <linux/dcache.h>
91#include <linux/namei.h>
92#include <linux/socket.h>
93#include <linux/un.h>
94#include <linux/fcntl.h>
95#include <linux/termios.h>
96#include <linux/sockios.h>
97#include <linux/net.h>
98#include <linux/in.h>
99#include <linux/fs.h>
100#include <linux/slab.h>
101#include <asm/uaccess.h>
102#include <linux/skbuff.h>
103#include <linux/netdevice.h>
104#include <net/net_namespace.h>
105#include <net/sock.h>
106#include <net/tcp_states.h>
107#include <net/af_unix.h>
108#include <linux/proc_fs.h>
109#include <linux/seq_file.h>
110#include <net/scm.h>
111#include <linux/init.h>
112#include <linux/poll.h>
113#include <linux/rtnetlink.h>
114#include <linux/mount.h>
115#include <net/checksum.h>
116#include <linux/security.h>
117
118static struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
119static DEFINE_SPINLOCK(unix_table_lock);
120static atomic_t unix_nr_socks = ATOMIC_INIT(0);
121
122#define unix_sockets_unbound	(&unix_socket_table[UNIX_HASH_SIZE])
123
124#define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)
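/* Buckets 0..UNIX_HASH_SIZE-1 hold bound sockets; the extra slot at index
 * UNIX_HASH_SIZE holds sockets not yet bound to any name.
 */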
125
126#ifdef CONFIG_SECURITY_NETWORK
127static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
128{
129	memcpy(UNIXSID(skb), &scm->secid, sizeof(u32));
130}
131
132static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
133{
134	scm->secid = *UNIXSID(skb);
135}
136#else
137static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
138{ }
139
140static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
141{ }
142#endif /* CONFIG_SECURITY_NETWORK */
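/* Note the direction of these helpers: unix_get_secdata() copies the
 * sender's security ID from the scm cookie into the skb on send, and
 * unix_set_secdata() copies it from the skb back into the receiver's
 * scm cookie on receive.
 */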
143
144/*
145 *  SMP locking strategy:
146 *    hash table is protected with spinlock unix_table_lock
147 *    each socket state is protected by separate rwlock.
148 */
149
150static inline unsigned unix_hash_fold(__wsum n)
151{
152	unsigned hash = (__force unsigned)n;
153	hash ^= hash>>16;
154	hash ^= hash>>8;
155	return hash&(UNIX_HASH_SIZE-1);
156}
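/* Fold the 32-bit checksum into a bucket index by XOR-ing the high bits
 * down into the low byte.  For example, with UNIX_HASH_SIZE == 256,
 * 0x12345678 -> 0x1234444c -> 0x12267008, giving bucket 0x08.
 */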
157
158#define unix_peer(sk) (unix_sk(sk)->peer)
159
160static inline int unix_our_peer(struct sock *sk, struct sock *osk)
161{
162	return unix_peer(osk) == sk;
163}
164
165static inline int unix_may_send(struct sock *sk, struct sock *osk)
166{
167	return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
168}
169
170static inline int unix_recvq_full(struct sock const *sk)
171{
172	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
173}
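/* For a listening socket the receive queue doubles as the accept backlog,
 * so it is "full" once more connections are pending than
 * sk_max_ack_backlog allows.
 */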
174
175static struct sock *unix_peer_get(struct sock *s)
176{
177	struct sock *peer;
178
179	unix_state_lock(s);
180	peer = unix_peer(s);
181	if (peer)
182		sock_hold(peer);
183	unix_state_unlock(s);
184	return peer;
185}
186
187static inline void unix_release_addr(struct unix_address *addr)
188{
189	if (atomic_dec_and_test(&addr->refcnt))
190		kfree(addr);
191}
192
193/*
194 *	Check unix socket name:
195 *		- it should not be zero length.
196 *		- if it does not start with a zero byte, it must be NUL terminated (FS object)
197 *		- if it starts with a zero byte, it is an abstract name.
198 */
199
200static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned *hashp)
201{
202	if (len <= sizeof(short) || len > sizeof(*sunaddr))
203		return -EINVAL;
204	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
205		return -EINVAL;
206	if (sunaddr->sun_path[0]) {
207		/*
208		 * This may look like an off by one error but it is a bit more
209		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
210		 * sun_path[108] doesn't as such exist.  However, in kernel space
211		 * we are guaranteed that it is a valid memory location in our
212		 * kernel address buffer.
213		 */
214		((char *)sunaddr)[len] = 0;
215		len = strlen(sunaddr->sun_path)+1+sizeof(short);
216		return len;
217	}
218
219	*hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
220	return len;
221}
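/* On success, unix_mkname() returns the effective address length
 * (including sun_family).  Only abstract names get *hashp filled in from
 * a checksum of the address; filesystem names are hashed by inode number
 * at bind time instead.
 */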
222
223static void __unix_remove_socket(struct sock *sk)
224{
225	sk_del_node_init(sk);
226}
227
228static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
229{
230	WARN_ON(!sk_unhashed(sk));
231	sk_add_node(sk, list);
232}
233
234static inline void unix_remove_socket(struct sock *sk)
235{
236	spin_lock(&unix_table_lock);
237	__unix_remove_socket(sk);
238	spin_unlock(&unix_table_lock);
239}
240
241static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
242{
243	spin_lock(&unix_table_lock);
244	__unix_insert_socket(list, sk);
245	spin_unlock(&unix_table_lock);
246}
247
248static struct sock *__unix_find_socket_byname(struct net *net,
249					      struct sockaddr_un *sunname,
250					      int len, int type, unsigned hash)
251{
252	struct sock *s;
253	struct hlist_node *node;
254
255	sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
256		struct unix_sock *u = unix_sk(s);
257
258		if (!net_eq(sock_net(s), net))
259			continue;
260
261		if (u->addr->len == len &&
262		    !memcmp(u->addr->name, sunname, len))
263			goto found;
264	}
265	s = NULL;
266found:
267	return s;
268}
269
270static inline struct sock *unix_find_socket_byname(struct net *net,
271						   struct sockaddr_un *sunname,
272						   int len, int type,
273						   unsigned hash)
274{
275	struct sock *s;
276
277	spin_lock(&unix_table_lock);
278	s = __unix_find_socket_byname(net, sunname, len, type, hash);
279	if (s)
280		sock_hold(s);
281	spin_unlock(&unix_table_lock);
282	return s;
283}
284
285static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i)
286{
287	struct sock *s;
288	struct hlist_node *node;
289
290	spin_lock(&unix_table_lock);
291	sk_for_each(s, node,
292		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
293		struct dentry *dentry = unix_sk(s)->dentry;
294
295		if (!net_eq(sock_net(s), net))
296			continue;
297
298		if (dentry && dentry->d_inode == i) {
299			sock_hold(s);
300			goto found;
301		}
302	}
303	s = NULL;
304found:
305	spin_unlock(&unix_table_lock);
306	return s;
307}
308
309static inline int unix_writable(struct sock *sk)
310{
311	return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
312}
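/* A socket counts as writable while its queued send memory stays at or
 * below a quarter of sk_sndbuf, i.e. wmem_alloc * 4 <= sndbuf.
 */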
313
314static void unix_write_space(struct sock *sk)
315{
316	read_lock(&sk->sk_callback_lock);
317	if (unix_writable(sk)) {
318		if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
319			wake_up_interruptible_sync(sk->sk_sleep);
320		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
321	}
322	read_unlock(&sk->sk_callback_lock);
323}
324
325/* When a dgram socket disconnects (or changes its peer), we clear its
326 * receive queue of packets that arrived from the previous peer. First, this
327 * allows flow control based only on wmem_alloc; second, an sk connected to
328 * a peer may receive messages only from that peer. */
329static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
330{
331	if (!skb_queue_empty(&sk->sk_receive_queue)) {
332		skb_queue_purge(&sk->sk_receive_queue);
333		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
334
335		/* If one link of a bidirectional dgram pipe is disconnected,
336		 * we signal an error. Messages are lost. Do not do this
337		 * when the peer was not connected to us.
338		 */
339		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
340			other->sk_err = ECONNRESET;
341			other->sk_error_report(other);
342		}
343	}
344}
345
346static void unix_sock_destructor(struct sock *sk)
347{
348	struct unix_sock *u = unix_sk(sk);
349
350	skb_queue_purge(&sk->sk_receive_queue);
351
352	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
353	WARN_ON(!sk_unhashed(sk));
354	WARN_ON(sk->sk_socket);
355	if (!sock_flag(sk, SOCK_DEAD)) {
356		printk(KERN_INFO "Attempt to release alive unix socket: %p\n", sk);
357		return;
358	}
359
360	if (u->addr)
361		unix_release_addr(u->addr);
362
363	atomic_dec(&unix_nr_socks);
364	local_bh_disable();
365	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
366	local_bh_enable();
367#ifdef UNIX_REFCNT_DEBUG
368	printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk,
369		atomic_read(&unix_nr_socks));
370#endif
371}
372
373static int unix_release_sock(struct sock *sk, int embrion)
374{
375	struct unix_sock *u = unix_sk(sk);
376	struct dentry *dentry;
377	struct vfsmount *mnt;
378	struct sock *skpair;
379	struct sk_buff *skb;
380	int state;
381
382	unix_remove_socket(sk);
383
384	/* Clear state */
385	unix_state_lock(sk);
386	sock_orphan(sk);
387	sk->sk_shutdown = SHUTDOWN_MASK;
388	dentry	     = u->dentry;
389	u->dentry    = NULL;
390	mnt	     = u->mnt;
391	u->mnt	     = NULL;
392	state = sk->sk_state;
393	sk->sk_state = TCP_CLOSE;
394	unix_state_unlock(sk);
395
396	wake_up_interruptible_all(&u->peer_wait);
397
398	skpair = unix_peer(sk);
399
400	if (skpair != NULL) {
401		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
402			unix_state_lock(skpair);
403			/* No more writes */
404			skpair->sk_shutdown = SHUTDOWN_MASK;
405			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
406				skpair->sk_err = ECONNRESET;
407			unix_state_unlock(skpair);
408			skpair->sk_state_change(skpair);
409			read_lock(&skpair->sk_callback_lock);
410			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
411			read_unlock(&skpair->sk_callback_lock);
412		}
413		sock_put(skpair); /* It may now die */
414		unix_peer(sk) = NULL;
415	}
416
417	/* Try to flush out this socket. Throw out buffers at least */
418
419	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
420		if (state == TCP_LISTEN)
421			unix_release_sock(skb->sk, 1);
422		/* passed fds are erased in the kfree_skb hook	      */
423		kfree_skb(skb);
424	}
425
426	if (dentry) {
427		dput(dentry);
428		mntput(mnt);
429	}
430
431	sock_put(sk);
432
433	/* ---- Socket is dead now and most probably destroyed ---- */
434
435	/*
436	 * Fixme: BSD difference: In BSD all sockets connected to us get
437	 *	  ECONNRESET and we die on the spot. In Linux we behave
438	 *	  like files and pipes do and wait for the last
439	 *	  dereference.
440	 *
441	 * Can't we simply set sock->err?
442	 *
443	 *	  What does the above comment talk about? --ANK(980817)
444	 */
445
446	if (unix_tot_inflight)
447		unix_gc();		/* Garbage collect fds */
448
449	return 0;
450}
451
452static int unix_listen(struct socket *sock, int backlog)
453{
454	int err;
455	struct sock *sk = sock->sk;
456	struct unix_sock *u = unix_sk(sk);
457
458	err = -EOPNOTSUPP;
459	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
460		goto out;	/* Only stream/seqpacket sockets accept */
461	err = -EINVAL;
462	if (!u->addr)
463		goto out;	/* No listens on an unbound socket */
464	unix_state_lock(sk);
465	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
466		goto out_unlock;
467	if (backlog > sk->sk_max_ack_backlog)
468		wake_up_interruptible_all(&u->peer_wait);
469	sk->sk_max_ack_backlog	= backlog;
470	sk->sk_state		= TCP_LISTEN;
471	/* set credentials so connect can copy them */
472	sk->sk_peercred.pid	= task_tgid_vnr(current);
473	current_euid_egid(&sk->sk_peercred.uid, &sk->sk_peercred.gid);
474	err = 0;
475
476out_unlock:
477	unix_state_unlock(sk);
478out:
479	return err;
480}
481
482static int unix_release(struct socket *);
483static int unix_bind(struct socket *, struct sockaddr *, int);
484static int unix_stream_connect(struct socket *, struct sockaddr *,
485			       int addr_len, int flags);
486static int unix_socketpair(struct socket *, struct socket *);
487static int unix_accept(struct socket *, struct socket *, int);
488static int unix_getname(struct socket *, struct sockaddr *, int *, int);
489static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
490static unsigned int unix_dgram_poll(struct file *, struct socket *,
491				    poll_table *);
492static int unix_ioctl(struct socket *, unsigned int, unsigned long);
493static int unix_shutdown(struct socket *, int);
494static int unix_stream_sendmsg(struct kiocb *, struct socket *,
495			       struct msghdr *, size_t);
496static int unix_stream_recvmsg(struct kiocb *, struct socket *,
497			       struct msghdr *, size_t, int);
498static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
499			      struct msghdr *, size_t);
500static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
501			      struct msghdr *, size_t, int);
502static int unix_dgram_connect(struct socket *, struct sockaddr *,
503			      int, int);
504static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
505				  struct msghdr *, size_t);
506
507static const struct proto_ops unix_stream_ops = {
508	.family =	PF_UNIX,
509	.owner =	THIS_MODULE,
510	.release =	unix_release,
511	.bind =		unix_bind,
512	.connect =	unix_stream_connect,
513	.socketpair =	unix_socketpair,
514	.accept =	unix_accept,
515	.getname =	unix_getname,
516	.poll =		unix_poll,
517	.ioctl =	unix_ioctl,
518	.listen =	unix_listen,
519	.shutdown =	unix_shutdown,
520	.setsockopt =	sock_no_setsockopt,
521	.getsockopt =	sock_no_getsockopt,
522	.sendmsg =	unix_stream_sendmsg,
523	.recvmsg =	unix_stream_recvmsg,
524	.mmap =		sock_no_mmap,
525	.sendpage =	sock_no_sendpage,
526};
527
528static const struct proto_ops unix_dgram_ops = {
529	.family =	PF_UNIX,
530	.owner =	THIS_MODULE,
531	.release =	unix_release,
532	.bind =		unix_bind,
533	.connect =	unix_dgram_connect,
534	.socketpair =	unix_socketpair,
535	.accept =	sock_no_accept,
536	.getname =	unix_getname,
537	.poll =		unix_dgram_poll,
538	.ioctl =	unix_ioctl,
539	.listen =	sock_no_listen,
540	.shutdown =	unix_shutdown,
541	.setsockopt =	sock_no_setsockopt,
542	.getsockopt =	sock_no_getsockopt,
543	.sendmsg =	unix_dgram_sendmsg,
544	.recvmsg =	unix_dgram_recvmsg,
545	.mmap =		sock_no_mmap,
546	.sendpage =	sock_no_sendpage,
547};
548
549static const struct proto_ops unix_seqpacket_ops = {
550	.family =	PF_UNIX,
551	.owner =	THIS_MODULE,
552	.release =	unix_release,
553	.bind =		unix_bind,
554	.connect =	unix_stream_connect,
555	.socketpair =	unix_socketpair,
556	.accept =	unix_accept,
557	.getname =	unix_getname,
558	.poll =		unix_dgram_poll,
559	.ioctl =	unix_ioctl,
560	.listen =	unix_listen,
561	.shutdown =	unix_shutdown,
562	.setsockopt =	sock_no_setsockopt,
563	.getsockopt =	sock_no_getsockopt,
564	.sendmsg =	unix_seqpacket_sendmsg,
565	.recvmsg =	unix_dgram_recvmsg,
566	.mmap =		sock_no_mmap,
567	.sendpage =	sock_no_sendpage,
568};
569
570static struct proto unix_proto = {
571	.name			= "UNIX",
572	.owner			= THIS_MODULE,
573	.obj_size		= sizeof(struct unix_sock),
574};
575
576/*
577 * AF_UNIX sockets do not interact with hardware, hence they
578 * don't trigger interrupts - so it's safe for them to have
579 * bh-unsafe locking for their sk_receive_queue.lock. Split off
580 * this special lock-class by reinitializing the spinlock key:
581 */
582static struct lock_class_key af_unix_sk_receive_queue_lock_key;
583
584static struct sock *unix_create1(struct net *net, struct socket *sock)
585{
586	struct sock *sk = NULL;
587	struct unix_sock *u;
588
589	atomic_inc(&unix_nr_socks);
590	if (atomic_read(&unix_nr_socks) > 2 * get_max_files())
591		goto out;
592
593	sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto);
594	if (!sk)
595		goto out;
596
597	sock_init_data(sock, sk);
598	lockdep_set_class(&sk->sk_receive_queue.lock,
599				&af_unix_sk_receive_queue_lock_key);
600
601	sk->sk_write_space	= unix_write_space;
602	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
603	sk->sk_destruct		= unix_sock_destructor;
604	u	  = unix_sk(sk);
605	u->dentry = NULL;
606	u->mnt	  = NULL;
607	spin_lock_init(&u->lock);
608	atomic_long_set(&u->inflight, 0);
609	INIT_LIST_HEAD(&u->link);
610	mutex_init(&u->readlock); /* single task reading lock */
611	init_waitqueue_head(&u->peer_wait);
612	unix_insert_socket(unix_sockets_unbound, sk);
613out:
614	if (sk == NULL)
615		atomic_dec(&unix_nr_socks);
616	else {
617		local_bh_disable();
618		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
619		local_bh_enable();
620	}
621	return sk;
622}
623
624static int unix_create(struct net *net, struct socket *sock, int protocol)
625{
626	if (protocol && protocol != PF_UNIX)
627		return -EPROTONOSUPPORT;
628
629	sock->state = SS_UNCONNECTED;
630
631	switch (sock->type) {
632	case SOCK_STREAM:
633		sock->ops = &unix_stream_ops;
634		break;
635		/*
636		 *	Believe it or not, BSD has AF_UNIX SOCK_RAW,
637		 *	though nothing uses it.
638		 */
639	case SOCK_RAW:
640		sock->type = SOCK_DGRAM;
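		/* fall through: SOCK_RAW is silently treated as SOCK_DGRAM */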
641	case SOCK_DGRAM:
642		sock->ops = &unix_dgram_ops;
643		break;
644	case SOCK_SEQPACKET:
645		sock->ops = &unix_seqpacket_ops;
646		break;
647	default:
648		return -ESOCKTNOSUPPORT;
649	}
650
651	return unix_create1(net, sock) ? 0 : -ENOMEM;
652}
653
654static int unix_release(struct socket *sock)
655{
656	struct sock *sk = sock->sk;
657
658	if (!sk)
659		return 0;
660
661	sock->sk = NULL;
662
663	return unix_release_sock(sk, 0);
664}
665
666static int unix_autobind(struct socket *sock)
667{
668	struct sock *sk = sock->sk;
669	struct net *net = sock_net(sk);
670	struct unix_sock *u = unix_sk(sk);
671	static u32 ordernum = 1;
672	struct unix_address *addr;
673	int err;
674
675	mutex_lock(&u->readlock);
676
677	err = 0;
678	if (u->addr)
679		goto out;
680
681	err = -ENOMEM;
682	addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
683	if (!addr)
684		goto out;
685
686	addr->name->sun_family = AF_UNIX;
687	atomic_set(&addr->refcnt, 1);
688
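	/* Autobound names occupy a 20-bit namespace: a leading zero byte
	 * (an abstract name) followed by five hex digits, so ordernum wraps
	 * after 0xFFFFF and the retry loop scans for a free slot.
	 */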
689retry:
690	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
691	addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
692
693	spin_lock(&unix_table_lock);
694	ordernum = (ordernum+1)&0xFFFFF;
695
696	if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
697				      addr->hash)) {
698		spin_unlock(&unix_table_lock);
699		/* Sanity yield. This is an unusual case, but still... */
700		if (!(ordernum&0xFF))
701			yield();
702		goto retry;
703	}
704	addr->hash ^= sk->sk_type;
705
706	__unix_remove_socket(sk);
707	u->addr = addr;
708	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
709	spin_unlock(&unix_table_lock);
710	err = 0;
711
712out:	mutex_unlock(&u->readlock);
713	return err;
714}
715
716static struct sock *unix_find_other(struct net *net,
717				    struct sockaddr_un *sunname, int len,
718				    int type, unsigned hash, int *error)
719{
720	struct sock *u;
721	struct path path;
722	int err = 0;
723
724	if (sunname->sun_path[0]) {
725		struct inode *inode;
726		err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
727		if (err)
728			goto fail;
729		inode = path.dentry->d_inode;
730		err = inode_permission(inode, MAY_WRITE);
731		if (err)
732			goto put_fail;
733
734		err = -ECONNREFUSED;
735		if (!S_ISSOCK(inode->i_mode))
736			goto put_fail;
737		u = unix_find_socket_byinode(net, inode);
738		if (!u)
739			goto put_fail;
740
741		if (u->sk_type == type)
742			touch_atime(path.mnt, path.dentry);
743
744		path_put(&path);
745
746		err = -EPROTOTYPE;
747		if (u->sk_type != type) {
748			sock_put(u);
749			goto fail;
750		}
751	} else {
752		err = -ECONNREFUSED;
753		u = unix_find_socket_byname(net, sunname, len, type, hash);
754		if (u) {
755			struct dentry *dentry;
756			dentry = unix_sk(u)->dentry;
757			if (dentry)
758				touch_atime(unix_sk(u)->mnt, dentry);
759		} else
760			goto fail;
761	}
762	return u;
763
764put_fail:
765	path_put(&path);
766fail:
767	*error = err;
768	return NULL;
769}
770
771
772static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
773{
774	struct sock *sk = sock->sk;
775	struct net *net = sock_net(sk);
776	struct unix_sock *u = unix_sk(sk);
777	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
778	struct dentry *dentry = NULL;
779	struct nameidata nd;
780	int err;
781	unsigned hash;
782	struct unix_address *addr;
783	struct hlist_head *list;
784
785	err = -EINVAL;
786	if (sunaddr->sun_family != AF_UNIX)
787		goto out;
788
789	if (addr_len == sizeof(short)) {
790		err = unix_autobind(sock);
791		goto out;
792	}
793
794	err = unix_mkname(sunaddr, addr_len, &hash);
795	if (err < 0)
796		goto out;
797	addr_len = err;
798
799	mutex_lock(&u->readlock);
800
801	err = -EINVAL;
802	if (u->addr)
803		goto out_up;
804
805	err = -ENOMEM;
806	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
807	if (!addr)
808		goto out_up;
809
810	memcpy(addr->name, sunaddr, addr_len);
811	addr->len = addr_len;
812	addr->hash = hash ^ sk->sk_type;
813	atomic_set(&addr->refcnt, 1);
814
815	if (sunaddr->sun_path[0]) {
816		unsigned int mode;
817		err = 0;
818		/*
819		 * Get the parent directory, calculate the hash for last
820		 * component.
821		 */
822		err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd);
823		if (err)
824			goto out_mknod_parent;
825
826		dentry = lookup_create(&nd, 0);
827		err = PTR_ERR(dentry);
828		if (IS_ERR(dentry))
829			goto out_mknod_unlock;
830
831		/*
832		 * All right, let's create it.
833		 */
834		mode = S_IFSOCK |
835		       (SOCK_INODE(sock)->i_mode & ~current->fs->umask);
836		err = mnt_want_write(nd.path.mnt);
837		if (err)
838			goto out_mknod_dput;
839		err = security_path_mknod(&nd.path, dentry, mode, 0);
840		if (err)
841			goto out_mknod_drop_write;
842		err = vfs_mknod(nd.path.dentry->d_inode, dentry, mode, 0);
843out_mknod_drop_write:
844		mnt_drop_write(nd.path.mnt);
845		if (err)
846			goto out_mknod_dput;
847		mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
848		dput(nd.path.dentry);
849		nd.path.dentry = dentry;
850
851		addr->hash = UNIX_HASH_SIZE;
852	}
853
854	spin_lock(&unix_table_lock);
855
856	if (!sunaddr->sun_path[0]) {
857		err = -EADDRINUSE;
858		if (__unix_find_socket_byname(net, sunaddr, addr_len,
859					      sk->sk_type, hash)) {
860			unix_release_addr(addr);
861			goto out_unlock;
862		}
863
864		list = &unix_socket_table[addr->hash];
865	} else {
866		list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
867		u->dentry = nd.path.dentry;
868		u->mnt    = nd.path.mnt;
869	}
870
871	err = 0;
872	__unix_remove_socket(sk);
873	u->addr = addr;
874	__unix_insert_socket(list, sk);
875
876out_unlock:
877	spin_unlock(&unix_table_lock);
878out_up:
879	mutex_unlock(&u->readlock);
880out:
881	return err;
882
883out_mknod_dput:
884	dput(dentry);
885out_mknod_unlock:
886	mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
887	path_put(&nd.path);
888out_mknod_parent:
889	if (err == -EEXIST)
890		err = -EADDRINUSE;
891	unix_release_addr(addr);
892	goto out_up;
893}
894
895static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
896{
897	if (unlikely(sk1 == sk2) || !sk2) {
898		unix_state_lock(sk1);
899		return;
900	}
901	if (sk1 < sk2) {
902		unix_state_lock(sk1);
903		unix_state_lock_nested(sk2);
904	} else {
905		unix_state_lock(sk2);
906		unix_state_lock_nested(sk1);
907	}
908}
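/* Taking the two state locks in address order gives a globally consistent
 * lock order, so two tasks cross-connecting a pair of dgram sockets to
 * each other cannot deadlock.
 */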
909
910static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
911{
912	if (unlikely(sk1 == sk2) || !sk2) {
913		unix_state_unlock(sk1);
914		return;
915	}
916	unix_state_unlock(sk1);
917	unix_state_unlock(sk2);
918}
919
920static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
921			      int alen, int flags)
922{
923	struct sock *sk = sock->sk;
924	struct net *net = sock_net(sk);
925	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
926	struct sock *other;
927	unsigned hash;
928	int err;
929
930	if (addr->sa_family != AF_UNSPEC) {
931		err = unix_mkname(sunaddr, alen, &hash);
932		if (err < 0)
933			goto out;
934		alen = err;
935
936		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
937		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
938			goto out;
939
940restart:
941		other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
942		if (!other)
943			goto out;
944
945		unix_state_double_lock(sk, other);
946
947		/* Apparently VFS overslept socket death. Retry. */
948		if (sock_flag(other, SOCK_DEAD)) {
949			unix_state_double_unlock(sk, other);
950			sock_put(other);
951			goto restart;
952		}
953
954		err = -EPERM;
955		if (!unix_may_send(sk, other))
956			goto out_unlock;
957
958		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
959		if (err)
960			goto out_unlock;
961
962	} else {
963		/*
964		 *	1003.1g breaking connected state with AF_UNSPEC
965		 */
966		other = NULL;
967		unix_state_double_lock(sk, other);
968	}
969
970	/*
971	 * If it was connected, reconnect.
972	 */
973	if (unix_peer(sk)) {
974		struct sock *old_peer = unix_peer(sk);
975		unix_peer(sk) = other;
976		unix_state_double_unlock(sk, other);
977
978		if (other != old_peer)
979			unix_dgram_disconnected(sk, old_peer);
980		sock_put(old_peer);
981	} else {
982		unix_peer(sk) = other;
983		unix_state_double_unlock(sk, other);
984	}
985	return 0;
986
987out_unlock:
988	unix_state_double_unlock(sk, other);
989	sock_put(other);
990out:
991	return err;
992}
993
994static long unix_wait_for_peer(struct sock *other, long timeo)
995{
996	struct unix_sock *u = unix_sk(other);
997	int sched;
998	DEFINE_WAIT(wait);
999
1000	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1001
1002	sched = !sock_flag(other, SOCK_DEAD) &&
1003		!(other->sk_shutdown & RCV_SHUTDOWN) &&
1004		unix_recvq_full(other);
1005
1006	unix_state_unlock(other);
1007
1008	if (sched)
1009		timeo = schedule_timeout(timeo);
1010
1011	finish_wait(&u->peer_wait, &wait);
1012	return timeo;
1013}
1014
1015static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1016			       int addr_len, int flags)
1017{
1018	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1019	struct sock *sk = sock->sk;
1020	struct net *net = sock_net(sk);
1021	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1022	struct sock *newsk = NULL;
1023	struct sock *other = NULL;
1024	struct sk_buff *skb = NULL;
1025	unsigned hash;
1026	int st;
1027	int err;
1028	long timeo;
1029
1030	err = unix_mkname(sunaddr, addr_len, &hash);
1031	if (err < 0)
1032		goto out;
1033	addr_len = err;
1034
1035	if (test_bit(SOCK_PASSCRED, &sock->flags)
1036		&& !u->addr && (err = unix_autobind(sock)) != 0)
1037		goto out;
1038
1039	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1040
1041	/* First of all allocate resources.
1042	   If we do this after the state is locked,
1043	   we will have to recheck everything again in any case.
1044	 */
1045
1046	err = -ENOMEM;
1047
1048	/* create new sock for complete connection */
1049	newsk = unix_create1(sock_net(sk), NULL);
1050	if (newsk == NULL)
1051		goto out;
1052
1053	/* Allocate skb for sending to listening sock */
1054	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1055	if (skb == NULL)
1056		goto out;
1057
1058restart:
1059	/*  Find listening sock. */
1060	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
1061	if (!other)
1062		goto out;
1063
1064	/* Latch state of peer */
1065	unix_state_lock(other);
1066
1067	/* Apparently VFS overslept socket death. Retry. */
1068	if (sock_flag(other, SOCK_DEAD)) {
1069		unix_state_unlock(other);
1070		sock_put(other);
1071		goto restart;
1072	}
1073
1074	err = -ECONNREFUSED;
1075	if (other->sk_state != TCP_LISTEN)
1076		goto out_unlock;
1077
1078	if (unix_recvq_full(other)) {
1079		err = -EAGAIN;
1080		if (!timeo)
1081			goto out_unlock;
1082
1083		timeo = unix_wait_for_peer(other, timeo);
1084
1085		err = sock_intr_errno(timeo);
1086		if (signal_pending(current))
1087			goto out;
1088		sock_put(other);
1089		goto restart;
1090	}
1091
1092	/* Latch our state.
1093
1094	   This is a tricky place. We need to grab our own state lock and
1095	   cannot drop the lock on the peer. It is dangerous because deadlock
1096	   is possible. The connect-to-self case and a simultaneous
1097	   attempt to connect are eliminated by checking the socket
1098	   state: other is TCP_LISTEN, and if sk were TCP_LISTEN we
1099	   would have checked this before attempting to grab the lock.
1100
1101	   And we have to recheck the state after the socket is locked.
1102	 */
1103	st = sk->sk_state;
1104
1105	switch (st) {
1106	case TCP_CLOSE:
1107		/* This is ok... continue with connect */
1108		break;
1109	case TCP_ESTABLISHED:
1110		/* Socket is already connected */
1111		err = -EISCONN;
1112		goto out_unlock;
1113	default:
1114		err = -EINVAL;
1115		goto out_unlock;
1116	}
1117
1118	unix_state_lock_nested(sk);
1119
1120	if (sk->sk_state != st) {
1121		unix_state_unlock(sk);
1122		unix_state_unlock(other);
1123		sock_put(other);
1124		goto restart;
1125	}
1126
1127	err = security_unix_stream_connect(sock, other->sk_socket, newsk);
1128	if (err) {
1129		unix_state_unlock(sk);
1130		goto out_unlock;
1131	}
1132
1133	/* The way is open! Quickly set all the necessary fields... */
1134
1135	sock_hold(sk);
1136	unix_peer(newsk)	= sk;
1137	newsk->sk_state		= TCP_ESTABLISHED;
1138	newsk->sk_type		= sk->sk_type;
1139	newsk->sk_peercred.pid	= task_tgid_vnr(current);
1140	current_euid_egid(&newsk->sk_peercred.uid, &newsk->sk_peercred.gid);
1141	newu = unix_sk(newsk);
1142	newsk->sk_sleep		= &newu->peer_wait;
1143	otheru = unix_sk(other);
1144
1145	/* copy address information from listening to new sock */
1146	if (otheru->addr) {
1147		atomic_inc(&otheru->addr->refcnt);
1148		newu->addr = otheru->addr;
1149	}
1150	if (otheru->dentry) {
1151		newu->dentry	= dget(otheru->dentry);
1152		newu->mnt	= mntget(otheru->mnt);
1153	}
1154
1155	/* Set credentials */
1156	sk->sk_peercred = other->sk_peercred;
1157
1158	sock->state	= SS_CONNECTED;
1159	sk->sk_state	= TCP_ESTABLISHED;
1160	sock_hold(newsk);
1161
1162	smp_mb__after_atomic_inc();	/* sock_hold() does an atomic_inc() */
1163	unix_peer(sk)	= newsk;
1164
1165	unix_state_unlock(sk);
1166
1167	/* take ten and send info to listening sock */
1168	spin_lock(&other->sk_receive_queue.lock);
1169	__skb_queue_tail(&other->sk_receive_queue, skb);
1170	spin_unlock(&other->sk_receive_queue.lock);
1171	unix_state_unlock(other);
1172	other->sk_data_ready(other, 0);
1173	sock_put(other);
1174	return 0;
1175
1176out_unlock:
1177	if (other)
1178		unix_state_unlock(other);
1179
1180out:
1181	kfree_skb(skb);
1182	if (newsk)
1183		unix_release_sock(newsk, 0);
1184	if (other)
1185		sock_put(other);
1186	return err;
1187}
1188
1189static int unix_socketpair(struct socket *socka, struct socket *sockb)
1190{
1191	struct sock *ska = socka->sk, *skb = sockb->sk;
1192
1193	/* Join our sockets back to back */
1194	sock_hold(ska);
1195	sock_hold(skb);
1196	unix_peer(ska) = skb;
1197	unix_peer(skb) = ska;
1198	ska->sk_peercred.pid = skb->sk_peercred.pid = task_tgid_vnr(current);
1199	current_euid_egid(&skb->sk_peercred.uid, &skb->sk_peercred.gid);
1200	ska->sk_peercred.uid = skb->sk_peercred.uid;
1201	ska->sk_peercred.gid = skb->sk_peercred.gid;
1202
1203	if (ska->sk_type != SOCK_DGRAM) {
1204		ska->sk_state = TCP_ESTABLISHED;
1205		skb->sk_state = TCP_ESTABLISHED;
1206		socka->state  = SS_CONNECTED;
1207		sockb->state  = SS_CONNECTED;
1208	}
1209	return 0;
1210}
1211
1212static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1213{
1214	struct sock *sk = sock->sk;
1215	struct sock *tsk;
1216	struct sk_buff *skb;
1217	int err;
1218
1219	err = -EOPNOTSUPP;
1220	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1221		goto out;
1222
1223	err = -EINVAL;
1224	if (sk->sk_state != TCP_LISTEN)
1225		goto out;
1226
1227	/* If socket state is TCP_LISTEN it cannot change (for now...),
1228	 * so no locks are necessary.
1229	 */
1230
1231	skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1232	if (!skb) {
1233		/* This means receive shutdown. */
1234		if (err == 0)
1235			err = -EINVAL;
1236		goto out;
1237	}
1238
1239	tsk = skb->sk;
1240	skb_free_datagram(sk, skb);
1241	wake_up_interruptible(&unix_sk(sk)->peer_wait);
1242
1243	/* attach accepted sock to socket */
1244	unix_state_lock(tsk);
1245	newsock->state = SS_CONNECTED;
1246	sock_graft(tsk, newsock);
1247	unix_state_unlock(tsk);
1248	return 0;
1249
1250out:
1251	return err;
1252}
1253
1254
1255static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1256{
1257	struct sock *sk = sock->sk;
1258	struct unix_sock *u;
1259	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1260	int err = 0;
1261
1262	if (peer) {
1263		sk = unix_peer_get(sk);
1264
1265		err = -ENOTCONN;
1266		if (!sk)
1267			goto out;
1268		err = 0;
1269	} else {
1270		sock_hold(sk);
1271	}
1272
1273	u = unix_sk(sk);
1274	unix_state_lock(sk);
1275	if (!u->addr) {
1276		sunaddr->sun_family = AF_UNIX;
1277		sunaddr->sun_path[0] = 0;
1278		*uaddr_len = sizeof(short);
1279	} else {
1280		struct unix_address *addr = u->addr;
1281
1282		*uaddr_len = addr->len;
1283		memcpy(sunaddr, addr->name, *uaddr_len);
1284	}
1285	unix_state_unlock(sk);
1286	sock_put(sk);
1287out:
1288	return err;
1289}
1290
1291static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1292{
1293	int i;
1294
1295	scm->fp = UNIXCB(skb).fp;
1296	skb->destructor = sock_wfree;
1297	UNIXCB(skb).fp = NULL;
1298
1299	for (i = scm->fp->count-1; i >= 0; i--)
1300		unix_notinflight(scm->fp->fp[i]);
1301}
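/* Receiving the fds takes them out of flight: the skb destructor reverts
 * to plain sock_wfree and each file's inflight count is dropped, so the
 * garbage collector stops tracking them.
 */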
1302
1303static void unix_destruct_fds(struct sk_buff *skb)
1304{
1305	struct scm_cookie scm;
1306	memset(&scm, 0, sizeof(scm));
1307	unix_detach_fds(&scm, skb);
1308
1309	/* Alas, it calls VFS */
1310	/* So fscking what? fput() has been SMP-safe since last summer */
1311	scm_destroy(&scm);
1312	sock_wfree(skb);
1313}
1314
1315static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1316{
1317	int i;
1318
1319	/*
1320	 * Need to duplicate file references for the sake of garbage
1321	 * collection.  Otherwise a socket in the fps might become a
1322	 * candidate for GC while the skb is not yet queued.
1323	 */
1324	UNIXCB(skb).fp = scm_fp_dup(scm->fp);
1325	if (!UNIXCB(skb).fp)
1326		return -ENOMEM;
1327
1328	for (i = scm->fp->count-1; i >= 0; i--)
1329		unix_inflight(scm->fp->fp[i]);
1330	skb->destructor = unix_destruct_fds;
1331	return 0;
1332}
1333
1334/*
1335 *	Send AF_UNIX data.
1336 */
1337
1338static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
1339			      struct msghdr *msg, size_t len)
1340{
1341	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1342	struct sock *sk = sock->sk;
1343	struct net *net = sock_net(sk);
1344	struct unix_sock *u = unix_sk(sk);
1345	struct sockaddr_un *sunaddr = msg->msg_name;
1346	struct sock *other = NULL;
1347	int namelen = 0; /* fake GCC */
1348	int err;
1349	unsigned hash;
1350	struct sk_buff *skb;
1351	long timeo;
1352	struct scm_cookie tmp_scm;
1353
1354	if (NULL == siocb->scm)
1355		siocb->scm = &tmp_scm;
1356	wait_for_unix_gc();
1357	err = scm_send(sock, msg, siocb->scm);
1358	if (err < 0)
1359		return err;
1360
1361	err = -EOPNOTSUPP;
1362	if (msg->msg_flags&MSG_OOB)
1363		goto out;
1364
1365	if (msg->msg_namelen) {
1366		err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1367		if (err < 0)
1368			goto out;
1369		namelen = err;
1370	} else {
1371		sunaddr = NULL;
1372		err = -ENOTCONN;
1373		other = unix_peer_get(sk);
1374		if (!other)
1375			goto out;
1376	}
1377
1378	if (test_bit(SOCK_PASSCRED, &sock->flags)
1379		&& !u->addr && (err = unix_autobind(sock)) != 0)
1380		goto out;
1381
1382	err = -EMSGSIZE;
1383	if (len > sk->sk_sndbuf - 32)
1384		goto out;
1385
1386	skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err);
1387	if (skb == NULL)
1388		goto out;
1389
1390	memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1391	if (siocb->scm->fp) {
1392		err = unix_attach_fds(siocb->scm, skb);
1393		if (err)
1394			goto out_free;
1395	}
1396	unix_get_secdata(siocb->scm, skb);
1397
1398	skb_reset_transport_header(skb);
1399	err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
1400	if (err)
1401		goto out_free;
1402
1403	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1404
1405restart:
1406	if (!other) {
1407		err = -ECONNRESET;
1408		if (sunaddr == NULL)
1409			goto out_free;
1410
1411		other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1412					hash, &err);
1413		if (other == NULL)
1414			goto out_free;
1415	}
1416
1417	unix_state_lock(other);
1418	err = -EPERM;
1419	if (!unix_may_send(sk, other))
1420		goto out_unlock;
1421
1422	if (sock_flag(other, SOCK_DEAD)) {
1423		/*
1424		 *	Check with 1003.1g - what should a
1425		 *	datagram error be here?
1426		 */
1427		unix_state_unlock(other);
1428		sock_put(other);
1429
1430		err = 0;
1431		unix_state_lock(sk);
1432		if (unix_peer(sk) == other) {
1433			unix_peer(sk) = NULL;
1434			unix_state_unlock(sk);
1435
1436			unix_dgram_disconnected(sk, other);
1437			sock_put(other);
1438			err = -ECONNREFUSED;
1439		} else {
1440			unix_state_unlock(sk);
1441		}
1442
1443		other = NULL;
1444		if (err)
1445			goto out_free;
1446		goto restart;
1447	}
1448
1449	err = -EPIPE;
1450	if (other->sk_shutdown & RCV_SHUTDOWN)
1451		goto out_unlock;
1452
1453	if (sk->sk_type != SOCK_SEQPACKET) {
1454		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1455		if (err)
1456			goto out_unlock;
1457	}
1458
1459	if (unix_peer(other) != sk && unix_recvq_full(other)) {
1460		if (!timeo) {
1461			err = -EAGAIN;
1462			goto out_unlock;
1463		}
1464
1465		timeo = unix_wait_for_peer(other, timeo);
1466
1467		err = sock_intr_errno(timeo);
1468		if (signal_pending(current))
1469			goto out_free;
1470
1471		goto restart;
1472	}
1473
1474	skb_queue_tail(&other->sk_receive_queue, skb);
1475	unix_state_unlock(other);
1476	other->sk_data_ready(other, len);
1477	sock_put(other);
1478	scm_destroy(siocb->scm);
1479	return len;
1480
1481out_unlock:
1482	unix_state_unlock(other);
1483out_free:
1484	kfree_skb(skb);
1485out:
1486	if (other)
1487		sock_put(other);
1488	scm_destroy(siocb->scm);
1489	return err;
1490}
1491
1492
1493static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1494			       struct msghdr *msg, size_t len)
1495{
1496	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1497	struct sock *sk = sock->sk;
1498	struct sock *other = NULL;
1499	struct sockaddr_un *sunaddr = msg->msg_name;
1500	int err, size;
1501	struct sk_buff *skb;
1502	int sent = 0;
1503	struct scm_cookie tmp_scm;
1504
1505	if (NULL == siocb->scm)
1506		siocb->scm = &tmp_scm;
1507	wait_for_unix_gc();
1508	err = scm_send(sock, msg, siocb->scm);
1509	if (err < 0)
1510		return err;
1511
1512	err = -EOPNOTSUPP;
1513	if (msg->msg_flags&MSG_OOB)
1514		goto out_err;
1515
1516	if (msg->msg_namelen) {
1517		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1518		goto out_err;
1519	} else {
1520		sunaddr = NULL;
1521		err = -ENOTCONN;
1522		other = unix_peer(sk);
1523		if (!other)
1524			goto out_err;
1525	}
1526
1527	if (sk->sk_shutdown & SEND_SHUTDOWN)
1528		goto pipe_err;
1529
1530	while (sent < len) {
1531		/*
1532		 *	Optimisation for the fact that under 0.01% of X
1533		 *	messages typically need breaking up.
1534		 */
1535
1536		size = len-sent;
1537
1538		/* Keep two messages in the pipe so it schedules better */
1539		if (size > ((sk->sk_sndbuf >> 1) - 64))
1540			size = (sk->sk_sndbuf >> 1) - 64;
1541
1542		if (size > SKB_MAX_ALLOC)
1543			size = SKB_MAX_ALLOC;
1544
1545		/*
1546		 *	Grab a buffer
1547		 */
1548
1549		skb = sock_alloc_send_skb(sk, size, msg->msg_flags&MSG_DONTWAIT,
1550					  &err);
1551
1552		if (skb == NULL)
1553			goto out_err;
1554
1555		/*
1556		 *	If you pass two values to sock_alloc_send_skb
1557		 *	it tries to grab the large buffer with GFP_NOFS
1558		 *	(which can fail easily), and if that fails it grabs
1559		 *	the fallback size buffer, which is under a page and
1560		 *	will succeed. [Alan]
1561		 */
1562		size = min_t(int, size, skb_tailroom(skb));
1563
1564		memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1565		if (siocb->scm->fp) {
1566			err = unix_attach_fds(siocb->scm, skb);
1567			if (err) {
1568				kfree_skb(skb);
1569				goto out_err;
1570			}
1571		}
1572
1573		err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
1574		if (err) {
1575			kfree_skb(skb);
1576			goto out_err;
1577		}
1578
1579		unix_state_lock(other);
1580
1581		if (sock_flag(other, SOCK_DEAD) ||
1582		    (other->sk_shutdown & RCV_SHUTDOWN))
1583			goto pipe_err_free;
1584
1585		skb_queue_tail(&other->sk_receive_queue, skb);
1586		unix_state_unlock(other);
1587		other->sk_data_ready(other, size);
1588		sent += size;
1589	}
1590
1591	scm_destroy(siocb->scm);
1592	siocb->scm = NULL;
1593
1594	return sent;
1595
1596pipe_err_free:
1597	unix_state_unlock(other);
1598	kfree_skb(skb);
1599pipe_err:
1600	if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1601		send_sig(SIGPIPE, current, 0);
1602	err = -EPIPE;
1603out_err:
1604	scm_destroy(siocb->scm);
1605	siocb->scm = NULL;
1606	return sent ? : err;
1607}
1608
1609static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
1610				  struct msghdr *msg, size_t len)
1611{
1612	int err;
1613	struct sock *sk = sock->sk;
1614
1615	err = sock_error(sk);
1616	if (err)
1617		return err;
1618
1619	if (sk->sk_state != TCP_ESTABLISHED)
1620		return -ENOTCONN;
1621
1622	if (msg->msg_namelen)
1623		msg->msg_namelen = 0;
1624
1625	return unix_dgram_sendmsg(kiocb, sock, msg, len);
1626}
1627
1628static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
1629{
1630	struct unix_sock *u = unix_sk(sk);
1631
1632	msg->msg_namelen = 0;
1633	if (u->addr) {
1634		msg->msg_namelen = u->addr->len;
1635		memcpy(msg->msg_name, u->addr->name, u->addr->len);
1636	}
1637}
1638
1639static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
1640			      struct msghdr *msg, size_t size,
1641			      int flags)
1642{
1643	struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1644	struct scm_cookie tmp_scm;
1645	struct sock *sk = sock->sk;
1646	struct unix_sock *u = unix_sk(sk);
1647	int noblock = flags & MSG_DONTWAIT;
1648	struct sk_buff *skb;
1649	int err;
1650
1651	err = -EOPNOTSUPP;
1652	if (flags&MSG_OOB)
1653		goto out;
1654
1655	msg->msg_namelen = 0;
1656
1657	mutex_lock(&u->readlock);
1658
1659	skb = skb_recv_datagram(sk, flags, noblock, &err);
1660	if (!skb) {
1661		unix_state_lock(sk);
1662		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
1663		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
1664		    (sk->sk_shutdown & RCV_SHUTDOWN))
1665			err = 0;
1666		unix_state_unlock(sk);
1667		goto out_unlock;
1668	}
1669
1670	wake_up_interruptible_sync(&u->peer_wait);
1671
1672	if (msg->msg_name)
1673		unix_copy_addr(msg, skb->sk);
1674
1675	if (size > skb->len)
1676		size = skb->len;
1677	else if (size < skb->len)
1678		msg->msg_flags |= MSG_TRUNC;
1679
1680	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size);
1681	if (err)
1682		goto out_free;
1683
1684	if (!siocb->scm) {
1685		siocb->scm = &tmp_scm;
1686		memset(&tmp_scm, 0, sizeof(tmp_scm));
1687	}
1688	siocb->scm->creds = *UNIXCREDS(skb);
1689	unix_set_secdata(siocb->scm, skb);
1690
1691	if (!(flags & MSG_PEEK)) {
1692		if (UNIXCB(skb).fp)
1693			unix_detach_fds(siocb->scm, skb);
1694	} else {
1695		/* It is questionable: on PEEK we could:
1696		   - not return fds - good, but too simple 8)
1697		   - return fds, and do not return them on read (old strategy,
1698		     apparently wrong)
1699		   - clone fds (I chose it for now, it is the most universal
1700		     solution)
1701
1702		   POSIX 1003.1g does not actually define this clearly
1703		   at all. POSIX 1003.1g doesn't define a lot of things
1704		   clearly however!
1705
1706		*/
1707		if (UNIXCB(skb).fp)
1708			siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1709	}
1710	err = size;
1711
1712	scm_recv(sock, msg, siocb->scm, flags);
1713
1714out_free:
1715	skb_free_datagram(sk, skb);
1716out_unlock:
1717	mutex_unlock(&u->readlock);
1718out:
1719	return err;
1720}
1721
1722/*
1723 *	Sleep until data has arrived. But check for races..
1724 */
1725
1726static long unix_stream_data_wait(struct sock *sk, long timeo)
1727{
1728	DEFINE_WAIT(wait);
1729
1730	unix_state_lock(sk);
1731
1732	for (;;) {
1733		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
1734
1735		if (!skb_queue_empty(&sk->sk_receive_queue) ||
1736		    sk->sk_err ||
1737		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
1738		    signal_pending(current) ||
1739		    !timeo)
1740			break;
1741
1742		set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1743		unix_state_unlock(sk);
1744		timeo = schedule_timeout(timeo);
1745		unix_state_lock(sk);
1746		clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1747	}
1748
1749	finish_wait(sk->sk_sleep, &wait);
1750	unix_state_unlock(sk);
1751	return timeo;
1752}
1753
1754
1755
1756static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
1757			       struct msghdr *msg, size_t size,
1758			       int flags)
1759{
1760	struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1761	struct scm_cookie tmp_scm;
1762	struct sock *sk = sock->sk;
1763	struct unix_sock *u = unix_sk(sk);
1764	struct sockaddr_un *sunaddr = msg->msg_name;
1765	int copied = 0;
1766	int check_creds = 0;
1767	int target;
1768	int err = 0;
1769	long timeo;
1770
1771	err = -EINVAL;
1772	if (sk->sk_state != TCP_ESTABLISHED)
1773		goto out;
1774
1775	err = -EOPNOTSUPP;
1776	if (flags&MSG_OOB)
1777		goto out;
1778
1779	target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
1780	timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT);
1781
1782	msg->msg_namelen = 0;
1783
1784	/* Lock the socket to prevent queue disordering
1785	 * while we sleep in memcpy_toiovec
1786	 */
1787
1788	if (!siocb->scm) {
1789		siocb->scm = &tmp_scm;
1790		memset(&tmp_scm, 0, sizeof(tmp_scm));
1791	}
1792
1793	mutex_lock(&u->readlock);
1794
1795	do {
1796		int chunk;
1797		struct sk_buff *skb;
1798
1799		unix_state_lock(sk);
1800		skb = skb_dequeue(&sk->sk_receive_queue);
1801		if (skb == NULL) {
1802			if (copied >= target)
1803				goto unlock;
1804
1805			/*
1806			 *	POSIX 1003.1g mandates this order.
1807			 */
1808
1809			err = sock_error(sk);
1810			if (err)
1811				goto unlock;
1812			if (sk->sk_shutdown & RCV_SHUTDOWN)
1813				goto unlock;
1814
1815			unix_state_unlock(sk);
1816			err = -EAGAIN;
1817			if (!timeo)
1818				break;
1819			mutex_unlock(&u->readlock);
1820
1821			timeo = unix_stream_data_wait(sk, timeo);
1822
1823			if (signal_pending(current)) {
1824				err = sock_intr_errno(timeo);
1825				goto out;
1826			}
1827			mutex_lock(&u->readlock);
1828			continue;
1829 unlock:
1830			unix_state_unlock(sk);
1831			break;
1832		}
1833		unix_state_unlock(sk);
1834
1835		if (check_creds) {
1836			/* Never glue messages from different writers */
1837			if (memcmp(UNIXCREDS(skb), &siocb->scm->creds,
1838				   sizeof(siocb->scm->creds)) != 0) {
1839				skb_queue_head(&sk->sk_receive_queue, skb);
1840				break;
1841			}
1842		} else {
1843			/* Copy credentials */
1844			siocb->scm->creds = *UNIXCREDS(skb);
1845			check_creds = 1;
1846		}
1847
1848		/* Copy address just once */
1849		if (sunaddr) {
1850			unix_copy_addr(msg, skb->sk);
1851			sunaddr = NULL;
1852		}
1853
1854		chunk = min_t(unsigned int, skb->len, size);
1855		if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
1856			skb_queue_head(&sk->sk_receive_queue, skb);
1857			if (copied == 0)
1858				copied = -EFAULT;
1859			break;
1860		}
1861		copied += chunk;
1862		size -= chunk;
1863
1864		/* Mark read part of skb as used */
1865		if (!(flags & MSG_PEEK)) {
1866			skb_pull(skb, chunk);
1867
1868			if (UNIXCB(skb).fp)
1869				unix_detach_fds(siocb->scm, skb);
1870
1871			/* put the skb back if we didn't use it up.. */
1872			if (skb->len) {
1873				skb_queue_head(&sk->sk_receive_queue, skb);
1874				break;
1875			}
1876
1877			kfree_skb(skb);
1878
1879			if (siocb->scm->fp)
1880				break;
1881		} else {
1882			/* It is questionable, see note in unix_dgram_recvmsg.
1883			 */
1884			if (UNIXCB(skb).fp)
1885				siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1886
1887			/* put message back and return */
1888			skb_queue_head(&sk->sk_receive_queue, skb);
1889			break;
1890		}
1891	} while (size);
1892
1893	mutex_unlock(&u->readlock);
1894	scm_recv(sock, msg, siocb->scm, flags);
1895out:
1896	return copied ? : err;
1897}
1898
1899static int unix_shutdown(struct socket *sock, int mode)
1900{
1901	struct sock *sk = sock->sk;
1902	struct sock *other;
1903
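	/* Map SHUT_RD (0), SHUT_WR (1) and SHUT_RDWR (2) onto the
	 * RCV_SHUTDOWN and SEND_SHUTDOWN bits by adding one: 0->1, 1->2,
	 * 2->3 (both bits).
	 */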
1904	mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);
1905
1906	if (mode) {
1907		unix_state_lock(sk);
1908		sk->sk_shutdown |= mode;
1909		other = unix_peer(sk);
1910		if (other)
1911			sock_hold(other);
1912		unix_state_unlock(sk);
1913		sk->sk_state_change(sk);
1914
1915		if (other &&
1916			(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
1917
1918			int peer_mode = 0;
1919
1920			if (mode&RCV_SHUTDOWN)
1921				peer_mode |= SEND_SHUTDOWN;
1922			if (mode&SEND_SHUTDOWN)
1923				peer_mode |= RCV_SHUTDOWN;
1924			unix_state_lock(other);
1925			other->sk_shutdown |= peer_mode;
1926			unix_state_unlock(other);
1927			other->sk_state_change(other);
1928			read_lock(&other->sk_callback_lock);
1929			if (peer_mode == SHUTDOWN_MASK)
1930				sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
1931			else if (peer_mode & RCV_SHUTDOWN)
1932				sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
1933			read_unlock(&other->sk_callback_lock);
1934		}
1935		if (other)
1936			sock_put(other);
1937	}
1938	return 0;
1939}
1940
1941static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1942{
1943	struct sock *sk = sock->sk;
1944	long amount = 0;
1945	int err;
1946
1947	switch (cmd) {
1948	case SIOCOUTQ:
1949		amount = atomic_read(&sk->sk_wmem_alloc);
1950		err = put_user(amount, (int __user *)arg);
1951		break;
1952	case SIOCINQ:
1953		{
1954			struct sk_buff *skb;
1955
1956			if (sk->sk_state == TCP_LISTEN) {
1957				err = -EINVAL;
1958				break;
1959			}
1960
1961			spin_lock(&sk->sk_receive_queue.lock);
1962			if (sk->sk_type == SOCK_STREAM ||
1963			    sk->sk_type == SOCK_SEQPACKET) {
1964				skb_queue_walk(&sk->sk_receive_queue, skb)
1965					amount += skb->len;
1966			} else {
1967				skb = skb_peek(&sk->sk_receive_queue);
1968				if (skb)
1969					amount = skb->len;
1970			}
1971			spin_unlock(&sk->sk_receive_queue.lock);
1972			err = put_user(amount, (int __user *)arg);
1973			break;
1974		}
1975
1976	default:
1977		err = -ENOIOCTLCMD;
1978		break;
1979	}
1980	return err;
1981}
1982
1983static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
1984{
1985	struct sock *sk = sock->sk;
1986	unsigned int mask;
1987
1988	poll_wait(file, sk->sk_sleep, wait);
1989	mask = 0;
1990
1991	/* exceptional events? */
1992	if (sk->sk_err)
1993		mask |= POLLERR;
1994	if (sk->sk_shutdown == SHUTDOWN_MASK)
1995		mask |= POLLHUP;
1996	if (sk->sk_shutdown & RCV_SHUTDOWN)
1997		mask |= POLLRDHUP;
1998
1999	/* readable? */
2000	if (!skb_queue_empty(&sk->sk_receive_queue) ||
2001	    (sk->sk_shutdown & RCV_SHUTDOWN))
2002		mask |= POLLIN | POLLRDNORM;
2003
2004	/* Connection-based need to check for termination and startup */
2005	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2006	    sk->sk_state == TCP_CLOSE)
2007		mask |= POLLHUP;
2008
2009	/*
2010	 * We also set writable when the other side has shut down the
2011	 * connection. This prevents stuck sockets.
2012	 */
2013	if (unix_writable(sk))
2014		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2015
2016	return mask;
2017}
2018
2019static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
2020				    poll_table *wait)
2021{
2022	struct sock *sk = sock->sk, *other;
2023	unsigned int mask, writable;
2024
2025	poll_wait(file, sk->sk_sleep, wait);
2026	mask = 0;
2027
2028	/* exceptional events? */
2029	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
2030		mask |= POLLERR;
2031	if (sk->sk_shutdown & RCV_SHUTDOWN)
2032		mask |= POLLRDHUP;
2033	if (sk->sk_shutdown == SHUTDOWN_MASK)
2034		mask |= POLLHUP;
2035
2036	/* readable? */
2037	if (!skb_queue_empty(&sk->sk_receive_queue) ||
2038	    (sk->sk_shutdown & RCV_SHUTDOWN))
2039		mask |= POLLIN | POLLRDNORM;
2040
2041	/* Connection-based need to check for termination and startup */
2042	if (sk->sk_type == SOCK_SEQPACKET) {
2043		if (sk->sk_state == TCP_CLOSE)
2044			mask |= POLLHUP;
2045		/* connection hasn't started yet? */
2046		if (sk->sk_state == TCP_SYN_SENT)
2047			return mask;
2048	}
2049
2050	/* writable? */
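	/* For a connected peer, writability also depends on the peer's
	 * receive queue: if it is full we would sleep on the peer's
	 * peer_wait queue, so register with that waitqueue as well.
	 */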
2051	writable = unix_writable(sk);
2052	if (writable) {
2053		other = unix_peer_get(sk);
2054		if (other) {
2055			if (unix_peer(other) != sk) {
2056				poll_wait(file, &unix_sk(other)->peer_wait,
2057					  wait);
2058				if (unix_recvq_full(other))
2059					writable = 0;
2060			}
2061
2062			sock_put(other);
2063		}
2064	}
2065
2066	if (writable)
2067		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2068	else
2069		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
2070
2071	return mask;
2072}
2073
2074#ifdef CONFIG_PROC_FS
2075static struct sock *first_unix_socket(int *i)
2076{
2077	for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) {
2078		if (!hlist_empty(&unix_socket_table[*i]))
2079			return __sk_head(&unix_socket_table[*i]);
2080	}
2081	return NULL;
2082}
2083
2084static struct sock *next_unix_socket(int *i, struct sock *s)
2085{
2086	struct sock *next = sk_next(s);
2087	/* More in this chain? */
2088	if (next)
2089		return next;
2090	/* Look for next non-empty chain. */
2091	for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) {
2092		if (!hlist_empty(&unix_socket_table[*i]))
2093			return __sk_head(&unix_socket_table[*i]);
2094	}
2095	return NULL;
2096}
2097
2098struct unix_iter_state {
2099	struct seq_net_private p;
2100	int i;
2101};
2102
2103static struct sock *unix_seq_idx(struct seq_file *seq, loff_t pos)
2104{
2105	struct unix_iter_state *iter = seq->private;
2106	loff_t off = 0;
2107	struct sock *s;
2108
2109	for (s = first_unix_socket(&iter->i); s; s = next_unix_socket(&iter->i, s)) {
2110		if (sock_net(s) != seq_file_net(seq))
2111			continue;
2112		if (off == pos)
2113			return s;
2114		++off;
2115	}
2116	return NULL;
2117}
2118
2119static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2120	__acquires(unix_table_lock)
2121{
2122	spin_lock(&unix_table_lock);
2123	return *pos ? unix_seq_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2124}
2125
2126static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2127{
2128	struct unix_iter_state *iter = seq->private;
2129	struct sock *sk = v;
2130	++*pos;
2131
2132	if (v == SEQ_START_TOKEN)
2133		sk = first_unix_socket(&iter->i);
2134	else
2135		sk = next_unix_socket(&iter->i, sk);
2136	while (sk && (sock_net(sk) != seq_file_net(seq)))
2137		sk = next_unix_socket(&iter->i, sk);
2138	return sk;
2139}
2140
2141static void unix_seq_stop(struct seq_file *seq, void *v)
2142	__releases(unix_table_lock)
2143{
2144	spin_unlock(&unix_table_lock);
2145}
2146
2147static int unix_seq_show(struct seq_file *seq, void *v)
2148{
2149
2150	if (v == SEQ_START_TOKEN)
2151		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
2152			 "Inode Path\n");
2153	else {
2154		struct sock *s = v;
2155		struct unix_sock *u = unix_sk(s);
2156		unix_state_lock(s);
2157
2158		seq_printf(seq, "%p: %08X %08X %08X %04X %02X %5lu",
2159			s,
2160			atomic_read(&s->sk_refcnt),
2161			0,
2162			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2163			s->sk_type,
2164			s->sk_socket ?
2165			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2166			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2167			sock_i_ino(s));
2168
2169		if (u->addr) {
2170			int i, len;
2171			seq_putc(seq, ' ');
2172
2173			i = 0;
2174			len = u->addr->len - sizeof(short);
2175			if (!UNIX_ABSTRACT(s))
2176				len--;
2177			else {
2178				seq_putc(seq, '@');
2179				i++;
2180			}
2181			for ( ; i < len; i++)
2182				seq_putc(seq, u->addr->name->sun_path[i]);
2183		}
2184		unix_state_unlock(s);
2185		seq_putc(seq, '\n');
2186	}
2187
2188	return 0;
2189}
2190
2191static const struct seq_operations unix_seq_ops = {
2192	.start  = unix_seq_start,
2193	.next   = unix_seq_next,
2194	.stop   = unix_seq_stop,
2195	.show   = unix_seq_show,
2196};
2197
2198static int unix_seq_open(struct inode *inode, struct file *file)
2199{
2200	return seq_open_net(inode, file, &unix_seq_ops,
2201			    sizeof(struct unix_iter_state));
2202}
2203
2204static const struct file_operations unix_seq_fops = {
2205	.owner		= THIS_MODULE,
2206	.open		= unix_seq_open,
2207	.read		= seq_read,
2208	.llseek		= seq_lseek,
2209	.release	= seq_release_net,
2210};
2211
2212#endif
2213
2214static struct net_proto_family unix_family_ops = {
2215	.family = PF_UNIX,
2216	.create = unix_create,
2217	.owner	= THIS_MODULE,
2218};
2219
2220
2221static int unix_net_init(struct net *net)
2222{
2223	int error = -ENOMEM;
2224
2225	net->unx.sysctl_max_dgram_qlen = 10;
2226	if (unix_sysctl_register(net))
2227		goto out;
2228
2229#ifdef CONFIG_PROC_FS
2230	if (!proc_net_fops_create(net, "unix", 0, &unix_seq_fops)) {
2231		unix_sysctl_unregister(net);
2232		goto out;
2233	}
2234#endif
2235	error = 0;
2236out:
2237	return error;
2238}
2239
2240static void unix_net_exit(struct net *net)
2241{
2242	unix_sysctl_unregister(net);
2243	proc_net_remove(net, "unix");
2244}
2245
2246static struct pernet_operations unix_net_ops = {
2247	.init = unix_net_init,
2248	.exit = unix_net_exit,
2249};
2250
2251static int __init af_unix_init(void)
2252{
2253	int rc = -1;
2254	struct sk_buff *dummy_skb;
2255
2256	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb));
2257
2258	rc = proto_register(&unix_proto, 1);
2259	if (rc != 0) {
2260		printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n",
2261		       __func__);
2262		goto out;
2263	}
2264
2265	sock_register(&unix_family_ops);
2266	register_pernet_subsys(&unix_net_ops);
2267out:
2268	return rc;
2269}
2270
2271static void __exit af_unix_exit(void)
2272{
2273	sock_unregister(PF_UNIX);
2274	proto_unregister(&unix_proto);
2275	unregister_pernet_subsys(&unix_net_ops);
2276}
2277
2278/* Earlier than device_initcall() so that other drivers invoking
2279   request_module() don't end up in a loop when modprobe tries
2280   to use a UNIX socket. But later than subsys_initcall() because
2281   we depend on stuff initialised there */
2282fs_initcall(af_unix_init);
2283module_exit(af_unix_exit);
2284
2285MODULE_LICENSE("GPL");
2286MODULE_ALIAS_NETPROTO(PF_UNIX);
2287