af_unix.c revision 663717f65c075eb4c6da7a123041295bd5295cc0
1/*
2 * NET4:	Implementation of BSD Unix domain sockets.
3 *
4 * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
5 *
6 *		This program is free software; you can redistribute it and/or
7 *		modify it under the terms of the GNU General Public License
8 *		as published by the Free Software Foundation; either version
9 *		2 of the License, or (at your option) any later version.
10 *
11 * Fixes:
12 *		Linus Torvalds	:	Assorted bug cures.
13 *		Niibe Yutaka	:	async I/O support.
14 *		Carsten Paeth	:	PF_UNIX check, address fixes.
15 *		Alan Cox	:	Limit size of allocated blocks.
16 *		Alan Cox	:	Fixed the stupid socketpair bug.
17 *		Alan Cox	:	BSD compatibility fine tuning.
18 *		Alan Cox	:	Fixed a bug in connect when interrupted.
19 *		Alan Cox	:	Sorted out a proper draft version of
20 *					file descriptor passing hacked up from
21 *					Mike Shaver's work.
22 *		Marty Leisner	:	Fixes to fd passing
23 *		Nick Nevin	:	recvmsg bugfix.
24 *		Alan Cox	:	Started proper garbage collector
25 *		Heiko EiBfeldt	:	Missing verify_area check
26 *		Alan Cox	:	Started POSIXisms
27 *		Andreas Schwab	:	Replace inode by dentry for proper
28 *					reference counting
29 *		Kirk Petersen	:	Made this a module
30 *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
31 *					Lots of bug fixes.
32 *	     Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
33 *					by the above two patches.
34 *	     Andrea Arcangeli	:	If possible we block in connect(2)
35 *					if the max backlog of the listen socket
36 *					has been reached. This won't break
37 *					old apps and it will avoid a huge amount
38 *					of socks hashed (this is for unix_gc()
39 *					performance reasons).
40 *					Security fix that limits the max
41 *					number of socks to 2*max_files and
42 *					the number of skb queueable in the
43 *					dgram receiver.
44 *		Artur Skawina   :	Hash function optimizations
45 *	     Alexey Kuznetsov   :	Full scale SMP. Lot of bugs are introduced 8)
46 *	      Malcolm Beattie   :	Set peercred for socketpair
47 *	     Michal Ostrowski   :       Module initialization cleanup.
48 *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
49 *	     				the core infrastructure is doing that
50 *	     				for all net proto families now (2.5.69+)
51 *
52 *
53 * Known differences from reference BSD that was tested:
54 *
55 *	[TO FIX]
56 *	ECONNREFUSED is not returned from one end of a connected() socket to the
57 *		other the moment one end closes.
58 *	fstat() doesn't return st_dev=0, and give the blksize as high water mark
59 *		and a fake inode identifier (nor the BSD first socket fstat twice bug).
60 *	[NOT TO FIX]
61 *	accept() returns a path name even if the connecting socket has closed
62 *		in the meantime (BSD loses the path and gives up).
63 *	accept() returns 0 length path for an unbound connector. BSD returns 16
64 *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
65 *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
66 *	BSD af_unix apparently has connect forgetting to block properly.
67 *		(need to check this with the POSIX spec in detail)
68 *
69 * Differences from 2.0.0-11-... (ANK)
70 *	Bug fixes and improvements.
71 *		- client shutdown killed server socket.
72 *		- removed all useless cli/sti pairs.
73 *
74 *	Semantic changes/extensions.
75 *		- generic control message passing.
76 *		- SCM_CREDENTIALS control message.
77 *		- "Abstract" (not FS based) socket bindings.
78 *		  Abstract names are sequences of bytes (not zero terminated)
79 *		  started by 0, so that this name space does not intersect
80 *		  with BSD names.
81 */
82
83#include <linux/module.h>
84#include <linux/kernel.h>
85#include <linux/signal.h>
86#include <linux/sched.h>
87#include <linux/errno.h>
88#include <linux/string.h>
89#include <linux/stat.h>
90#include <linux/dcache.h>
91#include <linux/namei.h>
92#include <linux/socket.h>
93#include <linux/un.h>
94#include <linux/fcntl.h>
95#include <linux/termios.h>
96#include <linux/sockios.h>
97#include <linux/net.h>
98#include <linux/in.h>
99#include <linux/fs.h>
100#include <linux/slab.h>
101#include <asm/uaccess.h>
102#include <linux/skbuff.h>
103#include <linux/netdevice.h>
104#include <net/net_namespace.h>
105#include <net/sock.h>
106#include <net/tcp_states.h>
107#include <net/af_unix.h>
108#include <linux/proc_fs.h>
109#include <linux/seq_file.h>
110#include <net/scm.h>
111#include <linux/init.h>
112#include <linux/poll.h>
113#include <linux/rtnetlink.h>
114#include <linux/mount.h>
115#include <net/checksum.h>
116#include <linux/security.h>
117
/*
 * Table of AF_UNIX sockets. It has UNIX_HASH_SIZE + 1 chains: the last one
 * (index UNIX_HASH_SIZE) is reserved for sockets that have no address yet,
 * see unix_sockets_unbound below.
 */
static struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
/* Taken around every lookup, insertion and removal on unix_socket_table. */
static DEFINE_SPINLOCK(unix_table_lock);
/* Number of AF_UNIX socks currently accounted for (see unix_create1()). */
static atomic_t unix_nr_socks = ATOMIC_INIT(0);

/* Chain on which freshly created, not yet bound sockets are kept. */
#define unix_sockets_unbound	(&unix_socket_table[UNIX_HASH_SIZE])

/*
 * Non-zero when the socket's address hash differs from UNIX_HASH_SIZE, the
 * marker unix_bind() stores for filesystem names. Dereferences
 * unix_sk(sk)->addr unconditionally, so the socket must have an address.
 */
#define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)
125
#ifdef CONFIG_SECURITY_NETWORK
/* Store the security id carried by @scm into the UNIXSID() slot of @skb. */
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	const void *src = &scm->secid;

	memcpy(UNIXSID(skb), src, sizeof(u32));
}

/* Load the security id kept in the UNIXSID() slot of @skb back into @scm. */
static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secid = *UNIXSID(skb);
}
#else
/* Without CONFIG_SECURITY_NETWORK both helpers compile to nothing. */
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
}

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
}
#endif /* CONFIG_SECURITY_NETWORK */
143
144/*
145 *  SMP locking strategy:
146 *    hash table is protected with spinlock unix_table_lock
147 *    each socket state is protected by separate spin lock.
148 */
149
150static inline unsigned unix_hash_fold(__wsum n)
151{
152	unsigned hash = (__force unsigned)n;
153	hash ^= hash>>16;
154	hash ^= hash>>8;
155	return hash&(UNIX_HASH_SIZE-1);
156}
157
/* The peer pointer of @sk; expands to an lvalue, so it is also assigned to. */
#define unix_peer(sk) (unix_sk(sk)->peer)
159
/* Return non-zero when the peer recorded on @osk is @sk. */
static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
	return sk == unix_peer(osk);
}
164
165static inline int unix_may_send(struct sock *sk, struct sock *osk)
166{
167	return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
168}
169
170static inline int unix_recvq_full(struct sock const *sk)
171{
172	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
173}
174
175static struct sock *unix_peer_get(struct sock *s)
176{
177	struct sock *peer;
178
179	unix_state_lock(s);
180	peer = unix_peer(s);
181	if (peer)
182		sock_hold(peer);
183	unix_state_unlock(s);
184	return peer;
185}
186
187static inline void unix_release_addr(struct unix_address *addr)
188{
189	if (atomic_dec_and_test(&addr->refcnt))
190		kfree(addr);
191}
192
193/*
194 *	Check unix socket name:
195 *		- should be not zero length.
196 *	        - if started by not zero, should be NULL terminated (FS object)
197 *		- if started by zero, it is abstract name.
198 */
199
200static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned *hashp)
201{
202	if (len <= sizeof(short) || len > sizeof(*sunaddr))
203		return -EINVAL;
204	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
205		return -EINVAL;
206	if (sunaddr->sun_path[0]) {
207		/*
208		 * This may look like an off by one error but it is a bit more
209		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
210		 * sun_path[108] doesnt as such exist.  However in kernel space
211		 * we are guaranteed that it is a valid memory location in our
212		 * kernel address buffer.
213		 */
214		((char *)sunaddr)[len] = 0;
215		len = strlen(sunaddr->sun_path)+1+sizeof(short);
216		return len;
217	}
218
219	*hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
220	return len;
221}
222
/*
 * Unlink @sk from whatever chain it is on. Callers in this file hold
 * unix_table_lock around the call.
 */
static void __unix_remove_socket(struct sock *sk)
{
	(void)sk_del_node_init(sk);
}
227
/*
 * Link @sk onto @list, warning if it is still hashed somewhere. Callers
 * in this file hold unix_table_lock around the call.
 */
static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	WARN_ON(!sk_unhashed(sk));

	sk_add_node(sk, list);
}
233
234static inline void unix_remove_socket(struct sock *sk)
235{
236	spin_lock(&unix_table_lock);
237	__unix_remove_socket(sk);
238	spin_unlock(&unix_table_lock);
239}
240
241static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
242{
243	spin_lock(&unix_table_lock);
244	__unix_insert_socket(list, sk);
245	spin_unlock(&unix_table_lock);
246}
247
248static struct sock *__unix_find_socket_byname(struct net *net,
249					      struct sockaddr_un *sunname,
250					      int len, int type, unsigned hash)
251{
252	struct sock *s;
253	struct hlist_node *node;
254
255	sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
256		struct unix_sock *u = unix_sk(s);
257
258		if (!net_eq(sock_net(s), net))
259			continue;
260
261		if (u->addr->len == len &&
262		    !memcmp(u->addr->name, sunname, len))
263			goto found;
264	}
265	s = NULL;
266found:
267	return s;
268}
269
270static inline struct sock *unix_find_socket_byname(struct net *net,
271						   struct sockaddr_un *sunname,
272						   int len, int type,
273						   unsigned hash)
274{
275	struct sock *s;
276
277	spin_lock(&unix_table_lock);
278	s = __unix_find_socket_byname(net, sunname, len, type, hash);
279	if (s)
280		sock_hold(s);
281	spin_unlock(&unix_table_lock);
282	return s;
283}
284
285static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i)
286{
287	struct sock *s;
288	struct hlist_node *node;
289
290	spin_lock(&unix_table_lock);
291	sk_for_each(s, node,
292		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
293		struct dentry *dentry = unix_sk(s)->dentry;
294
295		if (!net_eq(sock_net(s), net))
296			continue;
297
298		if (dentry && dentry->d_inode == i) {
299			sock_hold(s);
300			goto found;
301		}
302	}
303	s = NULL;
304found:
305	spin_unlock(&unix_table_lock);
306	return s;
307}
308
309static inline int unix_writable(struct sock *sk)
310{
311	return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
312}
313
314static void unix_write_space(struct sock *sk)
315{
316	read_lock(&sk->sk_callback_lock);
317	if (unix_writable(sk)) {
318		if (sk_has_sleeper(sk))
319			wake_up_interruptible_sync(sk->sk_sleep);
320		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
321	}
322	read_unlock(&sk->sk_callback_lock);
323}
324
325/* When dgram socket disconnects (or changes its peer), we clear its receive
326 * queue of packets arrived from previous peer. First, it allows to do
327 * flow control based only on wmem_alloc; second, sk connected to peer
328 * may receive messages only from that peer. */
329static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
330{
331	if (!skb_queue_empty(&sk->sk_receive_queue)) {
332		skb_queue_purge(&sk->sk_receive_queue);
333		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
334
335		/* If one link of bidirectional dgram pipe is disconnected,
336		 * we signal error. Messages are lost. Do not make this,
337		 * when peer was not connected to us.
338		 */
339		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
340			other->sk_err = ECONNRESET;
341			other->sk_error_report(other);
342		}
343	}
344}
345
346static void unix_sock_destructor(struct sock *sk)
347{
348	struct unix_sock *u = unix_sk(sk);
349
350	skb_queue_purge(&sk->sk_receive_queue);
351
352	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
353	WARN_ON(!sk_unhashed(sk));
354	WARN_ON(sk->sk_socket);
355	if (!sock_flag(sk, SOCK_DEAD)) {
356		printk(KERN_INFO "Attempt to release alive unix socket: %p\n", sk);
357		return;
358	}
359
360	if (u->addr)
361		unix_release_addr(u->addr);
362
363	atomic_dec(&unix_nr_socks);
364	local_bh_disable();
365	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
366	local_bh_enable();
367#ifdef UNIX_REFCNT_DEBUG
368	printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk,
369		atomic_read(&unix_nr_socks));
370#endif
371}
372
/*
 * Tear down a unix sock.
 *
 * @sk:      the sock to release
 * @embrion: non-zero when @sk is a not-yet-accepted connection that is
 *           being dropped together with its listener (see the recursive
 *           call below); it forces ECONNRESET onto the peer.
 *
 * Unhashes the sock, marks it closed and fully shut down, notifies and
 * drops the peer, flushes the receive queue (recursively releasing the
 * socks queued on a listener), releases the bound dentry/mount and
 * finally drops the caller's reference. Always returns 0.
 */
static int unix_release_sock(struct sock *sk, int embrion)
{
	struct unix_sock *u = unix_sk(sk);
	struct dentry *dentry;
	struct vfsmount *mnt;
	struct sock *skpair;
	struct sk_buff *skb;
	int state;

	unix_remove_socket(sk);

	/*
	 * Clear state under the state lock. The dentry/mnt pair and the
	 * previous sk_state are latched into locals so they can be acted on
	 * after the lock is dropped.
	 */
	unix_state_lock(sk);
	sock_orphan(sk);
	sk->sk_shutdown = SHUTDOWN_MASK;
	dentry	     = u->dentry;
	u->dentry    = NULL;
	mnt	     = u->mnt;
	u->mnt	     = NULL;
	state = sk->sk_state;
	sk->sk_state = TCP_CLOSE;
	unix_state_unlock(sk);

	/* Wake everybody sleeping on our peer_wait queue. */
	wake_up_interruptible_all(&u->peer_wait);

	skpair = unix_peer(sk);

	if (skpair != NULL) {
		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
			unix_state_lock(skpair);
			/* No more writes */
			skpair->sk_shutdown = SHUTDOWN_MASK;
			/* Unread data on our side (or an embryo) means reset. */
			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
				skpair->sk_err = ECONNRESET;
			unix_state_unlock(skpair);
			skpair->sk_state_change(skpair);
			read_lock(&skpair->sk_callback_lock);
			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
			read_unlock(&skpair->sk_callback_lock);
		}
		sock_put(skpair); /* It may now die */
		unix_peer(sk) = NULL;
	}

	/* Try to flush out this socket. Throw out buffers at least */

	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		/*
		 * On a listener each queued skb stands for a pending
		 * connection; release the sock attached to it as an embryo.
		 */
		if (state == TCP_LISTEN)
			unix_release_sock(skb->sk, 1);
		/* passed fds are erased in the kfree_skb hook	      */
		kfree_skb(skb);
	}

	if (dentry) {
		dput(dentry);
		mntput(mnt);
	}

	sock_put(sk);

	/* ---- Socket is dead now and most probably destroyed ---- */

	/*
	 * Fixme: BSD difference: In BSD all sockets connected to us get
	 *	  ECONNRESET and we die on the spot. In Linux we behave
	 *	  like files and pipes do and wait for the last
	 *	  dereference.
	 *
	 * Can't we simply set sock->err?
	 *
	 *	  What does the above comment talk about? --ANK(980817)
	 */

	if (unix_tot_inflight)
		unix_gc();		/* Garbage collect fds */

	return 0;
}
451
452static int unix_listen(struct socket *sock, int backlog)
453{
454	int err;
455	struct sock *sk = sock->sk;
456	struct unix_sock *u = unix_sk(sk);
457
458	err = -EOPNOTSUPP;
459	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
460		goto out;	/* Only stream/seqpacket sockets accept */
461	err = -EINVAL;
462	if (!u->addr)
463		goto out;	/* No listens on an unbound socket */
464	unix_state_lock(sk);
465	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
466		goto out_unlock;
467	if (backlog > sk->sk_max_ack_backlog)
468		wake_up_interruptible_all(&u->peer_wait);
469	sk->sk_max_ack_backlog	= backlog;
470	sk->sk_state		= TCP_LISTEN;
471	/* set credentials so connect can copy them */
472	sk->sk_peercred.pid	= task_tgid_vnr(current);
473	current_euid_egid(&sk->sk_peercred.uid, &sk->sk_peercred.gid);
474	err = 0;
475
476out_unlock:
477	unix_state_unlock(sk);
478out:
479	return err;
480}
481
482static int unix_release(struct socket *);
483static int unix_bind(struct socket *, struct sockaddr *, int);
484static int unix_stream_connect(struct socket *, struct sockaddr *,
485			       int addr_len, int flags);
486static int unix_socketpair(struct socket *, struct socket *);
487static int unix_accept(struct socket *, struct socket *, int);
488static int unix_getname(struct socket *, struct sockaddr *, int *, int);
489static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
490static unsigned int unix_dgram_poll(struct file *, struct socket *,
491				    poll_table *);
492static int unix_ioctl(struct socket *, unsigned int, unsigned long);
493static int unix_shutdown(struct socket *, int);
494static int unix_stream_sendmsg(struct kiocb *, struct socket *,
495			       struct msghdr *, size_t);
496static int unix_stream_recvmsg(struct kiocb *, struct socket *,
497			       struct msghdr *, size_t, int);
498static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
499			      struct msghdr *, size_t);
500static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
501			      struct msghdr *, size_t, int);
502static int unix_dgram_connect(struct socket *, struct sockaddr *,
503			      int, int);
504static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
505				  struct msghdr *, size_t);
506
507static const struct proto_ops unix_stream_ops = {
508	.family =	PF_UNIX,
509	.owner =	THIS_MODULE,
510	.release =	unix_release,
511	.bind =		unix_bind,
512	.connect =	unix_stream_connect,
513	.socketpair =	unix_socketpair,
514	.accept =	unix_accept,
515	.getname =	unix_getname,
516	.poll =		unix_poll,
517	.ioctl =	unix_ioctl,
518	.listen =	unix_listen,
519	.shutdown =	unix_shutdown,
520	.setsockopt =	sock_no_setsockopt,
521	.getsockopt =	sock_no_getsockopt,
522	.sendmsg =	unix_stream_sendmsg,
523	.recvmsg =	unix_stream_recvmsg,
524	.mmap =		sock_no_mmap,
525	.sendpage =	sock_no_sendpage,
526};
527
528static const struct proto_ops unix_dgram_ops = {
529	.family =	PF_UNIX,
530	.owner =	THIS_MODULE,
531	.release =	unix_release,
532	.bind =		unix_bind,
533	.connect =	unix_dgram_connect,
534	.socketpair =	unix_socketpair,
535	.accept =	sock_no_accept,
536	.getname =	unix_getname,
537	.poll =		unix_dgram_poll,
538	.ioctl =	unix_ioctl,
539	.listen =	sock_no_listen,
540	.shutdown =	unix_shutdown,
541	.setsockopt =	sock_no_setsockopt,
542	.getsockopt =	sock_no_getsockopt,
543	.sendmsg =	unix_dgram_sendmsg,
544	.recvmsg =	unix_dgram_recvmsg,
545	.mmap =		sock_no_mmap,
546	.sendpage =	sock_no_sendpage,
547};
548
549static const struct proto_ops unix_seqpacket_ops = {
550	.family =	PF_UNIX,
551	.owner =	THIS_MODULE,
552	.release =	unix_release,
553	.bind =		unix_bind,
554	.connect =	unix_stream_connect,
555	.socketpair =	unix_socketpair,
556	.accept =	unix_accept,
557	.getname =	unix_getname,
558	.poll =		unix_dgram_poll,
559	.ioctl =	unix_ioctl,
560	.listen =	unix_listen,
561	.shutdown =	unix_shutdown,
562	.setsockopt =	sock_no_setsockopt,
563	.getsockopt =	sock_no_getsockopt,
564	.sendmsg =	unix_seqpacket_sendmsg,
565	.recvmsg =	unix_dgram_recvmsg,
566	.mmap =		sock_no_mmap,
567	.sendpage =	sock_no_sendpage,
568};
569
570static struct proto unix_proto = {
571	.name			= "UNIX",
572	.owner			= THIS_MODULE,
573	.obj_size		= sizeof(struct unix_sock),
574};
575
/*
 * AF_UNIX sockets do not interact with hardware, hence they
 * don't trigger interrupts - so it's safe for them to have
 * bh-unsafe locking for their sk_receive_queue.lock. Split off
 * this special lock-class by reinitializing the spinlock key
 * (applied with lockdep_set_class() in unix_create1()):
 */
static struct lock_class_key af_unix_sk_receive_queue_lock_key;
583
584static struct sock *unix_create1(struct net *net, struct socket *sock)
585{
586	struct sock *sk = NULL;
587	struct unix_sock *u;
588
589	atomic_inc(&unix_nr_socks);
590	if (atomic_read(&unix_nr_socks) > 2 * get_max_files())
591		goto out;
592
593	sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto);
594	if (!sk)
595		goto out;
596
597	sock_init_data(sock, sk);
598	lockdep_set_class(&sk->sk_receive_queue.lock,
599				&af_unix_sk_receive_queue_lock_key);
600
601	sk->sk_write_space	= unix_write_space;
602	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
603	sk->sk_destruct		= unix_sock_destructor;
604	u	  = unix_sk(sk);
605	u->dentry = NULL;
606	u->mnt	  = NULL;
607	spin_lock_init(&u->lock);
608	atomic_long_set(&u->inflight, 0);
609	INIT_LIST_HEAD(&u->link);
610	mutex_init(&u->readlock); /* single task reading lock */
611	init_waitqueue_head(&u->peer_wait);
612	unix_insert_socket(unix_sockets_unbound, sk);
613out:
614	if (sk == NULL)
615		atomic_dec(&unix_nr_socks);
616	else {
617		local_bh_disable();
618		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
619		local_bh_enable();
620	}
621	return sk;
622}
623
624static int unix_create(struct net *net, struct socket *sock, int protocol,
625		       int kern)
626{
627	if (protocol && protocol != PF_UNIX)
628		return -EPROTONOSUPPORT;
629
630	sock->state = SS_UNCONNECTED;
631
632	switch (sock->type) {
633	case SOCK_STREAM:
634		sock->ops = &unix_stream_ops;
635		break;
636		/*
637		 *	Believe it or not BSD has AF_UNIX, SOCK_RAW though
638		 *	nothing uses it.
639		 */
640	case SOCK_RAW:
641		sock->type = SOCK_DGRAM;
642	case SOCK_DGRAM:
643		sock->ops = &unix_dgram_ops;
644		break;
645	case SOCK_SEQPACKET:
646		sock->ops = &unix_seqpacket_ops;
647		break;
648	default:
649		return -ESOCKTNOSUPPORT;
650	}
651
652	return unix_create1(net, sock) ? 0 : -ENOMEM;
653}
654
655static int unix_release(struct socket *sock)
656{
657	struct sock *sk = sock->sk;
658
659	if (!sk)
660		return 0;
661
662	sock->sk = NULL;
663
664	return unix_release_sock(sk, 0);
665}
666
667static int unix_autobind(struct socket *sock)
668{
669	struct sock *sk = sock->sk;
670	struct net *net = sock_net(sk);
671	struct unix_sock *u = unix_sk(sk);
672	static u32 ordernum = 1;
673	struct unix_address *addr;
674	int err;
675
676	mutex_lock(&u->readlock);
677
678	err = 0;
679	if (u->addr)
680		goto out;
681
682	err = -ENOMEM;
683	addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
684	if (!addr)
685		goto out;
686
687	addr->name->sun_family = AF_UNIX;
688	atomic_set(&addr->refcnt, 1);
689
690retry:
691	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
692	addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
693
694	spin_lock(&unix_table_lock);
695	ordernum = (ordernum+1)&0xFFFFF;
696
697	if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
698				      addr->hash)) {
699		spin_unlock(&unix_table_lock);
700		/* Sanity yield. It is unusual case, but yet... */
701		if (!(ordernum&0xFF))
702			yield();
703		goto retry;
704	}
705	addr->hash ^= sk->sk_type;
706
707	__unix_remove_socket(sk);
708	u->addr = addr;
709	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
710	spin_unlock(&unix_table_lock);
711	err = 0;
712
713out:	mutex_unlock(&u->readlock);
714	return err;
715}
716
/*
 * Resolve @sunname to a socket of type @type in namespace @net.
 *
 * Abstract names (first path byte zero) are looked up in the hash table;
 * filesystem names are resolved through the VFS, require write permission
 * on the inode, and must refer to a socket inode.
 *
 * Returns the socket with a reference held, or NULL with *error set:
 * the lookup/permission error, -ECONNREFUSED when no socket is bound
 * there, or -EPROTOTYPE when a filesystem socket has the wrong type.
 */
static struct sock *unix_find_other(struct net *net,
				    struct sockaddr_un *sunname, int len,
				    int type, unsigned hash, int *error)
{
	struct sock *found;
	struct inode *inode;
	struct path path;
	int err = 0;

	if (!sunname->sun_path[0]) {
		struct dentry *dentry;

		err = -ECONNREFUSED;
		found = unix_find_socket_byname(net, sunname, len, type, hash);
		if (!found)
			goto fail;

		dentry = unix_sk(found)->dentry;
		if (dentry)
			touch_atime(unix_sk(found)->mnt, dentry);

		return found;
	}

	err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
	if (err)
		goto fail;

	inode = path.dentry->d_inode;
	err = inode_permission(inode, MAY_WRITE);
	if (err)
		goto put_fail;

	err = -ECONNREFUSED;
	if (!S_ISSOCK(inode->i_mode))
		goto put_fail;

	found = unix_find_socket_byinode(net, inode);
	if (!found)
		goto put_fail;

	/* Only a socket of the requested type counts as accessed. */
	if (found->sk_type == type)
		touch_atime(path.mnt, path.dentry);

	path_put(&path);

	err = -EPROTOTYPE;
	if (found->sk_type != type) {
		sock_put(found);
		goto fail;
	}

	return found;

put_fail:
	path_put(&path);
fail:
	*error = err;
	return NULL;
}
771
772
773static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
774{
775	struct sock *sk = sock->sk;
776	struct net *net = sock_net(sk);
777	struct unix_sock *u = unix_sk(sk);
778	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
779	struct dentry *dentry = NULL;
780	struct nameidata nd;
781	int err;
782	unsigned hash;
783	struct unix_address *addr;
784	struct hlist_head *list;
785
786	err = -EINVAL;
787	if (sunaddr->sun_family != AF_UNIX)
788		goto out;
789
790	if (addr_len == sizeof(short)) {
791		err = unix_autobind(sock);
792		goto out;
793	}
794
795	err = unix_mkname(sunaddr, addr_len, &hash);
796	if (err < 0)
797		goto out;
798	addr_len = err;
799
800	mutex_lock(&u->readlock);
801
802	err = -EINVAL;
803	if (u->addr)
804		goto out_up;
805
806	err = -ENOMEM;
807	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
808	if (!addr)
809		goto out_up;
810
811	memcpy(addr->name, sunaddr, addr_len);
812	addr->len = addr_len;
813	addr->hash = hash ^ sk->sk_type;
814	atomic_set(&addr->refcnt, 1);
815
816	if (sunaddr->sun_path[0]) {
817		unsigned int mode;
818		err = 0;
819		/*
820		 * Get the parent directory, calculate the hash for last
821		 * component.
822		 */
823		err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd);
824		if (err)
825			goto out_mknod_parent;
826
827		dentry = lookup_create(&nd, 0);
828		err = PTR_ERR(dentry);
829		if (IS_ERR(dentry))
830			goto out_mknod_unlock;
831
832		/*
833		 * All right, let's create it.
834		 */
835		mode = S_IFSOCK |
836		       (SOCK_INODE(sock)->i_mode & ~current_umask());
837		err = mnt_want_write(nd.path.mnt);
838		if (err)
839			goto out_mknod_dput;
840		err = security_path_mknod(&nd.path, dentry, mode, 0);
841		if (err)
842			goto out_mknod_drop_write;
843		err = vfs_mknod(nd.path.dentry->d_inode, dentry, mode, 0);
844out_mknod_drop_write:
845		mnt_drop_write(nd.path.mnt);
846		if (err)
847			goto out_mknod_dput;
848		mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
849		dput(nd.path.dentry);
850		nd.path.dentry = dentry;
851
852		addr->hash = UNIX_HASH_SIZE;
853	}
854
855	spin_lock(&unix_table_lock);
856
857	if (!sunaddr->sun_path[0]) {
858		err = -EADDRINUSE;
859		if (__unix_find_socket_byname(net, sunaddr, addr_len,
860					      sk->sk_type, hash)) {
861			unix_release_addr(addr);
862			goto out_unlock;
863		}
864
865		list = &unix_socket_table[addr->hash];
866	} else {
867		list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
868		u->dentry = nd.path.dentry;
869		u->mnt    = nd.path.mnt;
870	}
871
872	err = 0;
873	__unix_remove_socket(sk);
874	u->addr = addr;
875	__unix_insert_socket(list, sk);
876
877out_unlock:
878	spin_unlock(&unix_table_lock);
879out_up:
880	mutex_unlock(&u->readlock);
881out:
882	return err;
883
884out_mknod_dput:
885	dput(dentry);
886out_mknod_unlock:
887	mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
888	path_put(&nd.path);
889out_mknod_parent:
890	if (err == -EEXIST)
891		err = -EADDRINUSE;
892	unix_release_addr(addr);
893	goto out_up;
894}
895
/*
 * Take the state locks of two socks without risking an ABBA deadlock:
 * the sock at the lower address is always locked first. When @sk2 is
 * NULL or the same sock as @sk1, only @sk1 is locked.
 */
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	struct sock *first, *second;

	if (!sk2 || unlikely(sk1 == sk2)) {
		unix_state_lock(sk1);
		return;
	}

	if (sk1 < sk2) {
		first = sk1;
		second = sk2;
	} else {
		first = sk2;
		second = sk1;
	}

	unix_state_lock(first);
	unix_state_lock_nested(second);
}
910
/*
 * Counterpart of unix_state_double_lock(): release @sk1 and, when @sk2 is
 * a distinct non-NULL sock, @sk2 as well.
 */
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	unix_state_unlock(sk1);

	if (sk2 && likely(sk1 != sk2))
		unix_state_unlock(sk2);
}
920
921static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
922			      int alen, int flags)
923{
924	struct sock *sk = sock->sk;
925	struct net *net = sock_net(sk);
926	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
927	struct sock *other;
928	unsigned hash;
929	int err;
930
931	if (addr->sa_family != AF_UNSPEC) {
932		err = unix_mkname(sunaddr, alen, &hash);
933		if (err < 0)
934			goto out;
935		alen = err;
936
937		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
938		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
939			goto out;
940
941restart:
942		other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
943		if (!other)
944			goto out;
945
946		unix_state_double_lock(sk, other);
947
948		/* Apparently VFS overslept socket death. Retry. */
949		if (sock_flag(other, SOCK_DEAD)) {
950			unix_state_double_unlock(sk, other);
951			sock_put(other);
952			goto restart;
953		}
954
955		err = -EPERM;
956		if (!unix_may_send(sk, other))
957			goto out_unlock;
958
959		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
960		if (err)
961			goto out_unlock;
962
963	} else {
964		/*
965		 *	1003.1g breaking connected state with AF_UNSPEC
966		 */
967		other = NULL;
968		unix_state_double_lock(sk, other);
969	}
970
971	/*
972	 * If it was connected, reconnect.
973	 */
974	if (unix_peer(sk)) {
975		struct sock *old_peer = unix_peer(sk);
976		unix_peer(sk) = other;
977		unix_state_double_unlock(sk, other);
978
979		if (other != old_peer)
980			unix_dgram_disconnected(sk, old_peer);
981		sock_put(old_peer);
982	} else {
983		unix_peer(sk) = other;
984		unix_state_double_unlock(sk, other);
985	}
986	return 0;
987
988out_unlock:
989	unix_state_double_unlock(sk, other);
990	sock_put(other);
991out:
992	return err;
993}
994
995static long unix_wait_for_peer(struct sock *other, long timeo)
996{
997	struct unix_sock *u = unix_sk(other);
998	int sched;
999	DEFINE_WAIT(wait);
1000
1001	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1002
1003	sched = !sock_flag(other, SOCK_DEAD) &&
1004		!(other->sk_shutdown & RCV_SHUTDOWN) &&
1005		unix_recvq_full(other);
1006
1007	unix_state_unlock(other);
1008
1009	if (sched)
1010		timeo = schedule_timeout(timeo);
1011
1012	finish_wait(&u->peer_wait, &wait);
1013	return timeo;
1014}
1015
/*
 * connect(2) for stream and seqpacket sockets.
 *
 * A new sock (newsk) that will become the server side of the connection
 * and a one byte skb charged to it are allocated up front. The listener
 * is then looked up and locked, our own state is locked nested inside it,
 * both ends are wired together and the skb is queued on the listener's
 * receive queue.
 *
 * Returns 0 on success. Errors visible here: the result of name
 * validation, autobind, lookup or the security hook; -ENOMEM;
 * -ECONNREFUSED when the target is not listening or is shut down for
 * receiving; -EAGAIN when the backlog is full and the timeout is zero;
 * the sock_intr_errno() value when a signal arrives while waiting;
 * -EISCONN / -EINVAL depending on our own state.
 */
static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			       int addr_len, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
	struct sock *newsk = NULL;
	struct sock *other = NULL;
	struct sk_buff *skb = NULL;
	unsigned hash;
	int st;
	int err;
	long timeo;

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
	    (err = unix_autobind(sock)) != 0)
		goto out;

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	/* First of all allocate resources.
	   If we do it after the state is locked,
	   we will have to recheck everything again in any case.
	 */

	err = -ENOMEM;

	/* create new sock for complete connection */
	newsk = unix_create1(sock_net(sk), NULL);
	if (newsk == NULL)
		goto out;

	/* Allocate skb for sending to listening sock */
	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
	if (skb == NULL)
		goto out;

restart:
	/*  Find listening sock. */
	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
	if (!other)
		goto out;

	/* Latch state of peer */
	unix_state_lock(other);

	/* Apparently VFS overslept socket death. Retry. */
	if (sock_flag(other, SOCK_DEAD)) {
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = -ECONNREFUSED;
	if (other->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (unix_recvq_full(other)) {
		err = -EAGAIN;
		if (!timeo)
			goto out_unlock;

		/* Drops the state lock of other before sleeping. */
		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
		sock_put(other);
		goto restart;
	}

	/* Latch our state.

	   This is a tricky place. We need to grab the write lock and cannot
	   drop the lock on the peer. It is dangerous because deadlock is
	   possible. The connect-to-self case and simultaneous
	   connect attempts are eliminated by checking the socket
	   state: other is TCP_LISTEN, and if sk is TCP_LISTEN we
	   detect this before attempting to grab the lock.

	   Well, and we have to recheck the state after the socket is locked.
	 */
	st = sk->sk_state;

	switch (st) {
	case TCP_CLOSE:
		/* This is ok... continue with connect */
		break;
	case TCP_ESTABLISHED:
		/* Socket is already connected */
		err = -EISCONN;
		goto out_unlock;
	default:
		err = -EINVAL;
		goto out_unlock;
	}

	unix_state_lock_nested(sk);

	/* Our state changed while we were unlocked: start over. */
	if (sk->sk_state != st) {
		unix_state_unlock(sk);
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = security_unix_stream_connect(sock, other->sk_socket, newsk);
	if (err) {
		unix_state_unlock(sk);
		goto out_unlock;
	}

	/* The way is open! Quickly set all the necessary fields... */

	/* newsk holds a reference on sk through its peer pointer. */
	sock_hold(sk);
	unix_peer(newsk)	= sk;
	newsk->sk_state		= TCP_ESTABLISHED;
	newsk->sk_type		= sk->sk_type;
	newsk->sk_peercred.pid	= task_tgid_vnr(current);
	current_euid_egid(&newsk->sk_peercred.uid, &newsk->sk_peercred.gid);
	newu = unix_sk(newsk);
	newsk->sk_sleep		= &newu->peer_wait;
	otheru = unix_sk(other);

	/* copy address information from listening to new sock*/
	if (otheru->addr) {
		atomic_inc(&otheru->addr->refcnt);
		newu->addr = otheru->addr;
	}
	if (otheru->dentry) {
		newu->dentry	= dget(otheru->dentry);
		newu->mnt	= mntget(otheru->mnt);
	}

	/* Set credentials */
	sk->sk_peercred = other->sk_peercred;

	sock->state	= SS_CONNECTED;
	sk->sk_state	= TCP_ESTABLISHED;
	/* sk holds a reference on newsk through its peer pointer. */
	sock_hold(newsk);

	smp_mb__after_atomic_inc();	/* sock_hold() does an atomic_inc() */
	unix_peer(sk)	= newsk;

	unix_state_unlock(sk);

	/* take ten and send info to listening sock */
	spin_lock(&other->sk_receive_queue.lock);
	__skb_queue_tail(&other->sk_receive_queue, skb);
	spin_unlock(&other->sk_receive_queue.lock);
	unix_state_unlock(other);
	other->sk_data_ready(other, 0);
	sock_put(other);
	return 0;

out_unlock:
	if (other)
		unix_state_unlock(other);

out:
	/* skb may still be NULL here when an early step failed. */
	kfree_skb(skb);
	if (newsk)
		unix_release_sock(newsk, 0);
	if (other)
		sock_put(other);
	return err;
}
1191
1192static int unix_socketpair(struct socket *socka, struct socket *sockb)
1193{
1194	struct sock *ska = socka->sk, *skb = sockb->sk;
1195
1196	/* Join our sockets back to back */
1197	sock_hold(ska);
1198	sock_hold(skb);
1199	unix_peer(ska) = skb;
1200	unix_peer(skb) = ska;
1201	ska->sk_peercred.pid = skb->sk_peercred.pid = task_tgid_vnr(current);
1202	current_euid_egid(&skb->sk_peercred.uid, &skb->sk_peercred.gid);
1203	ska->sk_peercred.uid = skb->sk_peercred.uid;
1204	ska->sk_peercred.gid = skb->sk_peercred.gid;
1205
1206	if (ska->sk_type != SOCK_DGRAM) {
1207		ska->sk_state = TCP_ESTABLISHED;
1208		skb->sk_state = TCP_ESTABLISHED;
1209		socka->state  = SS_CONNECTED;
1210		sockb->state  = SS_CONNECTED;
1211	}
1212	return 0;
1213}
1214
1215static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1216{
1217	struct sock *sk = sock->sk;
1218	struct sock *tsk;
1219	struct sk_buff *skb;
1220	int err;
1221
1222	err = -EOPNOTSUPP;
1223	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1224		goto out;
1225
1226	err = -EINVAL;
1227	if (sk->sk_state != TCP_LISTEN)
1228		goto out;
1229
1230	/* If socket state is TCP_LISTEN it cannot change (for now...),
1231	 * so that no locks are necessary.
1232	 */
1233
1234	skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1235	if (!skb) {
1236		/* This means receive shutdown. */
1237		if (err == 0)
1238			err = -EINVAL;
1239		goto out;
1240	}
1241
1242	tsk = skb->sk;
1243	skb_free_datagram(sk, skb);
1244	wake_up_interruptible(&unix_sk(sk)->peer_wait);
1245
1246	/* attach accepted sock to socket */
1247	unix_state_lock(tsk);
1248	newsock->state = SS_CONNECTED;
1249	sock_graft(tsk, newsock);
1250	unix_state_unlock(tsk);
1251	return 0;
1252
1253out:
1254	return err;
1255}
1256
1257
1258static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1259{
1260	struct sock *sk = sock->sk;
1261	struct unix_sock *u;
1262	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1263	int err = 0;
1264
1265	if (peer) {
1266		sk = unix_peer_get(sk);
1267
1268		err = -ENOTCONN;
1269		if (!sk)
1270			goto out;
1271		err = 0;
1272	} else {
1273		sock_hold(sk);
1274	}
1275
1276	u = unix_sk(sk);
1277	unix_state_lock(sk);
1278	if (!u->addr) {
1279		sunaddr->sun_family = AF_UNIX;
1280		sunaddr->sun_path[0] = 0;
1281		*uaddr_len = sizeof(short);
1282	} else {
1283		struct unix_address *addr = u->addr;
1284
1285		*uaddr_len = addr->len;
1286		memcpy(sunaddr, addr->name, *uaddr_len);
1287	}
1288	unix_state_unlock(sk);
1289	sock_put(sk);
1290out:
1291	return err;
1292}
1293
1294static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1295{
1296	int i;
1297
1298	scm->fp = UNIXCB(skb).fp;
1299	skb->destructor = sock_wfree;
1300	UNIXCB(skb).fp = NULL;
1301
1302	for (i = scm->fp->count-1; i >= 0; i--)
1303		unix_notinflight(scm->fp->fp[i]);
1304}
1305
1306static void unix_destruct_fds(struct sk_buff *skb)
1307{
1308	struct scm_cookie scm;
1309	memset(&scm, 0, sizeof(scm));
1310	unix_detach_fds(&scm, skb);
1311
1312	/* Alas, it calls VFS */
1313	/* So fscking what? fput() had been SMP-safe since the last Summer */
1314	scm_destroy(&scm);
1315	sock_wfree(skb);
1316}
1317
1318static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1319{
1320	int i;
1321
1322	/*
1323	 * Need to duplicate file references for the sake of garbage
1324	 * collection.  Otherwise a socket in the fps might become a
1325	 * candidate for GC while the skb is not yet queued.
1326	 */
1327	UNIXCB(skb).fp = scm_fp_dup(scm->fp);
1328	if (!UNIXCB(skb).fp)
1329		return -ENOMEM;
1330
1331	for (i = scm->fp->count-1; i >= 0; i--)
1332		unix_inflight(scm->fp->fp[i]);
1333	skb->destructor = unix_destruct_fds;
1334	return 0;
1335}
1336
1337/*
1338 *	Send AF_UNIX data.
1339 */
1340
1341static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
1342			      struct msghdr *msg, size_t len)
1343{
1344	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1345	struct sock *sk = sock->sk;
1346	struct net *net = sock_net(sk);
1347	struct unix_sock *u = unix_sk(sk);
1348	struct sockaddr_un *sunaddr = msg->msg_name;
1349	struct sock *other = NULL;
1350	int namelen = 0; /* fake GCC */
1351	int err;
1352	unsigned hash;
1353	struct sk_buff *skb;
1354	long timeo;
1355	struct scm_cookie tmp_scm;
1356
1357	if (NULL == siocb->scm)
1358		siocb->scm = &tmp_scm;
1359	wait_for_unix_gc();
1360	err = scm_send(sock, msg, siocb->scm);
1361	if (err < 0)
1362		return err;
1363
1364	err = -EOPNOTSUPP;
1365	if (msg->msg_flags&MSG_OOB)
1366		goto out;
1367
1368	if (msg->msg_namelen) {
1369		err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1370		if (err < 0)
1371			goto out;
1372		namelen = err;
1373	} else {
1374		sunaddr = NULL;
1375		err = -ENOTCONN;
1376		other = unix_peer_get(sk);
1377		if (!other)
1378			goto out;
1379	}
1380
1381	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1382	    && (err = unix_autobind(sock)) != 0)
1383		goto out;
1384
1385	err = -EMSGSIZE;
1386	if (len > sk->sk_sndbuf - 32)
1387		goto out;
1388
1389	skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err);
1390	if (skb == NULL)
1391		goto out;
1392
1393	memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1394	if (siocb->scm->fp) {
1395		err = unix_attach_fds(siocb->scm, skb);
1396		if (err)
1397			goto out_free;
1398	}
1399	unix_get_secdata(siocb->scm, skb);
1400
1401	skb_reset_transport_header(skb);
1402	err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
1403	if (err)
1404		goto out_free;
1405
1406	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1407
1408restart:
1409	if (!other) {
1410		err = -ECONNRESET;
1411		if (sunaddr == NULL)
1412			goto out_free;
1413
1414		other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1415					hash, &err);
1416		if (other == NULL)
1417			goto out_free;
1418	}
1419
1420	unix_state_lock(other);
1421	err = -EPERM;
1422	if (!unix_may_send(sk, other))
1423		goto out_unlock;
1424
1425	if (sock_flag(other, SOCK_DEAD)) {
1426		/*
1427		 *	Check with 1003.1g - what should
1428		 *	datagram error
1429		 */
1430		unix_state_unlock(other);
1431		sock_put(other);
1432
1433		err = 0;
1434		unix_state_lock(sk);
1435		if (unix_peer(sk) == other) {
1436			unix_peer(sk) = NULL;
1437			unix_state_unlock(sk);
1438
1439			unix_dgram_disconnected(sk, other);
1440			sock_put(other);
1441			err = -ECONNREFUSED;
1442		} else {
1443			unix_state_unlock(sk);
1444		}
1445
1446		other = NULL;
1447		if (err)
1448			goto out_free;
1449		goto restart;
1450	}
1451
1452	err = -EPIPE;
1453	if (other->sk_shutdown & RCV_SHUTDOWN)
1454		goto out_unlock;
1455
1456	if (sk->sk_type != SOCK_SEQPACKET) {
1457		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1458		if (err)
1459			goto out_unlock;
1460	}
1461
1462	if (unix_peer(other) != sk && unix_recvq_full(other)) {
1463		if (!timeo) {
1464			err = -EAGAIN;
1465			goto out_unlock;
1466		}
1467
1468		timeo = unix_wait_for_peer(other, timeo);
1469
1470		err = sock_intr_errno(timeo);
1471		if (signal_pending(current))
1472			goto out_free;
1473
1474		goto restart;
1475	}
1476
1477	skb_queue_tail(&other->sk_receive_queue, skb);
1478	unix_state_unlock(other);
1479	other->sk_data_ready(other, len);
1480	sock_put(other);
1481	scm_destroy(siocb->scm);
1482	return len;
1483
1484out_unlock:
1485	unix_state_unlock(other);
1486out_free:
1487	kfree_skb(skb);
1488out:
1489	if (other)
1490		sock_put(other);
1491	scm_destroy(siocb->scm);
1492	return err;
1493}
1494
1495
1496static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1497			       struct msghdr *msg, size_t len)
1498{
1499	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1500	struct sock *sk = sock->sk;
1501	struct sock *other = NULL;
1502	struct sockaddr_un *sunaddr = msg->msg_name;
1503	int err, size;
1504	struct sk_buff *skb;
1505	int sent = 0;
1506	struct scm_cookie tmp_scm;
1507	bool fds_sent = false;
1508
1509	if (NULL == siocb->scm)
1510		siocb->scm = &tmp_scm;
1511	wait_for_unix_gc();
1512	err = scm_send(sock, msg, siocb->scm);
1513	if (err < 0)
1514		return err;
1515
1516	err = -EOPNOTSUPP;
1517	if (msg->msg_flags&MSG_OOB)
1518		goto out_err;
1519
1520	if (msg->msg_namelen) {
1521		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1522		goto out_err;
1523	} else {
1524		sunaddr = NULL;
1525		err = -ENOTCONN;
1526		other = unix_peer(sk);
1527		if (!other)
1528			goto out_err;
1529	}
1530
1531	if (sk->sk_shutdown & SEND_SHUTDOWN)
1532		goto pipe_err;
1533
1534	while (sent < len) {
1535		/*
1536		 *	Optimisation for the fact that under 0.01% of X
1537		 *	messages typically need breaking up.
1538		 */
1539
1540		size = len-sent;
1541
1542		/* Keep two messages in the pipe so it schedules better */
1543		if (size > ((sk->sk_sndbuf >> 1) - 64))
1544			size = (sk->sk_sndbuf >> 1) - 64;
1545
1546		if (size > SKB_MAX_ALLOC)
1547			size = SKB_MAX_ALLOC;
1548
1549		/*
1550		 *	Grab a buffer
1551		 */
1552
1553		skb = sock_alloc_send_skb(sk, size, msg->msg_flags&MSG_DONTWAIT,
1554					  &err);
1555
1556		if (skb == NULL)
1557			goto out_err;
1558
1559		/*
1560		 *	If you pass two values to the sock_alloc_send_skb
1561		 *	it tries to grab the large buffer with GFP_NOFS
1562		 *	(which can fail easily), and if it fails grab the
1563		 *	fallback size buffer which is under a page and will
1564		 *	succeed. [Alan]
1565		 */
1566		size = min_t(int, size, skb_tailroom(skb));
1567
1568		memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1569		/* Only send the fds in the first buffer */
1570		if (siocb->scm->fp && !fds_sent) {
1571			err = unix_attach_fds(siocb->scm, skb);
1572			if (err) {
1573				kfree_skb(skb);
1574				goto out_err;
1575			}
1576			fds_sent = true;
1577		}
1578
1579		err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
1580		if (err) {
1581			kfree_skb(skb);
1582			goto out_err;
1583		}
1584
1585		unix_state_lock(other);
1586
1587		if (sock_flag(other, SOCK_DEAD) ||
1588		    (other->sk_shutdown & RCV_SHUTDOWN))
1589			goto pipe_err_free;
1590
1591		skb_queue_tail(&other->sk_receive_queue, skb);
1592		unix_state_unlock(other);
1593		other->sk_data_ready(other, size);
1594		sent += size;
1595	}
1596
1597	scm_destroy(siocb->scm);
1598	siocb->scm = NULL;
1599
1600	return sent;
1601
1602pipe_err_free:
1603	unix_state_unlock(other);
1604	kfree_skb(skb);
1605pipe_err:
1606	if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1607		send_sig(SIGPIPE, current, 0);
1608	err = -EPIPE;
1609out_err:
1610	scm_destroy(siocb->scm);
1611	siocb->scm = NULL;
1612	return sent ? : err;
1613}
1614
1615static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
1616				  struct msghdr *msg, size_t len)
1617{
1618	int err;
1619	struct sock *sk = sock->sk;
1620
1621	err = sock_error(sk);
1622	if (err)
1623		return err;
1624
1625	if (sk->sk_state != TCP_ESTABLISHED)
1626		return -ENOTCONN;
1627
1628	if (msg->msg_namelen)
1629		msg->msg_namelen = 0;
1630
1631	return unix_dgram_sendmsg(kiocb, sock, msg, len);
1632}
1633
1634static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
1635{
1636	struct unix_sock *u = unix_sk(sk);
1637
1638	msg->msg_namelen = 0;
1639	if (u->addr) {
1640		msg->msg_namelen = u->addr->len;
1641		memcpy(msg->msg_name, u->addr->name, u->addr->len);
1642	}
1643}
1644
1645static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
1646			      struct msghdr *msg, size_t size,
1647			      int flags)
1648{
1649	struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1650	struct scm_cookie tmp_scm;
1651	struct sock *sk = sock->sk;
1652	struct unix_sock *u = unix_sk(sk);
1653	int noblock = flags & MSG_DONTWAIT;
1654	struct sk_buff *skb;
1655	int err;
1656
1657	err = -EOPNOTSUPP;
1658	if (flags&MSG_OOB)
1659		goto out;
1660
1661	msg->msg_namelen = 0;
1662
1663	mutex_lock(&u->readlock);
1664
1665	skb = skb_recv_datagram(sk, flags, noblock, &err);
1666	if (!skb) {
1667		unix_state_lock(sk);
1668		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
1669		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
1670		    (sk->sk_shutdown & RCV_SHUTDOWN))
1671			err = 0;
1672		unix_state_unlock(sk);
1673		goto out_unlock;
1674	}
1675
1676	wake_up_interruptible_sync(&u->peer_wait);
1677
1678	if (msg->msg_name)
1679		unix_copy_addr(msg, skb->sk);
1680
1681	if (size > skb->len)
1682		size = skb->len;
1683	else if (size < skb->len)
1684		msg->msg_flags |= MSG_TRUNC;
1685
1686	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size);
1687	if (err)
1688		goto out_free;
1689
1690	if (!siocb->scm) {
1691		siocb->scm = &tmp_scm;
1692		memset(&tmp_scm, 0, sizeof(tmp_scm));
1693	}
1694	siocb->scm->creds = *UNIXCREDS(skb);
1695	unix_set_secdata(siocb->scm, skb);
1696
1697	if (!(flags & MSG_PEEK)) {
1698		if (UNIXCB(skb).fp)
1699			unix_detach_fds(siocb->scm, skb);
1700	} else {
1701		/* It is questionable: on PEEK we could:
1702		   - do not return fds - good, but too simple 8)
1703		   - return fds, and do not return them on read (old strategy,
1704		     apparently wrong)
1705		   - clone fds (I chose it for now, it is the most universal
1706		     solution)
1707
1708		   POSIX 1003.1g does not actually define this clearly
1709		   at all. POSIX 1003.1g doesn't define a lot of things
1710		   clearly however!
1711
1712		*/
1713		if (UNIXCB(skb).fp)
1714			siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1715	}
1716	err = size;
1717
1718	scm_recv(sock, msg, siocb->scm, flags);
1719
1720out_free:
1721	skb_free_datagram(sk, skb);
1722out_unlock:
1723	mutex_unlock(&u->readlock);
1724out:
1725	return err;
1726}
1727
1728/*
1729 *	Sleep until data has arrive. But check for races..
1730 */
1731
1732static long unix_stream_data_wait(struct sock *sk, long timeo)
1733{
1734	DEFINE_WAIT(wait);
1735
1736	unix_state_lock(sk);
1737
1738	for (;;) {
1739		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
1740
1741		if (!skb_queue_empty(&sk->sk_receive_queue) ||
1742		    sk->sk_err ||
1743		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
1744		    signal_pending(current) ||
1745		    !timeo)
1746			break;
1747
1748		set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1749		unix_state_unlock(sk);
1750		timeo = schedule_timeout(timeo);
1751		unix_state_lock(sk);
1752		clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1753	}
1754
1755	finish_wait(sk->sk_sleep, &wait);
1756	unix_state_unlock(sk);
1757	return timeo;
1758}
1759
1760
1761
1762static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
1763			       struct msghdr *msg, size_t size,
1764			       int flags)
1765{
1766	struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1767	struct scm_cookie tmp_scm;
1768	struct sock *sk = sock->sk;
1769	struct unix_sock *u = unix_sk(sk);
1770	struct sockaddr_un *sunaddr = msg->msg_name;
1771	int copied = 0;
1772	int check_creds = 0;
1773	int target;
1774	int err = 0;
1775	long timeo;
1776
1777	err = -EINVAL;
1778	if (sk->sk_state != TCP_ESTABLISHED)
1779		goto out;
1780
1781	err = -EOPNOTSUPP;
1782	if (flags&MSG_OOB)
1783		goto out;
1784
1785	target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
1786	timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT);
1787
1788	msg->msg_namelen = 0;
1789
1790	/* Lock the socket to prevent queue disordering
1791	 * while sleeps in memcpy_tomsg
1792	 */
1793
1794	if (!siocb->scm) {
1795		siocb->scm = &tmp_scm;
1796		memset(&tmp_scm, 0, sizeof(tmp_scm));
1797	}
1798
1799	mutex_lock(&u->readlock);
1800
1801	do {
1802		int chunk;
1803		struct sk_buff *skb;
1804
1805		unix_state_lock(sk);
1806		skb = skb_dequeue(&sk->sk_receive_queue);
1807		if (skb == NULL) {
1808			if (copied >= target)
1809				goto unlock;
1810
1811			/*
1812			 *	POSIX 1003.1g mandates this order.
1813			 */
1814
1815			err = sock_error(sk);
1816			if (err)
1817				goto unlock;
1818			if (sk->sk_shutdown & RCV_SHUTDOWN)
1819				goto unlock;
1820
1821			unix_state_unlock(sk);
1822			err = -EAGAIN;
1823			if (!timeo)
1824				break;
1825			mutex_unlock(&u->readlock);
1826
1827			timeo = unix_stream_data_wait(sk, timeo);
1828
1829			if (signal_pending(current)) {
1830				err = sock_intr_errno(timeo);
1831				goto out;
1832			}
1833			mutex_lock(&u->readlock);
1834			continue;
1835 unlock:
1836			unix_state_unlock(sk);
1837			break;
1838		}
1839		unix_state_unlock(sk);
1840
1841		if (check_creds) {
1842			/* Never glue messages from different writers */
1843			if (memcmp(UNIXCREDS(skb), &siocb->scm->creds,
1844				   sizeof(siocb->scm->creds)) != 0) {
1845				skb_queue_head(&sk->sk_receive_queue, skb);
1846				break;
1847			}
1848		} else {
1849			/* Copy credentials */
1850			siocb->scm->creds = *UNIXCREDS(skb);
1851			check_creds = 1;
1852		}
1853
1854		/* Copy address just once */
1855		if (sunaddr) {
1856			unix_copy_addr(msg, skb->sk);
1857			sunaddr = NULL;
1858		}
1859
1860		chunk = min_t(unsigned int, skb->len, size);
1861		if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
1862			skb_queue_head(&sk->sk_receive_queue, skb);
1863			if (copied == 0)
1864				copied = -EFAULT;
1865			break;
1866		}
1867		copied += chunk;
1868		size -= chunk;
1869
1870		/* Mark read part of skb as used */
1871		if (!(flags & MSG_PEEK)) {
1872			skb_pull(skb, chunk);
1873
1874			if (UNIXCB(skb).fp)
1875				unix_detach_fds(siocb->scm, skb);
1876
1877			/* put the skb back if we didn't use it up.. */
1878			if (skb->len) {
1879				skb_queue_head(&sk->sk_receive_queue, skb);
1880				break;
1881			}
1882
1883			kfree_skb(skb);
1884
1885			if (siocb->scm->fp)
1886				break;
1887		} else {
1888			/* It is questionable, see note in unix_dgram_recvmsg.
1889			 */
1890			if (UNIXCB(skb).fp)
1891				siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1892
1893			/* put message back and return */
1894			skb_queue_head(&sk->sk_receive_queue, skb);
1895			break;
1896		}
1897	} while (size);
1898
1899	mutex_unlock(&u->readlock);
1900	scm_recv(sock, msg, siocb->scm, flags);
1901out:
1902	return copied ? : err;
1903}
1904
1905static int unix_shutdown(struct socket *sock, int mode)
1906{
1907	struct sock *sk = sock->sk;
1908	struct sock *other;
1909
1910	mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);
1911
1912	if (mode) {
1913		unix_state_lock(sk);
1914		sk->sk_shutdown |= mode;
1915		other = unix_peer(sk);
1916		if (other)
1917			sock_hold(other);
1918		unix_state_unlock(sk);
1919		sk->sk_state_change(sk);
1920
1921		if (other &&
1922			(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
1923
1924			int peer_mode = 0;
1925
1926			if (mode&RCV_SHUTDOWN)
1927				peer_mode |= SEND_SHUTDOWN;
1928			if (mode&SEND_SHUTDOWN)
1929				peer_mode |= RCV_SHUTDOWN;
1930			unix_state_lock(other);
1931			other->sk_shutdown |= peer_mode;
1932			unix_state_unlock(other);
1933			other->sk_state_change(other);
1934			read_lock(&other->sk_callback_lock);
1935			if (peer_mode == SHUTDOWN_MASK)
1936				sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
1937			else if (peer_mode & RCV_SHUTDOWN)
1938				sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
1939			read_unlock(&other->sk_callback_lock);
1940		}
1941		if (other)
1942			sock_put(other);
1943	}
1944	return 0;
1945}
1946
1947static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1948{
1949	struct sock *sk = sock->sk;
1950	long amount = 0;
1951	int err;
1952
1953	switch (cmd) {
1954	case SIOCOUTQ:
1955		amount = sk_wmem_alloc_get(sk);
1956		err = put_user(amount, (int __user *)arg);
1957		break;
1958	case SIOCINQ:
1959		{
1960			struct sk_buff *skb;
1961
1962			if (sk->sk_state == TCP_LISTEN) {
1963				err = -EINVAL;
1964				break;
1965			}
1966
1967			spin_lock(&sk->sk_receive_queue.lock);
1968			if (sk->sk_type == SOCK_STREAM ||
1969			    sk->sk_type == SOCK_SEQPACKET) {
1970				skb_queue_walk(&sk->sk_receive_queue, skb)
1971					amount += skb->len;
1972			} else {
1973				skb = skb_peek(&sk->sk_receive_queue);
1974				if (skb)
1975					amount = skb->len;
1976			}
1977			spin_unlock(&sk->sk_receive_queue.lock);
1978			err = put_user(amount, (int __user *)arg);
1979			break;
1980		}
1981
1982	default:
1983		err = -ENOIOCTLCMD;
1984		break;
1985	}
1986	return err;
1987}
1988
1989static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
1990{
1991	struct sock *sk = sock->sk;
1992	unsigned int mask;
1993
1994	sock_poll_wait(file, sk->sk_sleep, wait);
1995	mask = 0;
1996
1997	/* exceptional events? */
1998	if (sk->sk_err)
1999		mask |= POLLERR;
2000	if (sk->sk_shutdown == SHUTDOWN_MASK)
2001		mask |= POLLHUP;
2002	if (sk->sk_shutdown & RCV_SHUTDOWN)
2003		mask |= POLLRDHUP;
2004
2005	/* readable? */
2006	if (!skb_queue_empty(&sk->sk_receive_queue) ||
2007	    (sk->sk_shutdown & RCV_SHUTDOWN))
2008		mask |= POLLIN | POLLRDNORM;
2009
2010	/* Connection-based need to check for termination and startup */
2011	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2012	    sk->sk_state == TCP_CLOSE)
2013		mask |= POLLHUP;
2014
2015	/*
2016	 * we set writable also when the other side has shut down the
2017	 * connection. This prevents stuck sockets.
2018	 */
2019	if (unix_writable(sk))
2020		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2021
2022	return mask;
2023}
2024
2025static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
2026				    poll_table *wait)
2027{
2028	struct sock *sk = sock->sk, *other;
2029	unsigned int mask, writable;
2030
2031	sock_poll_wait(file, sk->sk_sleep, wait);
2032	mask = 0;
2033
2034	/* exceptional events? */
2035	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
2036		mask |= POLLERR;
2037	if (sk->sk_shutdown & RCV_SHUTDOWN)
2038		mask |= POLLRDHUP;
2039	if (sk->sk_shutdown == SHUTDOWN_MASK)
2040		mask |= POLLHUP;
2041
2042	/* readable? */
2043	if (!skb_queue_empty(&sk->sk_receive_queue) ||
2044	    (sk->sk_shutdown & RCV_SHUTDOWN))
2045		mask |= POLLIN | POLLRDNORM;
2046
2047	/* Connection-based need to check for termination and startup */
2048	if (sk->sk_type == SOCK_SEQPACKET) {
2049		if (sk->sk_state == TCP_CLOSE)
2050			mask |= POLLHUP;
2051		/* connection hasn't started yet? */
2052		if (sk->sk_state == TCP_SYN_SENT)
2053			return mask;
2054	}
2055
2056	/* writable? */
2057	writable = unix_writable(sk);
2058	if (writable) {
2059		other = unix_peer_get(sk);
2060		if (other) {
2061			if (unix_peer(other) != sk) {
2062				sock_poll_wait(file, &unix_sk(other)->peer_wait,
2063					  wait);
2064				if (unix_recvq_full(other))
2065					writable = 0;
2066			}
2067
2068			sock_put(other);
2069		}
2070	}
2071
2072	if (writable)
2073		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2074	else
2075		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
2076
2077	return mask;
2078}
2079
2080#ifdef CONFIG_PROC_FS
2081static struct sock *first_unix_socket(int *i)
2082{
2083	for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) {
2084		if (!hlist_empty(&unix_socket_table[*i]))
2085			return __sk_head(&unix_socket_table[*i]);
2086	}
2087	return NULL;
2088}
2089
2090static struct sock *next_unix_socket(int *i, struct sock *s)
2091{
2092	struct sock *next = sk_next(s);
2093	/* More in this chain? */
2094	if (next)
2095		return next;
2096	/* Look for next non-empty chain. */
2097	for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) {
2098		if (!hlist_empty(&unix_socket_table[*i]))
2099			return __sk_head(&unix_socket_table[*i]);
2100	}
2101	return NULL;
2102}
2103
2104struct unix_iter_state {
2105	struct seq_net_private p;
2106	int i;
2107};
2108
2109static struct sock *unix_seq_idx(struct seq_file *seq, loff_t pos)
2110{
2111	struct unix_iter_state *iter = seq->private;
2112	loff_t off = 0;
2113	struct sock *s;
2114
2115	for (s = first_unix_socket(&iter->i); s; s = next_unix_socket(&iter->i, s)) {
2116		if (sock_net(s) != seq_file_net(seq))
2117			continue;
2118		if (off == pos)
2119			return s;
2120		++off;
2121	}
2122	return NULL;
2123}
2124
2125static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2126	__acquires(unix_table_lock)
2127{
2128	spin_lock(&unix_table_lock);
2129	return *pos ? unix_seq_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2130}
2131
2132static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2133{
2134	struct unix_iter_state *iter = seq->private;
2135	struct sock *sk = v;
2136	++*pos;
2137
2138	if (v == SEQ_START_TOKEN)
2139		sk = first_unix_socket(&iter->i);
2140	else
2141		sk = next_unix_socket(&iter->i, sk);
2142	while (sk && (sock_net(sk) != seq_file_net(seq)))
2143		sk = next_unix_socket(&iter->i, sk);
2144	return sk;
2145}
2146
2147static void unix_seq_stop(struct seq_file *seq, void *v)
2148	__releases(unix_table_lock)
2149{
2150	spin_unlock(&unix_table_lock);
2151}
2152
2153static int unix_seq_show(struct seq_file *seq, void *v)
2154{
2155
2156	if (v == SEQ_START_TOKEN)
2157		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
2158			 "Inode Path\n");
2159	else {
2160		struct sock *s = v;
2161		struct unix_sock *u = unix_sk(s);
2162		unix_state_lock(s);
2163
2164		seq_printf(seq, "%p: %08X %08X %08X %04X %02X %5lu",
2165			s,
2166			atomic_read(&s->sk_refcnt),
2167			0,
2168			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2169			s->sk_type,
2170			s->sk_socket ?
2171			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2172			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2173			sock_i_ino(s));
2174
2175		if (u->addr) {
2176			int i, len;
2177			seq_putc(seq, ' ');
2178
2179			i = 0;
2180			len = u->addr->len - sizeof(short);
2181			if (!UNIX_ABSTRACT(s))
2182				len--;
2183			else {
2184				seq_putc(seq, '@');
2185				i++;
2186			}
2187			for ( ; i < len; i++)
2188				seq_putc(seq, u->addr->name->sun_path[i]);
2189		}
2190		unix_state_unlock(s);
2191		seq_putc(seq, '\n');
2192	}
2193
2194	return 0;
2195}
2196
2197static const struct seq_operations unix_seq_ops = {
2198	.start  = unix_seq_start,
2199	.next   = unix_seq_next,
2200	.stop   = unix_seq_stop,
2201	.show   = unix_seq_show,
2202};
2203
2204static int unix_seq_open(struct inode *inode, struct file *file)
2205{
2206	return seq_open_net(inode, file, &unix_seq_ops,
2207			    sizeof(struct unix_iter_state));
2208}
2209
2210static const struct file_operations unix_seq_fops = {
2211	.owner		= THIS_MODULE,
2212	.open		= unix_seq_open,
2213	.read		= seq_read,
2214	.llseek		= seq_lseek,
2215	.release	= seq_release_net,
2216};
2217
2218#endif
2219
2220static const struct net_proto_family unix_family_ops = {
2221	.family = PF_UNIX,
2222	.create = unix_create,
2223	.owner	= THIS_MODULE,
2224};
2225
2226
2227static int __net_init unix_net_init(struct net *net)
2228{
2229	int error = -ENOMEM;
2230
2231	net->unx.sysctl_max_dgram_qlen = 10;
2232	if (unix_sysctl_register(net))
2233		goto out;
2234
2235#ifdef CONFIG_PROC_FS
2236	if (!proc_net_fops_create(net, "unix", 0, &unix_seq_fops)) {
2237		unix_sysctl_unregister(net);
2238		goto out;
2239	}
2240#endif
2241	error = 0;
2242out:
2243	return error;
2244}
2245
2246static void __net_exit unix_net_exit(struct net *net)
2247{
2248	unix_sysctl_unregister(net);
2249	proc_net_remove(net, "unix");
2250}
2251
2252static struct pernet_operations unix_net_ops = {
2253	.init = unix_net_init,
2254	.exit = unix_net_exit,
2255};
2256
2257static int __init af_unix_init(void)
2258{
2259	int rc = -1;
2260	struct sk_buff *dummy_skb;
2261
2262	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb));
2263
2264	rc = proto_register(&unix_proto, 1);
2265	if (rc != 0) {
2266		printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n",
2267		       __func__);
2268		goto out;
2269	}
2270
2271	sock_register(&unix_family_ops);
2272	register_pernet_subsys(&unix_net_ops);
2273out:
2274	return rc;
2275}
2276
2277static void __exit af_unix_exit(void)
2278{
2279	sock_unregister(PF_UNIX);
2280	proto_unregister(&unix_proto);
2281	unregister_pernet_subsys(&unix_net_ops);
2282}
2283
2284/* Earlier than device_initcall() so that other drivers invoking
2285   request_module() don't end up in a loop when modprobe tries
2286   to use a UNIX socket. But later than subsys_initcall() because
2287   we depend on stuff initialised there */
2288fs_initcall(af_unix_init);
2289module_exit(af_unix_exit);
2290
2291MODULE_LICENSE("GPL");
2292MODULE_ALIAS_NETPROTO(PF_UNIX);
2293