af_unix.c revision 8ba69ba6a324b13e1190fc31e41954d190fd4f1d
/*
 * NET4:	Implementation of BSD Unix domain sockets.
 *
 * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Fixes:
 *		Linus Torvalds	:	Assorted bug cures.
 *		Niibe Yutaka	:	async I/O support.
 *		Carsten Paeth	:	PF_UNIX check, address fixes.
 *		Alan Cox	:	Limit size of allocated blocks.
 *		Alan Cox	:	Fixed the stupid socketpair bug.
 *		Alan Cox	:	BSD compatibility fine tuning.
 *		Alan Cox	:	Fixed a bug in connect when interrupted.
 *		Alan Cox	:	Sorted out a proper draft version of
 *					file descriptor passing hacked up from
 *					Mike Shaver's work.
 *		Marty Leisner	:	Fixes to fd passing.
 *		Nick Nevin	:	recvmsg bugfix.
 *		Alan Cox	:	Started proper garbage collector.
 *		Heiko Eißfeldt	:	Missing verify_area check.
 *		Alan Cox	:	Started POSIXisms.
 *		Andreas Schwab	:	Replace inode by dentry for proper
 *					reference counting.
 *		Kirk Petersen	:	Made this a module.
 *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
 *					Lots of bug fixes.
 *	     Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
 *					by the above two patches.
 *	     Andrea Arcangeli	:	If possible we block in connect(2)
 *					if the max backlog of the listen socket
 *					has been reached. This won't break
 *					old apps and it will avoid a huge amount
 *					of hashed sockets (this for unix_gc()
 *					performance reasons).
 *					Security fix that limits the max
 *					number of socks to 2*max_files and
 *					the number of skbs queueable in the
 *					dgram receiver.
 *		Artur Skawina   :	Hash function optimizations.
 *	     Alexey Kuznetsov   :	Full scale SMP. Lots of bugs are introduced 8)
 *	      Malcolm Beattie   :	Set peercred for socketpair.
 *	     Michal Ostrowski   :       Module initialization cleanup.
 *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
 *	     				the core infrastructure is doing that
 *	     				for all net proto families now (2.5.69+).
 *
 *
 * Known differences from the reference BSD that was tested:
 *
 *	[TO FIX]
 *	ECONNREFUSED is not returned from one end of a connected socket to the
 *		other the moment one end closes.
 *	fstat() doesn't return st_dev=0, and gives the blksize as the high water
 *		mark and a fake inode identifier (nor the BSD first-socket-fstat-twice bug).
 *	[NOT TO FIX]
 *	accept() returns a path name even if the connecting socket has closed
 *		in the meantime (BSD loses the path and gives up).
 *	accept() returns 0 length path for an unbound connector. BSD returns 16
 *		and a null first byte in the path (but not for gethost/peername - BSD bug??)
 *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
 *	BSD af_unix apparently has connect forgetting to block properly.
 *		(need to check this against the POSIX spec in detail)
 *
 * Differences from 2.0.0-11-... (ANK)
 *	Bug fixes and improvements.
 *		- client shutdown killed the server socket.
 *		- removed all useless cli/sti pairs.
 *
 *	Semantic changes/extensions.
 *		- generic control message passing.
 *		- SCM_CREDENTIALS control message.
 *		- "Abstract" (not FS based) socket bindings.
 *		  Abstract names are sequences of bytes (not zero terminated)
 *		  starting with 0, so that this name space does not intersect
 *		  with BSD names.
 */
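
/*
 * A minimal userspace sketch of the two binding flavours described above
 * (illustrative only, kept out of the build with #if 0; the "/tmp/u.sock"
 * path and the "demo" abstract name are invented for the example):
 */
#if 0
#include <stddef.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/un.h>

static int bind_examples(void)
{
	struct sockaddr_un fsaddr = { .sun_family = AF_UNIX };
	struct sockaddr_un abaddr = { .sun_family = AF_UNIX };
	int fd1 = socket(AF_UNIX, SOCK_STREAM, 0);
	int fd2 = socket(AF_UNIX, SOCK_STREAM, 0);

	/* Filesystem name: NUL-terminated path, creates an inode. */
	strcpy(fsaddr.sun_path, "/tmp/u.sock");
	bind(fd1, (struct sockaddr *)&fsaddr, sizeof(fsaddr));

	/* Abstract name: leading NUL byte, the length delimits the name. */
	abaddr.sun_path[0] = '\0';
	memcpy(abaddr.sun_path + 1, "demo", 4);
	bind(fd2, (struct sockaddr *)&abaddr,
	     offsetof(struct sockaddr_un, sun_path) + 1 + 4);
	return 0;
}
#endif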

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/dcache.h>
#include <linux/namei.h>
#include <linux/socket.h>
#include <linux/un.h>
#include <linux/fcntl.h>
#include <linux/termios.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/in.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <net/af_unix.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <net/scm.h>
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/rtnetlink.h>
#include <linux/mount.h>
#include <net/checksum.h>
#include <linux/security.h>

static struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
static DEFINE_SPINLOCK(unix_table_lock);
static atomic_t unix_nr_socks = ATOMIC_INIT(0);

#define unix_sockets_unbound	(&unix_socket_table[UNIX_HASH_SIZE])

#define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)

#ifdef CONFIG_SECURITY_NETWORK
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	memcpy(UNIXSID(skb), &scm->secid, sizeof(u32));
}

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secid = *UNIXSID(skb);
}
#else
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }
#endif /* CONFIG_SECURITY_NETWORK */

/*
 *  SMP locking strategy:
 *    the hash table is protected with spinlock unix_table_lock
 *    each socket state is protected by a separate spinlock.
 */

static inline unsigned unix_hash_fold(__wsum n)
{
	unsigned hash = (__force unsigned)n;
	hash ^= hash>>16;
	hash ^= hash>>8;
	return hash&(UNIX_HASH_SIZE-1);
}
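
/*
 * A worked example of the fold above (input value invented, and assuming
 * UNIX_HASH_SIZE == 256 as defined in af_unix.h): n = 0x1234abcd folds as
 * 0x1234abcd ^ 0x00001234 = 0x1234b9f9, then 0x1234b9f9 ^ 0x001234b9 =
 * 0x12268d40, and finally 0x12268d40 & 0xff = 0x40 is the bucket index.
 */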

#define unix_peer(sk) (unix_sk(sk)->peer)

static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == sk;
}

static inline int unix_may_send(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
}

static inline int unix_recvq_full(struct sock const *sk)
{
	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
}

static struct sock *unix_peer_get(struct sock *s)
{
	struct sock *peer;

	unix_state_lock(s);
	peer = unix_peer(s);
	if (peer)
		sock_hold(peer);
	unix_state_unlock(s);
	return peer;
}

static inline void unix_release_addr(struct unix_address *addr)
{
	if (atomic_dec_and_test(&addr->refcnt))
		kfree(addr);
}

/*
 *	Check unix socket name:
 *		- it should not be zero length.
 *		- if it does not start with a zero byte, it should be NUL
 *		  terminated (FS object).
 *		- if it starts with a zero byte, it is an abstract name.
 */

static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned *hashp)
{
	if (len <= sizeof(short) || len > sizeof(*sunaddr))
		return -EINVAL;
	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
		return -EINVAL;
	if (sunaddr->sun_path[0]) {
		/*
		 * This may look like an off by one error but it is a bit more
		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
		 * sun_path[108] doesn't as such exist.  However in kernel space
		 * we are guaranteed that it is a valid memory location in our
		 * kernel address buffer.
		 */
		((char *)sunaddr)[len] = 0;
		len = strlen(sunaddr->sun_path)+1+sizeof(short);
		return len;
	}

	*hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
	return len;
}

static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}

static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	WARN_ON(!sk_unhashed(sk));
	sk_add_node(sk, list);
}

static inline void unix_remove_socket(struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_remove_socket(sk);
	spin_unlock(&unix_table_lock);
}

static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_insert_socket(list, sk);
	spin_unlock(&unix_table_lock);
}

static struct sock *__unix_find_socket_byname(struct net *net,
					      struct sockaddr_un *sunname,
					      int len, int type, unsigned hash)
{
	struct sock *s;
	struct hlist_node *node;

	sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
		struct unix_sock *u = unix_sk(s);

		if (!net_eq(sock_net(s), net))
			continue;

		if (u->addr->len == len &&
		    !memcmp(u->addr->name, sunname, len))
			goto found;
	}
	s = NULL;
found:
	return s;
}

static inline struct sock *unix_find_socket_byname(struct net *net,
						   struct sockaddr_un *sunname,
						   int len, int type,
						   unsigned hash)
{
	struct sock *s;

	spin_lock(&unix_table_lock);
	s = __unix_find_socket_byname(net, sunname, len, type, hash);
	if (s)
		sock_hold(s);
	spin_unlock(&unix_table_lock);
	return s;
}

static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i)
{
	struct sock *s;
	struct hlist_node *node;

	spin_lock(&unix_table_lock);
	sk_for_each(s, node,
		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
		struct dentry *dentry = unix_sk(s)->dentry;

		if (!net_eq(sock_net(s), net))
			continue;

		if (dentry && dentry->d_inode == i) {
			sock_hold(s);
			goto found;
		}
	}
	s = NULL;
found:
	spin_unlock(&unix_table_lock);
	return s;
}

static inline int unix_writable(struct sock *sk)
{
	return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
}

static void unix_write_space(struct sock *sk)
{
	read_lock(&sk->sk_callback_lock);
	if (unix_writable(sk)) {
		if (sk_has_sleeper(sk))
			wake_up_interruptible_sync(sk->sk_sleep);
		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
	}
	read_unlock(&sk->sk_callback_lock);
}

/* When a dgram socket disconnects (or changes its peer), we clear its receive
 * queue of packets that arrived from the previous peer. First, this allows us
 * to do flow control based only on wmem_alloc; second, an sk connected to a
 * peer may receive messages only from that peer. */
static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
{
	if (!skb_queue_empty(&sk->sk_receive_queue)) {
		skb_queue_purge(&sk->sk_receive_queue);
		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);

		/* If one link of a bidirectional dgram pipe is disconnected,
		 * we signal an error. Messages are lost. Do not do this
		 * when the peer was not connected to us.
		 */
		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
			other->sk_err = ECONNRESET;
			other->sk_error_report(other);
		}
	}
}

static void unix_sock_destructor(struct sock *sk)
{
	struct unix_sock *u = unix_sk(sk);

	skb_queue_purge(&sk->sk_receive_queue);

	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
	WARN_ON(!sk_unhashed(sk));
	WARN_ON(sk->sk_socket);
	if (!sock_flag(sk, SOCK_DEAD)) {
		printk(KERN_INFO "Attempt to release alive unix socket: %p\n", sk);
		return;
	}

	if (u->addr)
		unix_release_addr(u->addr);

	atomic_dec(&unix_nr_socks);
	local_bh_disable();
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
	local_bh_enable();
#ifdef UNIX_REFCNT_DEBUG
	printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk,
		atomic_read(&unix_nr_socks));
#endif
}

static int unix_release_sock(struct sock *sk, int embrion)
{
	struct unix_sock *u = unix_sk(sk);
	struct dentry *dentry;
	struct vfsmount *mnt;
	struct sock *skpair;
	struct sk_buff *skb;
	int state;

	unix_remove_socket(sk);

	/* Clear state */
	unix_state_lock(sk);
	sock_orphan(sk);
	sk->sk_shutdown = SHUTDOWN_MASK;
	dentry	     = u->dentry;
	u->dentry    = NULL;
	mnt	     = u->mnt;
	u->mnt	     = NULL;
	state = sk->sk_state;
	sk->sk_state = TCP_CLOSE;
	unix_state_unlock(sk);

	wake_up_interruptible_all(&u->peer_wait);

	skpair = unix_peer(sk);

	if (skpair != NULL) {
		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
			unix_state_lock(skpair);
			/* No more writes */
			skpair->sk_shutdown = SHUTDOWN_MASK;
			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
				skpair->sk_err = ECONNRESET;
			unix_state_unlock(skpair);
			skpair->sk_state_change(skpair);
			read_lock(&skpair->sk_callback_lock);
			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
			read_unlock(&skpair->sk_callback_lock);
		}
		sock_put(skpair); /* It may now die */
		unix_peer(sk) = NULL;
	}

	/* Try to flush out this socket. Throw out buffers at least */

	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		if (state == TCP_LISTEN)
			unix_release_sock(skb->sk, 1);
		/* passed fds are erased in the kfree_skb hook	      */
		kfree_skb(skb);
	}

	if (dentry) {
		dput(dentry);
		mntput(mnt);
	}

	sock_put(sk);

	/* ---- Socket is dead now and most probably destroyed ---- */

	/*
	 * Fixme: BSD difference: In BSD all sockets connected to us get
	 *	  ECONNRESET and we die on the spot. In Linux we behave
	 *	  like files and pipes do and wait for the last
	 *	  dereference.
	 *
	 * Can't we simply set sock->err?
	 *
	 *	  What does the above comment talk about? --ANK(980817)
	 */

	if (unix_tot_inflight)
		unix_gc();		/* Garbage collect fds */

	return 0;
}

static int unix_listen(struct socket *sock, int backlog)
{
	int err;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;	/* Only stream/seqpacket sockets accept */
	err = -EINVAL;
	if (!u->addr)
		goto out;	/* No listens on an unbound socket */
	unix_state_lock(sk);
	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (backlog > sk->sk_max_ack_backlog)
		wake_up_interruptible_all(&u->peer_wait);
	sk->sk_max_ack_backlog	= backlog;
	sk->sk_state		= TCP_LISTEN;
	/* set credentials so connect can copy them */
	sk->sk_peercred.pid	= task_tgid_vnr(current);
	current_euid_egid(&sk->sk_peercred.uid, &sk->sk_peercred.gid);
	err = 0;

out_unlock:
	unix_state_unlock(sk);
out:
	return err;
}

static int unix_release(struct socket *);
static int unix_bind(struct socket *, struct sockaddr *, int);
static int unix_stream_connect(struct socket *, struct sockaddr *,
			       int addr_len, int flags);
static int unix_socketpair(struct socket *, struct socket *);
static int unix_accept(struct socket *, struct socket *, int);
static int unix_getname(struct socket *, struct sockaddr *, int *, int);
static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
static unsigned int unix_dgram_poll(struct file *, struct socket *,
				    poll_table *);
static int unix_ioctl(struct socket *, unsigned int, unsigned long);
static int unix_shutdown(struct socket *, int);
static int unix_stream_sendmsg(struct kiocb *, struct socket *,
			       struct msghdr *, size_t);
static int unix_stream_recvmsg(struct kiocb *, struct socket *,
			       struct msghdr *, size_t, int);
static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
			      struct msghdr *, size_t);
static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
			      struct msghdr *, size_t, int);
static int unix_dgram_connect(struct socket *, struct sockaddr *,
			      int, int);
static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
				  struct msghdr *, size_t);

static const struct proto_ops unix_stream_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_poll,
	.ioctl =	unix_ioctl,
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_stream_sendmsg,
	.recvmsg =	unix_stream_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};

static const struct proto_ops unix_dgram_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_dgram_connect,
	.socketpair =	unix_socketpair,
	.accept =	sock_no_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_dgram_sendmsg,
	.recvmsg =	unix_dgram_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};

static const struct proto_ops unix_seqpacket_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_seqpacket_sendmsg,
	.recvmsg =	unix_dgram_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};

static struct proto unix_proto = {
	.name			= "UNIX",
	.owner			= THIS_MODULE,
	.obj_size		= sizeof(struct unix_sock),
};

/*
 * AF_UNIX sockets do not interact with hardware, hence they
 * don't trigger interrupts - so it's safe for them to have
 * bh-unsafe locking for their sk_receive_queue.lock. Split off
 * this special lock-class by reinitializing the spinlock key:
 */
static struct lock_class_key af_unix_sk_receive_queue_lock_key;

static struct sock *unix_create1(struct net *net, struct socket *sock)
{
	struct sock *sk = NULL;
	struct unix_sock *u;

	atomic_inc(&unix_nr_socks);
	if (atomic_read(&unix_nr_socks) > 2 * get_max_files())
		goto out;

	sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto);
	if (!sk)
		goto out;

	sock_init_data(sock, sk);
	lockdep_set_class(&sk->sk_receive_queue.lock,
				&af_unix_sk_receive_queue_lock_key);

	sk->sk_write_space	= unix_write_space;
	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
	sk->sk_destruct		= unix_sock_destructor;
	u	  = unix_sk(sk);
	u->dentry = NULL;
	u->mnt	  = NULL;
	spin_lock_init(&u->lock);
	atomic_long_set(&u->inflight, 0);
	INIT_LIST_HEAD(&u->link);
	mutex_init(&u->readlock); /* single task reading lock */
	init_waitqueue_head(&u->peer_wait);
	unix_insert_socket(unix_sockets_unbound, sk);
out:
	if (sk == NULL)
		atomic_dec(&unix_nr_socks);
	else {
		local_bh_disable();
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
		local_bh_enable();
	}
	return sk;
}

static int unix_create(struct net *net, struct socket *sock, int protocol)
{
	if (protocol && protocol != PF_UNIX)
		return -EPROTONOSUPPORT;

	sock->state = SS_UNCONNECTED;

	switch (sock->type) {
	case SOCK_STREAM:
		sock->ops = &unix_stream_ops;
		break;
		/*
		 *	Believe it or not BSD has AF_UNIX, SOCK_RAW though
		 *	nothing uses it.
		 */
	case SOCK_RAW:
		sock->type = SOCK_DGRAM;
	case SOCK_DGRAM:
		sock->ops = &unix_dgram_ops;
		break;
	case SOCK_SEQPACKET:
		sock->ops = &unix_seqpacket_ops;
		break;
	default:
		return -ESOCKTNOSUPPORT;
	}

	return unix_create1(net, sock) ? 0 : -ENOMEM;
}

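/*
 * Illustrative note: as the switch in unix_create() shows, SOCK_RAW is
 * quietly coerced to SOCK_DGRAM (the case falls through), so a userspace
 * socket(AF_UNIX, SOCK_RAW, 0) call succeeds and behaves exactly like a
 * SOCK_DGRAM socket rather than failing with -ESOCKTNOSUPPORT.
 */
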
static int unix_release(struct socket *sock)
{
	struct sock *sk = sock->sk;

	if (!sk)
		return 0;

	sock->sk = NULL;

	return unix_release_sock(sk, 0);
}

static int unix_autobind(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	static u32 ordernum = 1;
	struct unix_address *addr;
	int err;

	mutex_lock(&u->readlock);

	err = 0;
	if (u->addr)
		goto out;

	err = -ENOMEM;
	addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
	if (!addr)
		goto out;

	addr->name->sun_family = AF_UNIX;
	atomic_set(&addr->refcnt, 1);

retry:
	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
	addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));

	spin_lock(&unix_table_lock);
	ordernum = (ordernum+1)&0xFFFFF;

	if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
				      addr->hash)) {
		spin_unlock(&unix_table_lock);
		/* Sanity yield. It is an unusual case, but still... */
		if (!(ordernum&0xFF))
			yield();
		goto retry;
	}
	addr->hash ^= sk->sk_type;

	__unix_remove_socket(sk);
	u->addr = addr;
	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
	spin_unlock(&unix_table_lock);
	err = 0;

out:	mutex_unlock(&u->readlock);
	return err;
}

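/*
 * A minimal userspace sketch of triggering the autobind above (illustrative
 * only, kept out of the build with #if 0): binding with only the family
 * present, i.e. an address length of sizeof(sa_family_t), makes the kernel
 * assign a 5-hex-digit abstract name of the form "\0XXXXX" as generated in
 * unix_autobind().
 */
#if 0
#include <sys/socket.h>
#include <sys/un.h>

static int autobind_example(void)
{
	struct sockaddr_un sun = { .sun_family = AF_UNIX };
	int fd = socket(AF_UNIX, SOCK_DGRAM, 0);

	/* Length covers the family field only: the kernel picks the name. */
	return bind(fd, (struct sockaddr *)&sun, sizeof(sun.sun_family));
}
#endif
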
static struct sock *unix_find_other(struct net *net,
				    struct sockaddr_un *sunname, int len,
				    int type, unsigned hash, int *error)
{
	struct sock *u;
	struct path path;
	int err = 0;

	if (sunname->sun_path[0]) {
		struct inode *inode;
		err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
		if (err)
			goto fail;
		inode = path.dentry->d_inode;
		err = inode_permission(inode, MAY_WRITE);
		if (err)
			goto put_fail;

		err = -ECONNREFUSED;
		if (!S_ISSOCK(inode->i_mode))
			goto put_fail;
		u = unix_find_socket_byinode(net, inode);
		if (!u)
			goto put_fail;

		if (u->sk_type == type)
			touch_atime(path.mnt, path.dentry);

		path_put(&path);

		err = -EPROTOTYPE;
		if (u->sk_type != type) {
			sock_put(u);
			goto fail;
		}
	} else {
		err = -ECONNREFUSED;
		u = unix_find_socket_byname(net, sunname, len, type, hash);
		if (u) {
			struct dentry *dentry;
			dentry = unix_sk(u)->dentry;
			if (dentry)
				touch_atime(unix_sk(u)->mnt, dentry);
		} else
			goto fail;
	}
	return u;

put_fail:
	path_put(&path);
fail:
	*error = err;
	return NULL;
}


static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct dentry *dentry = NULL;
	struct nameidata nd;
	int err;
	unsigned hash;
	struct unix_address *addr;
	struct hlist_head *list;

	err = -EINVAL;
	if (sunaddr->sun_family != AF_UNIX)
		goto out;

	if (addr_len == sizeof(short)) {
		err = unix_autobind(sock);
		goto out;
	}

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	mutex_lock(&u->readlock);

	err = -EINVAL;
	if (u->addr)
		goto out_up;

	err = -ENOMEM;
	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
	if (!addr)
		goto out_up;

	memcpy(addr->name, sunaddr, addr_len);
	addr->len = addr_len;
	addr->hash = hash ^ sk->sk_type;
	atomic_set(&addr->refcnt, 1);

	if (sunaddr->sun_path[0]) {
		unsigned int mode;
		err = 0;
		/*
		 * Get the parent directory, calculate the hash for last
		 * component.
		 */
		err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd);
		if (err)
			goto out_mknod_parent;

		dentry = lookup_create(&nd, 0);
		err = PTR_ERR(dentry);
		if (IS_ERR(dentry))
			goto out_mknod_unlock;

		/*
		 * All right, let's create it.
		 */
		mode = S_IFSOCK |
		       (SOCK_INODE(sock)->i_mode & ~current_umask());
		err = mnt_want_write(nd.path.mnt);
		if (err)
			goto out_mknod_dput;
		err = security_path_mknod(&nd.path, dentry, mode, 0);
		if (err)
			goto out_mknod_drop_write;
		err = vfs_mknod(nd.path.dentry->d_inode, dentry, mode, 0);
out_mknod_drop_write:
		mnt_drop_write(nd.path.mnt);
		if (err)
			goto out_mknod_dput;
		mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
		dput(nd.path.dentry);
		nd.path.dentry = dentry;

		addr->hash = UNIX_HASH_SIZE;
	}

	spin_lock(&unix_table_lock);

	if (!sunaddr->sun_path[0]) {
		err = -EADDRINUSE;
		if (__unix_find_socket_byname(net, sunaddr, addr_len,
					      sk->sk_type, hash)) {
			unix_release_addr(addr);
			goto out_unlock;
		}

		list = &unix_socket_table[addr->hash];
	} else {
		list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
		u->dentry = nd.path.dentry;
		u->mnt    = nd.path.mnt;
	}

	err = 0;
	__unix_remove_socket(sk);
	u->addr = addr;
	__unix_insert_socket(list, sk);

out_unlock:
	spin_unlock(&unix_table_lock);
out_up:
	mutex_unlock(&u->readlock);
out:
	return err;

out_mknod_dput:
	dput(dentry);
out_mknod_unlock:
	mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
	path_put(&nd.path);
out_mknod_parent:
	if (err == -EEXIST)
		err = -EADDRINUSE;
	unix_release_addr(addr);
	goto out_up;
}

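/*
 * Note: the two helpers below always take the state locks in pointer order
 * (lowest address first), so two tasks locking the same pair of sockets
 * from opposite ends cannot deadlock against each other.
 */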
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_lock(sk1);
		return;
	}
	if (sk1 < sk2) {
		unix_state_lock(sk1);
		unix_state_lock_nested(sk2);
	} else {
		unix_state_lock(sk2);
		unix_state_lock_nested(sk1);
	}
}

static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_unlock(sk1);
		return;
	}
	unix_state_unlock(sk1);
	unix_state_unlock(sk2);
}

static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
			      int alen, int flags)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
	struct sock *other;
	unsigned hash;
	int err;

	if (addr->sa_family != AF_UNSPEC) {
		err = unix_mkname(sunaddr, alen, &hash);
		if (err < 0)
			goto out;
		alen = err;

		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
			goto out;

restart:
		other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
		if (!other)
			goto out;

		unix_state_double_lock(sk, other);

		/* Apparently VFS overslept socket death. Retry. */
		if (sock_flag(other, SOCK_DEAD)) {
			unix_state_double_unlock(sk, other);
			sock_put(other);
			goto restart;
		}

		err = -EPERM;
		if (!unix_may_send(sk, other))
			goto out_unlock;

		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;

	} else {
		/*
		 *	1003.1g breaking connected state with AF_UNSPEC
		 */
		other = NULL;
		unix_state_double_lock(sk, other);
	}

	/*
	 * If it was connected, reconnect.
	 */
	if (unix_peer(sk)) {
		struct sock *old_peer = unix_peer(sk);
		unix_peer(sk) = other;
		unix_state_double_unlock(sk, other);

		if (other != old_peer)
			unix_dgram_disconnected(sk, old_peer);
		sock_put(old_peer);
	} else {
		unix_peer(sk) = other;
		unix_state_double_unlock(sk, other);
	}
	return 0;

out_unlock:
	unix_state_double_unlock(sk, other);
	sock_put(other);
out:
	return err;
}

static long unix_wait_for_peer(struct sock *other, long timeo)
{
	struct unix_sock *u = unix_sk(other);
	int sched;
	DEFINE_WAIT(wait);

	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);

	sched = !sock_flag(other, SOCK_DEAD) &&
		!(other->sk_shutdown & RCV_SHUTDOWN) &&
		unix_recvq_full(other);

	unix_state_unlock(other);

	if (sched)
		timeo = schedule_timeout(timeo);

	finish_wait(&u->peer_wait, &wait);
	return timeo;
}

static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			       int addr_len, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
	struct sock *newsk = NULL;
	struct sock *other = NULL;
	struct sk_buff *skb = NULL;
	unsigned hash;
	int st;
	int err;
	long timeo;

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	if (test_bit(SOCK_PASSCRED, &sock->flags)
		&& !u->addr && (err = unix_autobind(sock)) != 0)
		goto out;

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	/* First of all, allocate resources.
	   If we allocate after the state is locked, we will have
	   to recheck everything again in any case.
	 */

	err = -ENOMEM;

	/* create new sock for complete connection */
	newsk = unix_create1(sock_net(sk), NULL);
	if (newsk == NULL)
		goto out;

	/* Allocate skb for sending to listening sock */
	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
	if (skb == NULL)
		goto out;

restart:
	/*  Find listening sock. */
	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
	if (!other)
		goto out;

	/* Latch state of peer */
	unix_state_lock(other);

	/* Apparently VFS overslept socket death. Retry. */
	if (sock_flag(other, SOCK_DEAD)) {
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = -ECONNREFUSED;
	if (other->sk_state != TCP_LISTEN)
		goto out_unlock;

	if (unix_recvq_full(other)) {
		err = -EAGAIN;
		if (!timeo)
			goto out_unlock;

		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
		sock_put(other);
		goto restart;
	}

	/* Latch our state.

	   This is a tricky place. We need to grab the write lock and cannot
	   drop the lock on the peer. It is dangerous because deadlock is
	   possible. The connect-to-self case and simultaneous connect
	   attempts are eliminated by checking socket state. other is
	   TCP_LISTEN; if sk is TCP_LISTEN we check this before attempting
	   to grab the lock.

	   Well, and we have to recheck the state after the socket is locked.
	 */
	st = sk->sk_state;

	switch (st) {
	case TCP_CLOSE:
		/* This is ok... continue with connect */
		break;
	case TCP_ESTABLISHED:
		/* Socket is already connected */
		err = -EISCONN;
		goto out_unlock;
	default:
		err = -EINVAL;
		goto out_unlock;
	}

	unix_state_lock_nested(sk);

	if (sk->sk_state != st) {
		unix_state_unlock(sk);
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = security_unix_stream_connect(sock, other->sk_socket, newsk);
	if (err) {
		unix_state_unlock(sk);
		goto out_unlock;
	}

	/* The way is open! Quickly set all the necessary fields... */

	sock_hold(sk);
	unix_peer(newsk)	= sk;
	newsk->sk_state		= TCP_ESTABLISHED;
	newsk->sk_type		= sk->sk_type;
	newsk->sk_peercred.pid	= task_tgid_vnr(current);
	current_euid_egid(&newsk->sk_peercred.uid, &newsk->sk_peercred.gid);
	newu = unix_sk(newsk);
	newsk->sk_sleep		= &newu->peer_wait;
	otheru = unix_sk(other);

	/* copy address information from listening to new sock */
	if (otheru->addr) {
		atomic_inc(&otheru->addr->refcnt);
		newu->addr = otheru->addr;
	}
	if (otheru->dentry) {
		newu->dentry	= dget(otheru->dentry);
		newu->mnt	= mntget(otheru->mnt);
	}

	/* Set credentials */
	sk->sk_peercred = other->sk_peercred;

	sock->state	= SS_CONNECTED;
	sk->sk_state	= TCP_ESTABLISHED;
	sock_hold(newsk);

	smp_mb__after_atomic_inc();	/* sock_hold() does an atomic_inc() */
	unix_peer(sk)	= newsk;

	unix_state_unlock(sk);

	/* take ten and send info to listening sock */
	spin_lock(&other->sk_receive_queue.lock);
	__skb_queue_tail(&other->sk_receive_queue, skb);
	spin_unlock(&other->sk_receive_queue.lock);
	unix_state_unlock(other);
	other->sk_data_ready(other, 0);
	sock_put(other);
	return 0;

out_unlock:
	if (other)
		unix_state_unlock(other);

out:
	kfree_skb(skb);
	if (newsk)
		unix_release_sock(newsk, 0);
	if (other)
		sock_put(other);
	return err;
}

static int unix_socketpair(struct socket *socka, struct socket *sockb)
{
	struct sock *ska = socka->sk, *skb = sockb->sk;

	/* Join our sockets back to back */
	sock_hold(ska);
	sock_hold(skb);
	unix_peer(ska) = skb;
	unix_peer(skb) = ska;
	ska->sk_peercred.pid = skb->sk_peercred.pid = task_tgid_vnr(current);
	current_euid_egid(&skb->sk_peercred.uid, &skb->sk_peercred.gid);
	ska->sk_peercred.uid = skb->sk_peercred.uid;
	ska->sk_peercred.gid = skb->sk_peercred.gid;

	if (ska->sk_type != SOCK_DGRAM) {
		ska->sk_state = TCP_ESTABLISHED;
		skb->sk_state = TCP_ESTABLISHED;
		socka->state  = SS_CONNECTED;
		sockb->state  = SS_CONNECTED;
	}
	return 0;
}

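/*
 * A minimal userspace sketch of the back-to-back join performed above
 * (illustrative only, kept out of the build with #if 0):
 */
#if 0
#include <sys/socket.h>
#include <unistd.h>

static int socketpair_example(void)
{
	int sv[2];

	/* Both ends come back connected, with peer credentials set. */
	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) < 0)
		return -1;
	write(sv[0], "ping", 4);	/* readable from sv[1] */
	return 0;
}
#endif
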
static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
{
	struct sock *sk = sock->sk;
	struct sock *tsk;
	struct sk_buff *skb;
	int err;

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;

	err = -EINVAL;
	if (sk->sk_state != TCP_LISTEN)
		goto out;

	/* If socket state is TCP_LISTEN it cannot change (for now...),
	 * so that no locks are necessary.
	 */

	skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
	if (!skb) {
		/* This means receive shutdown. */
		if (err == 0)
			err = -EINVAL;
		goto out;
	}

	tsk = skb->sk;
	skb_free_datagram(sk, skb);
	wake_up_interruptible(&unix_sk(sk)->peer_wait);

	/* attach accepted sock to socket */
	unix_state_lock(tsk);
	newsock->state = SS_CONNECTED;
	sock_graft(tsk, newsock);
	unix_state_unlock(tsk);
	return 0;

out:
	return err;
}


static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
{
	struct sock *sk = sock->sk;
	struct unix_sock *u;
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	int err = 0;

	if (peer) {
		sk = unix_peer_get(sk);

		err = -ENOTCONN;
		if (!sk)
			goto out;
		err = 0;
	} else {
		sock_hold(sk);
	}

	u = unix_sk(sk);
	unix_state_lock(sk);
	if (!u->addr) {
		sunaddr->sun_family = AF_UNIX;
		sunaddr->sun_path[0] = 0;
		*uaddr_len = sizeof(short);
	} else {
		struct unix_address *addr = u->addr;

		*uaddr_len = addr->len;
		memcpy(sunaddr, addr->name, *uaddr_len);
	}
	unix_state_unlock(sk);
	sock_put(sk);
out:
	return err;
}

static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
	int i;

	scm->fp = UNIXCB(skb).fp;
	skb->destructor = sock_wfree;
	UNIXCB(skb).fp = NULL;

	for (i = scm->fp->count-1; i >= 0; i--)
		unix_notinflight(scm->fp->fp[i]);
}

static void unix_destruct_fds(struct sk_buff *skb)
{
	struct scm_cookie scm;
	memset(&scm, 0, sizeof(scm));
	unix_detach_fds(&scm, skb);

	/* Alas, it calls VFS */
	/* So fscking what? fput() had been SMP-safe since the last Summer */
	scm_destroy(&scm);
	sock_wfree(skb);
}

static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
	int i;

	/*
	 * Need to duplicate file references for the sake of garbage
	 * collection.  Otherwise a socket in the fps might become a
	 * candidate for GC while the skb is not yet queued.
	 */
	UNIXCB(skb).fp = scm_fp_dup(scm->fp);
	if (!UNIXCB(skb).fp)
		return -ENOMEM;

	for (i = scm->fp->count-1; i >= 0; i--)
		unix_inflight(scm->fp->fp[i]);
	skb->destructor = unix_destruct_fds;
	return 0;
}

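/*
 * A minimal userspace sketch of what ends up in unix_attach_fds(): passing
 * a file descriptor with an SCM_RIGHTS control message (illustrative only,
 * kept out of the build with #if 0; "sock" and "fd_to_send" are assumed to
 * exist in the caller):
 */
#if 0
#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>

static int send_fd(int sock, int fd_to_send)
{
	char data = 'x';
	struct iovec iov = { .iov_base = &data, .iov_len = 1 };
	char cbuf[CMSG_SPACE(sizeof(int))];
	struct msghdr msg = {
		.msg_iov = &iov, .msg_iovlen = 1,
		.msg_control = cbuf, .msg_controllen = sizeof(cbuf),
	};
	struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);

	/* The fd travels in the ancillary data, not the payload. */
	cmsg->cmsg_level = SOL_SOCKET;
	cmsg->cmsg_type = SCM_RIGHTS;
	cmsg->cmsg_len = CMSG_LEN(sizeof(int));
	memcpy(CMSG_DATA(cmsg), &fd_to_send, sizeof(int));
	return sendmsg(sock, &msg, 0);
}
#endif
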
/*
 *	Send AF_UNIX data.
 */

static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
			      struct msghdr *msg, size_t len)
{
	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr = msg->msg_name;
	struct sock *other = NULL;
	int namelen = 0; /* fake GCC */
	int err;
	unsigned hash;
	struct sk_buff *skb;
	long timeo;
	struct scm_cookie tmp_scm;

	if (NULL == siocb->scm)
		siocb->scm = &tmp_scm;
	wait_for_unix_gc();
	err = scm_send(sock, msg, siocb->scm);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out;

	if (msg->msg_namelen) {
		err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
		if (err < 0)
			goto out;
		namelen = err;
	} else {
		sunaddr = NULL;
		err = -ENOTCONN;
		other = unix_peer_get(sk);
		if (!other)
			goto out;
	}

	if (test_bit(SOCK_PASSCRED, &sock->flags)
		&& !u->addr && (err = unix_autobind(sock)) != 0)
		goto out;

	err = -EMSGSIZE;
	if (len > sk->sk_sndbuf - 32)
		goto out;

	skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err);
	if (skb == NULL)
		goto out;

	memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
	if (siocb->scm->fp) {
		err = unix_attach_fds(siocb->scm, skb);
		if (err)
			goto out_free;
	}
	unix_get_secdata(siocb->scm, skb);

	skb_reset_transport_header(skb);
	err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
	if (err)
		goto out_free;

	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);

restart:
	if (!other) {
		err = -ECONNRESET;
		if (sunaddr == NULL)
			goto out_free;

		other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
					hash, &err);
		if (other == NULL)
			goto out_free;
	}

	unix_state_lock(other);
	err = -EPERM;
	if (!unix_may_send(sk, other))
		goto out_unlock;

	if (sock_flag(other, SOCK_DEAD)) {
		/*
		 *	Check with 1003.1g - what should
		 *	datagram error
		 */
		unix_state_unlock(other);
		sock_put(other);

		err = 0;
		unix_state_lock(sk);
		if (unix_peer(sk) == other) {
			unix_peer(sk) = NULL;
			unix_state_unlock(sk);

			unix_dgram_disconnected(sk, other);
			sock_put(other);
			err = -ECONNREFUSED;
		} else {
			unix_state_unlock(sk);
		}

		other = NULL;
		if (err)
			goto out_free;
		goto restart;
	}

	err = -EPIPE;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (sk->sk_type != SOCK_SEQPACKET) {
		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;
	}

	if (unix_peer(other) != sk && unix_recvq_full(other)) {
		if (!timeo) {
			err = -EAGAIN;
			goto out_unlock;
		}

		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out_free;

		goto restart;
	}

	skb_queue_tail(&other->sk_receive_queue, skb);
	unix_state_unlock(other);
	other->sk_data_ready(other, len);
	sock_put(other);
	scm_destroy(siocb->scm);
	return len;

out_unlock:
	unix_state_unlock(other);
out_free:
	kfree_skb(skb);
out:
	if (other)
		sock_put(other);
	scm_destroy(siocb->scm);
	return err;
}


static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
			       struct msghdr *msg, size_t len)
{
	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
	struct sock *sk = sock->sk;
	struct sock *other = NULL;
	struct sockaddr_un *sunaddr = msg->msg_name;
	int err, size;
	struct sk_buff *skb;
	int sent = 0;
	struct scm_cookie tmp_scm;
	bool fds_sent = false;

	if (NULL == siocb->scm)
		siocb->scm = &tmp_scm;
	wait_for_unix_gc();
	err = scm_send(sock, msg, siocb->scm);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out_err;

	if (msg->msg_namelen) {
		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
		goto out_err;
	} else {
		sunaddr = NULL;
		err = -ENOTCONN;
		other = unix_peer(sk);
		if (!other)
			goto out_err;
	}

	if (sk->sk_shutdown & SEND_SHUTDOWN)
		goto pipe_err;

	while (sent < len) {
		/*
		 *	Optimisation for the fact that under 0.01% of X
		 *	messages typically need breaking up.
		 */

		size = len-sent;

		/* Keep two messages in the pipe so it schedules better */
		if (size > ((sk->sk_sndbuf >> 1) - 64))
			size = (sk->sk_sndbuf >> 1) - 64;

		if (size > SKB_MAX_ALLOC)
			size = SKB_MAX_ALLOC;

		/*
		 *	Grab a buffer
		 */

		skb = sock_alloc_send_skb(sk, size, msg->msg_flags&MSG_DONTWAIT,
					  &err);

		if (skb == NULL)
			goto out_err;

		/*
		 *	If you pass two values to the sock_alloc_send_skb
		 *	it tries to grab the large buffer with GFP_NOFS
		 *	(which can fail easily), and if it fails grab the
		 *	fallback size buffer which is under a page and will
		 *	succeed. [Alan]
		 */
		size = min_t(int, size, skb_tailroom(skb));

		memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
		/* Only send the fds in the first buffer */
		if (siocb->scm->fp && !fds_sent) {
			err = unix_attach_fds(siocb->scm, skb);
			if (err) {
				kfree_skb(skb);
				goto out_err;
			}
			fds_sent = true;
		}

		err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
		if (err) {
			kfree_skb(skb);
			goto out_err;
		}

		unix_state_lock(other);

		if (sock_flag(other, SOCK_DEAD) ||
		    (other->sk_shutdown & RCV_SHUTDOWN))
			goto pipe_err_free;

		skb_queue_tail(&other->sk_receive_queue, skb);
		unix_state_unlock(other);
		other->sk_data_ready(other, size);
		sent += size;
	}

	scm_destroy(siocb->scm);
	siocb->scm = NULL;

	return sent;

pipe_err_free:
	unix_state_unlock(other);
	kfree_skb(skb);
pipe_err:
	if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
		send_sig(SIGPIPE, current, 0);
	err = -EPIPE;
out_err:
	scm_destroy(siocb->scm);
	siocb->scm = NULL;
	return sent ? : err;
}

static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
				  struct msghdr *msg, size_t len)
{
	int err;
	struct sock *sk = sock->sk;

	err = sock_error(sk);
	if (err)
		return err;

	if (sk->sk_state != TCP_ESTABLISHED)
		return -ENOTCONN;

	if (msg->msg_namelen)
		msg->msg_namelen = 0;

	return unix_dgram_sendmsg(kiocb, sock, msg, len);
}

static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
{
	struct unix_sock *u = unix_sk(sk);

	msg->msg_namelen = 0;
	if (u->addr) {
		msg->msg_namelen = u->addr->len;
		memcpy(msg->msg_name, u->addr->name, u->addr->len);
	}
}

static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
			      struct msghdr *msg, size_t size,
			      int flags)
{
	struct sock_iocb *siocb = kiocb_to_siocb(iocb);
	struct scm_cookie tmp_scm;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	int noblock = flags & MSG_DONTWAIT;
	struct sk_buff *skb;
	int err;

	err = -EOPNOTSUPP;
	if (flags&MSG_OOB)
		goto out;

	msg->msg_namelen = 0;

	mutex_lock(&u->readlock);

	skb = skb_recv_datagram(sk, flags, noblock, &err);
	if (!skb) {
		unix_state_lock(sk);
		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
		    (sk->sk_shutdown & RCV_SHUTDOWN))
			err = 0;
		unix_state_unlock(sk);
		goto out_unlock;
	}

	wake_up_interruptible_sync(&u->peer_wait);

	if (msg->msg_name)
		unix_copy_addr(msg, skb->sk);

	if (size > skb->len)
		size = skb->len;
	else if (size < skb->len)
		msg->msg_flags |= MSG_TRUNC;

	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size);
	if (err)
		goto out_free;

	if (!siocb->scm) {
		siocb->scm = &tmp_scm;
		memset(&tmp_scm, 0, sizeof(tmp_scm));
	}
	siocb->scm->creds = *UNIXCREDS(skb);
	unix_set_secdata(siocb->scm, skb);

	if (!(flags & MSG_PEEK)) {
		if (UNIXCB(skb).fp)
			unix_detach_fds(siocb->scm, skb);
	} else {
		/* It is questionable: on PEEK we could:
		   - do not return fds - good, but too simple 8)
		   - return fds, and do not return them on read (old strategy,
		     apparently wrong)
		   - clone fds (I chose it for now, it is the most universal
		     solution)

		   POSIX 1003.1g does not actually define this clearly
		   at all. POSIX 1003.1g doesn't define a lot of things
		   clearly however!

		*/
		if (UNIXCB(skb).fp)
			siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
	}
	err = size;

	scm_recv(sock, msg, siocb->scm, flags);

out_free:
	skb_free_datagram(sk, skb);
out_unlock:
	mutex_unlock(&u->readlock);
out:
	return err;
}

/*
 *	Sleep until data has arrived. But check for races..
 */

static long unix_stream_data_wait(struct sock *sk, long timeo)
{
	DEFINE_WAIT(wait);

	unix_state_lock(sk);

	for (;;) {
		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);

		if (!skb_queue_empty(&sk->sk_receive_queue) ||
		    sk->sk_err ||
		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
		    signal_pending(current) ||
		    !timeo)
			break;

		set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
		unix_state_unlock(sk);
		timeo = schedule_timeout(timeo);
		unix_state_lock(sk);
		clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
	}

	finish_wait(sk->sk_sleep, &wait);
	unix_state_unlock(sk);
	return timeo;
}



static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
			       struct msghdr *msg, size_t size,
			       int flags)
{
	struct sock_iocb *siocb = kiocb_to_siocb(iocb);
	struct scm_cookie tmp_scm;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr = msg->msg_name;
	int copied = 0;
	int check_creds = 0;
	int target;
	int err = 0;
	long timeo;

	err = -EINVAL;
	if (sk->sk_state != TCP_ESTABLISHED)
		goto out;

	err = -EOPNOTSUPP;
	if (flags&MSG_OOB)
		goto out;

	target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
	timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT);

	msg->msg_namelen = 0;

	/* Lock the socket to prevent queue disordering
	 * while we sleep in memcpy_toiovec()
	 */

	if (!siocb->scm) {
		siocb->scm = &tmp_scm;
		memset(&tmp_scm, 0, sizeof(tmp_scm));
	}

	mutex_lock(&u->readlock);

	do {
		int chunk;
		struct sk_buff *skb;

		unix_state_lock(sk);
		skb = skb_dequeue(&sk->sk_receive_queue);
		if (skb == NULL) {
			if (copied >= target)
				goto unlock;

			/*
			 *	POSIX 1003.1g mandates this order.
			 */

			err = sock_error(sk);
			if (err)
				goto unlock;
			if (sk->sk_shutdown & RCV_SHUTDOWN)
				goto unlock;

			unix_state_unlock(sk);
			err = -EAGAIN;
			if (!timeo)
				break;
			mutex_unlock(&u->readlock);

			timeo = unix_stream_data_wait(sk, timeo);

			if (signal_pending(current)) {
				err = sock_intr_errno(timeo);
				goto out;
			}
			mutex_lock(&u->readlock);
			continue;
 unlock:
			unix_state_unlock(sk);
			break;
		}
		unix_state_unlock(sk);

		if (check_creds) {
			/* Never glue messages from different writers */
			if (memcmp(UNIXCREDS(skb), &siocb->scm->creds,
				   sizeof(siocb->scm->creds)) != 0) {
				skb_queue_head(&sk->sk_receive_queue, skb);
				break;
			}
		} else {
			/* Copy credentials */
			siocb->scm->creds = *UNIXCREDS(skb);
			check_creds = 1;
		}

		/* Copy address just once */
		if (sunaddr) {
			unix_copy_addr(msg, skb->sk);
			sunaddr = NULL;
		}

		chunk = min_t(unsigned int, skb->len, size);
		if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
			skb_queue_head(&sk->sk_receive_queue, skb);
			if (copied == 0)
				copied = -EFAULT;
			break;
		}
		copied += chunk;
		size -= chunk;

		/* Mark read part of skb as used */
		if (!(flags & MSG_PEEK)) {
			skb_pull(skb, chunk);

			if (UNIXCB(skb).fp)
				unix_detach_fds(siocb->scm, skb);

			/* put the skb back if we didn't use it up.. */
			if (skb->len) {
				skb_queue_head(&sk->sk_receive_queue, skb);
				break;
			}

			kfree_skb(skb);

			if (siocb->scm->fp)
				break;
		} else {
			/* It is questionable, see note in unix_dgram_recvmsg.
			 */
			if (UNIXCB(skb).fp)
				siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);

			/* put message back and return */
			skb_queue_head(&sk->sk_receive_queue, skb);
			break;
		}
	} while (size);

	mutex_unlock(&u->readlock);
	scm_recv(sock, msg, siocb->scm, flags);
out:
	return copied ? : err;
}

static int unix_shutdown(struct socket *sock, int mode)
{
	struct sock *sk = sock->sk;
	struct sock *other;

	mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);

	if (mode) {
		unix_state_lock(sk);
		sk->sk_shutdown |= mode;
		other = unix_peer(sk);
		if (other)
			sock_hold(other);
		unix_state_unlock(sk);
		sk->sk_state_change(sk);

		if (other &&
			(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {

			int peer_mode = 0;

			if (mode&RCV_SHUTDOWN)
				peer_mode |= SEND_SHUTDOWN;
			if (mode&SEND_SHUTDOWN)
				peer_mode |= RCV_SHUTDOWN;
			unix_state_lock(other);
			other->sk_shutdown |= peer_mode;
			unix_state_unlock(other);
			other->sk_state_change(other);
			read_lock(&other->sk_callback_lock);
			if (peer_mode == SHUTDOWN_MASK)
				sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
			else if (peer_mode & RCV_SHUTDOWN)
				sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
			read_unlock(&other->sk_callback_lock);
		}
		if (other)
			sock_put(other);
	}
	return 0;
}

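/*
 * Worked example of the mode arithmetic in unix_shutdown() above:
 * SHUT_RD (0) + 1 == 1 == RCV_SHUTDOWN, SHUT_WR (1) + 1 == 2 ==
 * SEND_SHUTDOWN, and SHUT_RDWR (2) + 1 == 3 == SHUTDOWN_MASK, so the
 * (mode+1) & (RCV_SHUTDOWN|SEND_SHUTDOWN) trick maps the userspace API
 * constants straight onto the internal shutdown bits.
 */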
static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	struct sock *sk = sock->sk;
	long amount = 0;
	int err;

	switch (cmd) {
	case SIOCOUTQ:
		amount = sk_wmem_alloc_get(sk);
		err = put_user(amount, (int __user *)arg);
		break;
	case SIOCINQ:
		{
			struct sk_buff *skb;

			if (sk->sk_state == TCP_LISTEN) {
				err = -EINVAL;
				break;
			}

			spin_lock(&sk->sk_receive_queue.lock);
			if (sk->sk_type == SOCK_STREAM ||
			    sk->sk_type == SOCK_SEQPACKET) {
				skb_queue_walk(&sk->sk_receive_queue, skb)
					amount += skb->len;
			} else {
				skb = skb_peek(&sk->sk_receive_queue);
				if (skb)
					amount = skb->len;
			}
			spin_unlock(&sk->sk_receive_queue.lock);
			err = put_user(amount, (int __user *)arg);
			break;
		}

	default:
		err = -ENOIOCTLCMD;
		break;
	}
	return err;
}

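/*
 * A minimal userspace sketch of the SIOCINQ query handled above
 * (illustrative only, kept out of the build with #if 0; "fd" is assumed
 * to be a connected AF_UNIX socket):
 */
#if 0
#include <sys/ioctl.h>
#include <linux/sockios.h>

static int queued_bytes(int fd)
{
	int pending = 0;

	/* Bytes waiting in the receive queue, as computed above. */
	if (ioctl(fd, SIOCINQ, &pending) < 0)
		return -1;
	return pending;
}
#endif
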
static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
{
	struct sock *sk = sock->sk;
	unsigned int mask;

	sock_poll_wait(file, sk->sk_sleep, wait);
	mask = 0;

	/* exceptional events? */
	if (sk->sk_err)
		mask |= POLLERR;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		mask |= POLLHUP;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLRDHUP;

	/* readable? */
	if (!skb_queue_empty(&sk->sk_receive_queue) ||
	    (sk->sk_shutdown & RCV_SHUTDOWN))
		mask |= POLLIN | POLLRDNORM;

	/* Connection-based need to check for termination and startup */
	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
	    sk->sk_state == TCP_CLOSE)
		mask |= POLLHUP;

	/*
	 * we set writable also when the other side has shut down the
	 * connection. This prevents stuck sockets.
	 */
	if (unix_writable(sk))
		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;

	return mask;
}

static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
				    poll_table *wait)
{
	struct sock *sk = sock->sk, *other;
	unsigned int mask, writable;

	sock_poll_wait(file, sk->sk_sleep, wait);
	mask = 0;

	/* exceptional events? */
	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
		mask |= POLLERR;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLRDHUP;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		mask |= POLLHUP;

	/* readable? */
	if (!skb_queue_empty(&sk->sk_receive_queue) ||
	    (sk->sk_shutdown & RCV_SHUTDOWN))
		mask |= POLLIN | POLLRDNORM;

	/* Connection-based need to check for termination and startup */
	if (sk->sk_type == SOCK_SEQPACKET) {
		if (sk->sk_state == TCP_CLOSE)
			mask |= POLLHUP;
		/* connection hasn't started yet? */
		if (sk->sk_state == TCP_SYN_SENT)
			return mask;
	}

	/* writable? */
	writable = unix_writable(sk);
	if (writable) {
		other = unix_peer_get(sk);
		if (other) {
			if (unix_peer(other) != sk) {
				sock_poll_wait(file, &unix_sk(other)->peer_wait,
					  wait);
				if (unix_recvq_full(other))
					writable = 0;
			}

			sock_put(other);
		}
	}

	if (writable)
		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
	else
		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);

	return mask;
}

#ifdef CONFIG_PROC_FS
static struct sock *first_unix_socket(int *i)
{
	for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) {
		if (!hlist_empty(&unix_socket_table[*i]))
			return __sk_head(&unix_socket_table[*i]);
	}
	return NULL;
}

static struct sock *next_unix_socket(int *i, struct sock *s)
{
	struct sock *next = sk_next(s);
	/* More in this chain? */
	if (next)
		return next;
	/* Look for next non-empty chain. */
	for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) {
		if (!hlist_empty(&unix_socket_table[*i]))
			return __sk_head(&unix_socket_table[*i]);
	}
	return NULL;
}

struct unix_iter_state {
	struct seq_net_private p;
	int i;
};

static struct sock *unix_seq_idx(struct seq_file *seq, loff_t pos)
{
	struct unix_iter_state *iter = seq->private;
	loff_t off = 0;
	struct sock *s;

	for (s = first_unix_socket(&iter->i); s; s = next_unix_socket(&iter->i, s)) {
		if (sock_net(s) != seq_file_net(seq))
			continue;
		if (off == pos)
			return s;
		++off;
	}
	return NULL;
}

static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(unix_table_lock)
{
	spin_lock(&unix_table_lock);
	return *pos ? unix_seq_idx(seq, *pos - 1) : SEQ_START_TOKEN;
}

static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct unix_iter_state *iter = seq->private;
	struct sock *sk = v;
	++*pos;

	if (v == SEQ_START_TOKEN)
		sk = first_unix_socket(&iter->i);
	else
		sk = next_unix_socket(&iter->i, sk);
	while (sk && (sock_net(sk) != seq_file_net(seq)))
		sk = next_unix_socket(&iter->i, sk);
	return sk;
}

static void unix_seq_stop(struct seq_file *seq, void *v)
	__releases(unix_table_lock)
{
	spin_unlock(&unix_table_lock);
}

static int unix_seq_show(struct seq_file *seq, void *v)
{

	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
			 "Inode Path\n");
	else {
		struct sock *s = v;
		struct unix_sock *u = unix_sk(s);
		unix_state_lock(s);

		seq_printf(seq, "%p: %08X %08X %08X %04X %02X %5lu",
			s,
			atomic_read(&s->sk_refcnt),
			0,
			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
			s->sk_type,
			s->sk_socket ?
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
			sock_i_ino(s));

		if (u->addr) {
			int i, len;
			seq_putc(seq, ' ');

			i = 0;
			len = u->addr->len - sizeof(short);
			if (!UNIX_ABSTRACT(s))
				len--;
			else {
				seq_putc(seq, '@');
				i++;
			}
			for ( ; i < len; i++)
				seq_putc(seq, u->addr->name->sun_path[i]);
		}
		unix_state_unlock(s);
		seq_putc(seq, '\n');
	}

	return 0;
}

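/*
 * The iterator above produces /proc/net/unix lines like the following
 * (all values invented for illustration):
 *
 *   Num       RefCount Protocol Flags    Type St Inode Path
 *   ffff88003a4f2800: 00000002 00000000 00010000 0001 01 12345 /tmp/u.sock
 *   ffff88003a4f3000: 00000002 00000000 00000000 0002 01 12346 @demo
 *
 * where an abstract name is printed with a leading '@'.
 */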
static const struct seq_operations unix_seq_ops = {
	.start  = unix_seq_start,
	.next   = unix_seq_next,
	.stop   = unix_seq_stop,
	.show   = unix_seq_show,
};

static int unix_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &unix_seq_ops,
			    sizeof(struct unix_iter_state));
}

static const struct file_operations unix_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= unix_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};

#endif

static struct net_proto_family unix_family_ops = {
	.family = PF_UNIX,
	.create = unix_create,
	.owner	= THIS_MODULE,
};


static int unix_net_init(struct net *net)
{
	int error = -ENOMEM;

	net->unx.sysctl_max_dgram_qlen = 10;
	if (unix_sysctl_register(net))
		goto out;

#ifdef CONFIG_PROC_FS
	if (!proc_net_fops_create(net, "unix", 0, &unix_seq_fops)) {
		unix_sysctl_unregister(net);
		goto out;
	}
#endif
	error = 0;
out:
	return error;
}

static void unix_net_exit(struct net *net)
{
	unix_sysctl_unregister(net);
	proc_net_remove(net, "unix");
}

static struct pernet_operations unix_net_ops = {
	.init = unix_net_init,
	.exit = unix_net_exit,
};

static int __init af_unix_init(void)
{
	int rc = -1;
	struct sk_buff *dummy_skb;

	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb));

	rc = proto_register(&unix_proto, 1);
	if (rc != 0) {
		printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n",
		       __func__);
		goto out;
	}

	sock_register(&unix_family_ops);
	register_pernet_subsys(&unix_net_ops);
out:
	return rc;
}

static void __exit af_unix_exit(void)
{
	sock_unregister(PF_UNIX);
	proto_unregister(&unix_proto);
	unregister_pernet_subsys(&unix_net_ops);
}

/* Earlier than device_initcall() so that other drivers invoking
   request_module() don't end up in a loop when modprobe tries
   to use a UNIX socket. But later than subsys_initcall() because
   we depend on stuff initialised there */
fs_initcall(af_unix_init);
module_exit(af_unix_exit);

MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_UNIX);