af_unix.c revision 6eba6a372b501aa3cdfb7df21a8364099125b9c4
/*
 * NET4:	Implementation of BSD Unix domain sockets.
 *
 * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Fixes:
 *		Linus Torvalds	:	Assorted bug cures.
 *		Niibe Yutaka	:	async I/O support.
 *		Carsten Paeth	:	PF_UNIX check, address fixes.
 *		Alan Cox	:	Limit size of allocated blocks.
 *		Alan Cox	:	Fixed the stupid socketpair bug.
 *		Alan Cox	:	BSD compatibility fine tuning.
 *		Alan Cox	:	Fixed a bug in connect when interrupted.
 *		Alan Cox	:	Sorted out a proper draft version of
 *					file descriptor passing hacked up from
 *					Mike Shaver's work.
 *		Marty Leisner	:	Fixes to fd passing
 *		Nick Nevin	:	recvmsg bugfix.
 *		Alan Cox	:	Started proper garbage collector
 *		Heiko Eißfeldt	:	Missing verify_area check
 *		Alan Cox	:	Started POSIXisms
 *		Andreas Schwab	:	Replace inode by dentry for proper
 *					reference counting
 *		Kirk Petersen	:	Made this a module
 *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
 *					Lots of bug fixes.
 *	     Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
 *					by the above two patches.
 *	     Andrea Arcangeli	:	If possible we block in connect(2)
 *					if the max backlog of the listen socket
 *					has been reached. This won't break
 *					old apps and it will avoid a huge amount
 *					of socks hashed (this for unix_gc()
 *					performance reasons).
 *					Security fix that limits the max
 *					number of socks to 2*max_files and
 *					the number of skbs queueable in the
 *					dgram receiver.
 *		Artur Skawina   :	Hash function optimizations
 *	     Alexey Kuznetsov   :	Full scale SMP. Lot of bugs are introduced 8)
 *	      Malcolm Beattie   :	Set peercred for socketpair
 *	     Michal Ostrowski   :       Module initialization cleanup.
 *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
 *	     				the core infrastructure is doing that
 *	     				for all net proto families now (2.5.69+)
 *
 *
 * Known differences from reference BSD that was tested:
 *
 *	[TO FIX]
 *	ECONNREFUSED is not returned from one end of a connected() socket to the
 *		other the moment one end closes.
 *	fstat() doesn't return st_dev=0, and gives the blksize as high water mark
 *		and a fake inode identifier (nor the BSD first socket fstat twice bug).
 *	[NOT TO FIX]
 *	accept() returns a path name even if the connecting socket has closed
 *		in the meantime (BSD loses the path and gives up).
 *	accept() returns a 0 length path for an unbound connector. BSD returns 16
 *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
 *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
 *	BSD af_unix apparently has connect forgetting to block properly.
 *		(need to check this with the POSIX spec in detail)
 *
 * Differences from 2.0.0-11-... (ANK)
 *	Bug fixes and improvements.
 *		- client shutdown killed server socket.
 *		- removed all useless cli/sti pairs.
 *
 *	Semantic changes/extensions.
 *		- generic control message passing.
 *		- SCM_CREDENTIALS control message.
 *		- "Abstract" (not FS based) socket bindings.
 *		  Abstract names are sequences of bytes (not zero terminated)
 *		  started by 0, so that this name space does not intersect
 *		  with BSD names.
 */
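
/*
 * For illustration, a minimal userspace sketch of binding to an abstract
 * name as described above (not part of this file; the name "\0example" is
 * hypothetical).  The address length passed to bind() must cover sun_family
 * plus the name bytes, because abstract names are not NUL terminated:
 *
 *	struct sockaddr_un a;
 *	int fd = socket(AF_UNIX, SOCK_STREAM, 0);
 *
 *	memset(&a, 0, sizeof(a));
 *	a.sun_family = AF_UNIX;
 *	memcpy(a.sun_path, "\0example", 8);
 *	bind(fd, (struct sockaddr *)&a,
 *	     offsetof(struct sockaddr_un, sun_path) + 8);
 */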

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/dcache.h>
#include <linux/namei.h>
#include <linux/socket.h>
#include <linux/un.h>
#include <linux/fcntl.h>
#include <linux/termios.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/in.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <net/af_unix.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <net/scm.h>
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/rtnetlink.h>
#include <linux/mount.h>
#include <net/checksum.h>
#include <linux/security.h>

static struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
static DEFINE_SPINLOCK(unix_table_lock);
static atomic_t unix_nr_socks = ATOMIC_INIT(0);

#define unix_sockets_unbound	(&unix_socket_table[UNIX_HASH_SIZE])

#define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)

#ifdef CONFIG_SECURITY_NETWORK
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	memcpy(UNIXSID(skb), &scm->secid, sizeof(u32));
}

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secid = *UNIXSID(skb);
}
#else
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }
#endif /* CONFIG_SECURITY_NETWORK */

/*
 *  SMP locking strategy:
 *    the hash table is protected by the spinlock unix_table_lock;
 *    each socket state is protected by a separate spinlock.
 */

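/*
 * Fold the 32-bit checksum of an abstract name into a table index: each
 * XOR step below mixes the high bits downward, so all input bits end up
 * influencing the final hash & (UNIX_HASH_SIZE-1) bucket.
 */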
static inline unsigned unix_hash_fold(__wsum n)
{
	unsigned hash = (__force unsigned)n;
	hash ^= hash>>16;
	hash ^= hash>>8;
	return hash&(UNIX_HASH_SIZE-1);
}

#define unix_peer(sk) (unix_sk(sk)->peer)

static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == sk;
}

static inline int unix_may_send(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
}

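/*
 * A receive queue counts as full once it holds strictly more skbs than
 * sk_max_ack_backlog; for a listener that queue is the pending-connect
 * backlog, for a dgram socket it throttles senders that are not the
 * socket's own peer.
 */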
static inline int unix_recvq_full(struct sock const *sk)
{
	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
}

static struct sock *unix_peer_get(struct sock *s)
{
	struct sock *peer;

	unix_state_lock(s);
	peer = unix_peer(s);
	if (peer)
		sock_hold(peer);
	unix_state_unlock(s);
	return peer;
}

static inline void unix_release_addr(struct unix_address *addr)
{
	if (atomic_dec_and_test(&addr->refcnt))
		kfree(addr);
}

/*
 *	Check unix socket name:
 *		- it should not be zero length.
 *		- if it starts with a non-zero byte, it should be NUL terminated (FS object)
 *		- if it starts with a zero byte, it is an abstract name.
 */

static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned *hashp)
{
	if (len <= sizeof(short) || len > sizeof(*sunaddr))
		return -EINVAL;
	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
		return -EINVAL;
	if (sunaddr->sun_path[0]) {
		/*
		 * This may look like an off-by-one error but it is a bit more
		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
		 * sun_path[108] doesn't as such exist.  However in kernel space
		 * we are guaranteed that it is a valid memory location in our
		 * kernel address buffer.
		 */
		((char *)sunaddr)[len] = 0;
		len = strlen(sunaddr->sun_path)+1+sizeof(short);
		return len;
	}

	*hashp = unix_hash_fold(csum_partial((char *)sunaddr, len, 0));
	return len;
}
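
/*
 * Note that for an FS pathname the hash is left untouched here; pathname
 * sockets are hashed by inode number at bind time instead.  For example,
 * an abstract sockaddr of family + "\0foo" (len 6 with a 2-byte family)
 * hashes all 6 bytes, while "/tmp/foo" is NUL terminated above and its
 * length recomputed with strlen().
 */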

static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}

static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	WARN_ON(!sk_unhashed(sk));
	sk_add_node(sk, list);
}

static inline void unix_remove_socket(struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_remove_socket(sk);
	spin_unlock(&unix_table_lock);
}

static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_insert_socket(list, sk);
	spin_unlock(&unix_table_lock);
}

static struct sock *__unix_find_socket_byname(struct net *net,
					      struct sockaddr_un *sunname,
					      int len, int type, unsigned hash)
{
	struct sock *s;
	struct hlist_node *node;

	sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
		struct unix_sock *u = unix_sk(s);

		if (!net_eq(sock_net(s), net))
			continue;

		if (u->addr->len == len &&
		    !memcmp(u->addr->name, sunname, len))
			goto found;
	}
	s = NULL;
found:
	return s;
}

static inline struct sock *unix_find_socket_byname(struct net *net,
						   struct sockaddr_un *sunname,
						   int len, int type,
						   unsigned hash)
{
	struct sock *s;

	spin_lock(&unix_table_lock);
	s = __unix_find_socket_byname(net, sunname, len, type, hash);
	if (s)
		sock_hold(s);
	spin_unlock(&unix_table_lock);
	return s;
}

static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i)
{
	struct sock *s;
	struct hlist_node *node;

	spin_lock(&unix_table_lock);
	sk_for_each(s, node,
		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
		struct dentry *dentry = unix_sk(s)->dentry;

		if (!net_eq(sock_net(s), net))
			continue;

		if (dentry && dentry->d_inode == i) {
			sock_hold(s);
			goto found;
		}
	}
	s = NULL;
found:
	spin_unlock(&unix_table_lock);
	return s;
}

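/*
 * A socket counts as writable while no more than a quarter of its send
 * buffer is consumed by queued write memory: (wmem_alloc << 2) <= sndbuf
 * is the same test as wmem_alloc <= sndbuf / 4 without the division.
 */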
static inline int unix_writable(struct sock *sk)
{
	return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
}

static void unix_write_space(struct sock *sk)
{
	read_lock(&sk->sk_callback_lock);
	if (unix_writable(sk)) {
		if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
			wake_up_interruptible_sync(sk->sk_sleep);
		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
	}
	read_unlock(&sk->sk_callback_lock);
}

/* When a dgram socket disconnects (or changes its peer), we clear its receive
 * queue of packets that arrived from the previous peer. First, this allows us
 * to do flow control based only on wmem_alloc; second, an sk connected to a
 * peer may receive messages only from that peer. */
static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
{
	if (!skb_queue_empty(&sk->sk_receive_queue)) {
		skb_queue_purge(&sk->sk_receive_queue);
		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);

		/* If one link of a bidirectional dgram pipe is disconnected,
		 * we signal an error. Messages are lost. Do not do this when
		 * the peer was not connected to us.
		 */
		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
			other->sk_err = ECONNRESET;
			other->sk_error_report(other);
		}
	}
}

static void unix_sock_destructor(struct sock *sk)
{
	struct unix_sock *u = unix_sk(sk);

	skb_queue_purge(&sk->sk_receive_queue);

	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
	WARN_ON(!sk_unhashed(sk));
	WARN_ON(sk->sk_socket);
	if (!sock_flag(sk, SOCK_DEAD)) {
		printk(KERN_DEBUG "Attempt to release alive unix socket: %p\n", sk);
		return;
	}

	if (u->addr)
		unix_release_addr(u->addr);

	atomic_dec(&unix_nr_socks);
#ifdef UNIX_REFCNT_DEBUG
	printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk,
		atomic_read(&unix_nr_socks));
#endif
}

static int unix_release_sock(struct sock *sk, int embrion)
{
	struct unix_sock *u = unix_sk(sk);
	struct dentry *dentry;
	struct vfsmount *mnt;
	struct sock *skpair;
	struct sk_buff *skb;
	int state;

	unix_remove_socket(sk);

	/* Clear state */
	unix_state_lock(sk);
	sock_orphan(sk);
	sk->sk_shutdown = SHUTDOWN_MASK;
	dentry	     = u->dentry;
	u->dentry    = NULL;
	mnt	     = u->mnt;
	u->mnt	     = NULL;
	state = sk->sk_state;
	sk->sk_state = TCP_CLOSE;
	unix_state_unlock(sk);

	wake_up_interruptible_all(&u->peer_wait);

	skpair = unix_peer(sk);

	if (skpair != NULL) {
		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
			unix_state_lock(skpair);
			/* No more writes */
			skpair->sk_shutdown = SHUTDOWN_MASK;
			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
				skpair->sk_err = ECONNRESET;
			unix_state_unlock(skpair);
			skpair->sk_state_change(skpair);
			read_lock(&skpair->sk_callback_lock);
			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
			read_unlock(&skpair->sk_callback_lock);
		}
		sock_put(skpair); /* It may now die */
		unix_peer(sk) = NULL;
	}

	/* Try to flush out this socket. Throw out buffers at least */

	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		if (state == TCP_LISTEN)
			unix_release_sock(skb->sk, 1);
		/* passed fds are erased in the kfree_skb hook	      */
		kfree_skb(skb);
	}

	if (dentry) {
		dput(dentry);
		mntput(mnt);
	}

	sock_put(sk);

	/* ---- Socket is dead now and most probably destroyed ---- */

	/*
	 * Fixme: BSD difference: In BSD all sockets connected to us get
	 *	  ECONNRESET and we die on the spot. In Linux we behave
	 *	  like files and pipes do and wait for the last
	 *	  dereference.
	 *
	 * Can't we simply set sock->err?
	 *
	 *	  What is the above comment talking about? --ANK(980817)
	 */

	if (unix_tot_inflight)
		unix_gc();		/* Garbage collect fds */

	return 0;
}

static int unix_listen(struct socket *sock, int backlog)
{
	int err;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;	/* Only stream/seqpacket sockets accept */
	err = -EINVAL;
	if (!u->addr)
		goto out;	/* No listens on an unbound socket */
	unix_state_lock(sk);
	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (backlog > sk->sk_max_ack_backlog)
		wake_up_interruptible_all(&u->peer_wait);
	sk->sk_max_ack_backlog	= backlog;
	sk->sk_state		= TCP_LISTEN;
	/* set credentials so connect can copy them */
	sk->sk_peercred.pid	= task_tgid_vnr(current);
	sk->sk_peercred.uid	= current->euid;
	sk->sk_peercred.gid	= current->egid;
	err = 0;

out_unlock:
	unix_state_unlock(sk);
out:
	return err;
}

static int unix_release(struct socket *);
static int unix_bind(struct socket *, struct sockaddr *, int);
static int unix_stream_connect(struct socket *, struct sockaddr *,
			       int addr_len, int flags);
static int unix_socketpair(struct socket *, struct socket *);
static int unix_accept(struct socket *, struct socket *, int);
static int unix_getname(struct socket *, struct sockaddr *, int *, int);
static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
static unsigned int unix_dgram_poll(struct file *, struct socket *,
				    poll_table *);
static int unix_ioctl(struct socket *, unsigned int, unsigned long);
static int unix_shutdown(struct socket *, int);
static int unix_stream_sendmsg(struct kiocb *, struct socket *,
			       struct msghdr *, size_t);
static int unix_stream_recvmsg(struct kiocb *, struct socket *,
			       struct msghdr *, size_t, int);
static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
			      struct msghdr *, size_t);
static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
			      struct msghdr *, size_t, int);
static int unix_dgram_connect(struct socket *, struct sockaddr *,
			      int, int);
static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
				  struct msghdr *, size_t);

static const struct proto_ops unix_stream_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_poll,
	.ioctl =	unix_ioctl,
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_stream_sendmsg,
	.recvmsg =	unix_stream_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};

static const struct proto_ops unix_dgram_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_dgram_connect,
	.socketpair =	unix_socketpair,
	.accept =	sock_no_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_dgram_sendmsg,
	.recvmsg =	unix_dgram_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};

static const struct proto_ops unix_seqpacket_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_seqpacket_sendmsg,
	.recvmsg =	unix_dgram_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};

static struct proto unix_proto = {
	.name	  = "UNIX",
	.owner	  = THIS_MODULE,
	.obj_size = sizeof(struct unix_sock),
};

/*
 * AF_UNIX sockets do not interact with hardware, hence they
 * don't trigger interrupts - so it's safe for them to have
 * bh-unsafe locking for their sk_receive_queue.lock. Split off
 * this special lock-class by reinitializing the spinlock key:
 */
static struct lock_class_key af_unix_sk_receive_queue_lock_key;

static struct sock *unix_create1(struct net *net, struct socket *sock)
{
	struct sock *sk = NULL;
	struct unix_sock *u;

	atomic_inc(&unix_nr_socks);
	if (atomic_read(&unix_nr_socks) > 2 * get_max_files())
		goto out;

	sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto);
	if (!sk)
		goto out;

	sock_init_data(sock, sk);
	lockdep_set_class(&sk->sk_receive_queue.lock,
				&af_unix_sk_receive_queue_lock_key);

	sk->sk_write_space	= unix_write_space;
	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
	sk->sk_destruct		= unix_sock_destructor;
	u	  = unix_sk(sk);
	u->dentry = NULL;
	u->mnt	  = NULL;
	spin_lock_init(&u->lock);
	atomic_long_set(&u->inflight, 0);
	INIT_LIST_HEAD(&u->link);
	mutex_init(&u->readlock); /* single task reading lock */
	init_waitqueue_head(&u->peer_wait);
	unix_insert_socket(unix_sockets_unbound, sk);
out:
	if (sk == NULL)
		atomic_dec(&unix_nr_socks);
	return sk;
}
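
/*
 * The limit check above is racy by design: unix_nr_socks is bumped before
 * the test, so parallel creators can briefly overshoot 2 * get_max_files(),
 * but the counter is always decremented again on every failure path.
 */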

static int unix_create(struct net *net, struct socket *sock, int protocol)
{
	if (protocol && protocol != PF_UNIX)
		return -EPROTONOSUPPORT;

	sock->state = SS_UNCONNECTED;

	switch (sock->type) {
	case SOCK_STREAM:
		sock->ops = &unix_stream_ops;
		break;
		/*
		 *	Believe it or not, BSD has AF_UNIX, SOCK_RAW,
		 *	though nothing uses it.
		 */
	case SOCK_RAW:
		sock->type = SOCK_DGRAM;
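		/* fall through: RAW is silently treated as DGRAM */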
	case SOCK_DGRAM:
		sock->ops = &unix_dgram_ops;
		break;
	case SOCK_SEQPACKET:
		sock->ops = &unix_seqpacket_ops;
		break;
	default:
		return -ESOCKTNOSUPPORT;
	}

	return unix_create1(net, sock) ? 0 : -ENOMEM;
}

static int unix_release(struct socket *sock)
{
	struct sock *sk = sock->sk;

	if (!sk)
		return 0;

	sock->sk = NULL;

	return unix_release_sock(sk, 0);
}

static int unix_autobind(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	static u32 ordernum = 1;
	struct unix_address *addr;
	int err;

	mutex_lock(&u->readlock);

	err = 0;
	if (u->addr)
		goto out;

	err = -ENOMEM;
	addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
	if (!addr)
		goto out;

	addr->name->sun_family = AF_UNIX;
	atomic_set(&addr->refcnt, 1);

retry:
	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
	addr->hash = unix_hash_fold(csum_partial((void *)addr->name, addr->len, 0));

	spin_lock(&unix_table_lock);
	ordernum = (ordernum+1)&0xFFFFF;

	if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
				      addr->hash)) {
		spin_unlock(&unix_table_lock);
		/* Sanity yield. It is an unusual case, but yet... */
		if (!(ordernum&0xFF))
			yield();
		goto retry;
	}
	addr->hash ^= sk->sk_type;

	__unix_remove_socket(sk);
	u->addr = addr;
	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
	spin_unlock(&unix_table_lock);
	err = 0;

out:	mutex_unlock(&u->readlock);
	return err;
}
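
/*
 * Autobound names are abstract: kzalloc() leaves sun_path[0] as the leading
 * NUL and the five hex digits of ordernum follow it, so an autobound socket
 * shows up as something like "@00001" in /proc/net/unix.  ordernum is masked
 * to 20 bits and the loop retries until it finds a free name; the yield()
 * every 256 failed attempts keeps that loop polite.
 */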

static struct sock *unix_find_other(struct net *net,
				    struct sockaddr_un *sunname, int len,
				    int type, unsigned hash, int *error)
{
	struct sock *u;
	struct path path;
	int err = 0;

	if (sunname->sun_path[0]) {
		struct inode *inode;
		err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
		if (err)
			goto fail;
		inode = path.dentry->d_inode;
		err = inode_permission(inode, MAY_WRITE);
		if (err)
			goto put_fail;

		err = -ECONNREFUSED;
		if (!S_ISSOCK(inode->i_mode))
			goto put_fail;
		u = unix_find_socket_byinode(net, inode);
		if (!u)
			goto put_fail;

		if (u->sk_type == type)
			touch_atime(path.mnt, path.dentry);

		path_put(&path);

		err = -EPROTOTYPE;
		if (u->sk_type != type) {
			sock_put(u);
			goto fail;
		}
	} else {
		err = -ECONNREFUSED;
		u = unix_find_socket_byname(net, sunname, len, type, hash);
		if (u) {
			struct dentry *dentry;
			dentry = unix_sk(u)->dentry;
			if (dentry)
				touch_atime(unix_sk(u)->mnt, dentry);
		} else
			goto fail;
	}
	return u;

put_fail:
	path_put(&path);
fail:
	*error = err;
	return NULL;
}


static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct dentry *dentry = NULL;
	struct nameidata nd;
	int err;
	unsigned hash;
	struct unix_address *addr;
	struct hlist_head *list;

	err = -EINVAL;
	if (sunaddr->sun_family != AF_UNIX)
		goto out;

	if (addr_len == sizeof(short)) {
		err = unix_autobind(sock);
		goto out;
	}

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	mutex_lock(&u->readlock);

	err = -EINVAL;
	if (u->addr)
		goto out_up;

	err = -ENOMEM;
	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
	if (!addr)
		goto out_up;

	memcpy(addr->name, sunaddr, addr_len);
	addr->len = addr_len;
	addr->hash = hash ^ sk->sk_type;
	atomic_set(&addr->refcnt, 1);

	if (sunaddr->sun_path[0]) {
		unsigned int mode;
		err = 0;
		/*
		 * Get the parent directory, calculate the hash for last
		 * component.
		 */
		err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd);
		if (err)
			goto out_mknod_parent;

		dentry = lookup_create(&nd, 0);
		err = PTR_ERR(dentry);
		if (IS_ERR(dentry))
			goto out_mknod_unlock;

		/*
		 * All right, let's create it.
		 */
		mode = S_IFSOCK |
		       (SOCK_INODE(sock)->i_mode & ~current->fs->umask);
		err = mnt_want_write(nd.path.mnt);
		if (err)
			goto out_mknod_dput;
		err = vfs_mknod(nd.path.dentry->d_inode, dentry, mode, 0);
		mnt_drop_write(nd.path.mnt);
		if (err)
			goto out_mknod_dput;
		mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
		dput(nd.path.dentry);
		nd.path.dentry = dentry;

		addr->hash = UNIX_HASH_SIZE;
	}

	spin_lock(&unix_table_lock);

	if (!sunaddr->sun_path[0]) {
		err = -EADDRINUSE;
		if (__unix_find_socket_byname(net, sunaddr, addr_len,
					      sk->sk_type, hash)) {
			unix_release_addr(addr);
			goto out_unlock;
		}

		list = &unix_socket_table[addr->hash];
	} else {
		list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
		u->dentry = nd.path.dentry;
		u->mnt    = nd.path.mnt;
	}

	err = 0;
	__unix_remove_socket(sk);
	u->addr = addr;
	__unix_insert_socket(list, sk);

out_unlock:
	spin_unlock(&unix_table_lock);
out_up:
	mutex_unlock(&u->readlock);
out:
	return err;

out_mknod_dput:
	dput(dentry);
out_mknod_unlock:
	mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
	path_put(&nd.path);
out_mknod_parent:
	if (err == -EEXIST)
		err = -EADDRINUSE;
	unix_release_addr(addr);
	goto out_up;
}

static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_lock(sk1);
		return;
	}
	if (sk1 < sk2) {
		unix_state_lock(sk1);
		unix_state_lock_nested(sk2);
	} else {
		unix_state_lock(sk2);
		unix_state_lock_nested(sk1);
	}
}

static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_unlock(sk1);
		return;
	}
	unix_state_unlock(sk1);
	unix_state_unlock(sk2);
}
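
/*
 * The pair of locks above is always taken in ascending pointer order, so
 * two tasks locking the same pair of sockets from opposite ends cannot
 * deadlock; the NULL and self-connect cases degenerate to a single lock.
 */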

static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
			      int alen, int flags)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
	struct sock *other;
	unsigned hash;
	int err;

	if (addr->sa_family != AF_UNSPEC) {
		err = unix_mkname(sunaddr, alen, &hash);
		if (err < 0)
			goto out;
		alen = err;

		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
			goto out;

restart:
		other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
		if (!other)
			goto out;

		unix_state_double_lock(sk, other);

		/* Apparently VFS overslept socket death. Retry. */
		if (sock_flag(other, SOCK_DEAD)) {
			unix_state_double_unlock(sk, other);
			sock_put(other);
			goto restart;
		}

		err = -EPERM;
		if (!unix_may_send(sk, other))
			goto out_unlock;

		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;

	} else {
		/*
		 *	1003.1g breaking connected state with AF_UNSPEC
		 */
		other = NULL;
		unix_state_double_lock(sk, other);
	}

	/*
	 * If it was connected, reconnect.
	 */
	if (unix_peer(sk)) {
		struct sock *old_peer = unix_peer(sk);
		unix_peer(sk) = other;
		unix_state_double_unlock(sk, other);

		if (other != old_peer)
			unix_dgram_disconnected(sk, old_peer);
		sock_put(old_peer);
	} else {
		unix_peer(sk) = other;
		unix_state_double_unlock(sk, other);
	}
	return 0;

out_unlock:
	unix_state_double_unlock(sk, other);
	sock_put(other);
out:
	return err;
}

static long unix_wait_for_peer(struct sock *other, long timeo)
{
	struct unix_sock *u = unix_sk(other);
	int sched;
	DEFINE_WAIT(wait);

	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);

	sched = !sock_flag(other, SOCK_DEAD) &&
		!(other->sk_shutdown & RCV_SHUTDOWN) &&
		unix_recvq_full(other);

	unix_state_unlock(other);

	if (sched)
		timeo = schedule_timeout(timeo);

	finish_wait(&u->peer_wait, &wait);
	return timeo;
}
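
/*
 * The wait above is exclusive, so each wakeup on peer_wait releases only
 * one blocked connector/sender.  The receive-queue condition is sampled
 * under the peer's state lock before it is dropped, which closes the race
 * with a wakeup arriving between the test and schedule_timeout().
 */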

static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			       int addr_len, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
	struct sock *newsk = NULL;
	struct sock *other = NULL;
	struct sk_buff *skb = NULL;
	unsigned hash;
	int st;
	int err;
	long timeo;

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	if (test_bit(SOCK_PASSCRED, &sock->flags)
		&& !u->addr && (err = unix_autobind(sock)) != 0)
		goto out;

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	/* First of all allocate resources.
	   If we do this after the state is locked,
	   we will have to recheck everything again in any case.
	 */

	err = -ENOMEM;

	/* create new sock for complete connection */
	newsk = unix_create1(sock_net(sk), NULL);
	if (newsk == NULL)
		goto out;

	/* Allocate skb for sending to listening sock */
	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
	if (skb == NULL)
		goto out;

restart:
	/*  Find listening sock. */
	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
	if (!other)
		goto out;

	/* Latch state of peer */
	unix_state_lock(other);

	/* Apparently VFS overslept socket death. Retry. */
	if (sock_flag(other, SOCK_DEAD)) {
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = -ECONNREFUSED;
	if (other->sk_state != TCP_LISTEN)
		goto out_unlock;

	if (unix_recvq_full(other)) {
		err = -EAGAIN;
		if (!timeo)
			goto out_unlock;

		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
		sock_put(other);
		goto restart;
	}

	/* Latch our state.

	   It is a tricky place. We need to grab our own state lock and
	   cannot drop the lock on the peer. It is dangerous because
	   deadlock is possible. The connect-to-self case and simultaneous
	   attempts to connect are eliminated by checking socket state:
	   other is TCP_LISTEN, and if sk is TCP_LISTEN we check this
	   before attempting to grab the lock.

	   Well, and we have to recheck the state after the socket is locked.
	 */
	st = sk->sk_state;

	switch (st) {
	case TCP_CLOSE:
		/* This is ok... continue with connect */
		break;
	case TCP_ESTABLISHED:
		/* Socket is already connected */
		err = -EISCONN;
		goto out_unlock;
	default:
		err = -EINVAL;
		goto out_unlock;
	}

	unix_state_lock_nested(sk);

	if (sk->sk_state != st) {
		unix_state_unlock(sk);
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = security_unix_stream_connect(sock, other->sk_socket, newsk);
	if (err) {
		unix_state_unlock(sk);
		goto out_unlock;
	}

	/* The way is open! Quickly set all the necessary fields... */

	sock_hold(sk);
	unix_peer(newsk)	= sk;
	newsk->sk_state		= TCP_ESTABLISHED;
	newsk->sk_type		= sk->sk_type;
	newsk->sk_peercred.pid	= task_tgid_vnr(current);
	newsk->sk_peercred.uid	= current->euid;
	newsk->sk_peercred.gid	= current->egid;
	newu = unix_sk(newsk);
	newsk->sk_sleep		= &newu->peer_wait;
	otheru = unix_sk(other);

	/* copy address information from listening to new sock*/
	if (otheru->addr) {
		atomic_inc(&otheru->addr->refcnt);
		newu->addr = otheru->addr;
	}
	if (otheru->dentry) {
		newu->dentry	= dget(otheru->dentry);
		newu->mnt	= mntget(otheru->mnt);
	}

	/* Set credentials */
	sk->sk_peercred = other->sk_peercred;

	sock->state	= SS_CONNECTED;
	sk->sk_state	= TCP_ESTABLISHED;
	sock_hold(newsk);

	smp_mb__after_atomic_inc();	/* sock_hold() does an atomic_inc() */
	unix_peer(sk)	= newsk;

	unix_state_unlock(sk);

	/* take ten and send info to listening sock */
	spin_lock(&other->sk_receive_queue.lock);
	__skb_queue_tail(&other->sk_receive_queue, skb);
	spin_unlock(&other->sk_receive_queue.lock);
	unix_state_unlock(other);
	other->sk_data_ready(other, 0);
	sock_put(other);
	return 0;

out_unlock:
	if (other)
		unix_state_unlock(other);

out:
	if (skb)
		kfree_skb(skb);
	if (newsk)
		unix_release_sock(newsk, 0);
	if (other)
		sock_put(other);
	return err;
}
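
/*
 * The connect handshake in short: the client allocates the server-side
 * sock (newsk) and a one-byte skb up front, then queues that skb - whose
 * ->sk is newsk - on the listener's receive queue.  unix_accept() later
 * dequeues it and grafts newsk onto the accepting socket, so the
 * listener's backlog is simply its receive queue length.
 */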

static int unix_socketpair(struct socket *socka, struct socket *sockb)
{
	struct sock *ska = socka->sk, *skb = sockb->sk;

	/* Join our sockets back to back */
	sock_hold(ska);
	sock_hold(skb);
	unix_peer(ska) = skb;
	unix_peer(skb) = ska;
	ska->sk_peercred.pid = skb->sk_peercred.pid = task_tgid_vnr(current);
	ska->sk_peercred.uid = skb->sk_peercred.uid = current->euid;
	ska->sk_peercred.gid = skb->sk_peercred.gid = current->egid;

	if (ska->sk_type != SOCK_DGRAM) {
		ska->sk_state = TCP_ESTABLISHED;
		skb->sk_state = TCP_ESTABLISHED;
		socka->state  = SS_CONNECTED;
		sockb->state  = SS_CONNECTED;
	}
	return 0;
}

static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
{
	struct sock *sk = sock->sk;
	struct sock *tsk;
	struct sk_buff *skb;
	int err;

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;

	err = -EINVAL;
	if (sk->sk_state != TCP_LISTEN)
		goto out;

	/* If the socket state is TCP_LISTEN it cannot change (for now...),
	 * so no locks are necessary.
	 */

	skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
	if (!skb) {
		/* This means receive shutdown. */
		if (err == 0)
			err = -EINVAL;
		goto out;
	}

	tsk = skb->sk;
	skb_free_datagram(sk, skb);
	wake_up_interruptible(&unix_sk(sk)->peer_wait);

	/* attach accepted sock to socket */
	unix_state_lock(tsk);
	newsock->state = SS_CONNECTED;
	sock_graft(tsk, newsock);
	unix_state_unlock(tsk);
	return 0;

out:
	return err;
}


static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
{
	struct sock *sk = sock->sk;
	struct unix_sock *u;
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	int err = 0;

	if (peer) {
		sk = unix_peer_get(sk);

		err = -ENOTCONN;
		if (!sk)
			goto out;
		err = 0;
	} else {
		sock_hold(sk);
	}

	u = unix_sk(sk);
	unix_state_lock(sk);
	if (!u->addr) {
		sunaddr->sun_family = AF_UNIX;
		sunaddr->sun_path[0] = 0;
		*uaddr_len = sizeof(short);
	} else {
		struct unix_address *addr = u->addr;

		*uaddr_len = addr->len;
		memcpy(sunaddr, addr->name, *uaddr_len);
	}
	unix_state_unlock(sk);
	sock_put(sk);
out:
	return err;
}

static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
	int i;

	scm->fp = UNIXCB(skb).fp;
	skb->destructor = sock_wfree;
	UNIXCB(skb).fp = NULL;

	for (i = scm->fp->count-1; i >= 0; i--)
		unix_notinflight(scm->fp->fp[i]);
}

static void unix_destruct_fds(struct sk_buff *skb)
{
	struct scm_cookie scm;
	memset(&scm, 0, sizeof(scm));
	unix_detach_fds(&scm, skb);

	/* Alas, it calls VFS */
	/* So fscking what? fput() has been SMP-safe since last summer */
	scm_destroy(&scm);
	sock_wfree(skb);
}

static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
	int i;

	/*
	 * Need to duplicate file references for the sake of garbage
	 * collection.  Otherwise a socket in the fps might become a
	 * candidate for GC while the skb is not yet queued.
	 */
	UNIXCB(skb).fp = scm_fp_dup(scm->fp);
	if (!UNIXCB(skb).fp)
		return -ENOMEM;

	for (i = scm->fp->count-1; i >= 0; i--)
		unix_inflight(scm->fp->fp[i]);
	skb->destructor = unix_destruct_fds;
	return 0;
}
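
/*
 * unix_inflight()/unix_notinflight() maintain the garbage collector's
 * count of how many references to each unix socket are sitting in transit
 * inside skbs, so an SCM_RIGHTS skb always balances the counts between
 * attach on send and detach on receive or destruction.
 */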

/*
 *	Send AF_UNIX data.
 */

static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
			      struct msghdr *msg, size_t len)
{
	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr = msg->msg_name;
	struct sock *other = NULL;
	int namelen = 0; /* fake GCC */
	int err;
	unsigned hash;
	struct sk_buff *skb;
	long timeo;
	struct scm_cookie tmp_scm;

	if (NULL == siocb->scm)
		siocb->scm = &tmp_scm;
	err = scm_send(sock, msg, siocb->scm);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out;

	if (msg->msg_namelen) {
		err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
		if (err < 0)
			goto out;
		namelen = err;
	} else {
		sunaddr = NULL;
		err = -ENOTCONN;
		other = unix_peer_get(sk);
		if (!other)
			goto out;
	}

	if (test_bit(SOCK_PASSCRED, &sock->flags)
		&& !u->addr && (err = unix_autobind(sock)) != 0)
		goto out;

	err = -EMSGSIZE;
	if (len > sk->sk_sndbuf - 32)
		goto out;

	skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err);
	if (skb == NULL)
		goto out;

	memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
	if (siocb->scm->fp) {
		err = unix_attach_fds(siocb->scm, skb);
		if (err)
			goto out_free;
	}
	unix_get_secdata(siocb->scm, skb);

	skb_reset_transport_header(skb);
	err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
	if (err)
		goto out_free;

	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);

restart:
	if (!other) {
		err = -ECONNRESET;
		if (sunaddr == NULL)
			goto out_free;

		other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
					hash, &err);
		if (other == NULL)
			goto out_free;
	}

	unix_state_lock(other);
	err = -EPERM;
	if (!unix_may_send(sk, other))
		goto out_unlock;

	if (sock_flag(other, SOCK_DEAD)) {
		/*
		 *	Check with 1003.1g - what should a
		 *	datagram error do here?
		 */
		unix_state_unlock(other);
		sock_put(other);

		err = 0;
		unix_state_lock(sk);
		if (unix_peer(sk) == other) {
			unix_peer(sk) = NULL;
			unix_state_unlock(sk);

			unix_dgram_disconnected(sk, other);
			sock_put(other);
			err = -ECONNREFUSED;
		} else {
			unix_state_unlock(sk);
		}

		other = NULL;
		if (err)
			goto out_free;
		goto restart;
	}

	err = -EPIPE;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (sk->sk_type != SOCK_SEQPACKET) {
		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;
	}

	if (unix_peer(other) != sk && unix_recvq_full(other)) {
		if (!timeo) {
			err = -EAGAIN;
			goto out_unlock;
		}

		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out_free;

		goto restart;
	}

	skb_queue_tail(&other->sk_receive_queue, skb);
	unix_state_unlock(other);
	other->sk_data_ready(other, len);
	sock_put(other);
	scm_destroy(siocb->scm);
	return len;

out_unlock:
	unix_state_unlock(other);
out_free:
	kfree_skb(skb);
out:
	if (other)
		sock_put(other);
	scm_destroy(siocb->scm);
	return err;
}


static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
			       struct msghdr *msg, size_t len)
{
	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
	struct sock *sk = sock->sk;
	struct sock *other = NULL;
	struct sockaddr_un *sunaddr = msg->msg_name;
	int err, size;
	struct sk_buff *skb;
	int sent = 0;
	struct scm_cookie tmp_scm;

	if (NULL == siocb->scm)
		siocb->scm = &tmp_scm;
	err = scm_send(sock, msg, siocb->scm);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out_err;

	if (msg->msg_namelen) {
		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
		goto out_err;
	} else {
		sunaddr = NULL;
		err = -ENOTCONN;
		other = unix_peer(sk);
		if (!other)
			goto out_err;
	}

	if (sk->sk_shutdown & SEND_SHUTDOWN)
		goto pipe_err;

	while (sent < len) {
		/*
		 *	Optimisation for the fact that under 0.01% of X
		 *	messages typically need breaking up.
		 */

		size = len-sent;

		/* Keep two messages in the pipe so it schedules better */
		if (size > ((sk->sk_sndbuf >> 1) - 64))
			size = (sk->sk_sndbuf >> 1) - 64;

		if (size > SKB_MAX_ALLOC)
			size = SKB_MAX_ALLOC;

		/*
		 *	Grab a buffer
		 */

		skb = sock_alloc_send_skb(sk, size, msg->msg_flags&MSG_DONTWAIT,
					  &err);

		if (skb == NULL)
			goto out_err;

		/*
		 *	If you pass two values to sock_alloc_send_skb
		 *	it tries to grab the large buffer with GFP_NOFS
		 *	(which can fail easily), and if it fails grabs the
		 *	fallback size buffer which is under a page and will
		 *	succeed. [Alan]
		 */
		size = min_t(int, size, skb_tailroom(skb));

		memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
		if (siocb->scm->fp) {
			err = unix_attach_fds(siocb->scm, skb);
			if (err) {
				kfree_skb(skb);
				goto out_err;
			}
		}

		err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
		if (err) {
			kfree_skb(skb);
			goto out_err;
		}

		unix_state_lock(other);

		if (sock_flag(other, SOCK_DEAD) ||
		    (other->sk_shutdown & RCV_SHUTDOWN))
			goto pipe_err_free;

		skb_queue_tail(&other->sk_receive_queue, skb);
		unix_state_unlock(other);
		other->sk_data_ready(other, size);
		sent += size;
	}

	scm_destroy(siocb->scm);
	siocb->scm = NULL;

	return sent;

pipe_err_free:
	unix_state_unlock(other);
	kfree_skb(skb);
pipe_err:
	if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
		send_sig(SIGPIPE, current, 0);
	err = -EPIPE;
out_err:
	scm_destroy(siocb->scm);
	siocb->scm = NULL;
	return sent ? : err;
}
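
/*
 * Stream sends are split so that one write never occupies more than half
 * of the send buffer (minus 64 bytes of slack) or SKB_MAX_ALLOC; keeping
 * at least two skbs in flight lets the reader and writer overlap instead
 * of running in lockstep.
 */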

static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
				  struct msghdr *msg, size_t len)
{
	int err;
	struct sock *sk = sock->sk;

	err = sock_error(sk);
	if (err)
		return err;

	if (sk->sk_state != TCP_ESTABLISHED)
		return -ENOTCONN;

	if (msg->msg_namelen)
		msg->msg_namelen = 0;

	return unix_dgram_sendmsg(kiocb, sock, msg, len);
}

static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
{
	struct unix_sock *u = unix_sk(sk);

	msg->msg_namelen = 0;
	if (u->addr) {
		msg->msg_namelen = u->addr->len;
		memcpy(msg->msg_name, u->addr->name, u->addr->len);
	}
}

static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
			      struct msghdr *msg, size_t size,
			      int flags)
{
	struct sock_iocb *siocb = kiocb_to_siocb(iocb);
	struct scm_cookie tmp_scm;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	int noblock = flags & MSG_DONTWAIT;
	struct sk_buff *skb;
	int err;

	err = -EOPNOTSUPP;
	if (flags&MSG_OOB)
		goto out;

	msg->msg_namelen = 0;

	mutex_lock(&u->readlock);

	skb = skb_recv_datagram(sk, flags, noblock, &err);
	if (!skb) {
		unix_state_lock(sk);
		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
		    (sk->sk_shutdown & RCV_SHUTDOWN))
			err = 0;
		unix_state_unlock(sk);
		goto out_unlock;
	}

	wake_up_interruptible_sync(&u->peer_wait);

	if (msg->msg_name)
		unix_copy_addr(msg, skb->sk);

	if (size > skb->len)
		size = skb->len;
	else if (size < skb->len)
		msg->msg_flags |= MSG_TRUNC;

	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size);
	if (err)
		goto out_free;

	if (!siocb->scm) {
		siocb->scm = &tmp_scm;
		memset(&tmp_scm, 0, sizeof(tmp_scm));
	}
	siocb->scm->creds = *UNIXCREDS(skb);
	unix_set_secdata(siocb->scm, skb);

	if (!(flags & MSG_PEEK)) {
		if (UNIXCB(skb).fp)
			unix_detach_fds(siocb->scm, skb);
	} else {
		/* It is questionable: on PEEK we could:
		   - not return fds - good, but too simple 8)
		   - return fds, and not return them on read (old strategy,
		     apparently wrong)
		   - clone fds (I chose this for now, it is the most universal
		     solution)

		   POSIX 1003.1g does not actually define this clearly
		   at all. POSIX 1003.1g doesn't define a lot of things
		   clearly however!

		*/
		if (UNIXCB(skb).fp)
			siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
	}
	err = size;

	scm_recv(sock, msg, siocb->scm, flags);

out_free:
	skb_free_datagram(sk, skb);
out_unlock:
	mutex_unlock(&u->readlock);
out:
	return err;
}

/*
 *	Sleep until data arrives. But check for races..
 */

static long unix_stream_data_wait(struct sock *sk, long timeo)
{
	DEFINE_WAIT(wait);

	unix_state_lock(sk);

	for (;;) {
		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);

		if (!skb_queue_empty(&sk->sk_receive_queue) ||
		    sk->sk_err ||
		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
		    signal_pending(current) ||
		    !timeo)
			break;

		set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
		unix_state_unlock(sk);
		timeo = schedule_timeout(timeo);
		unix_state_lock(sk);
		clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
	}

	finish_wait(sk->sk_sleep, &wait);
	unix_state_unlock(sk);
	return timeo;
}



static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
			       struct msghdr *msg, size_t size,
			       int flags)
{
	struct sock_iocb *siocb = kiocb_to_siocb(iocb);
	struct scm_cookie tmp_scm;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr = msg->msg_name;
	int copied = 0;
	int check_creds = 0;
	int target;
	int err = 0;
	long timeo;

	err = -EINVAL;
	if (sk->sk_state != TCP_ESTABLISHED)
		goto out;

	err = -EOPNOTSUPP;
	if (flags&MSG_OOB)
		goto out;

	target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
	timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT);

	msg->msg_namelen = 0;

	/* Lock the socket to prevent queue disordering
	 * while we sleep in memcpy_toiovec
	 */

	if (!siocb->scm) {
		siocb->scm = &tmp_scm;
		memset(&tmp_scm, 0, sizeof(tmp_scm));
	}

	mutex_lock(&u->readlock);

	do {
		int chunk;
		struct sk_buff *skb;

		unix_state_lock(sk);
		skb = skb_dequeue(&sk->sk_receive_queue);
		if (skb == NULL) {
			if (copied >= target)
				goto unlock;

			/*
			 *	POSIX 1003.1g mandates this order.
			 */

			err = sock_error(sk);
			if (err)
				goto unlock;
			if (sk->sk_shutdown & RCV_SHUTDOWN)
				goto unlock;

			unix_state_unlock(sk);
			err = -EAGAIN;
			if (!timeo)
				break;
			mutex_unlock(&u->readlock);

			timeo = unix_stream_data_wait(sk, timeo);

			if (signal_pending(current)) {
				err = sock_intr_errno(timeo);
				goto out;
			}
			mutex_lock(&u->readlock);
			continue;
 unlock:
			unix_state_unlock(sk);
			break;
		}
		unix_state_unlock(sk);

		if (check_creds) {
			/* Never glue messages from different writers */
			if (memcmp(UNIXCREDS(skb), &siocb->scm->creds,
				   sizeof(siocb->scm->creds)) != 0) {
				skb_queue_head(&sk->sk_receive_queue, skb);
				break;
			}
		} else {
			/* Copy credentials */
			siocb->scm->creds = *UNIXCREDS(skb);
			check_creds = 1;
		}

		/* Copy address just once */
		if (sunaddr) {
			unix_copy_addr(msg, skb->sk);
			sunaddr = NULL;
		}

		chunk = min_t(unsigned int, skb->len, size);
		if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
			skb_queue_head(&sk->sk_receive_queue, skb);
			if (copied == 0)
				copied = -EFAULT;
			break;
		}
		copied += chunk;
		size -= chunk;

		/* Mark read part of skb as used */
		if (!(flags & MSG_PEEK)) {
			skb_pull(skb, chunk);

			if (UNIXCB(skb).fp)
				unix_detach_fds(siocb->scm, skb);

			/* put the skb back if we didn't use it up.. */
			if (skb->len) {
				skb_queue_head(&sk->sk_receive_queue, skb);
				break;
			}

			kfree_skb(skb);

			if (siocb->scm->fp)
				break;
		} else {
			/* It is questionable, see note in unix_dgram_recvmsg.
			 */
			if (UNIXCB(skb).fp)
				siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);

			/* put message back and return */
			skb_queue_head(&sk->sk_receive_queue, skb);
			break;
		}
	} while (size);

	mutex_unlock(&u->readlock);
	scm_recv(sock, msg, siocb->scm, flags);
out:
	return copied ? : err;
}
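
/*
 * The check_creds logic above gives stream reads message-boundary-like
 * behaviour for credentials: the first skb's credentials are latched and
 * the copy loop stops early rather than merging data written under
 * different credentials into one recvmsg() result.
 */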

static int unix_shutdown(struct socket *sock, int mode)
{
	struct sock *sk = sock->sk;
	struct sock *other;

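	/*
	 * Map the SHUT_* argument onto shutdown flag bits: SHUT_RD (0)
	 * becomes RCV_SHUTDOWN (1), SHUT_WR (1) becomes SEND_SHUTDOWN (2)
	 * and SHUT_RDWR (2) becomes both, i.e. (mode + 1) masked to the
	 * two flag bits.
	 */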
	mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);

	if (mode) {
		unix_state_lock(sk);
		sk->sk_shutdown |= mode;
		other = unix_peer(sk);
		if (other)
			sock_hold(other);
		unix_state_unlock(sk);
		sk->sk_state_change(sk);

		if (other &&
			(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {

			int peer_mode = 0;

			if (mode&RCV_SHUTDOWN)
				peer_mode |= SEND_SHUTDOWN;
			if (mode&SEND_SHUTDOWN)
				peer_mode |= RCV_SHUTDOWN;
			unix_state_lock(other);
			other->sk_shutdown |= peer_mode;
			unix_state_unlock(other);
			other->sk_state_change(other);
			read_lock(&other->sk_callback_lock);
			if (peer_mode == SHUTDOWN_MASK)
				sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
			else if (peer_mode & RCV_SHUTDOWN)
				sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
			read_unlock(&other->sk_callback_lock);
		}
		if (other)
			sock_put(other);
	}
	return 0;
}

static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	struct sock *sk = sock->sk;
	long amount = 0;
	int err;

	switch (cmd) {
	case SIOCOUTQ:
		amount = atomic_read(&sk->sk_wmem_alloc);
		err = put_user(amount, (int __user *)arg);
		break;
	case SIOCINQ:
		{
			struct sk_buff *skb;

			if (sk->sk_state == TCP_LISTEN) {
				err = -EINVAL;
				break;
			}

			spin_lock(&sk->sk_receive_queue.lock);
			if (sk->sk_type == SOCK_STREAM ||
			    sk->sk_type == SOCK_SEQPACKET) {
				skb_queue_walk(&sk->sk_receive_queue, skb)
					amount += skb->len;
			} else {
				skb = skb_peek(&sk->sk_receive_queue);
				if (skb)
					amount = skb->len;
			}
			spin_unlock(&sk->sk_receive_queue.lock);
			err = put_user(amount, (int __user *)arg);
			break;
		}

	default:
		err = -ENOIOCTLCMD;
		break;
	}
	return err;
}

static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
{
	struct sock *sk = sock->sk;
	unsigned int mask;

	poll_wait(file, sk->sk_sleep, wait);
	mask = 0;

	/* exceptional events? */
	if (sk->sk_err)
		mask |= POLLERR;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		mask |= POLLHUP;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLRDHUP;

	/* readable? */
	if (!skb_queue_empty(&sk->sk_receive_queue) ||
	    (sk->sk_shutdown & RCV_SHUTDOWN))
		mask |= POLLIN | POLLRDNORM;

	/* Connection-based need to check for termination and startup */
	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
	    sk->sk_state == TCP_CLOSE)
		mask |= POLLHUP;

	/*
	 * we set writable also when the other side has shut down the
	 * connection. This prevents stuck sockets.
	 */
	if (unix_writable(sk))
		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;

	return mask;
}

static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
				    poll_table *wait)
{
	struct sock *sk = sock->sk, *other;
	unsigned int mask, writable;

	poll_wait(file, sk->sk_sleep, wait);
	mask = 0;

	/* exceptional events? */
	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
		mask |= POLLERR;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLRDHUP;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		mask |= POLLHUP;

	/* readable? */
	if (!skb_queue_empty(&sk->sk_receive_queue) ||
	    (sk->sk_shutdown & RCV_SHUTDOWN))
		mask |= POLLIN | POLLRDNORM;

	/* Connection-based need to check for termination and startup */
	if (sk->sk_type == SOCK_SEQPACKET) {
		if (sk->sk_state == TCP_CLOSE)
			mask |= POLLHUP;
		/* connection hasn't started yet? */
		if (sk->sk_state == TCP_SYN_SENT)
			return mask;
	}

	/* writable? */
	writable = unix_writable(sk);
	if (writable) {
		other = unix_peer_get(sk);
		if (other) {
			if (unix_peer(other) != sk) {
				poll_wait(file, &unix_sk(other)->peer_wait,
					  wait);
				if (unix_recvq_full(other))
					writable = 0;
			}

			sock_put(other);
		}
	}

	if (writable)
		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
	else
		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);

	return mask;
}
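
/*
 * Note the asymmetric-peer case above: for a socket whose peer has not
 * connected back to it, writability also depends on the peer's receive
 * queue, and the poller is additionally parked on the peer's peer_wait
 * queue so that a reader draining that queue can wake it.
 */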

#ifdef CONFIG_PROC_FS
static struct sock *first_unix_socket(int *i)
{
	for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) {
		if (!hlist_empty(&unix_socket_table[*i]))
			return __sk_head(&unix_socket_table[*i]);
	}
	return NULL;
}

static struct sock *next_unix_socket(int *i, struct sock *s)
{
	struct sock *next = sk_next(s);
	/* More in this chain? */
	if (next)
		return next;
	/* Look for next non-empty chain. */
	for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) {
		if (!hlist_empty(&unix_socket_table[*i]))
			return __sk_head(&unix_socket_table[*i]);
	}
	return NULL;
}

struct unix_iter_state {
	struct seq_net_private p;
	int i;
};

static struct sock *unix_seq_idx(struct seq_file *seq, loff_t pos)
{
	struct unix_iter_state *iter = seq->private;
	loff_t off = 0;
	struct sock *s;

	for (s = first_unix_socket(&iter->i); s; s = next_unix_socket(&iter->i, s)) {
		if (sock_net(s) != seq_file_net(seq))
			continue;
		if (off == pos)
			return s;
		++off;
	}
	return NULL;
}

static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(unix_table_lock)
{
	spin_lock(&unix_table_lock);
	return *pos ? unix_seq_idx(seq, *pos - 1) : SEQ_START_TOKEN;
}

static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct unix_iter_state *iter = seq->private;
	struct sock *sk = v;
	++*pos;

	if (v == SEQ_START_TOKEN)
		sk = first_unix_socket(&iter->i);
	else
		sk = next_unix_socket(&iter->i, sk);
	while (sk && (sock_net(sk) != seq_file_net(seq)))
		sk = next_unix_socket(&iter->i, sk);
	return sk;
}

static void unix_seq_stop(struct seq_file *seq, void *v)
	__releases(unix_table_lock)
{
	spin_unlock(&unix_table_lock);
}

static int unix_seq_show(struct seq_file *seq, void *v)
{

	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
			 "Inode Path\n");
	else {
		struct sock *s = v;
		struct unix_sock *u = unix_sk(s);
		unix_state_lock(s);

		seq_printf(seq, "%p: %08X %08X %08X %04X %02X %5lu",
			s,
			atomic_read(&s->sk_refcnt),
			0,
			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
			s->sk_type,
			s->sk_socket ?
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
			sock_i_ino(s));

		if (u->addr) {
			int i, len;
			seq_putc(seq, ' ');

			i = 0;
			len = u->addr->len - sizeof(short);
			if (!UNIX_ABSTRACT(s))
				len--;
			else {
				seq_putc(seq, '@');
				i++;
			}
			for ( ; i < len; i++)
				seq_putc(seq, u->addr->name->sun_path[i]);
		}
		unix_state_unlock(s);
		seq_putc(seq, '\n');
	}

	return 0;
}

static const struct seq_operations unix_seq_ops = {
	.start  = unix_seq_start,
	.next   = unix_seq_next,
	.stop   = unix_seq_stop,
	.show   = unix_seq_show,
};

static int unix_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &unix_seq_ops,
			    sizeof(struct unix_iter_state));
}

static const struct file_operations unix_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= unix_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};

#endif

static struct net_proto_family unix_family_ops = {
	.family = PF_UNIX,
	.create = unix_create,
	.owner	= THIS_MODULE,
};


static int unix_net_init(struct net *net)
{
	int error = -ENOMEM;

	net->unx.sysctl_max_dgram_qlen = 10;
	if (unix_sysctl_register(net))
		goto out;

#ifdef CONFIG_PROC_FS
	if (!proc_net_fops_create(net, "unix", 0, &unix_seq_fops)) {
		unix_sysctl_unregister(net);
		goto out;
	}
#endif
	error = 0;
out:
	return error;
}

static void unix_net_exit(struct net *net)
{
	unix_sysctl_unregister(net);
	proc_net_remove(net, "unix");
}

static struct pernet_operations unix_net_ops = {
	.init = unix_net_init,
	.exit = unix_net_exit,
};

static int __init af_unix_init(void)
{
	int rc = -1;
	struct sk_buff *dummy_skb;

	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb));

	rc = proto_register(&unix_proto, 1);
	if (rc != 0) {
		printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n",
		       __func__);
		goto out;
	}

	sock_register(&unix_family_ops);
	register_pernet_subsys(&unix_net_ops);
out:
	return rc;
}

static void __exit af_unix_exit(void)
{
	sock_unregister(PF_UNIX);
	proto_unregister(&unix_proto);
	unregister_pernet_subsys(&unix_net_ops);
}

/* Earlier than device_initcall() so that other drivers invoking
   request_module() don't end up in a loop when modprobe tries
   to use a UNIX socket. But later than subsys_initcall() because
   we depend on stuff initialised there */
fs_initcall(af_unix_init);
module_exit(af_unix_exit);

MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_UNIX);