af_unix.c revision e5537bfc98f01561fbdfbd8a78f0dc3e2360491d
1/*
2 * NET4:	Implementation of BSD Unix domain sockets.
3 *
4 * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
5 *
6 *		This program is free software; you can redistribute it and/or
7 *		modify it under the terms of the GNU General Public License
8 *		as published by the Free Software Foundation; either version
9 *		2 of the License, or (at your option) any later version.
10 *
11 * Fixes:
12 *		Linus Torvalds	:	Assorted bug cures.
13 *		Niibe Yutaka	:	async I/O support.
14 *		Carsten Paeth	:	PF_UNIX check, address fixes.
15 *		Alan Cox	:	Limit size of allocated blocks.
16 *		Alan Cox	:	Fixed the stupid socketpair bug.
17 *		Alan Cox	:	BSD compatibility fine tuning.
18 *		Alan Cox	:	Fixed a bug in connect when interrupted.
19 *		Alan Cox	:	Sorted out a proper draft version of
20 *					file descriptor passing hacked up from
21 *					Mike Shaver's work.
22 *		Marty Leisner	:	Fixes to fd passing
23 *		Nick Nevin	:	recvmsg bugfix.
24 *		Alan Cox	:	Started proper garbage collector
25 *		Heiko EiBfeldt	:	Missing verify_area check
26 *		Alan Cox	:	Started POSIXisms
27 *		Andreas Schwab	:	Replace inode by dentry for proper
28 *					reference counting
29 *		Kirk Petersen	:	Made this a module
30 *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
31 *					Lots of bug fixes.
32 *	     Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
33 *					by the above two patches.
34 *	     Andrea Arcangeli	:	If possible we block in connect(2)
35 *					if the max backlog of the listen socket
36 *					has been reached. This won't break
37 *					old apps and it avoids a huge number
38 *					of hashed socks (for unix_gc()
39 *					performance reasons).
40 *					Security fix that limits the max
41 *					number of socks to 2*max_files and
42 *					the number of skb queueable in the
43 *					dgram receiver.
44 *		Artur Skawina   :	Hash function optimizations
45 *	     Alexey Kuznetsov   :	Full scale SMP. Lot of bugs are introduced 8)
46 *	      Malcolm Beattie   :	Set peercred for socketpair
47 *	     Michal Ostrowski   :       Module initialization cleanup.
48 *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
49 *	     				the core infrastructure is doing that
50 *	     				for all net proto families now (2.5.69+)
51 *
52 *
53 * Known differences from reference BSD that was tested:
54 *
55 *	[TO FIX]
56 *	ECONNREFUSED is not returned from one end of a connected socket to the
57 *		other the moment one end closes.
58 *	fstat() doesn't return st_dev=0, nor give the blksize as the high water mark
59 *		and a fake inode identifier (nor the BSD first socket fstat twice bug).
60 *	[NOT TO FIX]
61 *	accept() returns a path name even if the connecting socket has closed
62 *		in the meantime (BSD loses the path and gives up).
63 *	accept() returns 0 length path for an unbound connector. BSD returns 16
64 *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
65 *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
66 *	BSD af_unix apparently has connect forgetting to block properly.
67 *		(need to check this with the POSIX spec in detail)
68 *
69 * Differences from 2.0.0-11-... (ANK)
70 *	Bug fixes and improvements.
71 *		- client shutdown killed server socket.
72 *		- removed all useless cli/sti pairs.
73 *
74 *	Semantic changes/extensions.
75 *		- generic control message passing.
76 *		- SCM_CREDENTIALS control message.
77 *		- "Abstract" (not FS based) socket bindings.
78 *		  Abstract names are sequences of bytes (not zero terminated)
79 *		  starting with 0, so that this name space does not intersect
80 *		  with BSD names.
81 */
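
/*
 * Illustrative userspace sketch (not part of this file; the name
 * "demo" is made up): binding a socket in the abstract namespace
 * described above.  The name starts with a NUL byte, is not NUL
 * terminated, and the length passed to bind() covers only the bytes
 * actually used:
 *
 *	struct sockaddr_un a;
 *	socklen_t alen;
 *	int fd = socket(AF_UNIX, SOCK_STREAM, 0);
 *
 *	memset(&a, 0, sizeof(a));
 *	a.sun_family = AF_UNIX;
 *	a.sun_path[0] = '\0';
 *	memcpy(a.sun_path + 1, "demo", 4);
 *	alen = offsetof(struct sockaddr_un, sun_path) + 1 + 4;
 *	bind(fd, (struct sockaddr *)&a, alen);
 */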
82
83#include <linux/module.h>
84#include <linux/kernel.h>
85#include <linux/signal.h>
86#include <linux/sched.h>
87#include <linux/errno.h>
88#include <linux/string.h>
89#include <linux/stat.h>
90#include <linux/dcache.h>
91#include <linux/namei.h>
92#include <linux/socket.h>
93#include <linux/un.h>
94#include <linux/fcntl.h>
95#include <linux/termios.h>
96#include <linux/sockios.h>
97#include <linux/net.h>
98#include <linux/in.h>
99#include <linux/fs.h>
100#include <linux/slab.h>
101#include <asm/uaccess.h>
102#include <linux/skbuff.h>
103#include <linux/netdevice.h>
104#include <net/net_namespace.h>
105#include <net/sock.h>
106#include <net/tcp_states.h>
107#include <net/af_unix.h>
108#include <linux/proc_fs.h>
109#include <linux/seq_file.h>
110#include <net/scm.h>
111#include <linux/init.h>
112#include <linux/poll.h>
113#include <linux/rtnetlink.h>
114#include <linux/mount.h>
115#include <net/checksum.h>
116#include <linux/security.h>
117
118static struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
119static DEFINE_SPINLOCK(unix_table_lock);
120static atomic_long_t unix_nr_socks;
121
122#define unix_sockets_unbound	(&unix_socket_table[UNIX_HASH_SIZE])
123
124#define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)
125
126#ifdef CONFIG_SECURITY_NETWORK
127static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
128{
129	memcpy(UNIXSID(skb), &scm->secid, sizeof(u32));
130}
131
132static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
133{
134	scm->secid = *UNIXSID(skb);
135}
136#else
137static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
138{ }
139
140static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
141{ }
142#endif /* CONFIG_SECURITY_NETWORK */
143
144/*
145 *  SMP locking strategy:
146 *    hash table is protected with spinlock unix_table_lock
147 *    each socket state is protected by separate spin lock.
148 */
149
150static inline unsigned unix_hash_fold(__wsum n)
151{
152	unsigned hash = (__force unsigned)n;
153	hash ^= hash>>16;
154	hash ^= hash>>8;
155	return hash&(UNIX_HASH_SIZE-1);
156}
157
158#define unix_peer(sk) (unix_sk(sk)->peer)
159
160static inline int unix_our_peer(struct sock *sk, struct sock *osk)
161{
162	return unix_peer(osk) == sk;
163}
164
165static inline int unix_may_send(struct sock *sk, struct sock *osk)
166{
167	return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
168}
169
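/* A peer's receive queue counts as full once it holds more skbs than
 * sk_max_ack_backlog; connectors and dgram senders may then block in
 * unix_wait_for_peer() below.
 */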
170static inline int unix_recvq_full(struct sock const *sk)
171{
172	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
173}
174
175static struct sock *unix_peer_get(struct sock *s)
176{
177	struct sock *peer;
178
179	unix_state_lock(s);
180	peer = unix_peer(s);
181	if (peer)
182		sock_hold(peer);
183	unix_state_unlock(s);
184	return peer;
185}
186
187static inline void unix_release_addr(struct unix_address *addr)
188{
189	if (atomic_dec_and_test(&addr->refcnt))
190		kfree(addr);
191}
192
193/*
194 *	Check unix socket name:
195 *		- should not be zero length.
196 *		- if it does not start with zero, it should be NUL terminated (FS object)
197 *		- if it starts with zero, it is an abstract name.
198 */
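
/*
 * Worked example (illustrative): for the filesystem name "/tmp/x",
 * unix_mkname() NUL terminates the path and returns
 * sizeof(short) + strlen("/tmp/x") + 1 = 2 + 6 + 1 = 9 (assuming the
 * usual 2-byte short).  For an abstract name the length is returned
 * unchanged and *hashp is set from a checksum of the bytes.
 */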
199
200static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned *hashp)
201{
202	if (len <= sizeof(short) || len > sizeof(*sunaddr))
203		return -EINVAL;
204	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
205		return -EINVAL;
206	if (sunaddr->sun_path[0]) {
207		/*
208		 * This may look like an off by one error but it is a bit more
209		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
210		 * sun_path[108] doesn't as such exist.  However, in kernel space
211		 * we are guaranteed that it is a valid memory location in our
212		 * kernel address buffer.
213		 */
214		((char *)sunaddr)[len] = 0;
215		len = strlen(sunaddr->sun_path)+1+sizeof(short);
216		return len;
217	}
218
219	*hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
220	return len;
221}
222
223static void __unix_remove_socket(struct sock *sk)
224{
225	sk_del_node_init(sk);
226}
227
228static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
229{
230	WARN_ON(!sk_unhashed(sk));
231	sk_add_node(sk, list);
232}
233
234static inline void unix_remove_socket(struct sock *sk)
235{
236	spin_lock(&unix_table_lock);
237	__unix_remove_socket(sk);
238	spin_unlock(&unix_table_lock);
239}
240
241static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
242{
243	spin_lock(&unix_table_lock);
244	__unix_insert_socket(list, sk);
245	spin_unlock(&unix_table_lock);
246}
247
248static struct sock *__unix_find_socket_byname(struct net *net,
249					      struct sockaddr_un *sunname,
250					      int len, int type, unsigned hash)
251{
252	struct sock *s;
253	struct hlist_node *node;
254
255	sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
256		struct unix_sock *u = unix_sk(s);
257
258		if (!net_eq(sock_net(s), net))
259			continue;
260
261		if (u->addr->len == len &&
262		    !memcmp(u->addr->name, sunname, len))
263			goto found;
264	}
265	s = NULL;
266found:
267	return s;
268}
269
270static inline struct sock *unix_find_socket_byname(struct net *net,
271						   struct sockaddr_un *sunname,
272						   int len, int type,
273						   unsigned hash)
274{
275	struct sock *s;
276
277	spin_lock(&unix_table_lock);
278	s = __unix_find_socket_byname(net, sunname, len, type, hash);
279	if (s)
280		sock_hold(s);
281	spin_unlock(&unix_table_lock);
282	return s;
283}
284
285static struct sock *unix_find_socket_byinode(struct inode *i)
286{
287	struct sock *s;
288	struct hlist_node *node;
289
290	spin_lock(&unix_table_lock);
291	sk_for_each(s, node,
292		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
293		struct dentry *dentry = unix_sk(s)->dentry;
294
295		if (dentry && dentry->d_inode == i) {
296			sock_hold(s);
297			goto found;
298		}
299	}
300	s = NULL;
301found:
302	spin_unlock(&unix_table_lock);
303	return s;
304}
305
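/* A unix socket counts as writable while its queued write memory is at
 * most a quarter of sk_sndbuf; unix_write_space() below wakes writers
 * once that threshold is reached again.
 */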
306static inline int unix_writable(struct sock *sk)
307{
308	return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
309}
310
311static void unix_write_space(struct sock *sk)
312{
313	struct socket_wq *wq;
314
315	rcu_read_lock();
316	if (unix_writable(sk)) {
317		wq = rcu_dereference(sk->sk_wq);
318		if (wq_has_sleeper(wq))
319			wake_up_interruptible_sync_poll(&wq->wait,
320				POLLOUT | POLLWRNORM | POLLWRBAND);
321		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
322	}
323	rcu_read_unlock();
324}
325
326/* When a dgram socket disconnects (or changes its peer), we clear its receive
327 * queue of packets that arrived from the previous peer. First, this allows
328 * flow control based only on wmem_alloc; second, an sk connected to a peer
329 * may receive messages only from that peer. */
330static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
331{
332	if (!skb_queue_empty(&sk->sk_receive_queue)) {
333		skb_queue_purge(&sk->sk_receive_queue);
334		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
335
336		/* If one link of a bidirectional dgram pipe is disconnected,
337		 * we signal an error. Messages are lost. Do not do this
338		 * when the peer was not connected to us.
339		 */
340		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
341			other->sk_err = ECONNRESET;
342			other->sk_error_report(other);
343		}
344	}
345}
346
347static void unix_sock_destructor(struct sock *sk)
348{
349	struct unix_sock *u = unix_sk(sk);
350
351	skb_queue_purge(&sk->sk_receive_queue);
352
353	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
354	WARN_ON(!sk_unhashed(sk));
355	WARN_ON(sk->sk_socket);
356	if (!sock_flag(sk, SOCK_DEAD)) {
357		printk(KERN_INFO "Attempt to release alive unix socket: %p\n", sk);
358		return;
359	}
360
361	if (u->addr)
362		unix_release_addr(u->addr);
363
364	atomic_long_dec(&unix_nr_socks);
365	local_bh_disable();
366	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
367	local_bh_enable();
368#ifdef UNIX_REFCNT_DEBUG
369	printk(KERN_DEBUG "UNIX %p is destroyed, %ld are still alive.\n", sk,
370		atomic_long_read(&unix_nr_socks));
371#endif
372}
373
374static int unix_release_sock(struct sock *sk, int embrion)
375{
376	struct unix_sock *u = unix_sk(sk);
377	struct dentry *dentry;
378	struct vfsmount *mnt;
379	struct sock *skpair;
380	struct sk_buff *skb;
381	int state;
382
383	unix_remove_socket(sk);
384
385	/* Clear state */
386	unix_state_lock(sk);
387	sock_orphan(sk);
388	sk->sk_shutdown = SHUTDOWN_MASK;
389	dentry	     = u->dentry;
390	u->dentry    = NULL;
391	mnt	     = u->mnt;
392	u->mnt	     = NULL;
393	state = sk->sk_state;
394	sk->sk_state = TCP_CLOSE;
395	unix_state_unlock(sk);
396
397	wake_up_interruptible_all(&u->peer_wait);
398
399	skpair = unix_peer(sk);
400
401	if (skpair != NULL) {
402		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
403			unix_state_lock(skpair);
404			/* No more writes */
405			skpair->sk_shutdown = SHUTDOWN_MASK;
406			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
407				skpair->sk_err = ECONNRESET;
408			unix_state_unlock(skpair);
409			skpair->sk_state_change(skpair);
410			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
411		}
412		sock_put(skpair); /* It may now die */
413		unix_peer(sk) = NULL;
414	}
415
416	/* Try to flush out this socket. Throw out buffers at least */
417
418	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
419		if (state == TCP_LISTEN)
420			unix_release_sock(skb->sk, 1);
421		/* passed fds are erased in the kfree_skb hook	      */
422		kfree_skb(skb);
423	}
424
425	if (dentry) {
426		dput(dentry);
427		mntput(mnt);
428	}
429
430	sock_put(sk);
431
432	/* ---- Socket is dead now and most probably destroyed ---- */
433
434	/*
435	 * Fixme: BSD difference: In BSD all sockets connected to us get
436	 *	  ECONNRESET and we die on the spot. In Linux we behave
437	 *	  like files and pipes do and wait for the last
438	 *	  dereference.
439	 *
440	 * Can't we simply set sock->err?
441	 *
442	 *	  What does the above comment talk about? --ANK(980817)
443	 */
444
445	if (unix_tot_inflight)
446		unix_gc();		/* Garbage collect fds */
447
448	return 0;
449}
450
451static void init_peercred(struct sock *sk)
452{
453	put_pid(sk->sk_peer_pid);
454	if (sk->sk_peer_cred)
455		put_cred(sk->sk_peer_cred);
456	sk->sk_peer_pid  = get_pid(task_tgid(current));
457	sk->sk_peer_cred = get_current_cred();
458}
459
460static void copy_peercred(struct sock *sk, struct sock *peersk)
461{
462	put_pid(sk->sk_peer_pid);
463	if (sk->sk_peer_cred)
464		put_cred(sk->sk_peer_cred);
465	sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
466	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
467}
468
469static int unix_listen(struct socket *sock, int backlog)
470{
471	int err;
472	struct sock *sk = sock->sk;
473	struct unix_sock *u = unix_sk(sk);
474	struct pid *old_pid = NULL;
475	const struct cred *old_cred = NULL;
476
477	err = -EOPNOTSUPP;
478	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
479		goto out;	/* Only stream/seqpacket sockets accept */
480	err = -EINVAL;
481	if (!u->addr)
482		goto out;	/* No listens on an unbound socket */
483	unix_state_lock(sk);
484	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
485		goto out_unlock;
486	if (backlog > sk->sk_max_ack_backlog)
487		wake_up_interruptible_all(&u->peer_wait);
488	sk->sk_max_ack_backlog	= backlog;
489	sk->sk_state		= TCP_LISTEN;
490	/* set credentials so connect can copy them */
491	init_peercred(sk);
492	err = 0;
493
494out_unlock:
495	unix_state_unlock(sk);
496	put_pid(old_pid);
497	if (old_cred)
498		put_cred(old_cred);
499out:
500	return err;
501}
502
503static int unix_release(struct socket *);
504static int unix_bind(struct socket *, struct sockaddr *, int);
505static int unix_stream_connect(struct socket *, struct sockaddr *,
506			       int addr_len, int flags);
507static int unix_socketpair(struct socket *, struct socket *);
508static int unix_accept(struct socket *, struct socket *, int);
509static int unix_getname(struct socket *, struct sockaddr *, int *, int);
510static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
511static unsigned int unix_dgram_poll(struct file *, struct socket *,
512				    poll_table *);
513static int unix_ioctl(struct socket *, unsigned int, unsigned long);
514static int unix_shutdown(struct socket *, int);
515static int unix_stream_sendmsg(struct kiocb *, struct socket *,
516			       struct msghdr *, size_t);
517static int unix_stream_recvmsg(struct kiocb *, struct socket *,
518			       struct msghdr *, size_t, int);
519static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
520			      struct msghdr *, size_t);
521static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
522			      struct msghdr *, size_t, int);
523static int unix_dgram_connect(struct socket *, struct sockaddr *,
524			      int, int);
525static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
526				  struct msghdr *, size_t);
527
528static const struct proto_ops unix_stream_ops = {
529	.family =	PF_UNIX,
530	.owner =	THIS_MODULE,
531	.release =	unix_release,
532	.bind =		unix_bind,
533	.connect =	unix_stream_connect,
534	.socketpair =	unix_socketpair,
535	.accept =	unix_accept,
536	.getname =	unix_getname,
537	.poll =		unix_poll,
538	.ioctl =	unix_ioctl,
539	.listen =	unix_listen,
540	.shutdown =	unix_shutdown,
541	.setsockopt =	sock_no_setsockopt,
542	.getsockopt =	sock_no_getsockopt,
543	.sendmsg =	unix_stream_sendmsg,
544	.recvmsg =	unix_stream_recvmsg,
545	.mmap =		sock_no_mmap,
546	.sendpage =	sock_no_sendpage,
547};
548
549static const struct proto_ops unix_dgram_ops = {
550	.family =	PF_UNIX,
551	.owner =	THIS_MODULE,
552	.release =	unix_release,
553	.bind =		unix_bind,
554	.connect =	unix_dgram_connect,
555	.socketpair =	unix_socketpair,
556	.accept =	sock_no_accept,
557	.getname =	unix_getname,
558	.poll =		unix_dgram_poll,
559	.ioctl =	unix_ioctl,
560	.listen =	sock_no_listen,
561	.shutdown =	unix_shutdown,
562	.setsockopt =	sock_no_setsockopt,
563	.getsockopt =	sock_no_getsockopt,
564	.sendmsg =	unix_dgram_sendmsg,
565	.recvmsg =	unix_dgram_recvmsg,
566	.mmap =		sock_no_mmap,
567	.sendpage =	sock_no_sendpage,
568};
569
570static const struct proto_ops unix_seqpacket_ops = {
571	.family =	PF_UNIX,
572	.owner =	THIS_MODULE,
573	.release =	unix_release,
574	.bind =		unix_bind,
575	.connect =	unix_stream_connect,
576	.socketpair =	unix_socketpair,
577	.accept =	unix_accept,
578	.getname =	unix_getname,
579	.poll =		unix_dgram_poll,
580	.ioctl =	unix_ioctl,
581	.listen =	unix_listen,
582	.shutdown =	unix_shutdown,
583	.setsockopt =	sock_no_setsockopt,
584	.getsockopt =	sock_no_getsockopt,
585	.sendmsg =	unix_seqpacket_sendmsg,
586	.recvmsg =	unix_dgram_recvmsg,
587	.mmap =		sock_no_mmap,
588	.sendpage =	sock_no_sendpage,
589};
590
591static struct proto unix_proto = {
592	.name			= "UNIX",
593	.owner			= THIS_MODULE,
594	.obj_size		= sizeof(struct unix_sock),
595};
596
597/*
598 * AF_UNIX sockets do not interact with hardware, hence they
599 * don't trigger interrupts - so it's safe for them to have
600 * bh-unsafe locking for their sk_receive_queue.lock. Split off
601 * this special lock-class by reinitializing the spinlock key:
602 */
603static struct lock_class_key af_unix_sk_receive_queue_lock_key;
604
605static struct sock *unix_create1(struct net *net, struct socket *sock)
606{
607	struct sock *sk = NULL;
608	struct unix_sock *u;
609
610	atomic_long_inc(&unix_nr_socks);
611	if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
612		goto out;
613
614	sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto);
615	if (!sk)
616		goto out;
617
618	sock_init_data(sock, sk);
619	lockdep_set_class(&sk->sk_receive_queue.lock,
620				&af_unix_sk_receive_queue_lock_key);
621
622	sk->sk_write_space	= unix_write_space;
623	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
624	sk->sk_destruct		= unix_sock_destructor;
625	u	  = unix_sk(sk);
626	u->dentry = NULL;
627	u->mnt	  = NULL;
628	spin_lock_init(&u->lock);
629	atomic_long_set(&u->inflight, 0);
630	INIT_LIST_HEAD(&u->link);
631	mutex_init(&u->readlock); /* single task reading lock */
632	init_waitqueue_head(&u->peer_wait);
633	unix_insert_socket(unix_sockets_unbound, sk);
634out:
635	if (sk == NULL)
636		atomic_long_dec(&unix_nr_socks);
637	else {
638		local_bh_disable();
639		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
640		local_bh_enable();
641	}
642	return sk;
643}
644
645static int unix_create(struct net *net, struct socket *sock, int protocol,
646		       int kern)
647{
648	if (protocol && protocol != PF_UNIX)
649		return -EPROTONOSUPPORT;
650
651	sock->state = SS_UNCONNECTED;
652
653	switch (sock->type) {
654	case SOCK_STREAM:
655		sock->ops = &unix_stream_ops;
656		break;
657		/*
658		 *	Believe it or not BSD has AF_UNIX, SOCK_RAW though
659		 *	nothing uses it.
660		 */
661	case SOCK_RAW:
662		sock->type = SOCK_DGRAM;
663	case SOCK_DGRAM:
664		sock->ops = &unix_dgram_ops;
665		break;
666	case SOCK_SEQPACKET:
667		sock->ops = &unix_seqpacket_ops;
668		break;
669	default:
670		return -ESOCKTNOSUPPORT;
671	}
672
673	return unix_create1(net, sock) ? 0 : -ENOMEM;
674}
675
676static int unix_release(struct socket *sock)
677{
678	struct sock *sk = sock->sk;
679
680	if (!sk)
681		return 0;
682
683	sock->sk = NULL;
684
685	return unix_release_sock(sk, 0);
686}
687
688static int unix_autobind(struct socket *sock)
689{
690	struct sock *sk = sock->sk;
691	struct net *net = sock_net(sk);
692	struct unix_sock *u = unix_sk(sk);
693	static u32 ordernum = 1;
694	struct unix_address *addr;
695	int err;
696	unsigned int retries = 0;
697
698	mutex_lock(&u->readlock);
699
700	err = 0;
701	if (u->addr)
702		goto out;
703
704	err = -ENOMEM;
705	addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
706	if (!addr)
707		goto out;
708
709	addr->name->sun_family = AF_UNIX;
710	atomic_set(&addr->refcnt, 1);
711
712retry:
713	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
714	addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
715
716	spin_lock(&unix_table_lock);
717	ordernum = (ordernum+1)&0xFFFFF;
718
719	if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
720				      addr->hash)) {
721		spin_unlock(&unix_table_lock);
722		/*
723		 * __unix_find_socket_byname() may take a long time if many names
724		 * are already in use.
725		 */
726		cond_resched();
727		/* Give up if all names seem to be in use. */
728		if (retries++ == 0xFFFFF) {
729			err = -ENOSPC;
730			kfree(addr);
731			goto out;
732		}
733		goto retry;
734	}
735	addr->hash ^= sk->sk_type;
736
737	__unix_remove_socket(sk);
738	u->addr = addr;
739	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
740	spin_unlock(&unix_table_lock);
741	err = 0;
742
743out:	mutex_unlock(&u->readlock);
744	return err;
745}
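
/*
 * Illustrative userspace sketch (not part of this file): autobind runs
 * when bind() supplies only the address family (and implicitly, with
 * SO_PASSCRED, on connect/send from an unbound socket); the kernel then
 * picks an abstract name of five hex digits:
 *
 *	struct sockaddr_un a = { .sun_family = AF_UNIX };
 *
 *	bind(fd, (struct sockaddr *)&a, sizeof(short));
 *
 * getsockname() would afterwards report something like "\0002af" in
 * sun_path (the exact digits come from the ordernum counter above).
 */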
746
747static struct sock *unix_find_other(struct net *net,
748				    struct sockaddr_un *sunname, int len,
749				    int type, unsigned hash, int *error)
750{
751	struct sock *u;
752	struct path path;
753	int err = 0;
754
755	if (sunname->sun_path[0]) {
756		struct inode *inode;
757		err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
758		if (err)
759			goto fail;
760		inode = path.dentry->d_inode;
761		err = inode_permission(inode, MAY_WRITE);
762		if (err)
763			goto put_fail;
764
765		err = -ECONNREFUSED;
766		if (!S_ISSOCK(inode->i_mode))
767			goto put_fail;
768		u = unix_find_socket_byinode(inode);
769		if (!u)
770			goto put_fail;
771
772		if (u->sk_type == type)
773			touch_atime(path.mnt, path.dentry);
774
775		path_put(&path);
776
777		err = -EPROTOTYPE;
778		if (u->sk_type != type) {
779			sock_put(u);
780			goto fail;
781		}
782	} else {
783		err = -ECONNREFUSED;
784		u = unix_find_socket_byname(net, sunname, len, type, hash);
785		if (u) {
786			struct dentry *dentry;
787			dentry = unix_sk(u)->dentry;
788			if (dentry)
789				touch_atime(unix_sk(u)->mnt, dentry);
790		} else
791			goto fail;
792	}
793	return u;
794
795put_fail:
796	path_put(&path);
797fail:
798	*error = err;
799	return NULL;
800}
801
802
803static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
804{
805	struct sock *sk = sock->sk;
806	struct net *net = sock_net(sk);
807	struct unix_sock *u = unix_sk(sk);
808	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
809	struct dentry *dentry = NULL;
810	struct nameidata nd;
811	int err;
812	unsigned hash;
813	struct unix_address *addr;
814	struct hlist_head *list;
815
816	err = -EINVAL;
817	if (sunaddr->sun_family != AF_UNIX)
818		goto out;
819
820	if (addr_len == sizeof(short)) {
821		err = unix_autobind(sock);
822		goto out;
823	}
824
825	err = unix_mkname(sunaddr, addr_len, &hash);
826	if (err < 0)
827		goto out;
828	addr_len = err;
829
830	mutex_lock(&u->readlock);
831
832	err = -EINVAL;
833	if (u->addr)
834		goto out_up;
835
836	err = -ENOMEM;
837	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
838	if (!addr)
839		goto out_up;
840
841	memcpy(addr->name, sunaddr, addr_len);
842	addr->len = addr_len;
843	addr->hash = hash ^ sk->sk_type;
844	atomic_set(&addr->refcnt, 1);
845
846	if (sunaddr->sun_path[0]) {
847		unsigned int mode;
848		err = 0;
849		/*
850		 * Get the parent directory, calculate the hash for last
851		 * component.
852		 */
853		err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd);
854		if (err)
855			goto out_mknod_parent;
856
857		dentry = lookup_create(&nd, 0);
858		err = PTR_ERR(dentry);
859		if (IS_ERR(dentry))
860			goto out_mknod_unlock;
861
862		/*
863		 * All right, let's create it.
864		 */
865		mode = S_IFSOCK |
866		       (SOCK_INODE(sock)->i_mode & ~current_umask());
867		err = mnt_want_write(nd.path.mnt);
868		if (err)
869			goto out_mknod_dput;
870		err = security_path_mknod(&nd.path, dentry, mode, 0);
871		if (err)
872			goto out_mknod_drop_write;
873		err = vfs_mknod(nd.path.dentry->d_inode, dentry, mode, 0);
874out_mknod_drop_write:
875		mnt_drop_write(nd.path.mnt);
876		if (err)
877			goto out_mknod_dput;
878		mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
879		dput(nd.path.dentry);
880		nd.path.dentry = dentry;
881
882		addr->hash = UNIX_HASH_SIZE;
883	}
884
885	spin_lock(&unix_table_lock);
886
887	if (!sunaddr->sun_path[0]) {
888		err = -EADDRINUSE;
889		if (__unix_find_socket_byname(net, sunaddr, addr_len,
890					      sk->sk_type, hash)) {
891			unix_release_addr(addr);
892			goto out_unlock;
893		}
894
895		list = &unix_socket_table[addr->hash];
896	} else {
897		list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
898		u->dentry = nd.path.dentry;
899		u->mnt    = nd.path.mnt;
900	}
901
902	err = 0;
903	__unix_remove_socket(sk);
904	u->addr = addr;
905	__unix_insert_socket(list, sk);
906
907out_unlock:
908	spin_unlock(&unix_table_lock);
909out_up:
910	mutex_unlock(&u->readlock);
911out:
912	return err;
913
914out_mknod_dput:
915	dput(dentry);
916out_mknod_unlock:
917	mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
918	path_put(&nd.path);
919out_mknod_parent:
920	if (err == -EEXIST)
921		err = -EADDRINUSE;
922	unix_release_addr(addr);
923	goto out_up;
924}
925
926static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
927{
928	if (unlikely(sk1 == sk2) || !sk2) {
929		unix_state_lock(sk1);
930		return;
931	}
932	if (sk1 < sk2) {
933		unix_state_lock(sk1);
934		unix_state_lock_nested(sk2);
935	} else {
936		unix_state_lock(sk2);
937		unix_state_lock_nested(sk1);
938	}
939}
940
941static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
942{
943	if (unlikely(sk1 == sk2) || !sk2) {
944		unix_state_unlock(sk1);
945		return;
946	}
947	unix_state_unlock(sk1);
948	unix_state_unlock(sk2);
949}
950
951static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
952			      int alen, int flags)
953{
954	struct sock *sk = sock->sk;
955	struct net *net = sock_net(sk);
956	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
957	struct sock *other;
958	unsigned hash;
959	int err;
960
961	if (addr->sa_family != AF_UNSPEC) {
962		err = unix_mkname(sunaddr, alen, &hash);
963		if (err < 0)
964			goto out;
965		alen = err;
966
967		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
968		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
969			goto out;
970
971restart:
972		other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
973		if (!other)
974			goto out;
975
976		unix_state_double_lock(sk, other);
977
978		/* Apparently VFS overslept socket death. Retry. */
979		if (sock_flag(other, SOCK_DEAD)) {
980			unix_state_double_unlock(sk, other);
981			sock_put(other);
982			goto restart;
983		}
984
985		err = -EPERM;
986		if (!unix_may_send(sk, other))
987			goto out_unlock;
988
989		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
990		if (err)
991			goto out_unlock;
992
993	} else {
994		/*
995		 *	1003.1g breaking connected state with AF_UNSPEC
996		 */
997		other = NULL;
998		unix_state_double_lock(sk, other);
999	}
1000
1001	/*
1002	 * If it was connected, reconnect.
1003	 */
1004	if (unix_peer(sk)) {
1005		struct sock *old_peer = unix_peer(sk);
1006		unix_peer(sk) = other;
1007		unix_state_double_unlock(sk, other);
1008
1009		if (other != old_peer)
1010			unix_dgram_disconnected(sk, old_peer);
1011		sock_put(old_peer);
1012	} else {
1013		unix_peer(sk) = other;
1014		unix_state_double_unlock(sk, other);
1015	}
1016	return 0;
1017
1018out_unlock:
1019	unix_state_double_unlock(sk, other);
1020	sock_put(other);
1021out:
1022	return err;
1023}
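
/*
 * Illustrative userspace sketch (not part of this file): the AF_UNSPEC
 * branch above implements the 1003.1g rule that a datagram socket can
 * be dissociated from its peer:
 *
 *	struct sockaddr sa = { .sa_family = AF_UNSPEC };
 *
 *	connect(fd, &sa, sizeof(sa));
 *
 * after which fd is connected to nobody and may be connect()ed again.
 */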
1024
1025static long unix_wait_for_peer(struct sock *other, long timeo)
1026{
1027	struct unix_sock *u = unix_sk(other);
1028	int sched;
1029	DEFINE_WAIT(wait);
1030
1031	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1032
1033	sched = !sock_flag(other, SOCK_DEAD) &&
1034		!(other->sk_shutdown & RCV_SHUTDOWN) &&
1035		unix_recvq_full(other);
1036
1037	unix_state_unlock(other);
1038
1039	if (sched)
1040		timeo = schedule_timeout(timeo);
1041
1042	finish_wait(&u->peer_wait, &wait);
1043	return timeo;
1044}
1045
1046static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1047			       int addr_len, int flags)
1048{
1049	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1050	struct sock *sk = sock->sk;
1051	struct net *net = sock_net(sk);
1052	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1053	struct sock *newsk = NULL;
1054	struct sock *other = NULL;
1055	struct sk_buff *skb = NULL;
1056	unsigned hash;
1057	int st;
1058	int err;
1059	long timeo;
1060
1061	err = unix_mkname(sunaddr, addr_len, &hash);
1062	if (err < 0)
1063		goto out;
1064	addr_len = err;
1065
1066	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
1067	    (err = unix_autobind(sock)) != 0)
1068		goto out;
1069
1070	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1071
1072	/* First of all allocate resources.
1073	   If we do it after the state is locked,
1074	   we will have to recheck everything again in any case.
1075	 */
1076
1077	err = -ENOMEM;
1078
1079	/* create new sock for complete connection */
1080	newsk = unix_create1(sock_net(sk), NULL);
1081	if (newsk == NULL)
1082		goto out;
1083
1084	/* Allocate skb for sending to listening sock */
1085	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1086	if (skb == NULL)
1087		goto out;
1088
1089restart:
1090	/*  Find listening sock. */
1091	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
1092	if (!other)
1093		goto out;
1094
1095	/* Latch state of peer */
1096	unix_state_lock(other);
1097
1098	/* Apparently VFS overslept socket death. Retry. */
1099	if (sock_flag(other, SOCK_DEAD)) {
1100		unix_state_unlock(other);
1101		sock_put(other);
1102		goto restart;
1103	}
1104
1105	err = -ECONNREFUSED;
1106	if (other->sk_state != TCP_LISTEN)
1107		goto out_unlock;
1108	if (other->sk_shutdown & RCV_SHUTDOWN)
1109		goto out_unlock;
1110
1111	if (unix_recvq_full(other)) {
1112		err = -EAGAIN;
1113		if (!timeo)
1114			goto out_unlock;
1115
1116		timeo = unix_wait_for_peer(other, timeo);
1117
1118		err = sock_intr_errno(timeo);
1119		if (signal_pending(current))
1120			goto out;
1121		sock_put(other);
1122		goto restart;
1123	}
1124
1125	/* Latch our state.
1126
1127	   It is a tricky place. We need to grab our state lock and cannot
1128	   drop the lock on the peer. It is dangerous because deadlock is
1129	   possible. The connect-to-self case and simultaneous
1130	   attempts to connect are eliminated by checking the socket
1131	   state. other is TCP_LISTEN; if sk is TCP_LISTEN we
1132	   check this before attempting to grab the lock.
1133
1134	   Well, and we have to recheck the state after the socket is locked.
1135	 */
1136	st = sk->sk_state;
1137
1138	switch (st) {
1139	case TCP_CLOSE:
1140		/* This is ok... continue with connect */
1141		break;
1142	case TCP_ESTABLISHED:
1143		/* Socket is already connected */
1144		err = -EISCONN;
1145		goto out_unlock;
1146	default:
1147		err = -EINVAL;
1148		goto out_unlock;
1149	}
1150
1151	unix_state_lock_nested(sk);
1152
1153	if (sk->sk_state != st) {
1154		unix_state_unlock(sk);
1155		unix_state_unlock(other);
1156		sock_put(other);
1157		goto restart;
1158	}
1159
1160	err = security_unix_stream_connect(sk, other, newsk);
1161	if (err) {
1162		unix_state_unlock(sk);
1163		goto out_unlock;
1164	}
1165
1166	/* The way is open! Quickly set all the necessary fields... */
1167
1168	sock_hold(sk);
1169	unix_peer(newsk)	= sk;
1170	newsk->sk_state		= TCP_ESTABLISHED;
1171	newsk->sk_type		= sk->sk_type;
1172	init_peercred(newsk);
1173	newu = unix_sk(newsk);
1174	newsk->sk_wq		= &newu->peer_wq;
1175	otheru = unix_sk(other);
1176
1177	/* copy address information from listening to new sock*/
1178	if (otheru->addr) {
1179		atomic_inc(&otheru->addr->refcnt);
1180		newu->addr = otheru->addr;
1181	}
1182	if (otheru->dentry) {
1183		newu->dentry	= dget(otheru->dentry);
1184		newu->mnt	= mntget(otheru->mnt);
1185	}
1186
1187	/* Set credentials */
1188	copy_peercred(sk, other);
1189
1190	sock->state	= SS_CONNECTED;
1191	sk->sk_state	= TCP_ESTABLISHED;
1192	sock_hold(newsk);
1193
1194	smp_mb__after_atomic_inc();	/* sock_hold() does an atomic_inc() */
1195	unix_peer(sk)	= newsk;
1196
1197	unix_state_unlock(sk);
1198
1199	/* take it and send info to the listening sock */
1200	spin_lock(&other->sk_receive_queue.lock);
1201	__skb_queue_tail(&other->sk_receive_queue, skb);
1202	spin_unlock(&other->sk_receive_queue.lock);
1203	unix_state_unlock(other);
1204	other->sk_data_ready(other, 0);
1205	sock_put(other);
1206	return 0;
1207
1208out_unlock:
1209	if (other)
1210		unix_state_unlock(other);
1211
1212out:
1213	kfree_skb(skb);
1214	if (newsk)
1215		unix_release_sock(newsk, 0);
1216	if (other)
1217		sock_put(other);
1218	return err;
1219}
1220
1221static int unix_socketpair(struct socket *socka, struct socket *sockb)
1222{
1223	struct sock *ska = socka->sk, *skb = sockb->sk;
1224
1225	/* Join our sockets back to back */
1226	sock_hold(ska);
1227	sock_hold(skb);
1228	unix_peer(ska) = skb;
1229	unix_peer(skb) = ska;
1230	init_peercred(ska);
1231	init_peercred(skb);
1232
1233	if (ska->sk_type != SOCK_DGRAM) {
1234		ska->sk_state = TCP_ESTABLISHED;
1235		skb->sk_state = TCP_ESTABLISHED;
1236		socka->state  = SS_CONNECTED;
1237		sockb->state  = SS_CONNECTED;
1238	}
1239	return 0;
1240}
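
/*
 * Illustrative userspace sketch (not part of this file): the usual
 * caller of the function above.
 *
 *	int sv[2];
 *
 *	socketpair(AF_UNIX, SOCK_STREAM, 0, sv);
 *
 * gives two connected sockets: data written on sv[0] is readable on
 * sv[1] and vice versa, and each side sees the other's credentials via
 * SO_PEERCRED thanks to the init_peercred() calls above.
 */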
1241
1242static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1243{
1244	struct sock *sk = sock->sk;
1245	struct sock *tsk;
1246	struct sk_buff *skb;
1247	int err;
1248
1249	err = -EOPNOTSUPP;
1250	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1251		goto out;
1252
1253	err = -EINVAL;
1254	if (sk->sk_state != TCP_LISTEN)
1255		goto out;
1256
1257	/* If socket state is TCP_LISTEN it cannot change (for now...),
1258	 * so that no locks are necessary.
1259	 */
1260
1261	skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1262	if (!skb) {
1263		/* This means receive shutdown. */
1264		if (err == 0)
1265			err = -EINVAL;
1266		goto out;
1267	}
1268
1269	tsk = skb->sk;
1270	skb_free_datagram(sk, skb);
1271	wake_up_interruptible(&unix_sk(sk)->peer_wait);
1272
1273	/* attach accepted sock to socket */
1274	unix_state_lock(tsk);
1275	newsock->state = SS_CONNECTED;
1276	sock_graft(tsk, newsock);
1277	unix_state_unlock(tsk);
1278	return 0;
1279
1280out:
1281	return err;
1282}
1283
1284
1285static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1286{
1287	struct sock *sk = sock->sk;
1288	struct unix_sock *u;
1289	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1290	int err = 0;
1291
1292	if (peer) {
1293		sk = unix_peer_get(sk);
1294
1295		err = -ENOTCONN;
1296		if (!sk)
1297			goto out;
1298		err = 0;
1299	} else {
1300		sock_hold(sk);
1301	}
1302
1303	u = unix_sk(sk);
1304	unix_state_lock(sk);
1305	if (!u->addr) {
1306		sunaddr->sun_family = AF_UNIX;
1307		sunaddr->sun_path[0] = 0;
1308		*uaddr_len = sizeof(short);
1309	} else {
1310		struct unix_address *addr = u->addr;
1311
1312		*uaddr_len = addr->len;
1313		memcpy(sunaddr, addr->name, *uaddr_len);
1314	}
1315	unix_state_unlock(sk);
1316	sock_put(sk);
1317out:
1318	return err;
1319}
1320
1321static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1322{
1323	int i;
1324
1325	scm->fp = UNIXCB(skb).fp;
1326	UNIXCB(skb).fp = NULL;
1327
1328	for (i = scm->fp->count-1; i >= 0; i--)
1329		unix_notinflight(scm->fp->fp[i]);
1330}
1331
1332static void unix_destruct_scm(struct sk_buff *skb)
1333{
1334	struct scm_cookie scm;
1335	memset(&scm, 0, sizeof(scm));
1336	scm.pid  = UNIXCB(skb).pid;
1337	scm.cred = UNIXCB(skb).cred;
1338	if (UNIXCB(skb).fp)
1339		unix_detach_fds(&scm, skb);
1340
1341	/* Alas, it calls VFS */
1342	/* So fscking what? fput() had been SMP-safe since the last Summer */
1343	scm_destroy(&scm);
1344	sock_wfree(skb);
1345}
1346
1347#define MAX_RECURSION_LEVEL 4
1348
1349static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1350{
1351	int i;
1352	unsigned char max_level = 0;
1353	int unix_sock_count = 0;
1354
1355	for (i = scm->fp->count - 1; i >= 0; i--) {
1356		struct sock *sk = unix_get_socket(scm->fp->fp[i]);
1357
1358		if (sk) {
1359			unix_sock_count++;
1360			max_level = max(max_level,
1361					unix_sk(sk)->recursion_level);
1362		}
1363	}
1364	if (unlikely(max_level > MAX_RECURSION_LEVEL))
1365		return -ETOOMANYREFS;
1366
1367	/*
1368	 * Need to duplicate file references for the sake of garbage
1369	 * collection.  Otherwise a socket in the fps might become a
1370	 * candidate for GC while the skb is not yet queued.
1371	 */
1372	UNIXCB(skb).fp = scm_fp_dup(scm->fp);
1373	if (!UNIXCB(skb).fp)
1374		return -ENOMEM;
1375
1376	if (unix_sock_count) {
1377		for (i = scm->fp->count - 1; i >= 0; i--)
1378			unix_inflight(scm->fp->fp[i]);
1379	}
1380	return max_level;
1381}
1382
1383static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1384{
1385	int err = 0;
1386	UNIXCB(skb).pid  = get_pid(scm->pid);
1387	UNIXCB(skb).cred = get_cred(scm->cred);
1388	UNIXCB(skb).fp = NULL;
1389	if (scm->fp && send_fds)
1390		err = unix_attach_fds(scm, skb);
1391
1392	skb->destructor = unix_destruct_scm;
1393	return err;
1394}
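
/*
 * Illustrative userspace sketch (not part of this file; sock_fd and
 * fd_to_pass are assumed to exist): the sender-side sequence that makes
 * unix_attach_fds() above run, i.e. passing a descriptor via SCM_RIGHTS:
 *
 *	char cbuf[CMSG_SPACE(sizeof(int))];
 *	struct iovec iov = { .iov_base = "x", .iov_len = 1 };
 *	struct msghdr mh = { 0 };
 *	struct cmsghdr *c;
 *
 *	mh.msg_iov = &iov;
 *	mh.msg_iovlen = 1;
 *	mh.msg_control = cbuf;
 *	mh.msg_controllen = sizeof(cbuf);
 *	c = CMSG_FIRSTHDR(&mh);
 *	c->cmsg_level = SOL_SOCKET;
 *	c->cmsg_type = SCM_RIGHTS;
 *	c->cmsg_len = CMSG_LEN(sizeof(int));
 *	memcpy(CMSG_DATA(c), &fd_to_pass, sizeof(int));
 *	sendmsg(sock_fd, &mh, 0);
 */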
1395
1396/*
1397 *	Send AF_UNIX data.
1398 */
1399
1400static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
1401			      struct msghdr *msg, size_t len)
1402{
1403	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1404	struct sock *sk = sock->sk;
1405	struct net *net = sock_net(sk);
1406	struct unix_sock *u = unix_sk(sk);
1407	struct sockaddr_un *sunaddr = msg->msg_name;
1408	struct sock *other = NULL;
1409	int namelen = 0; /* fake GCC */
1410	int err;
1411	unsigned hash;
1412	struct sk_buff *skb;
1413	long timeo;
1414	struct scm_cookie tmp_scm;
1415	int max_level;
1416
1417	if (NULL == siocb->scm)
1418		siocb->scm = &tmp_scm;
1419	wait_for_unix_gc();
1420	err = scm_send(sock, msg, siocb->scm);
1421	if (err < 0)
1422		return err;
1423
1424	err = -EOPNOTSUPP;
1425	if (msg->msg_flags&MSG_OOB)
1426		goto out;
1427
1428	if (msg->msg_namelen) {
1429		err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1430		if (err < 0)
1431			goto out;
1432		namelen = err;
1433	} else {
1434		sunaddr = NULL;
1435		err = -ENOTCONN;
1436		other = unix_peer_get(sk);
1437		if (!other)
1438			goto out;
1439	}
1440
1441	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1442	    && (err = unix_autobind(sock)) != 0)
1443		goto out;
1444
1445	err = -EMSGSIZE;
1446	if (len > sk->sk_sndbuf - 32)
1447		goto out;
1448
1449	skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err);
1450	if (skb == NULL)
1451		goto out;
1452
1453	err = unix_scm_to_skb(siocb->scm, skb, true);
1454	if (err < 0)
1455		goto out_free;
1456	max_level = err + 1;
1457	unix_get_secdata(siocb->scm, skb);
1458
1459	skb_reset_transport_header(skb);
1460	err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
1461	if (err)
1462		goto out_free;
1463
1464	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1465
1466restart:
1467	if (!other) {
1468		err = -ECONNRESET;
1469		if (sunaddr == NULL)
1470			goto out_free;
1471
1472		other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1473					hash, &err);
1474		if (other == NULL)
1475			goto out_free;
1476	}
1477
1478	unix_state_lock(other);
1479	err = -EPERM;
1480	if (!unix_may_send(sk, other))
1481		goto out_unlock;
1482
1483	if (sock_flag(other, SOCK_DEAD)) {
1484		/*
1485		 *	Check with 1003.1g - what should
1486		 *	happen on a datagram error?
1487		 */
1488		unix_state_unlock(other);
1489		sock_put(other);
1490
1491		err = 0;
1492		unix_state_lock(sk);
1493		if (unix_peer(sk) == other) {
1494			unix_peer(sk) = NULL;
1495			unix_state_unlock(sk);
1496
1497			unix_dgram_disconnected(sk, other);
1498			sock_put(other);
1499			err = -ECONNREFUSED;
1500		} else {
1501			unix_state_unlock(sk);
1502		}
1503
1504		other = NULL;
1505		if (err)
1506			goto out_free;
1507		goto restart;
1508	}
1509
1510	err = -EPIPE;
1511	if (other->sk_shutdown & RCV_SHUTDOWN)
1512		goto out_unlock;
1513
1514	if (sk->sk_type != SOCK_SEQPACKET) {
1515		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1516		if (err)
1517			goto out_unlock;
1518	}
1519
1520	if (unix_peer(other) != sk && unix_recvq_full(other)) {
1521		if (!timeo) {
1522			err = -EAGAIN;
1523			goto out_unlock;
1524		}
1525
1526		timeo = unix_wait_for_peer(other, timeo);
1527
1528		err = sock_intr_errno(timeo);
1529		if (signal_pending(current))
1530			goto out_free;
1531
1532		goto restart;
1533	}
1534
1535	if (sock_flag(other, SOCK_RCVTSTAMP))
1536		__net_timestamp(skb);
1537	skb_queue_tail(&other->sk_receive_queue, skb);
1538	if (max_level > unix_sk(other)->recursion_level)
1539		unix_sk(other)->recursion_level = max_level;
1540	unix_state_unlock(other);
1541	other->sk_data_ready(other, len);
1542	sock_put(other);
1543	scm_destroy(siocb->scm);
1544	return len;
1545
1546out_unlock:
1547	unix_state_unlock(other);
1548out_free:
1549	kfree_skb(skb);
1550out:
1551	if (other)
1552		sock_put(other);
1553	scm_destroy(siocb->scm);
1554	return err;
1555}
1556
1557
1558static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1559			       struct msghdr *msg, size_t len)
1560{
1561	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1562	struct sock *sk = sock->sk;
1563	struct sock *other = NULL;
1564	struct sockaddr_un *sunaddr = msg->msg_name;
1565	int err, size;
1566	struct sk_buff *skb;
1567	int sent = 0;
1568	struct scm_cookie tmp_scm;
1569	bool fds_sent = false;
1570	int max_level;
1571
1572	if (NULL == siocb->scm)
1573		siocb->scm = &tmp_scm;
1574	wait_for_unix_gc();
1575	err = scm_send(sock, msg, siocb->scm);
1576	if (err < 0)
1577		return err;
1578
1579	err = -EOPNOTSUPP;
1580	if (msg->msg_flags&MSG_OOB)
1581		goto out_err;
1582
1583	if (msg->msg_namelen) {
1584		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1585		goto out_err;
1586	} else {
1587		sunaddr = NULL;
1588		err = -ENOTCONN;
1589		other = unix_peer(sk);
1590		if (!other)
1591			goto out_err;
1592	}
1593
1594	if (sk->sk_shutdown & SEND_SHUTDOWN)
1595		goto pipe_err;
1596
1597	while (sent < len) {
1598		/*
1599		 *	Optimisation for the fact that under 0.01% of X (X11)
1600		 *	messages typically need breaking up.
1601		 */
1602
1603		size = len-sent;
1604
1605		/* Keep two messages in the pipe so it schedules better */
1606		if (size > ((sk->sk_sndbuf >> 1) - 64))
1607			size = (sk->sk_sndbuf >> 1) - 64;
1608
1609		if (size > SKB_MAX_ALLOC)
1610			size = SKB_MAX_ALLOC;
1611
1612		/*
1613		 *	Grab a buffer
1614		 */
1615
1616		skb = sock_alloc_send_skb(sk, size, msg->msg_flags&MSG_DONTWAIT,
1617					  &err);
1618
1619		if (skb == NULL)
1620			goto out_err;
1621
1622		/*
1623		 *	If you pass two values to sock_alloc_send_skb
1624		 *	it tries to grab the large buffer with GFP_NOFS
1625		 *	(which can fail easily), and if that fails it grabs the
1626		 *	fallback size buffer, which is under a page and will
1627		 *	succeed. [Alan]
1628		 */
1629		size = min_t(int, size, skb_tailroom(skb));
1630
1631
1632		/* Only send the fds in the first buffer */
1633		err = unix_scm_to_skb(siocb->scm, skb, !fds_sent);
1634		if (err < 0) {
1635			kfree_skb(skb);
1636			goto out_err;
1637		}
1638		max_level = err + 1;
1639		fds_sent = true;
1640
1641		err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
1642		if (err) {
1643			kfree_skb(skb);
1644			goto out_err;
1645		}
1646
1647		unix_state_lock(other);
1648
1649		if (sock_flag(other, SOCK_DEAD) ||
1650		    (other->sk_shutdown & RCV_SHUTDOWN))
1651			goto pipe_err_free;
1652
1653		skb_queue_tail(&other->sk_receive_queue, skb);
1654		if (max_level > unix_sk(other)->recursion_level)
1655			unix_sk(other)->recursion_level = max_level;
1656		unix_state_unlock(other);
1657		other->sk_data_ready(other, size);
1658		sent += size;
1659	}
1660
1661	scm_destroy(siocb->scm);
1662	siocb->scm = NULL;
1663
1664	return sent;
1665
1666pipe_err_free:
1667	unix_state_unlock(other);
1668	kfree_skb(skb);
1669pipe_err:
1670	if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1671		send_sig(SIGPIPE, current, 0);
1672	err = -EPIPE;
1673out_err:
1674	scm_destroy(siocb->scm);
1675	siocb->scm = NULL;
1676	return sent ? : err;
1677}
1678
1679static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
1680				  struct msghdr *msg, size_t len)
1681{
1682	int err;
1683	struct sock *sk = sock->sk;
1684
1685	err = sock_error(sk);
1686	if (err)
1687		return err;
1688
1689	if (sk->sk_state != TCP_ESTABLISHED)
1690		return -ENOTCONN;
1691
1692	if (msg->msg_namelen)
1693		msg->msg_namelen = 0;
1694
1695	return unix_dgram_sendmsg(kiocb, sock, msg, len);
1696}
1697
1698static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
1699{
1700	struct unix_sock *u = unix_sk(sk);
1701
1702	msg->msg_namelen = 0;
1703	if (u->addr) {
1704		msg->msg_namelen = u->addr->len;
1705		memcpy(msg->msg_name, u->addr->name, u->addr->len);
1706	}
1707}
1708
1709static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
1710			      struct msghdr *msg, size_t size,
1711			      int flags)
1712{
1713	struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1714	struct scm_cookie tmp_scm;
1715	struct sock *sk = sock->sk;
1716	struct unix_sock *u = unix_sk(sk);
1717	int noblock = flags & MSG_DONTWAIT;
1718	struct sk_buff *skb;
1719	int err;
1720
1721	err = -EOPNOTSUPP;
1722	if (flags&MSG_OOB)
1723		goto out;
1724
1725	msg->msg_namelen = 0;
1726
1727	err = mutex_lock_interruptible(&u->readlock);
1728	if (err) {
1729		err = sock_intr_errno(sock_rcvtimeo(sk, noblock));
1730		goto out;
1731	}
1732
1733	skb = skb_recv_datagram(sk, flags, noblock, &err);
1734	if (!skb) {
1735		unix_state_lock(sk);
1736		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
1737		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
1738		    (sk->sk_shutdown & RCV_SHUTDOWN))
1739			err = 0;
1740		unix_state_unlock(sk);
1741		goto out_unlock;
1742	}
1743
1744	wake_up_interruptible_sync_poll(&u->peer_wait,
1745					POLLOUT | POLLWRNORM | POLLWRBAND);
1746
1747	if (msg->msg_name)
1748		unix_copy_addr(msg, skb->sk);
1749
1750	if (size > skb->len)
1751		size = skb->len;
1752	else if (size < skb->len)
1753		msg->msg_flags |= MSG_TRUNC;
1754
1755	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size);
1756	if (err)
1757		goto out_free;
1758
1759	if (sock_flag(sk, SOCK_RCVTSTAMP))
1760		__sock_recv_timestamp(msg, sk, skb);
1761
1762	if (!siocb->scm) {
1763		siocb->scm = &tmp_scm;
1764		memset(&tmp_scm, 0, sizeof(tmp_scm));
1765	}
1766	scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred);
1767	unix_set_secdata(siocb->scm, skb);
1768
1769	if (!(flags & MSG_PEEK)) {
1770		if (UNIXCB(skb).fp)
1771			unix_detach_fds(siocb->scm, skb);
1772	} else {
1773		/* It is questionable: on PEEK we could:
1774		   - not return fds - good, but too simple 8)
1775		   - return fds, and do not return them on read (old strategy,
1776		     apparently wrong)
1777		   - clone fds (I chose it for now, it is the most universal
1778		     solution)
1779
1780		   POSIX 1003.1g does not actually define this clearly
1781		   at all. POSIX 1003.1g doesn't define a lot of things
1782		   clearly however!
1783
1784		*/
1785		if (UNIXCB(skb).fp)
1786			siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1787	}
1788	err = size;
1789
1790	scm_recv(sock, msg, siocb->scm, flags);
1791
1792out_free:
1793	skb_free_datagram(sk, skb);
1794out_unlock:
1795	mutex_unlock(&u->readlock);
1796out:
1797	return err;
1798}
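
/*
 * Illustrative userspace sketch (not part of this file; sock_fd is
 * assumed to exist): the matching receive side for the fd-passing case
 * handled above.
 *
 *	char cbuf[CMSG_SPACE(sizeof(int))];
 *	char data[16];
 *	struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
 *	struct msghdr mh = { 0 };
 *	struct cmsghdr *c;
 *	int newfd = -1;
 *
 *	mh.msg_iov = &iov;
 *	mh.msg_iovlen = 1;
 *	mh.msg_control = cbuf;
 *	mh.msg_controllen = sizeof(cbuf);
 *	recvmsg(sock_fd, &mh, 0);
 *	c = CMSG_FIRSTHDR(&mh);
 *	if (c && c->cmsg_level == SOL_SOCKET && c->cmsg_type == SCM_RIGHTS)
 *		memcpy(&newfd, CMSG_DATA(c), sizeof(int));
 */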
1799
1800/*
1801 *	Sleep until data has arrived. But check for races.
1802 */
1803
1804static long unix_stream_data_wait(struct sock *sk, long timeo)
1805{
1806	DEFINE_WAIT(wait);
1807
1808	unix_state_lock(sk);
1809
1810	for (;;) {
1811		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1812
1813		if (!skb_queue_empty(&sk->sk_receive_queue) ||
1814		    sk->sk_err ||
1815		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
1816		    signal_pending(current) ||
1817		    !timeo)
1818			break;
1819
1820		set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1821		unix_state_unlock(sk);
1822		timeo = schedule_timeout(timeo);
1823		unix_state_lock(sk);
1824		clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1825	}
1826
1827	finish_wait(sk_sleep(sk), &wait);
1828	unix_state_unlock(sk);
1829	return timeo;
1830}
1831
1832
1833
1834static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
1835			       struct msghdr *msg, size_t size,
1836			       int flags)
1837{
1838	struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1839	struct scm_cookie tmp_scm;
1840	struct sock *sk = sock->sk;
1841	struct unix_sock *u = unix_sk(sk);
1842	struct sockaddr_un *sunaddr = msg->msg_name;
1843	int copied = 0;
1844	int check_creds = 0;
1845	int target;
1846	int err = 0;
1847	long timeo;
1848
1849	err = -EINVAL;
1850	if (sk->sk_state != TCP_ESTABLISHED)
1851		goto out;
1852
1853	err = -EOPNOTSUPP;
1854	if (flags&MSG_OOB)
1855		goto out;
1856
1857	target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
1858	timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT);
1859
1860	msg->msg_namelen = 0;
1861
1862	/* Lock the socket to prevent queue disordering
1863	 * while we sleep in memcpy_toiovec
1864	 */
1865
1866	if (!siocb->scm) {
1867		siocb->scm = &tmp_scm;
1868		memset(&tmp_scm, 0, sizeof(tmp_scm));
1869	}
1870
1871	err = mutex_lock_interruptible(&u->readlock);
1872	if (err) {
1873		err = sock_intr_errno(timeo);
1874		goto out;
1875	}
1876
1877	do {
1878		int chunk;
1879		struct sk_buff *skb;
1880
1881		unix_state_lock(sk);
1882		skb = skb_dequeue(&sk->sk_receive_queue);
1883		if (skb == NULL) {
1884			unix_sk(sk)->recursion_level = 0;
1885			if (copied >= target)
1886				goto unlock;
1887
1888			/*
1889			 *	POSIX 1003.1g mandates this order.
1890			 */
1891
1892			err = sock_error(sk);
1893			if (err)
1894				goto unlock;
1895			if (sk->sk_shutdown & RCV_SHUTDOWN)
1896				goto unlock;
1897
1898			unix_state_unlock(sk);
1899			err = -EAGAIN;
1900			if (!timeo)
1901				break;
1902			mutex_unlock(&u->readlock);
1903
1904			timeo = unix_stream_data_wait(sk, timeo);
1905
1906			if (signal_pending(current)
1907			    ||  mutex_lock_interruptible(&u->readlock)) {
1908				err = sock_intr_errno(timeo);
1909				goto out;
1910			}
1911
1912			continue;
1913 unlock:
1914			unix_state_unlock(sk);
1915			break;
1916		}
1917		unix_state_unlock(sk);
1918
1919		if (check_creds) {
1920			/* Never glue messages from different writers */
1921			if ((UNIXCB(skb).pid  != siocb->scm->pid) ||
1922			    (UNIXCB(skb).cred != siocb->scm->cred)) {
1923				skb_queue_head(&sk->sk_receive_queue, skb);
1924				break;
1925			}
1926		} else {
1927			/* Copy credentials */
1928			scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred);
1929			check_creds = 1;
1930		}
1931
1932		/* Copy address just once */
1933		if (sunaddr) {
1934			unix_copy_addr(msg, skb->sk);
1935			sunaddr = NULL;
1936		}
1937
1938		chunk = min_t(unsigned int, skb->len, size);
1939		if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
1940			skb_queue_head(&sk->sk_receive_queue, skb);
1941			if (copied == 0)
1942				copied = -EFAULT;
1943			break;
1944		}
1945		copied += chunk;
1946		size -= chunk;
1947
1948		/* Mark read part of skb as used */
1949		if (!(flags & MSG_PEEK)) {
1950			skb_pull(skb, chunk);
1951
1952			if (UNIXCB(skb).fp)
1953				unix_detach_fds(siocb->scm, skb);
1954
1955			/* put the skb back if we didn't use it up.. */
1956			if (skb->len) {
1957				skb_queue_head(&sk->sk_receive_queue, skb);
1958				break;
1959			}
1960
1961			consume_skb(skb);
1962
1963			if (siocb->scm->fp)
1964				break;
1965		} else {
1966			/* It is questionable, see note in unix_dgram_recvmsg.
1967			 */
1968			if (UNIXCB(skb).fp)
1969				siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1970
1971			/* put message back and return */
1972			skb_queue_head(&sk->sk_receive_queue, skb);
1973			break;
1974		}
1975	} while (size);
1976
1977	mutex_unlock(&u->readlock);
1978	scm_recv(sock, msg, siocb->scm, flags);
1979out:
1980	return copied ? : err;
1981}
1982
1983static int unix_shutdown(struct socket *sock, int mode)
1984{
1985	struct sock *sk = sock->sk;
1986	struct sock *other;
1987
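	/* Map userspace SHUT_RD(0)/SHUT_WR(1)/SHUT_RDWR(2) onto the
	 * RCV_SHUTDOWN/SEND_SHUTDOWN bit mask: mode+1 yields the bit
	 * patterns 01, 10 and 11 respectively.
	 */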
1988	mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);
1989
1990	if (mode) {
1991		unix_state_lock(sk);
1992		sk->sk_shutdown |= mode;
1993		other = unix_peer(sk);
1994		if (other)
1995			sock_hold(other);
1996		unix_state_unlock(sk);
1997		sk->sk_state_change(sk);
1998
1999		if (other &&
2000			(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2001
2002			int peer_mode = 0;
2003
2004			if (mode&RCV_SHUTDOWN)
2005				peer_mode |= SEND_SHUTDOWN;
2006			if (mode&SEND_SHUTDOWN)
2007				peer_mode |= RCV_SHUTDOWN;
2008			unix_state_lock(other);
2009			other->sk_shutdown |= peer_mode;
2010			unix_state_unlock(other);
2011			other->sk_state_change(other);
2012			if (peer_mode == SHUTDOWN_MASK)
2013				sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2014			else if (peer_mode & RCV_SHUTDOWN)
2015				sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2016		}
2017		if (other)
2018			sock_put(other);
2019	}
2020	return 0;
2021}
2022
2023static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2024{
2025	struct sock *sk = sock->sk;
2026	long amount = 0;
2027	int err;
2028
2029	switch (cmd) {
2030	case SIOCOUTQ:
2031		amount = sk_wmem_alloc_get(sk);
2032		err = put_user(amount, (int __user *)arg);
2033		break;
2034	case SIOCINQ:
2035		{
2036			struct sk_buff *skb;
2037
2038			if (sk->sk_state == TCP_LISTEN) {
2039				err = -EINVAL;
2040				break;
2041			}
2042
2043			spin_lock(&sk->sk_receive_queue.lock);
2044			if (sk->sk_type == SOCK_STREAM ||
2045			    sk->sk_type == SOCK_SEQPACKET) {
2046				skb_queue_walk(&sk->sk_receive_queue, skb)
2047					amount += skb->len;
2048			} else {
2049				skb = skb_peek(&sk->sk_receive_queue);
2050				if (skb)
2051					amount = skb->len;
2052			}
2053			spin_unlock(&sk->sk_receive_queue.lock);
2054			err = put_user(amount, (int __user *)arg);
2055			break;
2056		}
2057
2058	default:
2059		err = -ENOIOCTLCMD;
2060		break;
2061	}
2062	return err;
2063}
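
/*
 * Illustrative userspace sketch (not part of this file): querying the
 * amounts computed above.
 *
 *	int pending;
 *
 *	ioctl(fd, SIOCINQ, &pending);	reports unread receive bytes
 *	ioctl(fd, SIOCOUTQ, &pending);	reports sent-but-unconsumed bytes
 */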
2064
2065static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
2066{
2067	struct sock *sk = sock->sk;
2068	unsigned int mask;
2069
2070	sock_poll_wait(file, sk_sleep(sk), wait);
2071	mask = 0;
2072
2073	/* exceptional events? */
2074	if (sk->sk_err)
2075		mask |= POLLERR;
2076	if (sk->sk_shutdown == SHUTDOWN_MASK)
2077		mask |= POLLHUP;
2078	if (sk->sk_shutdown & RCV_SHUTDOWN)
2079		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2080
2081	/* readable? */
2082	if (!skb_queue_empty(&sk->sk_receive_queue))
2083		mask |= POLLIN | POLLRDNORM;
2084
2085	/* Connection-based sockets need to check for termination and startup */
2086	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2087	    sk->sk_state == TCP_CLOSE)
2088		mask |= POLLHUP;
2089
2090	/*
2091	 * we set writable also when the other side has shut down the
2092	 * connection. This prevents stuck sockets.
2093	 */
2094	if (unix_writable(sk))
2095		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2096
2097	return mask;
2098}
2099
2100static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
2101				    poll_table *wait)
2102{
2103	struct sock *sk = sock->sk, *other;
2104	unsigned int mask, writable;
2105
2106	sock_poll_wait(file, sk_sleep(sk), wait);
2107	mask = 0;
2108
2109	/* exceptional events? */
2110	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
2111		mask |= POLLERR;
2112	if (sk->sk_shutdown & RCV_SHUTDOWN)
2113		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2114	if (sk->sk_shutdown == SHUTDOWN_MASK)
2115		mask |= POLLHUP;
2116
2117	/* readable? */
2118	if (!skb_queue_empty(&sk->sk_receive_queue))
2119		mask |= POLLIN | POLLRDNORM;
2120
2121	/* Connection-based sockets need to check for termination and startup */
2122	if (sk->sk_type == SOCK_SEQPACKET) {
2123		if (sk->sk_state == TCP_CLOSE)
2124			mask |= POLLHUP;
2125		/* connection hasn't started yet? */
2126		if (sk->sk_state == TCP_SYN_SENT)
2127			return mask;
2128	}
2129
2130	/* No write status requested, avoid expensive OUT tests. */
2131	if (wait && !(wait->key & (POLLWRBAND | POLLWRNORM | POLLOUT)))
2132		return mask;
2133
2134	writable = unix_writable(sk);
2135	other = unix_peer_get(sk);
2136	if (other) {
2137		if (unix_peer(other) != sk) {
2138			sock_poll_wait(file, &unix_sk(other)->peer_wait, wait);
2139			if (unix_recvq_full(other))
2140				writable = 0;
2141		}
2142		sock_put(other);
2143	}
2144
2145	if (writable)
2146		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2147	else
2148		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
2149
2150	return mask;
2151}
2152
2153#ifdef CONFIG_PROC_FS
2154static struct sock *first_unix_socket(int *i)
2155{
2156	for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) {
2157		if (!hlist_empty(&unix_socket_table[*i]))
2158			return __sk_head(&unix_socket_table[*i]);
2159	}
2160	return NULL;
2161}
2162
2163static struct sock *next_unix_socket(int *i, struct sock *s)
2164{
2165	struct sock *next = sk_next(s);
2166	/* More in this chain? */
2167	if (next)
2168		return next;
2169	/* Look for next non-empty chain. */
2170	for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) {
2171		if (!hlist_empty(&unix_socket_table[*i]))
2172			return __sk_head(&unix_socket_table[*i]);
2173	}
2174	return NULL;
2175}
2176
2177struct unix_iter_state {
2178	struct seq_net_private p;
2179	int i;
2180};
2181
2182static struct sock *unix_seq_idx(struct seq_file *seq, loff_t pos)
2183{
2184	struct unix_iter_state *iter = seq->private;
2185	loff_t off = 0;
2186	struct sock *s;
2187
2188	for (s = first_unix_socket(&iter->i); s; s = next_unix_socket(&iter->i, s)) {
2189		if (sock_net(s) != seq_file_net(seq))
2190			continue;
2191		if (off == pos)
2192			return s;
2193		++off;
2194	}
2195	return NULL;
2196}
2197
2198static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2199	__acquires(unix_table_lock)
2200{
2201	spin_lock(&unix_table_lock);
2202	return *pos ? unix_seq_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2203}
2204
2205static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2206{
2207	struct unix_iter_state *iter = seq->private;
2208	struct sock *sk = v;
2209	++*pos;
2210
2211	if (v == SEQ_START_TOKEN)
2212		sk = first_unix_socket(&iter->i);
2213	else
2214		sk = next_unix_socket(&iter->i, sk);
2215	while (sk && (sock_net(sk) != seq_file_net(seq)))
2216		sk = next_unix_socket(&iter->i, sk);
2217	return sk;
2218}
2219
2220static void unix_seq_stop(struct seq_file *seq, void *v)
2221	__releases(unix_table_lock)
2222{
2223	spin_unlock(&unix_table_lock);
2224}
2225
2226static int unix_seq_show(struct seq_file *seq, void *v)
2227{
2228
2229	if (v == SEQ_START_TOKEN)
2230		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
2231			 "Inode Path\n");
2232	else {
2233		struct sock *s = v;
2234		struct unix_sock *u = unix_sk(s);
2235		unix_state_lock(s);
2236
2237		seq_printf(seq, "%p: %08X %08X %08X %04X %02X %5lu",
2238			s,
2239			atomic_read(&s->sk_refcnt),
2240			0,
2241			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2242			s->sk_type,
2243			s->sk_socket ?
2244			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2245			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2246			sock_i_ino(s));
2247
2248		if (u->addr) {
2249			int i, len;
2250			seq_putc(seq, ' ');
2251
2252			i = 0;
2253			len = u->addr->len - sizeof(short);
2254			if (!UNIX_ABSTRACT(s))
2255				len--;
2256			else {
2257				seq_putc(seq, '@');
2258				i++;
2259			}
2260			for ( ; i < len; i++)
2261				seq_putc(seq, u->addr->name->sun_path[i]);
2262		}
2263		unix_state_unlock(s);
2264		seq_putc(seq, '\n');
2265	}
2266
2267	return 0;
2268}
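
/*
 * Illustrative sample (not part of this file, values invented): one
 * line of /proc/net/unix as produced by the seq_printf() above, for a
 * bound stream socket:
 *
 *	ffff88003b7cd400: 00000002 00000000 00010000 0001 01 10293 /dev/log
 */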
2269
2270static const struct seq_operations unix_seq_ops = {
2271	.start  = unix_seq_start,
2272	.next   = unix_seq_next,
2273	.stop   = unix_seq_stop,
2274	.show   = unix_seq_show,
2275};
2276
2277static int unix_seq_open(struct inode *inode, struct file *file)
2278{
2279	return seq_open_net(inode, file, &unix_seq_ops,
2280			    sizeof(struct unix_iter_state));
2281}
2282
2283static const struct file_operations unix_seq_fops = {
2284	.owner		= THIS_MODULE,
2285	.open		= unix_seq_open,
2286	.read		= seq_read,
2287	.llseek		= seq_lseek,
2288	.release	= seq_release_net,
2289};
2290
2291#endif
2292
2293static const struct net_proto_family unix_family_ops = {
2294	.family = PF_UNIX,
2295	.create = unix_create,
2296	.owner	= THIS_MODULE,
2297};
2298
2299
2300static int __net_init unix_net_init(struct net *net)
2301{
2302	int error = -ENOMEM;
2303
2304	net->unx.sysctl_max_dgram_qlen = 10;
2305	if (unix_sysctl_register(net))
2306		goto out;
2307
2308#ifdef CONFIG_PROC_FS
2309	if (!proc_net_fops_create(net, "unix", 0, &unix_seq_fops)) {
2310		unix_sysctl_unregister(net);
2311		goto out;
2312	}
2313#endif
2314	error = 0;
2315out:
2316	return error;
2317}
2318
2319static void __net_exit unix_net_exit(struct net *net)
2320{
2321	unix_sysctl_unregister(net);
2322	proc_net_remove(net, "unix");
2323}
2324
2325static struct pernet_operations unix_net_ops = {
2326	.init = unix_net_init,
2327	.exit = unix_net_exit,
2328};
2329
2330static int __init af_unix_init(void)
2331{
2332	int rc = -1;
2333	struct sk_buff *dummy_skb;
2334
2335	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb));
2336
2337	rc = proto_register(&unix_proto, 1);
2338	if (rc != 0) {
2339		printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n",
2340		       __func__);
2341		goto out;
2342	}
2343
2344	sock_register(&unix_family_ops);
2345	register_pernet_subsys(&unix_net_ops);
2346out:
2347	return rc;
2348}
2349
2350static void __exit af_unix_exit(void)
2351{
2352	sock_unregister(PF_UNIX);
2353	proto_unregister(&unix_proto);
2354	unregister_pernet_subsys(&unix_net_ops);
2355}
2356
2357/* Earlier than device_initcall() so that other drivers invoking
2358   request_module() don't end up in a loop when modprobe tries
2359   to use a UNIX socket. But later than subsys_initcall() because
2360   we depend on stuff initialised there */
2361fs_initcall(af_unix_init);
2362module_exit(af_unix_exit);
2363
2364MODULE_LICENSE("GPL");
2365MODULE_ALIAS_NETPROTO(PF_UNIX);
2366