af_unix.c revision 109f6e39fa07c48f580125f531f46cb7c245b528
1/*
2 * NET4:	Implementation of BSD Unix domain sockets.
3 *
4 * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
5 *
6 *		This program is free software; you can redistribute it and/or
7 *		modify it under the terms of the GNU General Public License
8 *		as published by the Free Software Foundation; either version
9 *		2 of the License, or (at your option) any later version.
10 *
11 * Fixes:
12 *		Linus Torvalds	:	Assorted bug cures.
13 *		Niibe Yutaka	:	async I/O support.
14 *		Carsten Paeth	:	PF_UNIX check, address fixes.
15 *		Alan Cox	:	Limit size of allocated blocks.
16 *		Alan Cox	:	Fixed the stupid socketpair bug.
17 *		Alan Cox	:	BSD compatibility fine tuning.
18 *		Alan Cox	:	Fixed a bug in connect when interrupted.
19 *		Alan Cox	:	Sorted out a proper draft version of
20 *					file descriptor passing hacked up from
21 *					Mike Shaver's work.
22 *		Marty Leisner	:	Fixes to fd passing
23 *		Nick Nevin	:	recvmsg bugfix.
24 *		Alan Cox	:	Started proper garbage collector
25 *		Heiko EiBfeldt	:	Missing verify_area check
26 *		Alan Cox	:	Started POSIXisms
27 *		Andreas Schwab	:	Replace inode by dentry for proper
28 *					reference counting
29 *		Kirk Petersen	:	Made this a module
30 *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
31 *					Lots of bug fixes.
 *	     Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
 *					by above two patches.
34 *	     Andrea Arcangeli	:	If possible we block in connect(2)
35 *					if the max backlog of the listen socket
 *					has been reached. This won't break
37 *					old apps and it will avoid huge amount
38 *					of socks hashed (this for unix_gc()
39 *					performances reasons).
40 *					Security fix that limits the max
41 *					number of socks to 2*max_files and
42 *					the number of skb queueable in the
43 *					dgram receiver.
44 *		Artur Skawina   :	Hash function optimizations
45 *	     Alexey Kuznetsov   :	Full scale SMP. Lot of bugs are introduced 8)
46 *	      Malcolm Beattie   :	Set peercred for socketpair
47 *	     Michal Ostrowski   :       Module initialization cleanup.
48 *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
49 *	     				the core infrastructure is doing that
50 *	     				for all net proto families now (2.5.69+)
51 *
52 *
53 * Known differences from reference BSD that was tested:
54 *
55 *	[TO FIX]
56 *	ECONNREFUSED is not returned from one end of a connected() socket to the
57 *		other the moment one end closes.
58 *	fstat() doesn't return st_dev=0, and give the blksize as high water mark
59 *		and a fake inode identifier (nor the BSD first socket fstat twice bug).
60 *	[NOT TO FIX]
61 *	accept() returns a path name even if the connecting socket has closed
62 *		in the meantime (BSD loses the path and gives up).
63 *	accept() returns 0 length path for an unbound connector. BSD returns 16
64 *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
65 *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
66 *	BSD af_unix apparently has connect forgetting to block properly.
67 *		(need to check this with the POSIX spec in detail)
68 *
69 * Differences from 2.0.0-11-... (ANK)
70 *	Bug fixes and improvements.
71 *		- client shutdown killed server socket.
72 *		- removed all useless cli/sti pairs.
73 *
74 *	Semantic changes/extensions.
75 *		- generic control message passing.
76 *		- SCM_CREDENTIALS control message.
77 *		- "Abstract" (not FS based) socket bindings.
78 *		  Abstract names are sequences of bytes (not zero terminated)
79 *		  started by 0, so that this name space does not intersect
80 *		  with BSD names.
81 */
82
83#include <linux/module.h>
84#include <linux/kernel.h>
85#include <linux/signal.h>
86#include <linux/sched.h>
87#include <linux/errno.h>
88#include <linux/string.h>
89#include <linux/stat.h>
90#include <linux/dcache.h>
91#include <linux/namei.h>
92#include <linux/socket.h>
93#include <linux/un.h>
94#include <linux/fcntl.h>
95#include <linux/termios.h>
96#include <linux/sockios.h>
97#include <linux/net.h>
98#include <linux/in.h>
99#include <linux/fs.h>
100#include <linux/slab.h>
101#include <asm/uaccess.h>
102#include <linux/skbuff.h>
103#include <linux/netdevice.h>
104#include <net/net_namespace.h>
105#include <net/sock.h>
106#include <net/tcp_states.h>
107#include <net/af_unix.h>
108#include <linux/proc_fs.h>
109#include <linux/seq_file.h>
110#include <net/scm.h>
111#include <linux/init.h>
112#include <linux/poll.h>
113#include <linux/rtnetlink.h>
114#include <linux/mount.h>
115#include <net/checksum.h>
116#include <linux/security.h>
117
/* Global hash table of all AF_UNIX sockets.  The extra slot at index
 * UNIX_HASH_SIZE holds sockets not yet bound to any address. */
static struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
/* Protects unix_socket_table (see "SMP locking strategy" below). */
static DEFINE_SPINLOCK(unix_table_lock);
/* Count of live unix socks; capped in unix_create1(). */
static atomic_t unix_nr_socks = ATOMIC_INIT(0);

/* Pseudo hash chain for sockets with no address bound yet. */
#define unix_sockets_unbound	(&unix_socket_table[UNIX_HASH_SIZE])

/* True when the bound address is abstract (non-filesystem); unix_bind()
 * forces hash == UNIX_HASH_SIZE for filesystem-backed sockets.  Only
 * valid when unix_sk(sk)->addr is non-NULL. */
#define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)
125
#ifdef CONFIG_SECURITY_NETWORK
/* Send path: stash the sender's LSM security ID from the scm cookie
 * into the skb so it travels with the message. */
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	memcpy(UNIXSID(skb), &scm->secid, sizeof(u32));
}

/* Receive path: recover the security ID carried by the skb into the
 * receiver's scm cookie. */
static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secid = *UNIXSID(skb);
}
#else
/* No-op stubs when LSM network hooks are not configured. */
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }
#endif /* CONFIG_SECURITY_NETWORK */
143
144/*
145 *  SMP locking strategy:
146 *    hash table is protected with spinlock unix_table_lock
147 *    each socket state is protected by separate spin lock.
148 */
149
150static inline unsigned unix_hash_fold(__wsum n)
151{
152	unsigned hash = (__force unsigned)n;
153	hash ^= hash>>16;
154	hash ^= hash>>8;
155	return hash&(UNIX_HASH_SIZE-1);
156}
157
/* The connected remote sock, or the pinned datagram destination. */
#define unix_peer(sk) (unix_sk(sk)->peer)
159
/* Does @osk consider @sk to be its peer? */
static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
	return sk == unix_peer(osk);
}
164
165static inline int unix_may_send(struct sock *sk, struct sock *osk)
166{
167	return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
168}
169
170static inline int unix_recvq_full(struct sock const *sk)
171{
172	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
173}
174
175static struct sock *unix_peer_get(struct sock *s)
176{
177	struct sock *peer;
178
179	unix_state_lock(s);
180	peer = unix_peer(s);
181	if (peer)
182		sock_hold(peer);
183	unix_state_unlock(s);
184	return peer;
185}
186
187static inline void unix_release_addr(struct unix_address *addr)
188{
189	if (atomic_dec_and_test(&addr->refcnt))
190		kfree(addr);
191}
192
/*
 *	Check unix socket name:
 *		- should be not zero length.
 *	        - if started by not zero, should be NULL terminated (FS object)
 *		- if started by zero, it is abstract name.
 *
 *	Returns the canonical address length on success, negative errno on
 *	failure.  For abstract names *hashp receives the folded checksum of
 *	the name bytes; for filesystem names *hashp is left untouched (the
 *	caller hashes by inode instead).
 */

static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned *hashp)
{
	if (len <= sizeof(short) || len > sizeof(*sunaddr))
		return -EINVAL;
	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
		return -EINVAL;
	if (sunaddr->sun_path[0]) {
		/*
		 * This may look like an off by one error but it is a bit more
		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
		 * sun_path[108] doesn't as such exist.  However in kernel space
		 * we are guaranteed that it is a valid memory location in our
		 * kernel address buffer.
		 */
		((char *)sunaddr)[len] = 0;
		len = strlen(sunaddr->sun_path)+1+sizeof(short);
		return len;
	}

	*hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
	return len;
}
222
/* Unhash @sk.  Caller must hold unix_table_lock. */
static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}
227
/* Hash @sk onto @list.  Caller must hold unix_table_lock and @sk must
 * not already be hashed. */
static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	WARN_ON(!sk_unhashed(sk));
	sk_add_node(sk, list);
}
233
/* Locked wrapper around __unix_remove_socket(). */
static inline void unix_remove_socket(struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_remove_socket(sk);
	spin_unlock(&unix_table_lock);
}
240
/* Locked wrapper around __unix_insert_socket(). */
static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_insert_socket(list, sk);
	spin_unlock(&unix_table_lock);
}
247
248static struct sock *__unix_find_socket_byname(struct net *net,
249					      struct sockaddr_un *sunname,
250					      int len, int type, unsigned hash)
251{
252	struct sock *s;
253	struct hlist_node *node;
254
255	sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
256		struct unix_sock *u = unix_sk(s);
257
258		if (!net_eq(sock_net(s), net))
259			continue;
260
261		if (u->addr->len == len &&
262		    !memcmp(u->addr->name, sunname, len))
263			goto found;
264	}
265	s = NULL;
266found:
267	return s;
268}
269
270static inline struct sock *unix_find_socket_byname(struct net *net,
271						   struct sockaddr_un *sunname,
272						   int len, int type,
273						   unsigned hash)
274{
275	struct sock *s;
276
277	spin_lock(&unix_table_lock);
278	s = __unix_find_socket_byname(net, sunname, len, type, hash);
279	if (s)
280		sock_hold(s);
281	spin_unlock(&unix_table_lock);
282	return s;
283}
284
285static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i)
286{
287	struct sock *s;
288	struct hlist_node *node;
289
290	spin_lock(&unix_table_lock);
291	sk_for_each(s, node,
292		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
293		struct dentry *dentry = unix_sk(s)->dentry;
294
295		if (!net_eq(sock_net(s), net))
296			continue;
297
298		if (dentry && dentry->d_inode == i) {
299			sock_hold(s);
300			goto found;
301		}
302	}
303	s = NULL;
304found:
305	spin_unlock(&unix_table_lock);
306	return s;
307}
308
309static inline int unix_writable(struct sock *sk)
310{
311	return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
312}
313
/* sk->sk_write_space callback: wake sleeping writers and send async
 * notification once the socket becomes writable again (see
 * unix_writable()).  The wait queue is RCU-protected. */
static void unix_write_space(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	if (unix_writable(sk)) {
		wq = rcu_dereference(sk->sk_wq);
		if (wq_has_sleeper(wq))
			wake_up_interruptible_sync(&wq->wait);
		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
	}
	rcu_read_unlock();
}
327
/* When dgram socket disconnects (or changes its peer), we clear its receive
 * queue of packets arrived from previous peer. First, it allows to do
 * flow control based only on wmem_alloc; second, sk connected to peer
 * may receive messages only from that peer. */
static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
{
	if (!skb_queue_empty(&sk->sk_receive_queue)) {
		skb_queue_purge(&sk->sk_receive_queue);
		/* Senders blocked on our previously-full queue may proceed. */
		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);

		/* If one link of bidirectional dgram pipe is disconnected,
		 * we signal error. Messages are lost. Do not make this,
		 * when peer was not connected to us.
		 */
		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
			other->sk_err = ECONNRESET;
			other->sk_error_report(other);
		}
	}
}
348
/* sk->sk_destruct callback: final teardown when the last sock reference
 * is dropped.  Frees queued skbs and the bound address, and maintains
 * the global per-protocol counters. */
static void unix_sock_destructor(struct sock *sk)
{
	struct unix_sock *u = unix_sk(sk);

	skb_queue_purge(&sk->sk_receive_queue);

	/* By now there must be no pending write memory, no hash chain
	 * membership and no attached struct socket. */
	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
	WARN_ON(!sk_unhashed(sk));
	WARN_ON(sk->sk_socket);
	if (!sock_flag(sk, SOCK_DEAD)) {
		printk(KERN_INFO "Attempt to release alive unix socket: %p\n", sk);
		return;
	}

	if (u->addr)
		unix_release_addr(u->addr);

	atomic_dec(&unix_nr_socks);
	local_bh_disable();
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
	local_bh_enable();
#ifdef UNIX_REFCNT_DEBUG
	printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk,
		atomic_read(&unix_nr_socks));
#endif
}
375
/* Core of release/close: unhash @sk, orphan it, signal the stream peer
 * (ECONNRESET if data was left unread or the socket was embryonic),
 * flush pending skbs, and drop the filesystem reference taken at bind
 * time.  @embrion is nonzero for sockets torn down while still sitting
 * unaccepted on a listen queue. */
static int unix_release_sock(struct sock *sk, int embrion)
{
	struct unix_sock *u = unix_sk(sk);
	struct dentry *dentry;
	struct vfsmount *mnt;
	struct sock *skpair;
	struct sk_buff *skb;
	int state;

	unix_remove_socket(sk);

	/* Clear state */
	unix_state_lock(sk);
	sock_orphan(sk);
	sk->sk_shutdown = SHUTDOWN_MASK;
	dentry	     = u->dentry;
	u->dentry    = NULL;
	mnt	     = u->mnt;
	u->mnt	     = NULL;
	state = sk->sk_state;
	sk->sk_state = TCP_CLOSE;
	unix_state_unlock(sk);

	wake_up_interruptible_all(&u->peer_wait);

	skpair = unix_peer(sk);

	if (skpair != NULL) {
		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
			unix_state_lock(skpair);
			/* No more writes */
			skpair->sk_shutdown = SHUTDOWN_MASK;
			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
				skpair->sk_err = ECONNRESET;
			unix_state_unlock(skpair);
			skpair->sk_state_change(skpair);
			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
		}
		sock_put(skpair); /* It may now die */
		unix_peer(sk) = NULL;
	}

	/* Try to flush out this socket. Throw out buffers at least */

	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		/* A listening socket's queue holds embryonic connections;
		 * release those recursively. */
		if (state == TCP_LISTEN)
			unix_release_sock(skb->sk, 1);
		/* passed fds are erased in the kfree_skb hook	      */
		kfree_skb(skb);
	}

	if (dentry) {
		dput(dentry);
		mntput(mnt);
	}

	sock_put(sk);

	/* ---- Socket is dead now and most probably destroyed ---- */

	/*
	 * Fixme: BSD difference: In BSD all sockets connected to use get
	 *	  ECONNRESET and we die on the spot. In Linux we behave
	 *	  like files and pipes do and wait for the last
	 *	  dereference.
	 *
	 * Can't we simply set sock->err?
	 *
	 *	  What the above comment does talk about? --ANK(980817)
	 */

	if (unix_tot_inflight)
		unix_gc();		/* Garbage collect fds */

	return 0;
}
452
453static void init_peercred(struct sock *sk)
454{
455	put_pid(sk->sk_peer_pid);
456	if (sk->sk_peer_cred)
457		put_cred(sk->sk_peer_cred);
458	sk->sk_peer_pid  = get_pid(task_tgid(current));
459	sk->sk_peer_cred = get_current_cred();
460}
461
462static void copy_peercred(struct sock *sk, struct sock *peersk)
463{
464	put_pid(sk->sk_peer_pid);
465	if (sk->sk_peer_cred)
466		put_cred(sk->sk_peer_cred);
467	sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
468	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
469}
470
471static int unix_listen(struct socket *sock, int backlog)
472{
473	int err;
474	struct sock *sk = sock->sk;
475	struct unix_sock *u = unix_sk(sk);
476	struct pid *old_pid = NULL;
477	const struct cred *old_cred = NULL;
478
479	err = -EOPNOTSUPP;
480	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
481		goto out;	/* Only stream/seqpacket sockets accept */
482	err = -EINVAL;
483	if (!u->addr)
484		goto out;	/* No listens on an unbound socket */
485	unix_state_lock(sk);
486	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
487		goto out_unlock;
488	if (backlog > sk->sk_max_ack_backlog)
489		wake_up_interruptible_all(&u->peer_wait);
490	sk->sk_max_ack_backlog	= backlog;
491	sk->sk_state		= TCP_LISTEN;
492	/* set credentials so connect can copy them */
493	init_peercred(sk);
494	err = 0;
495
496out_unlock:
497	unix_state_unlock(sk);
498	put_pid(old_pid);
499	if (old_cred)
500		put_cred(old_cred);
501out:
502	return err;
503}
504
/* Forward declarations for the proto_ops tables below. */
static int unix_release(struct socket *);
static int unix_bind(struct socket *, struct sockaddr *, int);
static int unix_stream_connect(struct socket *, struct sockaddr *,
			       int addr_len, int flags);
static int unix_socketpair(struct socket *, struct socket *);
static int unix_accept(struct socket *, struct socket *, int);
static int unix_getname(struct socket *, struct sockaddr *, int *, int);
static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
static unsigned int unix_dgram_poll(struct file *, struct socket *,
				    poll_table *);
static int unix_ioctl(struct socket *, unsigned int, unsigned long);
static int unix_shutdown(struct socket *, int);
static int unix_stream_sendmsg(struct kiocb *, struct socket *,
			       struct msghdr *, size_t);
static int unix_stream_recvmsg(struct kiocb *, struct socket *,
			       struct msghdr *, size_t, int);
static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
			      struct msghdr *, size_t);
static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
			      struct msghdr *, size_t, int);
static int unix_dgram_connect(struct socket *, struct sockaddr *,
			      int, int);
static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
				  struct msghdr *, size_t);
529
/* Operations table for SOCK_STREAM unix sockets. */
static const struct proto_ops unix_stream_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_poll,
	.ioctl =	unix_ioctl,
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_stream_sendmsg,
	.recvmsg =	unix_stream_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};
550
/* Operations table for SOCK_DGRAM unix sockets (no accept/listen). */
static const struct proto_ops unix_dgram_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_dgram_connect,
	.socketpair =	unix_socketpair,
	.accept =	sock_no_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_dgram_sendmsg,
	.recvmsg =	unix_dgram_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};
571
/* Operations table for SOCK_SEQPACKET unix sockets: stream-style
 * connection setup with datagram-style message boundaries. */
static const struct proto_ops unix_seqpacket_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_seqpacket_sendmsg,
	.recvmsg =	unix_dgram_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};
592
/* Single struct proto shared by all three AF_UNIX socket types. */
static struct proto unix_proto = {
	.name			= "UNIX",
	.owner			= THIS_MODULE,
	.obj_size		= sizeof(struct unix_sock),
};

/*
 * AF_UNIX sockets do not interact with hardware, hence they
 * don't trigger interrupts - so it's safe for them to have
 * bh-unsafe locking for their sk_receive_queue.lock. Split off
 * this special lock-class by reinitializing the spinlock key:
 */
static struct lock_class_key af_unix_sk_receive_queue_lock_key;
606
/* Allocate and initialise one unix sock; returns NULL on failure.  The
 * new sock starts on the unbound pseudo hash chain.
 *
 * NOTE(review): the unix_nr_socks limit check is best-effort -- the
 * increment and the read are separate atomic operations, so the
 * 2*get_max_files() cap can be transiently overshot under concurrency.
 */
static struct sock *unix_create1(struct net *net, struct socket *sock)
{
	struct sock *sk = NULL;
	struct unix_sock *u;

	atomic_inc(&unix_nr_socks);
	if (atomic_read(&unix_nr_socks) > 2 * get_max_files())
		goto out;

	sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto);
	if (!sk)
		goto out;

	sock_init_data(sock, sk);
	/* Re-key the queue lock class; see comment above
	 * af_unix_sk_receive_queue_lock_key. */
	lockdep_set_class(&sk->sk_receive_queue.lock,
				&af_unix_sk_receive_queue_lock_key);

	sk->sk_write_space	= unix_write_space;
	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
	sk->sk_destruct		= unix_sock_destructor;
	u	  = unix_sk(sk);
	u->dentry = NULL;
	u->mnt	  = NULL;
	spin_lock_init(&u->lock);
	atomic_long_set(&u->inflight, 0);
	INIT_LIST_HEAD(&u->link);
	mutex_init(&u->readlock); /* single task reading lock */
	init_waitqueue_head(&u->peer_wait);
	unix_insert_socket(unix_sockets_unbound, sk);
out:
	if (sk == NULL)
		atomic_dec(&unix_nr_socks);
	else {
		local_bh_disable();
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
		local_bh_enable();
	}
	return sk;
}
646
/* socket(2) handler for PF_UNIX: validate protocol/type and install the
 * matching proto_ops table before allocating the underlying sock. */
static int unix_create(struct net *net, struct socket *sock, int protocol,
		       int kern)
{
	if (protocol && protocol != PF_UNIX)
		return -EPROTONOSUPPORT;

	sock->state = SS_UNCONNECTED;

	switch (sock->type) {
	case SOCK_STREAM:
		sock->ops = &unix_stream_ops;
		break;
		/*
		 *	Believe it or not BSD has AF_UNIX, SOCK_RAW though
		 *	nothing uses it.
		 */
	case SOCK_RAW:
		sock->type = SOCK_DGRAM;
		/* fall through -- SOCK_RAW is silently treated as SOCK_DGRAM */
	case SOCK_DGRAM:
		sock->ops = &unix_dgram_ops;
		break;
	case SOCK_SEQPACKET:
		sock->ops = &unix_seqpacket_ops;
		break;
	default:
		return -ESOCKTNOSUPPORT;
	}

	return unix_create1(net, sock) ? 0 : -ENOMEM;
}
677
678static int unix_release(struct socket *sock)
679{
680	struct sock *sk = sock->sk;
681
682	if (!sk)
683		return 0;
684
685	sock->sk = NULL;
686
687	return unix_release_sock(sk, 0);
688}
689
/* Bind the socket to an automatically chosen abstract name "\0XXXXX"
 * (five hex digits).  Used when bind(2) passes only sa_family, and by
 * send/connect paths when SOCK_PASSCRED needs an address.
 *
 * NOTE(review): if all 2^20 names were in use the retry loop would spin
 * (with periodic yield()) -- presumably acceptable given the global
 * socket cap in unix_create1(); confirm before reusing this pattern.
 */
static int unix_autobind(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	static u32 ordernum = 1;
	struct unix_address *addr;
	int err;

	mutex_lock(&u->readlock);

	err = 0;
	if (u->addr)
		goto out;

	err = -ENOMEM;
	addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
	if (!addr)
		goto out;

	addr->name->sun_family = AF_UNIX;
	atomic_set(&addr->refcnt, 1);

retry:
	/* sun_path[0] stays NUL from kzalloc() -> abstract namespace. */
	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
	addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));

	spin_lock(&unix_table_lock);
	ordernum = (ordernum+1)&0xFFFFF;

	if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
				      addr->hash)) {
		spin_unlock(&unix_table_lock);
		/* Sanity yield. It is unusual case, but yet... */
		if (!(ordernum&0xFF))
			yield();
		goto retry;
	}
	addr->hash ^= sk->sk_type;

	/* Move from the unbound list onto the proper hash chain. */
	__unix_remove_socket(sk);
	u->addr = addr;
	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
	spin_unlock(&unix_table_lock);
	err = 0;

out:	mutex_unlock(&u->readlock);
	return err;
}
739
/* Resolve a destination address to its target sock.  Filesystem names
 * go through path lookup (write permission on the socket inode is
 * required) and are matched by inode; abstract names are matched by
 * name/hash.  Returns a sock with a reference held, or NULL with
 * *error set (-ECONNREFUSED, -EPROTOTYPE, or a lookup errno). */
static struct sock *unix_find_other(struct net *net,
				    struct sockaddr_un *sunname, int len,
				    int type, unsigned hash, int *error)
{
	struct sock *u;
	struct path path;
	int err = 0;

	if (sunname->sun_path[0]) {
		struct inode *inode;
		err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
		if (err)
			goto fail;
		inode = path.dentry->d_inode;
		err = inode_permission(inode, MAY_WRITE);
		if (err)
			goto put_fail;

		err = -ECONNREFUSED;
		if (!S_ISSOCK(inode->i_mode))
			goto put_fail;
		u = unix_find_socket_byinode(net, inode);
		if (!u)
			goto put_fail;

		/* Only mark the inode used when the types actually match. */
		if (u->sk_type == type)
			touch_atime(path.mnt, path.dentry);

		path_put(&path);

		err = -EPROTOTYPE;
		if (u->sk_type != type) {
			sock_put(u);
			goto fail;
		}
	} else {
		err = -ECONNREFUSED;
		u = unix_find_socket_byname(net, sunname, len, type, hash);
		if (u) {
			struct dentry *dentry;
			dentry = unix_sk(u)->dentry;
			if (dentry)
				touch_atime(unix_sk(u)->mnt, dentry);
		} else
			goto fail;
	}
	return u;

put_fail:
	path_put(&path);
fail:
	*error = err;
	return NULL;
}
794
795
/* bind(2) handler.  A bare sa_family triggers autobind; an abstract
 * name is hashed into the socket table after a uniqueness check; a
 * filesystem name creates a socket inode via vfs_mknod() and the socket
 * is hashed by that inode's number.  A socket may be bound only once. */
static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct dentry *dentry = NULL;
	struct nameidata nd;
	int err;
	unsigned hash;
	struct unix_address *addr;
	struct hlist_head *list;

	err = -EINVAL;
	if (sunaddr->sun_family != AF_UNIX)
		goto out;

	/* Address is just the family: pick an abstract name for the caller. */
	if (addr_len == sizeof(short)) {
		err = unix_autobind(sock);
		goto out;
	}

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	mutex_lock(&u->readlock);

	err = -EINVAL;
	if (u->addr)
		goto out_up;

	err = -ENOMEM;
	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
	if (!addr)
		goto out_up;

	memcpy(addr->name, sunaddr, addr_len);
	addr->len = addr_len;
	addr->hash = hash ^ sk->sk_type;
	atomic_set(&addr->refcnt, 1);

	if (sunaddr->sun_path[0]) {
		unsigned int mode;
		err = 0;
		/*
		 * Get the parent directory, calculate the hash for last
		 * component.
		 */
		err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd);
		if (err)
			goto out_mknod_parent;

		dentry = lookup_create(&nd, 0);
		err = PTR_ERR(dentry);
		if (IS_ERR(dentry))
			goto out_mknod_unlock;

		/*
		 * All right, let's create it.
		 */
		mode = S_IFSOCK |
		       (SOCK_INODE(sock)->i_mode & ~current_umask());
		err = mnt_want_write(nd.path.mnt);
		if (err)
			goto out_mknod_dput;
		err = security_path_mknod(&nd.path, dentry, mode, 0);
		if (err)
			goto out_mknod_drop_write;
		err = vfs_mknod(nd.path.dentry->d_inode, dentry, mode, 0);
out_mknod_drop_write:
		mnt_drop_write(nd.path.mnt);
		if (err)
			goto out_mknod_dput;
		mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
		dput(nd.path.dentry);
		nd.path.dentry = dentry;

		/* FS-bound sockets use the sentinel hash; lookup is by inode. */
		addr->hash = UNIX_HASH_SIZE;
	}

	spin_lock(&unix_table_lock);

	if (!sunaddr->sun_path[0]) {
		/* Abstract names must be unique within the namespace. */
		err = -EADDRINUSE;
		if (__unix_find_socket_byname(net, sunaddr, addr_len,
					      sk->sk_type, hash)) {
			unix_release_addr(addr);
			goto out_unlock;
		}

		list = &unix_socket_table[addr->hash];
	} else {
		list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
		u->dentry = nd.path.dentry;
		u->mnt    = nd.path.mnt;
	}

	err = 0;
	__unix_remove_socket(sk);
	u->addr = addr;
	__unix_insert_socket(list, sk);

out_unlock:
	spin_unlock(&unix_table_lock);
out_up:
	mutex_unlock(&u->readlock);
out:
	return err;

out_mknod_dput:
	dput(dentry);
out_mknod_unlock:
	mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
	path_put(&nd.path);
out_mknod_parent:
	/* An existing path entry maps to the socket-level error. */
	if (err == -EEXIST)
		err = -EADDRINUSE;
	unix_release_addr(addr);
	goto out_up;
}
918
/* Lock two sockets' state locks in address order to avoid ABBA
 * deadlock.  A NULL or identical second socket takes one lock only. */
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	struct sock *first = sk1;
	struct sock *second = sk2;

	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_lock(sk1);
		return;
	}

	if (sk2 < sk1) {
		first = sk2;
		second = sk1;
	}
	unix_state_lock(first);
	unix_state_lock_nested(second);
}
933
/* Undo unix_state_double_lock(); only one lock was taken when the
 * sockets are identical or sk2 is NULL. */
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	if (!sk2 || unlikely(sk1 == sk2)) {
		unix_state_unlock(sk1);
		return;
	}

	unix_state_unlock(sk1);
	unix_state_unlock(sk2);
}
943
/* connect(2) for datagram sockets: pin the default peer (or clear it
 * for AF_UNSPEC, per 1003.1g).  Reconnecting to a different peer
 * flushes messages queued from the old one.
 *
 * NOTE(review): addr->sa_family is read before any length validation of
 * @alen; unix_mkname() checks the length afterwards, but the AF_UNSPEC
 * branch is reached on an arbitrarily short address -- confirm callers
 * always pass at least sizeof(sa_family_t). */
static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
			      int alen, int flags)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
	struct sock *other;
	unsigned hash;
	int err;

	if (addr->sa_family != AF_UNSPEC) {
		err = unix_mkname(sunaddr, alen, &hash);
		if (err < 0)
			goto out;
		alen = err;

		/* Credential passing requires our own address to exist. */
		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
			goto out;

restart:
		other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
		if (!other)
			goto out;

		unix_state_double_lock(sk, other);

		/* Apparently VFS overslept socket death. Retry. */
		if (sock_flag(other, SOCK_DEAD)) {
			unix_state_double_unlock(sk, other);
			sock_put(other);
			goto restart;
		}

		err = -EPERM;
		if (!unix_may_send(sk, other))
			goto out_unlock;

		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;

	} else {
		/*
		 *	1003.1g breaking connected state with AF_UNSPEC
		 */
		other = NULL;
		unix_state_double_lock(sk, other);
	}

	/*
	 * If it was connected, reconnect.
	 */
	if (unix_peer(sk)) {
		struct sock *old_peer = unix_peer(sk);
		unix_peer(sk) = other;
		unix_state_double_unlock(sk, other);

		if (other != old_peer)
			unix_dgram_disconnected(sk, old_peer);
		sock_put(old_peer);
	} else {
		unix_peer(sk) = other;
		unix_state_double_unlock(sk, other);
	}
	return 0;

out_unlock:
	unix_state_double_unlock(sk, other);
	sock_put(other);
out:
	return err;
}
1017
/* Sleep (up to @timeo) until @other may have room in its queue again.
 * Called with @other's state lock held; the lock is released before
 * sleeping and is NOT reacquired, so callers must revalidate everything
 * afterwards (they typically restart their lookup).  Returns the
 * remaining timeout. */
static long unix_wait_for_peer(struct sock *other, long timeo)
{
	struct unix_sock *u = unix_sk(other);
	int sched;
	DEFINE_WAIT(wait);

	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);

	/* Only sleep while the peer is alive, still reading, and full. */
	sched = !sock_flag(other, SOCK_DEAD) &&
		!(other->sk_shutdown & RCV_SHUTDOWN) &&
		unix_recvq_full(other);

	unix_state_unlock(other);

	if (sched)
		timeo = schedule_timeout(timeo);

	finish_wait(&u->peer_wait, &wait);
	return timeo;
}
1038
1039static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1040			       int addr_len, int flags)
1041{
1042	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1043	struct sock *sk = sock->sk;
1044	struct net *net = sock_net(sk);
1045	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1046	struct sock *newsk = NULL;
1047	struct sock *other = NULL;
1048	struct sk_buff *skb = NULL;
1049	unsigned hash;
1050	int st;
1051	int err;
1052	long timeo;
1053
1054	err = unix_mkname(sunaddr, addr_len, &hash);
1055	if (err < 0)
1056		goto out;
1057	addr_len = err;
1058
1059	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
1060	    (err = unix_autobind(sock)) != 0)
1061		goto out;
1062
1063	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1064
1065	/* First of all allocate resources.
1066	   If we will make it after state is locked,
1067	   we will have to recheck all again in any case.
1068	 */
1069
1070	err = -ENOMEM;
1071
1072	/* create new sock for complete connection */
1073	newsk = unix_create1(sock_net(sk), NULL);
1074	if (newsk == NULL)
1075		goto out;
1076
1077	/* Allocate skb for sending to listening sock */
1078	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1079	if (skb == NULL)
1080		goto out;
1081
1082restart:
1083	/*  Find listening sock. */
1084	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
1085	if (!other)
1086		goto out;
1087
1088	/* Latch state of peer */
1089	unix_state_lock(other);
1090
1091	/* Apparently VFS overslept socket death. Retry. */
1092	if (sock_flag(other, SOCK_DEAD)) {
1093		unix_state_unlock(other);
1094		sock_put(other);
1095		goto restart;
1096	}
1097
1098	err = -ECONNREFUSED;
1099	if (other->sk_state != TCP_LISTEN)
1100		goto out_unlock;
1101	if (other->sk_shutdown & RCV_SHUTDOWN)
1102		goto out_unlock;
1103
1104	if (unix_recvq_full(other)) {
1105		err = -EAGAIN;
1106		if (!timeo)
1107			goto out_unlock;
1108
1109		timeo = unix_wait_for_peer(other, timeo);
1110
1111		err = sock_intr_errno(timeo);
1112		if (signal_pending(current))
1113			goto out;
1114		sock_put(other);
1115		goto restart;
1116	}
1117
1118	/* Latch our state.
1119
1120	   It is tricky place. We need to grab write lock and cannot
1121	   drop lock on peer. It is dangerous because deadlock is
1122	   possible. Connect to self case and simultaneous
1123	   attempt to connect are eliminated by checking socket
1124	   state. other is TCP_LISTEN, if sk is TCP_LISTEN we
1125	   check this before attempt to grab lock.
1126
1127	   Well, and we have to recheck the state after socket locked.
1128	 */
1129	st = sk->sk_state;
1130
1131	switch (st) {
1132	case TCP_CLOSE:
1133		/* This is ok... continue with connect */
1134		break;
1135	case TCP_ESTABLISHED:
1136		/* Socket is already connected */
1137		err = -EISCONN;
1138		goto out_unlock;
1139	default:
1140		err = -EINVAL;
1141		goto out_unlock;
1142	}
1143
1144	unix_state_lock_nested(sk);
1145
1146	if (sk->sk_state != st) {
1147		unix_state_unlock(sk);
1148		unix_state_unlock(other);
1149		sock_put(other);
1150		goto restart;
1151	}
1152
1153	err = security_unix_stream_connect(sock, other->sk_socket, newsk);
1154	if (err) {
1155		unix_state_unlock(sk);
1156		goto out_unlock;
1157	}
1158
1159	/* The way is open! Fastly set all the necessary fields... */
1160
1161	sock_hold(sk);
1162	unix_peer(newsk)	= sk;
1163	newsk->sk_state		= TCP_ESTABLISHED;
1164	newsk->sk_type		= sk->sk_type;
1165	init_peercred(newsk);
1166	newu = unix_sk(newsk);
1167	newsk->sk_wq		= &newu->peer_wq;
1168	otheru = unix_sk(other);
1169
1170	/* copy address information from listening to new sock*/
1171	if (otheru->addr) {
1172		atomic_inc(&otheru->addr->refcnt);
1173		newu->addr = otheru->addr;
1174	}
1175	if (otheru->dentry) {
1176		newu->dentry	= dget(otheru->dentry);
1177		newu->mnt	= mntget(otheru->mnt);
1178	}
1179
1180	/* Set credentials */
1181	copy_peercred(sk, other);
1182
1183	sock->state	= SS_CONNECTED;
1184	sk->sk_state	= TCP_ESTABLISHED;
1185	sock_hold(newsk);
1186
1187	smp_mb__after_atomic_inc();	/* sock_hold() does an atomic_inc() */
1188	unix_peer(sk)	= newsk;
1189
1190	unix_state_unlock(sk);
1191
	/* take ten and send info to listening sock */
1193	spin_lock(&other->sk_receive_queue.lock);
1194	__skb_queue_tail(&other->sk_receive_queue, skb);
1195	spin_unlock(&other->sk_receive_queue.lock);
1196	unix_state_unlock(other);
1197	other->sk_data_ready(other, 0);
1198	sock_put(other);
1199	return 0;
1200
1201out_unlock:
1202	if (other)
1203		unix_state_unlock(other);
1204
1205out:
1206	kfree_skb(skb);
1207	if (newsk)
1208		unix_release_sock(newsk, 0);
1209	if (other)
1210		sock_put(other);
1211	return err;
1212}
1213
1214static int unix_socketpair(struct socket *socka, struct socket *sockb)
1215{
1216	struct sock *ska = socka->sk, *skb = sockb->sk;
1217
1218	/* Join our sockets back to back */
1219	sock_hold(ska);
1220	sock_hold(skb);
1221	unix_peer(ska) = skb;
1222	unix_peer(skb) = ska;
1223	init_peercred(ska);
1224	init_peercred(skb);
1225
1226	if (ska->sk_type != SOCK_DGRAM) {
1227		ska->sk_state = TCP_ESTABLISHED;
1228		skb->sk_state = TCP_ESTABLISHED;
1229		socka->state  = SS_CONNECTED;
1230		sockb->state  = SS_CONNECTED;
1231	}
1232	return 0;
1233}
1234
/*
 * Accept a pending connection on a listening stream/seqpacket socket.
 *
 * Connection requests arrive on the listener's receive queue as skbs
 * whose ->sk points at the already-created connected peer sock, so
 * accepting amounts to dequeueing one skb and grafting its sock onto
 * the new socket.  Returns 0 on success or a negative errno.
 */
static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
{
	struct sock *sk = sock->sk;
	struct sock *tsk;
	struct sk_buff *skb;
	int err;

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;

	err = -EINVAL;
	if (sk->sk_state != TCP_LISTEN)
		goto out;

	/* If socket state is TCP_LISTEN it cannot change (for now...),
	 * so that no locks are necessary.
	 */

	skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
	if (!skb) {
		/* This means receive shutdown. */
		if (err == 0)
			err = -EINVAL;
		goto out;
	}

	tsk = skb->sk;		/* the pre-created connected peer sock */
	skb_free_datagram(sk, skb);
	/* A backlog slot was freed: wake connectors blocked waiting for
	 * room in unix_wait_for_peer().
	 */
	wake_up_interruptible(&unix_sk(sk)->peer_wait);

	/* attach accepted sock to socket */
	unix_state_lock(tsk);
	newsock->state = SS_CONNECTED;
	sock_graft(tsk, newsock);
	unix_state_unlock(tsk);
	return 0;

out:
	return err;
}
1276
1277
/*
 * Report the local address (peer == 0) or the connected peer's address
 * (peer != 0) of a unix socket.  An unbound socket yields just the
 * AF_UNIX family with an empty path.  Returns 0, or -ENOTCONN when a
 * peer address was requested but no peer exists.
 */
static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
{
	struct sock *sk = sock->sk;
	struct unix_sock *u;
	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
	int err = 0;

	if (peer) {
		/* unix_peer_get() takes its own reference on the peer. */
		sk = unix_peer_get(sk);

		err = -ENOTCONN;
		if (!sk)
			goto out;
		err = 0;
	} else {
		/* Balance the unconditional sock_put() below. */
		sock_hold(sk);
	}

	u = unix_sk(sk);
	/* u->addr only changes under the state lock; hold it so a
	 * concurrent bind/autobind cannot race with the copy-out.
	 */
	unix_state_lock(sk);
	if (!u->addr) {
		sunaddr->sun_family = AF_UNIX;
		sunaddr->sun_path[0] = 0;
		*uaddr_len = sizeof(short);
	} else {
		struct unix_address *addr = u->addr;

		*uaddr_len = addr->len;
		memcpy(sunaddr, addr->name, *uaddr_len);
	}
	unix_state_unlock(sk);
	sock_put(sk);
out:
	return err;
}
1313
1314static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1315{
1316	int i;
1317
1318	scm->fp = UNIXCB(skb).fp;
1319	skb->destructor = sock_wfree;
1320	UNIXCB(skb).fp = NULL;
1321
1322	for (i = scm->fp->count-1; i >= 0; i--)
1323		unix_notinflight(scm->fp->fp[i]);
1324}
1325
1326static void unix_destruct_fds(struct sk_buff *skb)
1327{
1328	struct scm_cookie scm;
1329	memset(&scm, 0, sizeof(scm));
1330	unix_detach_fds(&scm, skb);
1331
1332	/* Alas, it calls VFS */
1333	/* So fscking what? fput() had been SMP-safe since the last Summer */
1334	scm_destroy(&scm);
1335	sock_wfree(skb);
1336}
1337
1338static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1339{
1340	int i;
1341
1342	/*
1343	 * Need to duplicate file references for the sake of garbage
1344	 * collection.  Otherwise a socket in the fps might become a
1345	 * candidate for GC while the skb is not yet queued.
1346	 */
1347	UNIXCB(skb).fp = scm_fp_dup(scm->fp);
1348	if (!UNIXCB(skb).fp)
1349		return -ENOMEM;
1350
1351	for (i = scm->fp->count-1; i >= 0; i--)
1352		unix_inflight(scm->fp->fp[i]);
1353	skb->destructor = unix_destruct_fds;
1354	return 0;
1355}
1356
1357/*
1358 *	Send AF_UNIX data.
1359 */
1360
1361static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
1362			      struct msghdr *msg, size_t len)
1363{
1364	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1365	struct sock *sk = sock->sk;
1366	struct net *net = sock_net(sk);
1367	struct unix_sock *u = unix_sk(sk);
1368	struct sockaddr_un *sunaddr = msg->msg_name;
1369	struct sock *other = NULL;
1370	int namelen = 0; /* fake GCC */
1371	int err;
1372	unsigned hash;
1373	struct sk_buff *skb;
1374	long timeo;
1375	struct scm_cookie tmp_scm;
1376
1377	if (NULL == siocb->scm)
1378		siocb->scm = &tmp_scm;
1379	wait_for_unix_gc();
1380	err = scm_send(sock, msg, siocb->scm);
1381	if (err < 0)
1382		return err;
1383
1384	err = -EOPNOTSUPP;
1385	if (msg->msg_flags&MSG_OOB)
1386		goto out;
1387
1388	if (msg->msg_namelen) {
1389		err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1390		if (err < 0)
1391			goto out;
1392		namelen = err;
1393	} else {
1394		sunaddr = NULL;
1395		err = -ENOTCONN;
1396		other = unix_peer_get(sk);
1397		if (!other)
1398			goto out;
1399	}
1400
1401	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1402	    && (err = unix_autobind(sock)) != 0)
1403		goto out;
1404
1405	err = -EMSGSIZE;
1406	if (len > sk->sk_sndbuf - 32)
1407		goto out;
1408
1409	skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err);
1410	if (skb == NULL)
1411		goto out;
1412
1413	memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1414	if (siocb->scm->fp) {
1415		err = unix_attach_fds(siocb->scm, skb);
1416		if (err)
1417			goto out_free;
1418	}
1419	unix_get_secdata(siocb->scm, skb);
1420
1421	skb_reset_transport_header(skb);
1422	err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
1423	if (err)
1424		goto out_free;
1425
1426	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1427
1428restart:
1429	if (!other) {
1430		err = -ECONNRESET;
1431		if (sunaddr == NULL)
1432			goto out_free;
1433
1434		other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1435					hash, &err);
1436		if (other == NULL)
1437			goto out_free;
1438	}
1439
1440	unix_state_lock(other);
1441	err = -EPERM;
1442	if (!unix_may_send(sk, other))
1443		goto out_unlock;
1444
1445	if (sock_flag(other, SOCK_DEAD)) {
1446		/*
1447		 *	Check with 1003.1g - what should
1448		 *	datagram error
1449		 */
1450		unix_state_unlock(other);
1451		sock_put(other);
1452
1453		err = 0;
1454		unix_state_lock(sk);
1455		if (unix_peer(sk) == other) {
1456			unix_peer(sk) = NULL;
1457			unix_state_unlock(sk);
1458
1459			unix_dgram_disconnected(sk, other);
1460			sock_put(other);
1461			err = -ECONNREFUSED;
1462		} else {
1463			unix_state_unlock(sk);
1464		}
1465
1466		other = NULL;
1467		if (err)
1468			goto out_free;
1469		goto restart;
1470	}
1471
1472	err = -EPIPE;
1473	if (other->sk_shutdown & RCV_SHUTDOWN)
1474		goto out_unlock;
1475
1476	if (sk->sk_type != SOCK_SEQPACKET) {
1477		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1478		if (err)
1479			goto out_unlock;
1480	}
1481
1482	if (unix_peer(other) != sk && unix_recvq_full(other)) {
1483		if (!timeo) {
1484			err = -EAGAIN;
1485			goto out_unlock;
1486		}
1487
1488		timeo = unix_wait_for_peer(other, timeo);
1489
1490		err = sock_intr_errno(timeo);
1491		if (signal_pending(current))
1492			goto out_free;
1493
1494		goto restart;
1495	}
1496
1497	skb_queue_tail(&other->sk_receive_queue, skb);
1498	unix_state_unlock(other);
1499	other->sk_data_ready(other, len);
1500	sock_put(other);
1501	scm_destroy(siocb->scm);
1502	return len;
1503
1504out_unlock:
1505	unix_state_unlock(other);
1506out_free:
1507	kfree_skb(skb);
1508out:
1509	if (other)
1510		sock_put(other);
1511	scm_destroy(siocb->scm);
1512	return err;
1513}
1514
1515
/*
 * Send on a connected SOCK_STREAM socket.  The payload is split into
 * skbs sized to keep roughly two messages in the pipe; credentials are
 * attached to every skb, passed fds only to the first.  Returns the
 * number of bytes sent (which may be short if an error occurs after
 * some progress) or a negative errno.
 */
static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
			       struct msghdr *msg, size_t len)
{
	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
	struct sock *sk = sock->sk;
	struct sock *other = NULL;
	struct sockaddr_un *sunaddr = msg->msg_name;
	int err, size;
	struct sk_buff *skb;
	int sent = 0;
	struct scm_cookie tmp_scm;
	bool fds_sent = false;

	if (NULL == siocb->scm)
		siocb->scm = &tmp_scm;
	/* Throttle senders while the fd garbage collector is running. */
	wait_for_unix_gc();
	err = scm_send(sock, msg, siocb->scm);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out_err;

	if (msg->msg_namelen) {
		/* Stream sockets take no destination address. */
		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
		goto out_err;
	} else {
		sunaddr = NULL;
		err = -ENOTCONN;
		other = unix_peer(sk);
		if (!other)
			goto out_err;
	}

	if (sk->sk_shutdown & SEND_SHUTDOWN)
		goto pipe_err;

	while (sent < len) {
		/*
		 *	Optimisation for the fact that under 0.01% of X
		 *	messages typically need breaking up.
		 */

		size = len-sent;

		/* Keep two messages in the pipe so it schedules better */
		if (size > ((sk->sk_sndbuf >> 1) - 64))
			size = (sk->sk_sndbuf >> 1) - 64;

		if (size > SKB_MAX_ALLOC)
			size = SKB_MAX_ALLOC;

		/*
		 *	Grab a buffer
		 */

		skb = sock_alloc_send_skb(sk, size, msg->msg_flags&MSG_DONTWAIT,
					  &err);

		if (skb == NULL)
			goto out_err;

		/*
		 *	If you pass two values to the sock_alloc_send_skb
		 *	it tries to grab the large buffer with GFP_NOFS
		 *	(which can fail easily), and if it fails grab the
		 *	fallback size buffer which is under a page and will
		 *	succeed. [Alan]
		 */
		size = min_t(int, size, skb_tailroom(skb));

		memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
		/* Only send the fds in the first buffer */
		if (siocb->scm->fp && !fds_sent) {
			err = unix_attach_fds(siocb->scm, skb);
			if (err) {
				kfree_skb(skb);
				goto out_err;
			}
			fds_sent = true;
		}

		err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
		if (err) {
			kfree_skb(skb);
			goto out_err;
		}

		unix_state_lock(other);

		/* Recheck under the peer's lock: it may have died or shut
		 * down reading since the last chunk was queued.
		 */
		if (sock_flag(other, SOCK_DEAD) ||
		    (other->sk_shutdown & RCV_SHUTDOWN))
			goto pipe_err_free;

		skb_queue_tail(&other->sk_receive_queue, skb);
		unix_state_unlock(other);
		other->sk_data_ready(other, size);
		sent += size;
	}

	scm_destroy(siocb->scm);
	siocb->scm = NULL;

	return sent;

pipe_err_free:
	unix_state_unlock(other);
	kfree_skb(skb);
pipe_err:
	/* SIGPIPE only if nothing was sent and the caller didn't opt out. */
	if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
		send_sig(SIGPIPE, current, 0);
	err = -EPIPE;
out_err:
	scm_destroy(siocb->scm);
	siocb->scm = NULL;
	return sent ? : err;
}
1634
1635static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
1636				  struct msghdr *msg, size_t len)
1637{
1638	int err;
1639	struct sock *sk = sock->sk;
1640
1641	err = sock_error(sk);
1642	if (err)
1643		return err;
1644
1645	if (sk->sk_state != TCP_ESTABLISHED)
1646		return -ENOTCONN;
1647
1648	if (msg->msg_namelen)
1649		msg->msg_namelen = 0;
1650
1651	return unix_dgram_sendmsg(kiocb, sock, msg, len);
1652}
1653
1654static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
1655{
1656	struct unix_sock *u = unix_sk(sk);
1657
1658	msg->msg_namelen = 0;
1659	if (u->addr) {
1660		msg->msg_namelen = u->addr->len;
1661		memcpy(msg->msg_name, u->addr->name, u->addr->len);
1662	}
1663}
1664
/*
 * Receive one datagram (SOCK_DGRAM / SOCK_SEQPACKET).  Copies at most
 * 'size' bytes (setting MSG_TRUNC if the datagram was larger), fills in
 * the sender's address and scm data (credentials, fds, security), and
 * returns the number of bytes copied or a negative errno.
 */
static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
			      struct msghdr *msg, size_t size,
			      int flags)
{
	struct sock_iocb *siocb = kiocb_to_siocb(iocb);
	struct scm_cookie tmp_scm;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	int noblock = flags & MSG_DONTWAIT;
	struct sk_buff *skb;
	int err;

	err = -EOPNOTSUPP;
	if (flags&MSG_OOB)
		goto out;

	msg->msg_namelen = 0;

	/* readlock serializes concurrent receivers on this socket. */
	mutex_lock(&u->readlock);

	skb = skb_recv_datagram(sk, flags, noblock, &err);
	if (!skb) {
		unix_state_lock(sk);
		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
		    (sk->sk_shutdown & RCV_SHUTDOWN))
			err = 0;
		unix_state_unlock(sk);
		goto out_unlock;
	}

	/* A queue slot was freed: wake senders blocked on back-pressure. */
	wake_up_interruptible_sync(&u->peer_wait);

	if (msg->msg_name)
		unix_copy_addr(msg, skb->sk);

	if (size > skb->len)
		size = skb->len;
	else if (size < skb->len)
		msg->msg_flags |= MSG_TRUNC;

	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size);
	if (err)
		goto out_free;

	if (!siocb->scm) {
		siocb->scm = &tmp_scm;
		memset(&tmp_scm, 0, sizeof(tmp_scm));
	}
	siocb->scm->creds = *UNIXCREDS(skb);
	unix_set_secdata(siocb->scm, skb);

	if (!(flags & MSG_PEEK)) {
		/* Consuming read: move the passed fds out of the skb. */
		if (UNIXCB(skb).fp)
			unix_detach_fds(siocb->scm, skb);
	} else {
		/* It is questionable: on PEEK we could:
		   - do not return fds - good, but too simple 8)
		   - return fds, and do not return them on read (old strategy,
		     apparently wrong)
		   - clone fds (I chose it for now, it is the most universal
		     solution)

		   POSIX 1003.1g does not actually define this clearly
		   at all. POSIX 1003.1g doesn't define a lot of things
		   clearly however!

		*/
		if (UNIXCB(skb).fp)
			siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
	}
	err = size;

	scm_recv(sock, msg, siocb->scm, flags);

out_free:
	skb_free_datagram(sk, skb);
out_unlock:
	mutex_unlock(&u->readlock);
out:
	return err;
}
1747
1748/*
 *	Sleep until data has arrived, but check for races.
1750 */
1751
static long unix_stream_data_wait(struct sock *sk, long timeo)
{
	DEFINE_WAIT(wait);

	unix_state_lock(sk);

	for (;;) {
		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);

		/* Stop waiting once there is data, a pending error, a
		 * receive shutdown, a signal, or the timeout has expired.
		 */
		if (!skb_queue_empty(&sk->sk_receive_queue) ||
		    sk->sk_err ||
		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
		    signal_pending(current) ||
		    !timeo)
			break;

		/* Drop the state lock while sleeping; prepare_to_wait()
		 * above closes the wakeup race.
		 */
		set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
		unix_state_unlock(sk);
		timeo = schedule_timeout(timeo);
		unix_state_lock(sk);
		clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
	}

	finish_wait(sk_sleep(sk), &wait);
	unix_state_unlock(sk);
	/* Remaining timeout; 0 means it expired. */
	return timeo;
}
1779
1780
1781
/*
 * Receive on a connected SOCK_STREAM socket.  Consumes queued skbs until
 * 'size' bytes are copied or at least the rcvlowat target is met, never
 * gluing together data that was written under different credentials.
 * Returns the number of bytes copied or a negative errno.
 */
static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
			       struct msghdr *msg, size_t size,
			       int flags)
{
	struct sock_iocb *siocb = kiocb_to_siocb(iocb);
	struct scm_cookie tmp_scm;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr = msg->msg_name;
	int copied = 0;
	int check_creds = 0;	/* set once creds of the first skb are latched */
	int target;
	int err = 0;
	long timeo;

	err = -EINVAL;
	if (sk->sk_state != TCP_ESTABLISHED)
		goto out;

	err = -EOPNOTSUPP;
	if (flags&MSG_OOB)
		goto out;

	target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
	timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT);

	msg->msg_namelen = 0;

	/* Lock the socket to prevent queue disordering
	 * while sleeps in memcpy_tomsg
	 */

	if (!siocb->scm) {
		siocb->scm = &tmp_scm;
		memset(&tmp_scm, 0, sizeof(tmp_scm));
	}

	/* readlock serializes concurrent receivers on this socket. */
	mutex_lock(&u->readlock);

	do {
		int chunk;
		struct sk_buff *skb;

		unix_state_lock(sk);
		skb = skb_dequeue(&sk->sk_receive_queue);
		if (skb == NULL) {
			/* Queue empty: stop if we already met the target. */
			if (copied >= target)
				goto unlock;

			/*
			 *	POSIX 1003.1g mandates this order.
			 */

			err = sock_error(sk);
			if (err)
				goto unlock;
			if (sk->sk_shutdown & RCV_SHUTDOWN)
				goto unlock;

			unix_state_unlock(sk);
			err = -EAGAIN;
			if (!timeo)
				break;
			/* Release readlock while sleeping so writers/other
			 * readers are not blocked behind us.
			 */
			mutex_unlock(&u->readlock);

			timeo = unix_stream_data_wait(sk, timeo);

			if (signal_pending(current)) {
				err = sock_intr_errno(timeo);
				goto out;
			}
			mutex_lock(&u->readlock);
			continue;
 unlock:
			unix_state_unlock(sk);
			break;
		}
		unix_state_unlock(sk);

		if (check_creds) {
			/* Never glue messages from different writers */
			if (memcmp(UNIXCREDS(skb), &siocb->scm->creds,
				   sizeof(siocb->scm->creds)) != 0) {
				skb_queue_head(&sk->sk_receive_queue, skb);
				break;
			}
		} else {
			/* Copy credentials */
			siocb->scm->creds = *UNIXCREDS(skb);
			check_creds = 1;
		}

		/* Copy address just once */
		if (sunaddr) {
			unix_copy_addr(msg, skb->sk);
			sunaddr = NULL;
		}

		chunk = min_t(unsigned int, skb->len, size);
		if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
			/* Copy fault: requeue untouched data and report
			 * EFAULT only if nothing was copied yet.
			 */
			skb_queue_head(&sk->sk_receive_queue, skb);
			if (copied == 0)
				copied = -EFAULT;
			break;
		}
		copied += chunk;
		size -= chunk;

		/* Mark read part of skb as used */
		if (!(flags & MSG_PEEK)) {
			skb_pull(skb, chunk);

			if (UNIXCB(skb).fp)
				unix_detach_fds(siocb->scm, skb);

			/* put the skb back if we didn't use it up.. */
			if (skb->len) {
				skb_queue_head(&sk->sk_receive_queue, skb);
				break;
			}

			kfree_skb(skb);

			/* Stop after an skb that carried fds, so the fds
			 * are delivered with exactly this chunk of data.
			 */
			if (siocb->scm->fp)
				break;
		} else {
			/* It is questionable, see note in unix_dgram_recvmsg.
			 */
			if (UNIXCB(skb).fp)
				siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);

			/* put message back and return */
			skb_queue_head(&sk->sk_receive_queue, skb);
			break;
		}
	} while (size);

	mutex_unlock(&u->readlock);
	scm_recv(sock, msg, siocb->scm, flags);
out:
	return copied ? : err;
}
1924
1925static int unix_shutdown(struct socket *sock, int mode)
1926{
1927	struct sock *sk = sock->sk;
1928	struct sock *other;
1929
1930	mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);
1931
1932	if (mode) {
1933		unix_state_lock(sk);
1934		sk->sk_shutdown |= mode;
1935		other = unix_peer(sk);
1936		if (other)
1937			sock_hold(other);
1938		unix_state_unlock(sk);
1939		sk->sk_state_change(sk);
1940
1941		if (other &&
1942			(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
1943
1944			int peer_mode = 0;
1945
1946			if (mode&RCV_SHUTDOWN)
1947				peer_mode |= SEND_SHUTDOWN;
1948			if (mode&SEND_SHUTDOWN)
1949				peer_mode |= RCV_SHUTDOWN;
1950			unix_state_lock(other);
1951			other->sk_shutdown |= peer_mode;
1952			unix_state_unlock(other);
1953			other->sk_state_change(other);
1954			if (peer_mode == SHUTDOWN_MASK)
1955				sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
1956			else if (peer_mode & RCV_SHUTDOWN)
1957				sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
1958		}
1959		if (other)
1960			sock_put(other);
1961	}
1962	return 0;
1963}
1964
/*
 * ioctl handler.  SIOCOUTQ reports bytes charged to this socket's write
 * allocation; SIOCINQ reports bytes available to read -- the sum of the
 * whole queue for stream/seqpacket, just the next datagram for dgram.
 * Anything else returns -ENOIOCTLCMD so generic handling can take over.
 */
static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	struct sock *sk = sock->sk;
	long amount = 0;
	int err;

	switch (cmd) {
	case SIOCOUTQ:
		amount = sk_wmem_alloc_get(sk);
		err = put_user(amount, (int __user *)arg);
		break;
	case SIOCINQ:
		{
			struct sk_buff *skb;

			/* A listener's queue holds connection requests,
			 * not data.
			 */
			if (sk->sk_state == TCP_LISTEN) {
				err = -EINVAL;
				break;
			}

			spin_lock(&sk->sk_receive_queue.lock);
			if (sk->sk_type == SOCK_STREAM ||
			    sk->sk_type == SOCK_SEQPACKET) {
				skb_queue_walk(&sk->sk_receive_queue, skb)
					amount += skb->len;
			} else {
				skb = skb_peek(&sk->sk_receive_queue);
				if (skb)
					amount = skb->len;
			}
			spin_unlock(&sk->sk_receive_queue.lock);
			err = put_user(amount, (int __user *)arg);
			break;
		}

	default:
		err = -ENOIOCTLCMD;
		break;
	}
	return err;
}
2006
/*
 * poll for stream/seqpacket (and raw) unix sockets: report error, hangup,
 * readability and writability bits for select/poll/epoll.
 */
static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
{
	struct sock *sk = sock->sk;
	unsigned int mask;

	sock_poll_wait(file, sk_sleep(sk), wait);
	mask = 0;

	/* exceptional events? */
	if (sk->sk_err)
		mask |= POLLERR;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		mask |= POLLHUP;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLRDHUP;

	/* readable? */
	if (!skb_queue_empty(&sk->sk_receive_queue) ||
	    (sk->sk_shutdown & RCV_SHUTDOWN))
		mask |= POLLIN | POLLRDNORM;

	/* Connection-based need to check for termination and startup */
	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
	    sk->sk_state == TCP_CLOSE)
		mask |= POLLHUP;

	/*
	 * we set writable also when the other side has shut down the
	 * connection. This prevents stuck sockets.
	 */
	if (unix_writable(sk))
		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;

	return mask;
}
2042
/*
 * poll for dgram (and seqpacket) sockets.  Beyond the usual readability
 * and error bits, writability also depends on the connected peer: when
 * its receive queue is full we register on the peer's peer_wait queue so
 * a reader on the far end can wake us up.
 */
static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
				    poll_table *wait)
{
	struct sock *sk = sock->sk, *other;
	unsigned int mask, writable;

	sock_poll_wait(file, sk_sleep(sk), wait);
	mask = 0;

	/* exceptional events? */
	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
		mask |= POLLERR;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLRDHUP;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		mask |= POLLHUP;

	/* readable? */
	if (!skb_queue_empty(&sk->sk_receive_queue) ||
	    (sk->sk_shutdown & RCV_SHUTDOWN))
		mask |= POLLIN | POLLRDNORM;

	/* Connection-based need to check for termination and startup */
	if (sk->sk_type == SOCK_SEQPACKET) {
		if (sk->sk_state == TCP_CLOSE)
			mask |= POLLHUP;
		/* connection hasn't started yet? */
		if (sk->sk_state == TCP_SYN_SENT)
			return mask;
	}

	/* writable? */
	writable = unix_writable(sk);
	if (writable) {
		other = unix_peer_get(sk);
		if (other) {
			/* Asymmetric connection (peer not connected back
			 * to us): honour the peer's receive-queue limit.
			 */
			if (unix_peer(other) != sk) {
				sock_poll_wait(file, &unix_sk(other)->peer_wait,
					  wait);
				if (unix_recvq_full(other))
					writable = 0;
			}

			sock_put(other);
		}
	}

	if (writable)
		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
	else
		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);

	return mask;
}
2097
2098#ifdef CONFIG_PROC_FS
2099static struct sock *first_unix_socket(int *i)
2100{
2101	for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) {
2102		if (!hlist_empty(&unix_socket_table[*i]))
2103			return __sk_head(&unix_socket_table[*i]);
2104	}
2105	return NULL;
2106}
2107
2108static struct sock *next_unix_socket(int *i, struct sock *s)
2109{
2110	struct sock *next = sk_next(s);
2111	/* More in this chain? */
2112	if (next)
2113		return next;
2114	/* Look for next non-empty chain. */
2115	for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) {
2116		if (!hlist_empty(&unix_socket_table[*i]))
2117			return __sk_head(&unix_socket_table[*i]);
2118	}
2119	return NULL;
2120}
2121
/* Per-open-file cursor for the /proc/net/unix iterator: the netns-aware
 * seq_file private data plus the current hash-chain index.
 */
struct unix_iter_state {
	struct seq_net_private p;
	int i;			/* current index into unix_socket_table */
};
2126
2127static struct sock *unix_seq_idx(struct seq_file *seq, loff_t pos)
2128{
2129	struct unix_iter_state *iter = seq->private;
2130	loff_t off = 0;
2131	struct sock *s;
2132
2133	for (s = first_unix_socket(&iter->i); s; s = next_unix_socket(&iter->i, s)) {
2134		if (sock_net(s) != seq_file_net(seq))
2135			continue;
2136		if (off == pos)
2137			return s;
2138		++off;
2139	}
2140	return NULL;
2141}
2142
/* seq_file start: take the global table lock for the whole traversal.
 * Position 0 is the SEQ_START_TOKEN header row, so real entries are
 * looked up at *pos - 1.
 */
static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(unix_table_lock)
{
	spin_lock(&unix_table_lock);
	return *pos ? unix_seq_idx(seq, *pos - 1) : SEQ_START_TOKEN;
}
2149
/* seq_file next: step to the following socket, skipping entries that
 * belong to other network namespaces.
 */
static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct unix_iter_state *iter = seq->private;
	struct sock *sk = v;
	++*pos;

	if (v == SEQ_START_TOKEN)
		sk = first_unix_socket(&iter->i);
	else
		sk = next_unix_socket(&iter->i, sk);
	while (sk && (sock_net(sk) != seq_file_net(seq)))
		sk = next_unix_socket(&iter->i, sk);
	return sk;
}
2164
/* seq_file stop: release the table lock taken in unix_seq_start(). */
static void unix_seq_stop(struct seq_file *seq, void *v)
	__releases(unix_table_lock)
{
	spin_unlock(&unix_table_lock);
}
2170
/* Emit one /proc/net/unix row: pointer, refcount, protocol (always 0
 * here), flags, type, state, inode and -- if bound -- the socket path,
 * with abstract names rendered with a leading '@'.
 */
static int unix_seq_show(struct seq_file *seq, void *v)
{

	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
			 "Inode Path\n");
	else {
		struct sock *s = v;
		struct unix_sock *u = unix_sk(s);
		/* Lock out concurrent bind so u->addr stays stable. */
		unix_state_lock(s);

		seq_printf(seq, "%p: %08X %08X %08X %04X %02X %5lu",
			s,
			atomic_read(&s->sk_refcnt),
			0,
			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
			s->sk_type,
			s->sk_socket ?
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
			sock_i_ino(s));

		if (u->addr) {
			int i, len;
			seq_putc(seq, ' ');

			i = 0;
			/* Name length excludes the sun_family field; for
			 * filesystem names the trailing NUL is dropped,
			 * for abstract names the leading NUL becomes '@'.
			 */
			len = u->addr->len - sizeof(short);
			if (!UNIX_ABSTRACT(s))
				len--;
			else {
				seq_putc(seq, '@');
				i++;
			}
			for ( ; i < len; i++)
				seq_putc(seq, u->addr->name->sun_path[i]);
		}
		unix_state_unlock(s);
		seq_putc(seq, '\n');
	}

	return 0;
}
2214
/* seq_file iterator callbacks for /proc/net/unix. */
static const struct seq_operations unix_seq_ops = {
	.start  = unix_seq_start,
	.next   = unix_seq_next,
	.stop   = unix_seq_stop,
	.show   = unix_seq_show,
};
2221
/* open() for /proc/net/unix: netns-aware seq_file with our iterator state. */
static int unix_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &unix_seq_ops,
			    sizeof(struct unix_iter_state));
}
2227
/* File operations backing the /proc/net/unix entry. */
static const struct file_operations unix_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= unix_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};
2235
2236#endif
2237
/* Registration record for socket(PF_UNIX, ...) creation. */
static const struct net_proto_family unix_family_ops = {
	.family = PF_UNIX,
	.create = unix_create,
	.owner	= THIS_MODULE,
};
2243
2244
/* Per-network-namespace setup: default datagram backlog, the sysctl
 * table and the /proc/net/unix entry.  NOTE(review): a sysctl
 * registration failure is reported as -ENOMEM regardless of the
 * underlying cause.
 */
static int __net_init unix_net_init(struct net *net)
{
	int error = -ENOMEM;

	net->unx.sysctl_max_dgram_qlen = 10;
	if (unix_sysctl_register(net))
		goto out;

#ifdef CONFIG_PROC_FS
	if (!proc_net_fops_create(net, "unix", 0, &unix_seq_fops)) {
		/* Roll back the sysctl registration on proc failure. */
		unix_sysctl_unregister(net);
		goto out;
	}
#endif
	error = 0;
out:
	return error;
}
2263
/* Per-network-namespace teardown: undo unix_net_init(). */
static void __net_exit unix_net_exit(struct net *net)
{
	unix_sysctl_unregister(net);
	proc_net_remove(net, "unix");
}
2269
/* Hooks invoked on network-namespace creation/destruction. */
static struct pernet_operations unix_net_ops = {
	.init = unix_net_init,
	.exit = unix_net_exit,
};
2274
/* Module init: register the unix_sock proto, the PF_UNIX family and the
 * per-netns hooks.  Returns 0 on success or the proto_register() error.
 */
static int __init af_unix_init(void)
{
	int rc = -1;
	struct sk_buff *dummy_skb;

	/* unix_skb_parms lives in skb->cb; make sure it actually fits. */
	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb));

	rc = proto_register(&unix_proto, 1);
	if (rc != 0) {
		printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n",
		       __func__);
		goto out;
	}

	sock_register(&unix_family_ops);
	register_pernet_subsys(&unix_net_ops);
out:
	return rc;
}
2294
/* Module exit: unregister everything set up in af_unix_init(). */
static void __exit af_unix_exit(void)
{
	sock_unregister(PF_UNIX);
	proto_unregister(&unix_proto);
	unregister_pernet_subsys(&unix_net_ops);
}
2301
2302/* Earlier than device_initcall() so that other drivers invoking
2303   request_module() don't end up in a loop when modprobe tries
2304   to use a UNIX socket. But later than subsys_initcall() because
2305   we depend on stuff initialised there */
2306fs_initcall(af_unix_init);
2307module_exit(af_unix_exit);
2308
2309MODULE_LICENSE("GPL");
2310MODULE_ALIAS_NETPROTO(PF_UNIX);
2311