af_unix.c revision 973a34aa8593dbfe84386343c694f5beecb51d8a
/*
 * NET4:	Implementation of BSD Unix domain sockets.
 *
 * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Fixes:
 *		Linus Torvalds	:	Assorted bug cures.
 *		Niibe Yutaka	:	async I/O support.
 *		Carsten Paeth	:	PF_UNIX check, address fixes.
 *		Alan Cox	:	Limit size of allocated blocks.
 *		Alan Cox	:	Fixed the stupid socketpair bug.
 *		Alan Cox	:	BSD compatibility fine tuning.
 *		Alan Cox	:	Fixed a bug in connect when interrupted.
 *		Alan Cox	:	Sorted out a proper draft version of
 *					file descriptor passing hacked up from
 *					Mike Shaver's work.
 *		Marty Leisner	:	Fixes to fd passing
 *		Nick Nevin	:	recvmsg bugfix.
 *		Alan Cox	:	Started proper garbage collector
 *		Heiko Eißfeldt	:	Missing verify_area check
 *		Alan Cox	:	Started POSIXisms
 *		Andreas Schwab	:	Replace inode by dentry for proper
 *					reference counting
 *		Kirk Petersen	:	Made this a module
 *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
 *					Lots of bug fixes.
 *	     Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
 *					by the above two patches.
 *	     Andrea Arcangeli	:	If possible we block in connect(2)
 *					if the max backlog of the listen socket
 *					has been reached. This won't break
 *					old apps and it will avoid a huge amount
 *					of socks hashed (this is for unix_gc()
 *					performance reasons).
 *					Security fix that limits the max
 *					number of socks to 2*max_files and
 *					the number of skbs queueable in the
 *					dgram receiver.
 *		Artur Skawina   :	Hash function optimizations
 *	     Alexey Kuznetsov   :	Full scale SMP. Lots of bugs are introduced 8)
 *	      Malcolm Beattie   :	Set peercred for socketpair
 *	     Michal Ostrowski   :       Module initialization cleanup.
 *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
 *	     				the core infrastructure is doing that
 *	     				for all net proto families now (2.5.69+)
 *
 *
 * Known differences from the reference BSD that was tested:
 *
 *	[TO FIX]
 *	ECONNREFUSED is not returned from one end of a connected socket to the
 *		other the moment one end closes.
 *	fstat() doesn't return st_dev=0, and gives the blksize as the high water
 *		mark and a fake inode identifier (nor the BSD first-socket-fstat-twice bug).
 *	[NOT TO FIX]
 *	accept() returns a path name even if the connecting socket has closed
 *		in the meantime (BSD loses the path and gives up).
 *	accept() returns 0 length path for an unbound connector. BSD returns 16
 *		and a null first byte in the path (but not for gethost/peername - BSD bug??)
 *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
 *	BSD af_unix apparently has connect forgetting to block properly.
 *		(need to check this against the POSIX spec in detail)
 *
 * Differences from 2.0.0-11-... (ANK)
 *	Bug fixes and improvements.
 *		- client shutdown killed the server socket.
 *		- removed all useless cli/sti pairs.
 *
 *	Semantic changes/extensions.
 *		- generic control message passing.
 *		- SCM_CREDENTIALS control message.
 *		- "Abstract" (not FS based) socket bindings.
 *		  Abstract names are sequences of bytes (not zero terminated)
 *		  starting with 0, so that this name space does not intersect
 *		  with BSD names.
 */
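
/*
 * Illustrative sketch (userspace, not part of this file's build) of the
 * abstract binding style described above; the name "\0demo" is a made-up
 * example. A leading NUL byte in sun_path selects the abstract namespace,
 * and the address length passed to bind() must cover exactly the bytes of
 * the name, since abstract names are not zero terminated.
 *
 *	#include <stddef.h>
 *	#include <string.h>
 *	#include <sys/socket.h>
 *	#include <sys/un.h>
 *
 *	static int bind_abstract(int fd)
 *	{
 *		struct sockaddr_un sun;
 *
 *		memset(&sun, 0, sizeof(sun));
 *		sun.sun_family = AF_UNIX;
 *		memcpy(sun.sun_path, "\0demo", 5);
 *		return bind(fd, (struct sockaddr *)&sun,
 *			    offsetof(struct sockaddr_un, sun_path) + 5);
 *	}
 */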

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/dcache.h>
#include <linux/namei.h>
#include <linux/socket.h>
#include <linux/un.h>
#include <linux/fcntl.h>
#include <linux/termios.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/in.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <net/af_unix.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <net/scm.h>
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/rtnetlink.h>
#include <linux/mount.h>
#include <net/checksum.h>
#include <linux/security.h>

static struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
static DEFINE_SPINLOCK(unix_table_lock);
static atomic_long_t unix_nr_socks;

#define unix_sockets_unbound	(&unix_socket_table[UNIX_HASH_SIZE])

#define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)

#ifdef CONFIG_SECURITY_NETWORK
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	memcpy(UNIXSID(skb), &scm->secid, sizeof(u32));
}

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secid = *UNIXSID(skb);
}
#else
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }
#endif /* CONFIG_SECURITY_NETWORK */

/*
 *  SMP locking strategy:
 *    hash table is protected with spinlock unix_table_lock
 *    each socket state is protected by separate spin lock.
 */

static inline unsigned unix_hash_fold(__wsum n)
{
	unsigned hash = (__force unsigned)n;
	hash ^= hash>>16;
	hash ^= hash>>8;
	return hash&(UNIX_HASH_SIZE-1);
}

#define unix_peer(sk) (unix_sk(sk)->peer)

static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == sk;
}

static inline int unix_may_send(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
}

static inline int unix_recvq_full(struct sock const *sk)
{
	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
}

static struct sock *unix_peer_get(struct sock *s)
{
	struct sock *peer;

	unix_state_lock(s);
	peer = unix_peer(s);
	if (peer)
		sock_hold(peer);
	unix_state_unlock(s);
	return peer;
}

static inline void unix_release_addr(struct unix_address *addr)
{
	if (atomic_dec_and_test(&addr->refcnt))
		kfree(addr);
}

/*
 *	Check unix socket name:
 *		- it should not be zero length.
 *		- if it does not start with a zero byte, it should be
 *		  NUL terminated (FS object)
 *		- if it starts with a zero byte, it is an abstract name.
 */

static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned *hashp)
{
	if (len <= sizeof(short) || len > sizeof(*sunaddr))
		return -EINVAL;
	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
		return -EINVAL;
	if (sunaddr->sun_path[0]) {
		/*
		 * This may look like an off by one error but it is a bit more
		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
		 * sun_path[108] doesn't as such exist.  However in kernel space
		 * we are guaranteed that it is a valid memory location in our
		 * kernel address buffer.
		 */
		((char *)sunaddr)[len] = 0;
		len = strlen(sunaddr->sun_path)+1+sizeof(short);
		return len;
	}

	*hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
	return len;
}

static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}

static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	WARN_ON(!sk_unhashed(sk));
	sk_add_node(sk, list);
}

static inline void unix_remove_socket(struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_remove_socket(sk);
	spin_unlock(&unix_table_lock);
}

static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_insert_socket(list, sk);
	spin_unlock(&unix_table_lock);
}

static struct sock *__unix_find_socket_byname(struct net *net,
					      struct sockaddr_un *sunname,
					      int len, int type, unsigned hash)
{
	struct sock *s;
	struct hlist_node *node;

	sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
		struct unix_sock *u = unix_sk(s);

		if (!net_eq(sock_net(s), net))
			continue;

		if (u->addr->len == len &&
		    !memcmp(u->addr->name, sunname, len))
			goto found;
	}
	s = NULL;
found:
	return s;
}

static inline struct sock *unix_find_socket_byname(struct net *net,
						   struct sockaddr_un *sunname,
						   int len, int type,
						   unsigned hash)
{
	struct sock *s;

	spin_lock(&unix_table_lock);
	s = __unix_find_socket_byname(net, sunname, len, type, hash);
	if (s)
		sock_hold(s);
	spin_unlock(&unix_table_lock);
	return s;
}

static struct sock *unix_find_socket_byinode(struct inode *i)
{
	struct sock *s;
	struct hlist_node *node;

	spin_lock(&unix_table_lock);
	sk_for_each(s, node,
		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
		struct dentry *dentry = unix_sk(s)->dentry;

		if (dentry && dentry->d_inode == i) {
			sock_hold(s);
			goto found;
		}
	}
	s = NULL;
found:
	spin_unlock(&unix_table_lock);
	return s;
}

static inline int unix_writable(struct sock *sk)
{
	return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
}

static void unix_write_space(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	if (unix_writable(sk)) {
		wq = rcu_dereference(sk->sk_wq);
		if (wq_has_sleeper(wq))
			wake_up_interruptible_sync_poll(&wq->wait,
				POLLOUT | POLLWRNORM | POLLWRBAND);
		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
	}
	rcu_read_unlock();
}

/* When a dgram socket disconnects (or changes its peer), we clear its receive
 * queue of packets arrived from the previous peer. First, it allows flow
 * control based only on wmem_alloc; second, an sk connected to a peer
 * may receive messages only from that peer. */
static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
{
	if (!skb_queue_empty(&sk->sk_receive_queue)) {
		skb_queue_purge(&sk->sk_receive_queue);
		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);

		/* If one link of a bidirectional dgram pipe is disconnected,
		 * we signal an error. Messages are lost. Do not do this
		 * when the peer was not connected to us.
		 */
		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
			other->sk_err = ECONNRESET;
			other->sk_error_report(other);
		}
	}
}

static void unix_sock_destructor(struct sock *sk)
{
	struct unix_sock *u = unix_sk(sk);

	skb_queue_purge(&sk->sk_receive_queue);

	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
	WARN_ON(!sk_unhashed(sk));
	WARN_ON(sk->sk_socket);
	if (!sock_flag(sk, SOCK_DEAD)) {
		printk(KERN_INFO "Attempt to release alive unix socket: %p\n", sk);
		return;
	}

	if (u->addr)
		unix_release_addr(u->addr);

	atomic_long_dec(&unix_nr_socks);
	local_bh_disable();
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
	local_bh_enable();
#ifdef UNIX_REFCNT_DEBUG
	printk(KERN_DEBUG "UNIX %p is destroyed, %ld are still alive.\n", sk,
		atomic_long_read(&unix_nr_socks));
#endif
}

static int unix_release_sock(struct sock *sk, int embrion)
{
	struct unix_sock *u = unix_sk(sk);
	struct dentry *dentry;
	struct vfsmount *mnt;
	struct sock *skpair;
	struct sk_buff *skb;
	int state;

	unix_remove_socket(sk);

	/* Clear state */
	unix_state_lock(sk);
	sock_orphan(sk);
	sk->sk_shutdown = SHUTDOWN_MASK;
	dentry	     = u->dentry;
	u->dentry    = NULL;
	mnt	     = u->mnt;
	u->mnt	     = NULL;
	state = sk->sk_state;
	sk->sk_state = TCP_CLOSE;
	unix_state_unlock(sk);

	wake_up_interruptible_all(&u->peer_wait);

	skpair = unix_peer(sk);

	if (skpair != NULL) {
		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
			unix_state_lock(skpair);
			/* No more writes */
			skpair->sk_shutdown = SHUTDOWN_MASK;
			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
				skpair->sk_err = ECONNRESET;
			unix_state_unlock(skpair);
			skpair->sk_state_change(skpair);
			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
		}
		sock_put(skpair); /* It may now die */
		unix_peer(sk) = NULL;
	}

	/* Try to flush out this socket. Throw out buffers at least */

	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		if (state == TCP_LISTEN)
			unix_release_sock(skb->sk, 1);
		/* passed fds are erased in the kfree_skb hook	      */
		kfree_skb(skb);
	}

	if (dentry) {
		dput(dentry);
		mntput(mnt);
	}

	sock_put(sk);

	/* ---- Socket is dead now and most probably destroyed ---- */

	/*
	 * Fixme: BSD difference: In BSD all sockets connected to us get
	 *	  ECONNRESET and we die on the spot. In Linux we behave
	 *	  like files and pipes do and wait for the last
	 *	  dereference.
	 *
	 * Can't we simply set sock->err?
	 *
	 *	  What does the above comment talk about? --ANK(980817)
	 */

	if (unix_tot_inflight)
		unix_gc();		/* Garbage collect fds */

	return 0;
}

static void init_peercred(struct sock *sk)
{
	put_pid(sk->sk_peer_pid);
	if (sk->sk_peer_cred)
		put_cred(sk->sk_peer_cred);
	sk->sk_peer_pid  = get_pid(task_tgid(current));
	sk->sk_peer_cred = get_current_cred();
}

static void copy_peercred(struct sock *sk, struct sock *peersk)
{
	put_pid(sk->sk_peer_pid);
	if (sk->sk_peer_cred)
		put_cred(sk->sk_peer_cred);
	sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
}

static int unix_listen(struct socket *sock, int backlog)
{
	int err;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	struct pid *old_pid = NULL;
	const struct cred *old_cred = NULL;

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;	/* Only stream/seqpacket sockets accept */
	err = -EINVAL;
	if (!u->addr)
		goto out;	/* No listens on an unbound socket */
	unix_state_lock(sk);
	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (backlog > sk->sk_max_ack_backlog)
		wake_up_interruptible_all(&u->peer_wait);
	sk->sk_max_ack_backlog	= backlog;
	sk->sk_state		= TCP_LISTEN;
	/* set credentials so connect can copy them */
	init_peercred(sk);
	err = 0;

out_unlock:
	unix_state_unlock(sk);
	put_pid(old_pid);
	if (old_cred)
		put_cred(old_cred);
out:
	return err;
}

static int unix_release(struct socket *);
static int unix_bind(struct socket *, struct sockaddr *, int);
static int unix_stream_connect(struct socket *, struct sockaddr *,
			       int addr_len, int flags);
static int unix_socketpair(struct socket *, struct socket *);
static int unix_accept(struct socket *, struct socket *, int);
static int unix_getname(struct socket *, struct sockaddr *, int *, int);
static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
static unsigned int unix_dgram_poll(struct file *, struct socket *,
				    poll_table *);
static int unix_ioctl(struct socket *, unsigned int, unsigned long);
static int unix_shutdown(struct socket *, int);
static int unix_stream_sendmsg(struct kiocb *, struct socket *,
			       struct msghdr *, size_t);
static int unix_stream_recvmsg(struct kiocb *, struct socket *,
			       struct msghdr *, size_t, int);
static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
			      struct msghdr *, size_t);
static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
			      struct msghdr *, size_t, int);
static int unix_dgram_connect(struct socket *, struct sockaddr *,
			      int, int);
static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
				  struct msghdr *, size_t);

static const struct proto_ops unix_stream_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_poll,
	.ioctl =	unix_ioctl,
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_stream_sendmsg,
	.recvmsg =	unix_stream_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};

static const struct proto_ops unix_dgram_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_dgram_connect,
	.socketpair =	unix_socketpair,
	.accept =	sock_no_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_dgram_sendmsg,
	.recvmsg =	unix_dgram_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};

static const struct proto_ops unix_seqpacket_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_seqpacket_sendmsg,
	.recvmsg =	unix_dgram_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};

static struct proto unix_proto = {
	.name			= "UNIX",
	.owner			= THIS_MODULE,
	.obj_size		= sizeof(struct unix_sock),
};

/*
 * AF_UNIX sockets do not interact with hardware, hence they
 * don't trigger interrupts - so it's safe for them to have
 * bh-unsafe locking for their sk_receive_queue.lock. Split off
 * this special lock-class by reinitializing the spinlock key:
 */
static struct lock_class_key af_unix_sk_receive_queue_lock_key;

static struct sock *unix_create1(struct net *net, struct socket *sock)
{
	struct sock *sk = NULL;
	struct unix_sock *u;

	atomic_long_inc(&unix_nr_socks);
	if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
		goto out;

	sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto);
	if (!sk)
		goto out;

	sock_init_data(sock, sk);
	lockdep_set_class(&sk->sk_receive_queue.lock,
				&af_unix_sk_receive_queue_lock_key);

	sk->sk_write_space	= unix_write_space;
	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
	sk->sk_destruct		= unix_sock_destructor;
	u	  = unix_sk(sk);
	u->dentry = NULL;
	u->mnt	  = NULL;
	spin_lock_init(&u->lock);
	atomic_long_set(&u->inflight, 0);
	INIT_LIST_HEAD(&u->link);
	mutex_init(&u->readlock); /* single task reading lock */
	init_waitqueue_head(&u->peer_wait);
	unix_insert_socket(unix_sockets_unbound, sk);
out:
	if (sk == NULL)
		atomic_long_dec(&unix_nr_socks);
	else {
		local_bh_disable();
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
		local_bh_enable();
	}
	return sk;
}

static int unix_create(struct net *net, struct socket *sock, int protocol,
		       int kern)
{
	if (protocol && protocol != PF_UNIX)
		return -EPROTONOSUPPORT;

	sock->state = SS_UNCONNECTED;

	switch (sock->type) {
	case SOCK_STREAM:
		sock->ops = &unix_stream_ops;
		break;
		/*
		 *	Believe it or not, BSD has AF_UNIX SOCK_RAW, though
		 *	nothing uses it.
		 */
	case SOCK_RAW:
		sock->type = SOCK_DGRAM;
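		/* fall through: SOCK_RAW is silently treated as SOCK_DGRAM */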
	case SOCK_DGRAM:
		sock->ops = &unix_dgram_ops;
		break;
	case SOCK_SEQPACKET:
		sock->ops = &unix_seqpacket_ops;
		break;
	default:
		return -ESOCKTNOSUPPORT;
	}

	return unix_create1(net, sock) ? 0 : -ENOMEM;
}

static int unix_release(struct socket *sock)
{
	struct sock *sk = sock->sk;

	if (!sk)
		return 0;

	sock->sk = NULL;

	return unix_release_sock(sk, 0);
}

static int unix_autobind(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	static u32 ordernum = 1;
	struct unix_address *addr;
	int err;
	unsigned int retries = 0;

	mutex_lock(&u->readlock);

	err = 0;
	if (u->addr)
		goto out;

	err = -ENOMEM;
	addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
	if (!addr)
		goto out;

	addr->name->sun_family = AF_UNIX;
	atomic_set(&addr->refcnt, 1);

retry:
	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
	addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));

	spin_lock(&unix_table_lock);
	ordernum = (ordernum+1)&0xFFFFF;

	if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
				      addr->hash)) {
		spin_unlock(&unix_table_lock);
		/*
		 * __unix_find_socket_byname() may take a long time if many
		 * names are already in use.
		 */
		cond_resched();
		/* Give up if all names seem to be in use. */
		if (retries++ == 0xFFFFF) {
			err = -ENOSPC;
			kfree(addr);
			goto out;
		}
		goto retry;
	}
	addr->hash ^= sk->sk_type;

	__unix_remove_socket(sk);
	u->addr = addr;
	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
	spin_unlock(&unix_table_lock);
	err = 0;

out:	mutex_unlock(&u->readlock);
	return err;
}
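
/*
 * Illustrative only (userspace sketch, not part of the kernel build):
 * autobind fires when a socket is bound with just the family, i.e.
 * addr_len == sizeof(sa_family_t), as unix_bind() below shows. After
 * that, getsockname() reports a kernel-chosen abstract name of the form
 * "\0XXXXX" with five hex digits, matching the "%05x" format above.
 *
 *	struct sockaddr_un sun = { .sun_family = AF_UNIX };
 *	socklen_t len = sizeof(sun);
 *	int fd = socket(AF_UNIX, SOCK_DGRAM, 0);
 *
 *	bind(fd, (struct sockaddr *)&sun, sizeof(sa_family_t));
 *	getsockname(fd, (struct sockaddr *)&sun, &len);
 *	(sun.sun_path[0] is now 0; sun.sun_path[1..5] hold the hex digits)
 */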

static struct sock *unix_find_other(struct net *net,
				    struct sockaddr_un *sunname, int len,
				    int type, unsigned hash, int *error)
{
	struct sock *u;
	struct path path;
	int err = 0;

	if (sunname->sun_path[0]) {
		struct inode *inode;
		err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
		if (err)
			goto fail;
		inode = path.dentry->d_inode;
		err = inode_permission(inode, MAY_WRITE);
		if (err)
			goto put_fail;

		err = -ECONNREFUSED;
		if (!S_ISSOCK(inode->i_mode))
			goto put_fail;
		u = unix_find_socket_byinode(inode);
		if (!u)
			goto put_fail;

		if (u->sk_type == type)
			touch_atime(path.mnt, path.dentry);

		path_put(&path);

		err = -EPROTOTYPE;
		if (u->sk_type != type) {
			sock_put(u);
			goto fail;
		}
	} else {
		err = -ECONNREFUSED;
		u = unix_find_socket_byname(net, sunname, len, type, hash);
		if (u) {
			struct dentry *dentry;
			dentry = unix_sk(u)->dentry;
			if (dentry)
				touch_atime(unix_sk(u)->mnt, dentry);
		} else
			goto fail;
	}
	return u;

put_fail:
	path_put(&path);
fail:
	*error = err;
	return NULL;
}


static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct dentry *dentry = NULL;
	struct nameidata nd;
	int err;
	unsigned hash;
	struct unix_address *addr;
	struct hlist_head *list;

	err = -EINVAL;
	if (sunaddr->sun_family != AF_UNIX)
		goto out;

	if (addr_len == sizeof(short)) {
		err = unix_autobind(sock);
		goto out;
	}

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	mutex_lock(&u->readlock);

	err = -EINVAL;
	if (u->addr)
		goto out_up;

	err = -ENOMEM;
	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
	if (!addr)
		goto out_up;

	memcpy(addr->name, sunaddr, addr_len);
	addr->len = addr_len;
	addr->hash = hash ^ sk->sk_type;
	atomic_set(&addr->refcnt, 1);

	if (sunaddr->sun_path[0]) {
		unsigned int mode;
		err = 0;
		/*
		 * Get the parent directory, calculate the hash for last
		 * component.
		 */
		err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd);
		if (err)
			goto out_mknod_parent;

		dentry = lookup_create(&nd, 0);
		err = PTR_ERR(dentry);
		if (IS_ERR(dentry))
			goto out_mknod_unlock;

		/*
		 * All right, let's create it.
		 */
		mode = S_IFSOCK |
		       (SOCK_INODE(sock)->i_mode & ~current_umask());
		err = mnt_want_write(nd.path.mnt);
		if (err)
			goto out_mknod_dput;
		err = security_path_mknod(&nd.path, dentry, mode, 0);
		if (err)
			goto out_mknod_drop_write;
		err = vfs_mknod(nd.path.dentry->d_inode, dentry, mode, 0);
out_mknod_drop_write:
		mnt_drop_write(nd.path.mnt);
		if (err)
			goto out_mknod_dput;
		mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
		dput(nd.path.dentry);
		nd.path.dentry = dentry;

		addr->hash = UNIX_HASH_SIZE;
	}

	spin_lock(&unix_table_lock);

	if (!sunaddr->sun_path[0]) {
		err = -EADDRINUSE;
		if (__unix_find_socket_byname(net, sunaddr, addr_len,
					      sk->sk_type, hash)) {
			unix_release_addr(addr);
			goto out_unlock;
		}

		list = &unix_socket_table[addr->hash];
	} else {
		list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
		u->dentry = nd.path.dentry;
		u->mnt    = nd.path.mnt;
	}

	err = 0;
	__unix_remove_socket(sk);
	u->addr = addr;
	__unix_insert_socket(list, sk);

out_unlock:
	spin_unlock(&unix_table_lock);
out_up:
	mutex_unlock(&u->readlock);
out:
	return err;

out_mknod_dput:
	dput(dentry);
out_mknod_unlock:
	mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
	path_put(&nd.path);
out_mknod_parent:
	if (err == -EEXIST)
		err = -EADDRINUSE;
	unix_release_addr(addr);
	goto out_up;
}

static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_lock(sk1);
		return;
	}
	if (sk1 < sk2) {
		unix_state_lock(sk1);
		unix_state_lock_nested(sk2);
	} else {
		unix_state_lock(sk2);
		unix_state_lock_nested(sk1);
	}
}

static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_unlock(sk1);
		return;
	}
	unix_state_unlock(sk1);
	unix_state_unlock(sk2);
}

static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
			      int alen, int flags)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
	struct sock *other;
	unsigned hash;
	int err;

	if (addr->sa_family != AF_UNSPEC) {
		err = unix_mkname(sunaddr, alen, &hash);
		if (err < 0)
			goto out;
		alen = err;

		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
			goto out;

restart:
		other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
		if (!other)
			goto out;

		unix_state_double_lock(sk, other);

		/* Apparently VFS overslept socket death. Retry. */
		if (sock_flag(other, SOCK_DEAD)) {
			unix_state_double_unlock(sk, other);
			sock_put(other);
			goto restart;
		}

		err = -EPERM;
		if (!unix_may_send(sk, other))
			goto out_unlock;

		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;

	} else {
		/*
		 *	1003.1g breaking connected state with AF_UNSPEC
		 */
		other = NULL;
		unix_state_double_lock(sk, other);
	}

	/*
	 * If it was connected, reconnect.
	 */
	if (unix_peer(sk)) {
		struct sock *old_peer = unix_peer(sk);
		unix_peer(sk) = other;
		unix_state_double_unlock(sk, other);

		if (other != old_peer)
			unix_dgram_disconnected(sk, old_peer);
		sock_put(old_peer);
	} else {
		unix_peer(sk) = other;
		unix_state_double_unlock(sk, other);
	}
	return 0;

out_unlock:
	unix_state_double_unlock(sk, other);
	sock_put(other);
out:
	return err;
}

static long unix_wait_for_peer(struct sock *other, long timeo)
{
	struct unix_sock *u = unix_sk(other);
	int sched;
	DEFINE_WAIT(wait);

	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);

	sched = !sock_flag(other, SOCK_DEAD) &&
		!(other->sk_shutdown & RCV_SHUTDOWN) &&
		unix_recvq_full(other);

	unix_state_unlock(other);

	if (sched)
		timeo = schedule_timeout(timeo);

	finish_wait(&u->peer_wait, &wait);
	return timeo;
}

static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			       int addr_len, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
	struct sock *newsk = NULL;
	struct sock *other = NULL;
	struct sk_buff *skb = NULL;
	unsigned hash;
	int st;
	int err;
	long timeo;

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
	    (err = unix_autobind(sock)) != 0)
		goto out;

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	/* First of all allocate resources.
	   If we allocate after the state is locked,
	   we will have to recheck everything again in any case.
	 */

	err = -ENOMEM;

	/* create new sock for complete connection */
	newsk = unix_create1(sock_net(sk), NULL);
	if (newsk == NULL)
		goto out;

	/* Allocate skb for sending to listening sock */
	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
	if (skb == NULL)
		goto out;

restart:
	/*  Find listening sock. */
	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
	if (!other)
		goto out;

	/* Latch state of peer */
	unix_state_lock(other);

	/* Apparently VFS overslept socket death. Retry. */
	if (sock_flag(other, SOCK_DEAD)) {
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = -ECONNREFUSED;
	if (other->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (unix_recvq_full(other)) {
		err = -EAGAIN;
		if (!timeo)
			goto out_unlock;

		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
		sock_put(other);
		goto restart;
	}

	/* Latch our state.

	   It is a tricky place. We need to grab our state lock and cannot
	   drop the lock on the peer. It is dangerous because deadlock is
	   possible. The connect-to-self case and simultaneous
	   connect attempts are eliminated by checking socket
	   state. other is TCP_LISTEN; if sk is TCP_LISTEN, we
	   check this before attempting to grab the lock.

	   Well, and we have to recheck the state after the socket is locked.
	 */
	st = sk->sk_state;

	switch (st) {
	case TCP_CLOSE:
		/* This is ok... continue with connect */
		break;
	case TCP_ESTABLISHED:
		/* Socket is already connected */
		err = -EISCONN;
		goto out_unlock;
	default:
		err = -EINVAL;
		goto out_unlock;
	}

	unix_state_lock_nested(sk);

	if (sk->sk_state != st) {
		unix_state_unlock(sk);
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = security_unix_stream_connect(sock, other->sk_socket, newsk);
	if (err) {
		unix_state_unlock(sk);
		goto out_unlock;
	}

	/* The way is open! Quickly set all the necessary fields... */

	sock_hold(sk);
	unix_peer(newsk)	= sk;
	newsk->sk_state		= TCP_ESTABLISHED;
	newsk->sk_type		= sk->sk_type;
	init_peercred(newsk);
	newu = unix_sk(newsk);
	newsk->sk_wq		= &newu->peer_wq;
	otheru = unix_sk(other);

	/* copy address information from listening to new sock*/
	if (otheru->addr) {
		atomic_inc(&otheru->addr->refcnt);
		newu->addr = otheru->addr;
	}
	if (otheru->dentry) {
		newu->dentry	= dget(otheru->dentry);
		newu->mnt	= mntget(otheru->mnt);
	}

	/* Set credentials */
	copy_peercred(sk, other);

	sock->state	= SS_CONNECTED;
	sk->sk_state	= TCP_ESTABLISHED;
	sock_hold(newsk);

	smp_mb__after_atomic_inc();	/* sock_hold() does an atomic_inc() */
	unix_peer(sk)	= newsk;

	unix_state_unlock(sk);

	/* take ten and send info to the listening sock */
	spin_lock(&other->sk_receive_queue.lock);
	__skb_queue_tail(&other->sk_receive_queue, skb);
	spin_unlock(&other->sk_receive_queue.lock);
	unix_state_unlock(other);
	other->sk_data_ready(other, 0);
	sock_put(other);
	return 0;

out_unlock:
	if (other)
		unix_state_unlock(other);

out:
	kfree_skb(skb);
	if (newsk)
		unix_release_sock(newsk, 0);
	if (other)
		sock_put(other);
	return err;
}

static int unix_socketpair(struct socket *socka, struct socket *sockb)
{
	struct sock *ska = socka->sk, *skb = sockb->sk;

	/* Join our sockets back to back */
	sock_hold(ska);
	sock_hold(skb);
	unix_peer(ska) = skb;
	unix_peer(skb) = ska;
	init_peercred(ska);
	init_peercred(skb);

	if (ska->sk_type != SOCK_DGRAM) {
		ska->sk_state = TCP_ESTABLISHED;
		skb->sk_state = TCP_ESTABLISHED;
		socka->state  = SS_CONNECTED;
		sockb->state  = SS_CONNECTED;
	}
	return 0;
}
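
/*
 * Illustrative only (userspace sketch): socketpair() is the standard way
 * to obtain a pre-connected pair backed by the code above; both ends
 * carry peer credentials thanks to the init_peercred() calls.
 *
 *	int sv[2];
 *	char buf[4];
 *
 *	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) == 0) {
 *		write(sv[0], "ping", 4);
 *		read(sv[1], buf, sizeof(buf));
 *	}
 */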

static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
{
	struct sock *sk = sock->sk;
	struct sock *tsk;
	struct sk_buff *skb;
	int err;

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;

	err = -EINVAL;
	if (sk->sk_state != TCP_LISTEN)
		goto out;

	/* If socket state is TCP_LISTEN it cannot change (for now...),
	 * so that no locks are necessary.
	 */

	skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
	if (!skb) {
		/* This means receive shutdown. */
		if (err == 0)
			err = -EINVAL;
		goto out;
	}

	tsk = skb->sk;
	skb_free_datagram(sk, skb);
	wake_up_interruptible(&unix_sk(sk)->peer_wait);

	/* attach accepted sock to socket */
	unix_state_lock(tsk);
	newsock->state = SS_CONNECTED;
	sock_graft(tsk, newsock);
	unix_state_unlock(tsk);
	return 0;

out:
	return err;
}


static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
{
	struct sock *sk = sock->sk;
	struct unix_sock *u;
	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
	int err = 0;

	if (peer) {
		sk = unix_peer_get(sk);

		err = -ENOTCONN;
		if (!sk)
			goto out;
		err = 0;
	} else {
		sock_hold(sk);
	}

	u = unix_sk(sk);
	unix_state_lock(sk);
	if (!u->addr) {
		sunaddr->sun_family = AF_UNIX;
		sunaddr->sun_path[0] = 0;
		*uaddr_len = sizeof(short);
	} else {
		struct unix_address *addr = u->addr;

		*uaddr_len = addr->len;
		memcpy(sunaddr, addr->name, *uaddr_len);
	}
	unix_state_unlock(sk);
	sock_put(sk);
out:
	return err;
}

static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
	int i;

	scm->fp = UNIXCB(skb).fp;
	UNIXCB(skb).fp = NULL;

	for (i = scm->fp->count-1; i >= 0; i--)
		unix_notinflight(scm->fp->fp[i]);
}

static void unix_destruct_scm(struct sk_buff *skb)
{
	struct scm_cookie scm;
	memset(&scm, 0, sizeof(scm));
	scm.pid  = UNIXCB(skb).pid;
	scm.cred = UNIXCB(skb).cred;
	if (UNIXCB(skb).fp)
		unix_detach_fds(&scm, skb);

	/* Alas, it calls VFS */
	/* So fscking what? fput() had been SMP-safe since the last Summer */
	scm_destroy(&scm);
	sock_wfree(skb);
}

static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
	int i;

	/*
	 * Need to duplicate file references for the sake of garbage
	 * collection.  Otherwise a socket in the fps might become a
	 * candidate for GC while the skb is not yet queued.
	 */
	UNIXCB(skb).fp = scm_fp_dup(scm->fp);
	if (!UNIXCB(skb).fp)
		return -ENOMEM;

	for (i = scm->fp->count-1; i >= 0; i--)
		unix_inflight(scm->fp->fp[i]);
	return 0;
}

static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
{
	int err = 0;
	UNIXCB(skb).pid  = get_pid(scm->pid);
	UNIXCB(skb).cred = get_cred(scm->cred);
	UNIXCB(skb).fp = NULL;
	if (scm->fp && send_fds)
		err = unix_attach_fds(scm, skb);

	skb->destructor = unix_destruct_scm;
	return err;
}

/*
 *	Send AF_UNIX data.
 */

static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
			      struct msghdr *msg, size_t len)
{
	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr = msg->msg_name;
	struct sock *other = NULL;
	int namelen = 0; /* fake GCC */
	int err;
	unsigned hash;
	struct sk_buff *skb;
	long timeo;
	struct scm_cookie tmp_scm;

	if (NULL == siocb->scm)
		siocb->scm = &tmp_scm;
	wait_for_unix_gc();
	err = scm_send(sock, msg, siocb->scm);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out;

	if (msg->msg_namelen) {
		err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
		if (err < 0)
			goto out;
		namelen = err;
	} else {
		sunaddr = NULL;
		err = -ENOTCONN;
		other = unix_peer_get(sk);
		if (!other)
			goto out;
	}

	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
	    && (err = unix_autobind(sock)) != 0)
		goto out;

	err = -EMSGSIZE;
	if (len > sk->sk_sndbuf - 32)
		goto out;

	skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err);
	if (skb == NULL)
		goto out;

	err = unix_scm_to_skb(siocb->scm, skb, true);
	if (err)
		goto out_free;
	unix_get_secdata(siocb->scm, skb);

	skb_reset_transport_header(skb);
	err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
	if (err)
		goto out_free;

	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);

restart:
	if (!other) {
		err = -ECONNRESET;
		if (sunaddr == NULL)
			goto out_free;

		other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
					hash, &err);
		if (other == NULL)
			goto out_free;
	}

	unix_state_lock(other);
	err = -EPERM;
	if (!unix_may_send(sk, other))
		goto out_unlock;

	if (sock_flag(other, SOCK_DEAD)) {
		/*
		 *	Check with 1003.1g - what should a
		 *	datagram error do here?
		 */
		unix_state_unlock(other);
		sock_put(other);

		err = 0;
		unix_state_lock(sk);
		if (unix_peer(sk) == other) {
			unix_peer(sk) = NULL;
			unix_state_unlock(sk);

			unix_dgram_disconnected(sk, other);
			sock_put(other);
			err = -ECONNREFUSED;
		} else {
			unix_state_unlock(sk);
		}

		other = NULL;
		if (err)
			goto out_free;
		goto restart;
	}

	err = -EPIPE;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (sk->sk_type != SOCK_SEQPACKET) {
		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;
	}

	if (unix_peer(other) != sk && unix_recvq_full(other)) {
		if (!timeo) {
			err = -EAGAIN;
			goto out_unlock;
		}

		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out_free;

		goto restart;
	}

	if (sock_flag(other, SOCK_RCVTSTAMP))
		__net_timestamp(skb);
	skb_queue_tail(&other->sk_receive_queue, skb);
	unix_state_unlock(other);
	other->sk_data_ready(other, len);
	sock_put(other);
	scm_destroy(siocb->scm);
	return len;

out_unlock:
	unix_state_unlock(other);
out_free:
	kfree_skb(skb);
out:
	if (other)
		sock_put(other);
	scm_destroy(siocb->scm);
	return err;
}


static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
			       struct msghdr *msg, size_t len)
{
	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
	struct sock *sk = sock->sk;
	struct sock *other = NULL;
	struct sockaddr_un *sunaddr = msg->msg_name;
	int err, size;
	struct sk_buff *skb;
	int sent = 0;
	struct scm_cookie tmp_scm;
	bool fds_sent = false;

	if (NULL == siocb->scm)
		siocb->scm = &tmp_scm;
	wait_for_unix_gc();
	err = scm_send(sock, msg, siocb->scm);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out_err;

	if (msg->msg_namelen) {
		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
		goto out_err;
	} else {
		sunaddr = NULL;
		err = -ENOTCONN;
		other = unix_peer(sk);
		if (!other)
			goto out_err;
	}

	if (sk->sk_shutdown & SEND_SHUTDOWN)
		goto pipe_err;

	while (sent < len) {
		/*
		 *	Optimisation for the fact that under 0.01% of X
		 *	messages typically need breaking up.
		 */

		size = len-sent;

		/* Keep two messages in the pipe so it schedules better */
		if (size > ((sk->sk_sndbuf >> 1) - 64))
			size = (sk->sk_sndbuf >> 1) - 64;

		if (size > SKB_MAX_ALLOC)
			size = SKB_MAX_ALLOC;

		/*
		 *	Grab a buffer
		 */

		skb = sock_alloc_send_skb(sk, size, msg->msg_flags&MSG_DONTWAIT,
					  &err);

		if (skb == NULL)
			goto out_err;

		/*
		 *	If you pass two values to sock_alloc_send_skb,
		 *	it tries to grab the large buffer with GFP_NOFS
		 *	(which can fail easily), and if that fails it grabs
		 *	the fallback size buffer, which is under a page and
		 *	will succeed. [Alan]
		 */
		size = min_t(int, size, skb_tailroom(skb));


		/* Only send the fds in the first buffer */
		err = unix_scm_to_skb(siocb->scm, skb, !fds_sent);
		if (err) {
			kfree_skb(skb);
			goto out_err;
		}
		fds_sent = true;

		err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
		if (err) {
			kfree_skb(skb);
			goto out_err;
		}

		unix_state_lock(other);

		if (sock_flag(other, SOCK_DEAD) ||
		    (other->sk_shutdown & RCV_SHUTDOWN))
			goto pipe_err_free;

		skb_queue_tail(&other->sk_receive_queue, skb);
		unix_state_unlock(other);
		other->sk_data_ready(other, size);
		sent += size;
	}

	scm_destroy(siocb->scm);
	siocb->scm = NULL;

	return sent;

pipe_err_free:
	unix_state_unlock(other);
	kfree_skb(skb);
pipe_err:
	if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
		send_sig(SIGPIPE, current, 0);
	err = -EPIPE;
out_err:
	scm_destroy(siocb->scm);
	siocb->scm = NULL;
	return sent ? : err;
}

static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
				  struct msghdr *msg, size_t len)
{
	int err;
	struct sock *sk = sock->sk;

	err = sock_error(sk);
	if (err)
		return err;

	if (sk->sk_state != TCP_ESTABLISHED)
		return -ENOTCONN;

	if (msg->msg_namelen)
		msg->msg_namelen = 0;

	return unix_dgram_sendmsg(kiocb, sock, msg, len);
}

static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
{
	struct unix_sock *u = unix_sk(sk);

	msg->msg_namelen = 0;
	if (u->addr) {
		msg->msg_namelen = u->addr->len;
		memcpy(msg->msg_name, u->addr->name, u->addr->len);
	}
}

static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
			      struct msghdr *msg, size_t size,
			      int flags)
{
	struct sock_iocb *siocb = kiocb_to_siocb(iocb);
	struct scm_cookie tmp_scm;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	int noblock = flags & MSG_DONTWAIT;
	struct sk_buff *skb;
	int err;

	err = -EOPNOTSUPP;
	if (flags&MSG_OOB)
		goto out;

	msg->msg_namelen = 0;

	mutex_lock(&u->readlock);

	skb = skb_recv_datagram(sk, flags, noblock, &err);
	if (!skb) {
		unix_state_lock(sk);
		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
		    (sk->sk_shutdown & RCV_SHUTDOWN))
			err = 0;
		unix_state_unlock(sk);
		goto out_unlock;
	}

	wake_up_interruptible_sync_poll(&u->peer_wait,
					POLLOUT | POLLWRNORM | POLLWRBAND);

	if (msg->msg_name)
		unix_copy_addr(msg, skb->sk);

	if (size > skb->len)
		size = skb->len;
	else if (size < skb->len)
		msg->msg_flags |= MSG_TRUNC;

	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size);
	if (err)
		goto out_free;

	if (sock_flag(sk, SOCK_RCVTSTAMP))
		__sock_recv_timestamp(msg, sk, skb);

	if (!siocb->scm) {
		siocb->scm = &tmp_scm;
		memset(&tmp_scm, 0, sizeof(tmp_scm));
	}
	scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred);
	unix_set_secdata(siocb->scm, skb);

	if (!(flags & MSG_PEEK)) {
		if (UNIXCB(skb).fp)
			unix_detach_fds(siocb->scm, skb);
	} else {
		/* It is questionable: on PEEK we could:
		   - not return fds - good, but too simple 8)
		   - return fds, and not return them on read (the old
		     strategy, apparently wrong)
		   - clone fds (I chose this for now, it is the most
		     universal solution)

		   POSIX 1003.1g does not actually define this clearly
		   at all. POSIX 1003.1g doesn't define a lot of things
		   clearly, however!

		*/
		if (UNIXCB(skb).fp)
			siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
	}
	err = size;

	scm_recv(sock, msg, siocb->scm, flags);

out_free:
	skb_free_datagram(sk, skb);
out_unlock:
	mutex_unlock(&u->readlock);
out:
	return err;
}

/*
 *	Sleep until data has arrived. But check for races..
 */

static long unix_stream_data_wait(struct sock *sk, long timeo)
{
	DEFINE_WAIT(wait);

	unix_state_lock(sk);

	for (;;) {
		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);

		if (!skb_queue_empty(&sk->sk_receive_queue) ||
		    sk->sk_err ||
		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
		    signal_pending(current) ||
		    !timeo)
			break;

		set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
		unix_state_unlock(sk);
		timeo = schedule_timeout(timeo);
		unix_state_lock(sk);
		clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
	}

	finish_wait(sk_sleep(sk), &wait);
	unix_state_unlock(sk);
	return timeo;
}



static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
			       struct msghdr *msg, size_t size,
			       int flags)
{
	struct sock_iocb *siocb = kiocb_to_siocb(iocb);
	struct scm_cookie tmp_scm;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr = msg->msg_name;
	int copied = 0;
	int check_creds = 0;
	int target;
	int err = 0;
	long timeo;

	err = -EINVAL;
	if (sk->sk_state != TCP_ESTABLISHED)
		goto out;

	err = -EOPNOTSUPP;
	if (flags&MSG_OOB)
		goto out;

	target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
	timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT);

	msg->msg_namelen = 0;

	/* Lock the socket to prevent queue disordering
	 * while we sleep in memcpy_tomsg
	 */

	if (!siocb->scm) {
		siocb->scm = &tmp_scm;
		memset(&tmp_scm, 0, sizeof(tmp_scm));
	}

	mutex_lock(&u->readlock);

	do {
		int chunk;
		struct sk_buff *skb;

		unix_state_lock(sk);
		skb = skb_dequeue(&sk->sk_receive_queue);
		if (skb == NULL) {
			if (copied >= target)
				goto unlock;

			/*
			 *	POSIX 1003.1g mandates this order.
			 */

			err = sock_error(sk);
			if (err)
				goto unlock;
			if (sk->sk_shutdown & RCV_SHUTDOWN)
				goto unlock;

			unix_state_unlock(sk);
			err = -EAGAIN;
			if (!timeo)
				break;
			mutex_unlock(&u->readlock);

			timeo = unix_stream_data_wait(sk, timeo);

			if (signal_pending(current)) {
				err = sock_intr_errno(timeo);
				goto out;
			}
			mutex_lock(&u->readlock);
			continue;
 unlock:
			unix_state_unlock(sk);
			break;
		}
		unix_state_unlock(sk);

		if (check_creds) {
			/* Never glue messages from different writers */
			if ((UNIXCB(skb).pid  != siocb->scm->pid) ||
			    (UNIXCB(skb).cred != siocb->scm->cred)) {
				skb_queue_head(&sk->sk_receive_queue, skb);
				break;
			}
		} else {
			/* Copy credentials */
			scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred);
			check_creds = 1;
		}

		/* Copy address just once */
		if (sunaddr) {
			unix_copy_addr(msg, skb->sk);
			sunaddr = NULL;
		}

		chunk = min_t(unsigned int, skb->len, size);
		if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
			skb_queue_head(&sk->sk_receive_queue, skb);
			if (copied == 0)
				copied = -EFAULT;
			break;
		}
		copied += chunk;
		size -= chunk;

		/* Mark read part of skb as used */
		if (!(flags & MSG_PEEK)) {
			skb_pull(skb, chunk);

			if (UNIXCB(skb).fp)
				unix_detach_fds(siocb->scm, skb);

			/* put the skb back if we didn't use it up.. */
			if (skb->len) {
				skb_queue_head(&sk->sk_receive_queue, skb);
				break;
			}

			consume_skb(skb);

			if (siocb->scm->fp)
				break;
		} else {
			/* It is questionable, see note in unix_dgram_recvmsg.
			 */
			if (UNIXCB(skb).fp)
				siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);

			/* put message back and return */
			skb_queue_head(&sk->sk_receive_queue, skb);
			break;
		}
	} while (size);

	mutex_unlock(&u->readlock);
	scm_recv(sock, msg, siocb->scm, flags);
out:
	return copied ? : err;
}

static int unix_shutdown(struct socket *sock, int mode)
{
	struct sock *sk = sock->sk;
	struct sock *other;

	mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);

	if (mode) {
		unix_state_lock(sk);
		sk->sk_shutdown |= mode;
		other = unix_peer(sk);
		if (other)
			sock_hold(other);
		unix_state_unlock(sk);
		sk->sk_state_change(sk);

		if (other &&
			(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {

			int peer_mode = 0;

			if (mode&RCV_SHUTDOWN)
				peer_mode |= SEND_SHUTDOWN;
			if (mode&SEND_SHUTDOWN)
				peer_mode |= RCV_SHUTDOWN;
			unix_state_lock(other);
			other->sk_shutdown |= peer_mode;
			unix_state_unlock(other);
			other->sk_state_change(other);
			if (peer_mode == SHUTDOWN_MASK)
				sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
			else if (peer_mode & RCV_SHUTDOWN)
				sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
		}
		if (other)
			sock_put(other);
	}
	return 0;
}

static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	struct sock *sk = sock->sk;
	long amount = 0;
	int err;

	switch (cmd) {
	case SIOCOUTQ:
		amount = sk_wmem_alloc_get(sk);
		err = put_user(amount, (int __user *)arg);
		break;
	case SIOCINQ:
		{
			struct sk_buff *skb;

			if (sk->sk_state == TCP_LISTEN) {
				err = -EINVAL;
				break;
			}

			spin_lock(&sk->sk_receive_queue.lock);
			if (sk->sk_type == SOCK_STREAM ||
			    sk->sk_type == SOCK_SEQPACKET) {
				skb_queue_walk(&sk->sk_receive_queue, skb)
					amount += skb->len;
			} else {
				skb = skb_peek(&sk->sk_receive_queue);
				if (skb)
					amount = skb->len;
			}
			spin_unlock(&sk->sk_receive_queue.lock);
			err = put_user(amount, (int __user *)arg);
			break;
		}

	default:
		err = -ENOIOCTLCMD;
		break;
	}
	return err;
}
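
/*
 * Illustrative only (userspace sketch): SIOCOUTQ and SIOCINQ are the
 * standard way to query the queue sizes computed above. For SOCK_STREAM
 * and SOCK_SEQPACKET, SIOCINQ reports the total bytes queued; for
 * SOCK_DGRAM, only the length of the first pending datagram.
 *
 *	#include <stdio.h>
 *	#include <sys/ioctl.h>
 *	#include <linux/sockios.h>
 *
 *	int pending;
 *
 *	if (ioctl(fd, SIOCINQ, &pending) == 0)
 *		printf("%d bytes readable\n", pending);
 */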

static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
{
	struct sock *sk = sock->sk;
	unsigned int mask;

	sock_poll_wait(file, sk_sleep(sk), wait);
	mask = 0;

	/* exceptional events? */
	if (sk->sk_err)
		mask |= POLLERR;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		mask |= POLLHUP;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLRDHUP | POLLIN | POLLRDNORM;

	/* readable? */
	if (!skb_queue_empty(&sk->sk_receive_queue))
		mask |= POLLIN | POLLRDNORM;

	/* Connection-based sockets need to check for termination and startup */
	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
	    sk->sk_state == TCP_CLOSE)
		mask |= POLLHUP;

	/*
	 * we set writable also when the other side has shut down the
	 * connection. This prevents stuck sockets.
	 */
	if (unix_writable(sk))
		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;

	return mask;
}

static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
				    poll_table *wait)
{
	struct sock *sk = sock->sk, *other;
	unsigned int mask, writable;

	sock_poll_wait(file, sk_sleep(sk), wait);
	mask = 0;

	/* exceptional events? */
	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
		mask |= POLLERR;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		mask |= POLLHUP;

	/* readable? */
	if (!skb_queue_empty(&sk->sk_receive_queue))
		mask |= POLLIN | POLLRDNORM;

	/* Connection-based sockets need to check for termination and startup */
	if (sk->sk_type == SOCK_SEQPACKET) {
		if (sk->sk_state == TCP_CLOSE)
			mask |= POLLHUP;
		/* connection hasn't started yet? */
		if (sk->sk_state == TCP_SYN_SENT)
			return mask;
	}

	/* No write status requested, avoid expensive OUT tests. */
	if (wait && !(wait->key & (POLLWRBAND | POLLWRNORM | POLLOUT)))
		return mask;

	writable = unix_writable(sk);
	other = unix_peer_get(sk);
	if (other) {
		if (unix_peer(other) != sk) {
			sock_poll_wait(file, &unix_sk(other)->peer_wait, wait);
			if (unix_recvq_full(other))
				writable = 0;
		}
		sock_put(other);
	}

	if (writable)
		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
	else
		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);

	return mask;
}

#ifdef CONFIG_PROC_FS
static struct sock *first_unix_socket(int *i)
{
	for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) {
		if (!hlist_empty(&unix_socket_table[*i]))
			return __sk_head(&unix_socket_table[*i]);
	}
	return NULL;
}

static struct sock *next_unix_socket(int *i, struct sock *s)
{
	struct sock *next = sk_next(s);
	/* More in this chain? */
	if (next)
		return next;
	/* Look for next non-empty chain. */
	for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) {
		if (!hlist_empty(&unix_socket_table[*i]))
			return __sk_head(&unix_socket_table[*i]);
	}
	return NULL;
}

struct unix_iter_state {
	struct seq_net_private p;
	int i;
};

static struct sock *unix_seq_idx(struct seq_file *seq, loff_t pos)
{
	struct unix_iter_state *iter = seq->private;
	loff_t off = 0;
	struct sock *s;

	for (s = first_unix_socket(&iter->i); s; s = next_unix_socket(&iter->i, s)) {
		if (sock_net(s) != seq_file_net(seq))
			continue;
		if (off == pos)
			return s;
		++off;
	}
	return NULL;
}

static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(unix_table_lock)
{
	spin_lock(&unix_table_lock);
	return *pos ? unix_seq_idx(seq, *pos - 1) : SEQ_START_TOKEN;
}

static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct unix_iter_state *iter = seq->private;
	struct sock *sk = v;
	++*pos;

	if (v == SEQ_START_TOKEN)
		sk = first_unix_socket(&iter->i);
	else
		sk = next_unix_socket(&iter->i, sk);
	while (sk && (sock_net(sk) != seq_file_net(seq)))
		sk = next_unix_socket(&iter->i, sk);
	return sk;
}

static void unix_seq_stop(struct seq_file *seq, void *v)
	__releases(unix_table_lock)
{
	spin_unlock(&unix_table_lock);
}

static int unix_seq_show(struct seq_file *seq, void *v)
{

	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
			 "Inode Path\n");
	else {
		struct sock *s = v;
		struct unix_sock *u = unix_sk(s);
		unix_state_lock(s);

		seq_printf(seq, "%p: %08X %08X %08X %04X %02X %5lu",
			s,
			atomic_read(&s->sk_refcnt),
			0,
			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
			s->sk_type,
			s->sk_socket ?
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
			sock_i_ino(s));

		if (u->addr) {
			int i, len;
			seq_putc(seq, ' ');

			i = 0;
			len = u->addr->len - sizeof(short);
			if (!UNIX_ABSTRACT(s))
				len--;
			else {
				seq_putc(seq, '@');
				i++;
			}
			for ( ; i < len; i++)
				seq_putc(seq, u->addr->name->sun_path[i]);
		}
		unix_state_unlock(s);
		seq_putc(seq, '\n');
	}

	return 0;
}

static const struct seq_operations unix_seq_ops = {
	.start  = unix_seq_start,
	.next   = unix_seq_next,
	.stop   = unix_seq_stop,
	.show   = unix_seq_show,
};

static int unix_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &unix_seq_ops,
			    sizeof(struct unix_iter_state));
}

static const struct file_operations unix_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= unix_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};

#endif

static const struct net_proto_family unix_family_ops = {
	.family = PF_UNIX,
	.create = unix_create,
	.owner	= THIS_MODULE,
};


static int __net_init unix_net_init(struct net *net)
{
	int error = -ENOMEM;

	net->unx.sysctl_max_dgram_qlen = 10;
	if (unix_sysctl_register(net))
		goto out;

#ifdef CONFIG_PROC_FS
	if (!proc_net_fops_create(net, "unix", 0, &unix_seq_fops)) {
		unix_sysctl_unregister(net);
		goto out;
	}
#endif
	error = 0;
out:
	return error;
}

static void __net_exit unix_net_exit(struct net *net)
{
	unix_sysctl_unregister(net);
	proc_net_remove(net, "unix");
}

static struct pernet_operations unix_net_ops = {
	.init = unix_net_init,
	.exit = unix_net_exit,
};

static int __init af_unix_init(void)
{
	int rc = -1;
	struct sk_buff *dummy_skb;

	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb));

	rc = proto_register(&unix_proto, 1);
	if (rc != 0) {
		printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n",
		       __func__);
		goto out;
	}

	sock_register(&unix_family_ops);
	register_pernet_subsys(&unix_net_ops);
out:
	return rc;
}

static void __exit af_unix_exit(void)
{
	sock_unregister(PF_UNIX);
	proto_unregister(&unix_proto);
	unregister_pernet_subsys(&unix_net_ops);
}

/* Earlier than device_initcall() so that other drivers invoking
   request_module() don't end up in a loop when modprobe tries
   to use a UNIX socket. But later than subsys_initcall() because
   we depend on stuff initialised there */
fs_initcall(af_unix_init);
module_exit(af_unix_exit);

MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_UNIX);
