af_unix.c revision 518de9b39e854542de59bfb8b9f61c8f7ecf808b
/*
 * NET4:	Implementation of BSD Unix domain sockets.
 *
 * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Fixes:
 *		Linus Torvalds	:	Assorted bug cures.
 *		Niibe Yutaka	:	async I/O support.
 *		Carsten Paeth	:	PF_UNIX check, address fixes.
 *		Alan Cox	:	Limit size of allocated blocks.
 *		Alan Cox	:	Fixed the stupid socketpair bug.
 *		Alan Cox	:	BSD compatibility fine tuning.
 *		Alan Cox	:	Fixed a bug in connect when interrupted.
 *		Alan Cox	:	Sorted out a proper draft version of
 *					file descriptor passing hacked up from
 *					Mike Shaver's work.
 *		Marty Leisner	:	Fixes to fd passing
 *		Nick Nevin	:	recvmsg bugfix.
 *		Alan Cox	:	Started proper garbage collector
 *		Heiko EiBfeldt	:	Missing verify_area check
 *		Alan Cox	:	Started POSIXisms
 *		Andreas Schwab	:	Replace inode by dentry for proper
 *					reference counting
 *		Kirk Petersen	:	Made this a module
 *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
 *					Lots of bug fixes.
 *	     Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
 *					by the above two patches.
 *	     Andrea Arcangeli	:	If possible we block in connect(2)
 *					if the max backlog of the listen socket
 *					has been reached. This won't break
 *					old apps and it will avoid a huge number
 *					of sockets being hashed (for unix_gc()
 *					performance reasons).
 *					Security fix that limits the max
 *					number of socks to 2*max_files and
 *					the number of skbs queueable in the
 *					dgram receiver.
 *		Artur Skawina   :	Hash function optimizations
 *	     Alexey Kuznetsov   :	Full scale SMP. Lots of bugs are introduced 8)
 *	      Malcolm Beattie   :	Set peercred for socketpair
 *	     Michal Ostrowski   :       Module initialization cleanup.
 *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
 *	     				the core infrastructure is doing that
 *	     				for all net proto families now (2.5.69+)
 *
 *
 * Known differences from the reference BSD that was tested:
 *
 *	[TO FIX]
 *	ECONNREFUSED is not returned from one end of a connected socket to the
 *		other the moment one end closes.
 *	fstat() doesn't return st_dev=0, and gives the blksize as the high-water
 *		mark and a fake inode identifier (nor the BSD first-socket-fstat-twice bug).
 *	[NOT TO FIX]
 *	accept() returns a path name even if the connecting socket has closed
 *		in the meantime (BSD loses the path and gives up).
 *	accept() returns a 0 length path for an unbound connector. BSD returns 16
 *		and a null first byte in the path (but not for gethost/peername - BSD bug??)
 *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
 *	BSD af_unix apparently has connect forgetting to block properly.
 *		(need to check this against the POSIX spec in detail)
 *
 * Differences from 2.0.0-11-... (ANK)
 *	Bug fixes and improvements.
 *		- client shutdown killed the server socket.
 *		- removed all useless cli/sti pairs.
 *
 *	Semantic changes/extensions.
 *		- generic control message passing.
 *		- SCM_CREDENTIALS control message.
 *		- "Abstract" (not FS based) socket bindings.
 *		  Abstract names are sequences of bytes (not zero terminated)
 *		  starting with 0, so that this name space does not intersect
 *		  with BSD names.
 */
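
/*
 * Illustrative userspace sketch (not part of this kernel file's build):
 * binding a socket in the abstract namespace described above. The name
 * begins with a zero byte and is not zero terminated; its length is
 * carried entirely by the address length argument. The name "\0example"
 * is a made-up placeholder.
 *
 *	#include <stddef.h>
 *	#include <string.h>
 *	#include <sys/socket.h>
 *	#include <sys/un.h>
 *
 *	int bind_abstract(void)
 *	{
 *		struct sockaddr_un sun;
 *		int fd = socket(AF_UNIX, SOCK_STREAM, 0);
 *
 *		memset(&sun, 0, sizeof(sun));
 *		sun.sun_family = AF_UNIX;
 *		memcpy(sun.sun_path, "\0example", 8); - leading NUL, no terminator
 *		return bind(fd, (struct sockaddr *)&sun,
 *			    offsetof(struct sockaddr_un, sun_path) + 8);
 *	}
 */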

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/dcache.h>
#include <linux/namei.h>
#include <linux/socket.h>
#include <linux/un.h>
#include <linux/fcntl.h>
#include <linux/termios.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/in.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <net/af_unix.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <net/scm.h>
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/rtnetlink.h>
#include <linux/mount.h>
#include <net/checksum.h>
#include <linux/security.h>

static struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
static DEFINE_SPINLOCK(unix_table_lock);
static atomic_long_t unix_nr_socks;

#define unix_sockets_unbound	(&unix_socket_table[UNIX_HASH_SIZE])

#define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)

#ifdef CONFIG_SECURITY_NETWORK
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	memcpy(UNIXSID(skb), &scm->secid, sizeof(u32));
}

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secid = *UNIXSID(skb);
}
#else
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }
#endif /* CONFIG_SECURITY_NETWORK */

/*
 *  SMP locking strategy:
 *    the hash table is protected with the spinlock unix_table_lock
 *    each socket state is protected by a separate spin lock.
 */

static inline unsigned unix_hash_fold(__wsum n)
{
	unsigned hash = (__force unsigned)n;
	hash ^= hash>>16;
	hash ^= hash>>8;
	return hash&(UNIX_HASH_SIZE-1);
}
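
/*
 * Worked example of the fold above, assuming UNIX_HASH_SIZE is 256 (the
 * value in <net/af_unix.h> at the time of writing; treat that as an
 * assumption here). For n = 0x12345678:
 *
 *	hash  = 0x12345678
 *	hash ^= hash >> 16;	-> 0x12345678 ^ 0x00001234 = 0x1234444c
 *	hash ^= hash >> 8;	-> 0x1234444c ^ 0x00123444 = 0x12267008
 *	hash &= 256 - 1;	-> 0x08
 *
 * so the checksum of the whole address folds down to one of the
 * UNIX_HASH_SIZE hash-table buckets.
 */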

#define unix_peer(sk) (unix_sk(sk)->peer)

static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == sk;
}

static inline int unix_may_send(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
}

static inline int unix_recvq_full(struct sock const *sk)
{
	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
}

static struct sock *unix_peer_get(struct sock *s)
{
	struct sock *peer;

	unix_state_lock(s);
	peer = unix_peer(s);
	if (peer)
		sock_hold(peer);
	unix_state_unlock(s);
	return peer;
}

static inline void unix_release_addr(struct unix_address *addr)
{
	if (atomic_dec_and_test(&addr->refcnt))
		kfree(addr);
}

/*
 *	Check unix socket name:
 *		- it should not be zero length.
 *		- if it does not start with a zero byte, it should be
 *		  NUL-terminated (an FS object)
 *		- if it starts with a zero byte, it is an abstract name.
 */

static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned *hashp)
{
	if (len <= sizeof(short) || len > sizeof(*sunaddr))
		return -EINVAL;
	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
		return -EINVAL;
	if (sunaddr->sun_path[0]) {
		/*
		 * This may look like an off-by-one error but it is a bit more
		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
		 * sun_path[108] doesn't as such exist.  However in kernel space
		 * we are guaranteed that it is a valid memory location in our
		 * kernel address buffer.
		 */
		((char *)sunaddr)[len] = 0;
		len = strlen(sunaddr->sun_path)+1+sizeof(short);
		return len;
	}

	*hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
	return len;
}

static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}

static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	WARN_ON(!sk_unhashed(sk));
	sk_add_node(sk, list);
}

static inline void unix_remove_socket(struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_remove_socket(sk);
	spin_unlock(&unix_table_lock);
}

static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_insert_socket(list, sk);
	spin_unlock(&unix_table_lock);
}

static struct sock *__unix_find_socket_byname(struct net *net,
					      struct sockaddr_un *sunname,
					      int len, int type, unsigned hash)
{
	struct sock *s;
	struct hlist_node *node;

	sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
		struct unix_sock *u = unix_sk(s);

		if (!net_eq(sock_net(s), net))
			continue;

		if (u->addr->len == len &&
		    !memcmp(u->addr->name, sunname, len))
			goto found;
	}
	s = NULL;
found:
	return s;
}

static inline struct sock *unix_find_socket_byname(struct net *net,
						   struct sockaddr_un *sunname,
						   int len, int type,
						   unsigned hash)
{
	struct sock *s;

	spin_lock(&unix_table_lock);
	s = __unix_find_socket_byname(net, sunname, len, type, hash);
	if (s)
		sock_hold(s);
	spin_unlock(&unix_table_lock);
	return s;
}

static struct sock *unix_find_socket_byinode(struct inode *i)
{
	struct sock *s;
	struct hlist_node *node;

	spin_lock(&unix_table_lock);
	sk_for_each(s, node,
		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
		struct dentry *dentry = unix_sk(s)->dentry;

		if (dentry && dentry->d_inode == i) {
			sock_hold(s);
			goto found;
		}
	}
	s = NULL;
found:
	spin_unlock(&unix_table_lock);
	return s;
}

static inline int unix_writable(struct sock *sk)
{
	return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
}

static void unix_write_space(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	if (unix_writable(sk)) {
		wq = rcu_dereference(sk->sk_wq);
		if (wq_has_sleeper(wq))
			wake_up_interruptible_sync(&wq->wait);
		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
	}
	rcu_read_unlock();
}

/* When a dgram socket disconnects (or changes its peer), we clear its receive
 * queue of packets that arrived from the previous peer. First, this allows
 * flow control based only on wmem_alloc; second, an sk connected to a peer
 * may receive messages only from that peer. */
static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
{
	if (!skb_queue_empty(&sk->sk_receive_queue)) {
		skb_queue_purge(&sk->sk_receive_queue);
		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);

		/* If one link of a bidirectional dgram pipe is disconnected,
		 * we signal an error. Messages are lost. Do not do this
		 * when the peer was not connected to us.
		 */
		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
			other->sk_err = ECONNRESET;
			other->sk_error_report(other);
		}
	}
}

static void unix_sock_destructor(struct sock *sk)
{
	struct unix_sock *u = unix_sk(sk);

	skb_queue_purge(&sk->sk_receive_queue);

	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
	WARN_ON(!sk_unhashed(sk));
	WARN_ON(sk->sk_socket);
	if (!sock_flag(sk, SOCK_DEAD)) {
		printk(KERN_INFO "Attempt to release alive unix socket: %p\n", sk);
		return;
	}

	if (u->addr)
		unix_release_addr(u->addr);

	atomic_long_dec(&unix_nr_socks);
	local_bh_disable();
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
	local_bh_enable();
#ifdef UNIX_REFCNT_DEBUG
	printk(KERN_DEBUG "UNIX %p is destroyed, %ld are still alive.\n", sk,
		atomic_long_read(&unix_nr_socks));
#endif
}

static int unix_release_sock(struct sock *sk, int embrion)
{
	struct unix_sock *u = unix_sk(sk);
	struct dentry *dentry;
	struct vfsmount *mnt;
	struct sock *skpair;
	struct sk_buff *skb;
	int state;

	unix_remove_socket(sk);

	/* Clear state */
	unix_state_lock(sk);
	sock_orphan(sk);
	sk->sk_shutdown = SHUTDOWN_MASK;
	dentry	     = u->dentry;
	u->dentry    = NULL;
	mnt	     = u->mnt;
	u->mnt	     = NULL;
	state = sk->sk_state;
	sk->sk_state = TCP_CLOSE;
	unix_state_unlock(sk);

	wake_up_interruptible_all(&u->peer_wait);

	skpair = unix_peer(sk);

	if (skpair != NULL) {
		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
			unix_state_lock(skpair);
			/* No more writes */
			skpair->sk_shutdown = SHUTDOWN_MASK;
			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
				skpair->sk_err = ECONNRESET;
			unix_state_unlock(skpair);
			skpair->sk_state_change(skpair);
			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
		}
		sock_put(skpair); /* It may now die */
		unix_peer(sk) = NULL;
	}

	/* Try to flush out this socket. Throw out buffers at least */

	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		if (state == TCP_LISTEN)
			unix_release_sock(skb->sk, 1);
		/* passed fds are erased in the kfree_skb hook	      */
		kfree_skb(skb);
	}

	if (dentry) {
		dput(dentry);
		mntput(mnt);
	}

	sock_put(sk);

	/* ---- Socket is dead now and most probably destroyed ---- */

	/*
	 * Fixme: BSD difference: In BSD all sockets connected to us get
	 *	  ECONNRESET and we die on the spot. In Linux we behave
	 *	  like files and pipes do and wait for the last
	 *	  dereference.
	 *
	 * Can't we simply set sock->err?
	 *
	 *	  What is the above comment talking about? --ANK(980817)
	 */

	if (unix_tot_inflight)
		unix_gc();		/* Garbage collect fds */

	return 0;
}

static void init_peercred(struct sock *sk)
{
	put_pid(sk->sk_peer_pid);
	if (sk->sk_peer_cred)
		put_cred(sk->sk_peer_cred);
	sk->sk_peer_pid  = get_pid(task_tgid(current));
	sk->sk_peer_cred = get_current_cred();
}

static void copy_peercred(struct sock *sk, struct sock *peersk)
{
	put_pid(sk->sk_peer_pid);
	if (sk->sk_peer_cred)
		put_cred(sk->sk_peer_cred);
	sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
}
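
/*
 * The pid/cred that the two helpers above install are what userspace
 * reads back with SO_PEERCRED. A minimal, illustrative sketch (requires
 * _GNU_SOURCE for struct ucred; "fd" is a placeholder for a connected
 * AF_UNIX socket):
 *
 *	#include <stdio.h>
 *	#include <sys/socket.h>
 *
 *	struct ucred peer;
 *	socklen_t len = sizeof(peer);
 *
 *	if (getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &peer, &len) == 0)
 *		printf("pid=%d uid=%d gid=%d\n", peer.pid, peer.uid, peer.gid);
 */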

static int unix_listen(struct socket *sock, int backlog)
{
	int err;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	struct pid *old_pid = NULL;
	const struct cred *old_cred = NULL;

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;	/* Only stream/seqpacket sockets accept */
	err = -EINVAL;
	if (!u->addr)
		goto out;	/* No listens on an unbound socket */
	unix_state_lock(sk);
	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (backlog > sk->sk_max_ack_backlog)
		wake_up_interruptible_all(&u->peer_wait);
	sk->sk_max_ack_backlog	= backlog;
	sk->sk_state		= TCP_LISTEN;
	/* set credentials so connect can copy them */
	init_peercred(sk);
	err = 0;

out_unlock:
	unix_state_unlock(sk);
	put_pid(old_pid);
	if (old_cred)
		put_cred(old_cred);
out:
	return err;
}

static int unix_release(struct socket *);
static int unix_bind(struct socket *, struct sockaddr *, int);
static int unix_stream_connect(struct socket *, struct sockaddr *,
			       int addr_len, int flags);
static int unix_socketpair(struct socket *, struct socket *);
static int unix_accept(struct socket *, struct socket *, int);
static int unix_getname(struct socket *, struct sockaddr *, int *, int);
static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
static unsigned int unix_dgram_poll(struct file *, struct socket *,
				    poll_table *);
static int unix_ioctl(struct socket *, unsigned int, unsigned long);
static int unix_shutdown(struct socket *, int);
static int unix_stream_sendmsg(struct kiocb *, struct socket *,
			       struct msghdr *, size_t);
static int unix_stream_recvmsg(struct kiocb *, struct socket *,
			       struct msghdr *, size_t, int);
static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
			      struct msghdr *, size_t);
static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
			      struct msghdr *, size_t, int);
static int unix_dgram_connect(struct socket *, struct sockaddr *,
			      int, int);
static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
				  struct msghdr *, size_t);

static const struct proto_ops unix_stream_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_poll,
	.ioctl =	unix_ioctl,
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_stream_sendmsg,
	.recvmsg =	unix_stream_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};

static const struct proto_ops unix_dgram_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_dgram_connect,
	.socketpair =	unix_socketpair,
	.accept =	sock_no_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_dgram_sendmsg,
	.recvmsg =	unix_dgram_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};

static const struct proto_ops unix_seqpacket_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_seqpacket_sendmsg,
	.recvmsg =	unix_dgram_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};

static struct proto unix_proto = {
	.name			= "UNIX",
	.owner			= THIS_MODULE,
	.obj_size		= sizeof(struct unix_sock),
};

/*
 * AF_UNIX sockets do not interact with hardware, hence they
 * don't trigger interrupts - so it's safe for them to have
 * bh-unsafe locking for their sk_receive_queue.lock. Split off
 * this special lock-class by reinitializing the spinlock key:
 */
static struct lock_class_key af_unix_sk_receive_queue_lock_key;

static struct sock *unix_create1(struct net *net, struct socket *sock)
{
	struct sock *sk = NULL;
	struct unix_sock *u;

	atomic_long_inc(&unix_nr_socks);
	if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
		goto out;

	sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto);
	if (!sk)
		goto out;

	sock_init_data(sock, sk);
	lockdep_set_class(&sk->sk_receive_queue.lock,
				&af_unix_sk_receive_queue_lock_key);

	sk->sk_write_space	= unix_write_space;
	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
	sk->sk_destruct		= unix_sock_destructor;
	u	  = unix_sk(sk);
	u->dentry = NULL;
	u->mnt	  = NULL;
	spin_lock_init(&u->lock);
	atomic_long_set(&u->inflight, 0);
	INIT_LIST_HEAD(&u->link);
	mutex_init(&u->readlock); /* single task reading lock */
	init_waitqueue_head(&u->peer_wait);
	unix_insert_socket(unix_sockets_unbound, sk);
out:
	if (sk == NULL)
		atomic_long_dec(&unix_nr_socks);
	else {
		local_bh_disable();
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
		local_bh_enable();
	}
	return sk;
}

static int unix_create(struct net *net, struct socket *sock, int protocol,
		       int kern)
{
	if (protocol && protocol != PF_UNIX)
		return -EPROTONOSUPPORT;

	sock->state = SS_UNCONNECTED;

	switch (sock->type) {
	case SOCK_STREAM:
		sock->ops = &unix_stream_ops;
		break;
		/*
		 *	Believe it or not, BSD has AF_UNIX, SOCK_RAW,
		 *	though nothing uses it.
		 */
	case SOCK_RAW:
		sock->type = SOCK_DGRAM;
	case SOCK_DGRAM:
		sock->ops = &unix_dgram_ops;
		break;
	case SOCK_SEQPACKET:
		sock->ops = &unix_seqpacket_ops;
		break;
	default:
		return -ESOCKTNOSUPPORT;
	}

	return unix_create1(net, sock) ? 0 : -ENOMEM;
}

static int unix_release(struct socket *sock)
{
	struct sock *sk = sock->sk;

	if (!sk)
		return 0;

	sock->sk = NULL;

	return unix_release_sock(sk, 0);
}

static int unix_autobind(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	static u32 ordernum = 1;
	struct unix_address *addr;
	int err;
	unsigned int retries = 0;

	mutex_lock(&u->readlock);

	err = 0;
	if (u->addr)
		goto out;

	err = -ENOMEM;
	addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
	if (!addr)
		goto out;

	addr->name->sun_family = AF_UNIX;
	atomic_set(&addr->refcnt, 1);

retry:
	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
	addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));

	spin_lock(&unix_table_lock);
	ordernum = (ordernum+1)&0xFFFFF;

	if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
				      addr->hash)) {
		spin_unlock(&unix_table_lock);
		/*
		 * __unix_find_socket_byname() may take a long time if many
		 * names are already in use.
		 */
		cond_resched();
		/* Give up if all names seem to be in use. */
		if (retries++ == 0xFFFFF) {
			err = -ENOSPC;
			kfree(addr);
			goto out;
		}
		goto retry;
	}
	addr->hash ^= sk->sk_type;

	__unix_remove_socket(sk);
	u->addr = addr;
	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
	spin_unlock(&unix_table_lock);
	err = 0;

out:	mutex_unlock(&u->readlock);
	return err;
}
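
/*
 * Userspace can trigger the autobind above by calling bind() with only
 * the address family, i.e. an address length of sizeof(sa_family_t); the
 * kernel then picks a unique abstract name of the form "\0XXXXX" (five
 * hex digits, as generated above). An illustrative sketch:
 *
 *	struct sockaddr_un sun = { .sun_family = AF_UNIX };
 *
 *	bind(fd, (struct sockaddr *)&sun, sizeof(sun.sun_family));
 */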

static struct sock *unix_find_other(struct net *net,
				    struct sockaddr_un *sunname, int len,
				    int type, unsigned hash, int *error)
{
	struct sock *u;
	struct path path;
	int err = 0;

	if (sunname->sun_path[0]) {
		struct inode *inode;
		err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
		if (err)
			goto fail;
		inode = path.dentry->d_inode;
		err = inode_permission(inode, MAY_WRITE);
		if (err)
			goto put_fail;

		err = -ECONNREFUSED;
		if (!S_ISSOCK(inode->i_mode))
			goto put_fail;
		u = unix_find_socket_byinode(inode);
		if (!u)
			goto put_fail;

		if (u->sk_type == type)
			touch_atime(path.mnt, path.dentry);

		path_put(&path);

		err = -EPROTOTYPE;
		if (u->sk_type != type) {
			sock_put(u);
			goto fail;
		}
	} else {
		err = -ECONNREFUSED;
		u = unix_find_socket_byname(net, sunname, len, type, hash);
		if (u) {
			struct dentry *dentry;
			dentry = unix_sk(u)->dentry;
			if (dentry)
				touch_atime(unix_sk(u)->mnt, dentry);
		} else
			goto fail;
	}
	return u;

put_fail:
	path_put(&path);
fail:
	*error = err;
	return NULL;
}


static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct dentry *dentry = NULL;
	struct nameidata nd;
	int err;
	unsigned hash;
	struct unix_address *addr;
	struct hlist_head *list;

	err = -EINVAL;
	if (sunaddr->sun_family != AF_UNIX)
		goto out;

	if (addr_len == sizeof(short)) {
		err = unix_autobind(sock);
		goto out;
	}

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	mutex_lock(&u->readlock);

	err = -EINVAL;
	if (u->addr)
		goto out_up;

	err = -ENOMEM;
	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
	if (!addr)
		goto out_up;

	memcpy(addr->name, sunaddr, addr_len);
	addr->len = addr_len;
	addr->hash = hash ^ sk->sk_type;
	atomic_set(&addr->refcnt, 1);

	if (sunaddr->sun_path[0]) {
		unsigned int mode;
		err = 0;
		/*
		 * Get the parent directory and calculate the hash for the
		 * last component.
		 */
		err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd);
		if (err)
			goto out_mknod_parent;

		dentry = lookup_create(&nd, 0);
		err = PTR_ERR(dentry);
		if (IS_ERR(dentry))
			goto out_mknod_unlock;

		/*
		 * All right, let's create it.
		 */
		mode = S_IFSOCK |
		       (SOCK_INODE(sock)->i_mode & ~current_umask());
		err = mnt_want_write(nd.path.mnt);
		if (err)
			goto out_mknod_dput;
		err = security_path_mknod(&nd.path, dentry, mode, 0);
		if (err)
			goto out_mknod_drop_write;
		err = vfs_mknod(nd.path.dentry->d_inode, dentry, mode, 0);
out_mknod_drop_write:
		mnt_drop_write(nd.path.mnt);
		if (err)
			goto out_mknod_dput;
		mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
		dput(nd.path.dentry);
		nd.path.dentry = dentry;

		addr->hash = UNIX_HASH_SIZE;
	}

	spin_lock(&unix_table_lock);

	if (!sunaddr->sun_path[0]) {
		err = -EADDRINUSE;
		if (__unix_find_socket_byname(net, sunaddr, addr_len,
					      sk->sk_type, hash)) {
			unix_release_addr(addr);
			goto out_unlock;
		}

		list = &unix_socket_table[addr->hash];
	} else {
		list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
		u->dentry = nd.path.dentry;
		u->mnt    = nd.path.mnt;
	}

	err = 0;
	__unix_remove_socket(sk);
	u->addr = addr;
	__unix_insert_socket(list, sk);

out_unlock:
	spin_unlock(&unix_table_lock);
out_up:
	mutex_unlock(&u->readlock);
out:
	return err;

out_mknod_dput:
	dput(dentry);
out_mknod_unlock:
	mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
	path_put(&nd.path);
out_mknod_parent:
	if (err == -EEXIST)
		err = -EADDRINUSE;
	unix_release_addr(addr);
	goto out_up;
}

static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_lock(sk1);
		return;
	}
	if (sk1 < sk2) {
		unix_state_lock(sk1);
		unix_state_lock_nested(sk2);
	} else {
		unix_state_lock(sk2);
		unix_state_lock_nested(sk1);
	}
}

static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_unlock(sk1);
		return;
	}
	unix_state_unlock(sk1);
	unix_state_unlock(sk2);
}

static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
			      int alen, int flags)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
	struct sock *other;
	unsigned hash;
	int err;

	if (addr->sa_family != AF_UNSPEC) {
		err = unix_mkname(sunaddr, alen, &hash);
		if (err < 0)
			goto out;
		alen = err;

		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
			goto out;

restart:
		other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
		if (!other)
			goto out;

		unix_state_double_lock(sk, other);

		/* Apparently VFS overslept socket death. Retry. */
		if (sock_flag(other, SOCK_DEAD)) {
			unix_state_double_unlock(sk, other);
			sock_put(other);
			goto restart;
		}

		err = -EPERM;
		if (!unix_may_send(sk, other))
			goto out_unlock;

		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;

	} else {
		/*
		 *	1003.1g breaking connected state with AF_UNSPEC
		 */
		other = NULL;
		unix_state_double_lock(sk, other);
	}

	/*
	 * If it was connected, reconnect.
	 */
	if (unix_peer(sk)) {
		struct sock *old_peer = unix_peer(sk);
		unix_peer(sk) = other;
		unix_state_double_unlock(sk, other);

		if (other != old_peer)
			unix_dgram_disconnected(sk, old_peer);
		sock_put(old_peer);
	} else {
		unix_peer(sk) = other;
		unix_state_double_unlock(sk, other);
	}
	return 0;

out_unlock:
	unix_state_double_unlock(sk, other);
	sock_put(other);
out:
	return err;
}

static long unix_wait_for_peer(struct sock *other, long timeo)
{
	struct unix_sock *u = unix_sk(other);
	int sched;
	DEFINE_WAIT(wait);

	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);

	sched = !sock_flag(other, SOCK_DEAD) &&
		!(other->sk_shutdown & RCV_SHUTDOWN) &&
		unix_recvq_full(other);

	unix_state_unlock(other);

	if (sched)
		timeo = schedule_timeout(timeo);

	finish_wait(&u->peer_wait, &wait);
	return timeo;
}

static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			       int addr_len, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
	struct sock *newsk = NULL;
	struct sock *other = NULL;
	struct sk_buff *skb = NULL;
	unsigned hash;
	int st;
	int err;
	long timeo;

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
	    (err = unix_autobind(sock)) != 0)
		goto out;

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	/* First of all, allocate resources.
	   If we were to allocate after the state is locked,
	   we would have to recheck everything again anyway.
	 */

	err = -ENOMEM;

	/* create a new sock for the completed connection */
	newsk = unix_create1(sock_net(sk), NULL);
	if (newsk == NULL)
		goto out;

	/* Allocate skb for sending to listening sock */
	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
	if (skb == NULL)
		goto out;

restart:
	/*  Find listening sock. */
	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
	if (!other)
		goto out;

	/* Latch state of peer */
	unix_state_lock(other);

	/* Apparently VFS overslept socket death. Retry. */
	if (sock_flag(other, SOCK_DEAD)) {
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = -ECONNREFUSED;
	if (other->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (unix_recvq_full(other)) {
		err = -EAGAIN;
		if (!timeo)
			goto out_unlock;

		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
		sock_put(other);
		goto restart;
	}

	/* Latch our state.

	   This is a tricky place. We need to grab our own state lock and
	   cannot drop the lock on the peer. It is dangerous because a
	   deadlock is possible. The connect-to-self case and simultaneous
	   connect attempts are eliminated by checking socket state: other
	   is TCP_LISTEN, and if sk is TCP_LISTEN we check for that before
	   attempting to grab the lock.

	   Well, and we have to recheck the state after the socket is locked.
	 */
	st = sk->sk_state;

	switch (st) {
	case TCP_CLOSE:
		/* This is ok... continue with connect */
		break;
	case TCP_ESTABLISHED:
		/* Socket is already connected */
		err = -EISCONN;
		goto out_unlock;
	default:
		err = -EINVAL;
		goto out_unlock;
	}

	unix_state_lock_nested(sk);

	if (sk->sk_state != st) {
		unix_state_unlock(sk);
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = security_unix_stream_connect(sock, other->sk_socket, newsk);
	if (err) {
		unix_state_unlock(sk);
		goto out_unlock;
	}

	/* The way is open! Quickly set all the necessary fields... */

	sock_hold(sk);
	unix_peer(newsk)	= sk;
	newsk->sk_state		= TCP_ESTABLISHED;
	newsk->sk_type		= sk->sk_type;
	init_peercred(newsk);
	newu = unix_sk(newsk);
	newsk->sk_wq		= &newu->peer_wq;
	otheru = unix_sk(other);

	/* copy address information from listening to new sock */
	if (otheru->addr) {
		atomic_inc(&otheru->addr->refcnt);
		newu->addr = otheru->addr;
	}
	if (otheru->dentry) {
		newu->dentry	= dget(otheru->dentry);
		newu->mnt	= mntget(otheru->mnt);
	}

	/* Set credentials */
	copy_peercred(sk, other);

	sock->state	= SS_CONNECTED;
	sk->sk_state	= TCP_ESTABLISHED;
	sock_hold(newsk);

	smp_mb__after_atomic_inc();	/* sock_hold() does an atomic_inc() */
	unix_peer(sk)	= newsk;

	unix_state_unlock(sk);

	/* take ten and send info to the listening sock */
	spin_lock(&other->sk_receive_queue.lock);
	__skb_queue_tail(&other->sk_receive_queue, skb);
	spin_unlock(&other->sk_receive_queue.lock);
	unix_state_unlock(other);
	other->sk_data_ready(other, 0);
	sock_put(other);
	return 0;

out_unlock:
	if (other)
		unix_state_unlock(other);

out:
	kfree_skb(skb);
	if (newsk)
		unix_release_sock(newsk, 0);
	if (other)
		sock_put(other);
	return err;
}

static int unix_socketpair(struct socket *socka, struct socket *sockb)
{
	struct sock *ska = socka->sk, *skb = sockb->sk;

	/* Join our sockets back to back */
	sock_hold(ska);
	sock_hold(skb);
	unix_peer(ska) = skb;
	unix_peer(skb) = ska;
	init_peercred(ska);
	init_peercred(skb);

	if (ska->sk_type != SOCK_DGRAM) {
		ska->sk_state = TCP_ESTABLISHED;
		skb->sk_state = TCP_ESTABLISHED;
		socka->state  = SS_CONNECTED;
		sockb->state  = SS_CONNECTED;
	}
	return 0;
}
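
/*
 * An illustrative userspace counterpart of the function above: the two
 * descriptors come back already connected to each other, with peer
 * credentials set (sketch only, error handling omitted):
 *
 *	int sv[2];
 *	char buf[6];
 *
 *	socketpair(AF_UNIX, SOCK_STREAM, 0, sv);
 *	write(sv[0], "hello", 6);
 *	read(sv[1], buf, sizeof(buf));	- receives "hello"
 */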

static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
{
	struct sock *sk = sock->sk;
	struct sock *tsk;
	struct sk_buff *skb;
	int err;

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;

	err = -EINVAL;
	if (sk->sk_state != TCP_LISTEN)
		goto out;

	/* If socket state is TCP_LISTEN it cannot change (for now...),
	 * so no locks are necessary.
	 */

	skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
	if (!skb) {
		/* This means receive shutdown. */
		if (err == 0)
			err = -EINVAL;
		goto out;
	}

	tsk = skb->sk;
	skb_free_datagram(sk, skb);
	wake_up_interruptible(&unix_sk(sk)->peer_wait);

	/* attach accepted sock to socket */
	unix_state_lock(tsk);
	newsock->state = SS_CONNECTED;
	sock_graft(tsk, newsock);
	unix_state_unlock(tsk);
	return 0;

out:
	return err;
}


static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
{
	struct sock *sk = sock->sk;
	struct unix_sock *u;
	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
	int err = 0;

	if (peer) {
		sk = unix_peer_get(sk);

		err = -ENOTCONN;
		if (!sk)
			goto out;
		err = 0;
	} else {
		sock_hold(sk);
	}

	u = unix_sk(sk);
	unix_state_lock(sk);
	if (!u->addr) {
		sunaddr->sun_family = AF_UNIX;
		sunaddr->sun_path[0] = 0;
		*uaddr_len = sizeof(short);
	} else {
		struct unix_address *addr = u->addr;

		*uaddr_len = addr->len;
		memcpy(sunaddr, addr->name, *uaddr_len);
	}
	unix_state_unlock(sk);
	sock_put(sk);
out:
	return err;
}

static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
	int i;

	scm->fp = UNIXCB(skb).fp;
	UNIXCB(skb).fp = NULL;

	for (i = scm->fp->count-1; i >= 0; i--)
		unix_notinflight(scm->fp->fp[i]);
}

static void unix_destruct_scm(struct sk_buff *skb)
{
	struct scm_cookie scm;
	memset(&scm, 0, sizeof(scm));
	scm.pid  = UNIXCB(skb).pid;
	scm.cred = UNIXCB(skb).cred;
	if (UNIXCB(skb).fp)
		unix_detach_fds(&scm, skb);

	/* Alas, it calls VFS */
	/* So fscking what? fput() has been SMP-safe since last summer */
	scm_destroy(&scm);
	sock_wfree(skb);
}

static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
	int i;

	/*
	 * Need to duplicate file references for the sake of garbage
	 * collection.  Otherwise a socket in the fps might become a
	 * candidate for GC while the skb is not yet queued.
	 */
	UNIXCB(skb).fp = scm_fp_dup(scm->fp);
	if (!UNIXCB(skb).fp)
		return -ENOMEM;

	for (i = scm->fp->count-1; i >= 0; i--)
		unix_inflight(scm->fp->fp[i]);
	return 0;
}

static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
{
	int err = 0;
	UNIXCB(skb).pid  = get_pid(scm->pid);
	UNIXCB(skb).cred = get_cred(scm->cred);
	UNIXCB(skb).fp = NULL;
	if (scm->fp && send_fds)
		err = unix_attach_fds(scm, skb);

	skb->destructor = unix_destruct_scm;
	return err;
}
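
/*
 * The attach/detach helpers above are the kernel side of SCM_RIGHTS file
 * descriptor passing. A minimal, illustrative userspace sender (error
 * handling omitted; "fd_to_send" and "sock_fd" are placeholders):
 *
 *	char dummy = 'x', cbuf[CMSG_SPACE(sizeof(int))];
 *	struct iovec iov = { .iov_base = &dummy, .iov_len = 1 };
 *	struct msghdr msg = {
 *		.msg_iov = &iov, .msg_iovlen = 1,
 *		.msg_control = cbuf, .msg_controllen = sizeof(cbuf),
 *	};
 *	struct cmsghdr *cm = CMSG_FIRSTHDR(&msg);
 *
 *	cm->cmsg_level = SOL_SOCKET;
 *	cm->cmsg_type  = SCM_RIGHTS;
 *	cm->cmsg_len   = CMSG_LEN(sizeof(int));
 *	memcpy(CMSG_DATA(cm), &fd_to_send, sizeof(int));
 *	sendmsg(sock_fd, &msg, 0);
 */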

/*
 *	Send AF_UNIX data.
 */

static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
			      struct msghdr *msg, size_t len)
{
	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr = msg->msg_name;
	struct sock *other = NULL;
	int namelen = 0; /* fake init to quiet GCC */
	int err;
	unsigned hash;
	struct sk_buff *skb;
	long timeo;
	struct scm_cookie tmp_scm;

	if (NULL == siocb->scm)
		siocb->scm = &tmp_scm;
	wait_for_unix_gc();
	err = scm_send(sock, msg, siocb->scm);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out;

	if (msg->msg_namelen) {
		err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
		if (err < 0)
			goto out;
		namelen = err;
	} else {
		sunaddr = NULL;
		err = -ENOTCONN;
		other = unix_peer_get(sk);
		if (!other)
			goto out;
	}

	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
	    && (err = unix_autobind(sock)) != 0)
		goto out;

	err = -EMSGSIZE;
	if (len > sk->sk_sndbuf - 32)
		goto out;

	skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err);
	if (skb == NULL)
		goto out;

	err = unix_scm_to_skb(siocb->scm, skb, true);
	if (err)
		goto out_free;
	unix_get_secdata(siocb->scm, skb);

	skb_reset_transport_header(skb);
	err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
	if (err)
		goto out_free;

	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);

restart:
	if (!other) {
		err = -ECONNRESET;
		if (sunaddr == NULL)
			goto out_free;

		other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
					hash, &err);
		if (other == NULL)
			goto out_free;
	}

	unix_state_lock(other);
	err = -EPERM;
	if (!unix_may_send(sk, other))
		goto out_unlock;

	if (sock_flag(other, SOCK_DEAD)) {
		/*
		 *	Check with 1003.1g - what should a
		 *	datagram error be?
		 */
		unix_state_unlock(other);
		sock_put(other);

		err = 0;
		unix_state_lock(sk);
		if (unix_peer(sk) == other) {
			unix_peer(sk) = NULL;
			unix_state_unlock(sk);

			unix_dgram_disconnected(sk, other);
			sock_put(other);
			err = -ECONNREFUSED;
		} else {
			unix_state_unlock(sk);
		}

		other = NULL;
		if (err)
			goto out_free;
		goto restart;
	}

	err = -EPIPE;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (sk->sk_type != SOCK_SEQPACKET) {
		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;
	}

	if (unix_peer(other) != sk && unix_recvq_full(other)) {
		if (!timeo) {
			err = -EAGAIN;
			goto out_unlock;
		}

		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out_free;

		goto restart;
	}

	if (sock_flag(other, SOCK_RCVTSTAMP))
		__net_timestamp(skb);
	skb_queue_tail(&other->sk_receive_queue, skb);
	unix_state_unlock(other);
	other->sk_data_ready(other, len);
	sock_put(other);
	scm_destroy(siocb->scm);
	return len;

out_unlock:
	unix_state_unlock(other);
out_free:
	kfree_skb(skb);
out:
	if (other)
		sock_put(other);
	scm_destroy(siocb->scm);
	return err;
}


static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
			       struct msghdr *msg, size_t len)
{
	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
	struct sock *sk = sock->sk;
	struct sock *other = NULL;
	struct sockaddr_un *sunaddr = msg->msg_name;
	int err, size;
	struct sk_buff *skb;
	int sent = 0;
	struct scm_cookie tmp_scm;
	bool fds_sent = false;

	if (NULL == siocb->scm)
		siocb->scm = &tmp_scm;
	wait_for_unix_gc();
	err = scm_send(sock, msg, siocb->scm);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out_err;

	if (msg->msg_namelen) {
		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
		goto out_err;
	} else {
		sunaddr = NULL;
		err = -ENOTCONN;
		other = unix_peer(sk);
		if (!other)
			goto out_err;
	}

	if (sk->sk_shutdown & SEND_SHUTDOWN)
		goto pipe_err;

	while (sent < len) {
		/*
		 *	Optimisation for the fact that under 0.01% of X
		 *	messages typically need breaking up.
		 */

		size = len-sent;

		/* Keep two messages in the pipe so it schedules better */
		if (size > ((sk->sk_sndbuf >> 1) - 64))
			size = (sk->sk_sndbuf >> 1) - 64;

		if (size > SKB_MAX_ALLOC)
			size = SKB_MAX_ALLOC;

		/*
		 *	Grab a buffer
		 */

		skb = sock_alloc_send_skb(sk, size, msg->msg_flags&MSG_DONTWAIT,
					  &err);

		if (skb == NULL)
			goto out_err;

		/*
		 *	If you pass two values to sock_alloc_send_skb
		 *	it tries to grab the large buffer with GFP_NOFS
		 *	(which can fail easily), and if that fails it grabs
		 *	the fallback-size buffer, which is under a page and
		 *	will succeed. [Alan]
		 */
		size = min_t(int, size, skb_tailroom(skb));


		/* Only send the fds in the first buffer */
		err = unix_scm_to_skb(siocb->scm, skb, !fds_sent);
		if (err) {
			kfree_skb(skb);
			goto out_err;
		}
		fds_sent = true;

		err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
		if (err) {
			kfree_skb(skb);
			goto out_err;
		}

		unix_state_lock(other);

		if (sock_flag(other, SOCK_DEAD) ||
		    (other->sk_shutdown & RCV_SHUTDOWN))
			goto pipe_err_free;

		skb_queue_tail(&other->sk_receive_queue, skb);
		unix_state_unlock(other);
		other->sk_data_ready(other, size);
		sent += size;
	}

	scm_destroy(siocb->scm);
	siocb->scm = NULL;

	return sent;

pipe_err_free:
	unix_state_unlock(other);
	kfree_skb(skb);
pipe_err:
	if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
		send_sig(SIGPIPE, current, 0);
	err = -EPIPE;
out_err:
	scm_destroy(siocb->scm);
	siocb->scm = NULL;
	return sent ? : err;
}

static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
				  struct msghdr *msg, size_t len)
{
	int err;
	struct sock *sk = sock->sk;

	err = sock_error(sk);
	if (err)
		return err;

	if (sk->sk_state != TCP_ESTABLISHED)
		return -ENOTCONN;

	if (msg->msg_namelen)
		msg->msg_namelen = 0;

	return unix_dgram_sendmsg(kiocb, sock, msg, len);
}

static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
{
	struct unix_sock *u = unix_sk(sk);

	msg->msg_namelen = 0;
	if (u->addr) {
		msg->msg_namelen = u->addr->len;
		memcpy(msg->msg_name, u->addr->name, u->addr->len);
	}
}

static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
			      struct msghdr *msg, size_t size,
			      int flags)
{
	struct sock_iocb *siocb = kiocb_to_siocb(iocb);
	struct scm_cookie tmp_scm;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	int noblock = flags & MSG_DONTWAIT;
	struct sk_buff *skb;
	int err;

	err = -EOPNOTSUPP;
	if (flags&MSG_OOB)
		goto out;

	msg->msg_namelen = 0;

	mutex_lock(&u->readlock);

	skb = skb_recv_datagram(sk, flags, noblock, &err);
	if (!skb) {
		unix_state_lock(sk);
		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
		    (sk->sk_shutdown & RCV_SHUTDOWN))
			err = 0;
		unix_state_unlock(sk);
		goto out_unlock;
	}

	wake_up_interruptible_sync(&u->peer_wait);

	if (msg->msg_name)
		unix_copy_addr(msg, skb->sk);

	if (size > skb->len)
		size = skb->len;
	else if (size < skb->len)
		msg->msg_flags |= MSG_TRUNC;

	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size);
	if (err)
		goto out_free;

	if (sock_flag(sk, SOCK_RCVTSTAMP))
		__sock_recv_timestamp(msg, sk, skb);

	if (!siocb->scm) {
		siocb->scm = &tmp_scm;
		memset(&tmp_scm, 0, sizeof(tmp_scm));
	}
	scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred);
	unix_set_secdata(siocb->scm, skb);

	if (!(flags & MSG_PEEK)) {
		if (UNIXCB(skb).fp)
			unix_detach_fds(siocb->scm, skb);
	} else {
		/* It is questionable: on PEEK we could:
		   - not return fds - good, but too simple 8)
		   - return fds, and not return them on read (the old
		     strategy, apparently wrong)
		   - clone fds (I chose this for now, it is the most
		     universal solution)

		   POSIX 1003.1g does not actually define this clearly
		   at all. POSIX 1003.1g doesn't define a lot of things
		   clearly, however!

		*/
		if (UNIXCB(skb).fp)
			siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
	}
	err = size;

	scm_recv(sock, msg, siocb->scm, flags);

out_free:
	skb_free_datagram(sk, skb);
out_unlock:
	mutex_unlock(&u->readlock);
out:
	return err;
}

/*
 *	Sleep until data has arrived, but check for races.
 */

static long unix_stream_data_wait(struct sock *sk, long timeo)
{
	DEFINE_WAIT(wait);

	unix_state_lock(sk);

	for (;;) {
		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);

		if (!skb_queue_empty(&sk->sk_receive_queue) ||
		    sk->sk_err ||
		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
		    signal_pending(current) ||
		    !timeo)
			break;

		set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
		unix_state_unlock(sk);
		timeo = schedule_timeout(timeo);
		unix_state_lock(sk);
		clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
	}

	finish_wait(sk_sleep(sk), &wait);
	unix_state_unlock(sk);
	return timeo;
}


static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
			       struct msghdr *msg, size_t size,
			       int flags)
{
	struct sock_iocb *siocb = kiocb_to_siocb(iocb);
	struct scm_cookie tmp_scm;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr = msg->msg_name;
	int copied = 0;
	int check_creds = 0;
	int target;
	int err = 0;
	long timeo;

	err = -EINVAL;
	if (sk->sk_state != TCP_ESTABLISHED)
		goto out;

	err = -EOPNOTSUPP;
	if (flags&MSG_OOB)
		goto out;

	target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
	timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT);

	msg->msg_namelen = 0;

	/* Lock the socket to prevent queue disordering
	 * while we sleep copying data to the iovec
	 */

	if (!siocb->scm) {
		siocb->scm = &tmp_scm;
		memset(&tmp_scm, 0, sizeof(tmp_scm));
	}

	mutex_lock(&u->readlock);

	do {
		int chunk;
		struct sk_buff *skb;

		unix_state_lock(sk);
		skb = skb_dequeue(&sk->sk_receive_queue);
		if (skb == NULL) {
			if (copied >= target)
				goto unlock;

			/*
			 *	POSIX 1003.1g mandates this order.
			 */

			err = sock_error(sk);
			if (err)
				goto unlock;
			if (sk->sk_shutdown & RCV_SHUTDOWN)
				goto unlock;

			unix_state_unlock(sk);
			err = -EAGAIN;
			if (!timeo)
				break;
			mutex_unlock(&u->readlock);

			timeo = unix_stream_data_wait(sk, timeo);

			if (signal_pending(current)) {
				err = sock_intr_errno(timeo);
				goto out;
			}
			mutex_lock(&u->readlock);
			continue;
 unlock:
			unix_state_unlock(sk);
			break;
		}
		unix_state_unlock(sk);

		if (check_creds) {
			/* Never glue messages from different writers */
			if ((UNIXCB(skb).pid  != siocb->scm->pid) ||
			    (UNIXCB(skb).cred != siocb->scm->cred)) {
				skb_queue_head(&sk->sk_receive_queue, skb);
				break;
			}
		} else {
			/* Copy credentials */
			scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred);
			check_creds = 1;
		}

		/* Copy address just once */
		if (sunaddr) {
			unix_copy_addr(msg, skb->sk);
			sunaddr = NULL;
		}

		chunk = min_t(unsigned int, skb->len, size);
		if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
			skb_queue_head(&sk->sk_receive_queue, skb);
			if (copied == 0)
				copied = -EFAULT;
			break;
		}
		copied += chunk;
		size -= chunk;

		/* Mark read part of skb as used */
		if (!(flags & MSG_PEEK)) {
			skb_pull(skb, chunk);

			if (UNIXCB(skb).fp)
				unix_detach_fds(siocb->scm, skb);

			/* put the skb back if we didn't use it up.. */
			if (skb->len) {
				skb_queue_head(&sk->sk_receive_queue, skb);
				break;
			}

			consume_skb(skb);

			if (siocb->scm->fp)
				break;
		} else {
			/* It is questionable, see the note in
			 * unix_dgram_recvmsg.
			 */
			if (UNIXCB(skb).fp)
				siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);

			/* put the message back and return */
			skb_queue_head(&sk->sk_receive_queue, skb);
			break;
		}
	} while (size);

	mutex_unlock(&u->readlock);
	scm_recv(sock, msg, siocb->scm, flags);
out:
	return copied ? : err;
}

static int unix_shutdown(struct socket *sock, int mode)
{
	struct sock *sk = sock->sk;
	struct sock *other;

	mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);

	if (mode) {
		unix_state_lock(sk);
		sk->sk_shutdown |= mode;
		other = unix_peer(sk);
		if (other)
			sock_hold(other);
		unix_state_unlock(sk);
		sk->sk_state_change(sk);

		if (other &&
			(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {

			int peer_mode = 0;

			if (mode&RCV_SHUTDOWN)
				peer_mode |= SEND_SHUTDOWN;
			if (mode&SEND_SHUTDOWN)
				peer_mode |= RCV_SHUTDOWN;
			unix_state_lock(other);
			other->sk_shutdown |= peer_mode;
			unix_state_unlock(other);
			other->sk_state_change(other);
			if (peer_mode == SHUTDOWN_MASK)
				sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
			else if (peer_mode & RCV_SHUTDOWN)
				sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
		}
		if (other)
			sock_put(other);
	}
	return 0;
}

static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	struct sock *sk = sock->sk;
	long amount = 0;
	int err;

	switch (cmd) {
	case SIOCOUTQ:
		amount = sk_wmem_alloc_get(sk);
		err = put_user(amount, (int __user *)arg);
		break;
	case SIOCINQ:
		{
			struct sk_buff *skb;

			if (sk->sk_state == TCP_LISTEN) {
				err = -EINVAL;
				break;
			}

			spin_lock(&sk->sk_receive_queue.lock);
			if (sk->sk_type == SOCK_STREAM ||
			    sk->sk_type == SOCK_SEQPACKET) {
				skb_queue_walk(&sk->sk_receive_queue, skb)
					amount += skb->len;
			} else {
				skb = skb_peek(&sk->sk_receive_queue);
				if (skb)
					amount = skb->len;
			}
			spin_unlock(&sk->sk_receive_queue.lock);
			err = put_user(amount, (int __user *)arg);
			break;
		}

	default:
		err = -ENOIOCTLCMD;
		break;
	}
	return err;
}
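
/*
 * Userspace view of the two ioctls handled above (illustrative sketch;
 * SIOCINQ/SIOCOUTQ come from <linux/sockios.h>):
 *
 *	int inq, outq;
 *
 *	ioctl(fd, SIOCINQ,  &inq);	- bytes queued for reading
 *	ioctl(fd, SIOCOUTQ, &outq);	- bytes not yet consumed by the peer
 */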

static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
{
	struct sock *sk = sock->sk;
	unsigned int mask;

	sock_poll_wait(file, sk_sleep(sk), wait);
	mask = 0;

	/* exceptional events? */
	if (sk->sk_err)
		mask |= POLLERR;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		mask |= POLLHUP;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLRDHUP | POLLIN | POLLRDNORM;

	/* readable? */
	if (!skb_queue_empty(&sk->sk_receive_queue))
		mask |= POLLIN | POLLRDNORM;

	/* Connection-based sockets need to check for termination and startup */
	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
	    sk->sk_state == TCP_CLOSE)
		mask |= POLLHUP;

	/*
	 * We also set writable when the other side has shut down the
	 * connection. This prevents stuck sockets.
	 */
	if (unix_writable(sk))
		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;

	return mask;
}

static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
				    poll_table *wait)
{
	struct sock *sk = sock->sk, *other;
	unsigned int mask, writable;

	sock_poll_wait(file, sk_sleep(sk), wait);
	mask = 0;

	/* exceptional events? */
	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
		mask |= POLLERR;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLRDHUP;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		mask |= POLLHUP;

	/* readable? */
	if (!skb_queue_empty(&sk->sk_receive_queue) ||
	    (sk->sk_shutdown & RCV_SHUTDOWN))
		mask |= POLLIN | POLLRDNORM;

	/* Connection-based sockets need to check for termination and startup */
	if (sk->sk_type == SOCK_SEQPACKET) {
		if (sk->sk_state == TCP_CLOSE)
			mask |= POLLHUP;
		/* connection hasn't started yet? */
		if (sk->sk_state == TCP_SYN_SENT)
			return mask;
	}

	/* writable? */
	writable = unix_writable(sk);
	if (writable) {
		other = unix_peer_get(sk);
		if (other) {
			if (unix_peer(other) != sk) {
				sock_poll_wait(file, &unix_sk(other)->peer_wait,
					  wait);
				if (unix_recvq_full(other))
					writable = 0;
			}

			sock_put(other);
		}
	}

	if (writable)
		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
	else
		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);

	return mask;
}

#ifdef CONFIG_PROC_FS
static struct sock *first_unix_socket(int *i)
{
	for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) {
		if (!hlist_empty(&unix_socket_table[*i]))
			return __sk_head(&unix_socket_table[*i]);
	}
	return NULL;
}

static struct sock *next_unix_socket(int *i, struct sock *s)
{
	struct sock *next = sk_next(s);
	/* More in this chain? */
	if (next)
		return next;
	/* Look for next non-empty chain. */
	for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) {
		if (!hlist_empty(&unix_socket_table[*i]))
			return __sk_head(&unix_socket_table[*i]);
	}
	return NULL;
}

struct unix_iter_state {
	struct seq_net_private p;
	int i;
};

static struct sock *unix_seq_idx(struct seq_file *seq, loff_t pos)
{
	struct unix_iter_state *iter = seq->private;
	loff_t off = 0;
	struct sock *s;

	for (s = first_unix_socket(&iter->i); s; s = next_unix_socket(&iter->i, s)) {
		if (sock_net(s) != seq_file_net(seq))
			continue;
		if (off == pos)
			return s;
		++off;
	}
	return NULL;
}

static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(unix_table_lock)
{
	spin_lock(&unix_table_lock);
	return *pos ? unix_seq_idx(seq, *pos - 1) : SEQ_START_TOKEN;
}

static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct unix_iter_state *iter = seq->private;
	struct sock *sk = v;
	++*pos;

	if (v == SEQ_START_TOKEN)
		sk = first_unix_socket(&iter->i);
	else
		sk = next_unix_socket(&iter->i, sk);
	while (sk && (sock_net(sk) != seq_file_net(seq)))
		sk = next_unix_socket(&iter->i, sk);
	return sk;
}

static void unix_seq_stop(struct seq_file *seq, void *v)
	__releases(unix_table_lock)
{
	spin_unlock(&unix_table_lock);
}

static int unix_seq_show(struct seq_file *seq, void *v)
{

	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
			 "Inode Path\n");
	else {
		struct sock *s = v;
		struct unix_sock *u = unix_sk(s);
		unix_state_lock(s);

		seq_printf(seq, "%p: %08X %08X %08X %04X %02X %5lu",
			s,
			atomic_read(&s->sk_refcnt),
			0,
			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
			s->sk_type,
			s->sk_socket ?
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
			sock_i_ino(s));

		if (u->addr) {
			int i, len;
			seq_putc(seq, ' ');

			i = 0;
			len = u->addr->len - sizeof(short);
			if (!UNIX_ABSTRACT(s))
				len--;
			else {
				seq_putc(seq, '@');
				i++;
			}
			for ( ; i < len; i++)
				seq_putc(seq, u->addr->name->sun_path[i]);
		}
		unix_state_unlock(s);
		seq_putc(seq, '\n');
	}

	return 0;
}

static const struct seq_operations unix_seq_ops = {
	.start  = unix_seq_start,
	.next   = unix_seq_next,
	.stop   = unix_seq_stop,
	.show   = unix_seq_show,
};

static int unix_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &unix_seq_ops,
			    sizeof(struct unix_iter_state));
}

static const struct file_operations unix_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= unix_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};

#endif

static const struct net_proto_family unix_family_ops = {
	.family = PF_UNIX,
	.create = unix_create,
	.owner	= THIS_MODULE,
};


static int __net_init unix_net_init(struct net *net)
{
	int error = -ENOMEM;

	net->unx.sysctl_max_dgram_qlen = 10;
	if (unix_sysctl_register(net))
		goto out;

#ifdef CONFIG_PROC_FS
	if (!proc_net_fops_create(net, "unix", 0, &unix_seq_fops)) {
		unix_sysctl_unregister(net);
		goto out;
	}
#endif
	error = 0;
out:
	return error;
}

static void __net_exit unix_net_exit(struct net *net)
{
	unix_sysctl_unregister(net);
	proc_net_remove(net, "unix");
}

static struct pernet_operations unix_net_ops = {
	.init = unix_net_init,
	.exit = unix_net_exit,
};

static int __init af_unix_init(void)
{
	int rc = -1;
	struct sk_buff *dummy_skb;

	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb));

	rc = proto_register(&unix_proto, 1);
	if (rc != 0) {
		printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n",
		       __func__);
		goto out;
	}

	sock_register(&unix_family_ops);
	register_pernet_subsys(&unix_net_ops);
out:
	return rc;
}

static void __exit af_unix_exit(void)
{
	sock_unregister(PF_UNIX);
	proto_unregister(&unix_proto);
	unregister_pernet_subsys(&unix_net_ops);
}

/* Earlier than device_initcall() so that other drivers invoking
   request_module() don't end up in a loop when modprobe tries
   to use a UNIX socket. But later than subsys_initcall() because
   we depend on stuff initialised there. */
fs_initcall(af_unix_init);
module_exit(af_unix_exit);

MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_UNIX);