af_unix.c revision ef047f5e1085d6393748d1ee27d6327905f098dc
1/*
2 * NET4:	Implementation of BSD Unix domain sockets.
3 *
4 * Authors:	Alan Cox, <alan.cox@linux.org>
5 *
6 *		This program is free software; you can redistribute it and/or
7 *		modify it under the terms of the GNU General Public License
8 *		as published by the Free Software Foundation; either version
9 *		2 of the License, or (at your option) any later version.
10 *
11 * Version:	$Id: af_unix.c,v 1.133 2002/02/08 03:57:19 davem Exp $
12 *
13 * Fixes:
14 *		Linus Torvalds	:	Assorted bug cures.
15 *		Niibe Yutaka	:	async I/O support.
16 *		Carsten Paeth	:	PF_UNIX check, address fixes.
17 *		Alan Cox	:	Limit size of allocated blocks.
18 *		Alan Cox	:	Fixed the stupid socketpair bug.
19 *		Alan Cox	:	BSD compatibility fine tuning.
20 *		Alan Cox	:	Fixed a bug in connect when interrupted.
21 *		Alan Cox	:	Sorted out a proper draft version of
22 *					file descriptor passing hacked up from
23 *					Mike Shaver's work.
24 *		Marty Leisner	:	Fixes to fd passing
25 *		Nick Nevin	:	recvmsg bugfix.
26 *		Alan Cox	:	Started proper garbage collector
27 *		Heiko EiBfeldt	:	Missing verify_area check
28 *		Alan Cox	:	Started POSIXisms
29 *		Andreas Schwab	:	Replace inode by dentry for proper
30 *					reference counting
31 *		Kirk Petersen	:	Made this a module
32 *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
33 *					Lots of bug fixes.
 *	     Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
35 *					by above two patches.
36 *	     Andrea Arcangeli	:	If possible we block in connect(2)
37 *					if the max backlog of the listen socket
 *					has been reached. This won't break
39 *					old apps and it will avoid huge amount
40 *					of socks hashed (this for unix_gc()
41 *					performances reasons).
42 *					Security fix that limits the max
43 *					number of socks to 2*max_files and
44 *					the number of skb queueable in the
45 *					dgram receiver.
46 *		Artur Skawina   :	Hash function optimizations
47 *	     Alexey Kuznetsov   :	Full scale SMP. Lot of bugs are introduced 8)
48 *	      Malcolm Beattie   :	Set peercred for socketpair
49 *	     Michal Ostrowski   :       Module initialization cleanup.
50 *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
51 *	     				the core infrastructure is doing that
52 *	     				for all net proto families now (2.5.69+)
53 *
54 *
55 * Known differences from reference BSD that was tested:
56 *
57 *	[TO FIX]
58 *	ECONNREFUSED is not returned from one end of a connected() socket to the
59 *		other the moment one end closes.
60 *	fstat() doesn't return st_dev=0, and give the blksize as high water mark
61 *		and a fake inode identifier (nor the BSD first socket fstat twice bug).
62 *	[NOT TO FIX]
63 *	accept() returns a path name even if the connecting socket has closed
64 *		in the meantime (BSD loses the path and gives up).
65 *	accept() returns 0 length path for an unbound connector. BSD returns 16
66 *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
67 *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
68 *	BSD af_unix apparently has connect forgetting to block properly.
69 *		(need to check this with the POSIX spec in detail)
70 *
71 * Differences from 2.0.0-11-... (ANK)
72 *	Bug fixes and improvements.
73 *		- client shutdown killed server socket.
74 *		- removed all useless cli/sti pairs.
75 *
76 *	Semantic changes/extensions.
77 *		- generic control message passing.
78 *		- SCM_CREDENTIALS control message.
79 *		- "Abstract" (not FS based) socket bindings.
80 *		  Abstract names are sequences of bytes (not zero terminated)
81 *		  started by 0, so that this name space does not intersect
82 *		  with BSD names.
83 */
84
85#include <linux/module.h>
86#include <linux/kernel.h>
87#include <linux/signal.h>
88#include <linux/sched.h>
89#include <linux/errno.h>
90#include <linux/string.h>
91#include <linux/stat.h>
92#include <linux/dcache.h>
93#include <linux/namei.h>
94#include <linux/socket.h>
95#include <linux/un.h>
96#include <linux/fcntl.h>
97#include <linux/termios.h>
98#include <linux/sockios.h>
99#include <linux/net.h>
100#include <linux/in.h>
101#include <linux/fs.h>
102#include <linux/slab.h>
103#include <asm/uaccess.h>
104#include <linux/skbuff.h>
105#include <linux/netdevice.h>
106#include <net/sock.h>
107#include <net/tcp_states.h>
108#include <net/af_unix.h>
109#include <linux/proc_fs.h>
110#include <linux/seq_file.h>
111#include <net/scm.h>
112#include <linux/init.h>
113#include <linux/poll.h>
114#include <linux/smp_lock.h>
115#include <linux/rtnetlink.h>
116#include <linux/mount.h>
117#include <net/checksum.h>
118#include <linux/security.h>
119
/* Max datagrams queueable on a receiving socket (tunable via sysctl). */
int sysctl_unix_max_dgram_qlen = 10;

/* Global hash of all unix socks; the extra [UNIX_HASH_SIZE] chain holds
 * sockets that are not yet bound to any address. */
struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
DEFINE_SPINLOCK(unix_table_lock);		/* protects unix_socket_table */
static atomic_t unix_nr_socks = ATOMIC_INIT(0);	/* global socket count */

/* Chain for sockets with no address bound yet. */
#define unix_sockets_unbound	(&unix_socket_table[UNIX_HASH_SIZE])

/* True for abstract-namespace sockets; FS-bound sockets are hashed with
 * hash == UNIX_HASH_SIZE (see unix_bind()). */
#define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)
129
#ifdef CONFIG_SECURITY_NETWORK
/* Stash the sender's LSM security id in the skb's control block.
 * Marked inline for consistency with the three sibling definitions. */
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	memcpy(UNIXSID(skb), &scm->secid, sizeof(u32));
}

/* Recover the security id stored by unix_get_secdata() into the scm. */
static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secid = *UNIXSID(skb);
}
#else
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }
#endif /* CONFIG_SECURITY_NETWORK */
147
148/*
149 *  SMP locking strategy:
150 *    hash table is protected with spinlock unix_table_lock
151 *    each socket state is protected by separate rwlock.
152 */
153
154static inline unsigned unix_hash_fold(unsigned hash)
155{
156	hash ^= hash>>16;
157	hash ^= hash>>8;
158	return hash&(UNIX_HASH_SIZE-1);
159}
160
/* The sock this one is connected/paired to, or NULL. */
#define unix_peer(sk) (unix_sk(sk)->peer)
162
/* True iff @osk's peer pointer refers back to @sk. */
static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
	struct sock *peer_of_other = unix_peer(osk);

	return peer_of_other == sk;
}
167
168static inline int unix_may_send(struct sock *sk, struct sock *osk)
169{
170	return (unix_peer(osk) == NULL || unix_our_peer(sk, osk));
171}
172
173static struct sock *unix_peer_get(struct sock *s)
174{
175	struct sock *peer;
176
177	unix_state_rlock(s);
178	peer = unix_peer(s);
179	if (peer)
180		sock_hold(peer);
181	unix_state_runlock(s);
182	return peer;
183}
184
185static inline void unix_release_addr(struct unix_address *addr)
186{
187	if (atomic_dec_and_test(&addr->refcnt))
188		kfree(addr);
189}
190
/*
 *	Check and canonicalize a unix socket name:
 *		- should be not zero length.
 *	        - if started by not zero, should be NULL terminated (FS object)
 *		- if started by zero, it is abstract name.
 *
 *	Returns the canonical address length, or -EINVAL.  *hashp is only
 *	written for abstract names; FS names are hashed later by inode.
 */

static int unix_mkname(struct sockaddr_un * sunaddr, int len, unsigned *hashp)
{
	if (len <= sizeof(short) || len > sizeof(*sunaddr))
		return -EINVAL;
	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
		return -EINVAL;
	if (sunaddr->sun_path[0]) {
		/*
		 * This may look like an off by one error but it is a bit more
		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
		 * sun_path[108] doesnt as such exist.  However in kernel space
		 * we are guaranteed that it is a valid memory location in our
		 * kernel address buffer.
		 */
		((char *)sunaddr)[len]=0;
		/* Recompute len from the NUL-terminated path. */
		len = strlen(sunaddr->sun_path)+1+sizeof(short);
		return len;
	}

	/* Abstract name: hash the raw bytes (name is not NUL terminated). */
	*hashp = unix_hash_fold(csum_partial((char*)sunaddr, len, 0));
	return len;
}
220
/* Unhash @sk from unix_socket_table; caller holds unix_table_lock. */
static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}
225
/* Hash @sk onto @list; caller holds unix_table_lock. */
static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	BUG_TRAP(sk_unhashed(sk));
	sk_add_node(sk, list);
}
231
/* Locked wrapper around __unix_remove_socket(). */
static inline void unix_remove_socket(struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_remove_socket(sk);
	spin_unlock(&unix_table_lock);
}
238
/* Locked wrapper around __unix_insert_socket(). */
static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_insert_socket(list, sk);
	spin_unlock(&unix_table_lock);
}
245
246static struct sock *__unix_find_socket_byname(struct sockaddr_un *sunname,
247					      int len, int type, unsigned hash)
248{
249	struct sock *s;
250	struct hlist_node *node;
251
252	sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
253		struct unix_sock *u = unix_sk(s);
254
255		if (u->addr->len == len &&
256		    !memcmp(u->addr->name, sunname, len))
257			goto found;
258	}
259	s = NULL;
260found:
261	return s;
262}
263
264static inline struct sock *unix_find_socket_byname(struct sockaddr_un *sunname,
265						   int len, int type,
266						   unsigned hash)
267{
268	struct sock *s;
269
270	spin_lock(&unix_table_lock);
271	s = __unix_find_socket_byname(sunname, len, type, hash);
272	if (s)
273		sock_hold(s);
274	spin_unlock(&unix_table_lock);
275	return s;
276}
277
278static struct sock *unix_find_socket_byinode(struct inode *i)
279{
280	struct sock *s;
281	struct hlist_node *node;
282
283	spin_lock(&unix_table_lock);
284	sk_for_each(s, node,
285		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
286		struct dentry *dentry = unix_sk(s)->dentry;
287
288		if(dentry && dentry->d_inode == i)
289		{
290			sock_hold(s);
291			goto found;
292		}
293	}
294	s = NULL;
295found:
296	spin_unlock(&unix_table_lock);
297	return s;
298}
299
300static inline int unix_writable(struct sock *sk)
301{
302	return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
303}
304
/* sk->sk_write_space callback: wake writers once the socket has drained
 * enough to be writable again.  Runs under sk_callback_lock. */
static void unix_write_space(struct sock *sk)
{
	read_lock(&sk->sk_callback_lock);
	if (unix_writable(sk)) {
		if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
			wake_up_interruptible(sk->sk_sleep);
		/* 2 == SOCK_WAKE_SPACE-style async notification */
		sk_wake_async(sk, 2, POLL_OUT);
	}
	read_unlock(&sk->sk_callback_lock);
}
315
/* When dgram socket disconnects (or changes its peer), we clear its receive
 * queue of packets arrived from previous peer. First, it allows to do
 * flow control based only on wmem_alloc; second, sk connected to peer
 * may receive messages only from that peer. */
static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
{
	if (!skb_queue_empty(&sk->sk_receive_queue)) {
		skb_queue_purge(&sk->sk_receive_queue);
		/* Queue emptied: senders sleeping on our peer_wait (see
		 * unix_wait_for_peer()) may proceed. */
		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);

		/* If one link of bidirectional dgram pipe is disconnected,
		 * we signal error. Messages are lost. Do not make this,
		 * when peer was not connected to us.
		 */
		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
			other->sk_err = ECONNRESET;
			other->sk_error_report(other);
		}
	}
}
336
337static void unix_sock_destructor(struct sock *sk)
338{
339	struct unix_sock *u = unix_sk(sk);
340
341	skb_queue_purge(&sk->sk_receive_queue);
342
343	BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
344	BUG_TRAP(sk_unhashed(sk));
345	BUG_TRAP(!sk->sk_socket);
346	if (!sock_flag(sk, SOCK_DEAD)) {
347		printk("Attempt to release alive unix socket: %p\n", sk);
348		return;
349	}
350
351	if (u->addr)
352		unix_release_addr(u->addr);
353
354	atomic_dec(&unix_nr_socks);
355#ifdef UNIX_REFCNT_DEBUG
356	printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk, atomic_read(&unix_nr_socks));
357#endif
358}
359
/*
 * Tear down one unix sock: unhash it, break the peer association, flush
 * queued data and drop the final reference.  @embrion is nonzero when
 * the sock is a half-created child still parked on a listener's queue.
 * Always returns 0.
 */
static int unix_release_sock (struct sock *sk, int embrion)
{
	struct unix_sock *u = unix_sk(sk);
	struct dentry *dentry;
	struct vfsmount *mnt;
	struct sock *skpair;
	struct sk_buff *skb;
	int state;

	unix_remove_socket(sk);

	/* Clear state */
	unix_state_wlock(sk);
	sock_orphan(sk);
	sk->sk_shutdown = SHUTDOWN_MASK;
	dentry	     = u->dentry;
	u->dentry    = NULL;
	mnt	     = u->mnt;
	u->mnt	     = NULL;
	state = sk->sk_state;
	sk->sk_state = TCP_CLOSE;
	unix_state_wunlock(sk);

	wake_up_interruptible_all(&u->peer_wait);

	skpair=unix_peer(sk);

	if (skpair!=NULL) {
		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
			unix_state_wlock(skpair);
			/* No more writes */
			skpair->sk_shutdown = SHUTDOWN_MASK;
			/* Unread data (or an embryo) means the peer sees a
			 * reset rather than a clean EOF. */
			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
				skpair->sk_err = ECONNRESET;
			unix_state_wunlock(skpair);
			skpair->sk_state_change(skpair);
			read_lock(&skpair->sk_callback_lock);
			sk_wake_async(skpair,1,POLL_HUP);
			read_unlock(&skpair->sk_callback_lock);
		}
		sock_put(skpair); /* It may now die */
		unix_peer(sk) = NULL;
	}

	/* Try to flush out this socket. Throw out buffers at least */

	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		/* A listener's queue holds embryonic children, not data -
		 * release each child sock as well. */
		if (state==TCP_LISTEN)
			unix_release_sock(skb->sk, 1);
		/* passed fds are erased in the kfree_skb hook	      */
		kfree_skb(skb);
	}

	if (dentry) {
		dput(dentry);
		mntput(mnt);
	}

	sock_put(sk);

	/* ---- Socket is dead now and most probably destroyed ---- */

	/*
	 * Fixme: BSD difference: In BSD all sockets connected to use get
	 *	  ECONNRESET and we die on the spot. In Linux we behave
	 *	  like files and pipes do and wait for the last
	 *	  dereference.
	 *
	 * Can't we simply set sock->err?
	 *
	 *	  What the above comment does talk about? --ANK(980817)
	 */

	if (atomic_read(&unix_tot_inflight))
		unix_gc();		/* Garbage collect fds */

	return 0;
}
438
/*
 * listen(2): move a bound stream/seqpacket socket into TCP_LISTEN and
 * record the backlog.  Also latches the listener's credentials so
 * connect() can copy them to the child.
 */
static int unix_listen(struct socket *sock, int backlog)
{
	int err;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);

	err = -EOPNOTSUPP;
	if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET)
		goto out;			/* Only stream/seqpacket sockets accept */
	err = -EINVAL;
	if (!u->addr)
		goto out;			/* No listens on an unbound socket */
	unix_state_wlock(sk);
	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
		goto out_unlock;
	/* Backlog grew: connectors blocked in unix_wait_for_peer() may
	 * now fit, so wake them for a retry. */
	if (backlog > sk->sk_max_ack_backlog)
		wake_up_interruptible_all(&u->peer_wait);
	sk->sk_max_ack_backlog	= backlog;
	sk->sk_state		= TCP_LISTEN;
	/* set credentials so connect can copy them */
	sk->sk_peercred.pid	= current->tgid;
	sk->sk_peercred.uid	= current->euid;
	sk->sk_peercred.gid	= current->egid;
	err = 0;

out_unlock:
	unix_state_wunlock(sk);
out:
	return err;
}
469
/* Forward declarations for the proto_ops tables below. */
static int unix_release(struct socket *);
static int unix_bind(struct socket *, struct sockaddr *, int);
static int unix_stream_connect(struct socket *, struct sockaddr *,
			       int addr_len, int flags);
static int unix_socketpair(struct socket *, struct socket *);
static int unix_accept(struct socket *, struct socket *, int);
static int unix_getname(struct socket *, struct sockaddr *, int *, int);
static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
static int unix_ioctl(struct socket *, unsigned int, unsigned long);
static int unix_shutdown(struct socket *, int);
static int unix_stream_sendmsg(struct kiocb *, struct socket *,
			       struct msghdr *, size_t);
static int unix_stream_recvmsg(struct kiocb *, struct socket *,
			       struct msghdr *, size_t, int);
static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
			      struct msghdr *, size_t);
static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
			      struct msghdr *, size_t, int);
static int unix_dgram_connect(struct socket *, struct sockaddr *,
			      int, int);
static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
				  struct msghdr *, size_t);
492
/* Socket operations for SOCK_STREAM unix sockets. */
static const struct proto_ops unix_stream_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_poll,
	.ioctl =	unix_ioctl,
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_stream_sendmsg,
	.recvmsg =	unix_stream_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};
513
/* Socket operations for SOCK_DGRAM unix sockets (no accept/listen). */
static const struct proto_ops unix_dgram_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_dgram_connect,
	.socketpair =	unix_socketpair,
	.accept =	sock_no_accept,
	.getname =	unix_getname,
	.poll =		datagram_poll,
	.ioctl =	unix_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_dgram_sendmsg,
	.recvmsg =	unix_dgram_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};
534
/* Socket operations for SOCK_SEQPACKET unix sockets: stream-style
 * connect/accept, datagram-style poll/recvmsg. */
static const struct proto_ops unix_seqpacket_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		datagram_poll,
	.ioctl =	unix_ioctl,
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_seqpacket_sendmsg,
	.recvmsg =	unix_dgram_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};
555
/* Protocol descriptor; obj_size makes sk_alloc() carve out a full
 * struct unix_sock. */
static struct proto unix_proto = {
	.name	  = "UNIX",
	.owner	  = THIS_MODULE,
	.obj_size = sizeof(struct unix_sock),
};
561
/*
 * AF_UNIX sockets do not interact with hardware, hence they
 * don't trigger interrupts - so it's safe for them to have
 * bh-unsafe locking for their sk_receive_queue.lock. Split off
 * this special lock-class by reinitializing the spinlock key:
 */
static struct lock_class_key af_unix_sk_receive_queue_lock_key;
569
/*
 * Allocate and initialise one unix sock.  @sock is NULL for the
 * embryonic server-side sock created during connect(); those start with
 * inflight == -1, which unix_stream_connect() undoes once the embryo is
 * queued on the listener.  Returns the new sock, or NULL on allocation
 * failure or when the global 2*max_files limit is hit.
 */
static struct sock * unix_create1(struct socket *sock)
{
	struct sock *sk = NULL;
	struct unix_sock *u;

	if (atomic_read(&unix_nr_socks) >= 2*get_max_files())
		goto out;

	sk = sk_alloc(PF_UNIX, GFP_KERNEL, &unix_proto, 1);
	if (!sk)
		goto out;

	atomic_inc(&unix_nr_socks);

	sock_init_data(sock,sk);
	/* See comment on af_unix_sk_receive_queue_lock_key above. */
	lockdep_set_class(&sk->sk_receive_queue.lock,
				&af_unix_sk_receive_queue_lock_key);

	sk->sk_write_space	= unix_write_space;
	sk->sk_max_ack_backlog	= sysctl_unix_max_dgram_qlen;
	sk->sk_destruct		= unix_sock_destructor;
	u	  = unix_sk(sk);
	u->dentry = NULL;
	u->mnt	  = NULL;
	spin_lock_init(&u->lock);
	atomic_set(&u->inflight, sock ? 0 : -1);
	mutex_init(&u->readlock); /* single task reading lock */
	init_waitqueue_head(&u->peer_wait);
	/* No address bound yet - park on the unbound chain. */
	unix_insert_socket(unix_sockets_unbound, sk);
out:
	return sk;
}
602
/*
 * socket(2) entry point for PF_UNIX: pick the proto_ops table matching
 * the requested type and allocate the sock.
 */
static int unix_create(struct socket *sock, int protocol)
{
	if (protocol && protocol != PF_UNIX)
		return -EPROTONOSUPPORT;

	sock->state = SS_UNCONNECTED;

	switch (sock->type) {
	case SOCK_STREAM:
		sock->ops = &unix_stream_ops;
		break;
		/*
		 *	Believe it or not BSD has AF_UNIX, SOCK_RAW though
		 *	nothing uses it.
		 */
	case SOCK_RAW:
		sock->type=SOCK_DGRAM;
		/* fall through - SOCK_RAW is treated as SOCK_DGRAM */
	case SOCK_DGRAM:
		sock->ops = &unix_dgram_ops;
		break;
	case SOCK_SEQPACKET:
		sock->ops = &unix_seqpacket_ops;
		break;
	default:
		return -ESOCKTNOSUPPORT;
	}

	return unix_create1(sock) ? 0 : -ENOMEM;
}
632
633static int unix_release(struct socket *sock)
634{
635	struct sock *sk = sock->sk;
636
637	if (!sk)
638		return 0;
639
640	sock->sk = NULL;
641
642	return unix_release_sock (sk, 0);
643}
644
/*
 * Bind an unbound socket to an automatically chosen abstract name of
 * the form "\0XXXXX" (five hex digits).  Retries until an unused name
 * is found.  No-op (success) if the socket already has an address.
 */
static int unix_autobind(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	static u32 ordernum = 1;	/* next candidate; ordernum itself is
					 * only advanced under unix_table_lock */
	struct unix_address * addr;
	int err;

	mutex_lock(&u->readlock);

	err = 0;
	if (u->addr)
		goto out;

	err = -ENOMEM;
	/* Room for sun_family plus "\0XXXXX" and slack. */
	addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
	if (!addr)
		goto out;

	addr->name->sun_family = AF_UNIX;
	atomic_set(&addr->refcnt, 1);

retry:
	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
	addr->hash = unix_hash_fold(csum_partial((void*)addr->name, addr->len, 0));

	spin_lock(&unix_table_lock);
	ordernum = (ordernum+1)&0xFFFFF;

	if (__unix_find_socket_byname(addr->name, addr->len, sock->type,
				      addr->hash)) {
		spin_unlock(&unix_table_lock);
		/* Sanity yield. It is unusual case, but yet... */
		if (!(ordernum&0xFF))
			yield();
		goto retry;
	}
	/* Fold the socket type in, matching the lookup's (hash ^ type). */
	addr->hash ^= sk->sk_type;

	/* Move off the unbound chain onto the chain for the new name. */
	__unix_remove_socket(sk);
	u->addr = addr;
	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
	spin_unlock(&unix_table_lock);
	err = 0;

out:	mutex_unlock(&u->readlock);
	return err;
}
693
/*
 * Resolve a sockaddr_un to the target unix sock for connect()/sendmsg().
 * Filesystem names go through the VFS (write permission on the socket
 * inode is required); abstract names are looked up in the hash table.
 * On success returns the target with a reference held; on failure
 * returns NULL and stores the errno in *error.
 */
static struct sock *unix_find_other(struct sockaddr_un *sunname, int len,
				    int type, unsigned hash, int *error)
{
	struct sock *u;
	struct nameidata nd;
	int err = 0;

	if (sunname->sun_path[0]) {
		err = path_lookup(sunname->sun_path, LOOKUP_FOLLOW, &nd);
		if (err)
			goto fail;
		err = vfs_permission(&nd, MAY_WRITE);
		if (err)
			goto put_fail;

		err = -ECONNREFUSED;
		if (!S_ISSOCK(nd.dentry->d_inode->i_mode))
			goto put_fail;
		u=unix_find_socket_byinode(nd.dentry->d_inode);
		if (!u)
			goto put_fail;

		/* Only touch atime when the type actually matches. */
		if (u->sk_type == type)
			touch_atime(nd.mnt, nd.dentry);

		path_release(&nd);

		err=-EPROTOTYPE;
		if (u->sk_type != type) {
			sock_put(u);
			goto fail;
		}
	} else {
		err = -ECONNREFUSED;
		u=unix_find_socket_byname(sunname, len, type, hash);
		if (u) {
			struct dentry *dentry;
			dentry = unix_sk(u)->dentry;
			if (dentry)
				touch_atime(unix_sk(u)->mnt, dentry);
		} else
			goto fail;
	}
	return u;

put_fail:
	path_release(&nd);
fail:
	*error=err;
	return NULL;
}
745
746
/*
 * bind(2): attach a filesystem or abstract address to the socket.
 * A sun_family-only address (addr_len == sizeof(short)) triggers
 * autobind.  Filesystem names create a socket inode via vfs_mknod();
 * abstract names fail with -EADDRINUSE if already taken.
 */
static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
	struct dentry * dentry = NULL;
	struct nameidata nd;
	int err;
	unsigned hash;
	struct unix_address *addr;
	struct hlist_head *list;

	err = -EINVAL;
	if (sunaddr->sun_family != AF_UNIX)
		goto out;

	if (addr_len==sizeof(short)) {
		err = unix_autobind(sock);
		goto out;
	}

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	mutex_lock(&u->readlock);

	err = -EINVAL;
	if (u->addr)
		goto out_up;		/* already bound */

	err = -ENOMEM;
	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
	if (!addr)
		goto out_up;

	memcpy(addr->name, sunaddr, addr_len);
	addr->len = addr_len;
	addr->hash = hash ^ sk->sk_type;
	atomic_set(&addr->refcnt, 1);

	if (sunaddr->sun_path[0]) {
		unsigned int mode;
		err = 0;
		/*
		 * Get the parent directory, calculate the hash for last
		 * component.
		 */
		err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd);
		if (err)
			goto out_mknod_parent;

		dentry = lookup_create(&nd, 0);
		err = PTR_ERR(dentry);
		if (IS_ERR(dentry))
			goto out_mknod_unlock;

		/*
		 * All right, let's create it.
		 */
		mode = S_IFSOCK |
		       (SOCK_INODE(sock)->i_mode & ~current->fs->umask);
		err = vfs_mknod(nd.dentry->d_inode, dentry, mode, 0);
		if (err)
			goto out_mknod_dput;
		mutex_unlock(&nd.dentry->d_inode->i_mutex);
		dput(nd.dentry);
		nd.dentry = dentry;

		/* FS sockets are marked with the out-of-range hash so
		 * UNIX_ABSTRACT() is false for them. */
		addr->hash = UNIX_HASH_SIZE;
	}

	spin_lock(&unix_table_lock);

	if (!sunaddr->sun_path[0]) {
		err = -EADDRINUSE;
		if (__unix_find_socket_byname(sunaddr, addr_len,
					      sk->sk_type, hash)) {
			unix_release_addr(addr);
			goto out_unlock;
		}

		list = &unix_socket_table[addr->hash];
	} else {
		/* FS sockets are chained by inode number so
		 * unix_find_socket_byinode() can find them. */
		list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
		u->dentry = nd.dentry;
		u->mnt    = nd.mnt;
	}

	err = 0;
	__unix_remove_socket(sk);
	u->addr = addr;
	__unix_insert_socket(list, sk);

out_unlock:
	spin_unlock(&unix_table_lock);
out_up:
	mutex_unlock(&u->readlock);
out:
	return err;

out_mknod_dput:
	dput(dentry);
out_mknod_unlock:
	mutex_unlock(&nd.dentry->d_inode->i_mutex);
	path_release(&nd);
out_mknod_parent:
	if (err==-EEXIST)
		err=-EADDRINUSE;
	unix_release_addr(addr);
	goto out_up;
}
860
/*
 * connect(2) for datagram sockets: set (or change, or with AF_UNSPEC
 * dissolve) the default peer.  On a peer change the stale receive queue
 * is purged via unix_dgram_disconnected().
 */
static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
			      int alen, int flags)
{
	struct sock *sk = sock->sk;
	struct sockaddr_un *sunaddr=(struct sockaddr_un*)addr;
	struct sock *other;
	unsigned hash;
	int err;

	if (addr->sa_family != AF_UNSPEC) {
		err = unix_mkname(sunaddr, alen, &hash);
		if (err < 0)
			goto out;
		alen = err;

		/* SO_PASSCRED requires us to have an address to pass. */
		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
			goto out;

		other=unix_find_other(sunaddr, alen, sock->type, hash, &err);
		if (!other)
			goto out;

		unix_state_wlock(sk);

		err = -EPERM;
		if (!unix_may_send(sk, other))
			goto out_unlock;

		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;

	} else {
		/*
		 *	1003.1g breaking connected state with AF_UNSPEC
		 */
		other = NULL;
		unix_state_wlock(sk);
	}

	/*
	 * If it was connected, reconnect.
	 */
	if (unix_peer(sk)) {
		struct sock *old_peer = unix_peer(sk);
		unix_peer(sk)=other;
		unix_state_wunlock(sk);

		if (other != old_peer)
			unix_dgram_disconnected(sk, old_peer);
		sock_put(old_peer);	/* drop ref held on the old peer */
	} else {
		unix_peer(sk)=other;
		unix_state_wunlock(sk);
	}
 	return 0;

out_unlock:
	unix_state_wunlock(sk);
	sock_put(other);
out:
	return err;
}
925
/*
 * Sleep until the listener @other may have backlog room again (or the
 * timeout expires / a signal arrives).  Called with @other's state
 * read-lock held; the lock is DROPPED here and NOT re-taken - callers
 * must revalidate everything.  Returns the remaining timeout.
 */
static long unix_wait_for_peer(struct sock *other, long timeo)
{
	struct unix_sock *u = unix_sk(other);
	int sched;
	DEFINE_WAIT(wait);

	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);

	/* Only sleep if the peer is still alive, still accepting, and
	 * its queue is still over the backlog. */
	sched = !sock_flag(other, SOCK_DEAD) &&
		!(other->sk_shutdown & RCV_SHUTDOWN) &&
		(skb_queue_len(&other->sk_receive_queue) >
		 other->sk_max_ack_backlog);

	unix_state_runlock(other);

	if (sched)
		timeo = schedule_timeout(timeo);

	finish_wait(&u->peer_wait, &wait);
	return timeo;
}
947
/*
 * connect(2) for stream/seqpacket sockets.  A complete child sock
 * (newsk) and a control skb are allocated up front; on success the skb
 * carrying the embryo is queued on the listener's receive queue, where
 * accept() will find it.
 */
static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			       int addr_len, int flags)
{
	struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
	struct sock *newsk = NULL;
	struct sock *other = NULL;
	struct sk_buff *skb = NULL;
	unsigned hash;
	int st;
	int err;
	long timeo;

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	/* SO_PASSCRED requires us to have an address to pass. */
	if (test_bit(SOCK_PASSCRED, &sock->flags)
		&& !u->addr && (err = unix_autobind(sock)) != 0)
		goto out;

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	/* First of all allocate resources.
	   If we will make it after state is locked,
	   we will have to recheck all again in any case.
	 */

	err = -ENOMEM;

	/* create new sock for complete connection */
	newsk = unix_create1(NULL);
	if (newsk == NULL)
		goto out;

	/* Allocate skb for sending to listening sock */
	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
	if (skb == NULL)
		goto out;

restart:
	/*  Find listening sock. */
	other = unix_find_other(sunaddr, addr_len, sk->sk_type, hash, &err);
	if (!other)
		goto out;

	/* Latch state of peer */
	unix_state_rlock(other);

	/* Apparently VFS overslept socket death. Retry. */
	if (sock_flag(other, SOCK_DEAD)) {
		unix_state_runlock(other);
		sock_put(other);
		goto restart;
	}

	err = -ECONNREFUSED;
	if (other->sk_state != TCP_LISTEN)
		goto out_unlock;

	if (skb_queue_len(&other->sk_receive_queue) >
	    other->sk_max_ack_backlog) {
		/* Backlog full: fail now if non-blocking, else wait
		 * (unix_wait_for_peer drops other's lock) and retry. */
		err = -EAGAIN;
		if (!timeo)
			goto out_unlock;

		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
		sock_put(other);
		goto restart;
        }

	/* Latch our state.

	   It is tricky place. We need to grab write lock and cannot
	   drop lock on peer. It is dangerous because deadlock is
	   possible. Connect to self case and simultaneous
	   attempt to connect are eliminated by checking socket
	   state. other is TCP_LISTEN, if sk is TCP_LISTEN we
	   check this before attempt to grab lock.

	   Well, and we have to recheck the state after socket locked.
	 */
	st = sk->sk_state;

	switch (st) {
	case TCP_CLOSE:
		/* This is ok... continue with connect */
		break;
	case TCP_ESTABLISHED:
		/* Socket is already connected */
		err = -EISCONN;
		goto out_unlock;
	default:
		err = -EINVAL;
		goto out_unlock;
	}

	unix_state_wlock_nested(sk);

	if (sk->sk_state != st) {
		unix_state_wunlock(sk);
		unix_state_runlock(other);
		sock_put(other);
		goto restart;
	}

	err = security_unix_stream_connect(sock, other->sk_socket, newsk);
	if (err) {
		unix_state_wunlock(sk);
		goto out_unlock;
	}

	/* The way is open! Fastly set all the necessary fields... */

	sock_hold(sk);
	unix_peer(newsk)	= sk;
	newsk->sk_state		= TCP_ESTABLISHED;
	newsk->sk_type		= sk->sk_type;
	newsk->sk_peercred.pid	= current->tgid;
	newsk->sk_peercred.uid	= current->euid;
	newsk->sk_peercred.gid	= current->egid;
	newu = unix_sk(newsk);
	newsk->sk_sleep		= &newu->peer_wait;
	otheru = unix_sk(other);

	/* copy address information from listening to new sock*/
	if (otheru->addr) {
		atomic_inc(&otheru->addr->refcnt);
		newu->addr = otheru->addr;
	}
	if (otheru->dentry) {
		newu->dentry	= dget(otheru->dentry);
		newu->mnt	= mntget(otheru->mnt);
	}

	/* Set credentials */
	sk->sk_peercred = other->sk_peercred;

	sock->state	= SS_CONNECTED;
	sk->sk_state	= TCP_ESTABLISHED;
	sock_hold(newsk);

	smp_mb__after_atomic_inc();	/* sock_hold() does an atomic_inc() */
	unix_peer(sk)	= newsk;

	unix_state_wunlock(sk);

	/* take ten and and send info to listening sock */
	spin_lock(&other->sk_receive_queue.lock);
	__skb_queue_tail(&other->sk_receive_queue, skb);
	/* Undo artificially decreased inflight after embrion
	 * is installed to listening socket. */
	atomic_inc(&newu->inflight);
	spin_unlock(&other->sk_receive_queue.lock);
	unix_state_runlock(other);
	other->sk_data_ready(other, 0);
	sock_put(other);
	return 0;

out_unlock:
	if (other)
		unix_state_runlock(other);

out:
	if (skb)
		kfree_skb(skb);
	if (newsk)
		unix_release_sock(newsk, 0);
	if (other)
		sock_put(other);
	return err;
}
1126
1127static int unix_socketpair(struct socket *socka, struct socket *sockb)
1128{
1129	struct sock *ska=socka->sk, *skb = sockb->sk;
1130
1131	/* Join our sockets back to back */
1132	sock_hold(ska);
1133	sock_hold(skb);
1134	unix_peer(ska)=skb;
1135	unix_peer(skb)=ska;
1136	ska->sk_peercred.pid = skb->sk_peercred.pid = current->tgid;
1137	ska->sk_peercred.uid = skb->sk_peercred.uid = current->euid;
1138	ska->sk_peercred.gid = skb->sk_peercred.gid = current->egid;
1139
1140	if (ska->sk_type != SOCK_DGRAM) {
1141		ska->sk_state = TCP_ESTABLISHED;
1142		skb->sk_state = TCP_ESTABLISHED;
1143		socka->state  = SS_CONNECTED;
1144		sockb->state  = SS_CONNECTED;
1145	}
1146	return 0;
1147}
1148
/*
 *	Accept a pending connection on a listening SOCK_STREAM/SOCK_SEQPACKET
 *	socket.  Each embryonic connection sits in the listener's receive
 *	queue as an skb whose ->sk is the new (server-side) sock; accepting
 *	dequeues one and grafts that sock onto @newsock.
 *	Returns 0 on success or a negative errno.
 */
static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
{
	struct sock *sk = sock->sk;
	struct sock *tsk;
	struct sk_buff *skb;
	int err;

	err = -EOPNOTSUPP;
	if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET)
		goto out;

	err = -EINVAL;
	if (sk->sk_state != TCP_LISTEN)
		goto out;

	/* If socket state is TCP_LISTEN it cannot change (for now...),
	 * so that no locks are necessary.
	 */

	skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
	if (!skb) {
		/* This means receive shutdown. */
		if (err == 0)
			err = -EINVAL;
		goto out;
	}

	tsk = skb->sk;
	skb_free_datagram(sk, skb);
	/* A backlog slot opened up: let a blocked connect(2) proceed. */
	wake_up_interruptible(&unix_sk(sk)->peer_wait);

	/* attach accepted sock to socket */
	unix_state_wlock(tsk);
	newsock->state = SS_CONNECTED;
	sock_graft(tsk, newsock);
	unix_state_wunlock(tsk);
	return 0;

out:
	return err;
}
1190
1191
/*
 *	getsockname(2)/getpeername(2): report the local (peer=0) or peer
 *	(peer=1) address.  An unbound socket yields just the sun_family
 *	header.  Returns 0, or -ENOTCONN when a peer is requested but
 *	none exists.
 */
static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
{
	struct sock *sk = sock->sk;
	struct unix_sock *u;
	struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
	int err = 0;

	if (peer) {
		/* unix_peer_get() takes a reference on the peer sock. */
		sk = unix_peer_get(sk);

		err = -ENOTCONN;
		if (!sk)
			goto out;
		err = 0;
	} else {
		/* Balance the unconditional sock_put() below. */
		sock_hold(sk);
	}

	u = unix_sk(sk);
	unix_state_rlock(sk);
	if (!u->addr) {
		/* Unbound: return an empty AF_UNIX address. */
		sunaddr->sun_family = AF_UNIX;
		sunaddr->sun_path[0] = 0;
		*uaddr_len = sizeof(short);
	} else {
		struct unix_address *addr = u->addr;

		*uaddr_len = addr->len;
		memcpy(sunaddr, addr->name, *uaddr_len);
	}
	unix_state_runlock(sk);
	sock_put(sk);
out:
	return err;
}
1227
1228static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1229{
1230	int i;
1231
1232	scm->fp = UNIXCB(skb).fp;
1233	skb->destructor = sock_wfree;
1234	UNIXCB(skb).fp = NULL;
1235
1236	for (i=scm->fp->count-1; i>=0; i--)
1237		unix_notinflight(scm->fp->fp[i]);
1238}
1239
1240static void unix_destruct_fds(struct sk_buff *skb)
1241{
1242	struct scm_cookie scm;
1243	memset(&scm, 0, sizeof(scm));
1244	unix_detach_fds(&scm, skb);
1245
1246	/* Alas, it calls VFS */
1247	/* So fscking what? fput() had been SMP-safe since the last Summer */
1248	scm_destroy(&scm);
1249	sock_wfree(skb);
1250}
1251
1252static void unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1253{
1254	int i;
1255	for (i=scm->fp->count-1; i>=0; i--)
1256		unix_inflight(scm->fp->fp[i]);
1257	UNIXCB(skb).fp = scm->fp;
1258	skb->destructor = unix_destruct_fds;
1259	scm->fp = NULL;
1260}
1261
1262/*
1263 *	Send AF_UNIX data.
1264 */
1265
1266static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
1267			      struct msghdr *msg, size_t len)
1268{
1269	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1270	struct sock *sk = sock->sk;
1271	struct unix_sock *u = unix_sk(sk);
1272	struct sockaddr_un *sunaddr=msg->msg_name;
1273	struct sock *other = NULL;
1274	int namelen = 0; /* fake GCC */
1275	int err;
1276	unsigned hash;
1277	struct sk_buff *skb;
1278	long timeo;
1279	struct scm_cookie tmp_scm;
1280
1281	if (NULL == siocb->scm)
1282		siocb->scm = &tmp_scm;
1283	err = scm_send(sock, msg, siocb->scm);
1284	if (err < 0)
1285		return err;
1286
1287	err = -EOPNOTSUPP;
1288	if (msg->msg_flags&MSG_OOB)
1289		goto out;
1290
1291	if (msg->msg_namelen) {
1292		err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1293		if (err < 0)
1294			goto out;
1295		namelen = err;
1296	} else {
1297		sunaddr = NULL;
1298		err = -ENOTCONN;
1299		other = unix_peer_get(sk);
1300		if (!other)
1301			goto out;
1302	}
1303
1304	if (test_bit(SOCK_PASSCRED, &sock->flags)
1305		&& !u->addr && (err = unix_autobind(sock)) != 0)
1306		goto out;
1307
1308	err = -EMSGSIZE;
1309	if (len > sk->sk_sndbuf - 32)
1310		goto out;
1311
1312	skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err);
1313	if (skb==NULL)
1314		goto out;
1315
1316	memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1317	if (siocb->scm->fp)
1318		unix_attach_fds(siocb->scm, skb);
1319	unix_get_secdata(siocb->scm, skb);
1320
1321	skb->h.raw = skb->data;
1322	err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
1323	if (err)
1324		goto out_free;
1325
1326	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1327
1328restart:
1329	if (!other) {
1330		err = -ECONNRESET;
1331		if (sunaddr == NULL)
1332			goto out_free;
1333
1334		other = unix_find_other(sunaddr, namelen, sk->sk_type,
1335					hash, &err);
1336		if (other==NULL)
1337			goto out_free;
1338	}
1339
1340	unix_state_rlock(other);
1341	err = -EPERM;
1342	if (!unix_may_send(sk, other))
1343		goto out_unlock;
1344
1345	if (sock_flag(other, SOCK_DEAD)) {
1346		/*
1347		 *	Check with 1003.1g - what should
1348		 *	datagram error
1349		 */
1350		unix_state_runlock(other);
1351		sock_put(other);
1352
1353		err = 0;
1354		unix_state_wlock(sk);
1355		if (unix_peer(sk) == other) {
1356			unix_peer(sk)=NULL;
1357			unix_state_wunlock(sk);
1358
1359			unix_dgram_disconnected(sk, other);
1360			sock_put(other);
1361			err = -ECONNREFUSED;
1362		} else {
1363			unix_state_wunlock(sk);
1364		}
1365
1366		other = NULL;
1367		if (err)
1368			goto out_free;
1369		goto restart;
1370	}
1371
1372	err = -EPIPE;
1373	if (other->sk_shutdown & RCV_SHUTDOWN)
1374		goto out_unlock;
1375
1376	if (sk->sk_type != SOCK_SEQPACKET) {
1377		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1378		if (err)
1379			goto out_unlock;
1380	}
1381
1382	if (unix_peer(other) != sk &&
1383	    (skb_queue_len(&other->sk_receive_queue) >
1384	     other->sk_max_ack_backlog)) {
1385		if (!timeo) {
1386			err = -EAGAIN;
1387			goto out_unlock;
1388		}
1389
1390		timeo = unix_wait_for_peer(other, timeo);
1391
1392		err = sock_intr_errno(timeo);
1393		if (signal_pending(current))
1394			goto out_free;
1395
1396		goto restart;
1397	}
1398
1399	skb_queue_tail(&other->sk_receive_queue, skb);
1400	unix_state_runlock(other);
1401	other->sk_data_ready(other, len);
1402	sock_put(other);
1403	scm_destroy(siocb->scm);
1404	return len;
1405
1406out_unlock:
1407	unix_state_runlock(other);
1408out_free:
1409	kfree_skb(skb);
1410out:
1411	if (other)
1412		sock_put(other);
1413	scm_destroy(siocb->scm);
1414	return err;
1415}
1416
1417
/*
 *	Send on a connected SOCK_STREAM socket.  The payload is split into
 *	skbs of at most half the send buffer (capped at SKB_MAX_ALLOC) so
 *	the reader can pipeline.  Returns bytes sent, or a negative errno
 *	if nothing was sent; raises SIGPIPE on a broken pipe per POSIX.
 */
static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
			       struct msghdr *msg, size_t len)
{
	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
	struct sock *sk = sock->sk;
	struct sock *other = NULL;
	struct sockaddr_un *sunaddr=msg->msg_name;
	int err,size;
	struct sk_buff *skb;
	int sent=0;
	struct scm_cookie tmp_scm;

	if (NULL == siocb->scm)
		siocb->scm = &tmp_scm;
	err = scm_send(sock, msg, siocb->scm);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out_err;

	if (msg->msg_namelen) {
		/* Stream sockets cannot take a destination address. */
		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
		goto out_err;
	} else {
		sunaddr = NULL;
		err = -ENOTCONN;
		other = unix_peer(sk);
		if (!other)
			goto out_err;
	}

	if (sk->sk_shutdown & SEND_SHUTDOWN)
		goto pipe_err;

	while(sent < len)
	{
		/*
		 *	Optimisation for the fact that under 0.01% of X
		 *	messages typically need breaking up.
		 */

		size = len-sent;

		/* Keep two messages in the pipe so it schedules better */
		if (size > ((sk->sk_sndbuf >> 1) - 64))
			size = (sk->sk_sndbuf >> 1) - 64;

		if (size > SKB_MAX_ALLOC)
			size = SKB_MAX_ALLOC;

		/*
		 *	Grab a buffer
		 */

		skb=sock_alloc_send_skb(sk,size,msg->msg_flags&MSG_DONTWAIT, &err);

		if (skb==NULL)
			goto out_err;

		/*
		 *	If you pass two values to the sock_alloc_send_skb
		 *	it tries to grab the large buffer with GFP_NOFS
		 *	(which can fail easily), and if it fails grab the
		 *	fallback size buffer which is under a page and will
		 *	succeed. [Alan]
		 */
		size = min_t(int, size, skb_tailroom(skb));

		/* Each skb carries the sender's credentials and any fds. */
		memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
		if (siocb->scm->fp)
			unix_attach_fds(siocb->scm, skb);

		if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) {
			kfree_skb(skb);
			goto out_err;
		}

		unix_state_rlock(other);

		/* Peer may have died or shut down reading since last chunk. */
		if (sock_flag(other, SOCK_DEAD) ||
		    (other->sk_shutdown & RCV_SHUTDOWN))
			goto pipe_err_free;

		skb_queue_tail(&other->sk_receive_queue, skb);
		unix_state_runlock(other);
		other->sk_data_ready(other, size);
		sent+=size;
	}

	scm_destroy(siocb->scm);
	siocb->scm = NULL;

	return sent;

pipe_err_free:
	unix_state_runlock(other);
	kfree_skb(skb);
pipe_err:
	/* POSIX: SIGPIPE only if nothing at all was transferred. */
	if (sent==0 && !(msg->msg_flags&MSG_NOSIGNAL))
		send_sig(SIGPIPE,current,0);
	err = -EPIPE;
out_err:
	scm_destroy(siocb->scm);
	siocb->scm = NULL;
	return sent ? : err;
}
1526
1527static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
1528				  struct msghdr *msg, size_t len)
1529{
1530	int err;
1531	struct sock *sk = sock->sk;
1532
1533	err = sock_error(sk);
1534	if (err)
1535		return err;
1536
1537	if (sk->sk_state != TCP_ESTABLISHED)
1538		return -ENOTCONN;
1539
1540	if (msg->msg_namelen)
1541		msg->msg_namelen = 0;
1542
1543	return unix_dgram_sendmsg(kiocb, sock, msg, len);
1544}
1545
1546static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
1547{
1548	struct unix_sock *u = unix_sk(sk);
1549
1550	msg->msg_namelen = 0;
1551	if (u->addr) {
1552		msg->msg_namelen = u->addr->len;
1553		memcpy(msg->msg_name, u->addr->name, u->addr->len);
1554	}
1555}
1556
/*
 *	Receive one datagram.  Copies out the sender's address and
 *	credentials, transfers (or on MSG_PEEK clones) any passed file
 *	descriptors, and sets MSG_TRUNC when the caller's buffer is too
 *	small.  Returns bytes delivered or a negative errno.
 */
static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
			      struct msghdr *msg, size_t size,
			      int flags)
{
	struct sock_iocb *siocb = kiocb_to_siocb(iocb);
	struct scm_cookie tmp_scm;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	int noblock = flags & MSG_DONTWAIT;
	struct sk_buff *skb;
	int err;

	err = -EOPNOTSUPP;
	if (flags&MSG_OOB)
		goto out;

	msg->msg_namelen = 0;

	/* Serialize readers so fd-passing state stays consistent. */
	mutex_lock(&u->readlock);

	skb = skb_recv_datagram(sk, flags, noblock, &err);
	if (!skb)
		goto out_unlock;

	/* A queue slot freed up: wake senders blocked on the backlog. */
	wake_up_interruptible(&u->peer_wait);

	if (msg->msg_name)
		unix_copy_addr(msg, skb->sk);

	if (size > skb->len)
		size = skb->len;
	else if (size < skb->len)
		msg->msg_flags |= MSG_TRUNC;

	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size);
	if (err)
		goto out_free;

	if (!siocb->scm) {
		siocb->scm = &tmp_scm;
		memset(&tmp_scm, 0, sizeof(tmp_scm));
	}
	siocb->scm->creds = *UNIXCREDS(skb);
	unix_set_secdata(siocb->scm, skb);

	if (!(flags & MSG_PEEK))
	{
		if (UNIXCB(skb).fp)
			unix_detach_fds(siocb->scm, skb);
	}
	else
	{
		/* It is questionable: on PEEK we could:
		   - do not return fds - good, but too simple 8)
		   - return fds, and do not return them on read (old strategy,
		     apparently wrong)
		   - clone fds (I chose it for now, it is the most universal
		     solution)

	           POSIX 1003.1g does not actually define this clearly
	           at all. POSIX 1003.1g doesn't define a lot of things
	           clearly however!

		*/
		if (UNIXCB(skb).fp)
			siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
	}
	err = size;

	scm_recv(sock, msg, siocb->scm, flags);

out_free:
	skb_free_datagram(sk,skb);
out_unlock:
	mutex_unlock(&u->readlock);
out:
	return err;
}
1635
1636/*
1637 *	Sleep until data has arrive. But check for races..
1638 */
1639
1640static long unix_stream_data_wait(struct sock * sk, long timeo)
1641{
1642	DEFINE_WAIT(wait);
1643
1644	unix_state_rlock(sk);
1645
1646	for (;;) {
1647		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
1648
1649		if (!skb_queue_empty(&sk->sk_receive_queue) ||
1650		    sk->sk_err ||
1651		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
1652		    signal_pending(current) ||
1653		    !timeo)
1654			break;
1655
1656		set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1657		unix_state_runlock(sk);
1658		timeo = schedule_timeout(timeo);
1659		unix_state_rlock(sk);
1660		clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1661	}
1662
1663	finish_wait(sk->sk_sleep, &wait);
1664	unix_state_runlock(sk);
1665	return timeo;
1666}
1667
1668
1669
/*
 *	Receive from a connected SOCK_STREAM socket.  Dequeues skbs and
 *	copies them out until @size bytes or the low-water mark is
 *	satisfied, never merging data written under different credentials
 *	and stopping at fd-passing boundaries.  Returns bytes copied or a
 *	negative errno.
 */
static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
			       struct msghdr *msg, size_t size,
			       int flags)
{
	struct sock_iocb *siocb = kiocb_to_siocb(iocb);
	struct scm_cookie tmp_scm;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr=msg->msg_name;
	int copied = 0;
	int check_creds = 0;
	int target;
	int err = 0;
	long timeo;

	err = -EINVAL;
	if (sk->sk_state != TCP_ESTABLISHED)
		goto out;

	err = -EOPNOTSUPP;
	if (flags&MSG_OOB)
		goto out;

	target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
	timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT);

	msg->msg_namelen = 0;

	/* Lock the socket to prevent queue disordering
	 * while sleeps in memcpy_tomsg
	 */

	if (!siocb->scm) {
		siocb->scm = &tmp_scm;
		memset(&tmp_scm, 0, sizeof(tmp_scm));
	}

	mutex_lock(&u->readlock);

	do
	{
		int chunk;
		struct sk_buff *skb;

		skb = skb_dequeue(&sk->sk_receive_queue);
		if (skb==NULL)
		{
			/* Queue empty: enough copied already? */
			if (copied >= target)
				break;

			/*
			 *	POSIX 1003.1g mandates this order.
			 */

			if ((err = sock_error(sk)) != 0)
				break;
			if (sk->sk_shutdown & RCV_SHUTDOWN)
				break;
			err = -EAGAIN;
			if (!timeo)
				break;
			/* Drop the read lock while sleeping for data. */
			mutex_unlock(&u->readlock);

			timeo = unix_stream_data_wait(sk, timeo);

			if (signal_pending(current)) {
				err = sock_intr_errno(timeo);
				goto out;
			}
			mutex_lock(&u->readlock);
			continue;
		}

		if (check_creds) {
			/* Never glue messages from different writers */
			if (memcmp(UNIXCREDS(skb), &siocb->scm->creds, sizeof(siocb->scm->creds)) != 0) {
				skb_queue_head(&sk->sk_receive_queue, skb);
				break;
			}
		} else {
			/* Copy credentials */
			siocb->scm->creds = *UNIXCREDS(skb);
			check_creds = 1;
		}

		/* Copy address just once */
		if (sunaddr)
		{
			unix_copy_addr(msg, skb->sk);
			sunaddr = NULL;
		}

		chunk = min_t(unsigned int, skb->len, size);
		if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
			skb_queue_head(&sk->sk_receive_queue, skb);
			if (copied == 0)
				copied = -EFAULT;
			break;
		}
		copied += chunk;
		size -= chunk;

		/* Mark read part of skb as used */
		if (!(flags & MSG_PEEK))
		{
			skb_pull(skb, chunk);

			if (UNIXCB(skb).fp)
				unix_detach_fds(siocb->scm, skb);

			/* put the skb back if we didn't use it up.. */
			if (skb->len)
			{
				skb_queue_head(&sk->sk_receive_queue, skb);
				break;
			}

			kfree_skb(skb);

			/* Stop after picking up fds so they are delivered
			 * with exactly the data that carried them. */
			if (siocb->scm->fp)
				break;
		}
		else
		{
			/* It is questionable, see note in unix_dgram_recvmsg.
			 */
			if (UNIXCB(skb).fp)
				siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);

			/* put message back and return */
			skb_queue_head(&sk->sk_receive_queue, skb);
			break;
		}
	} while (size);

	mutex_unlock(&u->readlock);
	scm_recv(sock, msg, siocb->scm, flags);
out:
	return copied ? : err;
}
1810
/*
 *	shutdown(2): set this end's shutdown bits and mirror them onto the
 *	peer of a connection-oriented socket (our RCV becomes its SEND and
 *	vice versa), then wake anyone sleeping or polling on either end.
 */
static int unix_shutdown(struct socket *sock, int mode)
{
	struct sock *sk = sock->sk;
	struct sock *other;

	/* Map SHUT_RD/WR/RDWR (0/1/2) onto RCV_/SEND_SHUTDOWN bits. */
	mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);

	if (mode) {
		unix_state_wlock(sk);
		sk->sk_shutdown |= mode;
		other=unix_peer(sk);
		if (other)
			sock_hold(other);
		unix_state_wunlock(sk);
		sk->sk_state_change(sk);

		if (other &&
			(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {

			int peer_mode = 0;

			/* Our read shutdown is the peer's write shutdown. */
			if (mode&RCV_SHUTDOWN)
				peer_mode |= SEND_SHUTDOWN;
			if (mode&SEND_SHUTDOWN)
				peer_mode |= RCV_SHUTDOWN;
			unix_state_wlock(other);
			other->sk_shutdown |= peer_mode;
			unix_state_wunlock(other);
			other->sk_state_change(other);
			read_lock(&other->sk_callback_lock);
			if (peer_mode == SHUTDOWN_MASK)
				sk_wake_async(other,1,POLL_HUP);
			else if (peer_mode & RCV_SHUTDOWN)
				sk_wake_async(other,1,POLL_IN);
			read_unlock(&other->sk_callback_lock);
		}
		if (other)
			sock_put(other);
	}
	return 0;
}
1852
1853static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1854{
1855	struct sock *sk = sock->sk;
1856	long amount=0;
1857	int err;
1858
1859	switch(cmd)
1860	{
1861		case SIOCOUTQ:
1862			amount = atomic_read(&sk->sk_wmem_alloc);
1863			err = put_user(amount, (int __user *)arg);
1864			break;
1865		case SIOCINQ:
1866		{
1867			struct sk_buff *skb;
1868
1869			if (sk->sk_state == TCP_LISTEN) {
1870				err = -EINVAL;
1871				break;
1872			}
1873
1874			spin_lock(&sk->sk_receive_queue.lock);
1875			if (sk->sk_type == SOCK_STREAM ||
1876			    sk->sk_type == SOCK_SEQPACKET) {
1877				skb_queue_walk(&sk->sk_receive_queue, skb)
1878					amount += skb->len;
1879			} else {
1880				skb = skb_peek(&sk->sk_receive_queue);
1881				if (skb)
1882					amount=skb->len;
1883			}
1884			spin_unlock(&sk->sk_receive_queue.lock);
1885			err = put_user(amount, (int __user *)arg);
1886			break;
1887		}
1888
1889		default:
1890			err = -ENOIOCTLCMD;
1891			break;
1892	}
1893	return err;
1894}
1895
1896static unsigned int unix_poll(struct file * file, struct socket *sock, poll_table *wait)
1897{
1898	struct sock *sk = sock->sk;
1899	unsigned int mask;
1900
1901	poll_wait(file, sk->sk_sleep, wait);
1902	mask = 0;
1903
1904	/* exceptional events? */
1905	if (sk->sk_err)
1906		mask |= POLLERR;
1907	if (sk->sk_shutdown == SHUTDOWN_MASK)
1908		mask |= POLLHUP;
1909	if (sk->sk_shutdown & RCV_SHUTDOWN)
1910		mask |= POLLRDHUP;
1911
1912	/* readable? */
1913	if (!skb_queue_empty(&sk->sk_receive_queue) ||
1914	    (sk->sk_shutdown & RCV_SHUTDOWN))
1915		mask |= POLLIN | POLLRDNORM;
1916
1917	/* Connection-based need to check for termination and startup */
1918	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && sk->sk_state == TCP_CLOSE)
1919		mask |= POLLHUP;
1920
1921	/*
1922	 * we set writable also when the other side has shut down the
1923	 * connection. This prevents stuck sockets.
1924	 */
1925	if (unix_writable(sk))
1926		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
1927
1928	return mask;
1929}
1930
1931
1932#ifdef CONFIG_PROC_FS
1933static struct sock *unix_seq_idx(int *iter, loff_t pos)
1934{
1935	loff_t off = 0;
1936	struct sock *s;
1937
1938	for (s = first_unix_socket(iter); s; s = next_unix_socket(iter, s)) {
1939		if (off == pos)
1940			return s;
1941		++off;
1942	}
1943	return NULL;
1944}
1945
1946
1947static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
1948{
1949	spin_lock(&unix_table_lock);
1950	return *pos ? unix_seq_idx(seq->private, *pos - 1) : ((void *) 1);
1951}
1952
1953static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1954{
1955	++*pos;
1956
1957	if (v == (void *)1)
1958		return first_unix_socket(seq->private);
1959	return next_unix_socket(seq->private, v);
1960}
1961
/* Release the table lock taken in unix_seq_start(). */
static void unix_seq_stop(struct seq_file *seq, void *v)
{
	spin_unlock(&unix_table_lock);
}
1966
/*
 *	Emit one /proc/net/unix row: the column header for the (void *)1
 *	token, otherwise "addr refcnt proto flags type state inode [path]".
 */
static int unix_seq_show(struct seq_file *seq, void *v)
{

	if (v == (void *)1)
		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
			 "Inode Path\n");
	else {
		struct sock *s = v;
		struct unix_sock *u = unix_sk(s);
		unix_state_rlock(s);

		seq_printf(seq, "%p: %08X %08X %08X %04X %02X %5lu",
			s,
			atomic_read(&s->sk_refcnt),
			0,	/* protocol: always 0 for AF_UNIX */
			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
			s->sk_type,
			s->sk_socket ?
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
			sock_i_ino(s));

		if (u->addr) {
			int i, len;
			seq_putc(seq, ' ');

			i = 0;
			/* Drop the sun_family prefix from the stored length. */
			len = u->addr->len - sizeof(short);
			if (!UNIX_ABSTRACT(s))
				len--;
			else {
				/* Abstract names begin with NUL; show '@'. */
				seq_putc(seq, '@');
				i++;
			}
			for ( ; i < len; i++)
				seq_putc(seq, u->addr->name->sun_path[i]);
		}
		unix_state_runlock(s);
		seq_putc(seq, '\n');
	}

	return 0;
}
2010
/* seq_file iterator callbacks backing /proc/net/unix. */
static struct seq_operations unix_seq_ops = {
	.start  = unix_seq_start,
	.next   = unix_seq_next,
	.stop   = unix_seq_stop,
	.show   = unix_seq_show,
};
2017
2018
2019static int unix_seq_open(struct inode *inode, struct file *file)
2020{
2021	struct seq_file *seq;
2022	int rc = -ENOMEM;
2023	int *iter = kmalloc(sizeof(int), GFP_KERNEL);
2024
2025	if (!iter)
2026		goto out;
2027
2028	rc = seq_open(file, &unix_seq_ops);
2029	if (rc)
2030		goto out_kfree;
2031
2032	seq	     = file->private_data;
2033	seq->private = iter;
2034	*iter = 0;
2035out:
2036	return rc;
2037out_kfree:
2038	kfree(iter);
2039	goto out;
2040}
2041
/* /proc/net/unix file operations; seq_release_private() frees the
 * iterator allocated in unix_seq_open(). */
static struct file_operations unix_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= unix_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_private,
};
2049
2050#endif
2051
/* Registration entry for the PF_UNIX protocol family. */
static struct net_proto_family unix_family_ops = {
	.family = PF_UNIX,
	.create = unix_create,
	.owner	= THIS_MODULE,
};
2057
2058static int __init af_unix_init(void)
2059{
2060	int rc = -1;
2061	struct sk_buff *dummy_skb;
2062
2063	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb));
2064
2065	rc = proto_register(&unix_proto, 1);
2066        if (rc != 0) {
2067                printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n",
2068		       __FUNCTION__);
2069		goto out;
2070	}
2071
2072	sock_register(&unix_family_ops);
2073#ifdef CONFIG_PROC_FS
2074	proc_net_fops_create("unix", 0, &unix_seq_fops);
2075#endif
2076	unix_sysctl_register();
2077out:
2078	return rc;
2079}
2080
/* Module unload: tear down in reverse of af_unix_init() — family,
 * sysctls, procfs entry, then the protocol itself. */
static void __exit af_unix_exit(void)
{
	sock_unregister(PF_UNIX);
	unix_sysctl_unregister();
	proc_net_remove("unix");
	proto_unregister(&unix_proto);
}
2088
/* Module entry/exit points and metadata. */
module_init(af_unix_init);
module_exit(af_unix_exit);

MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_UNIX);
2094