af_unix.c revision f81a0bffa116ea22149aa7cfb0b4ee09096d9d92
1/*
2 * NET4:	Implementation of BSD Unix domain sockets.
3 *
4 * Authors:	Alan Cox, <alan.cox@linux.org>
5 *
6 *		This program is free software; you can redistribute it and/or
7 *		modify it under the terms of the GNU General Public License
8 *		as published by the Free Software Foundation; either version
9 *		2 of the License, or (at your option) any later version.
10 *
11 * Version:	$Id: af_unix.c,v 1.133 2002/02/08 03:57:19 davem Exp $
12 *
13 * Fixes:
14 *		Linus Torvalds	:	Assorted bug cures.
15 *		Niibe Yutaka	:	async I/O support.
16 *		Carsten Paeth	:	PF_UNIX check, address fixes.
17 *		Alan Cox	:	Limit size of allocated blocks.
18 *		Alan Cox	:	Fixed the stupid socketpair bug.
19 *		Alan Cox	:	BSD compatibility fine tuning.
20 *		Alan Cox	:	Fixed a bug in connect when interrupted.
21 *		Alan Cox	:	Sorted out a proper draft version of
22 *					file descriptor passing hacked up from
23 *					Mike Shaver's work.
24 *		Marty Leisner	:	Fixes to fd passing
25 *		Nick Nevin	:	recvmsg bugfix.
26 *		Alan Cox	:	Started proper garbage collector
27 *		Heiko Eißfeldt	:	Missing verify_area check
28 *		Alan Cox	:	Started POSIXisms
29 *		Andreas Schwab	:	Replace inode by dentry for proper
30 *					reference counting
31 *		Kirk Petersen	:	Made this a module
32 *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
33 *					Lots of bug fixes.
34 *	     Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
35 *					by the above two patches.
36 *	     Andrea Arcangeli	:	If possible we block in connect(2)
37 *					if the max backlog of the listen socket
38 *					has been reached. This won't break
39 *					old apps and it will avoid a huge amount
40 *					of hashed socks (for unix_gc()
41 *					performance reasons).
42 *					Security fix that limits the max
43 *					number of socks to 2*max_files and
44 *					the number of skbs queueable in the
45 *					dgram receiver.
46 *		Artur Skawina   :	Hash function optimizations
47 *	     Alexey Kuznetsov   :	Full scale SMP. Lots of bugs are introduced 8)
48 *	      Malcolm Beattie   :	Set peercred for socketpair
49 *	     Michal Ostrowski   :       Module initialization cleanup.
50 *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
51 *	     				the core infrastructure is doing that
52 *	     				for all net proto families now (2.5.69+)
53 *
54 *
55 * Known differences from reference BSD that was tested:
56 *
57 *	[TO FIX]
58 *	ECONNREFUSED is not returned from one end of a connected() socket to the
59 *		other the moment one end closes.
60 *	fstat() doesn't return st_dev=0, and gives the blksize as a high water mark
61 *		and a fake inode identifier (nor the BSD first-socket-fstat-twice bug).
62 *	[NOT TO FIX]
63 *	accept() returns a path name even if the connecting socket has closed
64 *		in the meantime (BSD loses the path and gives up).
65 *	accept() returns a 0-length path for an unbound connector. BSD returns 16
66 *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
67 *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
68 *	BSD af_unix apparently has connect forgetting to block properly.
69 *		(need to check this with the POSIX spec in detail)
70 *
71 * Differences from 2.0.0-11-... (ANK)
72 *	Bug fixes and improvements.
73 *		- client shutdown killed server socket.
74 *		- removed all useless cli/sti pairs.
75 *
76 *	Semantic changes/extensions.
77 *		- generic control message passing.
78 *		- SCM_CREDENTIALS control message.
79 *		- "Abstract" (not FS based) socket bindings.
80 *		  Abstract names are sequences of bytes (not zero terminated)
81 *		  starting with a 0 byte, so that this namespace does not intersect
82 *		  with BSD names.
83 */
84
85#include <linux/module.h>
86#include <linux/config.h>
87#include <linux/kernel.h>
88#include <linux/signal.h>
89#include <linux/sched.h>
90#include <linux/errno.h>
91#include <linux/string.h>
92#include <linux/stat.h>
93#include <linux/dcache.h>
94#include <linux/namei.h>
95#include <linux/socket.h>
96#include <linux/un.h>
97#include <linux/fcntl.h>
98#include <linux/termios.h>
99#include <linux/sockios.h>
100#include <linux/net.h>
101#include <linux/in.h>
102#include <linux/fs.h>
103#include <linux/slab.h>
104#include <asm/uaccess.h>
105#include <linux/skbuff.h>
106#include <linux/netdevice.h>
107#include <net/sock.h>
108#include <linux/tcp.h>
109#include <net/af_unix.h>
110#include <linux/proc_fs.h>
111#include <linux/seq_file.h>
112#include <net/scm.h>
113#include <linux/init.h>
114#include <linux/poll.h>
115#include <linux/smp_lock.h>
116#include <linux/rtnetlink.h>
117#include <linux/mount.h>
118#include <net/checksum.h>
119#include <linux/security.h>
120
121int sysctl_unix_max_dgram_qlen = 10;
122
123struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
124DEFINE_RWLOCK(unix_table_lock);
125static atomic_t unix_nr_socks = ATOMIC_INIT(0);
126
127#define unix_sockets_unbound	(&unix_socket_table[UNIX_HASH_SIZE])
128
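/* An address bound to a filesystem path stores UNIX_HASH_SIZE in addr->hash
 * (see unix_bind), so any other value means an abstract (or autobound) name. */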
129#define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)
130
131/*
132 *  SMP locking strategy:
133 *    the hash table is protected by the rwlock unix_table_lock
134 *    each socket's state is protected by a separate rwlock.
135 */
136
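/* Fold a 32-bit checksum of the address bytes down to an index into
 * unix_socket_table. */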
137static inline unsigned unix_hash_fold(unsigned hash)
138{
139	hash ^= hash>>16;
140	hash ^= hash>>8;
141	return hash&(UNIX_HASH_SIZE-1);
142}
143
144#define unix_peer(sk) (unix_sk(sk)->peer)
145
146static inline int unix_our_peer(struct sock *sk, struct sock *osk)
147{
148	return unix_peer(osk) == sk;
149}
150
151static inline int unix_may_send(struct sock *sk, struct sock *osk)
152{
153	return (unix_peer(osk) == NULL || unix_our_peer(sk, osk));
154}
155
156static struct sock *unix_peer_get(struct sock *s)
157{
158	struct sock *peer;
159
160	unix_state_rlock(s);
161	peer = unix_peer(s);
162	if (peer)
163		sock_hold(peer);
164	unix_state_runlock(s);
165	return peer;
166}
167
168static inline void unix_release_addr(struct unix_address *addr)
169{
170	if (atomic_dec_and_test(&addr->refcnt))
171		kfree(addr);
172}
173
174/*
175 *	Check unix socket name:
176 *		- should not be zero length.
177 *	        - if it does not start with a zero byte, it should be NUL terminated (FS object)
178 *		- if it starts with a zero byte, it is an abstract name.
179 */
180
181static int unix_mkname(struct sockaddr_un * sunaddr, int len, unsigned *hashp)
182{
183	if (len <= sizeof(short) || len > sizeof(*sunaddr))
184		return -EINVAL;
185	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
186		return -EINVAL;
187	if (sunaddr->sun_path[0]) {
188		/*
189		 * This may look like an off-by-one error but it is a bit more
190		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
191		 * sun_path[108] doesn't as such exist.  However in kernel space
192		 * we are guaranteed that it is a valid memory location in our
193		 * kernel address buffer.
194		 */
195		((char *)sunaddr)[len]=0;
196		len = strlen(sunaddr->sun_path)+1+sizeof(short);
197		return len;
198	}
199
200	*hashp = unix_hash_fold(csum_partial((char*)sunaddr, len, 0));
201	return len;
202}
203
204static void __unix_remove_socket(struct sock *sk)
205{
206	sk_del_node_init(sk);
207}
208
209static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
210{
211	BUG_TRAP(sk_unhashed(sk));
212	sk_add_node(sk, list);
213}
214
215static inline void unix_remove_socket(struct sock *sk)
216{
217	write_lock(&unix_table_lock);
218	__unix_remove_socket(sk);
219	write_unlock(&unix_table_lock);
220}
221
222static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
223{
224	write_lock(&unix_table_lock);
225	__unix_insert_socket(list, sk);
226	write_unlock(&unix_table_lock);
227}
228
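/* Look up a bound socket by address.  The chain is indexed by hash XOR socket
 * type, so identically named sockets of different types do not collide.
 * Caller must hold unix_table_lock. */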
229static struct sock *__unix_find_socket_byname(struct sockaddr_un *sunname,
230					      int len, int type, unsigned hash)
231{
232	struct sock *s;
233	struct hlist_node *node;
234
235	sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
236		struct unix_sock *u = unix_sk(s);
237
238		if (u->addr->len == len &&
239		    !memcmp(u->addr->name, sunname, len))
240			goto found;
241	}
242	s = NULL;
243found:
244	return s;
245}
246
247static inline struct sock *unix_find_socket_byname(struct sockaddr_un *sunname,
248						   int len, int type,
249						   unsigned hash)
250{
251	struct sock *s;
252
253	read_lock(&unix_table_lock);
254	s = __unix_find_socket_byname(sunname, len, type, hash);
255	if (s)
256		sock_hold(s);
257	read_unlock(&unix_table_lock);
258	return s;
259}
260
261static struct sock *unix_find_socket_byinode(struct inode *i)
262{
263	struct sock *s;
264	struct hlist_node *node;
265
266	read_lock(&unix_table_lock);
267	sk_for_each(s, node,
268		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
269		struct dentry *dentry = unix_sk(s)->dentry;
270
271		if(dentry && dentry->d_inode == i)
272		{
273			sock_hold(s);
274			goto found;
275		}
276	}
277	s = NULL;
278found:
279	read_unlock(&unix_table_lock);
280	return s;
281}
282
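/* A unix socket counts as writable while no more than a quarter of its send
 * buffer is consumed by in-flight skbs. */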
283static inline int unix_writable(struct sock *sk)
284{
285	return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
286}
287
288static void unix_write_space(struct sock *sk)
289{
290	read_lock(&sk->sk_callback_lock);
291	if (unix_writable(sk)) {
292		if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
293			wake_up_interruptible(sk->sk_sleep);
294		sk_wake_async(sk, 2, POLL_OUT);
295	}
296	read_unlock(&sk->sk_callback_lock);
297}
298
299/* When a dgram socket disconnects (or changes its peer), we clear its receive
300 * queue of packets that arrived from the previous peer. First, this allows
301 * flow control based only on wmem_alloc; second, an sk connected to a peer
302 * may receive messages only from that peer. */
303static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
304{
305	if (skb_queue_len(&sk->sk_receive_queue)) {
306		skb_queue_purge(&sk->sk_receive_queue);
307		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
308
309		/* If one link of a bidirectional dgram pipe is disconnected,
310		 * we signal an error. Messages are lost. Do not do this
311		 * when the peer was not connected to us.
312		 */
313		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
314			other->sk_err = ECONNRESET;
315			other->sk_error_report(other);
316		}
317	}
318}
319
320static void unix_sock_destructor(struct sock *sk)
321{
322	struct unix_sock *u = unix_sk(sk);
323
324	skb_queue_purge(&sk->sk_receive_queue);
325
326	BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
327	BUG_TRAP(sk_unhashed(sk));
328	BUG_TRAP(!sk->sk_socket);
329	if (!sock_flag(sk, SOCK_DEAD)) {
330		printk("Attempt to release alive unix socket: %p\n", sk);
331		return;
332	}
333
334	if (u->addr)
335		unix_release_addr(u->addr);
336
337	atomic_dec(&unix_nr_socks);
338#ifdef UNIX_REFCNT_DEBUG
339	printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk, atomic_read(&unix_nr_socks));
340#endif
341}
342
343static int unix_release_sock (struct sock *sk, int embrion)
344{
345	struct unix_sock *u = unix_sk(sk);
346	struct dentry *dentry;
347	struct vfsmount *mnt;
348	struct sock *skpair;
349	struct sk_buff *skb;
350	int state;
351
352	unix_remove_socket(sk);
353
354	/* Clear state */
355	unix_state_wlock(sk);
356	sock_orphan(sk);
357	sk->sk_shutdown = SHUTDOWN_MASK;
358	dentry	     = u->dentry;
359	u->dentry    = NULL;
360	mnt	     = u->mnt;
361	u->mnt	     = NULL;
362	state = sk->sk_state;
363	sk->sk_state = TCP_CLOSE;
364	unix_state_wunlock(sk);
365
366	wake_up_interruptible_all(&u->peer_wait);
367
368	skpair=unix_peer(sk);
369
370	if (skpair!=NULL) {
371		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
372			unix_state_wlock(skpair);
373			/* No more writes */
374			skpair->sk_shutdown = SHUTDOWN_MASK;
375			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
376				skpair->sk_err = ECONNRESET;
377			unix_state_wunlock(skpair);
378			skpair->sk_state_change(skpair);
379			read_lock(&skpair->sk_callback_lock);
380			sk_wake_async(skpair,1,POLL_HUP);
381			read_unlock(&skpair->sk_callback_lock);
382		}
383		sock_put(skpair); /* It may now die */
384		unix_peer(sk) = NULL;
385	}
386
387	/* Try to flush out this socket. Throw out buffers at least */
388
389	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
390		if (state==TCP_LISTEN)
391			unix_release_sock(skb->sk, 1);
392		/* passed fds are erased in the kfree_skb hook	      */
393		kfree_skb(skb);
394	}
395
396	if (dentry) {
397		dput(dentry);
398		mntput(mnt);
399	}
400
401	sock_put(sk);
402
403	/* ---- Socket is dead now and most probably destroyed ---- */
404
405	/*
406	 * Fixme: BSD difference: In BSD all sockets connected to us get
407	 *	  ECONNRESET and we die on the spot. In Linux we behave
408	 *	  like files and pipes do and wait for the last
409	 *	  dereference.
410	 *
411	 * Can't we simply set sock->err?
412	 *
413	 *	  What does the above comment talk about? --ANK(980817)
414	 */
415
416	if (atomic_read(&unix_tot_inflight))
417		unix_gc();		/* Garbage collect fds */
418
419	return 0;
420}
421
422static int unix_listen(struct socket *sock, int backlog)
423{
424	int err;
425	struct sock *sk = sock->sk;
426	struct unix_sock *u = unix_sk(sk);
427
428	err = -EOPNOTSUPP;
429	if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET)
430		goto out;			/* Only stream/seqpacket sockets accept */
431	err = -EINVAL;
432	if (!u->addr)
433		goto out;			/* No listens on an unbound socket */
434	unix_state_wlock(sk);
435	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
436		goto out_unlock;
437	if (backlog > sk->sk_max_ack_backlog)
438		wake_up_interruptible_all(&u->peer_wait);
439	sk->sk_max_ack_backlog	= backlog;
440	sk->sk_state		= TCP_LISTEN;
441	/* set credentials so connect can copy them */
442	sk->sk_peercred.pid	= current->tgid;
443	sk->sk_peercred.uid	= current->euid;
444	sk->sk_peercred.gid	= current->egid;
445	err = 0;
446
447out_unlock:
448	unix_state_wunlock(sk);
449out:
450	return err;
451}
452
453static int unix_release(struct socket *);
454static int unix_bind(struct socket *, struct sockaddr *, int);
455static int unix_stream_connect(struct socket *, struct sockaddr *,
456			       int addr_len, int flags);
457static int unix_socketpair(struct socket *, struct socket *);
458static int unix_accept(struct socket *, struct socket *, int);
459static int unix_getname(struct socket *, struct sockaddr *, int *, int);
460static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
461static int unix_ioctl(struct socket *, unsigned int, unsigned long);
462static int unix_shutdown(struct socket *, int);
463static int unix_stream_sendmsg(struct kiocb *, struct socket *,
464			       struct msghdr *, size_t);
465static int unix_stream_recvmsg(struct kiocb *, struct socket *,
466			       struct msghdr *, size_t, int);
467static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
468			      struct msghdr *, size_t);
469static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
470			      struct msghdr *, size_t, int);
471static int unix_dgram_connect(struct socket *, struct sockaddr *,
472			      int, int);
473static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
474				  struct msghdr *, size_t);
475
476static struct proto_ops unix_stream_ops = {
477	.family =	PF_UNIX,
478	.owner =	THIS_MODULE,
479	.release =	unix_release,
480	.bind =		unix_bind,
481	.connect =	unix_stream_connect,
482	.socketpair =	unix_socketpair,
483	.accept =	unix_accept,
484	.getname =	unix_getname,
485	.poll =		unix_poll,
486	.ioctl =	unix_ioctl,
487	.listen =	unix_listen,
488	.shutdown =	unix_shutdown,
489	.setsockopt =	sock_no_setsockopt,
490	.getsockopt =	sock_no_getsockopt,
491	.sendmsg =	unix_stream_sendmsg,
492	.recvmsg =	unix_stream_recvmsg,
493	.mmap =		sock_no_mmap,
494	.sendpage =	sock_no_sendpage,
495};
496
497static struct proto_ops unix_dgram_ops = {
498	.family =	PF_UNIX,
499	.owner =	THIS_MODULE,
500	.release =	unix_release,
501	.bind =		unix_bind,
502	.connect =	unix_dgram_connect,
503	.socketpair =	unix_socketpair,
504	.accept =	sock_no_accept,
505	.getname =	unix_getname,
506	.poll =		datagram_poll,
507	.ioctl =	unix_ioctl,
508	.listen =	sock_no_listen,
509	.shutdown =	unix_shutdown,
510	.setsockopt =	sock_no_setsockopt,
511	.getsockopt =	sock_no_getsockopt,
512	.sendmsg =	unix_dgram_sendmsg,
513	.recvmsg =	unix_dgram_recvmsg,
514	.mmap =		sock_no_mmap,
515	.sendpage =	sock_no_sendpage,
516};
517
518static struct proto_ops unix_seqpacket_ops = {
519	.family =	PF_UNIX,
520	.owner =	THIS_MODULE,
521	.release =	unix_release,
522	.bind =		unix_bind,
523	.connect =	unix_stream_connect,
524	.socketpair =	unix_socketpair,
525	.accept =	unix_accept,
526	.getname =	unix_getname,
527	.poll =		datagram_poll,
528	.ioctl =	unix_ioctl,
529	.listen =	unix_listen,
530	.shutdown =	unix_shutdown,
531	.setsockopt =	sock_no_setsockopt,
532	.getsockopt =	sock_no_getsockopt,
533	.sendmsg =	unix_seqpacket_sendmsg,
534	.recvmsg =	unix_dgram_recvmsg,
535	.mmap =		sock_no_mmap,
536	.sendpage =	sock_no_sendpage,
537};
538
539static struct proto unix_proto = {
540	.name	  = "UNIX",
541	.owner	  = THIS_MODULE,
542	.obj_size = sizeof(struct unix_sock),
543};
544
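/* Allocate and initialise a unix sock.  Called with sock == NULL for the
 * embryo sock created while connecting to a listening stream socket. */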
545static struct sock * unix_create1(struct socket *sock)
546{
547	struct sock *sk = NULL;
548	struct unix_sock *u;
549
550	if (atomic_read(&unix_nr_socks) >= 2*files_stat.max_files)
551		goto out;
552
553	sk = sk_alloc(PF_UNIX, GFP_KERNEL, &unix_proto, 1);
554	if (!sk)
555		goto out;
556
557	atomic_inc(&unix_nr_socks);
558
559	sock_init_data(sock,sk);
560
561	sk->sk_write_space	= unix_write_space;
562	sk->sk_max_ack_backlog	= sysctl_unix_max_dgram_qlen;
563	sk->sk_destruct		= unix_sock_destructor;
564	u	  = unix_sk(sk);
565	u->dentry = NULL;
566	u->mnt	  = NULL;
567	rwlock_init(&u->lock);
568	atomic_set(&u->inflight, sock ? 0 : -1);
569	init_MUTEX(&u->readsem); /* single task reading lock */
570	init_waitqueue_head(&u->peer_wait);
571	unix_insert_socket(unix_sockets_unbound, sk);
572out:
573	return sk;
574}
575
576static int unix_create(struct socket *sock, int protocol)
577{
578	if (protocol && protocol != PF_UNIX)
579		return -EPROTONOSUPPORT;
580
581	sock->state = SS_UNCONNECTED;
582
583	switch (sock->type) {
584	case SOCK_STREAM:
585		sock->ops = &unix_stream_ops;
586		break;
587		/*
588		 *	Believe it or not BSD has AF_UNIX, SOCK_RAW though
589		 *	nothing uses it.
590		 */
591	case SOCK_RAW:
592		sock->type=SOCK_DGRAM;
593	case SOCK_DGRAM:
594		sock->ops = &unix_dgram_ops;
595		break;
596	case SOCK_SEQPACKET:
597		sock->ops = &unix_seqpacket_ops;
598		break;
599	default:
600		return -ESOCKTNOSUPPORT;
601	}
602
603	return unix_create1(sock) ? 0 : -ENOMEM;
604}
605
606static int unix_release(struct socket *sock)
607{
608	struct sock *sk = sock->sk;
609
610	if (!sk)
611		return 0;
612
613	sock->sk = NULL;
614
615	return unix_release_sock (sk, 0);
616}
617
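/* Bind an unbound socket to an abstract name of the form "\0XXXXX" (five hex
 * digits), retrying until an unused name is found. */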
618static int unix_autobind(struct socket *sock)
619{
620	struct sock *sk = sock->sk;
621	struct unix_sock *u = unix_sk(sk);
622	static u32 ordernum = 1;
623	struct unix_address * addr;
624	int err;
625
626	down(&u->readsem);
627
628	err = 0;
629	if (u->addr)
630		goto out;
631
632	err = -ENOMEM;
633	addr = kmalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
634	if (!addr)
635		goto out;
636
637	memset(addr, 0, sizeof(*addr) + sizeof(short) + 16);
638	addr->name->sun_family = AF_UNIX;
639	atomic_set(&addr->refcnt, 1);
640
641retry:
642	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
643	addr->hash = unix_hash_fold(csum_partial((void*)addr->name, addr->len, 0));
644
645	write_lock(&unix_table_lock);
646	ordernum = (ordernum+1)&0xFFFFF;
647
648	if (__unix_find_socket_byname(addr->name, addr->len, sock->type,
649				      addr->hash)) {
650		write_unlock(&unix_table_lock);
651		/* Sanity yield. It is an unusual case, but still... */
652		if (!(ordernum&0xFF))
653			yield();
654		goto retry;
655	}
656	addr->hash ^= sk->sk_type;
657
658	__unix_remove_socket(sk);
659	u->addr = addr;
660	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
661	write_unlock(&unix_table_lock);
662	err = 0;
663
664out:	up(&u->readsem);
665	return err;
666}
667
668static struct sock *unix_find_other(struct sockaddr_un *sunname, int len,
669				    int type, unsigned hash, int *error)
670{
671	struct sock *u;
672	struct nameidata nd;
673	int err = 0;
674
675	if (sunname->sun_path[0]) {
676		err = path_lookup(sunname->sun_path, LOOKUP_FOLLOW, &nd);
677		if (err)
678			goto fail;
679		err = permission(nd.dentry->d_inode,MAY_WRITE, &nd);
680		if (err)
681			goto put_fail;
682
683		err = -ECONNREFUSED;
684		if (!S_ISSOCK(nd.dentry->d_inode->i_mode))
685			goto put_fail;
686		u=unix_find_socket_byinode(nd.dentry->d_inode);
687		if (!u)
688			goto put_fail;
689
690		if (u->sk_type == type)
691			touch_atime(nd.mnt, nd.dentry);
692
693		path_release(&nd);
694
695		err=-EPROTOTYPE;
696		if (u->sk_type != type) {
697			sock_put(u);
698			goto fail;
699		}
700	} else {
701		err = -ECONNREFUSED;
702		u=unix_find_socket_byname(sunname, len, type, hash);
703		if (u) {
704			struct dentry *dentry;
705			dentry = unix_sk(u)->dentry;
706			if (dentry)
707				touch_atime(unix_sk(u)->mnt, dentry);
708		} else
709			goto fail;
710	}
711	return u;
712
713put_fail:
714	path_release(&nd);
715fail:
716	*error=err;
717	return NULL;
718}
719
720
721static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
722{
723	struct sock *sk = sock->sk;
724	struct unix_sock *u = unix_sk(sk);
725	struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
726	struct dentry * dentry = NULL;
727	struct nameidata nd;
728	int err;
729	unsigned hash;
730	struct unix_address *addr;
731	struct hlist_head *list;
732
733	err = -EINVAL;
734	if (sunaddr->sun_family != AF_UNIX)
735		goto out;
736
737	if (addr_len==sizeof(short)) {
738		err = unix_autobind(sock);
739		goto out;
740	}
741
742	err = unix_mkname(sunaddr, addr_len, &hash);
743	if (err < 0)
744		goto out;
745	addr_len = err;
746
747	down(&u->readsem);
748
749	err = -EINVAL;
750	if (u->addr)
751		goto out_up;
752
753	err = -ENOMEM;
754	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
755	if (!addr)
756		goto out_up;
757
758	memcpy(addr->name, sunaddr, addr_len);
759	addr->len = addr_len;
760	addr->hash = hash ^ sk->sk_type;
761	atomic_set(&addr->refcnt, 1);
762
763	if (sunaddr->sun_path[0]) {
764		unsigned int mode;
765		err = 0;
766		/*
767		 * Get the parent directory, calculate the hash for last
768		 * component.
769		 */
770		err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd);
771		if (err)
772			goto out_mknod_parent;
773
774		dentry = lookup_create(&nd, 0);
775		err = PTR_ERR(dentry);
776		if (IS_ERR(dentry))
777			goto out_mknod_unlock;
778
779		/*
780		 * All right, let's create it.
781		 */
782		mode = S_IFSOCK |
783		       (SOCK_INODE(sock)->i_mode & ~current->fs->umask);
784		err = vfs_mknod(nd.dentry->d_inode, dentry, mode, 0);
785		if (err)
786			goto out_mknod_dput;
787		up(&nd.dentry->d_inode->i_sem);
788		dput(nd.dentry);
789		nd.dentry = dentry;
790
791		addr->hash = UNIX_HASH_SIZE;
792	}
793
794	write_lock(&unix_table_lock);
795
796	if (!sunaddr->sun_path[0]) {
797		err = -EADDRINUSE;
798		if (__unix_find_socket_byname(sunaddr, addr_len,
799					      sk->sk_type, hash)) {
800			unix_release_addr(addr);
801			goto out_unlock;
802		}
803
804		list = &unix_socket_table[addr->hash];
805	} else {
806		list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
807		u->dentry = nd.dentry;
808		u->mnt    = nd.mnt;
809	}
810
811	err = 0;
812	__unix_remove_socket(sk);
813	u->addr = addr;
814	__unix_insert_socket(list, sk);
815
816out_unlock:
817	write_unlock(&unix_table_lock);
818out_up:
819	up(&u->readsem);
820out:
821	return err;
822
823out_mknod_dput:
824	dput(dentry);
825out_mknod_unlock:
826	up(&nd.dentry->d_inode->i_sem);
827	path_release(&nd);
828out_mknod_parent:
829	if (err==-EEXIST)
830		err=-EADDRINUSE;
831	unix_release_addr(addr);
832	goto out_up;
833}
834
835static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
836			      int alen, int flags)
837{
838	struct sock *sk = sock->sk;
839	struct sockaddr_un *sunaddr=(struct sockaddr_un*)addr;
840	struct sock *other;
841	unsigned hash;
842	int err;
843
844	if (addr->sa_family != AF_UNSPEC) {
845		err = unix_mkname(sunaddr, alen, &hash);
846		if (err < 0)
847			goto out;
848		alen = err;
849
850		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
851		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
852			goto out;
853
854		other=unix_find_other(sunaddr, alen, sock->type, hash, &err);
855		if (!other)
856			goto out;
857
858		unix_state_wlock(sk);
859
860		err = -EPERM;
861		if (!unix_may_send(sk, other))
862			goto out_unlock;
863
864		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
865		if (err)
866			goto out_unlock;
867
868	} else {
869		/*
870		 *	1003.1g breaking connected state with AF_UNSPEC
871		 */
872		other = NULL;
873		unix_state_wlock(sk);
874	}
875
876	/*
877	 * If it was connected, reconnect.
878	 */
879	if (unix_peer(sk)) {
880		struct sock *old_peer = unix_peer(sk);
881		unix_peer(sk)=other;
882		unix_state_wunlock(sk);
883
884		if (other != old_peer)
885			unix_dgram_disconnected(sk, old_peer);
886		sock_put(old_peer);
887	} else {
888		unix_peer(sk)=other;
889		unix_state_wunlock(sk);
890	}
891 	return 0;
892
893out_unlock:
894	unix_state_wunlock(sk);
895	sock_put(other);
896out:
897	return err;
898}
899
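/* Called with other's state lock held for reading; drops it.  If the peer's
 * receive queue is still over its backlog limit, sleep on its peer_wait queue
 * until woken or the timeout expires. */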
900static long unix_wait_for_peer(struct sock *other, long timeo)
901{
902	struct unix_sock *u = unix_sk(other);
903	int sched;
904	DEFINE_WAIT(wait);
905
906	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
907
908	sched = !sock_flag(other, SOCK_DEAD) &&
909		!(other->sk_shutdown & RCV_SHUTDOWN) &&
910		(skb_queue_len(&other->sk_receive_queue) >
911		 other->sk_max_ack_backlog);
912
913	unix_state_runlock(other);
914
915	if (sched)
916		timeo = schedule_timeout(timeo);
917
918	finish_wait(&u->peer_wait, &wait);
919	return timeo;
920}
921
922static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
923			       int addr_len, int flags)
924{
925	struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
926	struct sock *sk = sock->sk;
927	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
928	struct sock *newsk = NULL;
929	struct sock *other = NULL;
930	struct sk_buff *skb = NULL;
931	unsigned hash;
932	int st;
933	int err;
934	long timeo;
935
936	err = unix_mkname(sunaddr, addr_len, &hash);
937	if (err < 0)
938		goto out;
939	addr_len = err;
940
941	if (test_bit(SOCK_PASSCRED, &sock->flags)
942		&& !u->addr && (err = unix_autobind(sock)) != 0)
943		goto out;
944
945	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
946
947	/* First of all allocate resources.
948	   If we do it after the state is locked,
949	   we will have to recheck everything again anyway.
950	 */
951
952	err = -ENOMEM;
953
954	/* create new sock for complete connection */
955	newsk = unix_create1(NULL);
956	if (newsk == NULL)
957		goto out;
958
959	/* Allocate skb for sending to listening sock */
960	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
961	if (skb == NULL)
962		goto out;
963
964restart:
965	/*  Find listening sock. */
966	other = unix_find_other(sunaddr, addr_len, sk->sk_type, hash, &err);
967	if (!other)
968		goto out;
969
970	/* Latch state of peer */
971	unix_state_rlock(other);
972
973	/* Apparently VFS overslept socket death. Retry. */
974	if (sock_flag(other, SOCK_DEAD)) {
975		unix_state_runlock(other);
976		sock_put(other);
977		goto restart;
978	}
979
980	err = -ECONNREFUSED;
981	if (other->sk_state != TCP_LISTEN)
982		goto out_unlock;
983
984	if (skb_queue_len(&other->sk_receive_queue) >
985	    other->sk_max_ack_backlog) {
986		err = -EAGAIN;
987		if (!timeo)
988			goto out_unlock;
989
990		timeo = unix_wait_for_peer(other, timeo);
991
992		err = sock_intr_errno(timeo);
993		if (signal_pending(current))
994			goto out;
995		sock_put(other);
996		goto restart;
997        }
998
999	/* Latch our state.
1000
1001	   This is a tricky place. We need to grab the write lock and cannot
1002	   drop the lock on the peer. It is dangerous because deadlock is
1003	   possible. The connect-to-self case and simultaneous
1004	   connect attempts are eliminated by checking the socket
1005	   state: other is TCP_LISTEN and, if sk is TCP_LISTEN, we
1006	   check this before attempting to grab the lock.
1007
1008	   Well, and we have to recheck the state after the socket is locked.
1009	 */
1010	st = sk->sk_state;
1011
1012	switch (st) {
1013	case TCP_CLOSE:
1014		/* This is ok... continue with connect */
1015		break;
1016	case TCP_ESTABLISHED:
1017		/* Socket is already connected */
1018		err = -EISCONN;
1019		goto out_unlock;
1020	default:
1021		err = -EINVAL;
1022		goto out_unlock;
1023	}
1024
1025	unix_state_wlock(sk);
1026
1027	if (sk->sk_state != st) {
1028		unix_state_wunlock(sk);
1029		unix_state_runlock(other);
1030		sock_put(other);
1031		goto restart;
1032	}
1033
1034	err = security_unix_stream_connect(sock, other->sk_socket, newsk);
1035	if (err) {
1036		unix_state_wunlock(sk);
1037		goto out_unlock;
1038	}
1039
1040	/* The way is open! Quickly set all the necessary fields... */
1041
1042	sock_hold(sk);
1043	unix_peer(newsk)	= sk;
1044	newsk->sk_state		= TCP_ESTABLISHED;
1045	newsk->sk_type		= sk->sk_type;
1046	newsk->sk_peercred.pid	= current->tgid;
1047	newsk->sk_peercred.uid	= current->euid;
1048	newsk->sk_peercred.gid	= current->egid;
1049	newu = unix_sk(newsk);
1050	newsk->sk_sleep		= &newu->peer_wait;
1051	otheru = unix_sk(other);
1052
1053	/* copy address information from listening to new sock*/
1054	if (otheru->addr) {
1055		atomic_inc(&otheru->addr->refcnt);
1056		newu->addr = otheru->addr;
1057	}
1058	if (otheru->dentry) {
1059		newu->dentry	= dget(otheru->dentry);
1060		newu->mnt	= mntget(otheru->mnt);
1061	}
1062
1063	/* Set credentials */
1064	sk->sk_peercred = other->sk_peercred;
1065
1066	sock_hold(newsk);
1067	unix_peer(sk)	= newsk;
1068	sock->state	= SS_CONNECTED;
1069	sk->sk_state	= TCP_ESTABLISHED;
1070
1071	unix_state_wunlock(sk);
1072
1073	/* take ten and send info to listening sock */
1074	spin_lock(&other->sk_receive_queue.lock);
1075	__skb_queue_tail(&other->sk_receive_queue, skb);
1076	/* Undo the artificially decreased inflight after the embryo
1077	 * is installed in the listening socket. */
1078	atomic_inc(&newu->inflight);
1079	spin_unlock(&other->sk_receive_queue.lock);
1080	unix_state_runlock(other);
1081	other->sk_data_ready(other, 0);
1082	sock_put(other);
1083	return 0;
1084
1085out_unlock:
1086	if (other)
1087		unix_state_runlock(other);
1088
1089out:
1090	if (skb)
1091		kfree_skb(skb);
1092	if (newsk)
1093		unix_release_sock(newsk, 0);
1094	if (other)
1095		sock_put(other);
1096	return err;
1097}
1098
1099static int unix_socketpair(struct socket *socka, struct socket *sockb)
1100{
1101	struct sock *ska=socka->sk, *skb = sockb->sk;
1102
1103	/* Join our sockets back to back */
1104	sock_hold(ska);
1105	sock_hold(skb);
1106	unix_peer(ska)=skb;
1107	unix_peer(skb)=ska;
1108	ska->sk_peercred.pid = skb->sk_peercred.pid = current->tgid;
1109	ska->sk_peercred.uid = skb->sk_peercred.uid = current->euid;
1110	ska->sk_peercred.gid = skb->sk_peercred.gid = current->egid;
1111
1112	if (ska->sk_type != SOCK_DGRAM) {
1113		ska->sk_state = TCP_ESTABLISHED;
1114		skb->sk_state = TCP_ESTABLISHED;
1115		socka->state  = SS_CONNECTED;
1116		sockb->state  = SS_CONNECTED;
1117	}
1118	return 0;
1119}
1120
1121static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1122{
1123	struct sock *sk = sock->sk;
1124	struct sock *tsk;
1125	struct sk_buff *skb;
1126	int err;
1127
1128	err = -EOPNOTSUPP;
1129	if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET)
1130		goto out;
1131
1132	err = -EINVAL;
1133	if (sk->sk_state != TCP_LISTEN)
1134		goto out;
1135
1136	/* If socket state is TCP_LISTEN it cannot change (for now...),
1137	 * so that no locks are necessary.
1138	 */
1139
1140	skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1141	if (!skb) {
1142		/* This means receive shutdown. */
1143		if (err == 0)
1144			err = -EINVAL;
1145		goto out;
1146	}
1147
1148	tsk = skb->sk;
1149	skb_free_datagram(sk, skb);
1150	wake_up_interruptible(&unix_sk(sk)->peer_wait);
1151
1152	/* attach accepted sock to socket */
1153	unix_state_wlock(tsk);
1154	newsock->state = SS_CONNECTED;
1155	sock_graft(tsk, newsock);
1156	unix_state_wunlock(tsk);
1157	return 0;
1158
1159out:
1160	return err;
1161}
1162
1163
1164static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1165{
1166	struct sock *sk = sock->sk;
1167	struct unix_sock *u;
1168	struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
1169	int err = 0;
1170
1171	if (peer) {
1172		sk = unix_peer_get(sk);
1173
1174		err = -ENOTCONN;
1175		if (!sk)
1176			goto out;
1177		err = 0;
1178	} else {
1179		sock_hold(sk);
1180	}
1181
1182	u = unix_sk(sk);
1183	unix_state_rlock(sk);
1184	if (!u->addr) {
1185		sunaddr->sun_family = AF_UNIX;
1186		sunaddr->sun_path[0] = 0;
1187		*uaddr_len = sizeof(short);
1188	} else {
1189		struct unix_address *addr = u->addr;
1190
1191		*uaddr_len = addr->len;
1192		memcpy(sunaddr, addr->name, *uaddr_len);
1193	}
1194	unix_state_runlock(sk);
1195	sock_put(sk);
1196out:
1197	return err;
1198}
1199
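/* Move the passed file references from the skb into the scm cookie and drop
 * them from the in-flight count used by the garbage collector. */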
1200static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1201{
1202	int i;
1203
1204	scm->fp = UNIXCB(skb).fp;
1205	skb->destructor = sock_wfree;
1206	UNIXCB(skb).fp = NULL;
1207
1208	for (i=scm->fp->count-1; i>=0; i--)
1209		unix_notinflight(scm->fp->fp[i]);
1210}
1211
1212static void unix_destruct_fds(struct sk_buff *skb)
1213{
1214	struct scm_cookie scm;
1215	memset(&scm, 0, sizeof(scm));
1216	unix_detach_fds(&scm, skb);
1217
1218	/* Alas, it calls VFS */
1219	/* So fscking what? fput() had been SMP-safe since the last Summer */
1220	scm_destroy(&scm);
1221	sock_wfree(skb);
1222}
1223
1224static void unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1225{
1226	int i;
1227	for (i=scm->fp->count-1; i>=0; i--)
1228		unix_inflight(scm->fp->fp[i]);
1229	UNIXCB(skb).fp = scm->fp;
1230	skb->destructor = unix_destruct_fds;
1231	scm->fp = NULL;
1232}
1233
1234/*
1235 *	Send AF_UNIX data.
1236 */
1237
1238static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
1239			      struct msghdr *msg, size_t len)
1240{
1241	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1242	struct sock *sk = sock->sk;
1243	struct unix_sock *u = unix_sk(sk);
1244	struct sockaddr_un *sunaddr=msg->msg_name;
1245	struct sock *other = NULL;
1246	int namelen = 0; /* fake GCC */
1247	int err;
1248	unsigned hash;
1249	struct sk_buff *skb;
1250	long timeo;
1251	struct scm_cookie tmp_scm;
1252
1253	if (NULL == siocb->scm)
1254		siocb->scm = &tmp_scm;
1255	err = scm_send(sock, msg, siocb->scm);
1256	if (err < 0)
1257		return err;
1258
1259	err = -EOPNOTSUPP;
1260	if (msg->msg_flags&MSG_OOB)
1261		goto out;
1262
1263	if (msg->msg_namelen) {
1264		err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1265		if (err < 0)
1266			goto out;
1267		namelen = err;
1268	} else {
1269		sunaddr = NULL;
1270		err = -ENOTCONN;
1271		other = unix_peer_get(sk);
1272		if (!other)
1273			goto out;
1274	}
1275
1276	if (test_bit(SOCK_PASSCRED, &sock->flags)
1277		&& !u->addr && (err = unix_autobind(sock)) != 0)
1278		goto out;
1279
1280	err = -EMSGSIZE;
1281	if (len > sk->sk_sndbuf - 32)
1282		goto out;
1283
1284	skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err);
1285	if (skb==NULL)
1286		goto out;
1287
1288	memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1289	if (siocb->scm->fp)
1290		unix_attach_fds(siocb->scm, skb);
1291
1292	skb->h.raw = skb->data;
1293	err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
1294	if (err)
1295		goto out_free;
1296
1297	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1298
1299restart:
1300	if (!other) {
1301		err = -ECONNRESET;
1302		if (sunaddr == NULL)
1303			goto out_free;
1304
1305		other = unix_find_other(sunaddr, namelen, sk->sk_type,
1306					hash, &err);
1307		if (other==NULL)
1308			goto out_free;
1309	}
1310
1311	unix_state_rlock(other);
1312	err = -EPERM;
1313	if (!unix_may_send(sk, other))
1314		goto out_unlock;
1315
1316	if (sock_flag(other, SOCK_DEAD)) {
1317		/*
1318		 *	Check with 1003.1g - what should
1319		 *	datagram error
1320		 */
1321		unix_state_runlock(other);
1322		sock_put(other);
1323
1324		err = 0;
1325		unix_state_wlock(sk);
1326		if (unix_peer(sk) == other) {
1327			unix_peer(sk)=NULL;
1328			unix_state_wunlock(sk);
1329
1330			unix_dgram_disconnected(sk, other);
1331			sock_put(other);
1332			err = -ECONNREFUSED;
1333		} else {
1334			unix_state_wunlock(sk);
1335		}
1336
1337		other = NULL;
1338		if (err)
1339			goto out_free;
1340		goto restart;
1341	}
1342
1343	err = -EPIPE;
1344	if (other->sk_shutdown & RCV_SHUTDOWN)
1345		goto out_unlock;
1346
1347	if (sk->sk_type != SOCK_SEQPACKET) {
1348		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1349		if (err)
1350			goto out_unlock;
1351	}
1352
1353	if (unix_peer(other) != sk &&
1354	    (skb_queue_len(&other->sk_receive_queue) >
1355	     other->sk_max_ack_backlog)) {
1356		if (!timeo) {
1357			err = -EAGAIN;
1358			goto out_unlock;
1359		}
1360
1361		timeo = unix_wait_for_peer(other, timeo);
1362
1363		err = sock_intr_errno(timeo);
1364		if (signal_pending(current))
1365			goto out_free;
1366
1367		goto restart;
1368	}
1369
1370	skb_queue_tail(&other->sk_receive_queue, skb);
1371	unix_state_runlock(other);
1372	other->sk_data_ready(other, len);
1373	sock_put(other);
1374	scm_destroy(siocb->scm);
1375	return len;
1376
1377out_unlock:
1378	unix_state_runlock(other);
1379out_free:
1380	kfree_skb(skb);
1381out:
1382	if (other)
1383		sock_put(other);
1384	scm_destroy(siocb->scm);
1385	return err;
1386}
1387
1388
1389static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1390			       struct msghdr *msg, size_t len)
1391{
1392	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1393	struct sock *sk = sock->sk;
1394	struct sock *other = NULL;
1395	struct sockaddr_un *sunaddr=msg->msg_name;
1396	int err,size;
1397	struct sk_buff *skb;
1398	int sent=0;
1399	struct scm_cookie tmp_scm;
1400
1401	if (NULL == siocb->scm)
1402		siocb->scm = &tmp_scm;
1403	err = scm_send(sock, msg, siocb->scm);
1404	if (err < 0)
1405		return err;
1406
1407	err = -EOPNOTSUPP;
1408	if (msg->msg_flags&MSG_OOB)
1409		goto out_err;
1410
1411	if (msg->msg_namelen) {
1412		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1413		goto out_err;
1414	} else {
1415		sunaddr = NULL;
1416		err = -ENOTCONN;
1417		other = unix_peer_get(sk);
1418		if (!other)
1419			goto out_err;
1420	}
1421
1422	if (sk->sk_shutdown & SEND_SHUTDOWN)
1423		goto pipe_err;
1424
1425	while(sent < len)
1426	{
1427		/*
1428		 *	Optimisation for the fact that under 0.01% of X messages typically
1429		 *	need breaking up.
1430		 */
1431
1432		size=len-sent;
1433
1434		/* Keep two messages in the pipe so it schedules better */
1435		if (size > sk->sk_sndbuf / 2 - 64)
1436			size = sk->sk_sndbuf / 2 - 64;
1437
1438		if (size > SKB_MAX_ALLOC)
1439			size = SKB_MAX_ALLOC;
1440
1441		/*
1442		 *	Grab a buffer
1443		 */
1444
1445		skb=sock_alloc_send_skb(sk,size,msg->msg_flags&MSG_DONTWAIT, &err);
1446
1447		if (skb==NULL)
1448			goto out_err;
1449
1450		/*
1451		 *	If you pass two values to sock_alloc_send_skb
1452		 *	it tries to grab the large buffer with GFP_NOFS
1453		 *	(which can fail easily), and if that fails it grabs the
1454		 *	fallback-size buffer, which is under a page and will
1455		 *	succeed. [Alan]
1456		 */
1457		size = min_t(int, size, skb_tailroom(skb));
1458
1459		memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1460		if (siocb->scm->fp)
1461			unix_attach_fds(siocb->scm, skb);
1462
1463		if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) {
1464			kfree_skb(skb);
1465			goto out_err;
1466		}
1467
1468		unix_state_rlock(other);
1469
1470		if (sock_flag(other, SOCK_DEAD) ||
1471		    (other->sk_shutdown & RCV_SHUTDOWN))
1472			goto pipe_err_free;
1473
1474		skb_queue_tail(&other->sk_receive_queue, skb);
1475		unix_state_runlock(other);
1476		other->sk_data_ready(other, size);
1477		sent+=size;
1478	}
1479	sock_put(other);
1480
1481	scm_destroy(siocb->scm);
1482	siocb->scm = NULL;
1483
1484	return sent;
1485
1486pipe_err_free:
1487	unix_state_runlock(other);
1488	kfree_skb(skb);
1489pipe_err:
1490	if (sent==0 && !(msg->msg_flags&MSG_NOSIGNAL))
1491		send_sig(SIGPIPE,current,0);
1492	err = -EPIPE;
1493out_err:
1494        if (other)
1495		sock_put(other);
1496	scm_destroy(siocb->scm);
1497	siocb->scm = NULL;
1498	return sent ? : err;
1499}
1500
1501static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
1502				  struct msghdr *msg, size_t len)
1503{
1504	int err;
1505	struct sock *sk = sock->sk;
1506
1507	err = sock_error(sk);
1508	if (err)
1509		return err;
1510
1511	if (sk->sk_state != TCP_ESTABLISHED)
1512		return -ENOTCONN;
1513
1514	if (msg->msg_namelen)
1515		msg->msg_namelen = 0;
1516
1517	return unix_dgram_sendmsg(kiocb, sock, msg, len);
1518}
1519
1520static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
1521{
1522	struct unix_sock *u = unix_sk(sk);
1523
1524	msg->msg_namelen = 0;
1525	if (u->addr) {
1526		msg->msg_namelen = u->addr->len;
1527		memcpy(msg->msg_name, u->addr->name, u->addr->len);
1528	}
1529}
1530
1531static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
1532			      struct msghdr *msg, size_t size,
1533			      int flags)
1534{
1535	struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1536	struct scm_cookie tmp_scm;
1537	struct sock *sk = sock->sk;
1538	struct unix_sock *u = unix_sk(sk);
1539	int noblock = flags & MSG_DONTWAIT;
1540	struct sk_buff *skb;
1541	int err;
1542
1543	err = -EOPNOTSUPP;
1544	if (flags&MSG_OOB)
1545		goto out;
1546
1547	msg->msg_namelen = 0;
1548
1549	down(&u->readsem);
1550
1551	skb = skb_recv_datagram(sk, flags, noblock, &err);
1552	if (!skb)
1553		goto out_unlock;
1554
1555	wake_up_interruptible(&u->peer_wait);
1556
1557	if (msg->msg_name)
1558		unix_copy_addr(msg, skb->sk);
1559
1560	if (size > skb->len)
1561		size = skb->len;
1562	else if (size < skb->len)
1563		msg->msg_flags |= MSG_TRUNC;
1564
1565	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size);
1566	if (err)
1567		goto out_free;
1568
1569	if (!siocb->scm) {
1570		siocb->scm = &tmp_scm;
1571		memset(&tmp_scm, 0, sizeof(tmp_scm));
1572	}
1573	siocb->scm->creds = *UNIXCREDS(skb);
1574
1575	if (!(flags & MSG_PEEK))
1576	{
1577		if (UNIXCB(skb).fp)
1578			unix_detach_fds(siocb->scm, skb);
1579	}
1580	else
1581	{
1582		/* It is questionable: on PEEK we could:
1583		   - not return fds - good, but too simple 8)
1584		   - return fds, and not return them on read (the old strategy,
1585		     apparently wrong)
1586		   - clone fds (I chose this for now; it is the most universal
1587		     solution)
1588
1589	           POSIX 1003.1g does not actually define this clearly
1590	           at all. POSIX 1003.1g doesn't define a lot of things
1591	           clearly, however!
1592
1593		*/
1594		if (UNIXCB(skb).fp)
1595			siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1596	}
1597	err = size;
1598
1599	scm_recv(sock, msg, siocb->scm, flags);
1600
1601out_free:
1602	skb_free_datagram(sk,skb);
1603out_unlock:
1604	up(&u->readsem);
1605out:
1606	return err;
1607}
1608
1609/*
1610 *	Sleep until data has arrived, but check for races.
1611 */
1612
1613static long unix_stream_data_wait(struct sock * sk, long timeo)
1614{
1615	DEFINE_WAIT(wait);
1616
1617	unix_state_rlock(sk);
1618
1619	for (;;) {
1620		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
1621
1622		if (skb_queue_len(&sk->sk_receive_queue) ||
1623		    sk->sk_err ||
1624		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
1625		    signal_pending(current) ||
1626		    !timeo)
1627			break;
1628
1629		set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1630		unix_state_runlock(sk);
1631		timeo = schedule_timeout(timeo);
1632		unix_state_rlock(sk);
1633		clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1634	}
1635
1636	finish_wait(sk->sk_sleep, &wait);
1637	unix_state_runlock(sk);
1638	return timeo;
1639}
1640
1641
1642
1643static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
1644			       struct msghdr *msg, size_t size,
1645			       int flags)
1646{
1647	struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1648	struct scm_cookie tmp_scm;
1649	struct sock *sk = sock->sk;
1650	struct unix_sock *u = unix_sk(sk);
1651	struct sockaddr_un *sunaddr=msg->msg_name;
1652	int copied = 0;
1653	int check_creds = 0;
1654	int target;
1655	int err = 0;
1656	long timeo;
1657
1658	err = -EINVAL;
1659	if (sk->sk_state != TCP_ESTABLISHED)
1660		goto out;
1661
1662	err = -EOPNOTSUPP;
1663	if (flags&MSG_OOB)
1664		goto out;
1665
1666	target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
1667	timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT);
1668
1669	msg->msg_namelen = 0;
1670
1671	/* Lock the socket to prevent queue disordering
1672	 * while we sleep in memcpy_toiovec().
1673	 */
1674
1675	if (!siocb->scm) {
1676		siocb->scm = &tmp_scm;
1677		memset(&tmp_scm, 0, sizeof(tmp_scm));
1678	}
1679
1680	down(&u->readsem);
1681
1682	do
1683	{
1684		int chunk;
1685		struct sk_buff *skb;
1686
1687		skb = skb_dequeue(&sk->sk_receive_queue);
1688		if (skb==NULL)
1689		{
1690			if (copied >= target)
1691				break;
1692
1693			/*
1694			 *	POSIX 1003.1g mandates this order.
1695			 */
1696
1697			if ((err = sock_error(sk)) != 0)
1698				break;
1699			if (sk->sk_shutdown & RCV_SHUTDOWN)
1700				break;
1701			err = -EAGAIN;
1702			if (!timeo)
1703				break;
1704			up(&u->readsem);
1705
1706			timeo = unix_stream_data_wait(sk, timeo);
1707
1708			if (signal_pending(current)) {
1709				err = sock_intr_errno(timeo);
1710				goto out;
1711			}
1712			down(&u->readsem);
1713			continue;
1714		}
1715
1716		if (check_creds) {
1717			/* Never glue messages from different writers */
1718			if (memcmp(UNIXCREDS(skb), &siocb->scm->creds, sizeof(siocb->scm->creds)) != 0) {
1719				skb_queue_head(&sk->sk_receive_queue, skb);
1720				break;
1721			}
1722		} else {
1723			/* Copy credentials */
1724			siocb->scm->creds = *UNIXCREDS(skb);
1725			check_creds = 1;
1726		}
1727
1728		/* Copy address just once */
1729		if (sunaddr)
1730		{
1731			unix_copy_addr(msg, skb->sk);
1732			sunaddr = NULL;
1733		}
1734
1735		chunk = min_t(unsigned int, skb->len, size);
1736		if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
1737			skb_queue_head(&sk->sk_receive_queue, skb);
1738			if (copied == 0)
1739				copied = -EFAULT;
1740			break;
1741		}
1742		copied += chunk;
1743		size -= chunk;
1744
1745		/* Mark read part of skb as used */
1746		if (!(flags & MSG_PEEK))
1747		{
1748			skb_pull(skb, chunk);
1749
1750			if (UNIXCB(skb).fp)
1751				unix_detach_fds(siocb->scm, skb);
1752
1753			/* put the skb back if we didn't use it up.. */
1754			if (skb->len)
1755			{
1756				skb_queue_head(&sk->sk_receive_queue, skb);
1757				break;
1758			}
1759
1760			kfree_skb(skb);
1761
1762			if (siocb->scm->fp)
1763				break;
1764		}
1765		else
1766		{
1767			/* It is questionable, see note in unix_dgram_recvmsg.
1768			 */
1769			if (UNIXCB(skb).fp)
1770				siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1771
1772			/* put message back and return */
1773			skb_queue_head(&sk->sk_receive_queue, skb);
1774			break;
1775		}
1776	} while (size);
1777
1778	up(&u->readsem);
1779	scm_recv(sock, msg, siocb->scm, flags);
1780out:
1781	return copied ? : err;
1782}
1783
1784static int unix_shutdown(struct socket *sock, int mode)
1785{
1786	struct sock *sk = sock->sk;
1787	struct sock *other;
1788
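	/* Map SHUT_RD (0), SHUT_WR (1) and SHUT_RDWR (2) onto the
	 * RCV_SHUTDOWN/SEND_SHUTDOWN bit mask. */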
1789	mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);
1790
1791	if (mode) {
1792		unix_state_wlock(sk);
1793		sk->sk_shutdown |= mode;
1794		other=unix_peer(sk);
1795		if (other)
1796			sock_hold(other);
1797		unix_state_wunlock(sk);
1798		sk->sk_state_change(sk);
1799
1800		if (other &&
1801			(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
1802
1803			int peer_mode = 0;
1804
1805			if (mode&RCV_SHUTDOWN)
1806				peer_mode |= SEND_SHUTDOWN;
1807			if (mode&SEND_SHUTDOWN)
1808				peer_mode |= RCV_SHUTDOWN;
1809			unix_state_wlock(other);
1810			other->sk_shutdown |= peer_mode;
1811			unix_state_wunlock(other);
1812			other->sk_state_change(other);
1813			read_lock(&other->sk_callback_lock);
1814			if (peer_mode == SHUTDOWN_MASK)
1815				sk_wake_async(other,1,POLL_HUP);
1816			else if (peer_mode & RCV_SHUTDOWN)
1817				sk_wake_async(other,1,POLL_IN);
1818			read_unlock(&other->sk_callback_lock);
1819		}
1820		if (other)
1821			sock_put(other);
1822	}
1823	return 0;
1824}
1825
1826static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1827{
1828	struct sock *sk = sock->sk;
1829	long amount=0;
1830	int err;
1831
1832	switch(cmd)
1833	{
1834		case SIOCOUTQ:
1835			amount = atomic_read(&sk->sk_wmem_alloc);
1836			err = put_user(amount, (int __user *)arg);
1837			break;
1838		case SIOCINQ:
1839		{
1840			struct sk_buff *skb;
1841
1842			if (sk->sk_state == TCP_LISTEN) {
1843				err = -EINVAL;
1844				break;
1845			}
1846
1847			spin_lock(&sk->sk_receive_queue.lock);
1848			if (sk->sk_type == SOCK_STREAM ||
1849			    sk->sk_type == SOCK_SEQPACKET) {
1850				skb_queue_walk(&sk->sk_receive_queue, skb)
1851					amount += skb->len;
1852			} else {
1853				skb = skb_peek(&sk->sk_receive_queue);
1854				if (skb)
1855					amount=skb->len;
1856			}
1857			spin_unlock(&sk->sk_receive_queue.lock);
1858			err = put_user(amount, (int __user *)arg);
1859			break;
1860		}
1861
1862		default:
1863			err = dev_ioctl(cmd, (void __user *)arg);
1864			break;
1865	}
1866	return err;
1867}
1868
1869static unsigned int unix_poll(struct file * file, struct socket *sock, poll_table *wait)
1870{
1871	struct sock *sk = sock->sk;
1872	unsigned int mask;
1873
1874	poll_wait(file, sk->sk_sleep, wait);
1875	mask = 0;
1876
1877	/* exceptional events? */
1878	if (sk->sk_err)
1879		mask |= POLLERR;
1880	if (sk->sk_shutdown == SHUTDOWN_MASK)
1881		mask |= POLLHUP;
1882
1883	/* readable? */
1884	if (!skb_queue_empty(&sk->sk_receive_queue) ||
1885	    (sk->sk_shutdown & RCV_SHUTDOWN))
1886		mask |= POLLIN | POLLRDNORM;
1887
1888	/* Connection-based sockets need to check for termination and startup */
1889	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && sk->sk_state == TCP_CLOSE)
1890		mask |= POLLHUP;
1891
1892	/*
1893	 * We also set writable when the other side has shut down the
1894	 * connection. This prevents stuck sockets.
1895	 */
1896	if (unix_writable(sk))
1897		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
1898
1899	return mask;
1900}
1901
1902
1903#ifdef CONFIG_PROC_FS
1904static struct sock *unix_seq_idx(int *iter, loff_t pos)
1905{
1906	loff_t off = 0;
1907	struct sock *s;
1908
1909	for (s = first_unix_socket(iter); s; s = next_unix_socket(iter, s)) {
1910		if (off == pos)
1911			return s;
1912		++off;
1913	}
1914	return NULL;
1915}
1916
1917
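/* The (void *)1 returned for *pos == 0 is a sentinel that makes
 * unix_seq_show() print the header line before the first socket. */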
1918static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
1919{
1920	read_lock(&unix_table_lock);
1921	return *pos ? unix_seq_idx(seq->private, *pos - 1) : ((void *) 1);
1922}
1923
1924static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1925{
1926	++*pos;
1927
1928	if (v == (void *)1)
1929		return first_unix_socket(seq->private);
1930	return next_unix_socket(seq->private, v);
1931}
1932
1933static void unix_seq_stop(struct seq_file *seq, void *v)
1934{
1935	read_unlock(&unix_table_lock);
1936}
1937
1938static int unix_seq_show(struct seq_file *seq, void *v)
1939{
1940
1941	if (v == (void *)1)
1942		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
1943			 "Inode Path\n");
1944	else {
1945		struct sock *s = v;
1946		struct unix_sock *u = unix_sk(s);
1947		unix_state_rlock(s);
1948
1949		seq_printf(seq, "%p: %08X %08X %08X %04X %02X %5lu",
1950			s,
1951			atomic_read(&s->sk_refcnt),
1952			0,
1953			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
1954			s->sk_type,
1955			s->sk_socket ?
1956			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
1957			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
1958			sock_i_ino(s));
1959
1960		if (u->addr) {
1961			int i, len;
1962			seq_putc(seq, ' ');
1963
1964			i = 0;
1965			len = u->addr->len - sizeof(short);
1966			if (!UNIX_ABSTRACT(s))
1967				len--;
1968			else {
1969				seq_putc(seq, '@');
1970				i++;
1971			}
1972			for ( ; i < len; i++)
1973				seq_putc(seq, u->addr->name->sun_path[i]);
1974		}
1975		unix_state_runlock(s);
1976		seq_putc(seq, '\n');
1977	}
1978
1979	return 0;
1980}
1981
1982static struct seq_operations unix_seq_ops = {
1983	.start  = unix_seq_start,
1984	.next   = unix_seq_next,
1985	.stop   = unix_seq_stop,
1986	.show   = unix_seq_show,
1987};
1988
1989
1990static int unix_seq_open(struct inode *inode, struct file *file)
1991{
1992	struct seq_file *seq;
1993	int rc = -ENOMEM;
1994	int *iter = kmalloc(sizeof(int), GFP_KERNEL);
1995
1996	if (!iter)
1997		goto out;
1998
1999	rc = seq_open(file, &unix_seq_ops);
2000	if (rc)
2001		goto out_kfree;
2002
2003	seq	     = file->private_data;
2004	seq->private = iter;
2005	*iter = 0;
2006out:
2007	return rc;
2008out_kfree:
2009	kfree(iter);
2010	goto out;
2011}
2012
2013static struct file_operations unix_seq_fops = {
2014	.owner		= THIS_MODULE,
2015	.open		= unix_seq_open,
2016	.read		= seq_read,
2017	.llseek		= seq_lseek,
2018	.release	= seq_release_private,
2019};
2020
2021#endif
2022
2023static struct net_proto_family unix_family_ops = {
2024	.family = PF_UNIX,
2025	.create = unix_create,
2026	.owner	= THIS_MODULE,
2027};
2028
2029#ifdef CONFIG_SYSCTL
2030extern void unix_sysctl_register(void);
2031extern void unix_sysctl_unregister(void);
2032#else
2033static inline void unix_sysctl_register(void) {}
2034static inline void unix_sysctl_unregister(void) {}
2035#endif
2036
2037static int __init af_unix_init(void)
2038{
2039	int rc = -1;
2040	struct sk_buff *dummy_skb;
2041
2042	if (sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb)) {
2043		printk(KERN_CRIT "%s: panic\n", __FUNCTION__);
2044		goto out;
2045	}
2046
2047	rc = proto_register(&unix_proto, 1);
2048        if (rc != 0) {
2049                printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n",
2050		       __FUNCTION__);
2051		goto out;
2052	}
2053
2054	sock_register(&unix_family_ops);
2055#ifdef CONFIG_PROC_FS
2056	proc_net_fops_create("unix", 0, &unix_seq_fops);
2057#endif
2058	unix_sysctl_register();
2059out:
2060	return rc;
2061}
2062
2063static void __exit af_unix_exit(void)
2064{
2065	sock_unregister(PF_UNIX);
2066	unix_sysctl_unregister();
2067	proc_net_remove("unix");
2068	proto_unregister(&unix_proto);
2069}
2070
2071module_init(af_unix_init);
2072module_exit(af_unix_exit);
2073
2074MODULE_LICENSE("GPL");
2075MODULE_ALIAS_NETPROTO(PF_UNIX);
2076