af_unix.c revision 877ce7c1b3afd69a9b1caeb1b9964c992641f52a
1/* 2 * NET4: Implementation of BSD Unix domain sockets. 3 * 4 * Authors: Alan Cox, <alan.cox@linux.org> 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License 8 * as published by the Free Software Foundation; either version 9 * 2 of the License, or (at your option) any later version. 10 * 11 * Version: $Id: af_unix.c,v 1.133 2002/02/08 03:57:19 davem Exp $ 12 * 13 * Fixes: 14 * Linus Torvalds : Assorted bug cures. 15 * Niibe Yutaka : async I/O support. 16 * Carsten Paeth : PF_UNIX check, address fixes. 17 * Alan Cox : Limit size of allocated blocks. 18 * Alan Cox : Fixed the stupid socketpair bug. 19 * Alan Cox : BSD compatibility fine tuning. 20 * Alan Cox : Fixed a bug in connect when interrupted. 21 * Alan Cox : Sorted out a proper draft version of 22 * file descriptor passing hacked up from 23 * Mike Shaver's work. 24 * Marty Leisner : Fixes to fd passing 25 * Nick Nevin : recvmsg bugfix. 26 * Alan Cox : Started proper garbage collector 27 * Heiko EiBfeldt : Missing verify_area check 28 * Alan Cox : Started POSIXisms 29 * Andreas Schwab : Replace inode by dentry for proper 30 * reference counting 31 * Kirk Petersen : Made this a module 32 * Christoph Rohland : Elegant non-blocking accept/connect algorithm. 33 * Lots of bug fixes. 34 * Alexey Kuznetosv : Repaired (I hope) bugs introduces 35 * by above two patches. 36 * Andrea Arcangeli : If possible we block in connect(2) 37 * if the max backlog of the listen socket 38 * is been reached. This won't break 39 * old apps and it will avoid huge amount 40 * of socks hashed (this for unix_gc() 41 * performances reasons). 42 * Security fix that limits the max 43 * number of socks to 2*max_files and 44 * the number of skb queueable in the 45 * dgram receiver. 46 * Artur Skawina : Hash function optimizations 47 * Alexey Kuznetsov : Full scale SMP. 
Lot of bugs are introduced 8) 48 * Malcolm Beattie : Set peercred for socketpair 49 * Michal Ostrowski : Module initialization cleanup. 50 * Arnaldo C. Melo : Remove MOD_{INC,DEC}_USE_COUNT, 51 * the core infrastructure is doing that 52 * for all net proto families now (2.5.69+) 53 * 54 * 55 * Known differences from reference BSD that was tested: 56 * 57 * [TO FIX] 58 * ECONNREFUSED is not returned from one end of a connected() socket to the 59 * other the moment one end closes. 60 * fstat() doesn't return st_dev=0, and give the blksize as high water mark 61 * and a fake inode identifier (nor the BSD first socket fstat twice bug). 62 * [NOT TO FIX] 63 * accept() returns a path name even if the connecting socket has closed 64 * in the meantime (BSD loses the path and gives up). 65 * accept() returns 0 length path for an unbound connector. BSD returns 16 66 * and a null first byte in the path (but not for gethost/peername - BSD bug ??) 67 * socketpair(...SOCK_RAW..) doesn't panic the kernel. 68 * BSD af_unix apparently has connect forgetting to block properly. 69 * (need to check this with the POSIX spec in detail) 70 * 71 * Differences from 2.0.0-11-... (ANK) 72 * Bug fixes and improvements. 73 * - client shutdown killed server socket. 74 * - removed all useless cli/sti pairs. 75 * 76 * Semantic changes/extensions. 77 * - generic control message passing. 78 * - SCM_CREDENTIALS control message. 79 * - "Abstract" (not FS based) socket bindings. 80 * Abstract names are sequences of bytes (not zero terminated) 81 * started by 0, so that this name space does not intersect 82 * with BSD names. 
 */

#include <linux/module.h>
#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/dcache.h>
#include <linux/namei.h>
#include <linux/socket.h>
#include <linux/un.h>
#include <linux/fcntl.h>
#include <linux/termios.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/in.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <net/af_unix.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <net/scm.h>
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/smp_lock.h>
#include <linux/rtnetlink.h>
#include <linux/mount.h>
#include <net/checksum.h>
#include <linux/security.h>

/* Default limit copied into each new sock's sk_max_ack_backlog
 * (see unix_create1); tunable via sysctl.
 */
int sysctl_unix_max_dgram_qlen = 10;

/* Global hash table of all AF_UNIX sockets.  The extra bucket
 * (index UNIX_HASH_SIZE) holds sockets that are not yet bound to
 * any name -- see unix_sockets_unbound below.
 */
struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
DEFINE_SPINLOCK(unix_table_lock);
/* Count of live unix socks, bounded in unix_create1(). */
static atomic_t unix_nr_socks = ATOMIC_INIT(0);

#define unix_sockets_unbound (&unix_socket_table[UNIX_HASH_SIZE])

/* A bound socket is "abstract" (not filesystem-backed) unless its
 * address hash was forced to UNIX_HASH_SIZE at bind time.
 */
#define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)

#ifdef CONFIG_SECURITY_NETWORK
/* Ask the LSM for the sender's security blob for this datagram; on
 * failure leave the skb with no security data rather than stale data.
 */
static void unix_get_peersec_dgram(struct sk_buff *skb)
{
	int err;

	err = security_socket_getpeersec_dgram(skb, UNIXSECDATA(skb),
					       UNIXSECLEN(skb));
	if (err)
		*(UNIXSECDATA(skb)) = NULL;
}

/* Copy the skb's attached security data into the receiver's scm cookie. */
static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secdata = *UNIXSECDATA(skb);
	scm->seclen = *UNIXSECLEN(skb);
}
#else
/* No-op stubs when security hooks are compiled out. */
static void unix_get_peersec_dgram(struct sk_buff *skb)
{ }

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }
#endif /* CONFIG_SECURITY_NETWORK */

/*
 * SMP locking
 strategy:
 *    hash table is protected with spinlock unix_table_lock
 *    each socket state is protected by separate rwlock.
 */

/* Fold a 32-bit checksum down to a hash-table index (UNIX_HASH_SIZE buckets). */
static inline unsigned unix_hash_fold(unsigned hash)
{
	hash ^= hash>>16;
	hash ^= hash>>8;
	return hash&(UNIX_HASH_SIZE-1);
}

#define unix_peer(sk) (unix_sk(sk)->peer)

/* True if osk's peer pointer already points back at sk. */
static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == sk;
}

/* sk may send to osk if osk is unconnected or connected to sk. */
static inline int unix_may_send(struct sock *sk, struct sock *osk)
{
	return (unix_peer(osk) == NULL || unix_our_peer(sk, osk));
}

/* Return s's peer with a reference held (or NULL), taken under s's
 * state lock so the peer cannot be swapped out from under us.
 */
static struct sock *unix_peer_get(struct sock *s)
{
	struct sock *peer;

	unix_state_rlock(s);
	peer = unix_peer(s);
	if (peer)
		sock_hold(peer);
	unix_state_runlock(s);
	return peer;
}

/* Drop one reference on a unix_address, freeing it on the last put. */
static inline void unix_release_addr(struct unix_address *addr)
{
	if (atomic_dec_and_test(&addr->refcnt))
		kfree(addr);
}

/*
 *	Check unix socket name:
 *		- should be not zero length.
 *		- if started by not zero, should be NULL terminated (FS object)
 *		- if started by zero, it is abstract name.
 *
 *	Returns the normalized address length, or -EINVAL.  For abstract
 *	names *hashp is set to the folded checksum of the name bytes;
 *	for filesystem names the hash is left untouched (the caller uses
 *	the inode instead).
 */

static int unix_mkname(struct sockaddr_un * sunaddr, int len, unsigned *hashp)
{
	if (len <= sizeof(short) || len > sizeof(*sunaddr))
		return -EINVAL;
	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
		return -EINVAL;
	if (sunaddr->sun_path[0]) {
		/*
		 * This may look like an off by one error but it is a bit more
		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
		 * sun_path[108] doesnt as such exist. However in kernel space
		 * we are guaranteed that it is a valid memory location in our
		 * kernel address buffer.
		 */
		((char *)sunaddr)[len]=0;
		/* Recompute length from the now NUL-terminated path. */
		len = strlen(sunaddr->sun_path)+1+sizeof(short);
		return len;
	}

	*hashp = unix_hash_fold(csum_partial((char*)sunaddr, len, 0));
	return len;
}

static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}

static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	BUG_TRAP(sk_unhashed(sk));
	sk_add_node(sk, list);
}

/* Locked wrappers around the hash-table primitives above. */
static inline void unix_remove_socket(struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_remove_socket(sk);
	spin_unlock(&unix_table_lock);
}

static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_insert_socket(list, sk);
	spin_unlock(&unix_table_lock);
}

/* Look up a bound socket by name.  Caller must hold unix_table_lock;
 * no reference is taken (see the locked wrapper below).
 */
static struct sock *__unix_find_socket_byname(struct sockaddr_un *sunname,
					      int len, int type, unsigned hash)
{
	struct sock *s;
	struct hlist_node *node;

	/* Bucket index mixes the name hash with the socket type, matching
	 * the "addr->hash ^= sk->sk_type" done at bind/autobind time.
	 */
	sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
		struct unix_sock *u = unix_sk(s);

		if (u->addr->len == len &&
		    !memcmp(u->addr->name, sunname, len))
			goto found;
	}
	s = NULL;
found:
	return s;
}

/* As above, but takes the table lock and returns the socket with a
 * reference held.
 */
static inline struct sock *unix_find_socket_byname(struct sockaddr_un *sunname,
						   int len, int type,
						   unsigned hash)
{
	struct sock *s;

	spin_lock(&unix_table_lock);
	s = __unix_find_socket_byname(sunname, len, type, hash);
	if (s)
		sock_hold(s);
	spin_unlock(&unix_table_lock);
	return s;
}

/* Find the filesystem-bound socket whose bound dentry refers to inode i;
 * returns it with a reference held, or NULL.
 */
static struct sock *unix_find_socket_byinode(struct inode *i)
{
	struct sock *s;
	struct hlist_node *node;

	spin_lock(&unix_table_lock);
	sk_for_each(s, node,
		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
		struct dentry *dentry = unix_sk(s)->dentry;

		if(dentry && dentry->d_inode == i)
		{
			sock_hold(s);
			goto found;
		}
	}
	s = NULL;
found:
	spin_unlock(&unix_table_lock);
	return s;
}

/* Writable while write-memory in flight is no more than 1/4 of sndbuf. */
static inline int unix_writable(struct sock *sk)
{
	return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
}

/* sk_write_space callback: wake sleepers and send async POLL_OUT once
 * the socket has become writable again.
 */
static void unix_write_space(struct sock *sk)
{
	read_lock(&sk->sk_callback_lock);
	if (unix_writable(sk)) {
		if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
			wake_up_interruptible(sk->sk_sleep);
		sk_wake_async(sk, 2, POLL_OUT);
	}
	read_unlock(&sk->sk_callback_lock);
}

/* When dgram socket disconnects (or changes its peer), we clear its receive
 * queue of packets arrived from previous peer. First, it allows to do
 * flow control based only on wmem_alloc; second, sk connected to peer
 * may receive messages only from that peer. */
static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
{
	if (!skb_queue_empty(&sk->sk_receive_queue)) {
		skb_queue_purge(&sk->sk_receive_queue);
		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);

		/* If one link of bidirectional dgram pipe is disconnected,
		 * we signal error. Messages are lost. Do not make this,
		 * when peer was not connected to us.
		 */
		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
			other->sk_err = ECONNRESET;
			other->sk_error_report(other);
		}
	}
}

/* sk_destruct callback: final teardown when the last sock reference dies. */
static void unix_sock_destructor(struct sock *sk)
{
	struct unix_sock *u = unix_sk(sk);

	skb_queue_purge(&sk->sk_receive_queue);

	BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
	BUG_TRAP(sk_unhashed(sk));
	BUG_TRAP(!sk->sk_socket);
	if (!sock_flag(sk, SOCK_DEAD)) {
		printk("Attempt to release alive unix socket: %p\n", sk);
		return;
	}

	if (u->addr)
		unix_release_addr(u->addr);

	atomic_dec(&unix_nr_socks);
#ifdef UNIX_REFCNT_DEBUG
	printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk, atomic_read(&unix_nr_socks));
#endif
}

/* Tear down one unix sock: unhash it, shut it down, notify the peer,
 * drain its queue, drop its filesystem refs and its own reference.
 * embrion is nonzero when releasing a never-accepted embryo from a
 * listening socket's queue (peer then gets ECONNRESET).
 */
static int unix_release_sock (struct sock *sk, int embrion)
{
	struct unix_sock *u = unix_sk(sk);
	struct dentry *dentry;
	struct vfsmount *mnt;
	struct sock *skpair;
	struct sk_buff *skb;
	int state;

	unix_remove_socket(sk);

	/* Clear state */
	unix_state_wlock(sk);
	sock_orphan(sk);
	sk->sk_shutdown = SHUTDOWN_MASK;
	dentry = u->dentry;
	u->dentry = NULL;
	mnt = u->mnt;
	u->mnt = NULL;
	state = sk->sk_state;
	sk->sk_state = TCP_CLOSE;
	unix_state_wunlock(sk);

	wake_up_interruptible_all(&u->peer_wait);

	skpair=unix_peer(sk);

	if (skpair!=NULL) {
		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
			unix_state_wlock(skpair);
			/* No more writes */
			skpair->sk_shutdown = SHUTDOWN_MASK;
			/* Unread data (or an embryo release) means the
			 * connection died abnormally -- tell the peer.
			 */
			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
				skpair->sk_err = ECONNRESET;
			unix_state_wunlock(skpair);
			skpair->sk_state_change(skpair);
			read_lock(&skpair->sk_callback_lock);
			sk_wake_async(skpair,1,POLL_HUP);
			read_unlock(&skpair->sk_callback_lock);
		}
		sock_put(skpair); /* It may now die */
		unix_peer(sk) = NULL;
	}

	/* Try to flush out this socket.
Throw out buffers at least */

	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		/* A listener's queue holds embryo connections; release them. */
		if (state==TCP_LISTEN)
			unix_release_sock(skb->sk, 1);
		/* passed fds are erased in the kfree_skb hook */
		kfree_skb(skb);
	}

	if (dentry) {
		dput(dentry);
		mntput(mnt);
	}

	sock_put(sk);

	/* ---- Socket is dead now and most probably destroyed ---- */

	/*
	 * Fixme: BSD difference: In BSD all sockets connected to use get
	 *	  ECONNRESET and we die on the spot. In Linux we behave
	 *	  like files and pipes do and wait for the last
	 *	  dereference.
	 *
	 * Can't we simply set sock->err?
	 *
	 *	  What the above comment does talk about? --ANK(980817)
	 */

	if (atomic_read(&unix_tot_inflight))
		unix_gc();		/* Garbage collect fds */

	return 0;
}

/* listen() for stream/seqpacket sockets; the socket must already be bound. */
static int unix_listen(struct socket *sock, int backlog)
{
	int err;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);

	err = -EOPNOTSUPP;
	if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET)
		goto out;			/* Only stream/seqpacket sockets accept */
	err = -EINVAL;
	if (!u->addr)
		goto out;			/* No listens on an unbound socket */
	unix_state_wlock(sk);
	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
		goto out_unlock;
	/* Growing the backlog may unblock connectors waiting in
	 * unix_wait_for_peer().
	 */
	if (backlog > sk->sk_max_ack_backlog)
		wake_up_interruptible_all(&u->peer_wait);
	sk->sk_max_ack_backlog	= backlog;
	sk->sk_state		= TCP_LISTEN;
	/* set credentials so connect can copy them */
	sk->sk_peercred.pid	= current->tgid;
	sk->sk_peercred.uid	= current->euid;
	sk->sk_peercred.gid	= current->egid;
	err = 0;

out_unlock:
	unix_state_wunlock(sk);
out:
	return err;
}

/* Forward declarations for the proto_ops tables below. */
static int unix_release(struct socket *);
static int unix_bind(struct socket *, struct sockaddr *, int);
static int unix_stream_connect(struct socket *, struct sockaddr *,
			       int addr_len, int flags);
static int
unix_socketpair(struct socket *, struct socket *);
static int unix_accept(struct socket *, struct socket *, int);
static int unix_getname(struct socket *, struct sockaddr *, int *, int);
static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
static int unix_ioctl(struct socket *, unsigned int, unsigned long);
static int unix_shutdown(struct socket *, int);
static int unix_stream_sendmsg(struct kiocb *, struct socket *,
			       struct msghdr *, size_t);
static int unix_stream_recvmsg(struct kiocb *, struct socket *,
			       struct msghdr *, size_t, int);
static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
			      struct msghdr *, size_t);
static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
			      struct msghdr *, size_t, int);
static int unix_dgram_connect(struct socket *, struct sockaddr *,
			      int, int);
static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
				  struct msghdr *, size_t);

/* ops for SOCK_STREAM: connection oriented, uses the stream send/recv paths. */
static const struct proto_ops unix_stream_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_poll,
	.ioctl =	unix_ioctl,
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_stream_sendmsg,
	.recvmsg =	unix_stream_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};

/* ops for SOCK_DGRAM: connectionless; no accept/listen. */
static const struct proto_ops unix_dgram_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_dgram_connect,
	.socketpair =	unix_socketpair,
	.accept =	sock_no_accept,
	.getname =	unix_getname,
	.poll =		datagram_poll,
	.ioctl =	unix_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_dgram_sendmsg,
	.recvmsg =	unix_dgram_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};

/* ops for SOCK_SEQPACKET: connection oriented like stream, but sends
 * datagram-framed messages (dgram recv path, seqpacket send path).
 */
static const struct proto_ops unix_seqpacket_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		datagram_poll,
	.ioctl =	unix_ioctl,
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_seqpacket_sendmsg,
	.recvmsg =	unix_dgram_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};

static struct proto unix_proto = {
	.name	  = "UNIX",
	.owner	  = THIS_MODULE,
	.obj_size = sizeof(struct unix_sock),
};

/* Allocate and initialize one unix sock.  sock may be NULL for an
 * embryo created on behalf of a connecting client (see
 * unix_stream_connect); returns NULL on failure or when the global
 * 2*max_files socket limit is hit.
 */
static struct sock * unix_create1(struct socket *sock)
{
	struct sock *sk = NULL;
	struct unix_sock *u;

	/* Security limit: never more than 2*max_files unix socks. */
	if (atomic_read(&unix_nr_socks) >= 2*get_max_files())
		goto out;

	sk = sk_alloc(PF_UNIX, GFP_KERNEL, &unix_proto, 1);
	if (!sk)
		goto out;

	atomic_inc(&unix_nr_socks);

	sock_init_data(sock,sk);

	sk->sk_write_space	= unix_write_space;
	sk->sk_max_ack_backlog	= sysctl_unix_max_dgram_qlen;
	sk->sk_destruct		= unix_sock_destructor;
	u	  = unix_sk(sk);
	u->dentry = NULL;
	u->mnt	  = NULL;
	spin_lock_init(&u->lock);
	/* Embryos (sock == NULL) start with inflight at -1; it is
	 * corrected once the embryo is queued on the listener -- see
	 * the atomic_inc in unix_stream_connect.
	 */
	atomic_set(&u->inflight, sock ?
		   0 : -1);
	mutex_init(&u->readlock); /* single task reading lock */
	init_waitqueue_head(&u->peer_wait);
	/* New socks have no name yet; park them in the unbound bucket. */
	unix_insert_socket(unix_sockets_unbound, sk);
out:
	return sk;
}

/* create() handler for PF_UNIX: pick the ops table for the socket type. */
static int unix_create(struct socket *sock, int protocol)
{
	if (protocol && protocol != PF_UNIX)
		return -EPROTONOSUPPORT;

	sock->state = SS_UNCONNECTED;

	switch (sock->type) {
	case SOCK_STREAM:
		sock->ops = &unix_stream_ops;
		break;
		/*
		 *	Believe it or not BSD has AF_UNIX, SOCK_RAW though
		 *	nothing uses it.
		 */
	case SOCK_RAW:
		sock->type=SOCK_DGRAM;
		/* fall through: RAW is silently treated as DGRAM */
	case SOCK_DGRAM:
		sock->ops = &unix_dgram_ops;
		break;
	case SOCK_SEQPACKET:
		sock->ops = &unix_seqpacket_ops;
		break;
	default:
		return -ESOCKTNOSUPPORT;
	}

	return unix_create1(sock) ? 0 : -ENOMEM;
}

/* release() handler: detach the sock from the socket and tear it down. */
static int unix_release(struct socket *sock)
{
	struct sock *sk = sock->sk;

	if (!sk)
		return 0;

	sock->sk = NULL;

	return unix_release_sock (sk, 0);
}

/* Bind the socket to a kernel-chosen abstract name ("\0XXXXX", five hex
 * digits).  Used when sending/connecting with SOCK_PASSCRED set on a
 * still-unbound socket.  No-op (success) if already bound.
 */
static int unix_autobind(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	static u32 ordernum = 1;
	struct unix_address * addr;
	int err;

	mutex_lock(&u->readlock);

	err = 0;
	if (u->addr)
		goto out;

	err = -ENOMEM;
	addr = kmalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
	if (!addr)
		goto out;

	memset(addr, 0, sizeof(*addr) + sizeof(short) + 16);
	addr->name->sun_family = AF_UNIX;
	atomic_set(&addr->refcnt, 1);

retry:
	/* sun_path[0] stays 0: this is an abstract name. */
	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
	addr->hash = unix_hash_fold(csum_partial((void*)addr->name, addr->len, 0));

	spin_lock(&unix_table_lock);
	ordernum = (ordernum+1)&0xFFFFF;

	if (__unix_find_socket_byname(addr->name, addr->len, sock->type,
				      addr->hash)) {
		spin_unlock(&unix_table_lock);
		/* Sanity yield. It is unusual case, but yet... */
		if (!(ordernum&0xFF))
			yield();
		goto retry;
	}
	addr->hash ^= sk->sk_type;

	/* Move from the unbound bucket to the named bucket, atomically
	 * with the uniqueness check above (table lock still held).
	 */
	__unix_remove_socket(sk);
	u->addr = addr;
	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
	spin_unlock(&unix_table_lock);
	err = 0;

out:	mutex_unlock(&u->readlock);
	return err;
}

/* Resolve a destination address to a live socket of the given type:
 * filesystem names go through VFS path lookup and inode matching,
 * abstract names through the name hash table.  Returns a referenced
 * sock, or NULL with *error set.
 */
static struct sock *unix_find_other(struct sockaddr_un *sunname, int len,
				    int type, unsigned hash, int *error)
{
	struct sock *u;
	struct nameidata nd;
	int err = 0;

	if (sunname->sun_path[0]) {
		err = path_lookup(sunname->sun_path, LOOKUP_FOLLOW, &nd);
		if (err)
			goto fail;
		/* Sending to a socket inode requires write permission. */
		err = vfs_permission(&nd, MAY_WRITE);
		if (err)
			goto put_fail;

		err = -ECONNREFUSED;
		if (!S_ISSOCK(nd.dentry->d_inode->i_mode))
			goto put_fail;
		u=unix_find_socket_byinode(nd.dentry->d_inode);
		if (!u)
			goto put_fail;

		if (u->sk_type == type)
			touch_atime(nd.mnt, nd.dentry);

		path_release(&nd);

		err=-EPROTOTYPE;
		if (u->sk_type != type) {
			sock_put(u);
			goto fail;
		}
	} else {
		err = -ECONNREFUSED;
		u=unix_find_socket_byname(sunname, len, type, hash);
		if (u) {
			struct dentry *dentry;
			dentry = unix_sk(u)->dentry;
			if (dentry)
				touch_atime(unix_sk(u)->mnt, dentry);
		} else
			goto fail;
	}
	return u;

put_fail:
	path_release(&nd);
fail:
	*error=err;
	return NULL;
}


/* bind() handler: attach a filesystem or abstract name to the socket.
 * A bare sun_family (addr_len == sizeof(short)) requests autobind.
 */
static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
	struct dentry * dentry = NULL;
	struct nameidata nd;
	int err;
	unsigned hash;
	struct unix_address *addr;
	struct hlist_head *list;

	err = -EINVAL;
	if (sunaddr->sun_family != AF_UNIX)
		goto out;

	if (addr_len==sizeof(short)) {
		err = unix_autobind(sock);
		goto out;
	}

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	mutex_lock(&u->readlock);

	/* Already bound -- a socket may be named only once. */
	err = -EINVAL;
	if (u->addr)
		goto out_up;

	err = -ENOMEM;
	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
	if (!addr)
		goto out_up;

	memcpy(addr->name, sunaddr, addr_len);
	addr->len = addr_len;
	addr->hash = hash ^ sk->sk_type;
	atomic_set(&addr->refcnt, 1);

	if (sunaddr->sun_path[0]) {
		unsigned int mode;
		err = 0;
		/*
		 * Get the parent directory, calculate the hash for last
		 * component.
		 */
		err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd);
		if (err)
			goto out_mknod_parent;

		dentry = lookup_create(&nd, 0);
		err = PTR_ERR(dentry);
		if (IS_ERR(dentry))
			goto out_mknod_unlock;

		/*
		 * All right, let's create it.
		 */
		mode = S_IFSOCK |
		       (SOCK_INODE(sock)->i_mode & ~current->fs->umask);
		err = vfs_mknod(nd.dentry->d_inode, dentry, mode, 0);
		if (err)
			goto out_mknod_dput;
		mutex_unlock(&nd.dentry->d_inode->i_mutex);
		dput(nd.dentry);
		nd.dentry = dentry;

		/* FS-backed sockets are marked with the out-of-range hash
		 * value; lookup goes by inode, not by name (cf. UNIX_ABSTRACT).
		 */
		addr->hash = UNIX_HASH_SIZE;
	}

	spin_lock(&unix_table_lock);

	if (!sunaddr->sun_path[0]) {
		/* Abstract name: must be unique in the hash table. */
		err = -EADDRINUSE;
		if (__unix_find_socket_byname(sunaddr, addr_len,
					      sk->sk_type, hash)) {
			unix_release_addr(addr);
			goto out_unlock;
		}

		list = &unix_socket_table[addr->hash];
	} else {
		/* FS name: hash by inode number, as byinode lookup does. */
		list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
		u->dentry = nd.dentry;
		u->mnt	  = nd.mnt;
	}

	err = 0;
	__unix_remove_socket(sk);
	u->addr = addr;
	__unix_insert_socket(list, sk);

out_unlock:
	spin_unlock(&unix_table_lock);
out_up:
	mutex_unlock(&u->readlock);
out:
	return err;

out_mknod_dput:
	dput(dentry);
out_mknod_unlock:
	mutex_unlock(&nd.dentry->d_inode->i_mutex);
	path_release(&nd);
out_mknod_parent:
	/* The path already exists: report it as an address in use. */
	if (err==-EEXIST)
		err=-EADDRINUSE;
	unix_release_addr(addr);
	goto out_up;
}

/* connect() for datagram sockets: set (or, with AF_UNSPEC, clear) the
 * default peer.  Reconnecting flushes packets queued from the old peer.
 */
static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
			      int alen, int flags)
{
	struct sock *sk = sock->sk;
	struct sockaddr_un *sunaddr=(struct sockaddr_un*)addr;
	struct sock *other;
	unsigned hash;
	int err;

	if (addr->sa_family != AF_UNSPEC) {
		err = unix_mkname(sunaddr, alen, &hash);
		if (err < 0)
			goto out;
		alen = err;

		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
			goto out;

		other=unix_find_other(sunaddr, alen, sock->type, hash, &err);
		if (!other)
			goto out;

		unix_state_wlock(sk);

		err = -EPERM;
		if (!unix_may_send(sk, other))
			goto out_unlock;

		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;

	} else {
		/*
		 *	1003.1g breaking connected state with AF_UNSPEC
		 */
		other = NULL;
		unix_state_wlock(sk);
	}

	/*
	 * If it was connected, reconnect.
	 */
	if (unix_peer(sk)) {
		struct sock *old_peer = unix_peer(sk);
		unix_peer(sk)=other;
		unix_state_wunlock(sk);

		/* Flush packets from the old peer (see unix_dgram_disconnected). */
		if (other != old_peer)
			unix_dgram_disconnected(sk, old_peer);
		sock_put(old_peer);
	} else {
		unix_peer(sk)=other;
		unix_state_wunlock(sk);
	}
	return 0;

out_unlock:
	unix_state_wunlock(sk);
	sock_put(other);
out:
	return err;
}

/* Sleep until the listener's receive queue drains below its backlog,
 * the timeout elapses, or a signal arrives.  Called with other's state
 * read lock held; drops it before sleeping.  Returns remaining timeout.
 */
static long unix_wait_for_peer(struct sock *other, long timeo)
{
	struct unix_sock *u = unix_sk(other);
	int sched;
	DEFINE_WAIT(wait);

	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);

	/* Only actually sleep if the queue is still over backlog and the
	 * listener is neither dead nor shut down for receive.
	 */
	sched = !sock_flag(other, SOCK_DEAD) &&
		!(other->sk_shutdown & RCV_SHUTDOWN) &&
		(skb_queue_len(&other->sk_receive_queue) >
		 other->sk_max_ack_backlog);

	unix_state_runlock(other);

	if (sched)
		timeo = schedule_timeout(timeo);

	finish_wait(&u->peer_wait, &wait);
	return timeo;
}

/* connect() for stream/seqpacket sockets: create an embryo sock, queue
 * it on the listener's receive queue wrapped in an skb, and become its
 * peer.  accept() later dequeues the embryo and grafts it onto the new
 * socket.  Blocks (up to sndtimeo) when the listener's backlog is full.
 */
static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			       int addr_len, int flags)
{
	struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
	struct sock *newsk = NULL;
	struct sock *other = NULL;
	struct sk_buff *skb = NULL;
	unsigned hash;
	int st;
	int err;
	long timeo;

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	if (test_bit(SOCK_PASSCRED, &sock->flags)
		&& !u->addr && (err = unix_autobind(sock)) != 0)
		goto out;

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	/* First of all allocate resources.
	   If we will make it after state is locked,
	   we will have to recheck all again in any case.
	 */

	err = -ENOMEM;

	/* create new sock for complete connection */
	newsk = unix_create1(NULL);
	if (newsk == NULL)
		goto out;

	/* Allocate skb for sending to listening sock */
	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
	if (skb == NULL)
		goto out;

restart:
	/*  Find listening sock. */
	other = unix_find_other(sunaddr, addr_len, sk->sk_type, hash, &err);
	if (!other)
		goto out;

	/* Latch state of peer */
	unix_state_rlock(other);

	/* Apparently VFS overslept socket death. Retry. */
	if (sock_flag(other, SOCK_DEAD)) {
		unix_state_runlock(other);
		sock_put(other);
		goto restart;
	}

	err = -ECONNREFUSED;
	if (other->sk_state != TCP_LISTEN)
		goto out_unlock;

	if (skb_queue_len(&other->sk_receive_queue) >
	    other->sk_max_ack_backlog) {
		err = -EAGAIN;
		if (!timeo)
			goto out_unlock;

		/* Drops other's state lock while sleeping. */
		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
		sock_put(other);
		goto restart;
	}

	/* Latch our state.

	   It is tricky place. We need to grab write lock and cannot
	   drop lock on peer. It is dangerous because deadlock is
	   possible. Connect to self case and simultaneous
	   attempt to connect are eliminated by checking socket
	   state. other is TCP_LISTEN, if sk is TCP_LISTEN we
	   check this before attempt to grab lock.

	   Well, and we have to recheck the state after socket locked.
	 */
	st = sk->sk_state;

	switch (st) {
	case TCP_CLOSE:
		/* This is ok... continue with connect */
		break;
	case TCP_ESTABLISHED:
		/* Socket is already connected */
		err = -EISCONN;
		goto out_unlock;
	default:
		err = -EINVAL;
		goto out_unlock;
	}

	unix_state_wlock(sk);

	if (sk->sk_state != st) {
		/* State changed while unlocked: back off and retry. */
		unix_state_wunlock(sk);
		unix_state_runlock(other);
		sock_put(other);
		goto restart;
	}

	err = security_unix_stream_connect(sock, other->sk_socket, newsk);
	if (err) {
		unix_state_wunlock(sk);
		goto out_unlock;
	}

	/* The way is open! Fastly set all the necessary fields... */

	sock_hold(sk);
	unix_peer(newsk)	= sk;
	newsk->sk_state		= TCP_ESTABLISHED;
	newsk->sk_type		= sk->sk_type;
	newsk->sk_peercred.pid	= current->tgid;
	newsk->sk_peercred.uid	= current->euid;
	newsk->sk_peercred.gid	= current->egid;
	newu = unix_sk(newsk);
	newsk->sk_sleep		= &newu->peer_wait;
	otheru = unix_sk(other);

	/* copy address information from listening to new sock*/
	if (otheru->addr) {
		atomic_inc(&otheru->addr->refcnt);
		newu->addr = otheru->addr;
	}
	if (otheru->dentry) {
		newu->dentry	= dget(otheru->dentry);
		newu->mnt	= mntget(otheru->mnt);
	}

	/* Set credentials */
	sk->sk_peercred = other->sk_peercred;

	sock->state	= SS_CONNECTED;
	sk->sk_state	= TCP_ESTABLISHED;
	sock_hold(newsk);

	smp_mb__after_atomic_inc();	/* sock_hold() does an atomic_inc() */
	unix_peer(sk)	= newsk;

	unix_state_wunlock(sk);

	/* take ten and send info to listening sock */
	spin_lock(&other->sk_receive_queue.lock);
	__skb_queue_tail(&other->sk_receive_queue, skb);
	/* Undo artificially decreased inflight after embrion
	 * is installed to listening socket.
	 */
	atomic_inc(&newu->inflight);
	spin_unlock(&other->sk_receive_queue.lock);
	unix_state_runlock(other);
	other->sk_data_ready(other, 0);
	sock_put(other);
	return 0;

out_unlock:
	if (other)
		unix_state_runlock(other);

out:
	/* Failure path: free whatever was allocated before the error. */
	if (skb)
		kfree_skb(skb);
	if (newsk)
		unix_release_sock(newsk, 0);
	if (other)
		sock_put(other);
	return err;
}

/* socketpair(): wire two freshly created sockets directly to each other. */
static int unix_socketpair(struct socket *socka, struct socket *sockb)
{
	struct sock *ska=socka->sk, *skb = sockb->sk;

	/* Join our sockets back to back */
	sock_hold(ska);
	sock_hold(skb);
	unix_peer(ska)=skb;
	unix_peer(skb)=ska;
	ska->sk_peercred.pid = skb->sk_peercred.pid = current->tgid;
	ska->sk_peercred.uid = skb->sk_peercred.uid = current->euid;
	ska->sk_peercred.gid = skb->sk_peercred.gid = current->egid;

	/* Stream/seqpacket pairs are born fully connected. */
	if (ska->sk_type != SOCK_DGRAM) {
		ska->sk_state = TCP_ESTABLISHED;
		skb->sk_state = TCP_ESTABLISHED;
		socka->state  = SS_CONNECTED;
		sockb->state  = SS_CONNECTED;
	}
	return 0;
}

/* accept(): dequeue one embryo skb from the listener and graft the
 * embryo sock it carries onto newsock.
 */
static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
{
	struct sock *sk = sock->sk;
	struct sock *tsk;
	struct sk_buff *skb;
	int err;

	err = -EOPNOTSUPP;
	if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET)
		goto out;

	err = -EINVAL;
	if (sk->sk_state != TCP_LISTEN)
		goto out;

	/* If socket state is TCP_LISTEN it cannot change (for now...),
	 * so that no locks are necessary.
	 */

	skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
	if (!skb) {
		/* This means receive shutdown. */
		if (err == 0)
			err = -EINVAL;
		goto out;
	}

	tsk = skb->sk;
	skb_free_datagram(sk, skb);
	/* The queue shrank: let a blocked connector retry. */
	wake_up_interruptible(&unix_sk(sk)->peer_wait);

	/* attach accepted sock to socket */
	unix_state_wlock(tsk);
	newsock->state = SS_CONNECTED;
	sock_graft(tsk, newsock);
	unix_state_wunlock(tsk);
	return 0;

out:
	return err;
}


/* getname()/getpeername() handler: copy out the local (or peer's) bound
 * address; an unbound socket reports just the AF_UNIX family.
 */
static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
{
	struct sock *sk = sock->sk;
	struct unix_sock *u;
	struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
	int err = 0;

	if (peer) {
		sk = unix_peer_get(sk);

		err = -ENOTCONN;
		if (!sk)
			goto out;
		err = 0;
	} else {
		sock_hold(sk);
	}

	u = unix_sk(sk);
	unix_state_rlock(sk);
	if (!u->addr) {
		sunaddr->sun_family = AF_UNIX;
		sunaddr->sun_path[0] = 0;
		*uaddr_len = sizeof(short);
	} else {
		struct unix_address *addr = u->addr;

		*uaddr_len = addr->len;
		memcpy(sunaddr, addr->name, *uaddr_len);
	}
	unix_state_runlock(sk);
	sock_put(sk);
out:
	return err;
}

/* Move the passed-fd list from an skb into scm for delivery, taking the
 * fds out of the garbage collector's inflight accounting and restoring
 * the plain write-space destructor.
 */
static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
	int i;

	scm->fp = UNIXCB(skb).fp;
	skb->destructor = sock_wfree;
	UNIXCB(skb).fp = NULL;

	for (i=scm->fp->count-1; i>=0; i--)
		unix_notinflight(scm->fp->fp[i]);
}

/* skb destructor for skbs carrying fds: drop the fd references that
 * were never delivered, then do the normal write-space accounting.
 */
static void unix_destruct_fds(struct sk_buff *skb)
{
	struct scm_cookie scm;
	memset(&scm, 0, sizeof(scm));
	unix_detach_fds(&scm, skb);

	/* Alas, it calls VFS */
	/* So fscking what?
fput() had been SMP-safe since the last Summer */ 1246 scm_destroy(&scm); 1247 sock_wfree(skb); 1248} 1249 1250static void unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) 1251{ 1252 int i; 1253 for (i=scm->fp->count-1; i>=0; i--) 1254 unix_inflight(scm->fp->fp[i]); 1255 UNIXCB(skb).fp = scm->fp; 1256 skb->destructor = unix_destruct_fds; 1257 scm->fp = NULL; 1258} 1259 1260/* 1261 * Send AF_UNIX data. 1262 */ 1263 1264static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, 1265 struct msghdr *msg, size_t len) 1266{ 1267 struct sock_iocb *siocb = kiocb_to_siocb(kiocb); 1268 struct sock *sk = sock->sk; 1269 struct unix_sock *u = unix_sk(sk); 1270 struct sockaddr_un *sunaddr=msg->msg_name; 1271 struct sock *other = NULL; 1272 int namelen = 0; /* fake GCC */ 1273 int err; 1274 unsigned hash; 1275 struct sk_buff *skb; 1276 long timeo; 1277 struct scm_cookie tmp_scm; 1278 1279 if (NULL == siocb->scm) 1280 siocb->scm = &tmp_scm; 1281 err = scm_send(sock, msg, siocb->scm); 1282 if (err < 0) 1283 return err; 1284 1285 err = -EOPNOTSUPP; 1286 if (msg->msg_flags&MSG_OOB) 1287 goto out; 1288 1289 if (msg->msg_namelen) { 1290 err = unix_mkname(sunaddr, msg->msg_namelen, &hash); 1291 if (err < 0) 1292 goto out; 1293 namelen = err; 1294 } else { 1295 sunaddr = NULL; 1296 err = -ENOTCONN; 1297 other = unix_peer_get(sk); 1298 if (!other) 1299 goto out; 1300 } 1301 1302 if (test_bit(SOCK_PASSCRED, &sock->flags) 1303 && !u->addr && (err = unix_autobind(sock)) != 0) 1304 goto out; 1305 1306 err = -EMSGSIZE; 1307 if (len > sk->sk_sndbuf - 32) 1308 goto out; 1309 1310 skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err); 1311 if (skb==NULL) 1312 goto out; 1313 1314 memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred)); 1315 if (siocb->scm->fp) 1316 unix_attach_fds(siocb->scm, skb); 1317 1318 unix_get_peersec_dgram(skb); 1319 1320 skb->h.raw = skb->data; 1321 err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len); 1322 if (err) 
1323 goto out_free; 1324 1325 timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); 1326 1327restart: 1328 if (!other) { 1329 err = -ECONNRESET; 1330 if (sunaddr == NULL) 1331 goto out_free; 1332 1333 other = unix_find_other(sunaddr, namelen, sk->sk_type, 1334 hash, &err); 1335 if (other==NULL) 1336 goto out_free; 1337 } 1338 1339 unix_state_rlock(other); 1340 err = -EPERM; 1341 if (!unix_may_send(sk, other)) 1342 goto out_unlock; 1343 1344 if (sock_flag(other, SOCK_DEAD)) { 1345 /* 1346 * Check with 1003.1g - what should 1347 * datagram error 1348 */ 1349 unix_state_runlock(other); 1350 sock_put(other); 1351 1352 err = 0; 1353 unix_state_wlock(sk); 1354 if (unix_peer(sk) == other) { 1355 unix_peer(sk)=NULL; 1356 unix_state_wunlock(sk); 1357 1358 unix_dgram_disconnected(sk, other); 1359 sock_put(other); 1360 err = -ECONNREFUSED; 1361 } else { 1362 unix_state_wunlock(sk); 1363 } 1364 1365 other = NULL; 1366 if (err) 1367 goto out_free; 1368 goto restart; 1369 } 1370 1371 err = -EPIPE; 1372 if (other->sk_shutdown & RCV_SHUTDOWN) 1373 goto out_unlock; 1374 1375 if (sk->sk_type != SOCK_SEQPACKET) { 1376 err = security_unix_may_send(sk->sk_socket, other->sk_socket); 1377 if (err) 1378 goto out_unlock; 1379 } 1380 1381 if (unix_peer(other) != sk && 1382 (skb_queue_len(&other->sk_receive_queue) > 1383 other->sk_max_ack_backlog)) { 1384 if (!timeo) { 1385 err = -EAGAIN; 1386 goto out_unlock; 1387 } 1388 1389 timeo = unix_wait_for_peer(other, timeo); 1390 1391 err = sock_intr_errno(timeo); 1392 if (signal_pending(current)) 1393 goto out_free; 1394 1395 goto restart; 1396 } 1397 1398 skb_queue_tail(&other->sk_receive_queue, skb); 1399 unix_state_runlock(other); 1400 other->sk_data_ready(other, len); 1401 sock_put(other); 1402 scm_destroy(siocb->scm); 1403 return len; 1404 1405out_unlock: 1406 unix_state_runlock(other); 1407out_free: 1408 kfree_skb(skb); 1409out: 1410 if (other) 1411 sock_put(other); 1412 scm_destroy(siocb->scm); 1413 return err; 1414} 1415 1416 
1417static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, 1418 struct msghdr *msg, size_t len) 1419{ 1420 struct sock_iocb *siocb = kiocb_to_siocb(kiocb); 1421 struct sock *sk = sock->sk; 1422 struct sock *other = NULL; 1423 struct sockaddr_un *sunaddr=msg->msg_name; 1424 int err,size; 1425 struct sk_buff *skb; 1426 int sent=0; 1427 struct scm_cookie tmp_scm; 1428 1429 if (NULL == siocb->scm) 1430 siocb->scm = &tmp_scm; 1431 err = scm_send(sock, msg, siocb->scm); 1432 if (err < 0) 1433 return err; 1434 1435 err = -EOPNOTSUPP; 1436 if (msg->msg_flags&MSG_OOB) 1437 goto out_err; 1438 1439 if (msg->msg_namelen) { 1440 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP; 1441 goto out_err; 1442 } else { 1443 sunaddr = NULL; 1444 err = -ENOTCONN; 1445 other = unix_peer(sk); 1446 if (!other) 1447 goto out_err; 1448 } 1449 1450 if (sk->sk_shutdown & SEND_SHUTDOWN) 1451 goto pipe_err; 1452 1453 while(sent < len) 1454 { 1455 /* 1456 * Optimisation for the fact that under 0.01% of X 1457 * messages typically need breaking up. 1458 */ 1459 1460 size = len-sent; 1461 1462 /* Keep two messages in the pipe so it schedules better */ 1463 if (size > ((sk->sk_sndbuf >> 1) - 64)) 1464 size = (sk->sk_sndbuf >> 1) - 64; 1465 1466 if (size > SKB_MAX_ALLOC) 1467 size = SKB_MAX_ALLOC; 1468 1469 /* 1470 * Grab a buffer 1471 */ 1472 1473 skb=sock_alloc_send_skb(sk,size,msg->msg_flags&MSG_DONTWAIT, &err); 1474 1475 if (skb==NULL) 1476 goto out_err; 1477 1478 /* 1479 * If you pass two values to the sock_alloc_send_skb 1480 * it tries to grab the large buffer with GFP_NOFS 1481 * (which can fail easily), and if it fails grab the 1482 * fallback size buffer which is under a page and will 1483 * succeed. 
[Alan] 1484 */ 1485 size = min_t(int, size, skb_tailroom(skb)); 1486 1487 memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred)); 1488 if (siocb->scm->fp) 1489 unix_attach_fds(siocb->scm, skb); 1490 1491 if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) { 1492 kfree_skb(skb); 1493 goto out_err; 1494 } 1495 1496 unix_state_rlock(other); 1497 1498 if (sock_flag(other, SOCK_DEAD) || 1499 (other->sk_shutdown & RCV_SHUTDOWN)) 1500 goto pipe_err_free; 1501 1502 skb_queue_tail(&other->sk_receive_queue, skb); 1503 unix_state_runlock(other); 1504 other->sk_data_ready(other, size); 1505 sent+=size; 1506 } 1507 1508 scm_destroy(siocb->scm); 1509 siocb->scm = NULL; 1510 1511 return sent; 1512 1513pipe_err_free: 1514 unix_state_runlock(other); 1515 kfree_skb(skb); 1516pipe_err: 1517 if (sent==0 && !(msg->msg_flags&MSG_NOSIGNAL)) 1518 send_sig(SIGPIPE,current,0); 1519 err = -EPIPE; 1520out_err: 1521 scm_destroy(siocb->scm); 1522 siocb->scm = NULL; 1523 return sent ? : err; 1524} 1525 1526static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock, 1527 struct msghdr *msg, size_t len) 1528{ 1529 int err; 1530 struct sock *sk = sock->sk; 1531 1532 err = sock_error(sk); 1533 if (err) 1534 return err; 1535 1536 if (sk->sk_state != TCP_ESTABLISHED) 1537 return -ENOTCONN; 1538 1539 if (msg->msg_namelen) 1540 msg->msg_namelen = 0; 1541 1542 return unix_dgram_sendmsg(kiocb, sock, msg, len); 1543} 1544 1545static void unix_copy_addr(struct msghdr *msg, struct sock *sk) 1546{ 1547 struct unix_sock *u = unix_sk(sk); 1548 1549 msg->msg_namelen = 0; 1550 if (u->addr) { 1551 msg->msg_namelen = u->addr->len; 1552 memcpy(msg->msg_name, u->addr->name, u->addr->len); 1553 } 1554} 1555 1556static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, 1557 struct msghdr *msg, size_t size, 1558 int flags) 1559{ 1560 struct sock_iocb *siocb = kiocb_to_siocb(iocb); 1561 struct scm_cookie tmp_scm; 1562 struct sock *sk = sock->sk; 1563 struct 
unix_sock *u = unix_sk(sk); 1564 int noblock = flags & MSG_DONTWAIT; 1565 struct sk_buff *skb; 1566 int err; 1567 1568 err = -EOPNOTSUPP; 1569 if (flags&MSG_OOB) 1570 goto out; 1571 1572 msg->msg_namelen = 0; 1573 1574 mutex_lock(&u->readlock); 1575 1576 skb = skb_recv_datagram(sk, flags, noblock, &err); 1577 if (!skb) 1578 goto out_unlock; 1579 1580 wake_up_interruptible(&u->peer_wait); 1581 1582 if (msg->msg_name) 1583 unix_copy_addr(msg, skb->sk); 1584 1585 if (size > skb->len) 1586 size = skb->len; 1587 else if (size < skb->len) 1588 msg->msg_flags |= MSG_TRUNC; 1589 1590 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size); 1591 if (err) 1592 goto out_free; 1593 1594 if (!siocb->scm) { 1595 siocb->scm = &tmp_scm; 1596 memset(&tmp_scm, 0, sizeof(tmp_scm)); 1597 } 1598 siocb->scm->creds = *UNIXCREDS(skb); 1599 unix_set_secdata(siocb->scm, skb); 1600 1601 if (!(flags & MSG_PEEK)) 1602 { 1603 if (UNIXCB(skb).fp) 1604 unix_detach_fds(siocb->scm, skb); 1605 } 1606 else 1607 { 1608 /* It is questionable: on PEEK we could: 1609 - do not return fds - good, but too simple 8) 1610 - return fds, and do not return them on read (old strategy, 1611 apparently wrong) 1612 - clone fds (I chose it for now, it is the most universal 1613 solution) 1614 1615 POSIX 1003.1g does not actually define this clearly 1616 at all. POSIX 1003.1g doesn't define a lot of things 1617 clearly however! 1618 1619 */ 1620 if (UNIXCB(skb).fp) 1621 siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp); 1622 } 1623 err = size; 1624 1625 scm_recv(sock, msg, siocb->scm, flags); 1626 1627out_free: 1628 skb_free_datagram(sk,skb); 1629out_unlock: 1630 mutex_unlock(&u->readlock); 1631out: 1632 return err; 1633} 1634 1635/* 1636 * Sleep until data has arrive. But check for races.. 
1637 */ 1638 1639static long unix_stream_data_wait(struct sock * sk, long timeo) 1640{ 1641 DEFINE_WAIT(wait); 1642 1643 unix_state_rlock(sk); 1644 1645 for (;;) { 1646 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); 1647 1648 if (!skb_queue_empty(&sk->sk_receive_queue) || 1649 sk->sk_err || 1650 (sk->sk_shutdown & RCV_SHUTDOWN) || 1651 signal_pending(current) || 1652 !timeo) 1653 break; 1654 1655 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); 1656 unix_state_runlock(sk); 1657 timeo = schedule_timeout(timeo); 1658 unix_state_rlock(sk); 1659 clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); 1660 } 1661 1662 finish_wait(sk->sk_sleep, &wait); 1663 unix_state_runlock(sk); 1664 return timeo; 1665} 1666 1667 1668 1669static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, 1670 struct msghdr *msg, size_t size, 1671 int flags) 1672{ 1673 struct sock_iocb *siocb = kiocb_to_siocb(iocb); 1674 struct scm_cookie tmp_scm; 1675 struct sock *sk = sock->sk; 1676 struct unix_sock *u = unix_sk(sk); 1677 struct sockaddr_un *sunaddr=msg->msg_name; 1678 int copied = 0; 1679 int check_creds = 0; 1680 int target; 1681 int err = 0; 1682 long timeo; 1683 1684 err = -EINVAL; 1685 if (sk->sk_state != TCP_ESTABLISHED) 1686 goto out; 1687 1688 err = -EOPNOTSUPP; 1689 if (flags&MSG_OOB) 1690 goto out; 1691 1692 target = sock_rcvlowat(sk, flags&MSG_WAITALL, size); 1693 timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT); 1694 1695 msg->msg_namelen = 0; 1696 1697 /* Lock the socket to prevent queue disordering 1698 * while sleeps in memcpy_tomsg 1699 */ 1700 1701 if (!siocb->scm) { 1702 siocb->scm = &tmp_scm; 1703 memset(&tmp_scm, 0, sizeof(tmp_scm)); 1704 } 1705 1706 mutex_lock(&u->readlock); 1707 1708 do 1709 { 1710 int chunk; 1711 struct sk_buff *skb; 1712 1713 skb = skb_dequeue(&sk->sk_receive_queue); 1714 if (skb==NULL) 1715 { 1716 if (copied >= target) 1717 break; 1718 1719 /* 1720 * POSIX 1003.1g mandates this order. 
1721 */ 1722 1723 if ((err = sock_error(sk)) != 0) 1724 break; 1725 if (sk->sk_shutdown & RCV_SHUTDOWN) 1726 break; 1727 err = -EAGAIN; 1728 if (!timeo) 1729 break; 1730 mutex_unlock(&u->readlock); 1731 1732 timeo = unix_stream_data_wait(sk, timeo); 1733 1734 if (signal_pending(current)) { 1735 err = sock_intr_errno(timeo); 1736 goto out; 1737 } 1738 mutex_lock(&u->readlock); 1739 continue; 1740 } 1741 1742 if (check_creds) { 1743 /* Never glue messages from different writers */ 1744 if (memcmp(UNIXCREDS(skb), &siocb->scm->creds, sizeof(siocb->scm->creds)) != 0) { 1745 skb_queue_head(&sk->sk_receive_queue, skb); 1746 break; 1747 } 1748 } else { 1749 /* Copy credentials */ 1750 siocb->scm->creds = *UNIXCREDS(skb); 1751 check_creds = 1; 1752 } 1753 1754 /* Copy address just once */ 1755 if (sunaddr) 1756 { 1757 unix_copy_addr(msg, skb->sk); 1758 sunaddr = NULL; 1759 } 1760 1761 chunk = min_t(unsigned int, skb->len, size); 1762 if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) { 1763 skb_queue_head(&sk->sk_receive_queue, skb); 1764 if (copied == 0) 1765 copied = -EFAULT; 1766 break; 1767 } 1768 copied += chunk; 1769 size -= chunk; 1770 1771 /* Mark read part of skb as used */ 1772 if (!(flags & MSG_PEEK)) 1773 { 1774 skb_pull(skb, chunk); 1775 1776 if (UNIXCB(skb).fp) 1777 unix_detach_fds(siocb->scm, skb); 1778 1779 /* put the skb back if we didn't use it up.. */ 1780 if (skb->len) 1781 { 1782 skb_queue_head(&sk->sk_receive_queue, skb); 1783 break; 1784 } 1785 1786 kfree_skb(skb); 1787 1788 if (siocb->scm->fp) 1789 break; 1790 } 1791 else 1792 { 1793 /* It is questionable, see note in unix_dgram_recvmsg. 1794 */ 1795 if (UNIXCB(skb).fp) 1796 siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp); 1797 1798 /* put message back and return */ 1799 skb_queue_head(&sk->sk_receive_queue, skb); 1800 break; 1801 } 1802 } while (size); 1803 1804 mutex_unlock(&u->readlock); 1805 scm_recv(sock, msg, siocb->scm, flags); 1806out: 1807 return copied ? 
: err; 1808} 1809 1810static int unix_shutdown(struct socket *sock, int mode) 1811{ 1812 struct sock *sk = sock->sk; 1813 struct sock *other; 1814 1815 mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN); 1816 1817 if (mode) { 1818 unix_state_wlock(sk); 1819 sk->sk_shutdown |= mode; 1820 other=unix_peer(sk); 1821 if (other) 1822 sock_hold(other); 1823 unix_state_wunlock(sk); 1824 sk->sk_state_change(sk); 1825 1826 if (other && 1827 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) { 1828 1829 int peer_mode = 0; 1830 1831 if (mode&RCV_SHUTDOWN) 1832 peer_mode |= SEND_SHUTDOWN; 1833 if (mode&SEND_SHUTDOWN) 1834 peer_mode |= RCV_SHUTDOWN; 1835 unix_state_wlock(other); 1836 other->sk_shutdown |= peer_mode; 1837 unix_state_wunlock(other); 1838 other->sk_state_change(other); 1839 read_lock(&other->sk_callback_lock); 1840 if (peer_mode == SHUTDOWN_MASK) 1841 sk_wake_async(other,1,POLL_HUP); 1842 else if (peer_mode & RCV_SHUTDOWN) 1843 sk_wake_async(other,1,POLL_IN); 1844 read_unlock(&other->sk_callback_lock); 1845 } 1846 if (other) 1847 sock_put(other); 1848 } 1849 return 0; 1850} 1851 1852static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) 1853{ 1854 struct sock *sk = sock->sk; 1855 long amount=0; 1856 int err; 1857 1858 switch(cmd) 1859 { 1860 case SIOCOUTQ: 1861 amount = atomic_read(&sk->sk_wmem_alloc); 1862 err = put_user(amount, (int __user *)arg); 1863 break; 1864 case SIOCINQ: 1865 { 1866 struct sk_buff *skb; 1867 1868 if (sk->sk_state == TCP_LISTEN) { 1869 err = -EINVAL; 1870 break; 1871 } 1872 1873 spin_lock(&sk->sk_receive_queue.lock); 1874 if (sk->sk_type == SOCK_STREAM || 1875 sk->sk_type == SOCK_SEQPACKET) { 1876 skb_queue_walk(&sk->sk_receive_queue, skb) 1877 amount += skb->len; 1878 } else { 1879 skb = skb_peek(&sk->sk_receive_queue); 1880 if (skb) 1881 amount=skb->len; 1882 } 1883 spin_unlock(&sk->sk_receive_queue.lock); 1884 err = put_user(amount, (int __user *)arg); 1885 break; 1886 } 1887 1888 default: 1889 err = 
-ENOIOCTLCMD; 1890 break; 1891 } 1892 return err; 1893} 1894 1895static unsigned int unix_poll(struct file * file, struct socket *sock, poll_table *wait) 1896{ 1897 struct sock *sk = sock->sk; 1898 unsigned int mask; 1899 1900 poll_wait(file, sk->sk_sleep, wait); 1901 mask = 0; 1902 1903 /* exceptional events? */ 1904 if (sk->sk_err) 1905 mask |= POLLERR; 1906 if (sk->sk_shutdown == SHUTDOWN_MASK) 1907 mask |= POLLHUP; 1908 if (sk->sk_shutdown & RCV_SHUTDOWN) 1909 mask |= POLLRDHUP; 1910 1911 /* readable? */ 1912 if (!skb_queue_empty(&sk->sk_receive_queue) || 1913 (sk->sk_shutdown & RCV_SHUTDOWN)) 1914 mask |= POLLIN | POLLRDNORM; 1915 1916 /* Connection-based need to check for termination and startup */ 1917 if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && sk->sk_state == TCP_CLOSE) 1918 mask |= POLLHUP; 1919 1920 /* 1921 * we set writable also when the other side has shut down the 1922 * connection. This prevents stuck sockets. 1923 */ 1924 if (unix_writable(sk)) 1925 mask |= POLLOUT | POLLWRNORM | POLLWRBAND; 1926 1927 return mask; 1928} 1929 1930 1931#ifdef CONFIG_PROC_FS 1932static struct sock *unix_seq_idx(int *iter, loff_t pos) 1933{ 1934 loff_t off = 0; 1935 struct sock *s; 1936 1937 for (s = first_unix_socket(iter); s; s = next_unix_socket(iter, s)) { 1938 if (off == pos) 1939 return s; 1940 ++off; 1941 } 1942 return NULL; 1943} 1944 1945 1946static void *unix_seq_start(struct seq_file *seq, loff_t *pos) 1947{ 1948 spin_lock(&unix_table_lock); 1949 return *pos ? 
unix_seq_idx(seq->private, *pos - 1) : ((void *) 1); 1950} 1951 1952static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos) 1953{ 1954 ++*pos; 1955 1956 if (v == (void *)1) 1957 return first_unix_socket(seq->private); 1958 return next_unix_socket(seq->private, v); 1959} 1960 1961static void unix_seq_stop(struct seq_file *seq, void *v) 1962{ 1963 spin_unlock(&unix_table_lock); 1964} 1965 1966static int unix_seq_show(struct seq_file *seq, void *v) 1967{ 1968 1969 if (v == (void *)1) 1970 seq_puts(seq, "Num RefCount Protocol Flags Type St " 1971 "Inode Path\n"); 1972 else { 1973 struct sock *s = v; 1974 struct unix_sock *u = unix_sk(s); 1975 unix_state_rlock(s); 1976 1977 seq_printf(seq, "%p: %08X %08X %08X %04X %02X %5lu", 1978 s, 1979 atomic_read(&s->sk_refcnt), 1980 0, 1981 s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0, 1982 s->sk_type, 1983 s->sk_socket ? 1984 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) : 1985 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING), 1986 sock_i_ino(s)); 1987 1988 if (u->addr) { 1989 int i, len; 1990 seq_putc(seq, ' '); 1991 1992 i = 0; 1993 len = u->addr->len - sizeof(short); 1994 if (!UNIX_ABSTRACT(s)) 1995 len--; 1996 else { 1997 seq_putc(seq, '@'); 1998 i++; 1999 } 2000 for ( ; i < len; i++) 2001 seq_putc(seq, u->addr->name->sun_path[i]); 2002 } 2003 unix_state_runlock(s); 2004 seq_putc(seq, '\n'); 2005 } 2006 2007 return 0; 2008} 2009 2010static struct seq_operations unix_seq_ops = { 2011 .start = unix_seq_start, 2012 .next = unix_seq_next, 2013 .stop = unix_seq_stop, 2014 .show = unix_seq_show, 2015}; 2016 2017 2018static int unix_seq_open(struct inode *inode, struct file *file) 2019{ 2020 struct seq_file *seq; 2021 int rc = -ENOMEM; 2022 int *iter = kmalloc(sizeof(int), GFP_KERNEL); 2023 2024 if (!iter) 2025 goto out; 2026 2027 rc = seq_open(file, &unix_seq_ops); 2028 if (rc) 2029 goto out_kfree; 2030 2031 seq = file->private_data; 2032 seq->private = iter; 2033 *iter = 0; 
2034out: 2035 return rc; 2036out_kfree: 2037 kfree(iter); 2038 goto out; 2039} 2040 2041static struct file_operations unix_seq_fops = { 2042 .owner = THIS_MODULE, 2043 .open = unix_seq_open, 2044 .read = seq_read, 2045 .llseek = seq_lseek, 2046 .release = seq_release_private, 2047}; 2048 2049#endif 2050 2051static struct net_proto_family unix_family_ops = { 2052 .family = PF_UNIX, 2053 .create = unix_create, 2054 .owner = THIS_MODULE, 2055}; 2056 2057static int __init af_unix_init(void) 2058{ 2059 int rc = -1; 2060 struct sk_buff *dummy_skb; 2061 2062 if (sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb)) { 2063 printk(KERN_CRIT "%s: panic\n", __FUNCTION__); 2064 goto out; 2065 } 2066 2067 rc = proto_register(&unix_proto, 1); 2068 if (rc != 0) { 2069 printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n", 2070 __FUNCTION__); 2071 goto out; 2072 } 2073 2074 sock_register(&unix_family_ops); 2075#ifdef CONFIG_PROC_FS 2076 proc_net_fops_create("unix", 0, &unix_seq_fops); 2077#endif 2078 unix_sysctl_register(); 2079out: 2080 return rc; 2081} 2082 2083static void __exit af_unix_exit(void) 2084{ 2085 sock_unregister(PF_UNIX); 2086 unix_sysctl_unregister(); 2087 proc_net_remove("unix"); 2088 proto_unregister(&unix_proto); 2089} 2090 2091module_init(af_unix_init); 2092module_exit(af_unix_exit); 2093 2094MODULE_LICENSE("GPL"); 2095MODULE_ALIAS_NETPROTO(PF_UNIX); 2096