af_unix.c revision 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2
1/* 2 * NET4: Implementation of BSD Unix domain sockets. 3 * 4 * Authors: Alan Cox, <alan.cox@linux.org> 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License 8 * as published by the Free Software Foundation; either version 9 * 2 of the License, or (at your option) any later version. 10 * 11 * Version: $Id: af_unix.c,v 1.133 2002/02/08 03:57:19 davem Exp $ 12 * 13 * Fixes: 14 * Linus Torvalds : Assorted bug cures. 15 * Niibe Yutaka : async I/O support. 16 * Carsten Paeth : PF_UNIX check, address fixes. 17 * Alan Cox : Limit size of allocated blocks. 18 * Alan Cox : Fixed the stupid socketpair bug. 19 * Alan Cox : BSD compatibility fine tuning. 20 * Alan Cox : Fixed a bug in connect when interrupted. 21 * Alan Cox : Sorted out a proper draft version of 22 * file descriptor passing hacked up from 23 * Mike Shaver's work. 24 * Marty Leisner : Fixes to fd passing 25 * Nick Nevin : recvmsg bugfix. 26 * Alan Cox : Started proper garbage collector 27 * Heiko EiBfeldt : Missing verify_area check 28 * Alan Cox : Started POSIXisms 29 * Andreas Schwab : Replace inode by dentry for proper 30 * reference counting 31 * Kirk Petersen : Made this a module 32 * Christoph Rohland : Elegant non-blocking accept/connect algorithm. 33 * Lots of bug fixes. 34 * Alexey Kuznetosv : Repaired (I hope) bugs introduces 35 * by above two patches. 36 * Andrea Arcangeli : If possible we block in connect(2) 37 * if the max backlog of the listen socket 38 * is been reached. This won't break 39 * old apps and it will avoid huge amount 40 * of socks hashed (this for unix_gc() 41 * performances reasons). 42 * Security fix that limits the max 43 * number of socks to 2*max_files and 44 * the number of skb queueable in the 45 * dgram receiver. 46 * Artur Skawina : Hash function optimizations 47 * Alexey Kuznetsov : Full scale SMP. 
Lot of bugs are introduced 8) 48 * Malcolm Beattie : Set peercred for socketpair 49 * Michal Ostrowski : Module initialization cleanup. 50 * Arnaldo C. Melo : Remove MOD_{INC,DEC}_USE_COUNT, 51 * the core infrastructure is doing that 52 * for all net proto families now (2.5.69+) 53 * 54 * 55 * Known differences from reference BSD that was tested: 56 * 57 * [TO FIX] 58 * ECONNREFUSED is not returned from one end of a connected() socket to the 59 * other the moment one end closes. 60 * fstat() doesn't return st_dev=0, and give the blksize as high water mark 61 * and a fake inode identifier (nor the BSD first socket fstat twice bug). 62 * [NOT TO FIX] 63 * accept() returns a path name even if the connecting socket has closed 64 * in the meantime (BSD loses the path and gives up). 65 * accept() returns 0 length path for an unbound connector. BSD returns 16 66 * and a null first byte in the path (but not for gethost/peername - BSD bug ??) 67 * socketpair(...SOCK_RAW..) doesn't panic the kernel. 68 * BSD af_unix apparently has connect forgetting to block properly. 69 * (need to check this with the POSIX spec in detail) 70 * 71 * Differences from 2.0.0-11-... (ANK) 72 * Bug fixes and improvements. 73 * - client shutdown killed server socket. 74 * - removed all useless cli/sti pairs. 75 * 76 * Semantic changes/extensions. 77 * - generic control message passing. 78 * - SCM_CREDENTIALS control message. 79 * - "Abstract" (not FS based) socket bindings. 80 * Abstract names are sequences of bytes (not zero terminated) 81 * started by 0, so that this name space does not intersect 82 * with BSD names. 
83 */ 84 85#include <linux/module.h> 86#include <linux/config.h> 87#include <linux/kernel.h> 88#include <linux/major.h> 89#include <linux/signal.h> 90#include <linux/sched.h> 91#include <linux/errno.h> 92#include <linux/string.h> 93#include <linux/stat.h> 94#include <linux/dcache.h> 95#include <linux/namei.h> 96#include <linux/socket.h> 97#include <linux/un.h> 98#include <linux/fcntl.h> 99#include <linux/termios.h> 100#include <linux/sockios.h> 101#include <linux/net.h> 102#include <linux/in.h> 103#include <linux/fs.h> 104#include <linux/slab.h> 105#include <asm/uaccess.h> 106#include <linux/skbuff.h> 107#include <linux/netdevice.h> 108#include <net/sock.h> 109#include <linux/tcp.h> 110#include <net/af_unix.h> 111#include <linux/proc_fs.h> 112#include <linux/seq_file.h> 113#include <net/scm.h> 114#include <linux/init.h> 115#include <linux/poll.h> 116#include <linux/smp_lock.h> 117#include <linux/rtnetlink.h> 118#include <linux/mount.h> 119#include <net/checksum.h> 120#include <linux/security.h> 121 122int sysctl_unix_max_dgram_qlen = 10; 123 124struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1]; 125DEFINE_RWLOCK(unix_table_lock); 126static atomic_t unix_nr_socks = ATOMIC_INIT(0); 127 128#define unix_sockets_unbound (&unix_socket_table[UNIX_HASH_SIZE]) 129 130#define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash != UNIX_HASH_SIZE) 131 132/* 133 * SMP locking strategy: 134 * hash table is protected with rwlock unix_table_lock 135 * each socket state is protected by separate rwlock. 
136 */ 137 138static inline unsigned unix_hash_fold(unsigned hash) 139{ 140 hash ^= hash>>16; 141 hash ^= hash>>8; 142 return hash&(UNIX_HASH_SIZE-1); 143} 144 145#define unix_peer(sk) (unix_sk(sk)->peer) 146 147static inline int unix_our_peer(struct sock *sk, struct sock *osk) 148{ 149 return unix_peer(osk) == sk; 150} 151 152static inline int unix_may_send(struct sock *sk, struct sock *osk) 153{ 154 return (unix_peer(osk) == NULL || unix_our_peer(sk, osk)); 155} 156 157static struct sock *unix_peer_get(struct sock *s) 158{ 159 struct sock *peer; 160 161 unix_state_rlock(s); 162 peer = unix_peer(s); 163 if (peer) 164 sock_hold(peer); 165 unix_state_runlock(s); 166 return peer; 167} 168 169static inline void unix_release_addr(struct unix_address *addr) 170{ 171 if (atomic_dec_and_test(&addr->refcnt)) 172 kfree(addr); 173} 174 175/* 176 * Check unix socket name: 177 * - should be not zero length. 178 * - if started by not zero, should be NULL terminated (FS object) 179 * - if started by zero, it is abstract name. 180 */ 181 182static int unix_mkname(struct sockaddr_un * sunaddr, int len, unsigned *hashp) 183{ 184 if (len <= sizeof(short) || len > sizeof(*sunaddr)) 185 return -EINVAL; 186 if (!sunaddr || sunaddr->sun_family != AF_UNIX) 187 return -EINVAL; 188 if (sunaddr->sun_path[0]) { 189 /* 190 * This may look like an off by one error but it is a bit more 191 * subtle. 108 is the longest valid AF_UNIX path for a binding. 192 * sun_path[108] doesnt as such exist. However in kernel space 193 * we are guaranteed that it is a valid memory location in our 194 * kernel address buffer. 
195 */ 196 ((char *)sunaddr)[len]=0; 197 len = strlen(sunaddr->sun_path)+1+sizeof(short); 198 return len; 199 } 200 201 *hashp = unix_hash_fold(csum_partial((char*)sunaddr, len, 0)); 202 return len; 203} 204 205static void __unix_remove_socket(struct sock *sk) 206{ 207 sk_del_node_init(sk); 208} 209 210static void __unix_insert_socket(struct hlist_head *list, struct sock *sk) 211{ 212 BUG_TRAP(sk_unhashed(sk)); 213 sk_add_node(sk, list); 214} 215 216static inline void unix_remove_socket(struct sock *sk) 217{ 218 write_lock(&unix_table_lock); 219 __unix_remove_socket(sk); 220 write_unlock(&unix_table_lock); 221} 222 223static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk) 224{ 225 write_lock(&unix_table_lock); 226 __unix_insert_socket(list, sk); 227 write_unlock(&unix_table_lock); 228} 229 230static struct sock *__unix_find_socket_byname(struct sockaddr_un *sunname, 231 int len, int type, unsigned hash) 232{ 233 struct sock *s; 234 struct hlist_node *node; 235 236 sk_for_each(s, node, &unix_socket_table[hash ^ type]) { 237 struct unix_sock *u = unix_sk(s); 238 239 if (u->addr->len == len && 240 !memcmp(u->addr->name, sunname, len)) 241 goto found; 242 } 243 s = NULL; 244found: 245 return s; 246} 247 248static inline struct sock *unix_find_socket_byname(struct sockaddr_un *sunname, 249 int len, int type, 250 unsigned hash) 251{ 252 struct sock *s; 253 254 read_lock(&unix_table_lock); 255 s = __unix_find_socket_byname(sunname, len, type, hash); 256 if (s) 257 sock_hold(s); 258 read_unlock(&unix_table_lock); 259 return s; 260} 261 262static struct sock *unix_find_socket_byinode(struct inode *i) 263{ 264 struct sock *s; 265 struct hlist_node *node; 266 267 read_lock(&unix_table_lock); 268 sk_for_each(s, node, 269 &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) { 270 struct dentry *dentry = unix_sk(s)->dentry; 271 272 if(dentry && dentry->d_inode == i) 273 { 274 sock_hold(s); 275 goto found; 276 } 277 } 278 s = NULL; 279found: 280 
read_unlock(&unix_table_lock); 281 return s; 282} 283 284static inline int unix_writable(struct sock *sk) 285{ 286 return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf; 287} 288 289static void unix_write_space(struct sock *sk) 290{ 291 read_lock(&sk->sk_callback_lock); 292 if (unix_writable(sk)) { 293 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) 294 wake_up_interruptible(sk->sk_sleep); 295 sk_wake_async(sk, 2, POLL_OUT); 296 } 297 read_unlock(&sk->sk_callback_lock); 298} 299 300/* When dgram socket disconnects (or changes its peer), we clear its receive 301 * queue of packets arrived from previous peer. First, it allows to do 302 * flow control based only on wmem_alloc; second, sk connected to peer 303 * may receive messages only from that peer. */ 304static void unix_dgram_disconnected(struct sock *sk, struct sock *other) 305{ 306 if (skb_queue_len(&sk->sk_receive_queue)) { 307 skb_queue_purge(&sk->sk_receive_queue); 308 wake_up_interruptible_all(&unix_sk(sk)->peer_wait); 309 310 /* If one link of bidirectional dgram pipe is disconnected, 311 * we signal error. Messages are lost. Do not make this, 312 * when peer was not connected to us. 
313 */ 314 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) { 315 other->sk_err = ECONNRESET; 316 other->sk_error_report(other); 317 } 318 } 319} 320 321static void unix_sock_destructor(struct sock *sk) 322{ 323 struct unix_sock *u = unix_sk(sk); 324 325 skb_queue_purge(&sk->sk_receive_queue); 326 327 BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc)); 328 BUG_TRAP(sk_unhashed(sk)); 329 BUG_TRAP(!sk->sk_socket); 330 if (!sock_flag(sk, SOCK_DEAD)) { 331 printk("Attempt to release alive unix socket: %p\n", sk); 332 return; 333 } 334 335 if (u->addr) 336 unix_release_addr(u->addr); 337 338 atomic_dec(&unix_nr_socks); 339#ifdef UNIX_REFCNT_DEBUG 340 printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk, atomic_read(&unix_nr_socks)); 341#endif 342} 343 344static int unix_release_sock (struct sock *sk, int embrion) 345{ 346 struct unix_sock *u = unix_sk(sk); 347 struct dentry *dentry; 348 struct vfsmount *mnt; 349 struct sock *skpair; 350 struct sk_buff *skb; 351 int state; 352 353 unix_remove_socket(sk); 354 355 /* Clear state */ 356 unix_state_wlock(sk); 357 sock_orphan(sk); 358 sk->sk_shutdown = SHUTDOWN_MASK; 359 dentry = u->dentry; 360 u->dentry = NULL; 361 mnt = u->mnt; 362 u->mnt = NULL; 363 state = sk->sk_state; 364 sk->sk_state = TCP_CLOSE; 365 unix_state_wunlock(sk); 366 367 wake_up_interruptible_all(&u->peer_wait); 368 369 skpair=unix_peer(sk); 370 371 if (skpair!=NULL) { 372 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) { 373 unix_state_wlock(skpair); 374 /* No more writes */ 375 skpair->sk_shutdown = SHUTDOWN_MASK; 376 if (!skb_queue_empty(&sk->sk_receive_queue) || embrion) 377 skpair->sk_err = ECONNRESET; 378 unix_state_wunlock(skpair); 379 skpair->sk_state_change(skpair); 380 read_lock(&skpair->sk_callback_lock); 381 sk_wake_async(skpair,1,POLL_HUP); 382 read_unlock(&skpair->sk_callback_lock); 383 } 384 sock_put(skpair); /* It may now die */ 385 unix_peer(sk) = NULL; 386 } 387 388 /* Try to flush out this socket. 
Throw out buffers at least */ 389 390 while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) { 391 if (state==TCP_LISTEN) 392 unix_release_sock(skb->sk, 1); 393 /* passed fds are erased in the kfree_skb hook */ 394 kfree_skb(skb); 395 } 396 397 if (dentry) { 398 dput(dentry); 399 mntput(mnt); 400 } 401 402 sock_put(sk); 403 404 /* ---- Socket is dead now and most probably destroyed ---- */ 405 406 /* 407 * Fixme: BSD difference: In BSD all sockets connected to use get 408 * ECONNRESET and we die on the spot. In Linux we behave 409 * like files and pipes do and wait for the last 410 * dereference. 411 * 412 * Can't we simply set sock->err? 413 * 414 * What the above comment does talk about? --ANK(980817) 415 */ 416 417 if (atomic_read(&unix_tot_inflight)) 418 unix_gc(); /* Garbage collect fds */ 419 420 return 0; 421} 422 423static int unix_listen(struct socket *sock, int backlog) 424{ 425 int err; 426 struct sock *sk = sock->sk; 427 struct unix_sock *u = unix_sk(sk); 428 429 err = -EOPNOTSUPP; 430 if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET) 431 goto out; /* Only stream/seqpacket sockets accept */ 432 err = -EINVAL; 433 if (!u->addr) 434 goto out; /* No listens on an unbound socket */ 435 unix_state_wlock(sk); 436 if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN) 437 goto out_unlock; 438 if (backlog > sk->sk_max_ack_backlog) 439 wake_up_interruptible_all(&u->peer_wait); 440 sk->sk_max_ack_backlog = backlog; 441 sk->sk_state = TCP_LISTEN; 442 /* set credentials so connect can copy them */ 443 sk->sk_peercred.pid = current->tgid; 444 sk->sk_peercred.uid = current->euid; 445 sk->sk_peercred.gid = current->egid; 446 err = 0; 447 448out_unlock: 449 unix_state_wunlock(sk); 450out: 451 return err; 452} 453 454static int unix_release(struct socket *); 455static int unix_bind(struct socket *, struct sockaddr *, int); 456static int unix_stream_connect(struct socket *, struct sockaddr *, 457 int addr_len, int flags); 458static int 
unix_socketpair(struct socket *, struct socket *); 459static int unix_accept(struct socket *, struct socket *, int); 460static int unix_getname(struct socket *, struct sockaddr *, int *, int); 461static unsigned int unix_poll(struct file *, struct socket *, poll_table *); 462static int unix_ioctl(struct socket *, unsigned int, unsigned long); 463static int unix_shutdown(struct socket *, int); 464static int unix_stream_sendmsg(struct kiocb *, struct socket *, 465 struct msghdr *, size_t); 466static int unix_stream_recvmsg(struct kiocb *, struct socket *, 467 struct msghdr *, size_t, int); 468static int unix_dgram_sendmsg(struct kiocb *, struct socket *, 469 struct msghdr *, size_t); 470static int unix_dgram_recvmsg(struct kiocb *, struct socket *, 471 struct msghdr *, size_t, int); 472static int unix_dgram_connect(struct socket *, struct sockaddr *, 473 int, int); 474static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *, 475 struct msghdr *, size_t); 476 477static struct proto_ops unix_stream_ops = { 478 .family = PF_UNIX, 479 .owner = THIS_MODULE, 480 .release = unix_release, 481 .bind = unix_bind, 482 .connect = unix_stream_connect, 483 .socketpair = unix_socketpair, 484 .accept = unix_accept, 485 .getname = unix_getname, 486 .poll = unix_poll, 487 .ioctl = unix_ioctl, 488 .listen = unix_listen, 489 .shutdown = unix_shutdown, 490 .setsockopt = sock_no_setsockopt, 491 .getsockopt = sock_no_getsockopt, 492 .sendmsg = unix_stream_sendmsg, 493 .recvmsg = unix_stream_recvmsg, 494 .mmap = sock_no_mmap, 495 .sendpage = sock_no_sendpage, 496}; 497 498static struct proto_ops unix_dgram_ops = { 499 .family = PF_UNIX, 500 .owner = THIS_MODULE, 501 .release = unix_release, 502 .bind = unix_bind, 503 .connect = unix_dgram_connect, 504 .socketpair = unix_socketpair, 505 .accept = sock_no_accept, 506 .getname = unix_getname, 507 .poll = datagram_poll, 508 .ioctl = unix_ioctl, 509 .listen = sock_no_listen, 510 .shutdown = unix_shutdown, 511 .setsockopt = 
sock_no_setsockopt, 512 .getsockopt = sock_no_getsockopt, 513 .sendmsg = unix_dgram_sendmsg, 514 .recvmsg = unix_dgram_recvmsg, 515 .mmap = sock_no_mmap, 516 .sendpage = sock_no_sendpage, 517}; 518 519static struct proto_ops unix_seqpacket_ops = { 520 .family = PF_UNIX, 521 .owner = THIS_MODULE, 522 .release = unix_release, 523 .bind = unix_bind, 524 .connect = unix_stream_connect, 525 .socketpair = unix_socketpair, 526 .accept = unix_accept, 527 .getname = unix_getname, 528 .poll = datagram_poll, 529 .ioctl = unix_ioctl, 530 .listen = unix_listen, 531 .shutdown = unix_shutdown, 532 .setsockopt = sock_no_setsockopt, 533 .getsockopt = sock_no_getsockopt, 534 .sendmsg = unix_seqpacket_sendmsg, 535 .recvmsg = unix_dgram_recvmsg, 536 .mmap = sock_no_mmap, 537 .sendpage = sock_no_sendpage, 538}; 539 540static struct proto unix_proto = { 541 .name = "UNIX", 542 .owner = THIS_MODULE, 543 .obj_size = sizeof(struct unix_sock), 544}; 545 546static struct sock * unix_create1(struct socket *sock) 547{ 548 struct sock *sk = NULL; 549 struct unix_sock *u; 550 551 if (atomic_read(&unix_nr_socks) >= 2*files_stat.max_files) 552 goto out; 553 554 sk = sk_alloc(PF_UNIX, GFP_KERNEL, &unix_proto, 1); 555 if (!sk) 556 goto out; 557 558 atomic_inc(&unix_nr_socks); 559 560 sock_init_data(sock,sk); 561 562 sk->sk_write_space = unix_write_space; 563 sk->sk_max_ack_backlog = sysctl_unix_max_dgram_qlen; 564 sk->sk_destruct = unix_sock_destructor; 565 u = unix_sk(sk); 566 u->dentry = NULL; 567 u->mnt = NULL; 568 rwlock_init(&u->lock); 569 atomic_set(&u->inflight, sock ? 
0 : -1); 570 init_MUTEX(&u->readsem); /* single task reading lock */ 571 init_waitqueue_head(&u->peer_wait); 572 unix_insert_socket(unix_sockets_unbound, sk); 573out: 574 return sk; 575} 576 577static int unix_create(struct socket *sock, int protocol) 578{ 579 if (protocol && protocol != PF_UNIX) 580 return -EPROTONOSUPPORT; 581 582 sock->state = SS_UNCONNECTED; 583 584 switch (sock->type) { 585 case SOCK_STREAM: 586 sock->ops = &unix_stream_ops; 587 break; 588 /* 589 * Believe it or not BSD has AF_UNIX, SOCK_RAW though 590 * nothing uses it. 591 */ 592 case SOCK_RAW: 593 sock->type=SOCK_DGRAM; 594 case SOCK_DGRAM: 595 sock->ops = &unix_dgram_ops; 596 break; 597 case SOCK_SEQPACKET: 598 sock->ops = &unix_seqpacket_ops; 599 break; 600 default: 601 return -ESOCKTNOSUPPORT; 602 } 603 604 return unix_create1(sock) ? 0 : -ENOMEM; 605} 606 607static int unix_release(struct socket *sock) 608{ 609 struct sock *sk = sock->sk; 610 611 if (!sk) 612 return 0; 613 614 sock->sk = NULL; 615 616 return unix_release_sock (sk, 0); 617} 618 619static int unix_autobind(struct socket *sock) 620{ 621 struct sock *sk = sock->sk; 622 struct unix_sock *u = unix_sk(sk); 623 static u32 ordernum = 1; 624 struct unix_address * addr; 625 int err; 626 627 down(&u->readsem); 628 629 err = 0; 630 if (u->addr) 631 goto out; 632 633 err = -ENOMEM; 634 addr = kmalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL); 635 if (!addr) 636 goto out; 637 638 memset(addr, 0, sizeof(*addr) + sizeof(short) + 16); 639 addr->name->sun_family = AF_UNIX; 640 atomic_set(&addr->refcnt, 1); 641 642retry: 643 addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short); 644 addr->hash = unix_hash_fold(csum_partial((void*)addr->name, addr->len, 0)); 645 646 write_lock(&unix_table_lock); 647 ordernum = (ordernum+1)&0xFFFFF; 648 649 if (__unix_find_socket_byname(addr->name, addr->len, sock->type, 650 addr->hash)) { 651 write_unlock(&unix_table_lock); 652 /* Sanity yield. 
It is unusual case, but yet... */ 653 if (!(ordernum&0xFF)) 654 yield(); 655 goto retry; 656 } 657 addr->hash ^= sk->sk_type; 658 659 __unix_remove_socket(sk); 660 u->addr = addr; 661 __unix_insert_socket(&unix_socket_table[addr->hash], sk); 662 write_unlock(&unix_table_lock); 663 err = 0; 664 665out: up(&u->readsem); 666 return err; 667} 668 669static struct sock *unix_find_other(struct sockaddr_un *sunname, int len, 670 int type, unsigned hash, int *error) 671{ 672 struct sock *u; 673 struct nameidata nd; 674 int err = 0; 675 676 if (sunname->sun_path[0]) { 677 err = path_lookup(sunname->sun_path, LOOKUP_FOLLOW, &nd); 678 if (err) 679 goto fail; 680 err = permission(nd.dentry->d_inode,MAY_WRITE, &nd); 681 if (err) 682 goto put_fail; 683 684 err = -ECONNREFUSED; 685 if (!S_ISSOCK(nd.dentry->d_inode->i_mode)) 686 goto put_fail; 687 u=unix_find_socket_byinode(nd.dentry->d_inode); 688 if (!u) 689 goto put_fail; 690 691 if (u->sk_type == type) 692 touch_atime(nd.mnt, nd.dentry); 693 694 path_release(&nd); 695 696 err=-EPROTOTYPE; 697 if (u->sk_type != type) { 698 sock_put(u); 699 goto fail; 700 } 701 } else { 702 err = -ECONNREFUSED; 703 u=unix_find_socket_byname(sunname, len, type, hash); 704 if (u) { 705 struct dentry *dentry; 706 dentry = unix_sk(u)->dentry; 707 if (dentry) 708 touch_atime(unix_sk(u)->mnt, dentry); 709 } else 710 goto fail; 711 } 712 return u; 713 714put_fail: 715 path_release(&nd); 716fail: 717 *error=err; 718 return NULL; 719} 720 721 722static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) 723{ 724 struct sock *sk = sock->sk; 725 struct unix_sock *u = unix_sk(sk); 726 struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr; 727 struct dentry * dentry = NULL; 728 struct nameidata nd; 729 int err; 730 unsigned hash; 731 struct unix_address *addr; 732 struct hlist_head *list; 733 734 err = -EINVAL; 735 if (sunaddr->sun_family != AF_UNIX) 736 goto out; 737 738 if (addr_len==sizeof(short)) { 739 err = unix_autobind(sock); 
740 goto out; 741 } 742 743 err = unix_mkname(sunaddr, addr_len, &hash); 744 if (err < 0) 745 goto out; 746 addr_len = err; 747 748 down(&u->readsem); 749 750 err = -EINVAL; 751 if (u->addr) 752 goto out_up; 753 754 err = -ENOMEM; 755 addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL); 756 if (!addr) 757 goto out_up; 758 759 memcpy(addr->name, sunaddr, addr_len); 760 addr->len = addr_len; 761 addr->hash = hash ^ sk->sk_type; 762 atomic_set(&addr->refcnt, 1); 763 764 if (sunaddr->sun_path[0]) { 765 unsigned int mode; 766 err = 0; 767 /* 768 * Get the parent directory, calculate the hash for last 769 * component. 770 */ 771 err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd); 772 if (err) 773 goto out_mknod_parent; 774 /* 775 * Yucky last component or no last component at all? 776 * (foo/., foo/.., /////) 777 */ 778 err = -EEXIST; 779 if (nd.last_type != LAST_NORM) 780 goto out_mknod; 781 /* 782 * Lock the directory. 783 */ 784 down(&nd.dentry->d_inode->i_sem); 785 /* 786 * Do the final lookup. 787 */ 788 dentry = lookup_hash(&nd.last, nd.dentry); 789 err = PTR_ERR(dentry); 790 if (IS_ERR(dentry)) 791 goto out_mknod_unlock; 792 err = -ENOENT; 793 /* 794 * Special case - lookup gave negative, but... we had foo/bar/ 795 * From the vfs_mknod() POV we just have a negative dentry - 796 * all is fine. Let's be bastards - you had / on the end, you've 797 * been asking for (non-existent) directory. -ENOENT for you. 798 */ 799 if (nd.last.name[nd.last.len] && !dentry->d_inode) 800 goto out_mknod_dput; 801 /* 802 * All right, let's create it. 
803 */ 804 mode = S_IFSOCK | 805 (SOCK_INODE(sock)->i_mode & ~current->fs->umask); 806 err = vfs_mknod(nd.dentry->d_inode, dentry, mode, 0); 807 if (err) 808 goto out_mknod_dput; 809 up(&nd.dentry->d_inode->i_sem); 810 dput(nd.dentry); 811 nd.dentry = dentry; 812 813 addr->hash = UNIX_HASH_SIZE; 814 } 815 816 write_lock(&unix_table_lock); 817 818 if (!sunaddr->sun_path[0]) { 819 err = -EADDRINUSE; 820 if (__unix_find_socket_byname(sunaddr, addr_len, 821 sk->sk_type, hash)) { 822 unix_release_addr(addr); 823 goto out_unlock; 824 } 825 826 list = &unix_socket_table[addr->hash]; 827 } else { 828 list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)]; 829 u->dentry = nd.dentry; 830 u->mnt = nd.mnt; 831 } 832 833 err = 0; 834 __unix_remove_socket(sk); 835 u->addr = addr; 836 __unix_insert_socket(list, sk); 837 838out_unlock: 839 write_unlock(&unix_table_lock); 840out_up: 841 up(&u->readsem); 842out: 843 return err; 844 845out_mknod_dput: 846 dput(dentry); 847out_mknod_unlock: 848 up(&nd.dentry->d_inode->i_sem); 849out_mknod: 850 path_release(&nd); 851out_mknod_parent: 852 if (err==-EEXIST) 853 err=-EADDRINUSE; 854 unix_release_addr(addr); 855 goto out_up; 856} 857 858static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr, 859 int alen, int flags) 860{ 861 struct sock *sk = sock->sk; 862 struct sockaddr_un *sunaddr=(struct sockaddr_un*)addr; 863 struct sock *other; 864 unsigned hash; 865 int err; 866 867 if (addr->sa_family != AF_UNSPEC) { 868 err = unix_mkname(sunaddr, alen, &hash); 869 if (err < 0) 870 goto out; 871 alen = err; 872 873 if (test_bit(SOCK_PASSCRED, &sock->flags) && 874 !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0) 875 goto out; 876 877 other=unix_find_other(sunaddr, alen, sock->type, hash, &err); 878 if (!other) 879 goto out; 880 881 unix_state_wlock(sk); 882 883 err = -EPERM; 884 if (!unix_may_send(sk, other)) 885 goto out_unlock; 886 887 err = security_unix_may_send(sk->sk_socket, other->sk_socket); 888 if 
(err) 889 goto out_unlock; 890 891 } else { 892 /* 893 * 1003.1g breaking connected state with AF_UNSPEC 894 */ 895 other = NULL; 896 unix_state_wlock(sk); 897 } 898 899 /* 900 * If it was connected, reconnect. 901 */ 902 if (unix_peer(sk)) { 903 struct sock *old_peer = unix_peer(sk); 904 unix_peer(sk)=other; 905 unix_state_wunlock(sk); 906 907 if (other != old_peer) 908 unix_dgram_disconnected(sk, old_peer); 909 sock_put(old_peer); 910 } else { 911 unix_peer(sk)=other; 912 unix_state_wunlock(sk); 913 } 914 return 0; 915 916out_unlock: 917 unix_state_wunlock(sk); 918 sock_put(other); 919out: 920 return err; 921} 922 923static long unix_wait_for_peer(struct sock *other, long timeo) 924{ 925 struct unix_sock *u = unix_sk(other); 926 int sched; 927 DEFINE_WAIT(wait); 928 929 prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE); 930 931 sched = !sock_flag(other, SOCK_DEAD) && 932 !(other->sk_shutdown & RCV_SHUTDOWN) && 933 (skb_queue_len(&other->sk_receive_queue) > 934 other->sk_max_ack_backlog); 935 936 unix_state_runlock(other); 937 938 if (sched) 939 timeo = schedule_timeout(timeo); 940 941 finish_wait(&u->peer_wait, &wait); 942 return timeo; 943} 944 945static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, 946 int addr_len, int flags) 947{ 948 struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr; 949 struct sock *sk = sock->sk; 950 struct unix_sock *u = unix_sk(sk), *newu, *otheru; 951 struct sock *newsk = NULL; 952 struct sock *other = NULL; 953 struct sk_buff *skb = NULL; 954 unsigned hash; 955 int st; 956 int err; 957 long timeo; 958 959 err = unix_mkname(sunaddr, addr_len, &hash); 960 if (err < 0) 961 goto out; 962 addr_len = err; 963 964 if (test_bit(SOCK_PASSCRED, &sock->flags) 965 && !u->addr && (err = unix_autobind(sock)) != 0) 966 goto out; 967 968 timeo = sock_sndtimeo(sk, flags & O_NONBLOCK); 969 970 /* First of all allocate resources. 
971 If we will make it after state is locked, 972 we will have to recheck all again in any case. 973 */ 974 975 err = -ENOMEM; 976 977 /* create new sock for complete connection */ 978 newsk = unix_create1(NULL); 979 if (newsk == NULL) 980 goto out; 981 982 /* Allocate skb for sending to listening sock */ 983 skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL); 984 if (skb == NULL) 985 goto out; 986 987restart: 988 /* Find listening sock. */ 989 other = unix_find_other(sunaddr, addr_len, sk->sk_type, hash, &err); 990 if (!other) 991 goto out; 992 993 /* Latch state of peer */ 994 unix_state_rlock(other); 995 996 /* Apparently VFS overslept socket death. Retry. */ 997 if (sock_flag(other, SOCK_DEAD)) { 998 unix_state_runlock(other); 999 sock_put(other); 1000 goto restart; 1001 } 1002 1003 err = -ECONNREFUSED; 1004 if (other->sk_state != TCP_LISTEN) 1005 goto out_unlock; 1006 1007 if (skb_queue_len(&other->sk_receive_queue) > 1008 other->sk_max_ack_backlog) { 1009 err = -EAGAIN; 1010 if (!timeo) 1011 goto out_unlock; 1012 1013 timeo = unix_wait_for_peer(other, timeo); 1014 1015 err = sock_intr_errno(timeo); 1016 if (signal_pending(current)) 1017 goto out; 1018 sock_put(other); 1019 goto restart; 1020 } 1021 1022 /* Latch our state. 1023 1024 It is tricky place. We need to grab write lock and cannot 1025 drop lock on peer. It is dangerous because deadlock is 1026 possible. Connect to self case and simultaneous 1027 attempt to connect are eliminated by checking socket 1028 state. other is TCP_LISTEN, if sk is TCP_LISTEN we 1029 check this before attempt to grab lock. 1030 1031 Well, and we have to recheck the state after socket locked. 1032 */ 1033 st = sk->sk_state; 1034 1035 switch (st) { 1036 case TCP_CLOSE: 1037 /* This is ok... 
continue with connect */ 1038 break; 1039 case TCP_ESTABLISHED: 1040 /* Socket is already connected */ 1041 err = -EISCONN; 1042 goto out_unlock; 1043 default: 1044 err = -EINVAL; 1045 goto out_unlock; 1046 } 1047 1048 unix_state_wlock(sk); 1049 1050 if (sk->sk_state != st) { 1051 unix_state_wunlock(sk); 1052 unix_state_runlock(other); 1053 sock_put(other); 1054 goto restart; 1055 } 1056 1057 err = security_unix_stream_connect(sock, other->sk_socket, newsk); 1058 if (err) { 1059 unix_state_wunlock(sk); 1060 goto out_unlock; 1061 } 1062 1063 /* The way is open! Fastly set all the necessary fields... */ 1064 1065 sock_hold(sk); 1066 unix_peer(newsk) = sk; 1067 newsk->sk_state = TCP_ESTABLISHED; 1068 newsk->sk_type = sk->sk_type; 1069 newsk->sk_peercred.pid = current->tgid; 1070 newsk->sk_peercred.uid = current->euid; 1071 newsk->sk_peercred.gid = current->egid; 1072 newu = unix_sk(newsk); 1073 newsk->sk_sleep = &newu->peer_wait; 1074 otheru = unix_sk(other); 1075 1076 /* copy address information from listening to new sock*/ 1077 if (otheru->addr) { 1078 atomic_inc(&otheru->addr->refcnt); 1079 newu->addr = otheru->addr; 1080 } 1081 if (otheru->dentry) { 1082 newu->dentry = dget(otheru->dentry); 1083 newu->mnt = mntget(otheru->mnt); 1084 } 1085 1086 /* Set credentials */ 1087 sk->sk_peercred = other->sk_peercred; 1088 1089 sock_hold(newsk); 1090 unix_peer(sk) = newsk; 1091 sock->state = SS_CONNECTED; 1092 sk->sk_state = TCP_ESTABLISHED; 1093 1094 unix_state_wunlock(sk); 1095 1096 /* take ten and and send info to listening sock */ 1097 spin_lock(&other->sk_receive_queue.lock); 1098 __skb_queue_tail(&other->sk_receive_queue, skb); 1099 /* Undo artificially decreased inflight after embrion 1100 * is installed to listening socket. 
*/ 1101 atomic_inc(&newu->inflight); 1102 spin_unlock(&other->sk_receive_queue.lock); 1103 unix_state_runlock(other); 1104 other->sk_data_ready(other, 0); 1105 sock_put(other); 1106 return 0; 1107 1108out_unlock: 1109 if (other) 1110 unix_state_runlock(other); 1111 1112out: 1113 if (skb) 1114 kfree_skb(skb); 1115 if (newsk) 1116 unix_release_sock(newsk, 0); 1117 if (other) 1118 sock_put(other); 1119 return err; 1120} 1121 1122static int unix_socketpair(struct socket *socka, struct socket *sockb) 1123{ 1124 struct sock *ska=socka->sk, *skb = sockb->sk; 1125 1126 /* Join our sockets back to back */ 1127 sock_hold(ska); 1128 sock_hold(skb); 1129 unix_peer(ska)=skb; 1130 unix_peer(skb)=ska; 1131 ska->sk_peercred.pid = skb->sk_peercred.pid = current->tgid; 1132 ska->sk_peercred.uid = skb->sk_peercred.uid = current->euid; 1133 ska->sk_peercred.gid = skb->sk_peercred.gid = current->egid; 1134 1135 if (ska->sk_type != SOCK_DGRAM) { 1136 ska->sk_state = TCP_ESTABLISHED; 1137 skb->sk_state = TCP_ESTABLISHED; 1138 socka->state = SS_CONNECTED; 1139 sockb->state = SS_CONNECTED; 1140 } 1141 return 0; 1142} 1143 1144static int unix_accept(struct socket *sock, struct socket *newsock, int flags) 1145{ 1146 struct sock *sk = sock->sk; 1147 struct sock *tsk; 1148 struct sk_buff *skb; 1149 int err; 1150 1151 err = -EOPNOTSUPP; 1152 if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET) 1153 goto out; 1154 1155 err = -EINVAL; 1156 if (sk->sk_state != TCP_LISTEN) 1157 goto out; 1158 1159 /* If socket state is TCP_LISTEN it cannot change (for now...), 1160 * so that no locks are necessary. 1161 */ 1162 1163 skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err); 1164 if (!skb) { 1165 /* This means receive shutdown. 
*/ 1166 if (err == 0) 1167 err = -EINVAL; 1168 goto out; 1169 } 1170 1171 tsk = skb->sk; 1172 skb_free_datagram(sk, skb); 1173 wake_up_interruptible(&unix_sk(sk)->peer_wait); 1174 1175 /* attach accepted sock to socket */ 1176 unix_state_wlock(tsk); 1177 newsock->state = SS_CONNECTED; 1178 sock_graft(tsk, newsock); 1179 unix_state_wunlock(tsk); 1180 return 0; 1181 1182out: 1183 return err; 1184} 1185 1186 1187static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer) 1188{ 1189 struct sock *sk = sock->sk; 1190 struct unix_sock *u; 1191 struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr; 1192 int err = 0; 1193 1194 if (peer) { 1195 sk = unix_peer_get(sk); 1196 1197 err = -ENOTCONN; 1198 if (!sk) 1199 goto out; 1200 err = 0; 1201 } else { 1202 sock_hold(sk); 1203 } 1204 1205 u = unix_sk(sk); 1206 unix_state_rlock(sk); 1207 if (!u->addr) { 1208 sunaddr->sun_family = AF_UNIX; 1209 sunaddr->sun_path[0] = 0; 1210 *uaddr_len = sizeof(short); 1211 } else { 1212 struct unix_address *addr = u->addr; 1213 1214 *uaddr_len = addr->len; 1215 memcpy(sunaddr, addr->name, *uaddr_len); 1216 } 1217 unix_state_runlock(sk); 1218 sock_put(sk); 1219out: 1220 return err; 1221} 1222 1223static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb) 1224{ 1225 int i; 1226 1227 scm->fp = UNIXCB(skb).fp; 1228 skb->destructor = sock_wfree; 1229 UNIXCB(skb).fp = NULL; 1230 1231 for (i=scm->fp->count-1; i>=0; i--) 1232 unix_notinflight(scm->fp->fp[i]); 1233} 1234 1235static void unix_destruct_fds(struct sk_buff *skb) 1236{ 1237 struct scm_cookie scm; 1238 memset(&scm, 0, sizeof(scm)); 1239 unix_detach_fds(&scm, skb); 1240 1241 /* Alas, it calls VFS */ 1242 /* So fscking what? 
fput() had been SMP-safe since the last Summer */ 1243 scm_destroy(&scm); 1244 sock_wfree(skb); 1245} 1246 1247static void unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) 1248{ 1249 int i; 1250 for (i=scm->fp->count-1; i>=0; i--) 1251 unix_inflight(scm->fp->fp[i]); 1252 UNIXCB(skb).fp = scm->fp; 1253 skb->destructor = unix_destruct_fds; 1254 scm->fp = NULL; 1255} 1256 1257/* 1258 * Send AF_UNIX data. 1259 */ 1260 1261static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, 1262 struct msghdr *msg, size_t len) 1263{ 1264 struct sock_iocb *siocb = kiocb_to_siocb(kiocb); 1265 struct sock *sk = sock->sk; 1266 struct unix_sock *u = unix_sk(sk); 1267 struct sockaddr_un *sunaddr=msg->msg_name; 1268 struct sock *other = NULL; 1269 int namelen = 0; /* fake GCC */ 1270 int err; 1271 unsigned hash; 1272 struct sk_buff *skb; 1273 long timeo; 1274 struct scm_cookie tmp_scm; 1275 1276 if (NULL == siocb->scm) 1277 siocb->scm = &tmp_scm; 1278 err = scm_send(sock, msg, siocb->scm); 1279 if (err < 0) 1280 return err; 1281 1282 err = -EOPNOTSUPP; 1283 if (msg->msg_flags&MSG_OOB) 1284 goto out; 1285 1286 if (msg->msg_namelen) { 1287 err = unix_mkname(sunaddr, msg->msg_namelen, &hash); 1288 if (err < 0) 1289 goto out; 1290 namelen = err; 1291 } else { 1292 sunaddr = NULL; 1293 err = -ENOTCONN; 1294 other = unix_peer_get(sk); 1295 if (!other) 1296 goto out; 1297 } 1298 1299 if (test_bit(SOCK_PASSCRED, &sock->flags) 1300 && !u->addr && (err = unix_autobind(sock)) != 0) 1301 goto out; 1302 1303 err = -EMSGSIZE; 1304 if (len > sk->sk_sndbuf - 32) 1305 goto out; 1306 1307 skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err); 1308 if (skb==NULL) 1309 goto out; 1310 1311 memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred)); 1312 if (siocb->scm->fp) 1313 unix_attach_fds(siocb->scm, skb); 1314 1315 skb->h.raw = skb->data; 1316 err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len); 1317 if (err) 1318 goto out_free; 1319 1320 timeo = 
sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); 1321 1322restart: 1323 if (!other) { 1324 err = -ECONNRESET; 1325 if (sunaddr == NULL) 1326 goto out_free; 1327 1328 other = unix_find_other(sunaddr, namelen, sk->sk_type, 1329 hash, &err); 1330 if (other==NULL) 1331 goto out_free; 1332 } 1333 1334 unix_state_rlock(other); 1335 err = -EPERM; 1336 if (!unix_may_send(sk, other)) 1337 goto out_unlock; 1338 1339 if (sock_flag(other, SOCK_DEAD)) { 1340 /* 1341 * Check with 1003.1g - what should 1342 * datagram error 1343 */ 1344 unix_state_runlock(other); 1345 sock_put(other); 1346 1347 err = 0; 1348 unix_state_wlock(sk); 1349 if (unix_peer(sk) == other) { 1350 unix_peer(sk)=NULL; 1351 unix_state_wunlock(sk); 1352 1353 unix_dgram_disconnected(sk, other); 1354 sock_put(other); 1355 err = -ECONNREFUSED; 1356 } else { 1357 unix_state_wunlock(sk); 1358 } 1359 1360 other = NULL; 1361 if (err) 1362 goto out_free; 1363 goto restart; 1364 } 1365 1366 err = -EPIPE; 1367 if (other->sk_shutdown & RCV_SHUTDOWN) 1368 goto out_unlock; 1369 1370 if (sk->sk_type != SOCK_SEQPACKET) { 1371 err = security_unix_may_send(sk->sk_socket, other->sk_socket); 1372 if (err) 1373 goto out_unlock; 1374 } 1375 1376 if (unix_peer(other) != sk && 1377 (skb_queue_len(&other->sk_receive_queue) > 1378 other->sk_max_ack_backlog)) { 1379 if (!timeo) { 1380 err = -EAGAIN; 1381 goto out_unlock; 1382 } 1383 1384 timeo = unix_wait_for_peer(other, timeo); 1385 1386 err = sock_intr_errno(timeo); 1387 if (signal_pending(current)) 1388 goto out_free; 1389 1390 goto restart; 1391 } 1392 1393 skb_queue_tail(&other->sk_receive_queue, skb); 1394 unix_state_runlock(other); 1395 other->sk_data_ready(other, len); 1396 sock_put(other); 1397 scm_destroy(siocb->scm); 1398 return len; 1399 1400out_unlock: 1401 unix_state_runlock(other); 1402out_free: 1403 kfree_skb(skb); 1404out: 1405 if (other) 1406 sock_put(other); 1407 scm_destroy(siocb->scm); 1408 return err; 1409} 1410 1411 1412static int unix_stream_sendmsg(struct kiocb 
*kiocb, struct socket *sock,
			       struct msghdr *msg, size_t len)
{
	/*
	 * Stream send path: the message is cut into skbs of at most
	 * sk_sndbuf / 2 - 64 bytes (and at most SKB_MAX_ALLOC) which are
	 * appended one by one to the peer's receive queue.
	 *
	 * Returns the number of bytes queued if that is non-zero,
	 * otherwise a negative errno: -EOPNOTSUPP (MSG_OOB, or an address
	 * on an unconnected socket), -EISCONN (address on a connected
	 * socket), -ENOTCONN (no peer), -EPIPE (we are shut down for
	 * sending, or the peer is dead / shut down for receiving).
	 * SIGPIPE is raised for -EPIPE only when nothing was sent and
	 * MSG_NOSIGNAL is not set.
	 */
	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
	struct sock *sk = sock->sk;
	struct sock *other = NULL;
	struct scm_cookie tmp_scm;
	struct sk_buff *skb;
	int sent = 0;
	int size;
	int err;

	if (!siocb->scm)
		siocb->scm = &tmp_scm;
	err = scm_send(sock, msg, siocb->scm);
	if (err < 0)
		return err;

	if (msg->msg_flags & MSG_OOB) {
		err = -EOPNOTSUPP;
		goto out_err;
	}

	/* A destination address is never accepted on a stream socket. */
	if (msg->msg_namelen) {
		if (sk->sk_state == TCP_ESTABLISHED)
			err = -EISCONN;
		else
			err = -EOPNOTSUPP;
		goto out_err;
	}

	err = -ENOTCONN;
	other = unix_peer_get(sk);
	if (!other)
		goto out_err;

	if (sk->sk_shutdown & SEND_SHUTDOWN)
		goto pipe_err;

	while (sent < len) {
		/*
		 *	Optimisation for the fact that under 0.01% of X
		 *	messages typically need breaking up.
		 */
		size = len - sent;

		/* Keep two messages in the pipe so it schedules better */
		if (size > sk->sk_sndbuf / 2 - 64)
			size = sk->sk_sndbuf / 2 - 64;

		if (size > SKB_MAX_ALLOC)
			size = SKB_MAX_ALLOC;

		/*
		 *	Grab a buffer
		 */
		skb = sock_alloc_send_skb(sk, size,
					  msg->msg_flags & MSG_DONTWAIT, &err);
		if (!skb)
			goto out_err;

		/*
		 *	If you pass two values to the sock_alloc_send_skb
		 *	it tries to grab the large buffer with GFP_NOFS
		 *	(which can fail easily), and if it fails grab the
		 *	fallback size buffer which is under a page and will
		 *	succeed. [Alan]
		 */
		size = min_t(int, size, skb_tailroom(skb));

		memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
		/*
		 * unix_attach_fds() clears scm->fp, so only the first skb
		 * of the message carries the passed files.
		 */
		if (siocb->scm->fp)
			unix_attach_fds(siocb->scm, skb);

		err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
		if (err != 0) {
			kfree_skb(skb);
			goto out_err;
		}

		unix_state_rlock(other);

		if (sock_flag(other, SOCK_DEAD) ||
		    (other->sk_shutdown & RCV_SHUTDOWN))
			goto pipe_err_free;

		skb_queue_tail(&other->sk_receive_queue, skb);
		unix_state_runlock(other);
		other->sk_data_ready(other, size);
		sent += size;
	}
	sock_put(other);

	scm_destroy(siocb->scm);
	siocb->scm = NULL;

	return sent;

pipe_err_free:
	unix_state_runlock(other);
	kfree_skb(skb);
pipe_err:
	if (sent == 0 && !(msg->msg_flags & MSG_NOSIGNAL))
		send_sig(SIGPIPE, current, 0);
	err = -EPIPE;
out_err:
	if (other)
		sock_put(other);
	scm_destroy(siocb->scm);
	siocb->scm = NULL;
	/* Bytes already queued take precedence over the error. */
	return sent ? 
: err; 1522} 1523 1524static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock, 1525 struct msghdr *msg, size_t len) 1526{ 1527 int err; 1528 struct sock *sk = sock->sk; 1529 1530 err = sock_error(sk); 1531 if (err) 1532 return err; 1533 1534 if (sk->sk_state != TCP_ESTABLISHED) 1535 return -ENOTCONN; 1536 1537 if (msg->msg_namelen) 1538 msg->msg_namelen = 0; 1539 1540 return unix_dgram_sendmsg(kiocb, sock, msg, len); 1541} 1542 1543static void unix_copy_addr(struct msghdr *msg, struct sock *sk) 1544{ 1545 struct unix_sock *u = unix_sk(sk); 1546 1547 msg->msg_namelen = 0; 1548 if (u->addr) { 1549 msg->msg_namelen = u->addr->len; 1550 memcpy(msg->msg_name, u->addr->name, u->addr->len); 1551 } 1552} 1553 1554static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, 1555 struct msghdr *msg, size_t size, 1556 int flags) 1557{ 1558 struct sock_iocb *siocb = kiocb_to_siocb(iocb); 1559 struct scm_cookie tmp_scm; 1560 struct sock *sk = sock->sk; 1561 struct unix_sock *u = unix_sk(sk); 1562 int noblock = flags & MSG_DONTWAIT; 1563 struct sk_buff *skb; 1564 int err; 1565 1566 err = -EOPNOTSUPP; 1567 if (flags&MSG_OOB) 1568 goto out; 1569 1570 msg->msg_namelen = 0; 1571 1572 down(&u->readsem); 1573 1574 skb = skb_recv_datagram(sk, flags, noblock, &err); 1575 if (!skb) 1576 goto out_unlock; 1577 1578 wake_up_interruptible(&u->peer_wait); 1579 1580 if (msg->msg_name) 1581 unix_copy_addr(msg, skb->sk); 1582 1583 if (size > skb->len) 1584 size = skb->len; 1585 else if (size < skb->len) 1586 msg->msg_flags |= MSG_TRUNC; 1587 1588 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size); 1589 if (err) 1590 goto out_free; 1591 1592 if (!siocb->scm) { 1593 siocb->scm = &tmp_scm; 1594 memset(&tmp_scm, 0, sizeof(tmp_scm)); 1595 } 1596 siocb->scm->creds = *UNIXCREDS(skb); 1597 1598 if (!(flags & MSG_PEEK)) 1599 { 1600 if (UNIXCB(skb).fp) 1601 unix_detach_fds(siocb->scm, skb); 1602 } 1603 else 1604 { 1605 /* It is questionable: on PEEK we could: 1606 - do 
not return fds - good, but too simple 8) 1607 - return fds, and do not return them on read (old strategy, 1608 apparently wrong) 1609 - clone fds (I chose it for now, it is the most universal 1610 solution) 1611 1612 POSIX 1003.1g does not actually define this clearly 1613 at all. POSIX 1003.1g doesn't define a lot of things 1614 clearly however! 1615 1616 */ 1617 if (UNIXCB(skb).fp) 1618 siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp); 1619 } 1620 err = size; 1621 1622 scm_recv(sock, msg, siocb->scm, flags); 1623 1624out_free: 1625 skb_free_datagram(sk,skb); 1626out_unlock: 1627 up(&u->readsem); 1628out: 1629 return err; 1630} 1631 1632/* 1633 * Sleep until data has arrive. But check for races.. 1634 */ 1635 1636static long unix_stream_data_wait(struct sock * sk, long timeo) 1637{ 1638 DEFINE_WAIT(wait); 1639 1640 unix_state_rlock(sk); 1641 1642 for (;;) { 1643 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); 1644 1645 if (skb_queue_len(&sk->sk_receive_queue) || 1646 sk->sk_err || 1647 (sk->sk_shutdown & RCV_SHUTDOWN) || 1648 signal_pending(current) || 1649 !timeo) 1650 break; 1651 1652 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); 1653 unix_state_runlock(sk); 1654 timeo = schedule_timeout(timeo); 1655 unix_state_rlock(sk); 1656 clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); 1657 } 1658 1659 finish_wait(sk->sk_sleep, &wait); 1660 unix_state_runlock(sk); 1661 return timeo; 1662} 1663 1664 1665 1666static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, 1667 struct msghdr *msg, size_t size, 1668 int flags) 1669{ 1670 struct sock_iocb *siocb = kiocb_to_siocb(iocb); 1671 struct scm_cookie tmp_scm; 1672 struct sock *sk = sock->sk; 1673 struct unix_sock *u = unix_sk(sk); 1674 struct sockaddr_un *sunaddr=msg->msg_name; 1675 int copied = 0; 1676 int check_creds = 0; 1677 int target; 1678 int err = 0; 1679 long timeo; 1680 1681 err = -EINVAL; 1682 if (sk->sk_state != TCP_ESTABLISHED) 1683 goto out; 1684 1685 err = -EOPNOTSUPP; 1686 if 
(flags&MSG_OOB) 1687 goto out; 1688 1689 target = sock_rcvlowat(sk, flags&MSG_WAITALL, size); 1690 timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT); 1691 1692 msg->msg_namelen = 0; 1693 1694 /* Lock the socket to prevent queue disordering 1695 * while sleeps in memcpy_tomsg 1696 */ 1697 1698 if (!siocb->scm) { 1699 siocb->scm = &tmp_scm; 1700 memset(&tmp_scm, 0, sizeof(tmp_scm)); 1701 } 1702 1703 down(&u->readsem); 1704 1705 do 1706 { 1707 int chunk; 1708 struct sk_buff *skb; 1709 1710 skb = skb_dequeue(&sk->sk_receive_queue); 1711 if (skb==NULL) 1712 { 1713 if (copied >= target) 1714 break; 1715 1716 /* 1717 * POSIX 1003.1g mandates this order. 1718 */ 1719 1720 if ((err = sock_error(sk)) != 0) 1721 break; 1722 if (sk->sk_shutdown & RCV_SHUTDOWN) 1723 break; 1724 err = -EAGAIN; 1725 if (!timeo) 1726 break; 1727 up(&u->readsem); 1728 1729 timeo = unix_stream_data_wait(sk, timeo); 1730 1731 if (signal_pending(current)) { 1732 err = sock_intr_errno(timeo); 1733 goto out; 1734 } 1735 down(&u->readsem); 1736 continue; 1737 } 1738 1739 if (check_creds) { 1740 /* Never glue messages from different writers */ 1741 if (memcmp(UNIXCREDS(skb), &siocb->scm->creds, sizeof(siocb->scm->creds)) != 0) { 1742 skb_queue_head(&sk->sk_receive_queue, skb); 1743 break; 1744 } 1745 } else { 1746 /* Copy credentials */ 1747 siocb->scm->creds = *UNIXCREDS(skb); 1748 check_creds = 1; 1749 } 1750 1751 /* Copy address just once */ 1752 if (sunaddr) 1753 { 1754 unix_copy_addr(msg, skb->sk); 1755 sunaddr = NULL; 1756 } 1757 1758 chunk = min_t(unsigned int, skb->len, size); 1759 if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) { 1760 skb_queue_head(&sk->sk_receive_queue, skb); 1761 if (copied == 0) 1762 copied = -EFAULT; 1763 break; 1764 } 1765 copied += chunk; 1766 size -= chunk; 1767 1768 /* Mark read part of skb as used */ 1769 if (!(flags & MSG_PEEK)) 1770 { 1771 skb_pull(skb, chunk); 1772 1773 if (UNIXCB(skb).fp) 1774 unix_detach_fds(siocb->scm, skb); 1775 1776 /* put the skb back if we 
didn't use it up.. */ 1777 if (skb->len) 1778 { 1779 skb_queue_head(&sk->sk_receive_queue, skb); 1780 break; 1781 } 1782 1783 kfree_skb(skb); 1784 1785 if (siocb->scm->fp) 1786 break; 1787 } 1788 else 1789 { 1790 /* It is questionable, see note in unix_dgram_recvmsg. 1791 */ 1792 if (UNIXCB(skb).fp) 1793 siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp); 1794 1795 /* put message back and return */ 1796 skb_queue_head(&sk->sk_receive_queue, skb); 1797 break; 1798 } 1799 } while (size); 1800 1801 up(&u->readsem); 1802 scm_recv(sock, msg, siocb->scm, flags); 1803out: 1804 return copied ? : err; 1805} 1806 1807static int unix_shutdown(struct socket *sock, int mode) 1808{ 1809 struct sock *sk = sock->sk; 1810 struct sock *other; 1811 1812 mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN); 1813 1814 if (mode) { 1815 unix_state_wlock(sk); 1816 sk->sk_shutdown |= mode; 1817 other=unix_peer(sk); 1818 if (other) 1819 sock_hold(other); 1820 unix_state_wunlock(sk); 1821 sk->sk_state_change(sk); 1822 1823 if (other && 1824 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) { 1825 1826 int peer_mode = 0; 1827 1828 if (mode&RCV_SHUTDOWN) 1829 peer_mode |= SEND_SHUTDOWN; 1830 if (mode&SEND_SHUTDOWN) 1831 peer_mode |= RCV_SHUTDOWN; 1832 unix_state_wlock(other); 1833 other->sk_shutdown |= peer_mode; 1834 unix_state_wunlock(other); 1835 other->sk_state_change(other); 1836 read_lock(&other->sk_callback_lock); 1837 if (peer_mode == SHUTDOWN_MASK) 1838 sk_wake_async(other,1,POLL_HUP); 1839 else if (peer_mode & RCV_SHUTDOWN) 1840 sk_wake_async(other,1,POLL_IN); 1841 read_unlock(&other->sk_callback_lock); 1842 } 1843 if (other) 1844 sock_put(other); 1845 } 1846 return 0; 1847} 1848 1849static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) 1850{ 1851 struct sock *sk = sock->sk; 1852 long amount=0; 1853 int err; 1854 1855 switch(cmd) 1856 { 1857 case SIOCOUTQ: 1858 amount = atomic_read(&sk->sk_wmem_alloc); 1859 err = put_user(amount, (int __user *)arg); 1860 
break; 1861 case SIOCINQ: 1862 { 1863 struct sk_buff *skb; 1864 1865 if (sk->sk_state == TCP_LISTEN) { 1866 err = -EINVAL; 1867 break; 1868 } 1869 1870 spin_lock(&sk->sk_receive_queue.lock); 1871 if (sk->sk_type == SOCK_STREAM || 1872 sk->sk_type == SOCK_SEQPACKET) { 1873 skb_queue_walk(&sk->sk_receive_queue, skb) 1874 amount += skb->len; 1875 } else { 1876 skb = skb_peek(&sk->sk_receive_queue); 1877 if (skb) 1878 amount=skb->len; 1879 } 1880 spin_unlock(&sk->sk_receive_queue.lock); 1881 err = put_user(amount, (int __user *)arg); 1882 break; 1883 } 1884 1885 default: 1886 err = dev_ioctl(cmd, (void __user *)arg); 1887 break; 1888 } 1889 return err; 1890} 1891 1892static unsigned int unix_poll(struct file * file, struct socket *sock, poll_table *wait) 1893{ 1894 struct sock *sk = sock->sk; 1895 unsigned int mask; 1896 1897 poll_wait(file, sk->sk_sleep, wait); 1898 mask = 0; 1899 1900 /* exceptional events? */ 1901 if (sk->sk_err) 1902 mask |= POLLERR; 1903 if (sk->sk_shutdown == SHUTDOWN_MASK) 1904 mask |= POLLHUP; 1905 1906 /* readable? */ 1907 if (!skb_queue_empty(&sk->sk_receive_queue) || 1908 (sk->sk_shutdown & RCV_SHUTDOWN)) 1909 mask |= POLLIN | POLLRDNORM; 1910 1911 /* Connection-based need to check for termination and startup */ 1912 if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && sk->sk_state == TCP_CLOSE) 1913 mask |= POLLHUP; 1914 1915 /* 1916 * we set writable also when the other side has shut down the 1917 * connection. This prevents stuck sockets. 
1918 */ 1919 if (unix_writable(sk)) 1920 mask |= POLLOUT | POLLWRNORM | POLLWRBAND; 1921 1922 return mask; 1923} 1924 1925 1926#ifdef CONFIG_PROC_FS 1927static struct sock *unix_seq_idx(int *iter, loff_t pos) 1928{ 1929 loff_t off = 0; 1930 struct sock *s; 1931 1932 for (s = first_unix_socket(iter); s; s = next_unix_socket(iter, s)) { 1933 if (off == pos) 1934 return s; 1935 ++off; 1936 } 1937 return NULL; 1938} 1939 1940 1941static void *unix_seq_start(struct seq_file *seq, loff_t *pos) 1942{ 1943 read_lock(&unix_table_lock); 1944 return *pos ? unix_seq_idx(seq->private, *pos - 1) : ((void *) 1); 1945} 1946 1947static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos) 1948{ 1949 ++*pos; 1950 1951 if (v == (void *)1) 1952 return first_unix_socket(seq->private); 1953 return next_unix_socket(seq->private, v); 1954} 1955 1956static void unix_seq_stop(struct seq_file *seq, void *v) 1957{ 1958 read_unlock(&unix_table_lock); 1959} 1960 1961static int unix_seq_show(struct seq_file *seq, void *v) 1962{ 1963 1964 if (v == (void *)1) 1965 seq_puts(seq, "Num RefCount Protocol Flags Type St " 1966 "Inode Path\n"); 1967 else { 1968 struct sock *s = v; 1969 struct unix_sock *u = unix_sk(s); 1970 unix_state_rlock(s); 1971 1972 seq_printf(seq, "%p: %08X %08X %08X %04X %02X %5lu", 1973 s, 1974 atomic_read(&s->sk_refcnt), 1975 0, 1976 s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0, 1977 s->sk_type, 1978 s->sk_socket ? 1979 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) : 1980 (s->sk_state == TCP_ESTABLISHED ? 
SS_CONNECTING : SS_DISCONNECTING), 1981 sock_i_ino(s)); 1982 1983 if (u->addr) { 1984 int i, len; 1985 seq_putc(seq, ' '); 1986 1987 i = 0; 1988 len = u->addr->len - sizeof(short); 1989 if (!UNIX_ABSTRACT(s)) 1990 len--; 1991 else { 1992 seq_putc(seq, '@'); 1993 i++; 1994 } 1995 for ( ; i < len; i++) 1996 seq_putc(seq, u->addr->name->sun_path[i]); 1997 } 1998 unix_state_runlock(s); 1999 seq_putc(seq, '\n'); 2000 } 2001 2002 return 0; 2003} 2004 2005static struct seq_operations unix_seq_ops = { 2006 .start = unix_seq_start, 2007 .next = unix_seq_next, 2008 .stop = unix_seq_stop, 2009 .show = unix_seq_show, 2010}; 2011 2012 2013static int unix_seq_open(struct inode *inode, struct file *file) 2014{ 2015 struct seq_file *seq; 2016 int rc = -ENOMEM; 2017 int *iter = kmalloc(sizeof(int), GFP_KERNEL); 2018 2019 if (!iter) 2020 goto out; 2021 2022 rc = seq_open(file, &unix_seq_ops); 2023 if (rc) 2024 goto out_kfree; 2025 2026 seq = file->private_data; 2027 seq->private = iter; 2028 *iter = 0; 2029out: 2030 return rc; 2031out_kfree: 2032 kfree(iter); 2033 goto out; 2034} 2035 2036static struct file_operations unix_seq_fops = { 2037 .owner = THIS_MODULE, 2038 .open = unix_seq_open, 2039 .read = seq_read, 2040 .llseek = seq_lseek, 2041 .release = seq_release_private, 2042}; 2043 2044#endif 2045 2046static struct net_proto_family unix_family_ops = { 2047 .family = PF_UNIX, 2048 .create = unix_create, 2049 .owner = THIS_MODULE, 2050}; 2051 2052#ifdef CONFIG_SYSCTL 2053extern void unix_sysctl_register(void); 2054extern void unix_sysctl_unregister(void); 2055#else 2056static inline void unix_sysctl_register(void) {} 2057static inline void unix_sysctl_unregister(void) {} 2058#endif 2059 2060static int __init af_unix_init(void) 2061{ 2062 int rc = -1; 2063 struct sk_buff *dummy_skb; 2064 2065 if (sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb)) { 2066 printk(KERN_CRIT "%s: panic\n", __FUNCTION__); 2067 goto out; 2068 } 2069 2070 rc = proto_register(&unix_proto, 1); 2071 if 
(rc != 0) { 2072 printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n", 2073 __FUNCTION__); 2074 goto out; 2075 } 2076 2077 sock_register(&unix_family_ops); 2078#ifdef CONFIG_PROC_FS 2079 proc_net_fops_create("unix", 0, &unix_seq_fops); 2080#endif 2081 unix_sysctl_register(); 2082out: 2083 return rc; 2084} 2085 2086static void __exit af_unix_exit(void) 2087{ 2088 sock_unregister(PF_UNIX); 2089 unix_sysctl_unregister(); 2090 proc_net_remove("unix"); 2091 proto_unregister(&unix_proto); 2092} 2093 2094module_init(af_unix_init); 2095module_exit(af_unix_exit); 2096 2097MODULE_LICENSE("GPL"); 2098MODULE_ALIAS_NETPROTO(PF_UNIX); 2099