sock.c revision 6d8ebc8a27e1b187abfb06dd79b35a393aa9f2a2
1/* 2 * INET An implementation of the TCP/IP protocol suite for the LINUX 3 * operating system. INET is implemented using the BSD Socket 4 * interface as the means of communication with the user level. 5 * 6 * Generic socket support routines. Memory allocators, socket lock/release 7 * handler for protocols to use and generic option handler. 8 * 9 * 10 * Authors: Ross Biro 11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> 12 * Florian La Roche, <flla@stud.uni-sb.de> 13 * Alan Cox, <A.Cox@swansea.ac.uk> 14 * 15 * Fixes: 16 * Alan Cox : Numerous verify_area() problems 17 * Alan Cox : Connecting on a connecting socket 18 * now returns an error for tcp. 19 * Alan Cox : sock->protocol is set correctly. 20 * and is not sometimes left as 0. 21 * Alan Cox : connect handles icmp errors on a 22 * connect properly. Unfortunately there 23 * is a restart syscall nasty there. I 24 * can't match BSD without hacking the C 25 * library. Ideas urgently sought! 26 * Alan Cox : Disallow bind() to addresses that are 27 * not ours - especially broadcast ones!! 28 * Alan Cox : Socket 1024 _IS_ ok for users. (fencepost) 29 * Alan Cox : sock_wfree/sock_rfree don't destroy sockets, 30 * instead they leave that for the DESTROY timer. 31 * Alan Cox : Clean up error flag in accept 32 * Alan Cox : TCP ack handling is buggy, the DESTROY timer 33 * was buggy. Put a remove_sock() in the handler 34 * for memory when we hit 0. Also altered the timer 35 * code. The ACK stuff can wait and needs major 36 * TCP layer surgery. 37 * Alan Cox : Fixed TCP ack bug, removed remove sock 38 * and fixed timer/inet_bh race. 39 * Alan Cox : Added zapped flag for TCP 40 * Alan Cox : Move kfree_skb into skbuff.c and tidied up surplus code 41 * Alan Cox : for new sk_buff allocations wmalloc/rmalloc now call alloc_skb 42 * Alan Cox : kfree_s calls now are kfree_skbmem so we can track skb resources 43 * Alan Cox : Supports socket option broadcast now as does udp. Packet and raw need fixing. 44 * Alan Cox : Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so... 45 * Rick Sladkey : Relaxed UDP rules for matching packets. 46 * C.E.Hawkins : IFF_PROMISC/SIOCGHWADDR support 47 * Pauline Middelink : identd support 48 * Alan Cox : Fixed connect() taking signals I think. 49 * Alan Cox : SO_LINGER supported 50 * Alan Cox : Error reporting fixes 51 * Anonymous : inet_create tidied up (sk->reuse setting) 52 * Alan Cox : inet sockets don't set sk->type! 53 * Alan Cox : Split socket option code 54 * Alan Cox : Callbacks 55 * Alan Cox : Nagle flag for Charles & Johannes stuff 56 * Alex : Removed restriction on inet fioctl 57 * Alan Cox : Splitting INET from NET core 58 * Alan Cox : Fixed bogus SO_TYPE handling in getsockopt() 59 * Adam Caldwell : Missing return in SO_DONTROUTE/SO_DEBUG code 60 * Alan Cox : Split IP from generic code 61 * Alan Cox : New kfree_skbmem() 62 * Alan Cox : Make SO_DEBUG superuser only. 63 * Alan Cox : Allow anyone to clear SO_DEBUG 64 * (compatibility fix) 65 * Alan Cox : Added optimistic memory grabbing for AF_UNIX throughput. 66 * Alan Cox : Allocator for a socket is settable. 67 * Alan Cox : SO_ERROR includes soft errors. 68 * Alan Cox : Allow NULL arguments on some SO_ opts 69 * Alan Cox : Generic socket allocation to make hooks 70 * easier (suggested by Craig Metz). 71 * Michael Pall : SO_ERROR returns positive errno again 72 * Steve Whitehouse: Added default destructor to free 73 * protocol private data. 74 * Steve Whitehouse: Added various other default routines 75 * common to several socket families. 76 * Chris Evans : Call suser() check last on F_SETOWN 77 * Jay Schulist : Added SO_ATTACH_FILTER and SO_DETACH_FILTER. 78 * Andi Kleen : Add sock_kmalloc()/sock_kfree_s() 79 * Andi Kleen : Fix write_space callback 80 * Chris Evans : Security fixes - signedness again 81 * Arnaldo C. Melo : cleanups, use skb_queue_purge 82 * 83 * To Fix: 84 * 85 * 86 * This program is free software; you can redistribute it and/or 87 * modify it under the terms of the GNU General Public License 88 * as published by the Free Software Foundation; either version 89 * 2 of the License, or (at your option) any later version. 90 */ 91 92#include <linux/capability.h> 93#include <linux/errno.h> 94#include <linux/types.h> 95#include <linux/socket.h> 96#include <linux/in.h> 97#include <linux/kernel.h> 98#include <linux/module.h> 99#include <linux/proc_fs.h> 100#include <linux/seq_file.h> 101#include <linux/sched.h> 102#include <linux/timer.h> 103#include <linux/string.h> 104#include <linux/sockios.h> 105#include <linux/net.h> 106#include <linux/mm.h> 107#include <linux/slab.h> 108#include <linux/interrupt.h> 109#include <linux/poll.h> 110#include <linux/tcp.h> 111#include <linux/init.h> 112#include <linux/highmem.h> 113#include <linux/user_namespace.h> 114#include <linux/static_key.h> 115#include <linux/memcontrol.h> 116#include <linux/prefetch.h> 117 118#include <asm/uaccess.h> 119 120#include <linux/netdevice.h> 121#include <net/protocol.h> 122#include <linux/skbuff.h> 123#include <net/net_namespace.h> 124#include <net/request_sock.h> 125#include <net/sock.h> 126#include <linux/net_tstamp.h> 127#include <net/xfrm.h> 128#include <linux/ipsec.h> 129#include <net/cls_cgroup.h> 130#include <net/netprio_cgroup.h> 131 132#include <linux/filter.h> 133 134#include <trace/events/sock.h> 135 136#ifdef CONFIG_INET 137#include <net/tcp.h> 138#endif 139 140static DEFINE_MUTEX(proto_list_mutex); 141static LIST_HEAD(proto_list); 142 143#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM 144int mem_cgroup_sockets_init(struct cgroup *cgrp, struct cgroup_subsys *ss) 145{ 146 struct proto *proto; 147 int ret = 0; 148 149 mutex_lock(&proto_list_mutex); 150 list_for_each_entry(proto, &proto_list, node) { 151 if (proto->init_cgroup) { 152 ret = proto->init_cgroup(cgrp, ss); 153 if (ret) 154 goto out; 155 } 156 } 157 158 mutex_unlock(&proto_list_mutex); 159 return ret; 160out: 161 list_for_each_entry_continue_reverse(proto, &proto_list, node) 162 if (proto->destroy_cgroup) 163 proto->destroy_cgroup(cgrp); 164 mutex_unlock(&proto_list_mutex); 165 return ret; 166} 167 168void mem_cgroup_sockets_destroy(struct cgroup *cgrp) 169{ 170 struct proto *proto; 171 172 mutex_lock(&proto_list_mutex); 173 list_for_each_entry_reverse(proto, &proto_list, node) 174 if (proto->destroy_cgroup) 175 proto->destroy_cgroup(cgrp); 176 mutex_unlock(&proto_list_mutex); 177} 178#endif 179 180/* 181 * Each address family might have different locking rules, so we have 182 * one slock key per address family: 183 */ 184static struct lock_class_key af_family_keys[AF_MAX]; 185static struct lock_class_key af_family_slock_keys[AF_MAX]; 186 187struct static_key memcg_socket_limit_enabled; 188EXPORT_SYMBOL(memcg_socket_limit_enabled); 189 190/* 191 * Make lock validator output more readable. (we pre-construct these 192 * strings build-time, so that runtime initialization of socket 193 * locks is fast): 194 */ 195static const char *const af_family_key_strings[AF_MAX+1] = { 196 "sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX" , "sk_lock-AF_INET" , 197 "sk_lock-AF_AX25" , "sk_lock-AF_IPX" , "sk_lock-AF_APPLETALK", 198 "sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE" , "sk_lock-AF_ATMPVC" , 199 "sk_lock-AF_X25" , "sk_lock-AF_INET6" , "sk_lock-AF_ROSE" , 200 "sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI" , "sk_lock-AF_SECURITY" , 201 "sk_lock-AF_KEY" , "sk_lock-AF_NETLINK" , "sk_lock-AF_PACKET" , 202 "sk_lock-AF_ASH" , "sk_lock-AF_ECONET" , "sk_lock-AF_ATMSVC" , 203 "sk_lock-AF_RDS" , "sk_lock-AF_SNA" , "sk_lock-AF_IRDA" , 204 "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE" , "sk_lock-AF_LLC" , 205 "sk_lock-27" , "sk_lock-28" , "sk_lock-AF_CAN" , 206 "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" , 207 "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" , 208 "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG" , 209 "sk_lock-AF_NFC" , "sk_lock-AF_MAX" 210}; 211static const char *const af_family_slock_key_strings[AF_MAX+1] = { 212 "slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" , 213 "slock-AF_AX25" , "slock-AF_IPX" , "slock-AF_APPLETALK", 214 "slock-AF_NETROM", "slock-AF_BRIDGE" , "slock-AF_ATMPVC" , 215 "slock-AF_X25" , "slock-AF_INET6" , "slock-AF_ROSE" , 216 "slock-AF_DECnet", "slock-AF_NETBEUI" , "slock-AF_SECURITY" , 217 "slock-AF_KEY" , "slock-AF_NETLINK" , "slock-AF_PACKET" , 218 "slock-AF_ASH" , "slock-AF_ECONET" , "slock-AF_ATMSVC" , 219 "slock-AF_RDS" , "slock-AF_SNA" , "slock-AF_IRDA" , 220 "slock-AF_PPPOX" , "slock-AF_WANPIPE" , "slock-AF_LLC" , 221 "slock-27" , "slock-28" , "slock-AF_CAN" , 222 "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" , 223 "slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" , 224 "slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG" , 225 "slock-AF_NFC" , "slock-AF_MAX" 226}; 227static const char *const af_family_clock_key_strings[AF_MAX+1] = { 228 "clock-AF_UNSPEC", "clock-AF_UNIX" , "clock-AF_INET" , 229 "clock-AF_AX25" , "clock-AF_IPX" , "clock-AF_APPLETALK", 230 "clock-AF_NETROM", "clock-AF_BRIDGE" , "clock-AF_ATMPVC" , 231 "clock-AF_X25" , "clock-AF_INET6" , "clock-AF_ROSE" , 232 "clock-AF_DECnet", "clock-AF_NETBEUI" , "clock-AF_SECURITY" , 233 "clock-AF_KEY" , "clock-AF_NETLINK" , "clock-AF_PACKET" , 234 "clock-AF_ASH" , "clock-AF_ECONET" , "clock-AF_ATMSVC" , 235 "clock-AF_RDS" , "clock-AF_SNA" , "clock-AF_IRDA" , 236 "clock-AF_PPPOX" , "clock-AF_WANPIPE" , "clock-AF_LLC" , 237 "clock-27" , "clock-28" , "clock-AF_CAN" , 238 "clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" , 239 "clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" , 240 "clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG" , 241 "clock-AF_NFC" , "clock-AF_MAX" 242}; 243 244/* 245 * sk_callback_lock locking rules are per-address-family, 246 * so split the lock classes by using a per-AF key: 247 */ 248static struct lock_class_key af_callback_keys[AF_MAX]; 249 250/* Take into consideration the size of the struct sk_buff overhead in the 251 * determination of these values, since that is non-constant across 252 * platforms. This makes socket queueing behavior and performance 253 * not depend upon such differences. 254 */ 255#define _SK_MEM_PACKETS 256 256#define _SK_MEM_OVERHEAD SKB_TRUESIZE(256) 257#define SK_WMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS) 258#define SK_RMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS) 259 260/* Run time adjustable parameters. */ 261__u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX; 262EXPORT_SYMBOL(sysctl_wmem_max); 263__u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX; 264EXPORT_SYMBOL(sysctl_rmem_max); 265__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX; 266__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX; 267 268/* Maximal space eaten by iovec or ancillary data plus some space */ 269int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512); 270EXPORT_SYMBOL(sysctl_optmem_max); 271 272#if defined(CONFIG_CGROUPS) 273#if !defined(CONFIG_NET_CLS_CGROUP) 274int net_cls_subsys_id = -1; 275EXPORT_SYMBOL_GPL(net_cls_subsys_id); 276#endif 277#if !defined(CONFIG_NETPRIO_CGROUP) 278int net_prio_subsys_id = -1; 279EXPORT_SYMBOL_GPL(net_prio_subsys_id); 280#endif 281#endif 282 283static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen) 284{ 285 struct timeval tv; 286 287 if (optlen < sizeof(tv)) 288 return -EINVAL; 289 if (copy_from_user(&tv, optval, sizeof(tv))) 290 return -EFAULT; 291 if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC) 292 return -EDOM; 293 294 if (tv.tv_sec < 0) { 295 static int warned __read_mostly; 296 297 *timeo_p = 0; 298 if (warned < 10 && net_ratelimit()) { 299 warned++; 300 printk(KERN_INFO "sock_set_timeout: `%s' (pid %d) " 301 "tries to set negative timeout\n", 302 current->comm, task_pid_nr(current)); 303 } 304 return 0; 305 } 306 *timeo_p = MAX_SCHEDULE_TIMEOUT; 307 if (tv.tv_sec == 0 && tv.tv_usec == 0) 308 return 0; 309 if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1)) 310 *timeo_p = tv.tv_sec*HZ + (tv.tv_usec+(1000000/HZ-1))/(1000000/HZ); 311 return 0; 312} 313 314static void sock_warn_obsolete_bsdism(const char *name) 315{ 316 static int warned; 317 static char warncomm[TASK_COMM_LEN]; 318 if (strcmp(warncomm, current->comm) && warned < 5) { 319 strcpy(warncomm, current->comm); 320 printk(KERN_WARNING "process `%s' is using obsolete " 321 "%s SO_BSDCOMPAT\n", warncomm, name); 322 warned++; 323 } 324} 325 326#define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)) 327 328static void sock_disable_timestamp(struct sock *sk, unsigned long flags) 329{ 330 if (sk->sk_flags & flags) { 331 sk->sk_flags &= ~flags; 332 if (!(sk->sk_flags & SK_FLAGS_TIMESTAMP)) 333 net_disable_timestamp(); 334 } 335} 336 337 338int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) 339{ 340 int err; 341 int skb_len; 342 unsigned long flags; 343 struct sk_buff_head *list = &sk->sk_receive_queue; 344 345 if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) { 346 atomic_inc(&sk->sk_drops); 347 trace_sock_rcvqueue_full(sk, skb); 348 return -ENOMEM; 349 } 350 351 err = sk_filter(sk, skb); 352 if (err) 353 return err; 354 355 if (!sk_rmem_schedule(sk, skb->truesize)) { 356 atomic_inc(&sk->sk_drops); 357 return -ENOBUFS; 358 } 359 360 skb->dev = NULL; 361 skb_set_owner_r(skb, sk); 362 363 /* Cache the SKB length before we tack it onto the receive 364 * queue. Once it is added it no longer belongs to us and 365 * may be freed by other threads of control pulling packets 366 * from the queue. 367 */ 368 skb_len = skb->len; 369 370 /* we escape from rcu protected region, make sure we dont leak 371 * a norefcounted dst 372 */ 373 skb_dst_force(skb); 374 375 spin_lock_irqsave(&list->lock, flags); 376 skb->dropcount = atomic_read(&sk->sk_drops); 377 __skb_queue_tail(list, skb); 378 spin_unlock_irqrestore(&list->lock, flags); 379 380 if (!sock_flag(sk, SOCK_DEAD)) 381 sk->sk_data_ready(sk, skb_len); 382 return 0; 383} 384EXPORT_SYMBOL(sock_queue_rcv_skb); 385 386int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested) 387{ 388 int rc = NET_RX_SUCCESS; 389 390 if (sk_filter(sk, skb)) 391 goto discard_and_relse; 392 393 skb->dev = NULL; 394 395 if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf)) { 396 atomic_inc(&sk->sk_drops); 397 goto discard_and_relse; 398 } 399 if (nested) 400 bh_lock_sock_nested(sk); 401 else 402 bh_lock_sock(sk); 403 if (!sock_owned_by_user(sk)) { 404 /* 405 * trylock + unlock semantics: 406 */ 407 mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_); 408 409 rc = sk_backlog_rcv(sk, skb); 410 411 mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_); 412 } else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) { 413 bh_unlock_sock(sk); 414 atomic_inc(&sk->sk_drops); 415 goto discard_and_relse; 416 } 417 418 bh_unlock_sock(sk); 419out: 420 sock_put(sk); 421 return rc; 422discard_and_relse: 423 kfree_skb(skb); 424 goto out; 425} 426EXPORT_SYMBOL(sk_receive_skb); 427 428void sk_reset_txq(struct sock *sk) 429{ 430 sk_tx_queue_clear(sk); 431} 432EXPORT_SYMBOL(sk_reset_txq); 433 434struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie) 435{ 436 struct dst_entry *dst = __sk_dst_get(sk); 437 438 if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) { 439 sk_tx_queue_clear(sk); 440 RCU_INIT_POINTER(sk->sk_dst_cache, NULL); 441 dst_release(dst); 442 return NULL; 443 } 444 445 return dst; 446} 447EXPORT_SYMBOL(__sk_dst_check); 448 449struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie) 450{ 451 struct dst_entry *dst = sk_dst_get(sk); 452 453 if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) { 454 sk_dst_reset(sk); 455 dst_release(dst); 456 return NULL; 457 } 458 459 return dst; 460} 461EXPORT_SYMBOL(sk_dst_check); 462 463static int sock_bindtodevice(struct sock *sk, char __user *optval, int optlen) 464{ 465 int ret = -ENOPROTOOPT; 466#ifdef CONFIG_NETDEVICES 467 struct net *net = sock_net(sk); 468 char devname[IFNAMSIZ]; 469 int index; 470 471 /* Sorry... */ 472 ret = -EPERM; 473 if (!capable(CAP_NET_RAW)) 474 goto out; 475 476 ret = -EINVAL; 477 if (optlen < 0) 478 goto out; 479 480 /* Bind this socket to a particular device like "eth0", 481 * as specified in the passed interface name. If the 482 * name is "" or the option length is zero the socket 483 * is not bound. 484 */ 485 if (optlen > IFNAMSIZ - 1) 486 optlen = IFNAMSIZ - 1; 487 memset(devname, 0, sizeof(devname)); 488 489 ret = -EFAULT; 490 if (copy_from_user(devname, optval, optlen)) 491 goto out; 492 493 index = 0; 494 if (devname[0] != '\0') { 495 struct net_device *dev; 496 497 rcu_read_lock(); 498 dev = dev_get_by_name_rcu(net, devname); 499 if (dev) 500 index = dev->ifindex; 501 rcu_read_unlock(); 502 ret = -ENODEV; 503 if (!dev) 504 goto out; 505 } 506 507 lock_sock(sk); 508 sk->sk_bound_dev_if = index; 509 sk_dst_reset(sk); 510 release_sock(sk); 511 512 ret = 0; 513 514out: 515#endif 516 517 return ret; 518} 519 520static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool) 521{ 522 if (valbool) 523 sock_set_flag(sk, bit); 524 else 525 sock_reset_flag(sk, bit); 526} 527 528/* 529 * This is meant for all protocols to use and covers goings on 530 * at the socket level. Everything here is generic. 531 */ 532 533int sock_setsockopt(struct socket *sock, int level, int optname, 534 char __user *optval, unsigned int optlen) 535{ 536 struct sock *sk = sock->sk; 537 int val; 538 int valbool; 539 struct linger ling; 540 int ret = 0; 541 542 /* 543 * Options without arguments 544 */ 545 546 if (optname == SO_BINDTODEVICE) 547 return sock_bindtodevice(sk, optval, optlen); 548 549 if (optlen < sizeof(int)) 550 return -EINVAL; 551 552 if (get_user(val, (int __user *)optval)) 553 return -EFAULT; 554 555 valbool = val ? 1 : 0; 556 557 lock_sock(sk); 558 559 switch (optname) { 560 case SO_DEBUG: 561 if (val && !capable(CAP_NET_ADMIN)) 562 ret = -EACCES; 563 else 564 sock_valbool_flag(sk, SOCK_DBG, valbool); 565 break; 566 case SO_REUSEADDR: 567 sk->sk_reuse = (valbool ? SK_CAN_REUSE : SK_NO_REUSE); 568 break; 569 case SO_TYPE: 570 case SO_PROTOCOL: 571 case SO_DOMAIN: 572 case SO_ERROR: 573 ret = -ENOPROTOOPT; 574 break; 575 case SO_DONTROUTE: 576 sock_valbool_flag(sk, SOCK_LOCALROUTE, valbool); 577 break; 578 case SO_BROADCAST: 579 sock_valbool_flag(sk, SOCK_BROADCAST, valbool); 580 break; 581 case SO_SNDBUF: 582 /* Don't error on this BSD doesn't and if you think 583 * about it this is right. Otherwise apps have to 584 * play 'guess the biggest size' games. RCVBUF/SNDBUF 585 * are treated in BSD as hints 586 */ 587 val = min_t(u32, val, sysctl_wmem_max); 588set_sndbuf: 589 sk->sk_userlocks |= SOCK_SNDBUF_LOCK; 590 sk->sk_sndbuf = max_t(u32, val * 2, SOCK_MIN_SNDBUF); 591 /* Wake up sending tasks if we upped the value. */ 592 sk->sk_write_space(sk); 593 break; 594 595 case SO_SNDBUFFORCE: 596 if (!capable(CAP_NET_ADMIN)) { 597 ret = -EPERM; 598 break; 599 } 600 goto set_sndbuf; 601 602 case SO_RCVBUF: 603 /* Don't error on this BSD doesn't and if you think 604 * about it this is right. Otherwise apps have to 605 * play 'guess the biggest size' games. RCVBUF/SNDBUF 606 * are treated in BSD as hints 607 */ 608 val = min_t(u32, val, sysctl_rmem_max); 609set_rcvbuf: 610 sk->sk_userlocks |= SOCK_RCVBUF_LOCK; 611 /* 612 * We double it on the way in to account for 613 * "struct sk_buff" etc. overhead. Applications 614 * assume that the SO_RCVBUF setting they make will 615 * allow that much actual data to be received on that 616 * socket. 617 * 618 * Applications are unaware that "struct sk_buff" and 619 * other overheads allocate from the receive buffer 620 * during socket buffer allocation. 621 * 622 * And after considering the possible alternatives, 623 * returning the value we actually used in getsockopt 624 * is the most desirable behavior. 625 */ 626 sk->sk_rcvbuf = max_t(u32, val * 2, SOCK_MIN_RCVBUF); 627 break; 628 629 case SO_RCVBUFFORCE: 630 if (!capable(CAP_NET_ADMIN)) { 631 ret = -EPERM; 632 break; 633 } 634 goto set_rcvbuf; 635 636 case SO_KEEPALIVE: 637#ifdef CONFIG_INET 638 if (sk->sk_protocol == IPPROTO_TCP) 639 tcp_set_keepalive(sk, valbool); 640#endif 641 sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool); 642 break; 643 644 case SO_OOBINLINE: 645 sock_valbool_flag(sk, SOCK_URGINLINE, valbool); 646 break; 647 648 case SO_NO_CHECK: 649 sk->sk_no_check = valbool; 650 break; 651 652 case SO_PRIORITY: 653 if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN)) 654 sk->sk_priority = val; 655 else 656 ret = -EPERM; 657 break; 658 659 case SO_LINGER: 660 if (optlen < sizeof(ling)) { 661 ret = -EINVAL; /* 1003.1g */ 662 break; 663 } 664 if (copy_from_user(&ling, optval, sizeof(ling))) { 665 ret = -EFAULT; 666 break; 667 } 668 if (!ling.l_onoff) 669 sock_reset_flag(sk, SOCK_LINGER); 670 else { 671#if (BITS_PER_LONG == 32) 672 if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ) 673 sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT; 674 else 675#endif 676 sk->sk_lingertime = (unsigned int)ling.l_linger * HZ; 677 sock_set_flag(sk, SOCK_LINGER); 678 } 679 break; 680 681 case SO_BSDCOMPAT: 682 sock_warn_obsolete_bsdism("setsockopt"); 683 break; 684 685 case SO_PASSCRED: 686 if (valbool) 687 set_bit(SOCK_PASSCRED, &sock->flags); 688 else 689 clear_bit(SOCK_PASSCRED, &sock->flags); 690 break; 691 692 case SO_TIMESTAMP: 693 case SO_TIMESTAMPNS: 694 if (valbool) { 695 if (optname == SO_TIMESTAMP) 696 sock_reset_flag(sk, SOCK_RCVTSTAMPNS); 697 else 698 sock_set_flag(sk, SOCK_RCVTSTAMPNS); 699 sock_set_flag(sk, SOCK_RCVTSTAMP); 700 sock_enable_timestamp(sk, SOCK_TIMESTAMP); 701 } else { 702 sock_reset_flag(sk, SOCK_RCVTSTAMP); 703 sock_reset_flag(sk, SOCK_RCVTSTAMPNS); 704 } 705 break; 706 707 case SO_TIMESTAMPING: 708 if (val & ~SOF_TIMESTAMPING_MASK) { 709 ret = -EINVAL; 710 break; 711 } 712 sock_valbool_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE, 713 val & SOF_TIMESTAMPING_TX_HARDWARE); 714 sock_valbool_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE, 715 val & SOF_TIMESTAMPING_TX_SOFTWARE); 716 sock_valbool_flag(sk, SOCK_TIMESTAMPING_RX_HARDWARE, 717 val & SOF_TIMESTAMPING_RX_HARDWARE); 718 if (val & SOF_TIMESTAMPING_RX_SOFTWARE) 719 sock_enable_timestamp(sk, 720 SOCK_TIMESTAMPING_RX_SOFTWARE); 721 else 722 sock_disable_timestamp(sk, 723 (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)); 724 sock_valbool_flag(sk, SOCK_TIMESTAMPING_SOFTWARE, 725 val & SOF_TIMESTAMPING_SOFTWARE); 726 sock_valbool_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE, 727 val & SOF_TIMESTAMPING_SYS_HARDWARE); 728 sock_valbool_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE, 729 val & SOF_TIMESTAMPING_RAW_HARDWARE); 730 break; 731 732 case SO_RCVLOWAT: 733 if (val < 0) 734 val = INT_MAX; 735 sk->sk_rcvlowat = val ? : 1; 736 break; 737 738 case SO_RCVTIMEO: 739 ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen); 740 break; 741 742 case SO_SNDTIMEO: 743 ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen); 744 break; 745 746 case SO_ATTACH_FILTER: 747 ret = -EINVAL; 748 if (optlen == sizeof(struct sock_fprog)) { 749 struct sock_fprog fprog; 750 751 ret = -EFAULT; 752 if (copy_from_user(&fprog, optval, sizeof(fprog))) 753 break; 754 755 ret = sk_attach_filter(&fprog, sk); 756 } 757 break; 758 759 case SO_DETACH_FILTER: 760 ret = sk_detach_filter(sk); 761 break; 762 763 case SO_PASSSEC: 764 if (valbool) 765 set_bit(SOCK_PASSSEC, &sock->flags); 766 else 767 clear_bit(SOCK_PASSSEC, &sock->flags); 768 break; 769 case SO_MARK: 770 if (!capable(CAP_NET_ADMIN)) 771 ret = -EPERM; 772 else 773 sk->sk_mark = val; 774 break; 775 776 /* We implement the SO_SNDLOWAT etc to 777 not be settable (1003.1g 5.3) */ 778 case SO_RXQ_OVFL: 779 sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool); 780 break; 781 782 case SO_WIFI_STATUS: 783 sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool); 784 break; 785 786 case SO_PEEK_OFF: 787 if (sock->ops->set_peek_off) 788 sock->ops->set_peek_off(sk, val); 789 else 790 ret = -EOPNOTSUPP; 791 break; 792 793 case SO_NOFCS: 794 sock_valbool_flag(sk, SOCK_NOFCS, valbool); 795 break; 796 797 default: 798 ret = -ENOPROTOOPT; 799 break; 800 } 801 release_sock(sk); 802 return ret; 803} 804EXPORT_SYMBOL(sock_setsockopt); 805 806 807void cred_to_ucred(struct pid *pid, const struct cred *cred, 808 struct ucred *ucred) 809{ 810 ucred->pid = pid_vnr(pid); 811 ucred->uid = ucred->gid = -1; 812 if (cred) { 813 struct user_namespace *current_ns = current_user_ns(); 814 815 ucred->uid = user_ns_map_uid(current_ns, cred, cred->euid); 816 ucred->gid = user_ns_map_gid(current_ns, cred, cred->egid); 817 } 818} 819EXPORT_SYMBOL_GPL(cred_to_ucred); 820 821int sock_getsockopt(struct socket *sock, int level, int optname, 822 char __user *optval, int __user *optlen) 823{ 824 struct sock *sk = sock->sk; 825 826 union { 827 int val; 828 struct linger ling; 829 struct timeval tm; 830 } v; 831 832 int lv = sizeof(int); 833 int len; 834 835 if (get_user(len, optlen)) 836 return -EFAULT; 837 if (len < 0) 838 return -EINVAL; 839 840 memset(&v, 0, sizeof(v)); 841 842 switch (optname) { 843 case SO_DEBUG: 844 v.val = sock_flag(sk, SOCK_DBG); 845 break; 846 847 case SO_DONTROUTE: 848 v.val = sock_flag(sk, SOCK_LOCALROUTE); 849 break; 850 851 case SO_BROADCAST: 852 v.val = !!sock_flag(sk, SOCK_BROADCAST); 853 break; 854 855 case SO_SNDBUF: 856 v.val = sk->sk_sndbuf; 857 break; 858 859 case SO_RCVBUF: 860 v.val = sk->sk_rcvbuf; 861 break; 862 863 case SO_REUSEADDR: 864 v.val = sk->sk_reuse; 865 break; 866 867 case SO_KEEPALIVE: 868 v.val = !!sock_flag(sk, SOCK_KEEPOPEN); 869 break; 870 871 case SO_TYPE: 872 v.val = sk->sk_type; 873 break; 874 875 case SO_PROTOCOL: 876 v.val = sk->sk_protocol; 877 break; 878 879 case SO_DOMAIN: 880 v.val = sk->sk_family; 881 break; 882 883 case SO_ERROR: 884 v.val = -sock_error(sk); 885 if (v.val == 0) 886 v.val = xchg(&sk->sk_err_soft, 0); 887 break; 888 889 case SO_OOBINLINE: 890 v.val = !!sock_flag(sk, SOCK_URGINLINE); 891 break; 892 893 case SO_NO_CHECK: 894 v.val = sk->sk_no_check; 895 break; 896 897 case SO_PRIORITY: 898 v.val = sk->sk_priority; 899 break; 900 901 case SO_LINGER: 902 lv = sizeof(v.ling); 903 v.ling.l_onoff = !!sock_flag(sk, SOCK_LINGER); 904 v.ling.l_linger = sk->sk_lingertime / HZ; 905 break; 906 907 case SO_BSDCOMPAT: 908 sock_warn_obsolete_bsdism("getsockopt"); 909 break; 910 911 case SO_TIMESTAMP: 912 v.val = sock_flag(sk, SOCK_RCVTSTAMP) && 913 !sock_flag(sk, SOCK_RCVTSTAMPNS); 914 break; 915 916 case SO_TIMESTAMPNS: 917 v.val = sock_flag(sk, SOCK_RCVTSTAMPNS); 918 break; 919 920 case SO_TIMESTAMPING: 921 v.val = 0; 922 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE)) 923 v.val |= SOF_TIMESTAMPING_TX_HARDWARE; 924 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE)) 925 v.val |= SOF_TIMESTAMPING_TX_SOFTWARE; 926 if (sock_flag(sk, SOCK_TIMESTAMPING_RX_HARDWARE)) 927 v.val |= SOF_TIMESTAMPING_RX_HARDWARE; 928 if (sock_flag(sk, SOCK_TIMESTAMPING_RX_SOFTWARE)) 929 v.val |= SOF_TIMESTAMPING_RX_SOFTWARE; 930 if (sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE)) 931 v.val |= SOF_TIMESTAMPING_SOFTWARE; 932 if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE)) 933 v.val |= SOF_TIMESTAMPING_SYS_HARDWARE; 934 if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE)) 935 v.val |= SOF_TIMESTAMPING_RAW_HARDWARE; 936 break; 937 938 case SO_RCVTIMEO: 939 lv = sizeof(struct timeval); 940 if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) { 941 v.tm.tv_sec = 0; 942 v.tm.tv_usec = 0; 943 } else { 944 v.tm.tv_sec = sk->sk_rcvtimeo / HZ; 945 v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ; 946 } 947 break; 948 949 case SO_SNDTIMEO: 950 lv = sizeof(struct timeval); 951 if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) { 952 v.tm.tv_sec = 0; 953 v.tm.tv_usec = 0; 954 } else { 955 v.tm.tv_sec = sk->sk_sndtimeo / HZ; 956 v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ; 957 } 958 break; 959 960 case SO_RCVLOWAT: 961 v.val = sk->sk_rcvlowat; 962 break; 963 964 case SO_SNDLOWAT: 965 v.val = 1; 966 break; 967 968 case SO_PASSCRED: 969 v.val = !!test_bit(SOCK_PASSCRED, &sock->flags); 970 break; 971 972 case SO_PEERCRED: 973 { 974 struct ucred peercred; 975 if (len > sizeof(peercred)) 976 len = sizeof(peercred); 977 cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred); 978 if (copy_to_user(optval, &peercred, len)) 979 return -EFAULT; 980 goto lenout; 981 } 982 983 case SO_PEERNAME: 984 { 985 char address[128]; 986 987 if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2)) 988 return -ENOTCONN; 989 if (lv < len) 990 return -EINVAL; 991 if (copy_to_user(optval, address, len)) 992 return -EFAULT; 993 goto lenout; 994 } 995 996 /* Dubious BSD thing... Probably nobody even uses it, but 997 * the UNIX standard wants it for whatever reason... -DaveM 998 */ 999 case SO_ACCEPTCONN: 1000 v.val = sk->sk_state == TCP_LISTEN; 1001 break; 1002 1003 case SO_PASSSEC: 1004 v.val = !!test_bit(SOCK_PASSSEC, &sock->flags); 1005 break; 1006 1007 case SO_PEERSEC: 1008 return security_socket_getpeersec_stream(sock, optval, optlen, len); 1009 1010 case SO_MARK: 1011 v.val = sk->sk_mark; 1012 break; 1013 1014 case SO_RXQ_OVFL: 1015 v.val = !!sock_flag(sk, SOCK_RXQ_OVFL); 1016 break; 1017 1018 case SO_WIFI_STATUS: 1019 v.val = !!sock_flag(sk, SOCK_WIFI_STATUS); 1020 break; 1021 1022 case SO_PEEK_OFF: 1023 if (!sock->ops->set_peek_off) 1024 return -EOPNOTSUPP; 1025 1026 v.val = sk->sk_peek_off; 1027 break; 1028 case SO_NOFCS: 1029 v.val = !!sock_flag(sk, SOCK_NOFCS); 1030 break; 1031 default: 1032 return -ENOPROTOOPT; 1033 } 1034 1035 if (len > lv) 1036 len = lv; 1037 if (copy_to_user(optval, &v, len)) 1038 return -EFAULT; 1039lenout: 1040 if (put_user(len, optlen)) 1041 return -EFAULT; 1042 return 0; 1043} 1044 1045/* 1046 * Initialize an sk_lock. 1047 * 1048 * (We also register the sk_lock with the lock validator.) 1049 */ 1050static inline void sock_lock_init(struct sock *sk) 1051{ 1052 sock_lock_init_class_and_name(sk, 1053 af_family_slock_key_strings[sk->sk_family], 1054 af_family_slock_keys + sk->sk_family, 1055 af_family_key_strings[sk->sk_family], 1056 af_family_keys + sk->sk_family); 1057} 1058 1059/* 1060 * Copy all fields from osk to nsk but nsk->sk_refcnt must not change yet, 1061 * even temporarly, because of RCU lookups. sk_node should also be left as is. 1062 * We must not copy fields between sk_dontcopy_begin and sk_dontcopy_end 1063 */ 1064static void sock_copy(struct sock *nsk, const struct sock *osk) 1065{ 1066#ifdef CONFIG_SECURITY_NETWORK 1067 void *sptr = nsk->sk_security; 1068#endif 1069 memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin)); 1070 1071 memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end, 1072 osk->sk_prot->obj_size - offsetof(struct sock, sk_dontcopy_end)); 1073 1074#ifdef CONFIG_SECURITY_NETWORK 1075 nsk->sk_security = sptr; 1076 security_sk_clone(osk, nsk); 1077#endif 1078} 1079 1080/* 1081 * caches using SLAB_DESTROY_BY_RCU should let .next pointer from nulls nodes 1082 * un-modified. Special care is taken when initializing object to zero. 1083 */ 1084static inline void sk_prot_clear_nulls(struct sock *sk, int size) 1085{ 1086 if (offsetof(struct sock, sk_node.next) != 0) 1087 memset(sk, 0, offsetof(struct sock, sk_node.next)); 1088 memset(&sk->sk_node.pprev, 0, 1089 size - offsetof(struct sock, sk_node.pprev)); 1090} 1091 1092void sk_prot_clear_portaddr_nulls(struct sock *sk, int size) 1093{ 1094 unsigned long nulls1, nulls2; 1095 1096 nulls1 = offsetof(struct sock, __sk_common.skc_node.next); 1097 nulls2 = offsetof(struct sock, __sk_common.skc_portaddr_node.next); 1098 if (nulls1 > nulls2) 1099 swap(nulls1, nulls2); 1100 1101 if (nulls1 != 0) 1102 memset((char *)sk, 0, nulls1); 1103 memset((char *)sk + nulls1 + sizeof(void *), 0, 1104 nulls2 - nulls1 - sizeof(void *)); 1105 memset((char *)sk + nulls2 + sizeof(void *), 0, 1106 size - nulls2 - sizeof(void *)); 1107} 1108EXPORT_SYMBOL(sk_prot_clear_portaddr_nulls); 1109 1110static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority, 1111 int family) 1112{ 1113 struct sock *sk; 1114 struct kmem_cache *slab; 1115 1116 slab = prot->slab; 1117 if (slab != NULL) { 1118 sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO); 1119 if (!sk) 1120 return sk; 1121 if (priority & __GFP_ZERO) { 1122 if (prot->clear_sk) 1123 prot->clear_sk(sk, prot->obj_size); 1124 else 1125 sk_prot_clear_nulls(sk, prot->obj_size); 1126 } 1127 } else 1128 sk = kmalloc(prot->obj_size, priority); 1129 1130 if (sk != NULL) { 1131 kmemcheck_annotate_bitfield(sk, flags); 1132 1133 if (security_sk_alloc(sk, family, priority)) 1134 goto out_free; 1135 1136 if (!try_module_get(prot->owner)) 1137 goto out_free_sec; 1138 sk_tx_queue_clear(sk); 1139 } 1140 1141 return sk; 1142 1143out_free_sec: 1144 security_sk_free(sk); 1145out_free: 1146 if (slab != NULL) 1147 kmem_cache_free(slab, sk); 1148 else 1149 kfree(sk); 1150 return NULL; 1151} 1152 1153static void sk_prot_free(struct proto *prot, struct sock *sk) 1154{ 1155 struct kmem_cache *slab; 1156 struct module *owner; 1157 1158 owner = prot->owner; 1159 slab = prot->slab; 1160 1161 security_sk_free(sk); 1162 if (slab != NULL) 1163 kmem_cache_free(slab, sk); 1164 else 1165 kfree(sk); 1166 module_put(owner); 1167} 1168 1169#ifdef CONFIG_CGROUPS 1170void sock_update_classid(struct sock *sk) 1171{ 1172 u32 classid; 1173 1174 rcu_read_lock(); /* doing current task, which cannot vanish. */ 1175 classid = task_cls_classid(current); 1176 rcu_read_unlock(); 1177 if (classid && classid != sk->sk_classid) 1178 sk->sk_classid = classid; 1179} 1180EXPORT_SYMBOL(sock_update_classid); 1181 1182void sock_update_netprioidx(struct sock *sk) 1183{ 1184 if (in_interrupt()) 1185 return; 1186 1187 sk->sk_cgrp_prioidx = task_netprioidx(current); 1188} 1189EXPORT_SYMBOL_GPL(sock_update_netprioidx); 1190#endif 1191 1192/** 1193 * sk_alloc - All socket objects are allocated here 1194 * @net: the applicable net namespace 1195 * @family: protocol family 1196 * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc) 1197 * @prot: struct proto associated with this new sock instance 1198 */ 1199struct sock *sk_alloc(struct net *net, int family, gfp_t priority, 1200 struct proto *prot) 1201{ 1202 struct sock *sk; 1203 1204 sk = sk_prot_alloc(prot, priority | __GFP_ZERO, family); 1205 if (sk) { 1206 sk->sk_family = family; 1207 /* 1208 * See comment in struct sock definition to understand 1209 * why we need sk_prot_creator -acme 1210 */ 1211 sk->sk_prot = sk->sk_prot_creator = prot; 1212 sock_lock_init(sk); 1213 sock_net_set(sk, get_net(net)); 1214 atomic_set(&sk->sk_wmem_alloc, 1); 1215 1216 sock_update_classid(sk); 1217 sock_update_netprioidx(sk); 1218 } 1219 1220 return sk; 1221} 1222EXPORT_SYMBOL(sk_alloc); 1223 1224static void __sk_free(struct sock *sk) 1225{ 1226 struct sk_filter *filter; 1227 1228 if (sk->sk_destruct) 1229 sk->sk_destruct(sk); 1230 1231 filter = rcu_dereference_check(sk->sk_filter, 1232 atomic_read(&sk->sk_wmem_alloc) == 0); 1233 if (filter) { 1234 sk_filter_uncharge(sk, filter); 1235 RCU_INIT_POINTER(sk->sk_filter, NULL); 1236 } 1237 1238 sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP); 1239 1240 if (atomic_read(&sk->sk_omem_alloc)) 1241 printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n", 1242 __func__, atomic_read(&sk->sk_omem_alloc)); 1243 1244 if (sk->sk_peer_cred) 1245 put_cred(sk->sk_peer_cred); 1246 put_pid(sk->sk_peer_pid); 1247 put_net(sock_net(sk)); 1248 sk_prot_free(sk->sk_prot_creator, sk); 1249} 1250 1251void sk_free(struct sock *sk) 1252{ 1253 /* 1254 * We subtract one from sk_wmem_alloc and can know if 1255 * some packets are still in some tx queue. 1256 * If not null, sock_wfree() will call __sk_free(sk) later 1257 */ 1258 if (atomic_dec_and_test(&sk->sk_wmem_alloc)) 1259 __sk_free(sk); 1260} 1261EXPORT_SYMBOL(sk_free); 1262 1263/* 1264 * Last sock_put should drop reference to sk->sk_net. It has already 1265 * been dropped in sk_change_net. Taking reference to stopping namespace 1266 * is not an option. 1267 * Take reference to a socket to remove it from hash _alive_ and after that 1268 * destroy it in the context of init_net. 1269 */ 1270void sk_release_kernel(struct sock *sk) 1271{ 1272 if (sk == NULL || sk->sk_socket == NULL) 1273 return; 1274 1275 sock_hold(sk); 1276 sock_release(sk->sk_socket); 1277 release_net(sock_net(sk)); 1278 sock_net_set(sk, get_net(&init_net)); 1279 sock_put(sk); 1280} 1281EXPORT_SYMBOL(sk_release_kernel); 1282 1283static void sk_update_clone(const struct sock *sk, struct sock *newsk) 1284{ 1285 if (mem_cgroup_sockets_enabled && sk->sk_cgrp) 1286 sock_update_memcg(newsk); 1287} 1288 1289/** 1290 * sk_clone_lock - clone a socket, and lock its clone 1291 * @sk: the socket to clone 1292 * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc) 1293 * 1294 * Caller must unlock socket even in error path (bh_unlock_sock(newsk)) 1295 */ 1296struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) 1297{ 1298 struct sock *newsk; 1299 1300 newsk = sk_prot_alloc(sk->sk_prot, priority, sk->sk_family); 1301 if (newsk != NULL) { 1302 struct sk_filter *filter; 1303 1304 sock_copy(newsk, sk); 1305 1306 /* SANITY */ 1307 get_net(sock_net(newsk)); 1308 sk_node_init(&newsk->sk_node); 1309 sock_lock_init(newsk); 1310 bh_lock_sock(newsk); 1311 newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL; 1312 newsk->sk_backlog.len = 0; 1313 1314 atomic_set(&newsk->sk_rmem_alloc, 0); 1315 /* 1316 * sk_wmem_alloc set to one (see sk_free() and sock_wfree()) 1317 */ 1318 atomic_set(&newsk->sk_wmem_alloc, 1); 1319 atomic_set(&newsk->sk_omem_alloc, 0); 1320 skb_queue_head_init(&newsk->sk_receive_queue); 1321 skb_queue_head_init(&newsk->sk_write_queue); 1322#ifdef CONFIG_NET_DMA 1323 skb_queue_head_init(&newsk->sk_async_wait_queue); 1324#endif 1325 1326 spin_lock_init(&newsk->sk_dst_lock); 1327 rwlock_init(&newsk->sk_callback_lock); 1328 lockdep_set_class_and_name(&newsk->sk_callback_lock, 1329 af_callback_keys + newsk->sk_family, 1330 af_family_clock_key_strings[newsk->sk_family]); 1331 1332 newsk->sk_dst_cache = NULL; 1333 newsk->sk_wmem_queued = 0; 1334 newsk->sk_forward_alloc = 0; 1335 newsk->sk_send_head = NULL; 1336 newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK; 1337 1338 sock_reset_flag(newsk, SOCK_DONE); 1339 skb_queue_head_init(&newsk->sk_error_queue); 1340 1341 filter = rcu_dereference_protected(newsk->sk_filter, 1); 1342 if (filter != NULL) 1343 sk_filter_charge(newsk, filter); 1344 1345 if (unlikely(xfrm_sk_clone_policy(newsk))) { 1346 /* It is still raw copy of parent, so invalidate 1347 * destructor and make plain sk_free() */ 1348 newsk->sk_destruct = NULL; 1349 bh_unlock_sock(newsk); 1350 sk_free(newsk); 1351 newsk = NULL; 1352 goto out; 1353 } 1354 1355 newsk->sk_err = 0; 1356 newsk->sk_priority = 0; 1357 /* 1358 * Before updating sk_refcnt, we must commit prior changes to memory 1359 * (Documentation/RCU/rculist_nulls.txt for details) 1360 */ 1361 smp_wmb(); 1362 atomic_set(&newsk->sk_refcnt, 2); 1363 1364 /* 1365 * Increment the counter in the same struct proto as the master 1366 * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that 1367 * is the same as sk->sk_prot->socks, as this field was copied 1368 * with memcpy). 1369 * 1370 * This _changes_ the previous behaviour, where 1371 * tcp_create_openreq_child always was incrementing the 1372 * equivalent to tcp_prot->socks (inet_sock_nr), so this have 1373 * to be taken into account in all callers. -acme 1374 */ 1375 sk_refcnt_debug_inc(newsk); 1376 sk_set_socket(newsk, NULL); 1377 newsk->sk_wq = NULL; 1378 1379 sk_update_clone(sk, newsk); 1380 1381 if (newsk->sk_prot->sockets_allocated) 1382 sk_sockets_allocated_inc(newsk); 1383 1384 if (newsk->sk_flags & SK_FLAGS_TIMESTAMP) 1385 net_enable_timestamp(); 1386 } 1387out: 1388 return newsk; 1389} 1390EXPORT_SYMBOL_GPL(sk_clone_lock); 1391 1392void sk_setup_caps(struct sock *sk, struct dst_entry *dst) 1393{ 1394 __sk_dst_set(sk, dst); 1395 sk->sk_route_caps = dst->dev->features; 1396 if (sk->sk_route_caps & NETIF_F_GSO) 1397 sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE; 1398 sk->sk_route_caps &= ~sk->sk_route_nocaps; 1399 if (sk_can_gso(sk)) { 1400 if (dst->header_len) { 1401 sk->sk_route_caps &= ~NETIF_F_GSO_MASK; 1402 } else { 1403 sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM; 1404 sk->sk_gso_max_size = dst->dev->gso_max_size; 1405 } 1406 } 1407} 1408EXPORT_SYMBOL_GPL(sk_setup_caps); 1409 1410void __init sk_init(void) 1411{ 1412 if (totalram_pages <= 4096) { 1413 sysctl_wmem_max = 32767; 1414 sysctl_rmem_max = 32767; 1415 sysctl_wmem_default = 32767; 1416 sysctl_rmem_default = 32767; 1417 } else if (totalram_pages >= 131072) { 1418 sysctl_wmem_max = 131071; 1419 sysctl_rmem_max = 131071; 1420 } 1421} 1422 1423/* 1424 * Simple resource managers for sockets. 1425 */ 1426 1427 1428/* 1429 * Write buffer destructor automatically called from kfree_skb. 1430 */ 1431void sock_wfree(struct sk_buff *skb) 1432{ 1433 struct sock *sk = skb->sk; 1434 unsigned int len = skb->truesize; 1435 1436 if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) { 1437 /* 1438 * Keep a reference on sk_wmem_alloc, this will be released 1439 * after sk_write_space() call 1440 */ 1441 atomic_sub(len - 1, &sk->sk_wmem_alloc); 1442 sk->sk_write_space(sk); 1443 len = 1; 1444 } 1445 /* 1446 * if sk_wmem_alloc reaches 0, we must finish what sk_free() 1447 * could not do because of in-flight packets 1448 */ 1449 if (atomic_sub_and_test(len, &sk->sk_wmem_alloc)) 1450 __sk_free(sk); 1451} 1452EXPORT_SYMBOL(sock_wfree); 1453 1454/* 1455 * Read buffer destructor automatically called from kfree_skb. 1456 */ 1457void sock_rfree(struct sk_buff *skb) 1458{ 1459 struct sock *sk = skb->sk; 1460 unsigned int len = skb->truesize; 1461 1462 atomic_sub(len, &sk->sk_rmem_alloc); 1463 sk_mem_uncharge(sk, len); 1464} 1465EXPORT_SYMBOL(sock_rfree); 1466 1467 1468int sock_i_uid(struct sock *sk) 1469{ 1470 int uid; 1471 1472 read_lock_bh(&sk->sk_callback_lock); 1473 uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : 0; 1474 read_unlock_bh(&sk->sk_callback_lock); 1475 return uid; 1476} 1477EXPORT_SYMBOL(sock_i_uid); 1478 1479unsigned long sock_i_ino(struct sock *sk) 1480{ 1481 unsigned long ino; 1482 1483 read_lock_bh(&sk->sk_callback_lock); 1484 ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0; 1485 read_unlock_bh(&sk->sk_callback_lock); 1486 return ino; 1487} 1488EXPORT_SYMBOL(sock_i_ino); 1489 1490/* 1491 * Allocate a skb from the socket's send buffer. 1492 */ 1493struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, 1494 gfp_t priority) 1495{ 1496 if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) { 1497 struct sk_buff *skb = alloc_skb(size, priority); 1498 if (skb) { 1499 skb_set_owner_w(skb, sk); 1500 return skb; 1501 } 1502 } 1503 return NULL; 1504} 1505EXPORT_SYMBOL(sock_wmalloc); 1506 1507/* 1508 * Allocate a skb from the socket's receive buffer. 1509 */ 1510struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, 1511 gfp_t priority) 1512{ 1513 if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) { 1514 struct sk_buff *skb = alloc_skb(size, priority); 1515 if (skb) { 1516 skb_set_owner_r(skb, sk); 1517 return skb; 1518 } 1519 } 1520 return NULL; 1521} 1522 1523/* 1524 * Allocate a memory block from the socket's option memory buffer. 1525 */ 1526void *sock_kmalloc(struct sock *sk, int size, gfp_t priority) 1527{ 1528 if ((unsigned int)size <= sysctl_optmem_max && 1529 atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) { 1530 void *mem; 1531 /* First do the add, to avoid the race if kmalloc 1532 * might sleep. 1533 */ 1534 atomic_add(size, &sk->sk_omem_alloc); 1535 mem = kmalloc(size, priority); 1536 if (mem) 1537 return mem; 1538 atomic_sub(size, &sk->sk_omem_alloc); 1539 } 1540 return NULL; 1541} 1542EXPORT_SYMBOL(sock_kmalloc); 1543 1544/* 1545 * Free an option memory block. 1546 */ 1547void sock_kfree_s(struct sock *sk, void *mem, int size) 1548{ 1549 kfree(mem); 1550 atomic_sub(size, &sk->sk_omem_alloc); 1551} 1552EXPORT_SYMBOL(sock_kfree_s); 1553 1554/* It is almost wait_for_tcp_memory minus release_sock/lock_sock. 1555 I think, these locks should be removed for datagram sockets. 1556 */ 1557static long sock_wait_for_wmem(struct sock *sk, long timeo) 1558{ 1559 DEFINE_WAIT(wait); 1560 1561 clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); 1562 for (;;) { 1563 if (!timeo) 1564 break; 1565 if (signal_pending(current)) 1566 break; 1567 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); 1568 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); 1569 if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) 1570 break; 1571 if (sk->sk_shutdown & SEND_SHUTDOWN) 1572 break; 1573 if (sk->sk_err) 1574 break; 1575 timeo = schedule_timeout(timeo); 1576 } 1577 finish_wait(sk_sleep(sk), &wait); 1578 return timeo; 1579} 1580 1581 1582/* 1583 * Generic send/receive buffer handlers 1584 */ 1585 1586struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len, 1587 unsigned long data_len, int noblock, 1588 int *errcode) 1589{ 1590 struct sk_buff *skb; 1591 gfp_t gfp_mask; 1592 long timeo; 1593 int err; 1594 1595 gfp_mask = sk->sk_allocation; 1596 if (gfp_mask & __GFP_WAIT) 1597 gfp_mask |= __GFP_REPEAT; 1598 1599 timeo = sock_sndtimeo(sk, noblock); 1600 while (1) { 1601 err = sock_error(sk); 1602 if (err != 0) 1603 goto failure; 1604 1605 err = -EPIPE; 1606 if (sk->sk_shutdown & SEND_SHUTDOWN) 1607 goto failure; 1608 1609 if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) { 1610 skb = alloc_skb(header_len, gfp_mask); 1611 if (skb) { 1612 int npages; 1613 int i; 1614 1615 /* No pages, we're done... */ 1616 if (!data_len) 1617 break; 1618 1619 npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT; 1620 skb->truesize += data_len; 1621 skb_shinfo(skb)->nr_frags = npages; 1622 for (i = 0; i < npages; i++) { 1623 struct page *page; 1624 1625 page = alloc_pages(sk->sk_allocation, 0); 1626 if (!page) { 1627 err = -ENOBUFS; 1628 skb_shinfo(skb)->nr_frags = i; 1629 kfree_skb(skb); 1630 goto failure; 1631 } 1632 1633 __skb_fill_page_desc(skb, i, 1634 page, 0, 1635 (data_len >= PAGE_SIZE ? 1636 PAGE_SIZE : 1637 data_len)); 1638 data_len -= PAGE_SIZE; 1639 } 1640 1641 /* Full success... */ 1642 break; 1643 } 1644 err = -ENOBUFS; 1645 goto failure; 1646 } 1647 set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); 1648 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); 1649 err = -EAGAIN; 1650 if (!timeo) 1651 goto failure; 1652 if (signal_pending(current)) 1653 goto interrupted; 1654 timeo = sock_wait_for_wmem(sk, timeo); 1655 } 1656 1657 skb_set_owner_w(skb, sk); 1658 return skb; 1659 1660interrupted: 1661 err = sock_intr_errno(timeo); 1662failure: 1663 *errcode = err; 1664 return NULL; 1665} 1666EXPORT_SYMBOL(sock_alloc_send_pskb); 1667 1668struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, 1669 int noblock, int *errcode) 1670{ 1671 return sock_alloc_send_pskb(sk, size, 0, noblock, errcode); 1672} 1673EXPORT_SYMBOL(sock_alloc_send_skb); 1674 1675static void __lock_sock(struct sock *sk) 1676 __releases(&sk->sk_lock.slock) 1677 __acquires(&sk->sk_lock.slock) 1678{ 1679 DEFINE_WAIT(wait); 1680 1681 for (;;) { 1682 prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait, 1683 TASK_UNINTERRUPTIBLE); 1684 spin_unlock_bh(&sk->sk_lock.slock); 1685 schedule(); 1686 spin_lock_bh(&sk->sk_lock.slock); 1687 if (!sock_owned_by_user(sk)) 1688 break; 1689 } 1690 finish_wait(&sk->sk_lock.wq, &wait); 1691} 1692 1693static void __release_sock(struct sock *sk) 1694 __releases(&sk->sk_lock.slock) 1695 __acquires(&sk->sk_lock.slock) 1696{ 1697 struct sk_buff *skb = sk->sk_backlog.head; 1698 1699 do { 1700 sk->sk_backlog.head = sk->sk_backlog.tail = NULL; 1701 bh_unlock_sock(sk); 1702 1703 do { 1704 struct sk_buff *next = skb->next; 1705 1706 prefetch(next); 1707 WARN_ON_ONCE(skb_dst_is_noref(skb)); 1708 skb->next = NULL; 1709 sk_backlog_rcv(sk, skb); 1710 1711 /* 1712 * We are in process context here with softirqs 1713 * disabled, use cond_resched_softirq() to preempt. 1714 * This is safe to do because we've taken the backlog 1715 * queue private: 1716 */ 1717 cond_resched_softirq(); 1718 1719 skb = next; 1720 } while (skb != NULL); 1721 1722 bh_lock_sock(sk); 1723 } while ((skb = sk->sk_backlog.head) != NULL); 1724 1725 /* 1726 * Doing the zeroing here guarantee we can not loop forever 1727 * while a wild producer attempts to flood us. 1728 */ 1729 sk->sk_backlog.len = 0; 1730} 1731 1732/** 1733 * sk_wait_data - wait for data to arrive at sk_receive_queue 1734 * @sk: sock to wait on 1735 * @timeo: for how long 1736 * 1737 * Now socket state including sk->sk_err is changed only under lock, 1738 * hence we may omit checks after joining wait queue. 1739 * We check receive queue before schedule() only as optimization; 1740 * it is very likely that release_sock() added new data. 1741 */ 1742int sk_wait_data(struct sock *sk, long *timeo) 1743{ 1744 int rc; 1745 DEFINE_WAIT(wait); 1746 1747 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); 1748 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); 1749 rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue)); 1750 clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); 1751 finish_wait(sk_sleep(sk), &wait); 1752 return rc; 1753} 1754EXPORT_SYMBOL(sk_wait_data); 1755 1756/** 1757 * __sk_mem_schedule - increase sk_forward_alloc and memory_allocated 1758 * @sk: socket 1759 * @size: memory size to allocate 1760 * @kind: allocation type 1761 * 1762 * If kind is SK_MEM_SEND, it means wmem allocation. Otherwise it means 1763 * rmem allocation. This function assumes that protocols which have 1764 * memory_pressure use sk_wmem_queued as write buffer accounting. 1765 */ 1766int __sk_mem_schedule(struct sock *sk, int size, int kind) 1767{ 1768 struct proto *prot = sk->sk_prot; 1769 int amt = sk_mem_pages(size); 1770 long allocated; 1771 int parent_status = UNDER_LIMIT; 1772 1773 sk->sk_forward_alloc += amt * SK_MEM_QUANTUM; 1774 1775 allocated = sk_memory_allocated_add(sk, amt, &parent_status); 1776 1777 /* Under limit. */ 1778 if (parent_status == UNDER_LIMIT && 1779 allocated <= sk_prot_mem_limits(sk, 0)) { 1780 sk_leave_memory_pressure(sk); 1781 return 1; 1782 } 1783 1784 /* Under pressure. (we or our parents) */ 1785 if ((parent_status > SOFT_LIMIT) || 1786 allocated > sk_prot_mem_limits(sk, 1)) 1787 sk_enter_memory_pressure(sk); 1788 1789 /* Over hard limit (we or our parents) */ 1790 if ((parent_status == OVER_LIMIT) || 1791 (allocated > sk_prot_mem_limits(sk, 2))) 1792 goto suppress_allocation; 1793 1794 /* guarantee minimum buffer size under pressure */ 1795 if (kind == SK_MEM_RECV) { 1796 if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0]) 1797 return 1; 1798 1799 } else { /* SK_MEM_SEND */ 1800 if (sk->sk_type == SOCK_STREAM) { 1801 if (sk->sk_wmem_queued < prot->sysctl_wmem[0]) 1802 return 1; 1803 } else if (atomic_read(&sk->sk_wmem_alloc) < 1804 prot->sysctl_wmem[0]) 1805 return 1; 1806 } 1807 1808 if (sk_has_memory_pressure(sk)) { 1809 int alloc; 1810 1811 if (!sk_under_memory_pressure(sk)) 1812 return 1; 1813 alloc = sk_sockets_allocated_read_positive(sk); 1814 if (sk_prot_mem_limits(sk, 2) > alloc * 1815 sk_mem_pages(sk->sk_wmem_queued + 1816 atomic_read(&sk->sk_rmem_alloc) + 1817 sk->sk_forward_alloc)) 1818 return 1; 1819 } 1820 1821suppress_allocation: 1822 1823 if (kind == SK_MEM_SEND && sk->sk_type == SOCK_STREAM) { 1824 sk_stream_moderate_sndbuf(sk); 1825 1826 /* Fail only if socket is _under_ its sndbuf. 1827 * In this case we cannot block, so that we have to fail. 1828 */ 1829 if (sk->sk_wmem_queued + size >= sk->sk_sndbuf) 1830 return 1; 1831 } 1832 1833 trace_sock_exceed_buf_limit(sk, prot, allocated); 1834 1835 /* Alas. Undo changes. */ 1836 sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM; 1837 1838 sk_memory_allocated_sub(sk, amt); 1839 1840 return 0; 1841} 1842EXPORT_SYMBOL(__sk_mem_schedule); 1843 1844/** 1845 * __sk_reclaim - reclaim memory_allocated 1846 * @sk: socket 1847 */ 1848void __sk_mem_reclaim(struct sock *sk) 1849{ 1850 sk_memory_allocated_sub(sk, 1851 sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT); 1852 sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1; 1853 1854 if (sk_under_memory_pressure(sk) && 1855 (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0))) 1856 sk_leave_memory_pressure(sk); 1857} 1858EXPORT_SYMBOL(__sk_mem_reclaim); 1859 1860 1861/* 1862 * Set of default routines for initialising struct proto_ops when 1863 * the protocol does not support a particular function. In certain 1864 * cases where it makes no sense for a protocol to have a "do nothing" 1865 * function, some default processing is provided. 1866 */ 1867 1868int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len) 1869{ 1870 return -EOPNOTSUPP; 1871} 1872EXPORT_SYMBOL(sock_no_bind); 1873 1874int sock_no_connect(struct socket *sock, struct sockaddr *saddr, 1875 int len, int flags) 1876{ 1877 return -EOPNOTSUPP; 1878} 1879EXPORT_SYMBOL(sock_no_connect); 1880 1881int sock_no_socketpair(struct socket *sock1, struct socket *sock2) 1882{ 1883 return -EOPNOTSUPP; 1884} 1885EXPORT_SYMBOL(sock_no_socketpair); 1886 1887int sock_no_accept(struct socket *sock, struct socket *newsock, int flags) 1888{ 1889 return -EOPNOTSUPP; 1890} 1891EXPORT_SYMBOL(sock_no_accept); 1892 1893int sock_no_getname(struct socket *sock, struct sockaddr *saddr, 1894 int *len, int peer) 1895{ 1896 return -EOPNOTSUPP; 1897} 1898EXPORT_SYMBOL(sock_no_getname); 1899 1900unsigned int sock_no_poll(struct file *file, struct socket *sock, poll_table *pt) 1901{ 1902 return 0; 1903} 1904EXPORT_SYMBOL(sock_no_poll); 1905 1906int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) 1907{ 1908 return -EOPNOTSUPP; 1909} 1910EXPORT_SYMBOL(sock_no_ioctl); 1911 1912int sock_no_listen(struct socket *sock, int backlog) 1913{ 1914 return -EOPNOTSUPP; 1915} 1916EXPORT_SYMBOL(sock_no_listen); 1917 1918int sock_no_shutdown(struct socket *sock, int how) 1919{ 1920 return -EOPNOTSUPP; 1921} 1922EXPORT_SYMBOL(sock_no_shutdown); 1923 1924int sock_no_setsockopt(struct socket *sock, int level, int optname, 1925 char __user *optval, unsigned int optlen) 1926{ 1927 return -EOPNOTSUPP; 1928} 1929EXPORT_SYMBOL(sock_no_setsockopt); 1930 1931int sock_no_getsockopt(struct socket *sock, int level, int optname, 1932 char __user *optval, int __user *optlen) 1933{ 1934 return -EOPNOTSUPP; 1935} 1936EXPORT_SYMBOL(sock_no_getsockopt); 1937 1938int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m, 1939 size_t len) 1940{ 1941 return -EOPNOTSUPP; 1942} 1943EXPORT_SYMBOL(sock_no_sendmsg); 1944 1945int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m, 1946 size_t len, int flags) 1947{ 1948 return -EOPNOTSUPP; 1949} 1950EXPORT_SYMBOL(sock_no_recvmsg); 1951 1952int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma) 1953{ 1954 /* Mirror missing mmap method error code */ 1955 return -ENODEV; 1956} 1957EXPORT_SYMBOL(sock_no_mmap); 1958 1959ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags) 1960{ 1961 ssize_t res; 1962 struct msghdr msg = {.msg_flags = flags}; 1963 struct kvec iov; 1964 char *kaddr = kmap(page); 1965 iov.iov_base = kaddr + offset; 1966 iov.iov_len = size; 1967 res = kernel_sendmsg(sock, &msg, &iov, 1, size); 1968 kunmap(page); 1969 return res; 1970} 1971EXPORT_SYMBOL(sock_no_sendpage); 1972 1973/* 1974 * Default Socket Callbacks 1975 */ 1976 1977static void sock_def_wakeup(struct sock *sk) 1978{ 1979 struct socket_wq *wq; 1980 1981 rcu_read_lock(); 1982 wq = rcu_dereference(sk->sk_wq); 1983 if (wq_has_sleeper(wq)) 1984 wake_up_interruptible_all(&wq->wait); 1985 rcu_read_unlock(); 1986} 1987 1988static void sock_def_error_report(struct sock *sk) 1989{ 1990 struct socket_wq *wq; 1991 1992 rcu_read_lock(); 1993 wq = rcu_dereference(sk->sk_wq); 1994 if (wq_has_sleeper(wq)) 1995 wake_up_interruptible_poll(&wq->wait, POLLERR); 1996 sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR); 1997 rcu_read_unlock(); 1998} 1999 2000static void sock_def_readable(struct sock *sk, int len) 2001{ 2002 struct socket_wq *wq; 2003 2004 rcu_read_lock(); 2005 wq = rcu_dereference(sk->sk_wq); 2006 if (wq_has_sleeper(wq)) 2007 wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLPRI | 2008 POLLRDNORM | POLLRDBAND); 2009 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); 2010 rcu_read_unlock(); 2011} 2012 2013static void sock_def_write_space(struct sock *sk) 2014{ 2015 struct socket_wq *wq; 2016 2017 rcu_read_lock(); 2018 2019 /* Do not wake up a writer until he can make "significant" 2020 * progress. --DaveM 2021 */ 2022 if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) { 2023 wq = rcu_dereference(sk->sk_wq); 2024 if (wq_has_sleeper(wq)) 2025 wake_up_interruptible_sync_poll(&wq->wait, POLLOUT | 2026 POLLWRNORM | POLLWRBAND); 2027 2028 /* Should agree with poll, otherwise some programs break */ 2029 if (sock_writeable(sk)) 2030 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); 2031 } 2032 2033 rcu_read_unlock(); 2034} 2035 2036static void sock_def_destruct(struct sock *sk) 2037{ 2038 kfree(sk->sk_protinfo); 2039} 2040 2041void sk_send_sigurg(struct sock *sk) 2042{ 2043 if (sk->sk_socket && sk->sk_socket->file) 2044 if (send_sigurg(&sk->sk_socket->file->f_owner)) 2045 sk_wake_async(sk, SOCK_WAKE_URG, POLL_PRI); 2046} 2047EXPORT_SYMBOL(sk_send_sigurg); 2048 2049void sk_reset_timer(struct sock *sk, struct timer_list* timer, 2050 unsigned long expires) 2051{ 2052 if (!mod_timer(timer, expires)) 2053 sock_hold(sk); 2054} 2055EXPORT_SYMBOL(sk_reset_timer); 2056 2057void sk_stop_timer(struct sock *sk, struct timer_list* timer) 2058{ 2059 if (timer_pending(timer) && del_timer(timer)) 2060 __sock_put(sk); 2061} 2062EXPORT_SYMBOL(sk_stop_timer); 2063 2064void sock_init_data(struct socket *sock, struct sock *sk) 2065{ 2066 skb_queue_head_init(&sk->sk_receive_queue); 2067 skb_queue_head_init(&sk->sk_write_queue); 2068 skb_queue_head_init(&sk->sk_error_queue); 2069#ifdef CONFIG_NET_DMA 2070 skb_queue_head_init(&sk->sk_async_wait_queue); 2071#endif 2072 2073 sk->sk_send_head = NULL; 2074 2075 init_timer(&sk->sk_timer); 2076 2077 sk->sk_allocation = GFP_KERNEL; 2078 sk->sk_rcvbuf = sysctl_rmem_default; 2079 sk->sk_sndbuf = sysctl_wmem_default; 2080 sk->sk_state = TCP_CLOSE; 2081 sk_set_socket(sk, sock); 2082 2083 sock_set_flag(sk, SOCK_ZAPPED); 2084 2085 if (sock) { 2086 sk->sk_type = sock->type; 2087 sk->sk_wq = sock->wq; 2088 sock->sk = sk; 2089 } else 2090 sk->sk_wq = NULL; 2091 2092 spin_lock_init(&sk->sk_dst_lock); 2093 rwlock_init(&sk->sk_callback_lock); 2094 lockdep_set_class_and_name(&sk->sk_callback_lock, 2095 af_callback_keys + sk->sk_family, 2096 af_family_clock_key_strings[sk->sk_family]); 2097 2098 sk->sk_state_change = sock_def_wakeup; 2099 sk->sk_data_ready = sock_def_readable; 2100 sk->sk_write_space = sock_def_write_space; 2101 sk->sk_error_report = sock_def_error_report; 2102 sk->sk_destruct = sock_def_destruct; 2103 2104 sk->sk_sndmsg_page = NULL; 2105 sk->sk_sndmsg_off = 0; 2106 sk->sk_peek_off = -1; 2107 2108 sk->sk_peer_pid = NULL; 2109 sk->sk_peer_cred = NULL; 2110 sk->sk_write_pending = 0; 2111 sk->sk_rcvlowat = 1; 2112 sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; 2113 sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT; 2114 2115 sk->sk_stamp = ktime_set(-1L, 0); 2116 2117 /* 2118 * Before updating sk_refcnt, we must commit prior changes to memory 2119 * (Documentation/RCU/rculist_nulls.txt for details) 2120 */ 2121 smp_wmb(); 2122 atomic_set(&sk->sk_refcnt, 1); 2123 atomic_set(&sk->sk_drops, 0); 2124} 2125EXPORT_SYMBOL(sock_init_data); 2126 2127void lock_sock_nested(struct sock *sk, int subclass) 2128{ 2129 might_sleep(); 2130 spin_lock_bh(&sk->sk_lock.slock); 2131 if (sk->sk_lock.owned) 2132 __lock_sock(sk); 2133 sk->sk_lock.owned = 1; 2134 spin_unlock(&sk->sk_lock.slock); 2135 /* 2136 * The sk_lock has mutex_lock() semantics here: 2137 */ 2138 mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_); 2139 local_bh_enable(); 2140} 2141EXPORT_SYMBOL(lock_sock_nested); 2142 2143void release_sock(struct sock *sk) 2144{ 2145 /* 2146 * The sk_lock has mutex_unlock() semantics: 2147 */ 2148 mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_); 2149 2150 spin_lock_bh(&sk->sk_lock.slock); 2151 if (sk->sk_backlog.tail) 2152 __release_sock(sk); 2153 sk->sk_lock.owned = 0; 2154 if (waitqueue_active(&sk->sk_lock.wq)) 2155 wake_up(&sk->sk_lock.wq); 2156 spin_unlock_bh(&sk->sk_lock.slock); 2157} 2158EXPORT_SYMBOL(release_sock); 2159 2160/** 2161 * lock_sock_fast - fast version of lock_sock 2162 * @sk: socket 2163 * 2164 * This version should be used for very small section, where process wont block 2165 * return false if fast path is taken 2166 * sk_lock.slock locked, owned = 0, BH disabled 2167 * return true if slow path is taken 2168 * sk_lock.slock unlocked, owned = 1, BH enabled 2169 */ 2170bool lock_sock_fast(struct sock *sk) 2171{ 2172 might_sleep(); 2173 spin_lock_bh(&sk->sk_lock.slock); 2174 2175 if (!sk->sk_lock.owned) 2176 /* 2177 * Note : We must disable BH 2178 */ 2179 return false; 2180 2181 __lock_sock(sk); 2182 sk->sk_lock.owned = 1; 2183 spin_unlock(&sk->sk_lock.slock); 2184 /* 2185 * The sk_lock has mutex_lock() semantics here: 2186 */ 2187 mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_); 2188 local_bh_enable(); 2189 return true; 2190} 2191EXPORT_SYMBOL(lock_sock_fast); 2192 2193int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp) 2194{ 2195 struct timeval tv; 2196 if (!sock_flag(sk, SOCK_TIMESTAMP)) 2197 sock_enable_timestamp(sk, SOCK_TIMESTAMP); 2198 tv = ktime_to_timeval(sk->sk_stamp); 2199 if (tv.tv_sec == -1) 2200 return -ENOENT; 2201 if (tv.tv_sec == 0) { 2202 sk->sk_stamp = ktime_get_real(); 2203 tv = ktime_to_timeval(sk->sk_stamp); 2204 } 2205 return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0; 2206} 2207EXPORT_SYMBOL(sock_get_timestamp); 2208 2209int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp) 2210{ 2211 struct timespec ts; 2212 if (!sock_flag(sk, SOCK_TIMESTAMP)) 2213 sock_enable_timestamp(sk, SOCK_TIMESTAMP); 2214 ts = ktime_to_timespec(sk->sk_stamp); 2215 if (ts.tv_sec == -1) 2216 return -ENOENT; 2217 if (ts.tv_sec == 0) { 2218 sk->sk_stamp = ktime_get_real(); 2219 ts = ktime_to_timespec(sk->sk_stamp); 2220 } 2221 return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0; 2222} 2223EXPORT_SYMBOL(sock_get_timestampns); 2224 2225void sock_enable_timestamp(struct sock *sk, int flag) 2226{ 2227 if (!sock_flag(sk, flag)) { 2228 unsigned long previous_flags = sk->sk_flags; 2229 2230 sock_set_flag(sk, flag); 2231 /* 2232 * we just set one of the two flags which require net 2233 * time stamping, but time stamping might have been on 2234 * already because of the other one 2235 */ 2236 if (!(previous_flags & SK_FLAGS_TIMESTAMP)) 2237 net_enable_timestamp(); 2238 } 2239} 2240 2241/* 2242 * Get a socket option on an socket. 2243 * 2244 * FIX: POSIX 1003.1g is very ambiguous here. It states that 2245 * asynchronous errors should be reported by getsockopt. We assume 2246 * this means if you specify SO_ERROR (otherwise whats the point of it). 2247 */ 2248int sock_common_getsockopt(struct socket *sock, int level, int optname, 2249 char __user *optval, int __user *optlen) 2250{ 2251 struct sock *sk = sock->sk; 2252 2253 return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen); 2254} 2255EXPORT_SYMBOL(sock_common_getsockopt); 2256 2257#ifdef CONFIG_COMPAT 2258int compat_sock_common_getsockopt(struct socket *sock, int level, int optname, 2259 char __user *optval, int __user *optlen) 2260{ 2261 struct sock *sk = sock->sk; 2262 2263 if (sk->sk_prot->compat_getsockopt != NULL) 2264 return sk->sk_prot->compat_getsockopt(sk, level, optname, 2265 optval, optlen); 2266 return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen); 2267} 2268EXPORT_SYMBOL(compat_sock_common_getsockopt); 2269#endif 2270 2271int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock, 2272 struct msghdr *msg, size_t size, int flags) 2273{ 2274 struct sock *sk = sock->sk; 2275 int addr_len = 0; 2276 int err; 2277 2278 err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT, 2279 flags & ~MSG_DONTWAIT, &addr_len); 2280 if (err >= 0) 2281 msg->msg_namelen = addr_len; 2282 return err; 2283} 2284EXPORT_SYMBOL(sock_common_recvmsg); 2285 2286/* 2287 * Set socket options on an inet socket. 2288 */ 2289int sock_common_setsockopt(struct socket *sock, int level, int optname, 2290 char __user *optval, unsigned int optlen) 2291{ 2292 struct sock *sk = sock->sk; 2293 2294 return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen); 2295} 2296EXPORT_SYMBOL(sock_common_setsockopt); 2297 2298#ifdef CONFIG_COMPAT 2299int compat_sock_common_setsockopt(struct socket *sock, int level, int optname, 2300 char __user *optval, unsigned int optlen) 2301{ 2302 struct sock *sk = sock->sk; 2303 2304 if (sk->sk_prot->compat_setsockopt != NULL) 2305 return sk->sk_prot->compat_setsockopt(sk, level, optname, 2306 optval, optlen); 2307 return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen); 2308} 2309EXPORT_SYMBOL(compat_sock_common_setsockopt); 2310#endif 2311 2312void sk_common_release(struct sock *sk) 2313{ 2314 if (sk->sk_prot->destroy) 2315 sk->sk_prot->destroy(sk); 2316 2317 /* 2318 * Observation: when sock_common_release is called, processes have 2319 * no access to socket. But net still has. 2320 * Step one, detach it from networking: 2321 * 2322 * A. Remove from hash tables. 2323 */ 2324 2325 sk->sk_prot->unhash(sk); 2326 2327 /* 2328 * In this point socket cannot receive new packets, but it is possible 2329 * that some packets are in flight because some CPU runs receiver and 2330 * did hash table lookup before we unhashed socket. They will achieve 2331 * receive queue and will be purged by socket destructor. 2332 * 2333 * Also we still have packets pending on receive queue and probably, 2334 * our own packets waiting in device queues. sock_destroy will drain 2335 * receive queue, but transmitted packets will delay socket destruction 2336 * until the last reference will be released. 2337 */ 2338 2339 sock_orphan(sk); 2340 2341 xfrm_sk_free_policy(sk); 2342 2343 sk_refcnt_debug_release(sk); 2344 sock_put(sk); 2345} 2346EXPORT_SYMBOL(sk_common_release); 2347 2348#ifdef CONFIG_PROC_FS 2349#define PROTO_INUSE_NR 64 /* should be enough for the first time */ 2350struct prot_inuse { 2351 int val[PROTO_INUSE_NR]; 2352}; 2353 2354static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR); 2355 2356#ifdef CONFIG_NET_NS 2357void sock_prot_inuse_add(struct net *net, struct proto *prot, int val) 2358{ 2359 __this_cpu_add(net->core.inuse->val[prot->inuse_idx], val); 2360} 2361EXPORT_SYMBOL_GPL(sock_prot_inuse_add); 2362 2363int sock_prot_inuse_get(struct net *net, struct proto *prot) 2364{ 2365 int cpu, idx = prot->inuse_idx; 2366 int res = 0; 2367 2368 for_each_possible_cpu(cpu) 2369 res += per_cpu_ptr(net->core.inuse, cpu)->val[idx]; 2370 2371 return res >= 0 ? res : 0; 2372} 2373EXPORT_SYMBOL_GPL(sock_prot_inuse_get); 2374 2375static int __net_init sock_inuse_init_net(struct net *net) 2376{ 2377 net->core.inuse = alloc_percpu(struct prot_inuse); 2378 return net->core.inuse ? 0 : -ENOMEM; 2379} 2380 2381static void __net_exit sock_inuse_exit_net(struct net *net) 2382{ 2383 free_percpu(net->core.inuse); 2384} 2385 2386static struct pernet_operations net_inuse_ops = { 2387 .init = sock_inuse_init_net, 2388 .exit = sock_inuse_exit_net, 2389}; 2390 2391static __init int net_inuse_init(void) 2392{ 2393 if (register_pernet_subsys(&net_inuse_ops)) 2394 panic("Cannot initialize net inuse counters"); 2395 2396 return 0; 2397} 2398 2399core_initcall(net_inuse_init); 2400#else 2401static DEFINE_PER_CPU(struct prot_inuse, prot_inuse); 2402 2403void sock_prot_inuse_add(struct net *net, struct proto *prot, int val) 2404{ 2405 __this_cpu_add(prot_inuse.val[prot->inuse_idx], val); 2406} 2407EXPORT_SYMBOL_GPL(sock_prot_inuse_add); 2408 2409int sock_prot_inuse_get(struct net *net, struct proto *prot) 2410{ 2411 int cpu, idx = prot->inuse_idx; 2412 int res = 0; 2413 2414 for_each_possible_cpu(cpu) 2415 res += per_cpu(prot_inuse, cpu).val[idx]; 2416 2417 return res >= 0 ? res : 0; 2418} 2419EXPORT_SYMBOL_GPL(sock_prot_inuse_get); 2420#endif 2421 2422static void assign_proto_idx(struct proto *prot) 2423{ 2424 prot->inuse_idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR); 2425 2426 if (unlikely(prot->inuse_idx == PROTO_INUSE_NR - 1)) { 2427 printk(KERN_ERR "PROTO_INUSE_NR exhausted\n"); 2428 return; 2429 } 2430 2431 set_bit(prot->inuse_idx, proto_inuse_idx); 2432} 2433 2434static void release_proto_idx(struct proto *prot) 2435{ 2436 if (prot->inuse_idx != PROTO_INUSE_NR - 1) 2437 clear_bit(prot->inuse_idx, proto_inuse_idx); 2438} 2439#else 2440static inline void assign_proto_idx(struct proto *prot) 2441{ 2442} 2443 2444static inline void release_proto_idx(struct proto *prot) 2445{ 2446} 2447#endif 2448 2449int proto_register(struct proto *prot, int alloc_slab) 2450{ 2451 if (alloc_slab) { 2452 prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0, 2453 SLAB_HWCACHE_ALIGN | prot->slab_flags, 2454 NULL); 2455 2456 if (prot->slab == NULL) { 2457 printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n", 2458 prot->name); 2459 goto out; 2460 } 2461 2462 if (prot->rsk_prot != NULL) { 2463 prot->rsk_prot->slab_name = kasprintf(GFP_KERNEL, "request_sock_%s", prot->name); 2464 if (prot->rsk_prot->slab_name == NULL) 2465 goto out_free_sock_slab; 2466 2467 prot->rsk_prot->slab = kmem_cache_create(prot->rsk_prot->slab_name, 2468 prot->rsk_prot->obj_size, 0, 2469 SLAB_HWCACHE_ALIGN, NULL); 2470 2471 if (prot->rsk_prot->slab == NULL) { 2472 printk(KERN_CRIT "%s: Can't create request sock SLAB cache!\n", 2473 prot->name); 2474 goto out_free_request_sock_slab_name; 2475 } 2476 } 2477 2478 if (prot->twsk_prot != NULL) { 2479 prot->twsk_prot->twsk_slab_name = kasprintf(GFP_KERNEL, "tw_sock_%s", prot->name); 2480 2481 if (prot->twsk_prot->twsk_slab_name == NULL) 2482 goto out_free_request_sock_slab; 2483 2484 prot->twsk_prot->twsk_slab = 2485 kmem_cache_create(prot->twsk_prot->twsk_slab_name, 2486 prot->twsk_prot->twsk_obj_size, 2487 0, 2488 SLAB_HWCACHE_ALIGN | 2489 prot->slab_flags, 2490 NULL); 2491 if (prot->twsk_prot->twsk_slab == NULL) 2492 goto out_free_timewait_sock_slab_name; 2493 } 2494 } 2495 2496 mutex_lock(&proto_list_mutex); 2497 list_add(&prot->node, &proto_list); 2498 assign_proto_idx(prot); 2499 mutex_unlock(&proto_list_mutex); 2500 return 0; 2501 2502out_free_timewait_sock_slab_name: 2503 kfree(prot->twsk_prot->twsk_slab_name); 2504out_free_request_sock_slab: 2505 if (prot->rsk_prot && prot->rsk_prot->slab) { 2506 kmem_cache_destroy(prot->rsk_prot->slab); 2507 prot->rsk_prot->slab = NULL; 2508 } 2509out_free_request_sock_slab_name: 2510 if (prot->rsk_prot) 2511 kfree(prot->rsk_prot->slab_name); 2512out_free_sock_slab: 2513 kmem_cache_destroy(prot->slab); 2514 prot->slab = NULL; 2515out: 2516 return -ENOBUFS; 2517} 2518EXPORT_SYMBOL(proto_register); 2519 2520void proto_unregister(struct proto *prot) 2521{ 2522 mutex_lock(&proto_list_mutex); 2523 release_proto_idx(prot); 2524 list_del(&prot->node); 2525 mutex_unlock(&proto_list_mutex); 2526 2527 if (prot->slab != NULL) { 2528 kmem_cache_destroy(prot->slab); 2529 prot->slab = NULL; 2530 } 2531 2532 if (prot->rsk_prot != NULL && prot->rsk_prot->slab != NULL) { 2533 kmem_cache_destroy(prot->rsk_prot->slab); 2534 kfree(prot->rsk_prot->slab_name); 2535 prot->rsk_prot->slab = NULL; 2536 } 2537 2538 if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) { 2539 kmem_cache_destroy(prot->twsk_prot->twsk_slab); 2540 kfree(prot->twsk_prot->twsk_slab_name); 2541 prot->twsk_prot->twsk_slab = NULL; 2542 } 2543} 2544EXPORT_SYMBOL(proto_unregister); 2545 2546#ifdef CONFIG_PROC_FS 2547static void *proto_seq_start(struct seq_file *seq, loff_t *pos) 2548 __acquires(proto_list_mutex) 2549{ 2550 mutex_lock(&proto_list_mutex); 2551 return seq_list_start_head(&proto_list, *pos); 2552} 2553 2554static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos) 2555{ 2556 return seq_list_next(v, &proto_list, pos); 2557} 2558 2559static void proto_seq_stop(struct seq_file *seq, void *v) 2560 __releases(proto_list_mutex) 2561{ 2562 mutex_unlock(&proto_list_mutex); 2563} 2564 2565static char proto_method_implemented(const void *method) 2566{ 2567 return method == NULL ? 'n' : 'y'; 2568} 2569static long sock_prot_memory_allocated(struct proto *proto) 2570{ 2571 return proto->memory_allocated != NULL ? proto_memory_allocated(proto) : -1L; 2572} 2573 2574static char *sock_prot_memory_pressure(struct proto *proto) 2575{ 2576 return proto->memory_pressure != NULL ? 2577 proto_memory_pressure(proto) ? "yes" : "no" : "NI"; 2578} 2579 2580static void proto_seq_printf(struct seq_file *seq, struct proto *proto) 2581{ 2582 2583 seq_printf(seq, "%-9s %4u %6d %6ld %-3s %6u %-3s %-10s " 2584 "%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n", 2585 proto->name, 2586 proto->obj_size, 2587 sock_prot_inuse_get(seq_file_net(seq), proto), 2588 sock_prot_memory_allocated(proto), 2589 sock_prot_memory_pressure(proto), 2590 proto->max_header, 2591 proto->slab == NULL ? "no" : "yes", 2592 module_name(proto->owner), 2593 proto_method_implemented(proto->close), 2594 proto_method_implemented(proto->connect), 2595 proto_method_implemented(proto->disconnect), 2596 proto_method_implemented(proto->accept), 2597 proto_method_implemented(proto->ioctl), 2598 proto_method_implemented(proto->init), 2599 proto_method_implemented(proto->destroy), 2600 proto_method_implemented(proto->shutdown), 2601 proto_method_implemented(proto->setsockopt), 2602 proto_method_implemented(proto->getsockopt), 2603 proto_method_implemented(proto->sendmsg), 2604 proto_method_implemented(proto->recvmsg), 2605 proto_method_implemented(proto->sendpage), 2606 proto_method_implemented(proto->bind), 2607 proto_method_implemented(proto->backlog_rcv), 2608 proto_method_implemented(proto->hash), 2609 proto_method_implemented(proto->unhash), 2610 proto_method_implemented(proto->get_port), 2611 proto_method_implemented(proto->enter_memory_pressure)); 2612} 2613 2614static int proto_seq_show(struct seq_file *seq, void *v) 2615{ 2616 if (v == &proto_list) 2617 seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s", 2618 "protocol", 2619 "size", 2620 "sockets", 2621 "memory", 2622 "press", 2623 "maxhdr", 2624 "slab", 2625 "module", 2626 "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n"); 2627 else 2628 proto_seq_printf(seq, list_entry(v, struct proto, node)); 2629 return 0; 2630} 2631 2632static const struct seq_operations proto_seq_ops = { 2633 .start = proto_seq_start, 2634 .next = proto_seq_next, 2635 .stop = proto_seq_stop, 2636 .show = proto_seq_show, 2637}; 2638 2639static int proto_seq_open(struct inode *inode, struct file *file) 2640{ 2641 return seq_open_net(inode, file, &proto_seq_ops, 2642 sizeof(struct seq_net_private)); 2643} 2644 2645static const struct file_operations proto_seq_fops = { 2646 .owner = THIS_MODULE, 2647 .open = proto_seq_open, 2648 .read = seq_read, 2649 .llseek = seq_lseek, 2650 .release = seq_release_net, 2651}; 2652 2653static __net_init int proto_init_net(struct net *net) 2654{ 2655 if (!proc_net_fops_create(net, "protocols", S_IRUGO, &proto_seq_fops)) 2656 return -ENOMEM; 2657 2658 return 0; 2659} 2660 2661static __net_exit void proto_exit_net(struct net *net) 2662{ 2663 proc_net_remove(net, "protocols"); 2664} 2665 2666 2667static __net_initdata struct pernet_operations proto_net_ops = { 2668 .init = proto_init_net, 2669 .exit = proto_exit_net, 2670}; 2671 2672static int __init proto_init(void) 2673{ 2674 return register_pernet_subsys(&proto_net_ops); 2675} 2676 2677subsys_initcall(proto_init); 2678 2679#endif /* PROC_FS */ 2680