ip_set_core.c revision a8a8a0937e22a5fd55aeb22586724ba6bb70aadd
1/* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu> 2 * Patrick Schaaf <bof@bof.de> 3 * Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> 4 * 5 * This program is free software; you can redistribute it and/or modify 6 * it under the terms of the GNU General Public License version 2 as 7 * published by the Free Software Foundation. 8 */ 9 10/* Kernel module for IP set management */ 11 12#include <linux/init.h> 13#include <linux/module.h> 14#include <linux/moduleparam.h> 15#include <linux/ip.h> 16#include <linux/skbuff.h> 17#include <linux/spinlock.h> 18#include <linux/netlink.h> 19#include <linux/rculist.h> 20#include <linux/version.h> 21#include <net/netlink.h> 22 23#include <linux/netfilter.h> 24#include <linux/netfilter/nfnetlink.h> 25#include <linux/netfilter/ipset/ip_set.h> 26 27static LIST_HEAD(ip_set_type_list); /* all registered set types */ 28static DEFINE_MUTEX(ip_set_type_mutex); /* protects ip_set_type_list */ 29static DEFINE_RWLOCK(ip_set_ref_lock); /* protects the set refs */ 30 31static struct ip_set **ip_set_list; /* all individual sets */ 32static ip_set_id_t ip_set_max = CONFIG_IP_SET_MAX; /* max number of sets */ 33 34#define STREQ(a, b) (strncmp(a, b, IPSET_MAXNAMELEN) == 0) 35 36static unsigned int max_sets; 37 38module_param(max_sets, int, 0600); 39MODULE_PARM_DESC(max_sets, "maximal number of sets"); 40MODULE_LICENSE("GPL"); 41MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); 42MODULE_DESCRIPTION("core IP set support"); 43MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET); 44 45/* 46 * The set types are implemented in modules and registered set types 47 * can be found in ip_set_type_list. Adding/deleting types is 48 * serialized by ip_set_type_mutex. 49 */ 50 51static inline void 52ip_set_type_lock(void) 53{ 54 mutex_lock(&ip_set_type_mutex); 55} 56 57static inline void 58ip_set_type_unlock(void) 59{ 60 mutex_unlock(&ip_set_type_mutex); 61} 62 63/* Register and deregister settype */ 64 65static struct ip_set_type * 66find_set_type(const char *name, u8 family, u8 revision) 67{ 68 struct ip_set_type *type; 69 70 list_for_each_entry_rcu(type, &ip_set_type_list, list) 71 if (STREQ(type->name, name) && 72 (type->family == family || type->family == AF_UNSPEC) && 73 type->revision == revision) 74 return type; 75 return NULL; 76} 77 78/* Unlock, try to load a set type module and lock again */ 79static int 80try_to_load_type(const char *name) 81{ 82 nfnl_unlock(); 83 pr_debug("try to load ip_set_%s\n", name); 84 if (request_module("ip_set_%s", name) < 0) { 85 pr_warning("Can't find ip_set type %s\n", name); 86 nfnl_lock(); 87 return -IPSET_ERR_FIND_TYPE; 88 } 89 nfnl_lock(); 90 return -EAGAIN; 91} 92 93/* Find a set type and reference it */ 94static int 95find_set_type_get(const char *name, u8 family, u8 revision, 96 struct ip_set_type **found) 97{ 98 struct ip_set_type *type; 99 int err; 100 101 rcu_read_lock(); 102 *found = find_set_type(name, family, revision); 103 if (*found) { 104 err = !try_module_get((*found)->me) ? -EFAULT : 0; 105 goto unlock; 106 } 107 /* Make sure the type is loaded but we don't support the revision */ 108 list_for_each_entry_rcu(type, &ip_set_type_list, list) 109 if (STREQ(type->name, name)) { 110 err = -IPSET_ERR_FIND_TYPE; 111 goto unlock; 112 } 113 rcu_read_unlock(); 114 115 return try_to_load_type(name); 116 117unlock: 118 rcu_read_unlock(); 119 return err; 120} 121 122/* Find a given set type by name and family. 123 * If we succeeded, the supported minimal and maximum revisions are 124 * filled out. 125 */ 126static int 127find_set_type_minmax(const char *name, u8 family, u8 *min, u8 *max) 128{ 129 struct ip_set_type *type; 130 bool found = false; 131 132 *min = 255; *max = 0; 133 rcu_read_lock(); 134 list_for_each_entry_rcu(type, &ip_set_type_list, list) 135 if (STREQ(type->name, name) && 136 (type->family == family || type->family == AF_UNSPEC)) { 137 found = true; 138 if (type->revision < *min) 139 *min = type->revision; 140 if (type->revision > *max) 141 *max = type->revision; 142 } 143 rcu_read_unlock(); 144 if (found) 145 return 0; 146 147 return try_to_load_type(name); 148} 149 150#define family_name(f) ((f) == AF_INET ? "inet" : \ 151 (f) == AF_INET6 ? "inet6" : "any") 152 153/* Register a set type structure. The type is identified by 154 * the unique triple of name, family and revision. 155 */ 156int 157ip_set_type_register(struct ip_set_type *type) 158{ 159 int ret = 0; 160 161 if (type->protocol != IPSET_PROTOCOL) { 162 pr_warning("ip_set type %s, family %s, revision %u uses " 163 "wrong protocol version %u (want %u)\n", 164 type->name, family_name(type->family), 165 type->revision, type->protocol, IPSET_PROTOCOL); 166 return -EINVAL; 167 } 168 169 ip_set_type_lock(); 170 if (find_set_type(type->name, type->family, type->revision)) { 171 /* Duplicate! */ 172 pr_warning("ip_set type %s, family %s, revision %u " 173 "already registered!\n", type->name, 174 family_name(type->family), type->revision); 175 ret = -EINVAL; 176 goto unlock; 177 } 178 list_add_rcu(&type->list, &ip_set_type_list); 179 pr_debug("type %s, family %s, revision %u registered.\n", 180 type->name, family_name(type->family), type->revision); 181unlock: 182 ip_set_type_unlock(); 183 return ret; 184} 185EXPORT_SYMBOL_GPL(ip_set_type_register); 186 187/* Unregister a set type. There's a small race with ip_set_create */ 188void 189ip_set_type_unregister(struct ip_set_type *type) 190{ 191 ip_set_type_lock(); 192 if (!find_set_type(type->name, type->family, type->revision)) { 193 pr_warning("ip_set type %s, family %s, revision %u " 194 "not registered\n", type->name, 195 family_name(type->family), type->revision); 196 goto unlock; 197 } 198 list_del_rcu(&type->list); 199 pr_debug("type %s, family %s, revision %u unregistered.\n", 200 type->name, family_name(type->family), type->revision); 201unlock: 202 ip_set_type_unlock(); 203 204 synchronize_rcu(); 205} 206EXPORT_SYMBOL_GPL(ip_set_type_unregister); 207 208/* Utility functions */ 209void * 210ip_set_alloc(size_t size) 211{ 212 void *members = NULL; 213 214 if (size < KMALLOC_MAX_SIZE) 215 members = kzalloc(size, GFP_KERNEL | __GFP_NOWARN); 216 217 if (members) { 218 pr_debug("%p: allocated with kmalloc\n", members); 219 return members; 220 } 221 222 members = vzalloc(size); 223 if (!members) 224 return NULL; 225 pr_debug("%p: allocated with vmalloc\n", members); 226 227 return members; 228} 229EXPORT_SYMBOL_GPL(ip_set_alloc); 230 231void 232ip_set_free(void *members) 233{ 234 pr_debug("%p: free with %s\n", members, 235 is_vmalloc_addr(members) ? "vfree" : "kfree"); 236 if (is_vmalloc_addr(members)) 237 vfree(members); 238 else 239 kfree(members); 240} 241EXPORT_SYMBOL_GPL(ip_set_free); 242 243static inline bool 244flag_nested(const struct nlattr *nla) 245{ 246 return nla->nla_type & NLA_F_NESTED; 247} 248 249static const struct nla_policy ipaddr_policy[IPSET_ATTR_IPADDR_MAX + 1] = { 250 [IPSET_ATTR_IPADDR_IPV4] = { .type = NLA_U32 }, 251 [IPSET_ATTR_IPADDR_IPV6] = { .type = NLA_BINARY, 252 .len = sizeof(struct in6_addr) }, 253}; 254 255int 256ip_set_get_ipaddr4(struct nlattr *nla, __be32 *ipaddr) 257{ 258 struct nlattr *tb[IPSET_ATTR_IPADDR_MAX+1]; 259 260 if (unlikely(!flag_nested(nla))) 261 return -IPSET_ERR_PROTOCOL; 262 if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla, ipaddr_policy)) 263 return -IPSET_ERR_PROTOCOL; 264 if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV4))) 265 return -IPSET_ERR_PROTOCOL; 266 267 *ipaddr = nla_get_be32(tb[IPSET_ATTR_IPADDR_IPV4]); 268 return 0; 269} 270EXPORT_SYMBOL_GPL(ip_set_get_ipaddr4); 271 272int 273ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr) 274{ 275 struct nlattr *tb[IPSET_ATTR_IPADDR_MAX+1]; 276 277 if (unlikely(!flag_nested(nla))) 278 return -IPSET_ERR_PROTOCOL; 279 280 if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla, ipaddr_policy)) 281 return -IPSET_ERR_PROTOCOL; 282 if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV6))) 283 return -IPSET_ERR_PROTOCOL; 284 285 memcpy(ipaddr, nla_data(tb[IPSET_ATTR_IPADDR_IPV6]), 286 sizeof(struct in6_addr)); 287 return 0; 288} 289EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6); 290 291/* 292 * Creating/destroying/renaming/swapping affect the existence and 293 * the properties of a set. All of these can be executed from userspace 294 * only and serialized by the nfnl mutex indirectly from nfnetlink. 295 * 296 * Sets are identified by their index in ip_set_list and the index 297 * is used by the external references (set/SET netfilter modules). 298 * 299 * The set behind an index may change by swapping only, from userspace. 300 */ 301 302static inline void 303__ip_set_get(ip_set_id_t index) 304{ 305 write_lock_bh(&ip_set_ref_lock); 306 ip_set_list[index]->ref++; 307 write_unlock_bh(&ip_set_ref_lock); 308} 309 310static inline void 311__ip_set_put(ip_set_id_t index) 312{ 313 write_lock_bh(&ip_set_ref_lock); 314 BUG_ON(ip_set_list[index]->ref == 0); 315 ip_set_list[index]->ref--; 316 write_unlock_bh(&ip_set_ref_lock); 317} 318 319/* 320 * Add, del and test set entries from kernel. 321 * 322 * The set behind the index must exist and must be referenced 323 * so it can't be destroyed (or changed) under our foot. 324 */ 325 326int 327ip_set_test(ip_set_id_t index, const struct sk_buff *skb, 328 u8 family, u8 dim, u8 flags) 329{ 330 struct ip_set *set = ip_set_list[index]; 331 int ret = 0; 332 333 BUG_ON(set == NULL); 334 pr_debug("set %s, index %u\n", set->name, index); 335 336 if (dim < set->type->dimension || 337 !(family == set->family || set->family == AF_UNSPEC)) 338 return 0; 339 340 read_lock_bh(&set->lock); 341 ret = set->variant->kadt(set, skb, IPSET_TEST, family, dim, flags); 342 read_unlock_bh(&set->lock); 343 344 if (ret == -EAGAIN) { 345 /* Type requests element to be completed */ 346 pr_debug("element must be competed, ADD is triggered\n"); 347 write_lock_bh(&set->lock); 348 set->variant->kadt(set, skb, IPSET_ADD, family, dim, flags); 349 write_unlock_bh(&set->lock); 350 ret = 1; 351 } 352 353 /* Convert error codes to nomatch */ 354 return (ret < 0 ? 0 : ret); 355} 356EXPORT_SYMBOL_GPL(ip_set_test); 357 358int 359ip_set_add(ip_set_id_t index, const struct sk_buff *skb, 360 u8 family, u8 dim, u8 flags) 361{ 362 struct ip_set *set = ip_set_list[index]; 363 int ret; 364 365 BUG_ON(set == NULL); 366 pr_debug("set %s, index %u\n", set->name, index); 367 368 if (dim < set->type->dimension || 369 !(family == set->family || set->family == AF_UNSPEC)) 370 return 0; 371 372 write_lock_bh(&set->lock); 373 ret = set->variant->kadt(set, skb, IPSET_ADD, family, dim, flags); 374 write_unlock_bh(&set->lock); 375 376 return ret; 377} 378EXPORT_SYMBOL_GPL(ip_set_add); 379 380int 381ip_set_del(ip_set_id_t index, const struct sk_buff *skb, 382 u8 family, u8 dim, u8 flags) 383{ 384 struct ip_set *set = ip_set_list[index]; 385 int ret = 0; 386 387 BUG_ON(set == NULL); 388 pr_debug("set %s, index %u\n", set->name, index); 389 390 if (dim < set->type->dimension || 391 !(family == set->family || set->family == AF_UNSPEC)) 392 return 0; 393 394 write_lock_bh(&set->lock); 395 ret = set->variant->kadt(set, skb, IPSET_DEL, family, dim, flags); 396 write_unlock_bh(&set->lock); 397 398 return ret; 399} 400EXPORT_SYMBOL_GPL(ip_set_del); 401 402/* 403 * Find set by name, reference it once. The reference makes sure the 404 * thing pointed to, does not go away under our feet. 405 * 406 */ 407ip_set_id_t 408ip_set_get_byname(const char *name, struct ip_set **set) 409{ 410 ip_set_id_t i, index = IPSET_INVALID_ID; 411 struct ip_set *s; 412 413 for (i = 0; i < ip_set_max; i++) { 414 s = ip_set_list[i]; 415 if (s != NULL && STREQ(s->name, name)) { 416 __ip_set_get(i); 417 index = i; 418 *set = s; 419 } 420 } 421 422 return index; 423} 424EXPORT_SYMBOL_GPL(ip_set_get_byname); 425 426/* 427 * If the given set pointer points to a valid set, decrement 428 * reference count by 1. The caller shall not assume the index 429 * to be valid, after calling this function. 430 * 431 */ 432void 433ip_set_put_byindex(ip_set_id_t index) 434{ 435 if (ip_set_list[index] != NULL) 436 __ip_set_put(index); 437} 438EXPORT_SYMBOL_GPL(ip_set_put_byindex); 439 440/* 441 * Get the name of a set behind a set index. 442 * We assume the set is referenced, so it does exist and 443 * can't be destroyed. The set cannot be renamed due to 444 * the referencing either. 445 * 446 */ 447const char * 448ip_set_name_byindex(ip_set_id_t index) 449{ 450 const struct ip_set *set = ip_set_list[index]; 451 452 BUG_ON(set == NULL); 453 BUG_ON(set->ref == 0); 454 455 /* Referenced, so it's safe */ 456 return set->name; 457} 458EXPORT_SYMBOL_GPL(ip_set_name_byindex); 459 460/* 461 * Routines to call by external subsystems, which do not 462 * call nfnl_lock for us. 463 */ 464 465/* 466 * Find set by name, reference it once. The reference makes sure the 467 * thing pointed to, does not go away under our feet. 468 * 469 * The nfnl mutex is used in the function. 470 */ 471ip_set_id_t 472ip_set_nfnl_get(const char *name) 473{ 474 struct ip_set *s; 475 ip_set_id_t index; 476 477 nfnl_lock(); 478 index = ip_set_get_byname(name, &s); 479 nfnl_unlock(); 480 481 return index; 482} 483EXPORT_SYMBOL_GPL(ip_set_nfnl_get); 484 485/* 486 * Find set by index, reference it once. The reference makes sure the 487 * thing pointed to, does not go away under our feet. 488 * 489 * The nfnl mutex is used in the function. 490 */ 491ip_set_id_t 492ip_set_nfnl_get_byindex(ip_set_id_t index) 493{ 494 if (index > ip_set_max) 495 return IPSET_INVALID_ID; 496 497 nfnl_lock(); 498 if (ip_set_list[index]) 499 __ip_set_get(index); 500 else 501 index = IPSET_INVALID_ID; 502 nfnl_unlock(); 503 504 return index; 505} 506EXPORT_SYMBOL_GPL(ip_set_nfnl_get_byindex); 507 508/* 509 * If the given set pointer points to a valid set, decrement 510 * reference count by 1. The caller shall not assume the index 511 * to be valid, after calling this function. 512 * 513 * The nfnl mutex is used in the function. 514 */ 515void 516ip_set_nfnl_put(ip_set_id_t index) 517{ 518 nfnl_lock(); 519 ip_set_put_byindex(index); 520 nfnl_unlock(); 521} 522EXPORT_SYMBOL_GPL(ip_set_nfnl_put); 523 524/* 525 * Communication protocol with userspace over netlink. 526 * 527 * The commands are serialized by the nfnl mutex. 528 */ 529 530static inline bool 531protocol_failed(const struct nlattr * const tb[]) 532{ 533 return !tb[IPSET_ATTR_PROTOCOL] || 534 nla_get_u8(tb[IPSET_ATTR_PROTOCOL]) != IPSET_PROTOCOL; 535} 536 537static inline u32 538flag_exist(const struct nlmsghdr *nlh) 539{ 540 return nlh->nlmsg_flags & NLM_F_EXCL ? 0 : IPSET_FLAG_EXIST; 541} 542 543static struct nlmsghdr * 544start_msg(struct sk_buff *skb, u32 pid, u32 seq, unsigned int flags, 545 enum ipset_cmd cmd) 546{ 547 struct nlmsghdr *nlh; 548 struct nfgenmsg *nfmsg; 549 550 nlh = nlmsg_put(skb, pid, seq, cmd | (NFNL_SUBSYS_IPSET << 8), 551 sizeof(*nfmsg), flags); 552 if (nlh == NULL) 553 return NULL; 554 555 nfmsg = nlmsg_data(nlh); 556 nfmsg->nfgen_family = AF_INET; 557 nfmsg->version = NFNETLINK_V0; 558 nfmsg->res_id = 0; 559 560 return nlh; 561} 562 563/* Create a set */ 564 565static const struct nla_policy ip_set_create_policy[IPSET_ATTR_CMD_MAX + 1] = { 566 [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 }, 567 [IPSET_ATTR_SETNAME] = { .type = NLA_NUL_STRING, 568 .len = IPSET_MAXNAMELEN - 1 }, 569 [IPSET_ATTR_TYPENAME] = { .type = NLA_NUL_STRING, 570 .len = IPSET_MAXNAMELEN - 1}, 571 [IPSET_ATTR_REVISION] = { .type = NLA_U8 }, 572 [IPSET_ATTR_FAMILY] = { .type = NLA_U8 }, 573 [IPSET_ATTR_DATA] = { .type = NLA_NESTED }, 574}; 575 576static ip_set_id_t 577find_set_id(const char *name) 578{ 579 ip_set_id_t i, index = IPSET_INVALID_ID; 580 const struct ip_set *set; 581 582 for (i = 0; index == IPSET_INVALID_ID && i < ip_set_max; i++) { 583 set = ip_set_list[i]; 584 if (set != NULL && STREQ(set->name, name)) 585 index = i; 586 } 587 return index; 588} 589 590static inline struct ip_set * 591find_set(const char *name) 592{ 593 ip_set_id_t index = find_set_id(name); 594 595 return index == IPSET_INVALID_ID ? NULL : ip_set_list[index]; 596} 597 598static int 599find_free_id(const char *name, ip_set_id_t *index, struct ip_set **set) 600{ 601 ip_set_id_t i; 602 603 *index = IPSET_INVALID_ID; 604 for (i = 0; i < ip_set_max; i++) { 605 if (ip_set_list[i] == NULL) { 606 if (*index == IPSET_INVALID_ID) 607 *index = i; 608 } else if (STREQ(name, ip_set_list[i]->name)) { 609 /* Name clash */ 610 *set = ip_set_list[i]; 611 return -EEXIST; 612 } 613 } 614 if (*index == IPSET_INVALID_ID) 615 /* No free slot remained */ 616 return -IPSET_ERR_MAX_SETS; 617 return 0; 618} 619 620static int 621ip_set_create(struct sock *ctnl, struct sk_buff *skb, 622 const struct nlmsghdr *nlh, 623 const struct nlattr * const attr[]) 624{ 625 struct ip_set *set, *clash = NULL; 626 ip_set_id_t index = IPSET_INVALID_ID; 627 struct nlattr *tb[IPSET_ATTR_CREATE_MAX+1] = {}; 628 const char *name, *typename; 629 u8 family, revision; 630 u32 flags = flag_exist(nlh); 631 int ret = 0; 632 633 if (unlikely(protocol_failed(attr) || 634 attr[IPSET_ATTR_SETNAME] == NULL || 635 attr[IPSET_ATTR_TYPENAME] == NULL || 636 attr[IPSET_ATTR_REVISION] == NULL || 637 attr[IPSET_ATTR_FAMILY] == NULL || 638 (attr[IPSET_ATTR_DATA] != NULL && 639 !flag_nested(attr[IPSET_ATTR_DATA])))) 640 return -IPSET_ERR_PROTOCOL; 641 642 name = nla_data(attr[IPSET_ATTR_SETNAME]); 643 typename = nla_data(attr[IPSET_ATTR_TYPENAME]); 644 family = nla_get_u8(attr[IPSET_ATTR_FAMILY]); 645 revision = nla_get_u8(attr[IPSET_ATTR_REVISION]); 646 pr_debug("setname: %s, typename: %s, family: %s, revision: %u\n", 647 name, typename, family_name(family), revision); 648 649 /* 650 * First, and without any locks, allocate and initialize 651 * a normal base set structure. 652 */ 653 set = kzalloc(sizeof(struct ip_set), GFP_KERNEL); 654 if (!set) 655 return -ENOMEM; 656 rwlock_init(&set->lock); 657 strlcpy(set->name, name, IPSET_MAXNAMELEN); 658 set->family = family; 659 660 /* 661 * Next, check that we know the type, and take 662 * a reference on the type, to make sure it stays available 663 * while constructing our new set. 664 * 665 * After referencing the type, we try to create the type 666 * specific part of the set without holding any locks. 667 */ 668 ret = find_set_type_get(typename, family, revision, &(set->type)); 669 if (ret) 670 goto out; 671 672 /* 673 * Without holding any locks, create private part. 674 */ 675 if (attr[IPSET_ATTR_DATA] && 676 nla_parse_nested(tb, IPSET_ATTR_CREATE_MAX, attr[IPSET_ATTR_DATA], 677 set->type->create_policy)) { 678 ret = -IPSET_ERR_PROTOCOL; 679 goto put_out; 680 } 681 682 ret = set->type->create(set, tb, flags); 683 if (ret != 0) 684 goto put_out; 685 686 /* BTW, ret==0 here. */ 687 688 /* 689 * Here, we have a valid, constructed set and we are protected 690 * by the nfnl mutex. Find the first free index in ip_set_list 691 * and check clashing. 692 */ 693 if ((ret = find_free_id(set->name, &index, &clash)) != 0) { 694 /* If this is the same set and requested, ignore error */ 695 if (ret == -EEXIST && 696 (flags & IPSET_FLAG_EXIST) && 697 STREQ(set->type->name, clash->type->name) && 698 set->type->family == clash->type->family && 699 set->type->revision == clash->type->revision && 700 set->variant->same_set(set, clash)) 701 ret = 0; 702 goto cleanup; 703 } 704 705 /* 706 * Finally! Add our shiny new set to the list, and be done. 707 */ 708 pr_debug("create: '%s' created with index %u!\n", set->name, index); 709 ip_set_list[index] = set; 710 711 return ret; 712 713cleanup: 714 set->variant->destroy(set); 715put_out: 716 module_put(set->type->me); 717out: 718 kfree(set); 719 return ret; 720} 721 722/* Destroy sets */ 723 724static const struct nla_policy 725ip_set_setname_policy[IPSET_ATTR_CMD_MAX + 1] = { 726 [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 }, 727 [IPSET_ATTR_SETNAME] = { .type = NLA_NUL_STRING, 728 .len = IPSET_MAXNAMELEN - 1 }, 729}; 730 731static void 732ip_set_destroy_set(ip_set_id_t index) 733{ 734 struct ip_set *set = ip_set_list[index]; 735 736 pr_debug("set: %s\n", set->name); 737 ip_set_list[index] = NULL; 738 739 /* Must call it without holding any lock */ 740 set->variant->destroy(set); 741 module_put(set->type->me); 742 kfree(set); 743} 744 745static int 746ip_set_destroy(struct sock *ctnl, struct sk_buff *skb, 747 const struct nlmsghdr *nlh, 748 const struct nlattr * const attr[]) 749{ 750 ip_set_id_t i; 751 int ret = 0; 752 753 if (unlikely(protocol_failed(attr))) 754 return -IPSET_ERR_PROTOCOL; 755 756 /* Commands are serialized and references are 757 * protected by the ip_set_ref_lock. 758 * External systems (i.e. xt_set) must call 759 * ip_set_put|get_nfnl_* functions, that way we 760 * can safely check references here. 761 * 762 * list:set timer can only decrement the reference 763 * counter, so if it's already zero, we can proceed 764 * without holding the lock. 765 */ 766 read_lock_bh(&ip_set_ref_lock); 767 if (!attr[IPSET_ATTR_SETNAME]) { 768 for (i = 0; i < ip_set_max; i++) { 769 if (ip_set_list[i] != NULL && ip_set_list[i]->ref) { 770 ret = IPSET_ERR_BUSY; 771 goto out; 772 } 773 } 774 read_unlock_bh(&ip_set_ref_lock); 775 for (i = 0; i < ip_set_max; i++) { 776 if (ip_set_list[i] != NULL) 777 ip_set_destroy_set(i); 778 } 779 } else { 780 i = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME])); 781 if (i == IPSET_INVALID_ID) { 782 ret = -ENOENT; 783 goto out; 784 } else if (ip_set_list[i]->ref) { 785 ret = -IPSET_ERR_BUSY; 786 goto out; 787 } 788 read_unlock_bh(&ip_set_ref_lock); 789 790 ip_set_destroy_set(i); 791 } 792 return 0; 793out: 794 read_unlock_bh(&ip_set_ref_lock); 795 return ret; 796} 797 798/* Flush sets */ 799 800static void 801ip_set_flush_set(struct ip_set *set) 802{ 803 pr_debug("set: %s\n", set->name); 804 805 write_lock_bh(&set->lock); 806 set->variant->flush(set); 807 write_unlock_bh(&set->lock); 808} 809 810static int 811ip_set_flush(struct sock *ctnl, struct sk_buff *skb, 812 const struct nlmsghdr *nlh, 813 const struct nlattr * const attr[]) 814{ 815 ip_set_id_t i; 816 817 if (unlikely(protocol_failed(attr))) 818 return -EPROTO; 819 820 if (!attr[IPSET_ATTR_SETNAME]) { 821 for (i = 0; i < ip_set_max; i++) 822 if (ip_set_list[i] != NULL) 823 ip_set_flush_set(ip_set_list[i]); 824 } else { 825 i = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME])); 826 if (i == IPSET_INVALID_ID) 827 return -ENOENT; 828 829 ip_set_flush_set(ip_set_list[i]); 830 } 831 832 return 0; 833} 834 835/* Rename a set */ 836 837static const struct nla_policy 838ip_set_setname2_policy[IPSET_ATTR_CMD_MAX + 1] = { 839 [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 }, 840 [IPSET_ATTR_SETNAME] = { .type = NLA_NUL_STRING, 841 .len = IPSET_MAXNAMELEN - 1 }, 842 [IPSET_ATTR_SETNAME2] = { .type = NLA_NUL_STRING, 843 .len = IPSET_MAXNAMELEN - 1 }, 844}; 845 846static int 847ip_set_rename(struct sock *ctnl, struct sk_buff *skb, 848 const struct nlmsghdr *nlh, 849 const struct nlattr * const attr[]) 850{ 851 struct ip_set *set; 852 const char *name2; 853 ip_set_id_t i; 854 int ret = 0; 855 856 if (unlikely(protocol_failed(attr) || 857 attr[IPSET_ATTR_SETNAME] == NULL || 858 attr[IPSET_ATTR_SETNAME2] == NULL)) 859 return -IPSET_ERR_PROTOCOL; 860 861 set = find_set(nla_data(attr[IPSET_ATTR_SETNAME])); 862 if (set == NULL) 863 return -ENOENT; 864 865 read_lock_bh(&ip_set_ref_lock); 866 if (set->ref != 0) { 867 ret = -IPSET_ERR_REFERENCED; 868 goto out; 869 } 870 871 name2 = nla_data(attr[IPSET_ATTR_SETNAME2]); 872 for (i = 0; i < ip_set_max; i++) { 873 if (ip_set_list[i] != NULL && 874 STREQ(ip_set_list[i]->name, name2)) { 875 ret = -IPSET_ERR_EXIST_SETNAME2; 876 goto out; 877 } 878 } 879 strncpy(set->name, name2, IPSET_MAXNAMELEN); 880 881out: 882 read_unlock_bh(&ip_set_ref_lock); 883 return ret; 884} 885 886/* Swap two sets so that name/index points to the other. 887 * References and set names are also swapped. 888 * 889 * The commands are serialized by the nfnl mutex and references are 890 * protected by the ip_set_ref_lock. The kernel interfaces 891 * do not hold the mutex but the pointer settings are atomic 892 * so the ip_set_list always contains valid pointers to the sets. 893 */ 894 895static int 896ip_set_swap(struct sock *ctnl, struct sk_buff *skb, 897 const struct nlmsghdr *nlh, 898 const struct nlattr * const attr[]) 899{ 900 struct ip_set *from, *to; 901 ip_set_id_t from_id, to_id; 902 char from_name[IPSET_MAXNAMELEN]; 903 904 if (unlikely(protocol_failed(attr) || 905 attr[IPSET_ATTR_SETNAME] == NULL || 906 attr[IPSET_ATTR_SETNAME2] == NULL)) 907 return -IPSET_ERR_PROTOCOL; 908 909 from_id = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME])); 910 if (from_id == IPSET_INVALID_ID) 911 return -ENOENT; 912 913 to_id = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME2])); 914 if (to_id == IPSET_INVALID_ID) 915 return -IPSET_ERR_EXIST_SETNAME2; 916 917 from = ip_set_list[from_id]; 918 to = ip_set_list[to_id]; 919 920 /* Features must not change. 921 * Not an artifical restriction anymore, as we must prevent 922 * possible loops created by swapping in setlist type of sets. */ 923 if (!(from->type->features == to->type->features && 924 from->type->family == to->type->family)) 925 return -IPSET_ERR_TYPE_MISMATCH; 926 927 strncpy(from_name, from->name, IPSET_MAXNAMELEN); 928 strncpy(from->name, to->name, IPSET_MAXNAMELEN); 929 strncpy(to->name, from_name, IPSET_MAXNAMELEN); 930 931 write_lock_bh(&ip_set_ref_lock); 932 swap(from->ref, to->ref); 933 ip_set_list[from_id] = to; 934 ip_set_list[to_id] = from; 935 write_unlock_bh(&ip_set_ref_lock); 936 937 return 0; 938} 939 940/* List/save set data */ 941 942#define DUMP_INIT 0L 943#define DUMP_ALL 1L 944#define DUMP_ONE 2L 945#define DUMP_LAST 3L 946 947static int 948ip_set_dump_done(struct netlink_callback *cb) 949{ 950 if (cb->args[2]) { 951 pr_debug("release set %s\n", ip_set_list[cb->args[1]]->name); 952 ip_set_put_byindex((ip_set_id_t) cb->args[1]); 953 } 954 return 0; 955} 956 957static inline void 958dump_attrs(struct nlmsghdr *nlh) 959{ 960 const struct nlattr *attr; 961 int rem; 962 963 pr_debug("dump nlmsg\n"); 964 nlmsg_for_each_attr(attr, nlh, sizeof(struct nfgenmsg), rem) { 965 pr_debug("type: %u, len %u\n", nla_type(attr), attr->nla_len); 966 } 967} 968 969static int 970dump_init(struct netlink_callback *cb) 971{ 972 struct nlmsghdr *nlh = nlmsg_hdr(cb->skb); 973 int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg)); 974 struct nlattr *cda[IPSET_ATTR_CMD_MAX+1]; 975 struct nlattr *attr = (void *)nlh + min_len; 976 ip_set_id_t index; 977 978 /* Second pass, so parser can't fail */ 979 nla_parse(cda, IPSET_ATTR_CMD_MAX, 980 attr, nlh->nlmsg_len - min_len, ip_set_setname_policy); 981 982 /* cb->args[0] : dump single set/all sets 983 * [1] : set index 984 * [..]: type specific 985 */ 986 987 if (!cda[IPSET_ATTR_SETNAME]) { 988 cb->args[0] = DUMP_ALL; 989 return 0; 990 } 991 992 index = find_set_id(nla_data(cda[IPSET_ATTR_SETNAME])); 993 if (index == IPSET_INVALID_ID) 994 return -ENOENT; 995 996 cb->args[0] = DUMP_ONE; 997 cb->args[1] = index; 998 return 0; 999} 1000 1001static int 1002ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb) 1003{ 1004 ip_set_id_t index = IPSET_INVALID_ID, max; 1005 struct ip_set *set = NULL; 1006 struct nlmsghdr *nlh = NULL; 1007 unsigned int flags = NETLINK_CB(cb->skb).pid ? NLM_F_MULTI : 0; 1008 int ret = 0; 1009 1010 if (cb->args[0] == DUMP_INIT) { 1011 ret = dump_init(cb); 1012 if (ret < 0) { 1013 nlh = nlmsg_hdr(cb->skb); 1014 /* We have to create and send the error message 1015 * manually :-( */ 1016 if (nlh->nlmsg_flags & NLM_F_ACK) 1017 netlink_ack(cb->skb, nlh, ret); 1018 return ret; 1019 } 1020 } 1021 1022 if (cb->args[1] >= ip_set_max) 1023 goto out; 1024 1025 max = cb->args[0] == DUMP_ONE ? cb->args[1] + 1 : ip_set_max; 1026dump_last: 1027 pr_debug("args[0]: %ld args[1]: %ld\n", cb->args[0], cb->args[1]); 1028 for (; cb->args[1] < max; cb->args[1]++) { 1029 index = (ip_set_id_t) cb->args[1]; 1030 set = ip_set_list[index]; 1031 if (set == NULL) { 1032 if (cb->args[0] == DUMP_ONE) { 1033 ret = -ENOENT; 1034 goto out; 1035 } 1036 continue; 1037 } 1038 /* When dumping all sets, we must dump "sorted" 1039 * so that lists (unions of sets) are dumped last. 1040 */ 1041 if (cb->args[0] != DUMP_ONE && 1042 ((cb->args[0] == DUMP_ALL) == 1043 !!(set->type->features & IPSET_DUMP_LAST))) 1044 continue; 1045 pr_debug("List set: %s\n", set->name); 1046 if (!cb->args[2]) { 1047 /* Start listing: make sure set won't be destroyed */ 1048 pr_debug("reference set\n"); 1049 __ip_set_get(index); 1050 } 1051 nlh = start_msg(skb, NETLINK_CB(cb->skb).pid, 1052 cb->nlh->nlmsg_seq, flags, 1053 IPSET_CMD_LIST); 1054 if (!nlh) { 1055 ret = -EMSGSIZE; 1056 goto release_refcount; 1057 } 1058 NLA_PUT_U8(skb, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL); 1059 NLA_PUT_STRING(skb, IPSET_ATTR_SETNAME, set->name); 1060 switch (cb->args[2]) { 1061 case 0: 1062 /* Core header data */ 1063 NLA_PUT_STRING(skb, IPSET_ATTR_TYPENAME, 1064 set->type->name); 1065 NLA_PUT_U8(skb, IPSET_ATTR_FAMILY, 1066 set->family); 1067 NLA_PUT_U8(skb, IPSET_ATTR_REVISION, 1068 set->type->revision); 1069 ret = set->variant->head(set, skb); 1070 if (ret < 0) 1071 goto release_refcount; 1072 /* Fall through and add elements */ 1073 default: 1074 read_lock_bh(&set->lock); 1075 ret = set->variant->list(set, skb, cb); 1076 read_unlock_bh(&set->lock); 1077 if (!cb->args[2]) { 1078 /* Set is done, proceed with next one */ 1079 if (cb->args[0] == DUMP_ONE) 1080 cb->args[1] = IPSET_INVALID_ID; 1081 else 1082 cb->args[1]++; 1083 } 1084 goto release_refcount; 1085 } 1086 } 1087 /* If we dump all sets, continue with dumping last ones */ 1088 if (cb->args[0] == DUMP_ALL) { 1089 cb->args[0] = DUMP_LAST; 1090 cb->args[1] = 0; 1091 goto dump_last; 1092 } 1093 goto out; 1094 1095nla_put_failure: 1096 ret = -EFAULT; 1097release_refcount: 1098 /* If there was an error or set is done, release set */ 1099 if (ret || !cb->args[2]) { 1100 pr_debug("release set %s\n", ip_set_list[index]->name); 1101 ip_set_put_byindex(index); 1102 } 1103out: 1104 if (nlh) { 1105 nlmsg_end(skb, nlh); 1106 pr_debug("nlmsg_len: %u\n", nlh->nlmsg_len); 1107 dump_attrs(nlh); 1108 } 1109 1110 return ret < 0 ? ret : skb->len; 1111} 1112 1113static int 1114ip_set_dump(struct sock *ctnl, struct sk_buff *skb, 1115 const struct nlmsghdr *nlh, 1116 const struct nlattr * const attr[]) 1117{ 1118 if (unlikely(protocol_failed(attr))) 1119 return -IPSET_ERR_PROTOCOL; 1120 1121 return netlink_dump_start(ctnl, skb, nlh, 1122 ip_set_dump_start, 1123 ip_set_dump_done); 1124} 1125 1126/* Add, del and test */ 1127 1128static const struct nla_policy ip_set_adt_policy[IPSET_ATTR_CMD_MAX + 1] = { 1129 [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 }, 1130 [IPSET_ATTR_SETNAME] = { .type = NLA_NUL_STRING, 1131 .len = IPSET_MAXNAMELEN - 1 }, 1132 [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, 1133 [IPSET_ATTR_DATA] = { .type = NLA_NESTED }, 1134 [IPSET_ATTR_ADT] = { .type = NLA_NESTED }, 1135}; 1136 1137static int 1138call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set, 1139 struct nlattr *tb[], enum ipset_adt adt, 1140 u32 flags, bool use_lineno) 1141{ 1142 int ret, retried = 0; 1143 u32 lineno = 0; 1144 bool eexist = flags & IPSET_FLAG_EXIST; 1145 1146 do { 1147 write_lock_bh(&set->lock); 1148 ret = set->variant->uadt(set, tb, adt, &lineno, flags); 1149 write_unlock_bh(&set->lock); 1150 } while (ret == -EAGAIN && 1151 set->variant->resize && 1152 (ret = set->variant->resize(set, retried++)) == 0); 1153 1154 if (!ret || (ret == -IPSET_ERR_EXIST && eexist)) 1155 return 0; 1156 if (lineno && use_lineno) { 1157 /* Error in restore/batch mode: send back lineno */ 1158 struct nlmsghdr *rep, *nlh = nlmsg_hdr(skb); 1159 struct sk_buff *skb2; 1160 struct nlmsgerr *errmsg; 1161 size_t payload = sizeof(*errmsg) + nlmsg_len(nlh); 1162 int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg)); 1163 struct nlattr *cda[IPSET_ATTR_CMD_MAX+1]; 1164 struct nlattr *cmdattr; 1165 u32 *errline; 1166 1167 skb2 = nlmsg_new(payload, GFP_KERNEL); 1168 if (skb2 == NULL) 1169 return -ENOMEM; 1170 rep = __nlmsg_put(skb2, NETLINK_CB(skb).pid, 1171 nlh->nlmsg_seq, NLMSG_ERROR, payload, 0); 1172 errmsg = nlmsg_data(rep); 1173 errmsg->error = ret; 1174 memcpy(&errmsg->msg, nlh, nlh->nlmsg_len); 1175 cmdattr = (void *)&errmsg->msg + min_len; 1176 1177 nla_parse(cda, IPSET_ATTR_CMD_MAX, 1178 cmdattr, nlh->nlmsg_len - min_len, 1179 ip_set_adt_policy); 1180 1181 errline = nla_data(cda[IPSET_ATTR_LINENO]); 1182 1183 *errline = lineno; 1184 1185 netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); 1186 /* Signal netlink not to send its ACK/errmsg. */ 1187 return -EINTR; 1188 } 1189 1190 return ret; 1191} 1192 1193static int 1194ip_set_uadd(struct sock *ctnl, struct sk_buff *skb, 1195 const struct nlmsghdr *nlh, 1196 const struct nlattr * const attr[]) 1197{ 1198 struct ip_set *set; 1199 struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {}; 1200 const struct nlattr *nla; 1201 u32 flags = flag_exist(nlh); 1202 bool use_lineno; 1203 int ret = 0; 1204 1205 if (unlikely(protocol_failed(attr) || 1206 attr[IPSET_ATTR_SETNAME] == NULL || 1207 !((attr[IPSET_ATTR_DATA] != NULL) ^ 1208 (attr[IPSET_ATTR_ADT] != NULL)) || 1209 (attr[IPSET_ATTR_DATA] != NULL && 1210 !flag_nested(attr[IPSET_ATTR_DATA])) || 1211 (attr[IPSET_ATTR_ADT] != NULL && 1212 (!flag_nested(attr[IPSET_ATTR_ADT]) || 1213 attr[IPSET_ATTR_LINENO] == NULL)))) 1214 return -IPSET_ERR_PROTOCOL; 1215 1216 set = find_set(nla_data(attr[IPSET_ATTR_SETNAME])); 1217 if (set == NULL) 1218 return -ENOENT; 1219 1220 use_lineno = !!attr[IPSET_ATTR_LINENO]; 1221 if (attr[IPSET_ATTR_DATA]) { 1222 if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, 1223 attr[IPSET_ATTR_DATA], 1224 set->type->adt_policy)) 1225 return -IPSET_ERR_PROTOCOL; 1226 ret = call_ad(ctnl, skb, set, tb, IPSET_ADD, flags, 1227 use_lineno); 1228 } else { 1229 int nla_rem; 1230 1231 nla_for_each_nested(nla, attr[IPSET_ATTR_ADT], nla_rem) { 1232 memset(tb, 0, sizeof(tb)); 1233 if (nla_type(nla) != IPSET_ATTR_DATA || 1234 !flag_nested(nla) || 1235 nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, nla, 1236 set->type->adt_policy)) 1237 return -IPSET_ERR_PROTOCOL; 1238 ret = call_ad(ctnl, skb, set, tb, IPSET_ADD, 1239 flags, use_lineno); 1240 if (ret < 0) 1241 return ret; 1242 } 1243 } 1244 return ret; 1245} 1246 1247static int 1248ip_set_udel(struct sock *ctnl, struct sk_buff *skb, 1249 const struct nlmsghdr *nlh, 1250 const struct nlattr * const attr[]) 1251{ 1252 struct ip_set *set; 1253 struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {}; 1254 const struct nlattr *nla; 1255 u32 flags = flag_exist(nlh); 1256 bool use_lineno; 1257 int ret = 0; 1258 1259 if (unlikely(protocol_failed(attr) || 1260 attr[IPSET_ATTR_SETNAME] == NULL || 1261 !((attr[IPSET_ATTR_DATA] != NULL) ^ 1262 (attr[IPSET_ATTR_ADT] != NULL)) || 1263 (attr[IPSET_ATTR_DATA] != NULL && 1264 !flag_nested(attr[IPSET_ATTR_DATA])) || 1265 (attr[IPSET_ATTR_ADT] != NULL && 1266 (!flag_nested(attr[IPSET_ATTR_ADT]) || 1267 attr[IPSET_ATTR_LINENO] == NULL)))) 1268 return -IPSET_ERR_PROTOCOL; 1269 1270 set = find_set(nla_data(attr[IPSET_ATTR_SETNAME])); 1271 if (set == NULL) 1272 return -ENOENT; 1273 1274 use_lineno = !!attr[IPSET_ATTR_LINENO]; 1275 if (attr[IPSET_ATTR_DATA]) { 1276 if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, 1277 attr[IPSET_ATTR_DATA], 1278 set->type->adt_policy)) 1279 return -IPSET_ERR_PROTOCOL; 1280 ret = call_ad(ctnl, skb, set, tb, IPSET_DEL, flags, 1281 use_lineno); 1282 } else { 1283 int nla_rem; 1284 1285 nla_for_each_nested(nla, attr[IPSET_ATTR_ADT], nla_rem) { 1286 memset(tb, 0, sizeof(*tb)); 1287 if (nla_type(nla) != IPSET_ATTR_DATA || 1288 !flag_nested(nla) || 1289 nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, nla, 1290 set->type->adt_policy)) 1291 return -IPSET_ERR_PROTOCOL; 1292 ret = call_ad(ctnl, skb, set, tb, IPSET_DEL, 1293 flags, use_lineno); 1294 if (ret < 0) 1295 return ret; 1296 } 1297 } 1298 return ret; 1299} 1300 1301static int 1302ip_set_utest(struct sock *ctnl, struct sk_buff *skb, 1303 const struct nlmsghdr *nlh, 1304 const struct nlattr * const attr[]) 1305{ 1306 struct ip_set *set; 1307 struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {}; 1308 int ret = 0; 1309 1310 if (unlikely(protocol_failed(attr) || 1311 attr[IPSET_ATTR_SETNAME] == NULL || 1312 attr[IPSET_ATTR_DATA] == NULL || 1313 !flag_nested(attr[IPSET_ATTR_DATA]))) 1314 return -IPSET_ERR_PROTOCOL; 1315 1316 set = find_set(nla_data(attr[IPSET_ATTR_SETNAME])); 1317 if (set == NULL) 1318 return -ENOENT; 1319 1320 if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, attr[IPSET_ATTR_DATA], 1321 set->type->adt_policy)) 1322 return -IPSET_ERR_PROTOCOL; 1323 1324 read_lock_bh(&set->lock); 1325 ret = set->variant->uadt(set, tb, IPSET_TEST, NULL, 0); 1326 read_unlock_bh(&set->lock); 1327 /* Userspace can't trigger element to be re-added */ 1328 if (ret == -EAGAIN) 1329 ret = 1; 1330 1331 return ret < 0 ? ret : ret > 0 ? 0 : -IPSET_ERR_EXIST; 1332} 1333 1334/* Get headed data of a set */ 1335 1336static int 1337ip_set_header(struct sock *ctnl, struct sk_buff *skb, 1338 const struct nlmsghdr *nlh, 1339 const struct nlattr * const attr[]) 1340{ 1341 const struct ip_set *set; 1342 struct sk_buff *skb2; 1343 struct nlmsghdr *nlh2; 1344 ip_set_id_t index; 1345 int ret = 0; 1346 1347 if (unlikely(protocol_failed(attr) || 1348 attr[IPSET_ATTR_SETNAME] == NULL)) 1349 return -IPSET_ERR_PROTOCOL; 1350 1351 index = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME])); 1352 if (index == IPSET_INVALID_ID) 1353 return -ENOENT; 1354 set = ip_set_list[index]; 1355 1356 skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 1357 if (skb2 == NULL) 1358 return -ENOMEM; 1359 1360 nlh2 = start_msg(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0, 1361 IPSET_CMD_HEADER); 1362 if (!nlh2) 1363 goto nlmsg_failure; 1364 NLA_PUT_U8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL); 1365 NLA_PUT_STRING(skb2, IPSET_ATTR_SETNAME, set->name); 1366 NLA_PUT_STRING(skb2, IPSET_ATTR_TYPENAME, set->type->name); 1367 NLA_PUT_U8(skb2, IPSET_ATTR_FAMILY, set->family); 1368 NLA_PUT_U8(skb2, IPSET_ATTR_REVISION, set->type->revision); 1369 nlmsg_end(skb2, nlh2); 1370 1371 ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); 1372 if (ret < 0) 1373 return ret; 1374 1375 return 0; 1376 1377nla_put_failure: 1378 nlmsg_cancel(skb2, nlh2); 1379nlmsg_failure: 1380 kfree_skb(skb2); 1381 return -EMSGSIZE; 1382} 1383 1384/* Get type data */ 1385 1386static const struct nla_policy ip_set_type_policy[IPSET_ATTR_CMD_MAX + 1] = { 1387 [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 }, 1388 [IPSET_ATTR_TYPENAME] = { .type = NLA_NUL_STRING, 1389 .len = IPSET_MAXNAMELEN - 1 }, 1390 [IPSET_ATTR_FAMILY] = { .type = NLA_U8 }, 1391}; 1392 1393static int 1394ip_set_type(struct sock *ctnl, struct sk_buff *skb, 1395 const struct nlmsghdr *nlh, 1396 const struct nlattr * const attr[]) 1397{ 1398 struct sk_buff *skb2; 1399 struct nlmsghdr *nlh2; 1400 u8 family, min, max; 1401 const char *typename; 1402 int ret = 0; 1403 1404 if (unlikely(protocol_failed(attr) || 1405 attr[IPSET_ATTR_TYPENAME] == NULL || 1406 attr[IPSET_ATTR_FAMILY] == NULL)) 1407 return -IPSET_ERR_PROTOCOL; 1408 1409 family = nla_get_u8(attr[IPSET_ATTR_FAMILY]); 1410 typename = nla_data(attr[IPSET_ATTR_TYPENAME]); 1411 ret = find_set_type_minmax(typename, family, &min, &max); 1412 if (ret) 1413 return ret; 1414 1415 skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 1416 if (skb2 == NULL) 1417 return -ENOMEM; 1418 1419 nlh2 = start_msg(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0, 1420 IPSET_CMD_TYPE); 1421 if (!nlh2) 1422 goto nlmsg_failure; 1423 NLA_PUT_U8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL); 1424 NLA_PUT_STRING(skb2, IPSET_ATTR_TYPENAME, typename); 1425 NLA_PUT_U8(skb2, IPSET_ATTR_FAMILY, family); 1426 NLA_PUT_U8(skb2, IPSET_ATTR_REVISION, max); 1427 NLA_PUT_U8(skb2, IPSET_ATTR_REVISION_MIN, min); 1428 nlmsg_end(skb2, nlh2); 1429 1430 pr_debug("Send TYPE, nlmsg_len: %u\n", nlh2->nlmsg_len); 1431 ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); 1432 if (ret < 0) 1433 return ret; 1434 1435 return 0; 1436 1437nla_put_failure: 1438 nlmsg_cancel(skb2, nlh2); 1439nlmsg_failure: 1440 kfree_skb(skb2); 1441 return -EMSGSIZE; 1442} 1443 1444/* Get protocol version */ 1445 1446static const struct nla_policy 1447ip_set_protocol_policy[IPSET_ATTR_CMD_MAX + 1] = { 1448 [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 }, 1449}; 1450 1451static int 1452ip_set_protocol(struct sock *ctnl, struct sk_buff *skb, 1453 const struct nlmsghdr *nlh, 1454 const struct nlattr * const attr[]) 1455{ 1456 struct sk_buff *skb2; 1457 struct nlmsghdr *nlh2; 1458 int ret = 0; 1459 1460 if (unlikely(attr[IPSET_ATTR_PROTOCOL] == NULL)) 1461 return -IPSET_ERR_PROTOCOL; 1462 1463 skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 1464 if (skb2 == NULL) 1465 return -ENOMEM; 1466 1467 nlh2 = start_msg(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0, 1468 IPSET_CMD_PROTOCOL); 1469 if (!nlh2) 1470 goto nlmsg_failure; 1471 NLA_PUT_U8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL); 1472 nlmsg_end(skb2, nlh2); 1473 1474 ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); 1475 if (ret < 0) 1476 return ret; 1477 1478 return 0; 1479 1480nla_put_failure: 1481 nlmsg_cancel(skb2, nlh2); 1482nlmsg_failure: 1483 kfree_skb(skb2); 1484 return -EMSGSIZE; 1485} 1486 1487static const struct nfnl_callback ip_set_netlink_subsys_cb[IPSET_MSG_MAX] = { 1488 [IPSET_CMD_CREATE] = { 1489 .call = ip_set_create, 1490 .attr_count = IPSET_ATTR_CMD_MAX, 1491 .policy = ip_set_create_policy, 1492 }, 1493 [IPSET_CMD_DESTROY] = { 1494 .call = ip_set_destroy, 1495 .attr_count = IPSET_ATTR_CMD_MAX, 1496 .policy = ip_set_setname_policy, 1497 }, 1498 [IPSET_CMD_FLUSH] = { 1499 .call = ip_set_flush, 1500 .attr_count = IPSET_ATTR_CMD_MAX, 1501 .policy = ip_set_setname_policy, 1502 }, 1503 [IPSET_CMD_RENAME] = { 1504 .call = ip_set_rename, 1505 .attr_count = IPSET_ATTR_CMD_MAX, 1506 .policy = ip_set_setname2_policy, 1507 }, 1508 [IPSET_CMD_SWAP] = { 1509 .call = ip_set_swap, 1510 .attr_count = IPSET_ATTR_CMD_MAX, 1511 .policy = ip_set_setname2_policy, 1512 }, 1513 [IPSET_CMD_LIST] = { 1514 .call = ip_set_dump, 1515 .attr_count = IPSET_ATTR_CMD_MAX, 1516 .policy = ip_set_setname_policy, 1517 }, 1518 [IPSET_CMD_SAVE] = { 1519 .call = ip_set_dump, 1520 .attr_count = IPSET_ATTR_CMD_MAX, 1521 .policy = ip_set_setname_policy, 1522 }, 1523 [IPSET_CMD_ADD] = { 1524 .call = ip_set_uadd, 1525 .attr_count = IPSET_ATTR_CMD_MAX, 1526 .policy = ip_set_adt_policy, 1527 }, 1528 [IPSET_CMD_DEL] = { 1529 .call = ip_set_udel, 1530 .attr_count = IPSET_ATTR_CMD_MAX, 1531 .policy = ip_set_adt_policy, 1532 }, 1533 [IPSET_CMD_TEST] = { 1534 .call = ip_set_utest, 1535 .attr_count = IPSET_ATTR_CMD_MAX, 1536 .policy = ip_set_adt_policy, 1537 }, 1538 [IPSET_CMD_HEADER] = { 1539 .call = ip_set_header, 1540 .attr_count = IPSET_ATTR_CMD_MAX, 1541 .policy = ip_set_setname_policy, 1542 }, 1543 [IPSET_CMD_TYPE] = { 1544 .call = ip_set_type, 1545 .attr_count = IPSET_ATTR_CMD_MAX, 1546 .policy = ip_set_type_policy, 1547 }, 1548 [IPSET_CMD_PROTOCOL] = { 1549 .call = ip_set_protocol, 1550 .attr_count = IPSET_ATTR_CMD_MAX, 1551 .policy = ip_set_protocol_policy, 1552 }, 1553}; 1554 1555static struct nfnetlink_subsystem ip_set_netlink_subsys __read_mostly = { 1556 .name = "ip_set", 1557 .subsys_id = NFNL_SUBSYS_IPSET, 1558 .cb_count = IPSET_MSG_MAX, 1559 .cb = ip_set_netlink_subsys_cb, 1560}; 1561 1562/* Interface to iptables/ip6tables */ 1563 1564static int 1565ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) 1566{ 1567 unsigned *op; 1568 void *data; 1569 int copylen = *len, ret = 0; 1570 1571 if (!capable(CAP_NET_ADMIN)) 1572 return -EPERM; 1573 if (optval != SO_IP_SET) 1574 return -EBADF; 1575 if (*len < sizeof(unsigned)) 1576 return -EINVAL; 1577 1578 data = vmalloc(*len); 1579 if (!data) 1580 return -ENOMEM; 1581 if (copy_from_user(data, user, *len) != 0) { 1582 ret = -EFAULT; 1583 goto done; 1584 } 1585 op = (unsigned *) data; 1586 1587 if (*op < IP_SET_OP_VERSION) { 1588 /* Check the version at the beginning of operations */ 1589 struct ip_set_req_version *req_version = data; 1590 if (req_version->version != IPSET_PROTOCOL) { 1591 ret = -EPROTO; 1592 goto done; 1593 } 1594 } 1595 1596 switch (*op) { 1597 case IP_SET_OP_VERSION: { 1598 struct ip_set_req_version *req_version = data; 1599 1600 if (*len != sizeof(struct ip_set_req_version)) { 1601 ret = -EINVAL; 1602 goto done; 1603 } 1604 1605 req_version->version = IPSET_PROTOCOL; 1606 ret = copy_to_user(user, req_version, 1607 sizeof(struct ip_set_req_version)); 1608 goto done; 1609 } 1610 case IP_SET_OP_GET_BYNAME: { 1611 struct ip_set_req_get_set *req_get = data; 1612 1613 if (*len != sizeof(struct ip_set_req_get_set)) { 1614 ret = -EINVAL; 1615 goto done; 1616 } 1617 req_get->set.name[IPSET_MAXNAMELEN - 1] = '\0'; 1618 nfnl_lock(); 1619 req_get->set.index = find_set_id(req_get->set.name); 1620 nfnl_unlock(); 1621 goto copy; 1622 } 1623 case IP_SET_OP_GET_BYINDEX: { 1624 struct ip_set_req_get_set *req_get = data; 1625 1626 if (*len != sizeof(struct ip_set_req_get_set) || 1627 req_get->set.index >= ip_set_max) { 1628 ret = -EINVAL; 1629 goto done; 1630 } 1631 nfnl_lock(); 1632 strncpy(req_get->set.name, 1633 ip_set_list[req_get->set.index] 1634 ? ip_set_list[req_get->set.index]->name : "", 1635 IPSET_MAXNAMELEN); 1636 nfnl_unlock(); 1637 goto copy; 1638 } 1639 default: 1640 ret = -EBADMSG; 1641 goto done; 1642 } /* end of switch(op) */ 1643 1644copy: 1645 ret = copy_to_user(user, data, copylen); 1646 1647done: 1648 vfree(data); 1649 if (ret > 0) 1650 ret = 0; 1651 return ret; 1652} 1653 1654static struct nf_sockopt_ops so_set __read_mostly = { 1655 .pf = PF_INET, 1656 .get_optmin = SO_IP_SET, 1657 .get_optmax = SO_IP_SET + 1, 1658 .get = &ip_set_sockfn_get, 1659 .owner = THIS_MODULE, 1660}; 1661 1662static int __init 1663ip_set_init(void) 1664{ 1665 int ret; 1666 1667 if (max_sets) 1668 ip_set_max = max_sets; 1669 if (ip_set_max >= IPSET_INVALID_ID) 1670 ip_set_max = IPSET_INVALID_ID - 1; 1671 1672 ip_set_list = kzalloc(sizeof(struct ip_set *) * ip_set_max, 1673 GFP_KERNEL); 1674 if (!ip_set_list) { 1675 pr_err("ip_set: Unable to create ip_set_list\n"); 1676 return -ENOMEM; 1677 } 1678 1679 ret = nfnetlink_subsys_register(&ip_set_netlink_subsys); 1680 if (ret != 0) { 1681 pr_err("ip_set: cannot register with nfnetlink.\n"); 1682 kfree(ip_set_list); 1683 return ret; 1684 } 1685 ret = nf_register_sockopt(&so_set); 1686 if (ret != 0) { 1687 pr_err("SO_SET registry failed: %d\n", ret); 1688 nfnetlink_subsys_unregister(&ip_set_netlink_subsys); 1689 kfree(ip_set_list); 1690 return ret; 1691 } 1692 1693 pr_notice("ip_set: protocol %u\n", IPSET_PROTOCOL); 1694 return 0; 1695} 1696 1697static void __exit 1698ip_set_fini(void) 1699{ 1700 /* There can't be any existing set */ 1701 nf_unregister_sockopt(&so_set); 1702 nfnetlink_subsys_unregister(&ip_set_netlink_subsys); 1703 kfree(ip_set_list); 1704 pr_debug("these are the famous last words\n"); 1705} 1706 1707module_init(ip_set_init); 1708module_exit(ip_set_fini); 1709