ip_set_core.c revision 9d8832320f832b9360f6bca71cc045d2e4df171b
1/* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu> 2 * Patrick Schaaf <bof@bof.de> 3 * Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> 4 * 5 * This program is free software; you can redistribute it and/or modify 6 * it under the terms of the GNU General Public License version 2 as 7 * published by the Free Software Foundation. 8 */ 9 10/* Kernel module for IP set management */ 11 12#include <linux/init.h> 13#include <linux/module.h> 14#include <linux/moduleparam.h> 15#include <linux/ip.h> 16#include <linux/skbuff.h> 17#include <linux/spinlock.h> 18#include <linux/netlink.h> 19#include <linux/rculist.h> 20#include <linux/version.h> 21#include <net/netlink.h> 22 23#include <linux/netfilter.h> 24#include <linux/netfilter/x_tables.h> 25#include <linux/netfilter/nfnetlink.h> 26#include <linux/netfilter/ipset/ip_set.h> 27 28static LIST_HEAD(ip_set_type_list); /* all registered set types */ 29static DEFINE_MUTEX(ip_set_type_mutex); /* protects ip_set_type_list */ 30static DEFINE_RWLOCK(ip_set_ref_lock); /* protects the set refs */ 31 32static struct ip_set **ip_set_list; /* all individual sets */ 33static ip_set_id_t ip_set_max = CONFIG_IP_SET_MAX; /* max number of sets */ 34 35#define STREQ(a, b) (strncmp(a, b, IPSET_MAXNAMELEN) == 0) 36 37static unsigned int max_sets; 38 39module_param(max_sets, int, 0600); 40MODULE_PARM_DESC(max_sets, "maximal number of sets"); 41MODULE_LICENSE("GPL"); 42MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); 43MODULE_DESCRIPTION("core IP set support"); 44MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET); 45 46/* 47 * The set types are implemented in modules and registered set types 48 * can be found in ip_set_type_list. Adding/deleting types is 49 * serialized by ip_set_type_mutex. 50 */ 51 52static inline void 53ip_set_type_lock(void) 54{ 55 mutex_lock(&ip_set_type_mutex); 56} 57 58static inline void 59ip_set_type_unlock(void) 60{ 61 mutex_unlock(&ip_set_type_mutex); 62} 63 64/* Register and deregister settype */ 65 66static struct ip_set_type * 67find_set_type(const char *name, u8 family, u8 revision) 68{ 69 struct ip_set_type *type; 70 71 list_for_each_entry_rcu(type, &ip_set_type_list, list) 72 if (STREQ(type->name, name) && 73 (type->family == family || type->family == AF_UNSPEC) && 74 revision >= type->revision_min && 75 revision <= type->revision_max) 76 return type; 77 return NULL; 78} 79 80/* Unlock, try to load a set type module and lock again */ 81static int 82try_to_load_type(const char *name) 83{ 84 nfnl_unlock(); 85 pr_debug("try to load ip_set_%s\n", name); 86 if (request_module("ip_set_%s", name) < 0) { 87 pr_warning("Can't find ip_set type %s\n", name); 88 nfnl_lock(); 89 return -IPSET_ERR_FIND_TYPE; 90 } 91 nfnl_lock(); 92 return -EAGAIN; 93} 94 95/* Find a set type and reference it */ 96static int 97find_set_type_get(const char *name, u8 family, u8 revision, 98 struct ip_set_type **found) 99{ 100 struct ip_set_type *type; 101 int err; 102 103 rcu_read_lock(); 104 *found = find_set_type(name, family, revision); 105 if (*found) { 106 err = !try_module_get((*found)->me) ? -EFAULT : 0; 107 goto unlock; 108 } 109 /* Make sure the type is loaded but we don't support the revision */ 110 list_for_each_entry_rcu(type, &ip_set_type_list, list) 111 if (STREQ(type->name, name)) { 112 err = -IPSET_ERR_FIND_TYPE; 113 goto unlock; 114 } 115 rcu_read_unlock(); 116 117 return try_to_load_type(name); 118 119unlock: 120 rcu_read_unlock(); 121 return err; 122} 123 124/* Find a given set type by name and family. 125 * If we succeeded, the supported minimal and maximum revisions are 126 * filled out. 127 */ 128static int 129find_set_type_minmax(const char *name, u8 family, u8 *min, u8 *max) 130{ 131 struct ip_set_type *type; 132 bool found = false; 133 134 *min = 255; *max = 0; 135 rcu_read_lock(); 136 list_for_each_entry_rcu(type, &ip_set_type_list, list) 137 if (STREQ(type->name, name) && 138 (type->family == family || type->family == AF_UNSPEC)) { 139 found = true; 140 if (type->revision_min < *min) 141 *min = type->revision_min; 142 if (type->revision_max > *max) 143 *max = type->revision_max; 144 } 145 rcu_read_unlock(); 146 if (found) 147 return 0; 148 149 return try_to_load_type(name); 150} 151 152#define family_name(f) ((f) == AF_INET ? "inet" : \ 153 (f) == AF_INET6 ? "inet6" : "any") 154 155/* Register a set type structure. The type is identified by 156 * the unique triple of name, family and revision. 157 */ 158int 159ip_set_type_register(struct ip_set_type *type) 160{ 161 int ret = 0; 162 163 if (type->protocol != IPSET_PROTOCOL) { 164 pr_warning("ip_set type %s, family %s, revision %u:%u uses " 165 "wrong protocol version %u (want %u)\n", 166 type->name, family_name(type->family), 167 type->revision_min, type->revision_max, 168 type->protocol, IPSET_PROTOCOL); 169 return -EINVAL; 170 } 171 172 ip_set_type_lock(); 173 if (find_set_type(type->name, type->family, type->revision_min)) { 174 /* Duplicate! */ 175 pr_warning("ip_set type %s, family %s with revision min %u " 176 "already registered!\n", type->name, 177 family_name(type->family), type->revision_min); 178 ret = -EINVAL; 179 goto unlock; 180 } 181 list_add_rcu(&type->list, &ip_set_type_list); 182 pr_debug("type %s, family %s, revision %u:%u registered.\n", 183 type->name, family_name(type->family), 184 type->revision_min, type->revision_max); 185unlock: 186 ip_set_type_unlock(); 187 return ret; 188} 189EXPORT_SYMBOL_GPL(ip_set_type_register); 190 191/* Unregister a set type. There's a small race with ip_set_create */ 192void 193ip_set_type_unregister(struct ip_set_type *type) 194{ 195 ip_set_type_lock(); 196 if (!find_set_type(type->name, type->family, type->revision_min)) { 197 pr_warning("ip_set type %s, family %s with revision min %u " 198 "not registered\n", type->name, 199 family_name(type->family), type->revision_min); 200 goto unlock; 201 } 202 list_del_rcu(&type->list); 203 pr_debug("type %s, family %s with revision min %u unregistered.\n", 204 type->name, family_name(type->family), type->revision_min); 205unlock: 206 ip_set_type_unlock(); 207 208 synchronize_rcu(); 209} 210EXPORT_SYMBOL_GPL(ip_set_type_unregister); 211 212/* Utility functions */ 213void * 214ip_set_alloc(size_t size) 215{ 216 void *members = NULL; 217 218 if (size < KMALLOC_MAX_SIZE) 219 members = kzalloc(size, GFP_KERNEL | __GFP_NOWARN); 220 221 if (members) { 222 pr_debug("%p: allocated with kmalloc\n", members); 223 return members; 224 } 225 226 members = vzalloc(size); 227 if (!members) 228 return NULL; 229 pr_debug("%p: allocated with vmalloc\n", members); 230 231 return members; 232} 233EXPORT_SYMBOL_GPL(ip_set_alloc); 234 235void 236ip_set_free(void *members) 237{ 238 pr_debug("%p: free with %s\n", members, 239 is_vmalloc_addr(members) ? "vfree" : "kfree"); 240 if (is_vmalloc_addr(members)) 241 vfree(members); 242 else 243 kfree(members); 244} 245EXPORT_SYMBOL_GPL(ip_set_free); 246 247static inline bool 248flag_nested(const struct nlattr *nla) 249{ 250 return nla->nla_type & NLA_F_NESTED; 251} 252 253static const struct nla_policy ipaddr_policy[IPSET_ATTR_IPADDR_MAX + 1] = { 254 [IPSET_ATTR_IPADDR_IPV4] = { .type = NLA_U32 }, 255 [IPSET_ATTR_IPADDR_IPV6] = { .type = NLA_BINARY, 256 .len = sizeof(struct in6_addr) }, 257}; 258 259int 260ip_set_get_ipaddr4(struct nlattr *nla, __be32 *ipaddr) 261{ 262 struct nlattr *tb[IPSET_ATTR_IPADDR_MAX+1]; 263 264 if (unlikely(!flag_nested(nla))) 265 return -IPSET_ERR_PROTOCOL; 266 if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla, ipaddr_policy)) 267 return -IPSET_ERR_PROTOCOL; 268 if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV4))) 269 return -IPSET_ERR_PROTOCOL; 270 271 *ipaddr = nla_get_be32(tb[IPSET_ATTR_IPADDR_IPV4]); 272 return 0; 273} 274EXPORT_SYMBOL_GPL(ip_set_get_ipaddr4); 275 276int 277ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr) 278{ 279 struct nlattr *tb[IPSET_ATTR_IPADDR_MAX+1]; 280 281 if (unlikely(!flag_nested(nla))) 282 return -IPSET_ERR_PROTOCOL; 283 284 if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla, ipaddr_policy)) 285 return -IPSET_ERR_PROTOCOL; 286 if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV6))) 287 return -IPSET_ERR_PROTOCOL; 288 289 memcpy(ipaddr, nla_data(tb[IPSET_ATTR_IPADDR_IPV6]), 290 sizeof(struct in6_addr)); 291 return 0; 292} 293EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6); 294 295/* 296 * Creating/destroying/renaming/swapping affect the existence and 297 * the properties of a set. All of these can be executed from userspace 298 * only and serialized by the nfnl mutex indirectly from nfnetlink. 299 * 300 * Sets are identified by their index in ip_set_list and the index 301 * is used by the external references (set/SET netfilter modules). 302 * 303 * The set behind an index may change by swapping only, from userspace. 304 */ 305 306static inline void 307__ip_set_get(ip_set_id_t index) 308{ 309 write_lock_bh(&ip_set_ref_lock); 310 ip_set_list[index]->ref++; 311 write_unlock_bh(&ip_set_ref_lock); 312} 313 314static inline void 315__ip_set_put(ip_set_id_t index) 316{ 317 write_lock_bh(&ip_set_ref_lock); 318 BUG_ON(ip_set_list[index]->ref == 0); 319 ip_set_list[index]->ref--; 320 write_unlock_bh(&ip_set_ref_lock); 321} 322 323/* 324 * Add, del and test set entries from kernel. 325 * 326 * The set behind the index must exist and must be referenced 327 * so it can't be destroyed (or changed) under our foot. 328 */ 329 330int 331ip_set_test(ip_set_id_t index, const struct sk_buff *skb, 332 const struct xt_action_param *par, 333 const struct ip_set_adt_opt *opt) 334{ 335 struct ip_set *set = ip_set_list[index]; 336 int ret = 0; 337 338 BUG_ON(set == NULL); 339 pr_debug("set %s, index %u\n", set->name, index); 340 341 if (opt->dim < set->type->dimension || 342 !(opt->family == set->family || set->family == AF_UNSPEC)) 343 return 0; 344 345 read_lock_bh(&set->lock); 346 ret = set->variant->kadt(set, skb, par, IPSET_TEST, opt); 347 read_unlock_bh(&set->lock); 348 349 if (ret == -EAGAIN) { 350 /* Type requests element to be completed */ 351 pr_debug("element must be competed, ADD is triggered\n"); 352 write_lock_bh(&set->lock); 353 set->variant->kadt(set, skb, par, IPSET_ADD, opt); 354 write_unlock_bh(&set->lock); 355 ret = 1; 356 } 357 358 /* Convert error codes to nomatch */ 359 return (ret < 0 ? 0 : ret); 360} 361EXPORT_SYMBOL_GPL(ip_set_test); 362 363int 364ip_set_add(ip_set_id_t index, const struct sk_buff *skb, 365 const struct xt_action_param *par, 366 const struct ip_set_adt_opt *opt) 367{ 368 struct ip_set *set = ip_set_list[index]; 369 int ret; 370 371 BUG_ON(set == NULL); 372 pr_debug("set %s, index %u\n", set->name, index); 373 374 if (opt->dim < set->type->dimension || 375 !(opt->family == set->family || set->family == AF_UNSPEC)) 376 return 0; 377 378 write_lock_bh(&set->lock); 379 ret = set->variant->kadt(set, skb, par, IPSET_ADD, opt); 380 write_unlock_bh(&set->lock); 381 382 return ret; 383} 384EXPORT_SYMBOL_GPL(ip_set_add); 385 386int 387ip_set_del(ip_set_id_t index, const struct sk_buff *skb, 388 const struct xt_action_param *par, 389 const struct ip_set_adt_opt *opt) 390{ 391 struct ip_set *set = ip_set_list[index]; 392 int ret = 0; 393 394 BUG_ON(set == NULL); 395 pr_debug("set %s, index %u\n", set->name, index); 396 397 if (opt->dim < set->type->dimension || 398 !(opt->family == set->family || set->family == AF_UNSPEC)) 399 return 0; 400 401 write_lock_bh(&set->lock); 402 ret = set->variant->kadt(set, skb, par, IPSET_DEL, opt); 403 write_unlock_bh(&set->lock); 404 405 return ret; 406} 407EXPORT_SYMBOL_GPL(ip_set_del); 408 409/* 410 * Find set by name, reference it once. The reference makes sure the 411 * thing pointed to, does not go away under our feet. 412 * 413 */ 414ip_set_id_t 415ip_set_get_byname(const char *name, struct ip_set **set) 416{ 417 ip_set_id_t i, index = IPSET_INVALID_ID; 418 struct ip_set *s; 419 420 for (i = 0; i < ip_set_max; i++) { 421 s = ip_set_list[i]; 422 if (s != NULL && STREQ(s->name, name)) { 423 __ip_set_get(i); 424 index = i; 425 *set = s; 426 } 427 } 428 429 return index; 430} 431EXPORT_SYMBOL_GPL(ip_set_get_byname); 432 433/* 434 * If the given set pointer points to a valid set, decrement 435 * reference count by 1. The caller shall not assume the index 436 * to be valid, after calling this function. 437 * 438 */ 439void 440ip_set_put_byindex(ip_set_id_t index) 441{ 442 if (ip_set_list[index] != NULL) 443 __ip_set_put(index); 444} 445EXPORT_SYMBOL_GPL(ip_set_put_byindex); 446 447/* 448 * Get the name of a set behind a set index. 449 * We assume the set is referenced, so it does exist and 450 * can't be destroyed. The set cannot be renamed due to 451 * the referencing either. 452 * 453 */ 454const char * 455ip_set_name_byindex(ip_set_id_t index) 456{ 457 const struct ip_set *set = ip_set_list[index]; 458 459 BUG_ON(set == NULL); 460 BUG_ON(set->ref == 0); 461 462 /* Referenced, so it's safe */ 463 return set->name; 464} 465EXPORT_SYMBOL_GPL(ip_set_name_byindex); 466 467/* 468 * Routines to call by external subsystems, which do not 469 * call nfnl_lock for us. 470 */ 471 472/* 473 * Find set by name, reference it once. The reference makes sure the 474 * thing pointed to, does not go away under our feet. 475 * 476 * The nfnl mutex is used in the function. 477 */ 478ip_set_id_t 479ip_set_nfnl_get(const char *name) 480{ 481 struct ip_set *s; 482 ip_set_id_t index; 483 484 nfnl_lock(); 485 index = ip_set_get_byname(name, &s); 486 nfnl_unlock(); 487 488 return index; 489} 490EXPORT_SYMBOL_GPL(ip_set_nfnl_get); 491 492/* 493 * Find set by index, reference it once. The reference makes sure the 494 * thing pointed to, does not go away under our feet. 495 * 496 * The nfnl mutex is used in the function. 497 */ 498ip_set_id_t 499ip_set_nfnl_get_byindex(ip_set_id_t index) 500{ 501 if (index > ip_set_max) 502 return IPSET_INVALID_ID; 503 504 nfnl_lock(); 505 if (ip_set_list[index]) 506 __ip_set_get(index); 507 else 508 index = IPSET_INVALID_ID; 509 nfnl_unlock(); 510 511 return index; 512} 513EXPORT_SYMBOL_GPL(ip_set_nfnl_get_byindex); 514 515/* 516 * If the given set pointer points to a valid set, decrement 517 * reference count by 1. The caller shall not assume the index 518 * to be valid, after calling this function. 519 * 520 * The nfnl mutex is used in the function. 521 */ 522void 523ip_set_nfnl_put(ip_set_id_t index) 524{ 525 nfnl_lock(); 526 ip_set_put_byindex(index); 527 nfnl_unlock(); 528} 529EXPORT_SYMBOL_GPL(ip_set_nfnl_put); 530 531/* 532 * Communication protocol with userspace over netlink. 533 * 534 * The commands are serialized by the nfnl mutex. 535 */ 536 537static inline bool 538protocol_failed(const struct nlattr * const tb[]) 539{ 540 return !tb[IPSET_ATTR_PROTOCOL] || 541 nla_get_u8(tb[IPSET_ATTR_PROTOCOL]) != IPSET_PROTOCOL; 542} 543 544static inline u32 545flag_exist(const struct nlmsghdr *nlh) 546{ 547 return nlh->nlmsg_flags & NLM_F_EXCL ? 0 : IPSET_FLAG_EXIST; 548} 549 550static struct nlmsghdr * 551start_msg(struct sk_buff *skb, u32 pid, u32 seq, unsigned int flags, 552 enum ipset_cmd cmd) 553{ 554 struct nlmsghdr *nlh; 555 struct nfgenmsg *nfmsg; 556 557 nlh = nlmsg_put(skb, pid, seq, cmd | (NFNL_SUBSYS_IPSET << 8), 558 sizeof(*nfmsg), flags); 559 if (nlh == NULL) 560 return NULL; 561 562 nfmsg = nlmsg_data(nlh); 563 nfmsg->nfgen_family = AF_INET; 564 nfmsg->version = NFNETLINK_V0; 565 nfmsg->res_id = 0; 566 567 return nlh; 568} 569 570/* Create a set */ 571 572static const struct nla_policy ip_set_create_policy[IPSET_ATTR_CMD_MAX + 1] = { 573 [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 }, 574 [IPSET_ATTR_SETNAME] = { .type = NLA_NUL_STRING, 575 .len = IPSET_MAXNAMELEN - 1 }, 576 [IPSET_ATTR_TYPENAME] = { .type = NLA_NUL_STRING, 577 .len = IPSET_MAXNAMELEN - 1}, 578 [IPSET_ATTR_REVISION] = { .type = NLA_U8 }, 579 [IPSET_ATTR_FAMILY] = { .type = NLA_U8 }, 580 [IPSET_ATTR_DATA] = { .type = NLA_NESTED }, 581}; 582 583static ip_set_id_t 584find_set_id(const char *name) 585{ 586 ip_set_id_t i, index = IPSET_INVALID_ID; 587 const struct ip_set *set; 588 589 for (i = 0; index == IPSET_INVALID_ID && i < ip_set_max; i++) { 590 set = ip_set_list[i]; 591 if (set != NULL && STREQ(set->name, name)) 592 index = i; 593 } 594 return index; 595} 596 597static inline struct ip_set * 598find_set(const char *name) 599{ 600 ip_set_id_t index = find_set_id(name); 601 602 return index == IPSET_INVALID_ID ? NULL : ip_set_list[index]; 603} 604 605static int 606find_free_id(const char *name, ip_set_id_t *index, struct ip_set **set) 607{ 608 ip_set_id_t i; 609 610 *index = IPSET_INVALID_ID; 611 for (i = 0; i < ip_set_max; i++) { 612 if (ip_set_list[i] == NULL) { 613 if (*index == IPSET_INVALID_ID) 614 *index = i; 615 } else if (STREQ(name, ip_set_list[i]->name)) { 616 /* Name clash */ 617 *set = ip_set_list[i]; 618 return -EEXIST; 619 } 620 } 621 if (*index == IPSET_INVALID_ID) 622 /* No free slot remained */ 623 return -IPSET_ERR_MAX_SETS; 624 return 0; 625} 626 627static int 628ip_set_create(struct sock *ctnl, struct sk_buff *skb, 629 const struct nlmsghdr *nlh, 630 const struct nlattr * const attr[]) 631{ 632 struct ip_set *set, *clash = NULL; 633 ip_set_id_t index = IPSET_INVALID_ID; 634 struct nlattr *tb[IPSET_ATTR_CREATE_MAX+1] = {}; 635 const char *name, *typename; 636 u8 family, revision; 637 u32 flags = flag_exist(nlh); 638 int ret = 0; 639 640 if (unlikely(protocol_failed(attr) || 641 attr[IPSET_ATTR_SETNAME] == NULL || 642 attr[IPSET_ATTR_TYPENAME] == NULL || 643 attr[IPSET_ATTR_REVISION] == NULL || 644 attr[IPSET_ATTR_FAMILY] == NULL || 645 (attr[IPSET_ATTR_DATA] != NULL && 646 !flag_nested(attr[IPSET_ATTR_DATA])))) 647 return -IPSET_ERR_PROTOCOL; 648 649 name = nla_data(attr[IPSET_ATTR_SETNAME]); 650 typename = nla_data(attr[IPSET_ATTR_TYPENAME]); 651 family = nla_get_u8(attr[IPSET_ATTR_FAMILY]); 652 revision = nla_get_u8(attr[IPSET_ATTR_REVISION]); 653 pr_debug("setname: %s, typename: %s, family: %s, revision: %u\n", 654 name, typename, family_name(family), revision); 655 656 /* 657 * First, and without any locks, allocate and initialize 658 * a normal base set structure. 659 */ 660 set = kzalloc(sizeof(struct ip_set), GFP_KERNEL); 661 if (!set) 662 return -ENOMEM; 663 rwlock_init(&set->lock); 664 strlcpy(set->name, name, IPSET_MAXNAMELEN); 665 set->family = family; 666 set->revision = revision; 667 668 /* 669 * Next, check that we know the type, and take 670 * a reference on the type, to make sure it stays available 671 * while constructing our new set. 672 * 673 * After referencing the type, we try to create the type 674 * specific part of the set without holding any locks. 675 */ 676 ret = find_set_type_get(typename, family, revision, &(set->type)); 677 if (ret) 678 goto out; 679 680 /* 681 * Without holding any locks, create private part. 682 */ 683 if (attr[IPSET_ATTR_DATA] && 684 nla_parse_nested(tb, IPSET_ATTR_CREATE_MAX, attr[IPSET_ATTR_DATA], 685 set->type->create_policy)) { 686 ret = -IPSET_ERR_PROTOCOL; 687 goto put_out; 688 } 689 690 ret = set->type->create(set, tb, flags); 691 if (ret != 0) 692 goto put_out; 693 694 /* BTW, ret==0 here. */ 695 696 /* 697 * Here, we have a valid, constructed set and we are protected 698 * by the nfnl mutex. Find the first free index in ip_set_list 699 * and check clashing. 700 */ 701 if ((ret = find_free_id(set->name, &index, &clash)) != 0) { 702 /* If this is the same set and requested, ignore error */ 703 if (ret == -EEXIST && 704 (flags & IPSET_FLAG_EXIST) && 705 STREQ(set->type->name, clash->type->name) && 706 set->type->family == clash->type->family && 707 set->type->revision_min == clash->type->revision_min && 708 set->type->revision_max == clash->type->revision_max && 709 set->variant->same_set(set, clash)) 710 ret = 0; 711 goto cleanup; 712 } 713 714 /* 715 * Finally! Add our shiny new set to the list, and be done. 716 */ 717 pr_debug("create: '%s' created with index %u!\n", set->name, index); 718 ip_set_list[index] = set; 719 720 return ret; 721 722cleanup: 723 set->variant->destroy(set); 724put_out: 725 module_put(set->type->me); 726out: 727 kfree(set); 728 return ret; 729} 730 731/* Destroy sets */ 732 733static const struct nla_policy 734ip_set_setname_policy[IPSET_ATTR_CMD_MAX + 1] = { 735 [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 }, 736 [IPSET_ATTR_SETNAME] = { .type = NLA_NUL_STRING, 737 .len = IPSET_MAXNAMELEN - 1 }, 738}; 739 740static void 741ip_set_destroy_set(ip_set_id_t index) 742{ 743 struct ip_set *set = ip_set_list[index]; 744 745 pr_debug("set: %s\n", set->name); 746 ip_set_list[index] = NULL; 747 748 /* Must call it without holding any lock */ 749 set->variant->destroy(set); 750 module_put(set->type->me); 751 kfree(set); 752} 753 754static int 755ip_set_destroy(struct sock *ctnl, struct sk_buff *skb, 756 const struct nlmsghdr *nlh, 757 const struct nlattr * const attr[]) 758{ 759 ip_set_id_t i; 760 int ret = 0; 761 762 if (unlikely(protocol_failed(attr))) 763 return -IPSET_ERR_PROTOCOL; 764 765 /* Commands are serialized and references are 766 * protected by the ip_set_ref_lock. 767 * External systems (i.e. xt_set) must call 768 * ip_set_put|get_nfnl_* functions, that way we 769 * can safely check references here. 770 * 771 * list:set timer can only decrement the reference 772 * counter, so if it's already zero, we can proceed 773 * without holding the lock. 774 */ 775 read_lock_bh(&ip_set_ref_lock); 776 if (!attr[IPSET_ATTR_SETNAME]) { 777 for (i = 0; i < ip_set_max; i++) { 778 if (ip_set_list[i] != NULL && ip_set_list[i]->ref) { 779 ret = -IPSET_ERR_BUSY; 780 goto out; 781 } 782 } 783 read_unlock_bh(&ip_set_ref_lock); 784 for (i = 0; i < ip_set_max; i++) { 785 if (ip_set_list[i] != NULL) 786 ip_set_destroy_set(i); 787 } 788 } else { 789 i = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME])); 790 if (i == IPSET_INVALID_ID) { 791 ret = -ENOENT; 792 goto out; 793 } else if (ip_set_list[i]->ref) { 794 ret = -IPSET_ERR_BUSY; 795 goto out; 796 } 797 read_unlock_bh(&ip_set_ref_lock); 798 799 ip_set_destroy_set(i); 800 } 801 return 0; 802out: 803 read_unlock_bh(&ip_set_ref_lock); 804 return ret; 805} 806 807/* Flush sets */ 808 809static void 810ip_set_flush_set(struct ip_set *set) 811{ 812 pr_debug("set: %s\n", set->name); 813 814 write_lock_bh(&set->lock); 815 set->variant->flush(set); 816 write_unlock_bh(&set->lock); 817} 818 819static int 820ip_set_flush(struct sock *ctnl, struct sk_buff *skb, 821 const struct nlmsghdr *nlh, 822 const struct nlattr * const attr[]) 823{ 824 ip_set_id_t i; 825 826 if (unlikely(protocol_failed(attr))) 827 return -IPSET_ERR_PROTOCOL; 828 829 if (!attr[IPSET_ATTR_SETNAME]) { 830 for (i = 0; i < ip_set_max; i++) 831 if (ip_set_list[i] != NULL) 832 ip_set_flush_set(ip_set_list[i]); 833 } else { 834 i = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME])); 835 if (i == IPSET_INVALID_ID) 836 return -ENOENT; 837 838 ip_set_flush_set(ip_set_list[i]); 839 } 840 841 return 0; 842} 843 844/* Rename a set */ 845 846static const struct nla_policy 847ip_set_setname2_policy[IPSET_ATTR_CMD_MAX + 1] = { 848 [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 }, 849 [IPSET_ATTR_SETNAME] = { .type = NLA_NUL_STRING, 850 .len = IPSET_MAXNAMELEN - 1 }, 851 [IPSET_ATTR_SETNAME2] = { .type = NLA_NUL_STRING, 852 .len = IPSET_MAXNAMELEN - 1 }, 853}; 854 855static int 856ip_set_rename(struct sock *ctnl, struct sk_buff *skb, 857 const struct nlmsghdr *nlh, 858 const struct nlattr * const attr[]) 859{ 860 struct ip_set *set; 861 const char *name2; 862 ip_set_id_t i; 863 int ret = 0; 864 865 if (unlikely(protocol_failed(attr) || 866 attr[IPSET_ATTR_SETNAME] == NULL || 867 attr[IPSET_ATTR_SETNAME2] == NULL)) 868 return -IPSET_ERR_PROTOCOL; 869 870 set = find_set(nla_data(attr[IPSET_ATTR_SETNAME])); 871 if (set == NULL) 872 return -ENOENT; 873 874 read_lock_bh(&ip_set_ref_lock); 875 if (set->ref != 0) { 876 ret = -IPSET_ERR_REFERENCED; 877 goto out; 878 } 879 880 name2 = nla_data(attr[IPSET_ATTR_SETNAME2]); 881 for (i = 0; i < ip_set_max; i++) { 882 if (ip_set_list[i] != NULL && 883 STREQ(ip_set_list[i]->name, name2)) { 884 ret = -IPSET_ERR_EXIST_SETNAME2; 885 goto out; 886 } 887 } 888 strncpy(set->name, name2, IPSET_MAXNAMELEN); 889 890out: 891 read_unlock_bh(&ip_set_ref_lock); 892 return ret; 893} 894 895/* Swap two sets so that name/index points to the other. 896 * References and set names are also swapped. 897 * 898 * The commands are serialized by the nfnl mutex and references are 899 * protected by the ip_set_ref_lock. The kernel interfaces 900 * do not hold the mutex but the pointer settings are atomic 901 * so the ip_set_list always contains valid pointers to the sets. 902 */ 903 904static int 905ip_set_swap(struct sock *ctnl, struct sk_buff *skb, 906 const struct nlmsghdr *nlh, 907 const struct nlattr * const attr[]) 908{ 909 struct ip_set *from, *to; 910 ip_set_id_t from_id, to_id; 911 char from_name[IPSET_MAXNAMELEN]; 912 913 if (unlikely(protocol_failed(attr) || 914 attr[IPSET_ATTR_SETNAME] == NULL || 915 attr[IPSET_ATTR_SETNAME2] == NULL)) 916 return -IPSET_ERR_PROTOCOL; 917 918 from_id = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME])); 919 if (from_id == IPSET_INVALID_ID) 920 return -ENOENT; 921 922 to_id = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME2])); 923 if (to_id == IPSET_INVALID_ID) 924 return -IPSET_ERR_EXIST_SETNAME2; 925 926 from = ip_set_list[from_id]; 927 to = ip_set_list[to_id]; 928 929 /* Features must not change. 930 * Not an artificial restriction anymore, as we must prevent 931 * possible loops created by swapping in setlist type of sets. */ 932 if (!(from->type->features == to->type->features && 933 from->type->family == to->type->family)) 934 return -IPSET_ERR_TYPE_MISMATCH; 935 936 strncpy(from_name, from->name, IPSET_MAXNAMELEN); 937 strncpy(from->name, to->name, IPSET_MAXNAMELEN); 938 strncpy(to->name, from_name, IPSET_MAXNAMELEN); 939 940 write_lock_bh(&ip_set_ref_lock); 941 swap(from->ref, to->ref); 942 ip_set_list[from_id] = to; 943 ip_set_list[to_id] = from; 944 write_unlock_bh(&ip_set_ref_lock); 945 946 return 0; 947} 948 949/* List/save set data */ 950 951#define DUMP_INIT 0 952#define DUMP_ALL 1 953#define DUMP_ONE 2 954#define DUMP_LAST 3 955 956#define DUMP_TYPE(arg) (((u32)(arg)) & 0x0000FFFF) 957#define DUMP_FLAGS(arg) (((u32)(arg)) >> 16) 958 959static int 960ip_set_dump_done(struct netlink_callback *cb) 961{ 962 if (cb->args[2]) { 963 pr_debug("release set %s\n", ip_set_list[cb->args[1]]->name); 964 ip_set_put_byindex((ip_set_id_t) cb->args[1]); 965 } 966 return 0; 967} 968 969static inline void 970dump_attrs(struct nlmsghdr *nlh) 971{ 972 const struct nlattr *attr; 973 int rem; 974 975 pr_debug("dump nlmsg\n"); 976 nlmsg_for_each_attr(attr, nlh, sizeof(struct nfgenmsg), rem) { 977 pr_debug("type: %u, len %u\n", nla_type(attr), attr->nla_len); 978 } 979} 980 981static int 982dump_init(struct netlink_callback *cb) 983{ 984 struct nlmsghdr *nlh = nlmsg_hdr(cb->skb); 985 int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg)); 986 struct nlattr *cda[IPSET_ATTR_CMD_MAX+1]; 987 struct nlattr *attr = (void *)nlh + min_len; 988 u32 dump_type; 989 ip_set_id_t index; 990 991 /* Second pass, so parser can't fail */ 992 nla_parse(cda, IPSET_ATTR_CMD_MAX, 993 attr, nlh->nlmsg_len - min_len, ip_set_setname_policy); 994 995 /* cb->args[0] : dump single set/all sets 996 * [1] : set index 997 * [..]: type specific 998 */ 999 1000 if (cda[IPSET_ATTR_SETNAME]) { 1001 index = find_set_id(nla_data(cda[IPSET_ATTR_SETNAME])); 1002 if (index == IPSET_INVALID_ID) 1003 return -ENOENT; 1004 1005 dump_type = DUMP_ONE; 1006 cb->args[1] = index; 1007 } else 1008 dump_type = DUMP_ALL; 1009 1010 if (cda[IPSET_ATTR_FLAGS]) { 1011 u32 f = ip_set_get_h32(cda[IPSET_ATTR_FLAGS]); 1012 dump_type |= (f << 16); 1013 } 1014 cb->args[0] = dump_type; 1015 1016 return 0; 1017} 1018 1019static int 1020ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb) 1021{ 1022 ip_set_id_t index = IPSET_INVALID_ID, max; 1023 struct ip_set *set = NULL; 1024 struct nlmsghdr *nlh = NULL; 1025 unsigned int flags = NETLINK_CB(cb->skb).pid ? NLM_F_MULTI : 0; 1026 u32 dump_type, dump_flags; 1027 int ret = 0; 1028 1029 if (!cb->args[0]) { 1030 ret = dump_init(cb); 1031 if (ret < 0) { 1032 nlh = nlmsg_hdr(cb->skb); 1033 /* We have to create and send the error message 1034 * manually :-( */ 1035 if (nlh->nlmsg_flags & NLM_F_ACK) 1036 netlink_ack(cb->skb, nlh, ret); 1037 return ret; 1038 } 1039 } 1040 1041 if (cb->args[1] >= ip_set_max) 1042 goto out; 1043 1044 dump_type = DUMP_TYPE(cb->args[0]); 1045 dump_flags = DUMP_FLAGS(cb->args[0]); 1046 max = dump_type == DUMP_ONE ? cb->args[1] + 1 : ip_set_max; 1047dump_last: 1048 pr_debug("args[0]: %u %u args[1]: %ld\n", 1049 dump_type, dump_flags, cb->args[1]); 1050 for (; cb->args[1] < max; cb->args[1]++) { 1051 index = (ip_set_id_t) cb->args[1]; 1052 set = ip_set_list[index]; 1053 if (set == NULL) { 1054 if (dump_type == DUMP_ONE) { 1055 ret = -ENOENT; 1056 goto out; 1057 } 1058 continue; 1059 } 1060 /* When dumping all sets, we must dump "sorted" 1061 * so that lists (unions of sets) are dumped last. 1062 */ 1063 if (dump_type != DUMP_ONE && 1064 ((dump_type == DUMP_ALL) == 1065 !!(set->type->features & IPSET_DUMP_LAST))) 1066 continue; 1067 pr_debug("List set: %s\n", set->name); 1068 if (!cb->args[2]) { 1069 /* Start listing: make sure set won't be destroyed */ 1070 pr_debug("reference set\n"); 1071 __ip_set_get(index); 1072 } 1073 nlh = start_msg(skb, NETLINK_CB(cb->skb).pid, 1074 cb->nlh->nlmsg_seq, flags, 1075 IPSET_CMD_LIST); 1076 if (!nlh) { 1077 ret = -EMSGSIZE; 1078 goto release_refcount; 1079 } 1080 NLA_PUT_U8(skb, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL); 1081 NLA_PUT_STRING(skb, IPSET_ATTR_SETNAME, set->name); 1082 if (dump_flags & IPSET_FLAG_LIST_SETNAME) 1083 goto next_set; 1084 switch (cb->args[2]) { 1085 case 0: 1086 /* Core header data */ 1087 NLA_PUT_STRING(skb, IPSET_ATTR_TYPENAME, 1088 set->type->name); 1089 NLA_PUT_U8(skb, IPSET_ATTR_FAMILY, 1090 set->family); 1091 NLA_PUT_U8(skb, IPSET_ATTR_REVISION, 1092 set->revision); 1093 ret = set->variant->head(set, skb); 1094 if (ret < 0) 1095 goto release_refcount; 1096 if (dump_flags & IPSET_FLAG_LIST_HEADER) 1097 goto next_set; 1098 /* Fall through and add elements */ 1099 default: 1100 read_lock_bh(&set->lock); 1101 ret = set->variant->list(set, skb, cb); 1102 read_unlock_bh(&set->lock); 1103 if (!cb->args[2]) 1104 /* Set is done, proceed with next one */ 1105 goto next_set; 1106 goto release_refcount; 1107 } 1108 } 1109 /* If we dump all sets, continue with dumping last ones */ 1110 if (dump_type == DUMP_ALL) { 1111 dump_type = DUMP_LAST; 1112 cb->args[0] = dump_type | (dump_flags << 16); 1113 cb->args[1] = 0; 1114 goto dump_last; 1115 } 1116 goto out; 1117 1118nla_put_failure: 1119 ret = -EFAULT; 1120next_set: 1121 if (dump_type == DUMP_ONE) 1122 cb->args[1] = IPSET_INVALID_ID; 1123 else 1124 cb->args[1]++; 1125release_refcount: 1126 /* If there was an error or set is done, release set */ 1127 if (ret || !cb->args[2]) { 1128 pr_debug("release set %s\n", ip_set_list[index]->name); 1129 ip_set_put_byindex(index); 1130 } 1131out: 1132 if (nlh) { 1133 nlmsg_end(skb, nlh); 1134 pr_debug("nlmsg_len: %u\n", nlh->nlmsg_len); 1135 dump_attrs(nlh); 1136 } 1137 1138 return ret < 0 ? ret : skb->len; 1139} 1140 1141static int 1142ip_set_dump(struct sock *ctnl, struct sk_buff *skb, 1143 const struct nlmsghdr *nlh, 1144 const struct nlattr * const attr[]) 1145{ 1146 if (unlikely(protocol_failed(attr))) 1147 return -IPSET_ERR_PROTOCOL; 1148 1149 return netlink_dump_start(ctnl, skb, nlh, 1150 ip_set_dump_start, 1151 ip_set_dump_done, 0); 1152} 1153 1154/* Add, del and test */ 1155 1156static const struct nla_policy ip_set_adt_policy[IPSET_ATTR_CMD_MAX + 1] = { 1157 [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 }, 1158 [IPSET_ATTR_SETNAME] = { .type = NLA_NUL_STRING, 1159 .len = IPSET_MAXNAMELEN - 1 }, 1160 [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, 1161 [IPSET_ATTR_DATA] = { .type = NLA_NESTED }, 1162 [IPSET_ATTR_ADT] = { .type = NLA_NESTED }, 1163}; 1164 1165static int 1166call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set, 1167 struct nlattr *tb[], enum ipset_adt adt, 1168 u32 flags, bool use_lineno) 1169{ 1170 int ret; 1171 u32 lineno = 0; 1172 bool eexist = flags & IPSET_FLAG_EXIST, retried = false; 1173 1174 do { 1175 write_lock_bh(&set->lock); 1176 ret = set->variant->uadt(set, tb, adt, &lineno, flags, retried); 1177 write_unlock_bh(&set->lock); 1178 retried = true; 1179 } while (ret == -EAGAIN && 1180 set->variant->resize && 1181 (ret = set->variant->resize(set, retried)) == 0); 1182 1183 if (!ret || (ret == -IPSET_ERR_EXIST && eexist)) 1184 return 0; 1185 if (lineno && use_lineno) { 1186 /* Error in restore/batch mode: send back lineno */ 1187 struct nlmsghdr *rep, *nlh = nlmsg_hdr(skb); 1188 struct sk_buff *skb2; 1189 struct nlmsgerr *errmsg; 1190 size_t payload = sizeof(*errmsg) + nlmsg_len(nlh); 1191 int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg)); 1192 struct nlattr *cda[IPSET_ATTR_CMD_MAX+1]; 1193 struct nlattr *cmdattr; 1194 u32 *errline; 1195 1196 skb2 = nlmsg_new(payload, GFP_KERNEL); 1197 if (skb2 == NULL) 1198 return -ENOMEM; 1199 rep = __nlmsg_put(skb2, NETLINK_CB(skb).pid, 1200 nlh->nlmsg_seq, NLMSG_ERROR, payload, 0); 1201 errmsg = nlmsg_data(rep); 1202 errmsg->error = ret; 1203 memcpy(&errmsg->msg, nlh, nlh->nlmsg_len); 1204 cmdattr = (void *)&errmsg->msg + min_len; 1205 1206 nla_parse(cda, IPSET_ATTR_CMD_MAX, 1207 cmdattr, nlh->nlmsg_len - min_len, 1208 ip_set_adt_policy); 1209 1210 errline = nla_data(cda[IPSET_ATTR_LINENO]); 1211 1212 *errline = lineno; 1213 1214 netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); 1215 /* Signal netlink not to send its ACK/errmsg. */ 1216 return -EINTR; 1217 } 1218 1219 return ret; 1220} 1221 1222static int 1223ip_set_uadd(struct sock *ctnl, struct sk_buff *skb, 1224 const struct nlmsghdr *nlh, 1225 const struct nlattr * const attr[]) 1226{ 1227 struct ip_set *set; 1228 struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {}; 1229 const struct nlattr *nla; 1230 u32 flags = flag_exist(nlh); 1231 bool use_lineno; 1232 int ret = 0; 1233 1234 if (unlikely(protocol_failed(attr) || 1235 attr[IPSET_ATTR_SETNAME] == NULL || 1236 !((attr[IPSET_ATTR_DATA] != NULL) ^ 1237 (attr[IPSET_ATTR_ADT] != NULL)) || 1238 (attr[IPSET_ATTR_DATA] != NULL && 1239 !flag_nested(attr[IPSET_ATTR_DATA])) || 1240 (attr[IPSET_ATTR_ADT] != NULL && 1241 (!flag_nested(attr[IPSET_ATTR_ADT]) || 1242 attr[IPSET_ATTR_LINENO] == NULL)))) 1243 return -IPSET_ERR_PROTOCOL; 1244 1245 set = find_set(nla_data(attr[IPSET_ATTR_SETNAME])); 1246 if (set == NULL) 1247 return -ENOENT; 1248 1249 use_lineno = !!attr[IPSET_ATTR_LINENO]; 1250 if (attr[IPSET_ATTR_DATA]) { 1251 if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, 1252 attr[IPSET_ATTR_DATA], 1253 set->type->adt_policy)) 1254 return -IPSET_ERR_PROTOCOL; 1255 ret = call_ad(ctnl, skb, set, tb, IPSET_ADD, flags, 1256 use_lineno); 1257 } else { 1258 int nla_rem; 1259 1260 nla_for_each_nested(nla, attr[IPSET_ATTR_ADT], nla_rem) { 1261 memset(tb, 0, sizeof(tb)); 1262 if (nla_type(nla) != IPSET_ATTR_DATA || 1263 !flag_nested(nla) || 1264 nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, nla, 1265 set->type->adt_policy)) 1266 return -IPSET_ERR_PROTOCOL; 1267 ret = call_ad(ctnl, skb, set, tb, IPSET_ADD, 1268 flags, use_lineno); 1269 if (ret < 0) 1270 return ret; 1271 } 1272 } 1273 return ret; 1274} 1275 1276static int 1277ip_set_udel(struct sock *ctnl, struct sk_buff *skb, 1278 const struct nlmsghdr *nlh, 1279 const struct nlattr * const attr[]) 1280{ 1281 struct ip_set *set; 1282 struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {}; 1283 const struct nlattr *nla; 1284 u32 flags = flag_exist(nlh); 1285 bool use_lineno; 1286 int ret = 0; 1287 1288 if (unlikely(protocol_failed(attr) || 1289 attr[IPSET_ATTR_SETNAME] == NULL || 1290 !((attr[IPSET_ATTR_DATA] != NULL) ^ 1291 (attr[IPSET_ATTR_ADT] != NULL)) || 1292 (attr[IPSET_ATTR_DATA] != NULL && 1293 !flag_nested(attr[IPSET_ATTR_DATA])) || 1294 (attr[IPSET_ATTR_ADT] != NULL && 1295 (!flag_nested(attr[IPSET_ATTR_ADT]) || 1296 attr[IPSET_ATTR_LINENO] == NULL)))) 1297 return -IPSET_ERR_PROTOCOL; 1298 1299 set = find_set(nla_data(attr[IPSET_ATTR_SETNAME])); 1300 if (set == NULL) 1301 return -ENOENT; 1302 1303 use_lineno = !!attr[IPSET_ATTR_LINENO]; 1304 if (attr[IPSET_ATTR_DATA]) { 1305 if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, 1306 attr[IPSET_ATTR_DATA], 1307 set->type->adt_policy)) 1308 return -IPSET_ERR_PROTOCOL; 1309 ret = call_ad(ctnl, skb, set, tb, IPSET_DEL, flags, 1310 use_lineno); 1311 } else { 1312 int nla_rem; 1313 1314 nla_for_each_nested(nla, attr[IPSET_ATTR_ADT], nla_rem) { 1315 memset(tb, 0, sizeof(*tb)); 1316 if (nla_type(nla) != IPSET_ATTR_DATA || 1317 !flag_nested(nla) || 1318 nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, nla, 1319 set->type->adt_policy)) 1320 return -IPSET_ERR_PROTOCOL; 1321 ret = call_ad(ctnl, skb, set, tb, IPSET_DEL, 1322 flags, use_lineno); 1323 if (ret < 0) 1324 return ret; 1325 } 1326 } 1327 return ret; 1328} 1329 1330static int 1331ip_set_utest(struct sock *ctnl, struct sk_buff *skb, 1332 const struct nlmsghdr *nlh, 1333 const struct nlattr * const attr[]) 1334{ 1335 struct ip_set *set; 1336 struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {}; 1337 int ret = 0; 1338 1339 if (unlikely(protocol_failed(attr) || 1340 attr[IPSET_ATTR_SETNAME] == NULL || 1341 attr[IPSET_ATTR_DATA] == NULL || 1342 !flag_nested(attr[IPSET_ATTR_DATA]))) 1343 return -IPSET_ERR_PROTOCOL; 1344 1345 set = find_set(nla_data(attr[IPSET_ATTR_SETNAME])); 1346 if (set == NULL) 1347 return -ENOENT; 1348 1349 if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, attr[IPSET_ATTR_DATA], 1350 set->type->adt_policy)) 1351 return -IPSET_ERR_PROTOCOL; 1352 1353 read_lock_bh(&set->lock); 1354 ret = set->variant->uadt(set, tb, IPSET_TEST, NULL, 0, 0); 1355 read_unlock_bh(&set->lock); 1356 /* Userspace can't trigger element to be re-added */ 1357 if (ret == -EAGAIN) 1358 ret = 1; 1359 1360 return ret < 0 ? ret : ret > 0 ? 0 : -IPSET_ERR_EXIST; 1361} 1362 1363/* Get headed data of a set */ 1364 1365static int 1366ip_set_header(struct sock *ctnl, struct sk_buff *skb, 1367 const struct nlmsghdr *nlh, 1368 const struct nlattr * const attr[]) 1369{ 1370 const struct ip_set *set; 1371 struct sk_buff *skb2; 1372 struct nlmsghdr *nlh2; 1373 ip_set_id_t index; 1374 int ret = 0; 1375 1376 if (unlikely(protocol_failed(attr) || 1377 attr[IPSET_ATTR_SETNAME] == NULL)) 1378 return -IPSET_ERR_PROTOCOL; 1379 1380 index = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME])); 1381 if (index == IPSET_INVALID_ID) 1382 return -ENOENT; 1383 set = ip_set_list[index]; 1384 1385 skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 1386 if (skb2 == NULL) 1387 return -ENOMEM; 1388 1389 nlh2 = start_msg(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0, 1390 IPSET_CMD_HEADER); 1391 if (!nlh2) 1392 goto nlmsg_failure; 1393 NLA_PUT_U8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL); 1394 NLA_PUT_STRING(skb2, IPSET_ATTR_SETNAME, set->name); 1395 NLA_PUT_STRING(skb2, IPSET_ATTR_TYPENAME, set->type->name); 1396 NLA_PUT_U8(skb2, IPSET_ATTR_FAMILY, set->family); 1397 NLA_PUT_U8(skb2, IPSET_ATTR_REVISION, set->revision); 1398 nlmsg_end(skb2, nlh2); 1399 1400 ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); 1401 if (ret < 0) 1402 return ret; 1403 1404 return 0; 1405 1406nla_put_failure: 1407 nlmsg_cancel(skb2, nlh2); 1408nlmsg_failure: 1409 kfree_skb(skb2); 1410 return -EMSGSIZE; 1411} 1412 1413/* Get type data */ 1414 1415static const struct nla_policy ip_set_type_policy[IPSET_ATTR_CMD_MAX + 1] = { 1416 [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 }, 1417 [IPSET_ATTR_TYPENAME] = { .type = NLA_NUL_STRING, 1418 .len = IPSET_MAXNAMELEN - 1 }, 1419 [IPSET_ATTR_FAMILY] = { .type = NLA_U8 }, 1420}; 1421 1422static int 1423ip_set_type(struct sock *ctnl, struct sk_buff *skb, 1424 const struct nlmsghdr *nlh, 1425 const struct nlattr * const attr[]) 1426{ 1427 struct sk_buff *skb2; 1428 struct nlmsghdr *nlh2; 1429 u8 family, min, max; 1430 const char *typename; 1431 int ret = 0; 1432 1433 if (unlikely(protocol_failed(attr) || 1434 attr[IPSET_ATTR_TYPENAME] == NULL || 1435 attr[IPSET_ATTR_FAMILY] == NULL)) 1436 return -IPSET_ERR_PROTOCOL; 1437 1438 family = nla_get_u8(attr[IPSET_ATTR_FAMILY]); 1439 typename = nla_data(attr[IPSET_ATTR_TYPENAME]); 1440 ret = find_set_type_minmax(typename, family, &min, &max); 1441 if (ret) 1442 return ret; 1443 1444 skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 1445 if (skb2 == NULL) 1446 return -ENOMEM; 1447 1448 nlh2 = start_msg(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0, 1449 IPSET_CMD_TYPE); 1450 if (!nlh2) 1451 goto nlmsg_failure; 1452 NLA_PUT_U8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL); 1453 NLA_PUT_STRING(skb2, IPSET_ATTR_TYPENAME, typename); 1454 NLA_PUT_U8(skb2, IPSET_ATTR_FAMILY, family); 1455 NLA_PUT_U8(skb2, IPSET_ATTR_REVISION, max); 1456 NLA_PUT_U8(skb2, IPSET_ATTR_REVISION_MIN, min); 1457 nlmsg_end(skb2, nlh2); 1458 1459 pr_debug("Send TYPE, nlmsg_len: %u\n", nlh2->nlmsg_len); 1460 ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); 1461 if (ret < 0) 1462 return ret; 1463 1464 return 0; 1465 1466nla_put_failure: 1467 nlmsg_cancel(skb2, nlh2); 1468nlmsg_failure: 1469 kfree_skb(skb2); 1470 return -EMSGSIZE; 1471} 1472 1473/* Get protocol version */ 1474 1475static const struct nla_policy 1476ip_set_protocol_policy[IPSET_ATTR_CMD_MAX + 1] = { 1477 [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 }, 1478}; 1479 1480static int 1481ip_set_protocol(struct sock *ctnl, struct sk_buff *skb, 1482 const struct nlmsghdr *nlh, 1483 const struct nlattr * const attr[]) 1484{ 1485 struct sk_buff *skb2; 1486 struct nlmsghdr *nlh2; 1487 int ret = 0; 1488 1489 if (unlikely(attr[IPSET_ATTR_PROTOCOL] == NULL)) 1490 return -IPSET_ERR_PROTOCOL; 1491 1492 skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 1493 if (skb2 == NULL) 1494 return -ENOMEM; 1495 1496 nlh2 = start_msg(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0, 1497 IPSET_CMD_PROTOCOL); 1498 if (!nlh2) 1499 goto nlmsg_failure; 1500 NLA_PUT_U8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL); 1501 nlmsg_end(skb2, nlh2); 1502 1503 ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); 1504 if (ret < 0) 1505 return ret; 1506 1507 return 0; 1508 1509nla_put_failure: 1510 nlmsg_cancel(skb2, nlh2); 1511nlmsg_failure: 1512 kfree_skb(skb2); 1513 return -EMSGSIZE; 1514} 1515 1516static const struct nfnl_callback ip_set_netlink_subsys_cb[IPSET_MSG_MAX] = { 1517 [IPSET_CMD_CREATE] = { 1518 .call = ip_set_create, 1519 .attr_count = IPSET_ATTR_CMD_MAX, 1520 .policy = ip_set_create_policy, 1521 }, 1522 [IPSET_CMD_DESTROY] = { 1523 .call = ip_set_destroy, 1524 .attr_count = IPSET_ATTR_CMD_MAX, 1525 .policy = ip_set_setname_policy, 1526 }, 1527 [IPSET_CMD_FLUSH] = { 1528 .call = ip_set_flush, 1529 .attr_count = IPSET_ATTR_CMD_MAX, 1530 .policy = ip_set_setname_policy, 1531 }, 1532 [IPSET_CMD_RENAME] = { 1533 .call = ip_set_rename, 1534 .attr_count = IPSET_ATTR_CMD_MAX, 1535 .policy = ip_set_setname2_policy, 1536 }, 1537 [IPSET_CMD_SWAP] = { 1538 .call = ip_set_swap, 1539 .attr_count = IPSET_ATTR_CMD_MAX, 1540 .policy = ip_set_setname2_policy, 1541 }, 1542 [IPSET_CMD_LIST] = { 1543 .call = ip_set_dump, 1544 .attr_count = IPSET_ATTR_CMD_MAX, 1545 .policy = ip_set_setname_policy, 1546 }, 1547 [IPSET_CMD_SAVE] = { 1548 .call = ip_set_dump, 1549 .attr_count = IPSET_ATTR_CMD_MAX, 1550 .policy = ip_set_setname_policy, 1551 }, 1552 [IPSET_CMD_ADD] = { 1553 .call = ip_set_uadd, 1554 .attr_count = IPSET_ATTR_CMD_MAX, 1555 .policy = ip_set_adt_policy, 1556 }, 1557 [IPSET_CMD_DEL] = { 1558 .call = ip_set_udel, 1559 .attr_count = IPSET_ATTR_CMD_MAX, 1560 .policy = ip_set_adt_policy, 1561 }, 1562 [IPSET_CMD_TEST] = { 1563 .call = ip_set_utest, 1564 .attr_count = IPSET_ATTR_CMD_MAX, 1565 .policy = ip_set_adt_policy, 1566 }, 1567 [IPSET_CMD_HEADER] = { 1568 .call = ip_set_header, 1569 .attr_count = IPSET_ATTR_CMD_MAX, 1570 .policy = ip_set_setname_policy, 1571 }, 1572 [IPSET_CMD_TYPE] = { 1573 .call = ip_set_type, 1574 .attr_count = IPSET_ATTR_CMD_MAX, 1575 .policy = ip_set_type_policy, 1576 }, 1577 [IPSET_CMD_PROTOCOL] = { 1578 .call = ip_set_protocol, 1579 .attr_count = IPSET_ATTR_CMD_MAX, 1580 .policy = ip_set_protocol_policy, 1581 }, 1582}; 1583 1584static struct nfnetlink_subsystem ip_set_netlink_subsys __read_mostly = { 1585 .name = "ip_set", 1586 .subsys_id = NFNL_SUBSYS_IPSET, 1587 .cb_count = IPSET_MSG_MAX, 1588 .cb = ip_set_netlink_subsys_cb, 1589}; 1590 1591/* Interface to iptables/ip6tables */ 1592 1593static int 1594ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) 1595{ 1596 unsigned *op; 1597 void *data; 1598 int copylen = *len, ret = 0; 1599 1600 if (!capable(CAP_NET_ADMIN)) 1601 return -EPERM; 1602 if (optval != SO_IP_SET) 1603 return -EBADF; 1604 if (*len < sizeof(unsigned)) 1605 return -EINVAL; 1606 1607 data = vmalloc(*len); 1608 if (!data) 1609 return -ENOMEM; 1610 if (copy_from_user(data, user, *len) != 0) { 1611 ret = -EFAULT; 1612 goto done; 1613 } 1614 op = (unsigned *) data; 1615 1616 if (*op < IP_SET_OP_VERSION) { 1617 /* Check the version at the beginning of operations */ 1618 struct ip_set_req_version *req_version = data; 1619 if (req_version->version != IPSET_PROTOCOL) { 1620 ret = -EPROTO; 1621 goto done; 1622 } 1623 } 1624 1625 switch (*op) { 1626 case IP_SET_OP_VERSION: { 1627 struct ip_set_req_version *req_version = data; 1628 1629 if (*len != sizeof(struct ip_set_req_version)) { 1630 ret = -EINVAL; 1631 goto done; 1632 } 1633 1634 req_version->version = IPSET_PROTOCOL; 1635 ret = copy_to_user(user, req_version, 1636 sizeof(struct ip_set_req_version)); 1637 goto done; 1638 } 1639 case IP_SET_OP_GET_BYNAME: { 1640 struct ip_set_req_get_set *req_get = data; 1641 1642 if (*len != sizeof(struct ip_set_req_get_set)) { 1643 ret = -EINVAL; 1644 goto done; 1645 } 1646 req_get->set.name[IPSET_MAXNAMELEN - 1] = '\0'; 1647 nfnl_lock(); 1648 req_get->set.index = find_set_id(req_get->set.name); 1649 nfnl_unlock(); 1650 goto copy; 1651 } 1652 case IP_SET_OP_GET_BYINDEX: { 1653 struct ip_set_req_get_set *req_get = data; 1654 1655 if (*len != sizeof(struct ip_set_req_get_set) || 1656 req_get->set.index >= ip_set_max) { 1657 ret = -EINVAL; 1658 goto done; 1659 } 1660 nfnl_lock(); 1661 strncpy(req_get->set.name, 1662 ip_set_list[req_get->set.index] 1663 ? ip_set_list[req_get->set.index]->name : "", 1664 IPSET_MAXNAMELEN); 1665 nfnl_unlock(); 1666 goto copy; 1667 } 1668 default: 1669 ret = -EBADMSG; 1670 goto done; 1671 } /* end of switch(op) */ 1672 1673copy: 1674 ret = copy_to_user(user, data, copylen); 1675 1676done: 1677 vfree(data); 1678 if (ret > 0) 1679 ret = 0; 1680 return ret; 1681} 1682 1683static struct nf_sockopt_ops so_set __read_mostly = { 1684 .pf = PF_INET, 1685 .get_optmin = SO_IP_SET, 1686 .get_optmax = SO_IP_SET + 1, 1687 .get = &ip_set_sockfn_get, 1688 .owner = THIS_MODULE, 1689}; 1690 1691static int __init 1692ip_set_init(void) 1693{ 1694 int ret; 1695 1696 if (max_sets) 1697 ip_set_max = max_sets; 1698 if (ip_set_max >= IPSET_INVALID_ID) 1699 ip_set_max = IPSET_INVALID_ID - 1; 1700 1701 ip_set_list = kzalloc(sizeof(struct ip_set *) * ip_set_max, 1702 GFP_KERNEL); 1703 if (!ip_set_list) { 1704 pr_err("ip_set: Unable to create ip_set_list\n"); 1705 return -ENOMEM; 1706 } 1707 1708 ret = nfnetlink_subsys_register(&ip_set_netlink_subsys); 1709 if (ret != 0) { 1710 pr_err("ip_set: cannot register with nfnetlink.\n"); 1711 kfree(ip_set_list); 1712 return ret; 1713 } 1714 ret = nf_register_sockopt(&so_set); 1715 if (ret != 0) { 1716 pr_err("SO_SET registry failed: %d\n", ret); 1717 nfnetlink_subsys_unregister(&ip_set_netlink_subsys); 1718 kfree(ip_set_list); 1719 return ret; 1720 } 1721 1722 pr_notice("ip_set: protocol %u\n", IPSET_PROTOCOL); 1723 return 0; 1724} 1725 1726static void __exit 1727ip_set_fini(void) 1728{ 1729 /* There can't be any existing set */ 1730 nf_unregister_sockopt(&so_set); 1731 nfnetlink_subsys_unregister(&ip_set_netlink_subsys); 1732 kfree(ip_set_list); 1733 pr_debug("these are the famous last words\n"); 1734} 1735 1736module_init(ip_set_init); 1737module_exit(ip_set_fini); 1738