datapath.c revision 971427f353f3c42c8dcef62e7124440df68eb809
1/* 2 * Copyright (c) 2007-2014 Nicira, Inc. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of version 2 of the GNU General Public 6 * License as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, but 9 * WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public License 14 * along with this program; if not, write to the Free Software 15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 16 * 02110-1301, USA 17 */ 18 19#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 20 21#include <linux/init.h> 22#include <linux/module.h> 23#include <linux/if_arp.h> 24#include <linux/if_vlan.h> 25#include <linux/in.h> 26#include <linux/ip.h> 27#include <linux/jhash.h> 28#include <linux/delay.h> 29#include <linux/time.h> 30#include <linux/etherdevice.h> 31#include <linux/genetlink.h> 32#include <linux/kernel.h> 33#include <linux/kthread.h> 34#include <linux/mutex.h> 35#include <linux/percpu.h> 36#include <linux/rcupdate.h> 37#include <linux/tcp.h> 38#include <linux/udp.h> 39#include <linux/ethtool.h> 40#include <linux/wait.h> 41#include <asm/div64.h> 42#include <linux/highmem.h> 43#include <linux/netfilter_bridge.h> 44#include <linux/netfilter_ipv4.h> 45#include <linux/inetdevice.h> 46#include <linux/list.h> 47#include <linux/openvswitch.h> 48#include <linux/rculist.h> 49#include <linux/dmi.h> 50#include <net/genetlink.h> 51#include <net/net_namespace.h> 52#include <net/netns/generic.h> 53 54#include "datapath.h" 55#include "flow.h" 56#include "flow_table.h" 57#include "flow_netlink.h" 58#include "vport-internal_dev.h" 59#include "vport-netdev.h" 60 61int ovs_net_id __read_mostly; 62 63static struct genl_family dp_packet_genl_family; 64static struct genl_family dp_flow_genl_family; 65static struct genl_family dp_datapath_genl_family; 66 67static const struct genl_multicast_group ovs_dp_flow_multicast_group = { 68 .name = OVS_FLOW_MCGROUP, 69}; 70 71static const struct genl_multicast_group ovs_dp_datapath_multicast_group = { 72 .name = OVS_DATAPATH_MCGROUP, 73}; 74 75static const struct genl_multicast_group ovs_dp_vport_multicast_group = { 76 .name = OVS_VPORT_MCGROUP, 77}; 78 79/* Check if need to build a reply message. 80 * OVS userspace sets the NLM_F_ECHO flag if it needs the reply. */ 81static bool ovs_must_notify(struct genl_info *info, 82 const struct genl_multicast_group *grp) 83{ 84 return info->nlhdr->nlmsg_flags & NLM_F_ECHO || 85 netlink_has_listeners(genl_info_net(info)->genl_sock, 0); 86} 87 88static void ovs_notify(struct genl_family *family, 89 struct sk_buff *skb, struct genl_info *info) 90{ 91 genl_notify(family, skb, genl_info_net(info), info->snd_portid, 92 0, info->nlhdr, GFP_KERNEL); 93} 94 95/** 96 * DOC: Locking: 97 * 98 * All writes e.g. Writes to device state (add/remove datapath, port, set 99 * operations on vports, etc.), Writes to other state (flow table 100 * modifications, set miscellaneous datapath parameters, etc.) are protected 101 * by ovs_lock. 102 * 103 * Reads are protected by RCU. 104 * 105 * There are a few special cases (mostly stats) that have their own 106 * synchronization but they nest under all of above and don't interact with 107 * each other. 108 * 109 * The RTNL lock nests inside ovs_mutex. 110 */ 111 112static DEFINE_MUTEX(ovs_mutex); 113 114void ovs_lock(void) 115{ 116 mutex_lock(&ovs_mutex); 117} 118 119void ovs_unlock(void) 120{ 121 mutex_unlock(&ovs_mutex); 122} 123 124#ifdef CONFIG_LOCKDEP 125int lockdep_ovsl_is_held(void) 126{ 127 if (debug_locks) 128 return lockdep_is_held(&ovs_mutex); 129 else 130 return 1; 131} 132#endif 133 134static struct vport *new_vport(const struct vport_parms *); 135static int queue_gso_packets(struct datapath *dp, struct sk_buff *, 136 const struct dp_upcall_info *); 137static int queue_userspace_packet(struct datapath *dp, struct sk_buff *, 138 const struct dp_upcall_info *); 139 140/* Must be called with rcu_read_lock or ovs_mutex. */ 141static struct datapath *get_dp(struct net *net, int dp_ifindex) 142{ 143 struct datapath *dp = NULL; 144 struct net_device *dev; 145 146 rcu_read_lock(); 147 dev = dev_get_by_index_rcu(net, dp_ifindex); 148 if (dev) { 149 struct vport *vport = ovs_internal_dev_get_vport(dev); 150 if (vport) 151 dp = vport->dp; 152 } 153 rcu_read_unlock(); 154 155 return dp; 156} 157 158/* Must be called with rcu_read_lock or ovs_mutex. */ 159const char *ovs_dp_name(const struct datapath *dp) 160{ 161 struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL); 162 return vport->ops->get_name(vport); 163} 164 165static int get_dpifindex(struct datapath *dp) 166{ 167 struct vport *local; 168 int ifindex; 169 170 rcu_read_lock(); 171 172 local = ovs_vport_rcu(dp, OVSP_LOCAL); 173 if (local) 174 ifindex = netdev_vport_priv(local)->dev->ifindex; 175 else 176 ifindex = 0; 177 178 rcu_read_unlock(); 179 180 return ifindex; 181} 182 183static void destroy_dp_rcu(struct rcu_head *rcu) 184{ 185 struct datapath *dp = container_of(rcu, struct datapath, rcu); 186 187 free_percpu(dp->stats_percpu); 188 release_net(ovs_dp_get_net(dp)); 189 kfree(dp->ports); 190 kfree(dp); 191} 192 193static struct hlist_head *vport_hash_bucket(const struct datapath *dp, 194 u16 port_no) 195{ 196 return &dp->ports[port_no & (DP_VPORT_HASH_BUCKETS - 1)]; 197} 198 199/* Called with ovs_mutex or RCU read lock. */ 200struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no) 201{ 202 struct vport *vport; 203 struct hlist_head *head; 204 205 head = vport_hash_bucket(dp, port_no); 206 hlist_for_each_entry_rcu(vport, head, dp_hash_node) { 207 if (vport->port_no == port_no) 208 return vport; 209 } 210 return NULL; 211} 212 213/* Called with ovs_mutex. */ 214static struct vport *new_vport(const struct vport_parms *parms) 215{ 216 struct vport *vport; 217 218 vport = ovs_vport_add(parms); 219 if (!IS_ERR(vport)) { 220 struct datapath *dp = parms->dp; 221 struct hlist_head *head = vport_hash_bucket(dp, vport->port_no); 222 223 hlist_add_head_rcu(&vport->dp_hash_node, head); 224 } 225 return vport; 226} 227 228void ovs_dp_detach_port(struct vport *p) 229{ 230 ASSERT_OVSL(); 231 232 /* First drop references to device. */ 233 hlist_del_rcu(&p->dp_hash_node); 234 235 /* Then destroy it. */ 236 ovs_vport_del(p); 237} 238 239/* Must be called with rcu_read_lock. */ 240void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key) 241{ 242 const struct vport *p = OVS_CB(skb)->input_vport; 243 struct datapath *dp = p->dp; 244 struct sw_flow *flow; 245 struct dp_stats_percpu *stats; 246 u64 *stats_counter; 247 u32 n_mask_hit; 248 249 stats = this_cpu_ptr(dp->stats_percpu); 250 251 /* Look up flow. */ 252 flow = ovs_flow_tbl_lookup_stats(&dp->table, key, &n_mask_hit); 253 if (unlikely(!flow)) { 254 struct dp_upcall_info upcall; 255 int error; 256 257 upcall.cmd = OVS_PACKET_CMD_MISS; 258 upcall.key = key; 259 upcall.userdata = NULL; 260 upcall.portid = ovs_vport_find_upcall_portid(p, skb); 261 error = ovs_dp_upcall(dp, skb, &upcall); 262 if (unlikely(error)) 263 kfree_skb(skb); 264 else 265 consume_skb(skb); 266 stats_counter = &stats->n_missed; 267 goto out; 268 } 269 270 OVS_CB(skb)->flow = flow; 271 272 ovs_flow_stats_update(OVS_CB(skb)->flow, key->tp.flags, skb); 273 ovs_execute_actions(dp, skb, key); 274 stats_counter = &stats->n_hit; 275 276out: 277 /* Update datapath statistics. */ 278 u64_stats_update_begin(&stats->syncp); 279 (*stats_counter)++; 280 stats->n_mask_hit += n_mask_hit; 281 u64_stats_update_end(&stats->syncp); 282} 283 284int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb, 285 const struct dp_upcall_info *upcall_info) 286{ 287 struct dp_stats_percpu *stats; 288 int err; 289 290 if (upcall_info->portid == 0) { 291 err = -ENOTCONN; 292 goto err; 293 } 294 295 if (!skb_is_gso(skb)) 296 err = queue_userspace_packet(dp, skb, upcall_info); 297 else 298 err = queue_gso_packets(dp, skb, upcall_info); 299 if (err) 300 goto err; 301 302 return 0; 303 304err: 305 stats = this_cpu_ptr(dp->stats_percpu); 306 307 u64_stats_update_begin(&stats->syncp); 308 stats->n_lost++; 309 u64_stats_update_end(&stats->syncp); 310 311 return err; 312} 313 314static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb, 315 const struct dp_upcall_info *upcall_info) 316{ 317 unsigned short gso_type = skb_shinfo(skb)->gso_type; 318 struct dp_upcall_info later_info; 319 struct sw_flow_key later_key; 320 struct sk_buff *segs, *nskb; 321 int err; 322 323 segs = __skb_gso_segment(skb, NETIF_F_SG, false); 324 if (IS_ERR(segs)) 325 return PTR_ERR(segs); 326 327 /* Queue all of the segments. */ 328 skb = segs; 329 do { 330 err = queue_userspace_packet(dp, skb, upcall_info); 331 if (err) 332 break; 333 334 if (skb == segs && gso_type & SKB_GSO_UDP) { 335 /* The initial flow key extracted by ovs_flow_extract() 336 * in this case is for a first fragment, so we need to 337 * properly mark later fragments. 338 */ 339 later_key = *upcall_info->key; 340 later_key.ip.frag = OVS_FRAG_TYPE_LATER; 341 342 later_info = *upcall_info; 343 later_info.key = &later_key; 344 upcall_info = &later_info; 345 } 346 } while ((skb = skb->next)); 347 348 /* Free all of the segments. */ 349 skb = segs; 350 do { 351 nskb = skb->next; 352 if (err) 353 kfree_skb(skb); 354 else 355 consume_skb(skb); 356 } while ((skb = nskb)); 357 return err; 358} 359 360static size_t key_attr_size(void) 361{ 362 return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */ 363 + nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */ 364 + nla_total_size(8) /* OVS_TUNNEL_KEY_ATTR_ID */ 365 + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_SRC */ 366 + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_DST */ 367 + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TOS */ 368 + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TTL */ 369 + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */ 370 + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */ 371 + nla_total_size(4) /* OVS_KEY_ATTR_IN_PORT */ 372 + nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */ 373 + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */ 374 + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */ 375 + nla_total_size(4) /* OVS_KEY_ATTR_8021Q */ 376 + nla_total_size(0) /* OVS_KEY_ATTR_ENCAP */ 377 + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */ 378 + nla_total_size(40) /* OVS_KEY_ATTR_IPV6 */ 379 + nla_total_size(2) /* OVS_KEY_ATTR_ICMPV6 */ 380 + nla_total_size(28); /* OVS_KEY_ATTR_ND */ 381} 382 383static size_t upcall_msg_size(const struct nlattr *userdata, 384 unsigned int hdrlen) 385{ 386 size_t size = NLMSG_ALIGN(sizeof(struct ovs_header)) 387 + nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */ 388 + nla_total_size(key_attr_size()); /* OVS_PACKET_ATTR_KEY */ 389 390 /* OVS_PACKET_ATTR_USERDATA */ 391 if (userdata) 392 size += NLA_ALIGN(userdata->nla_len); 393 394 return size; 395} 396 397static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, 398 const struct dp_upcall_info *upcall_info) 399{ 400 struct ovs_header *upcall; 401 struct sk_buff *nskb = NULL; 402 struct sk_buff *user_skb = NULL; /* to be queued to userspace */ 403 struct nlattr *nla; 404 struct genl_info info = { 405 .dst_sk = ovs_dp_get_net(dp)->genl_sock, 406 .snd_portid = upcall_info->portid, 407 }; 408 size_t len; 409 unsigned int hlen; 410 int err, dp_ifindex; 411 412 dp_ifindex = get_dpifindex(dp); 413 if (!dp_ifindex) 414 return -ENODEV; 415 416 if (vlan_tx_tag_present(skb)) { 417 nskb = skb_clone(skb, GFP_ATOMIC); 418 if (!nskb) 419 return -ENOMEM; 420 421 nskb = __vlan_put_tag(nskb, nskb->vlan_proto, vlan_tx_tag_get(nskb)); 422 if (!nskb) 423 return -ENOMEM; 424 425 nskb->vlan_tci = 0; 426 skb = nskb; 427 } 428 429 if (nla_attr_size(skb->len) > USHRT_MAX) { 430 err = -EFBIG; 431 goto out; 432 } 433 434 /* Complete checksum if needed */ 435 if (skb->ip_summed == CHECKSUM_PARTIAL && 436 (err = skb_checksum_help(skb))) 437 goto out; 438 439 /* Older versions of OVS user space enforce alignment of the last 440 * Netlink attribute to NLA_ALIGNTO which would require extensive 441 * padding logic. Only perform zerocopy if padding is not required. 442 */ 443 if (dp->user_features & OVS_DP_F_UNALIGNED) 444 hlen = skb_zerocopy_headlen(skb); 445 else 446 hlen = skb->len; 447 448 len = upcall_msg_size(upcall_info->userdata, hlen); 449 user_skb = genlmsg_new_unicast(len, &info, GFP_ATOMIC); 450 if (!user_skb) { 451 err = -ENOMEM; 452 goto out; 453 } 454 455 upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family, 456 0, upcall_info->cmd); 457 upcall->dp_ifindex = dp_ifindex; 458 459 nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY); 460 err = ovs_nla_put_flow(upcall_info->key, upcall_info->key, user_skb); 461 BUG_ON(err); 462 nla_nest_end(user_skb, nla); 463 464 if (upcall_info->userdata) 465 __nla_put(user_skb, OVS_PACKET_ATTR_USERDATA, 466 nla_len(upcall_info->userdata), 467 nla_data(upcall_info->userdata)); 468 469 /* Only reserve room for attribute header, packet data is added 470 * in skb_zerocopy() */ 471 if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) { 472 err = -ENOBUFS; 473 goto out; 474 } 475 nla->nla_len = nla_attr_size(skb->len); 476 477 err = skb_zerocopy(user_skb, skb, skb->len, hlen); 478 if (err) 479 goto out; 480 481 /* Pad OVS_PACKET_ATTR_PACKET if linear copy was performed */ 482 if (!(dp->user_features & OVS_DP_F_UNALIGNED)) { 483 size_t plen = NLA_ALIGN(user_skb->len) - user_skb->len; 484 485 if (plen > 0) 486 memset(skb_put(user_skb, plen), 0, plen); 487 } 488 489 ((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len; 490 491 err = genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid); 492 user_skb = NULL; 493out: 494 if (err) 495 skb_tx_error(skb); 496 kfree_skb(user_skb); 497 kfree_skb(nskb); 498 return err; 499} 500 501static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) 502{ 503 struct ovs_header *ovs_header = info->userhdr; 504 struct nlattr **a = info->attrs; 505 struct sw_flow_actions *acts; 506 struct sk_buff *packet; 507 struct sw_flow *flow; 508 struct datapath *dp; 509 struct ethhdr *eth; 510 struct vport *input_vport; 511 int len; 512 int err; 513 514 err = -EINVAL; 515 if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] || 516 !a[OVS_PACKET_ATTR_ACTIONS]) 517 goto err; 518 519 len = nla_len(a[OVS_PACKET_ATTR_PACKET]); 520 packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL); 521 err = -ENOMEM; 522 if (!packet) 523 goto err; 524 skb_reserve(packet, NET_IP_ALIGN); 525 526 nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len); 527 528 skb_reset_mac_header(packet); 529 eth = eth_hdr(packet); 530 531 /* Normally, setting the skb 'protocol' field would be handled by a 532 * call to eth_type_trans(), but it assumes there's a sending 533 * device, which we may not have. */ 534 if (ntohs(eth->h_proto) >= ETH_P_802_3_MIN) 535 packet->protocol = eth->h_proto; 536 else 537 packet->protocol = htons(ETH_P_802_2); 538 539 /* Build an sw_flow for sending this packet. */ 540 flow = ovs_flow_alloc(); 541 err = PTR_ERR(flow); 542 if (IS_ERR(flow)) 543 goto err_kfree_skb; 544 545 err = ovs_flow_key_extract_userspace(a[OVS_PACKET_ATTR_KEY], packet, 546 &flow->key); 547 if (err) 548 goto err_flow_free; 549 550 acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_PACKET_ATTR_ACTIONS])); 551 err = PTR_ERR(acts); 552 if (IS_ERR(acts)) 553 goto err_flow_free; 554 555 err = ovs_nla_copy_actions(a[OVS_PACKET_ATTR_ACTIONS], 556 &flow->key, 0, &acts); 557 rcu_assign_pointer(flow->sf_acts, acts); 558 if (err) 559 goto err_flow_free; 560 561 OVS_CB(packet)->flow = flow; 562 packet->priority = flow->key.phy.priority; 563 packet->mark = flow->key.phy.skb_mark; 564 565 rcu_read_lock(); 566 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); 567 err = -ENODEV; 568 if (!dp) 569 goto err_unlock; 570 571 input_vport = ovs_vport_rcu(dp, flow->key.phy.in_port); 572 if (!input_vport) 573 input_vport = ovs_vport_rcu(dp, OVSP_LOCAL); 574 575 if (!input_vport) 576 goto err_unlock; 577 578 OVS_CB(packet)->input_vport = input_vport; 579 580 local_bh_disable(); 581 err = ovs_execute_actions(dp, packet, &flow->key); 582 local_bh_enable(); 583 rcu_read_unlock(); 584 585 ovs_flow_free(flow, false); 586 return err; 587 588err_unlock: 589 rcu_read_unlock(); 590err_flow_free: 591 ovs_flow_free(flow, false); 592err_kfree_skb: 593 kfree_skb(packet); 594err: 595 return err; 596} 597 598static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = { 599 [OVS_PACKET_ATTR_PACKET] = { .len = ETH_HLEN }, 600 [OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED }, 601 [OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED }, 602}; 603 604static const struct genl_ops dp_packet_genl_ops[] = { 605 { .cmd = OVS_PACKET_CMD_EXECUTE, 606 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ 607 .policy = packet_policy, 608 .doit = ovs_packet_cmd_execute 609 } 610}; 611 612static struct genl_family dp_packet_genl_family = { 613 .id = GENL_ID_GENERATE, 614 .hdrsize = sizeof(struct ovs_header), 615 .name = OVS_PACKET_FAMILY, 616 .version = OVS_PACKET_VERSION, 617 .maxattr = OVS_PACKET_ATTR_MAX, 618 .netnsok = true, 619 .parallel_ops = true, 620 .ops = dp_packet_genl_ops, 621 .n_ops = ARRAY_SIZE(dp_packet_genl_ops), 622}; 623 624static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats, 625 struct ovs_dp_megaflow_stats *mega_stats) 626{ 627 int i; 628 629 memset(mega_stats, 0, sizeof(*mega_stats)); 630 631 stats->n_flows = ovs_flow_tbl_count(&dp->table); 632 mega_stats->n_masks = ovs_flow_tbl_num_masks(&dp->table); 633 634 stats->n_hit = stats->n_missed = stats->n_lost = 0; 635 636 for_each_possible_cpu(i) { 637 const struct dp_stats_percpu *percpu_stats; 638 struct dp_stats_percpu local_stats; 639 unsigned int start; 640 641 percpu_stats = per_cpu_ptr(dp->stats_percpu, i); 642 643 do { 644 start = u64_stats_fetch_begin_irq(&percpu_stats->syncp); 645 local_stats = *percpu_stats; 646 } while (u64_stats_fetch_retry_irq(&percpu_stats->syncp, start)); 647 648 stats->n_hit += local_stats.n_hit; 649 stats->n_missed += local_stats.n_missed; 650 stats->n_lost += local_stats.n_lost; 651 mega_stats->n_mask_hit += local_stats.n_mask_hit; 652 } 653} 654 655static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts) 656{ 657 return NLMSG_ALIGN(sizeof(struct ovs_header)) 658 + nla_total_size(key_attr_size()) /* OVS_FLOW_ATTR_KEY */ 659 + nla_total_size(key_attr_size()) /* OVS_FLOW_ATTR_MASK */ 660 + nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */ 661 + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */ 662 + nla_total_size(8) /* OVS_FLOW_ATTR_USED */ 663 + nla_total_size(acts->actions_len); /* OVS_FLOW_ATTR_ACTIONS */ 664} 665 666/* Called with ovs_mutex or RCU read lock. */ 667static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex, 668 struct sk_buff *skb, u32 portid, 669 u32 seq, u32 flags, u8 cmd) 670{ 671 const int skb_orig_len = skb->len; 672 struct nlattr *start; 673 struct ovs_flow_stats stats; 674 __be16 tcp_flags; 675 unsigned long used; 676 struct ovs_header *ovs_header; 677 struct nlattr *nla; 678 int err; 679 680 ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family, flags, cmd); 681 if (!ovs_header) 682 return -EMSGSIZE; 683 684 ovs_header->dp_ifindex = dp_ifindex; 685 686 /* Fill flow key. */ 687 nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY); 688 if (!nla) 689 goto nla_put_failure; 690 691 err = ovs_nla_put_flow(&flow->unmasked_key, &flow->unmasked_key, skb); 692 if (err) 693 goto error; 694 nla_nest_end(skb, nla); 695 696 nla = nla_nest_start(skb, OVS_FLOW_ATTR_MASK); 697 if (!nla) 698 goto nla_put_failure; 699 700 err = ovs_nla_put_flow(&flow->key, &flow->mask->key, skb); 701 if (err) 702 goto error; 703 704 nla_nest_end(skb, nla); 705 706 ovs_flow_stats_get(flow, &stats, &used, &tcp_flags); 707 708 if (used && 709 nla_put_u64(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used))) 710 goto nla_put_failure; 711 712 if (stats.n_packets && 713 nla_put(skb, OVS_FLOW_ATTR_STATS, sizeof(struct ovs_flow_stats), &stats)) 714 goto nla_put_failure; 715 716 if ((u8)ntohs(tcp_flags) && 717 nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, (u8)ntohs(tcp_flags))) 718 goto nla_put_failure; 719 720 /* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if 721 * this is the first flow to be dumped into 'skb'. This is unusual for 722 * Netlink but individual action lists can be longer than 723 * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this. 724 * The userspace caller can always fetch the actions separately if it 725 * really wants them. (Most userspace callers in fact don't care.) 726 * 727 * This can only fail for dump operations because the skb is always 728 * properly sized for single flows. 729 */ 730 start = nla_nest_start(skb, OVS_FLOW_ATTR_ACTIONS); 731 if (start) { 732 const struct sw_flow_actions *sf_acts; 733 734 sf_acts = rcu_dereference_ovsl(flow->sf_acts); 735 err = ovs_nla_put_actions(sf_acts->actions, 736 sf_acts->actions_len, skb); 737 738 if (!err) 739 nla_nest_end(skb, start); 740 else { 741 if (skb_orig_len) 742 goto error; 743 744 nla_nest_cancel(skb, start); 745 } 746 } else if (skb_orig_len) 747 goto nla_put_failure; 748 749 return genlmsg_end(skb, ovs_header); 750 751nla_put_failure: 752 err = -EMSGSIZE; 753error: 754 genlmsg_cancel(skb, ovs_header); 755 return err; 756} 757 758/* May not be called with RCU read lock. */ 759static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *acts, 760 struct genl_info *info, 761 bool always) 762{ 763 struct sk_buff *skb; 764 765 if (!always && !ovs_must_notify(info, &ovs_dp_flow_multicast_group)) 766 return NULL; 767 768 skb = genlmsg_new_unicast(ovs_flow_cmd_msg_size(acts), info, GFP_KERNEL); 769 if (!skb) 770 return ERR_PTR(-ENOMEM); 771 772 return skb; 773} 774 775/* Called with ovs_mutex. */ 776static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow, 777 int dp_ifindex, 778 struct genl_info *info, u8 cmd, 779 bool always) 780{ 781 struct sk_buff *skb; 782 int retval; 783 784 skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts), info, 785 always); 786 if (IS_ERR_OR_NULL(skb)) 787 return skb; 788 789 retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb, 790 info->snd_portid, info->snd_seq, 0, 791 cmd); 792 BUG_ON(retval < 0); 793 return skb; 794} 795 796static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) 797{ 798 struct nlattr **a = info->attrs; 799 struct ovs_header *ovs_header = info->userhdr; 800 struct sw_flow *flow, *new_flow; 801 struct sw_flow_mask mask; 802 struct sk_buff *reply; 803 struct datapath *dp; 804 struct sw_flow_actions *acts; 805 struct sw_flow_match match; 806 int error; 807 808 /* Must have key and actions. */ 809 error = -EINVAL; 810 if (!a[OVS_FLOW_ATTR_KEY]) 811 goto error; 812 if (!a[OVS_FLOW_ATTR_ACTIONS]) 813 goto error; 814 815 /* Most of the time we need to allocate a new flow, do it before 816 * locking. 817 */ 818 new_flow = ovs_flow_alloc(); 819 if (IS_ERR(new_flow)) { 820 error = PTR_ERR(new_flow); 821 goto error; 822 } 823 824 /* Extract key. */ 825 ovs_match_init(&match, &new_flow->unmasked_key, &mask); 826 error = ovs_nla_get_match(&match, 827 a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]); 828 if (error) 829 goto err_kfree_flow; 830 831 ovs_flow_mask_key(&new_flow->key, &new_flow->unmasked_key, &mask); 832 833 /* Validate actions. */ 834 acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_FLOW_ATTR_ACTIONS])); 835 error = PTR_ERR(acts); 836 if (IS_ERR(acts)) 837 goto err_kfree_flow; 838 839 error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key, 840 0, &acts); 841 if (error) { 842 OVS_NLERR("Flow actions may not be safe on all matching packets.\n"); 843 goto err_kfree_acts; 844 } 845 846 reply = ovs_flow_cmd_alloc_info(acts, info, false); 847 if (IS_ERR(reply)) { 848 error = PTR_ERR(reply); 849 goto err_kfree_acts; 850 } 851 852 ovs_lock(); 853 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); 854 if (unlikely(!dp)) { 855 error = -ENODEV; 856 goto err_unlock_ovs; 857 } 858 /* Check if this is a duplicate flow */ 859 flow = ovs_flow_tbl_lookup(&dp->table, &new_flow->unmasked_key); 860 if (likely(!flow)) { 861 rcu_assign_pointer(new_flow->sf_acts, acts); 862 863 /* Put flow in bucket. */ 864 error = ovs_flow_tbl_insert(&dp->table, new_flow, &mask); 865 if (unlikely(error)) { 866 acts = NULL; 867 goto err_unlock_ovs; 868 } 869 870 if (unlikely(reply)) { 871 error = ovs_flow_cmd_fill_info(new_flow, 872 ovs_header->dp_ifindex, 873 reply, info->snd_portid, 874 info->snd_seq, 0, 875 OVS_FLOW_CMD_NEW); 876 BUG_ON(error < 0); 877 } 878 ovs_unlock(); 879 } else { 880 struct sw_flow_actions *old_acts; 881 882 /* Bail out if we're not allowed to modify an existing flow. 883 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL 884 * because Generic Netlink treats the latter as a dump 885 * request. We also accept NLM_F_EXCL in case that bug ever 886 * gets fixed. 887 */ 888 if (unlikely(info->nlhdr->nlmsg_flags & (NLM_F_CREATE 889 | NLM_F_EXCL))) { 890 error = -EEXIST; 891 goto err_unlock_ovs; 892 } 893 /* The unmasked key has to be the same for flow updates. */ 894 if (unlikely(!ovs_flow_cmp_unmasked_key(flow, &match))) { 895 flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); 896 if (!flow) { 897 error = -ENOENT; 898 goto err_unlock_ovs; 899 } 900 } 901 /* Update actions. */ 902 old_acts = ovsl_dereference(flow->sf_acts); 903 rcu_assign_pointer(flow->sf_acts, acts); 904 905 if (unlikely(reply)) { 906 error = ovs_flow_cmd_fill_info(flow, 907 ovs_header->dp_ifindex, 908 reply, info->snd_portid, 909 info->snd_seq, 0, 910 OVS_FLOW_CMD_NEW); 911 BUG_ON(error < 0); 912 } 913 ovs_unlock(); 914 915 ovs_nla_free_flow_actions(old_acts); 916 ovs_flow_free(new_flow, false); 917 } 918 919 if (reply) 920 ovs_notify(&dp_flow_genl_family, reply, info); 921 return 0; 922 923err_unlock_ovs: 924 ovs_unlock(); 925 kfree_skb(reply); 926err_kfree_acts: 927 kfree(acts); 928err_kfree_flow: 929 ovs_flow_free(new_flow, false); 930error: 931 return error; 932} 933 934static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) 935{ 936 struct nlattr **a = info->attrs; 937 struct ovs_header *ovs_header = info->userhdr; 938 struct sw_flow_key key, masked_key; 939 struct sw_flow *flow; 940 struct sw_flow_mask mask; 941 struct sk_buff *reply = NULL; 942 struct datapath *dp; 943 struct sw_flow_actions *old_acts = NULL, *acts = NULL; 944 struct sw_flow_match match; 945 int error; 946 947 /* Extract key. */ 948 error = -EINVAL; 949 if (!a[OVS_FLOW_ATTR_KEY]) 950 goto error; 951 952 ovs_match_init(&match, &key, &mask); 953 error = ovs_nla_get_match(&match, 954 a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]); 955 if (error) 956 goto error; 957 958 /* Validate actions. */ 959 if (a[OVS_FLOW_ATTR_ACTIONS]) { 960 acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_FLOW_ATTR_ACTIONS])); 961 error = PTR_ERR(acts); 962 if (IS_ERR(acts)) 963 goto error; 964 965 ovs_flow_mask_key(&masked_key, &key, &mask); 966 error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], 967 &masked_key, 0, &acts); 968 if (error) { 969 OVS_NLERR("Flow actions may not be safe on all matching packets.\n"); 970 goto err_kfree_acts; 971 } 972 } 973 974 /* Can allocate before locking if have acts. */ 975 if (acts) { 976 reply = ovs_flow_cmd_alloc_info(acts, info, false); 977 if (IS_ERR(reply)) { 978 error = PTR_ERR(reply); 979 goto err_kfree_acts; 980 } 981 } 982 983 ovs_lock(); 984 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); 985 if (unlikely(!dp)) { 986 error = -ENODEV; 987 goto err_unlock_ovs; 988 } 989 /* Check that the flow exists. */ 990 flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); 991 if (unlikely(!flow)) { 992 error = -ENOENT; 993 goto err_unlock_ovs; 994 } 995 996 /* Update actions, if present. */ 997 if (likely(acts)) { 998 old_acts = ovsl_dereference(flow->sf_acts); 999 rcu_assign_pointer(flow->sf_acts, acts); 1000 1001 if (unlikely(reply)) { 1002 error = ovs_flow_cmd_fill_info(flow, 1003 ovs_header->dp_ifindex, 1004 reply, info->snd_portid, 1005 info->snd_seq, 0, 1006 OVS_FLOW_CMD_NEW); 1007 BUG_ON(error < 0); 1008 } 1009 } else { 1010 /* Could not alloc without acts before locking. */ 1011 reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, 1012 info, OVS_FLOW_CMD_NEW, false); 1013 if (unlikely(IS_ERR(reply))) { 1014 error = PTR_ERR(reply); 1015 goto err_unlock_ovs; 1016 } 1017 } 1018 1019 /* Clear stats. */ 1020 if (a[OVS_FLOW_ATTR_CLEAR]) 1021 ovs_flow_stats_clear(flow); 1022 ovs_unlock(); 1023 1024 if (reply) 1025 ovs_notify(&dp_flow_genl_family, reply, info); 1026 if (old_acts) 1027 ovs_nla_free_flow_actions(old_acts); 1028 1029 return 0; 1030 1031err_unlock_ovs: 1032 ovs_unlock(); 1033 kfree_skb(reply); 1034err_kfree_acts: 1035 kfree(acts); 1036error: 1037 return error; 1038} 1039 1040static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) 1041{ 1042 struct nlattr **a = info->attrs; 1043 struct ovs_header *ovs_header = info->userhdr; 1044 struct sw_flow_key key; 1045 struct sk_buff *reply; 1046 struct sw_flow *flow; 1047 struct datapath *dp; 1048 struct sw_flow_match match; 1049 int err; 1050 1051 if (!a[OVS_FLOW_ATTR_KEY]) { 1052 OVS_NLERR("Flow get message rejected, Key attribute missing.\n"); 1053 return -EINVAL; 1054 } 1055 1056 ovs_match_init(&match, &key, NULL); 1057 err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL); 1058 if (err) 1059 return err; 1060 1061 ovs_lock(); 1062 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); 1063 if (!dp) { 1064 err = -ENODEV; 1065 goto unlock; 1066 } 1067 1068 flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); 1069 if (!flow) { 1070 err = -ENOENT; 1071 goto unlock; 1072 } 1073 1074 reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info, 1075 OVS_FLOW_CMD_NEW, true); 1076 if (IS_ERR(reply)) { 1077 err = PTR_ERR(reply); 1078 goto unlock; 1079 } 1080 1081 ovs_unlock(); 1082 return genlmsg_reply(reply, info); 1083unlock: 1084 ovs_unlock(); 1085 return err; 1086} 1087 1088static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) 1089{ 1090 struct nlattr **a = info->attrs; 1091 struct ovs_header *ovs_header = info->userhdr; 1092 struct sw_flow_key key; 1093 struct sk_buff *reply; 1094 struct sw_flow *flow; 1095 struct datapath *dp; 1096 struct sw_flow_match match; 1097 int err; 1098 1099 if (likely(a[OVS_FLOW_ATTR_KEY])) { 1100 ovs_match_init(&match, &key, NULL); 1101 err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL); 1102 if (unlikely(err)) 1103 return err; 1104 } 1105 1106 ovs_lock(); 1107 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); 1108 if (unlikely(!dp)) { 1109 err = -ENODEV; 1110 goto unlock; 1111 } 1112 1113 if (unlikely(!a[OVS_FLOW_ATTR_KEY])) { 1114 err = ovs_flow_tbl_flush(&dp->table); 1115 goto unlock; 1116 } 1117 1118 flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); 1119 if (unlikely(!flow)) { 1120 err = -ENOENT; 1121 goto unlock; 1122 } 1123 1124 ovs_flow_tbl_remove(&dp->table, flow); 1125 ovs_unlock(); 1126 1127 reply = ovs_flow_cmd_alloc_info((const struct sw_flow_actions __force *) flow->sf_acts, 1128 info, false); 1129 if (likely(reply)) { 1130 if (likely(!IS_ERR(reply))) { 1131 rcu_read_lock(); /*To keep RCU checker happy. */ 1132 err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, 1133 reply, info->snd_portid, 1134 info->snd_seq, 0, 1135 OVS_FLOW_CMD_DEL); 1136 rcu_read_unlock(); 1137 BUG_ON(err < 0); 1138 1139 ovs_notify(&dp_flow_genl_family, reply, info); 1140 } else { 1141 netlink_set_err(sock_net(skb->sk)->genl_sock, 0, 0, PTR_ERR(reply)); 1142 } 1143 } 1144 1145 ovs_flow_free(flow, true); 1146 return 0; 1147unlock: 1148 ovs_unlock(); 1149 return err; 1150} 1151 1152static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) 1153{ 1154 struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh)); 1155 struct table_instance *ti; 1156 struct datapath *dp; 1157 1158 rcu_read_lock(); 1159 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); 1160 if (!dp) { 1161 rcu_read_unlock(); 1162 return -ENODEV; 1163 } 1164 1165 ti = rcu_dereference(dp->table.ti); 1166 for (;;) { 1167 struct sw_flow *flow; 1168 u32 bucket, obj; 1169 1170 bucket = cb->args[0]; 1171 obj = cb->args[1]; 1172 flow = ovs_flow_tbl_dump_next(ti, &bucket, &obj); 1173 if (!flow) 1174 break; 1175 1176 if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb, 1177 NETLINK_CB(cb->skb).portid, 1178 cb->nlh->nlmsg_seq, NLM_F_MULTI, 1179 OVS_FLOW_CMD_NEW) < 0) 1180 break; 1181 1182 cb->args[0] = bucket; 1183 cb->args[1] = obj; 1184 } 1185 rcu_read_unlock(); 1186 return skb->len; 1187} 1188 1189static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = { 1190 [OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED }, 1191 [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED }, 1192 [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG }, 1193}; 1194 1195static const struct genl_ops dp_flow_genl_ops[] = { 1196 { .cmd = OVS_FLOW_CMD_NEW, 1197 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ 1198 .policy = flow_policy, 1199 .doit = ovs_flow_cmd_new 1200 }, 1201 { .cmd = OVS_FLOW_CMD_DEL, 1202 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ 1203 .policy = flow_policy, 1204 .doit = ovs_flow_cmd_del 1205 }, 1206 { .cmd = OVS_FLOW_CMD_GET, 1207 .flags = 0, /* OK for unprivileged users. */ 1208 .policy = flow_policy, 1209 .doit = ovs_flow_cmd_get, 1210 .dumpit = ovs_flow_cmd_dump 1211 }, 1212 { .cmd = OVS_FLOW_CMD_SET, 1213 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ 1214 .policy = flow_policy, 1215 .doit = ovs_flow_cmd_set, 1216 }, 1217}; 1218 1219static struct genl_family dp_flow_genl_family = { 1220 .id = GENL_ID_GENERATE, 1221 .hdrsize = sizeof(struct ovs_header), 1222 .name = OVS_FLOW_FAMILY, 1223 .version = OVS_FLOW_VERSION, 1224 .maxattr = OVS_FLOW_ATTR_MAX, 1225 .netnsok = true, 1226 .parallel_ops = true, 1227 .ops = dp_flow_genl_ops, 1228 .n_ops = ARRAY_SIZE(dp_flow_genl_ops), 1229 .mcgrps = &ovs_dp_flow_multicast_group, 1230 .n_mcgrps = 1, 1231}; 1232 1233static size_t ovs_dp_cmd_msg_size(void) 1234{ 1235 size_t msgsize = NLMSG_ALIGN(sizeof(struct ovs_header)); 1236 1237 msgsize += nla_total_size(IFNAMSIZ); 1238 msgsize += nla_total_size(sizeof(struct ovs_dp_stats)); 1239 msgsize += nla_total_size(sizeof(struct ovs_dp_megaflow_stats)); 1240 msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_USER_FEATURES */ 1241 1242 return msgsize; 1243} 1244 1245/* Called with ovs_mutex or RCU read lock. */ 1246static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb, 1247 u32 portid, u32 seq, u32 flags, u8 cmd) 1248{ 1249 struct ovs_header *ovs_header; 1250 struct ovs_dp_stats dp_stats; 1251 struct ovs_dp_megaflow_stats dp_megaflow_stats; 1252 int err; 1253 1254 ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family, 1255 flags, cmd); 1256 if (!ovs_header) 1257 goto error; 1258 1259 ovs_header->dp_ifindex = get_dpifindex(dp); 1260 1261 err = nla_put_string(skb, OVS_DP_ATTR_NAME, ovs_dp_name(dp)); 1262 if (err) 1263 goto nla_put_failure; 1264 1265 get_dp_stats(dp, &dp_stats, &dp_megaflow_stats); 1266 if (nla_put(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats), 1267 &dp_stats)) 1268 goto nla_put_failure; 1269 1270 if (nla_put(skb, OVS_DP_ATTR_MEGAFLOW_STATS, 1271 sizeof(struct ovs_dp_megaflow_stats), 1272 &dp_megaflow_stats)) 1273 goto nla_put_failure; 1274 1275 if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features)) 1276 goto nla_put_failure; 1277 1278 return genlmsg_end(skb, ovs_header); 1279 1280nla_put_failure: 1281 genlmsg_cancel(skb, ovs_header); 1282error: 1283 return -EMSGSIZE; 1284} 1285 1286static struct sk_buff *ovs_dp_cmd_alloc_info(struct genl_info *info) 1287{ 1288 return genlmsg_new_unicast(ovs_dp_cmd_msg_size(), info, GFP_KERNEL); 1289} 1290 1291/* Called with rcu_read_lock or ovs_mutex. */ 1292static struct datapath *lookup_datapath(struct net *net, 1293 struct ovs_header *ovs_header, 1294 struct nlattr *a[OVS_DP_ATTR_MAX + 1]) 1295{ 1296 struct datapath *dp; 1297 1298 if (!a[OVS_DP_ATTR_NAME]) 1299 dp = get_dp(net, ovs_header->dp_ifindex); 1300 else { 1301 struct vport *vport; 1302 1303 vport = ovs_vport_locate(net, nla_data(a[OVS_DP_ATTR_NAME])); 1304 dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL; 1305 } 1306 return dp ? dp : ERR_PTR(-ENODEV); 1307} 1308 1309static void ovs_dp_reset_user_features(struct sk_buff *skb, struct genl_info *info) 1310{ 1311 struct datapath *dp; 1312 1313 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); 1314 if (IS_ERR(dp)) 1315 return; 1316 1317 WARN(dp->user_features, "Dropping previously announced user features\n"); 1318 dp->user_features = 0; 1319} 1320 1321static void ovs_dp_change(struct datapath *dp, struct nlattr **a) 1322{ 1323 if (a[OVS_DP_ATTR_USER_FEATURES]) 1324 dp->user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]); 1325} 1326 1327static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) 1328{ 1329 struct nlattr **a = info->attrs; 1330 struct vport_parms parms; 1331 struct sk_buff *reply; 1332 struct datapath *dp; 1333 struct vport *vport; 1334 struct ovs_net *ovs_net; 1335 int err, i; 1336 1337 err = -EINVAL; 1338 if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID]) 1339 goto err; 1340 1341 reply = ovs_dp_cmd_alloc_info(info); 1342 if (!reply) 1343 return -ENOMEM; 1344 1345 err = -ENOMEM; 1346 dp = kzalloc(sizeof(*dp), GFP_KERNEL); 1347 if (dp == NULL) 1348 goto err_free_reply; 1349 1350 ovs_dp_set_net(dp, hold_net(sock_net(skb->sk))); 1351 1352 /* Allocate table. */ 1353 err = ovs_flow_tbl_init(&dp->table); 1354 if (err) 1355 goto err_free_dp; 1356 1357 dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu); 1358 if (!dp->stats_percpu) { 1359 err = -ENOMEM; 1360 goto err_destroy_table; 1361 } 1362 1363 dp->ports = kmalloc(DP_VPORT_HASH_BUCKETS * sizeof(struct hlist_head), 1364 GFP_KERNEL); 1365 if (!dp->ports) { 1366 err = -ENOMEM; 1367 goto err_destroy_percpu; 1368 } 1369 1370 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) 1371 INIT_HLIST_HEAD(&dp->ports[i]); 1372 1373 /* Set up our datapath device. */ 1374 parms.name = nla_data(a[OVS_DP_ATTR_NAME]); 1375 parms.type = OVS_VPORT_TYPE_INTERNAL; 1376 parms.options = NULL; 1377 parms.dp = dp; 1378 parms.port_no = OVSP_LOCAL; 1379 parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID]; 1380 1381 ovs_dp_change(dp, a); 1382 1383 /* So far only local changes have been made, now need the lock. */ 1384 ovs_lock(); 1385 1386 vport = new_vport(&parms); 1387 if (IS_ERR(vport)) { 1388 err = PTR_ERR(vport); 1389 if (err == -EBUSY) 1390 err = -EEXIST; 1391 1392 if (err == -EEXIST) { 1393 /* An outdated user space instance that does not understand 1394 * the concept of user_features has attempted to create a new 1395 * datapath and is likely to reuse it. Drop all user features. 1396 */ 1397 if (info->genlhdr->version < OVS_DP_VER_FEATURES) 1398 ovs_dp_reset_user_features(skb, info); 1399 } 1400 1401 goto err_destroy_ports_array; 1402 } 1403 1404 err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid, 1405 info->snd_seq, 0, OVS_DP_CMD_NEW); 1406 BUG_ON(err < 0); 1407 1408 ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id); 1409 list_add_tail_rcu(&dp->list_node, &ovs_net->dps); 1410 1411 ovs_unlock(); 1412 1413 ovs_notify(&dp_datapath_genl_family, reply, info); 1414 return 0; 1415 1416err_destroy_ports_array: 1417 ovs_unlock(); 1418 kfree(dp->ports); 1419err_destroy_percpu: 1420 free_percpu(dp->stats_percpu); 1421err_destroy_table: 1422 ovs_flow_tbl_destroy(&dp->table, false); 1423err_free_dp: 1424 release_net(ovs_dp_get_net(dp)); 1425 kfree(dp); 1426err_free_reply: 1427 kfree_skb(reply); 1428err: 1429 return err; 1430} 1431 1432/* Called with ovs_mutex. */ 1433static void __dp_destroy(struct datapath *dp) 1434{ 1435 int i; 1436 1437 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) { 1438 struct vport *vport; 1439 struct hlist_node *n; 1440 1441 hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node) 1442 if (vport->port_no != OVSP_LOCAL) 1443 ovs_dp_detach_port(vport); 1444 } 1445 1446 list_del_rcu(&dp->list_node); 1447 1448 /* OVSP_LOCAL is datapath internal port. We need to make sure that 1449 * all ports in datapath are destroyed first before freeing datapath. 1450 */ 1451 ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL)); 1452 1453 /* RCU destroy the flow table */ 1454 ovs_flow_tbl_destroy(&dp->table, true); 1455 1456 call_rcu(&dp->rcu, destroy_dp_rcu); 1457} 1458 1459static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info) 1460{ 1461 struct sk_buff *reply; 1462 struct datapath *dp; 1463 int err; 1464 1465 reply = ovs_dp_cmd_alloc_info(info); 1466 if (!reply) 1467 return -ENOMEM; 1468 1469 ovs_lock(); 1470 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); 1471 err = PTR_ERR(dp); 1472 if (IS_ERR(dp)) 1473 goto err_unlock_free; 1474 1475 err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid, 1476 info->snd_seq, 0, OVS_DP_CMD_DEL); 1477 BUG_ON(err < 0); 1478 1479 __dp_destroy(dp); 1480 ovs_unlock(); 1481 1482 ovs_notify(&dp_datapath_genl_family, reply, info); 1483 1484 return 0; 1485 1486err_unlock_free: 1487 ovs_unlock(); 1488 kfree_skb(reply); 1489 return err; 1490} 1491 1492static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info) 1493{ 1494 struct sk_buff *reply; 1495 struct datapath *dp; 1496 int err; 1497 1498 reply = ovs_dp_cmd_alloc_info(info); 1499 if (!reply) 1500 return -ENOMEM; 1501 1502 ovs_lock(); 1503 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); 1504 err = PTR_ERR(dp); 1505 if (IS_ERR(dp)) 1506 goto err_unlock_free; 1507 1508 ovs_dp_change(dp, info->attrs); 1509 1510 err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid, 1511 info->snd_seq, 0, OVS_DP_CMD_NEW); 1512 BUG_ON(err < 0); 1513 1514 ovs_unlock(); 1515 ovs_notify(&dp_datapath_genl_family, reply, info); 1516 1517 return 0; 1518 1519err_unlock_free: 1520 ovs_unlock(); 1521 kfree_skb(reply); 1522 return err; 1523} 1524 1525static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info) 1526{ 1527 struct sk_buff *reply; 1528 struct datapath *dp; 1529 int err; 1530 1531 reply = ovs_dp_cmd_alloc_info(info); 1532 if (!reply) 1533 return -ENOMEM; 1534 1535 rcu_read_lock(); 1536 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); 1537 if (IS_ERR(dp)) { 1538 err = PTR_ERR(dp); 1539 goto err_unlock_free; 1540 } 1541 err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid, 1542 info->snd_seq, 0, OVS_DP_CMD_NEW); 1543 BUG_ON(err < 0); 1544 rcu_read_unlock(); 1545 1546 return genlmsg_reply(reply, info); 1547 1548err_unlock_free: 1549 rcu_read_unlock(); 1550 kfree_skb(reply); 1551 return err; 1552} 1553 1554static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) 1555{ 1556 struct ovs_net *ovs_net = net_generic(sock_net(skb->sk), ovs_net_id); 1557 struct datapath *dp; 1558 int skip = cb->args[0]; 1559 int i = 0; 1560 1561 rcu_read_lock(); 1562 list_for_each_entry_rcu(dp, &ovs_net->dps, list_node) { 1563 if (i >= skip && 1564 ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid, 1565 cb->nlh->nlmsg_seq, NLM_F_MULTI, 1566 OVS_DP_CMD_NEW) < 0) 1567 break; 1568 i++; 1569 } 1570 rcu_read_unlock(); 1571 1572 cb->args[0] = i; 1573 1574 return skb->len; 1575} 1576 1577static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = { 1578 [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, 1579 [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 }, 1580 [OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 }, 1581}; 1582 1583static const struct genl_ops dp_datapath_genl_ops[] = { 1584 { .cmd = OVS_DP_CMD_NEW, 1585 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ 1586 .policy = datapath_policy, 1587 .doit = ovs_dp_cmd_new 1588 }, 1589 { .cmd = OVS_DP_CMD_DEL, 1590 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ 1591 .policy = datapath_policy, 1592 .doit = ovs_dp_cmd_del 1593 }, 1594 { .cmd = OVS_DP_CMD_GET, 1595 .flags = 0, /* OK for unprivileged users. */ 1596 .policy = datapath_policy, 1597 .doit = ovs_dp_cmd_get, 1598 .dumpit = ovs_dp_cmd_dump 1599 }, 1600 { .cmd = OVS_DP_CMD_SET, 1601 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ 1602 .policy = datapath_policy, 1603 .doit = ovs_dp_cmd_set, 1604 }, 1605}; 1606 1607static struct genl_family dp_datapath_genl_family = { 1608 .id = GENL_ID_GENERATE, 1609 .hdrsize = sizeof(struct ovs_header), 1610 .name = OVS_DATAPATH_FAMILY, 1611 .version = OVS_DATAPATH_VERSION, 1612 .maxattr = OVS_DP_ATTR_MAX, 1613 .netnsok = true, 1614 .parallel_ops = true, 1615 .ops = dp_datapath_genl_ops, 1616 .n_ops = ARRAY_SIZE(dp_datapath_genl_ops), 1617 .mcgrps = &ovs_dp_datapath_multicast_group, 1618 .n_mcgrps = 1, 1619}; 1620 1621/* Called with ovs_mutex or RCU read lock. */ 1622static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb, 1623 u32 portid, u32 seq, u32 flags, u8 cmd) 1624{ 1625 struct ovs_header *ovs_header; 1626 struct ovs_vport_stats vport_stats; 1627 int err; 1628 1629 ovs_header = genlmsg_put(skb, portid, seq, &dp_vport_genl_family, 1630 flags, cmd); 1631 if (!ovs_header) 1632 return -EMSGSIZE; 1633 1634 ovs_header->dp_ifindex = get_dpifindex(vport->dp); 1635 1636 if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) || 1637 nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) || 1638 nla_put_string(skb, OVS_VPORT_ATTR_NAME, 1639 vport->ops->get_name(vport))) 1640 goto nla_put_failure; 1641 1642 ovs_vport_get_stats(vport, &vport_stats); 1643 if (nla_put(skb, OVS_VPORT_ATTR_STATS, sizeof(struct ovs_vport_stats), 1644 &vport_stats)) 1645 goto nla_put_failure; 1646 1647 if (ovs_vport_get_upcall_portids(vport, skb)) 1648 goto nla_put_failure; 1649 1650 err = ovs_vport_get_options(vport, skb); 1651 if (err == -EMSGSIZE) 1652 goto error; 1653 1654 return genlmsg_end(skb, ovs_header); 1655 1656nla_put_failure: 1657 err = -EMSGSIZE; 1658error: 1659 genlmsg_cancel(skb, ovs_header); 1660 return err; 1661} 1662 1663static struct sk_buff *ovs_vport_cmd_alloc_info(void) 1664{ 1665 return nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 1666} 1667 1668/* Called with ovs_mutex, only via ovs_dp_notify_wq(). */ 1669struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid, 1670 u32 seq, u8 cmd) 1671{ 1672 struct sk_buff *skb; 1673 int retval; 1674 1675 skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); 1676 if (!skb) 1677 return ERR_PTR(-ENOMEM); 1678 1679 retval = ovs_vport_cmd_fill_info(vport, skb, portid, seq, 0, cmd); 1680 BUG_ON(retval < 0); 1681 1682 return skb; 1683} 1684 1685/* Called with ovs_mutex or RCU read lock. */ 1686static struct vport *lookup_vport(struct net *net, 1687 struct ovs_header *ovs_header, 1688 struct nlattr *a[OVS_VPORT_ATTR_MAX + 1]) 1689{ 1690 struct datapath *dp; 1691 struct vport *vport; 1692 1693 if (a[OVS_VPORT_ATTR_NAME]) { 1694 vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME])); 1695 if (!vport) 1696 return ERR_PTR(-ENODEV); 1697 if (ovs_header->dp_ifindex && 1698 ovs_header->dp_ifindex != get_dpifindex(vport->dp)) 1699 return ERR_PTR(-ENODEV); 1700 return vport; 1701 } else if (a[OVS_VPORT_ATTR_PORT_NO]) { 1702 u32 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]); 1703 1704 if (port_no >= DP_MAX_PORTS) 1705 return ERR_PTR(-EFBIG); 1706 1707 dp = get_dp(net, ovs_header->dp_ifindex); 1708 if (!dp) 1709 return ERR_PTR(-ENODEV); 1710 1711 vport = ovs_vport_ovsl_rcu(dp, port_no); 1712 if (!vport) 1713 return ERR_PTR(-ENODEV); 1714 return vport; 1715 } else 1716 return ERR_PTR(-EINVAL); 1717} 1718 1719static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info) 1720{ 1721 struct nlattr **a = info->attrs; 1722 struct ovs_header *ovs_header = info->userhdr; 1723 struct vport_parms parms; 1724 struct sk_buff *reply; 1725 struct vport *vport; 1726 struct datapath *dp; 1727 u32 port_no; 1728 int err; 1729 1730 if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] || 1731 !a[OVS_VPORT_ATTR_UPCALL_PID]) 1732 return -EINVAL; 1733 1734 port_no = a[OVS_VPORT_ATTR_PORT_NO] 1735 ? nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]) : 0; 1736 if (port_no >= DP_MAX_PORTS) 1737 return -EFBIG; 1738 1739 reply = ovs_vport_cmd_alloc_info(); 1740 if (!reply) 1741 return -ENOMEM; 1742 1743 ovs_lock(); 1744 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); 1745 err = -ENODEV; 1746 if (!dp) 1747 goto exit_unlock_free; 1748 1749 if (port_no) { 1750 vport = ovs_vport_ovsl(dp, port_no); 1751 err = -EBUSY; 1752 if (vport) 1753 goto exit_unlock_free; 1754 } else { 1755 for (port_no = 1; ; port_no++) { 1756 if (port_no >= DP_MAX_PORTS) { 1757 err = -EFBIG; 1758 goto exit_unlock_free; 1759 } 1760 vport = ovs_vport_ovsl(dp, port_no); 1761 if (!vport) 1762 break; 1763 } 1764 } 1765 1766 parms.name = nla_data(a[OVS_VPORT_ATTR_NAME]); 1767 parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]); 1768 parms.options = a[OVS_VPORT_ATTR_OPTIONS]; 1769 parms.dp = dp; 1770 parms.port_no = port_no; 1771 parms.upcall_portids = a[OVS_VPORT_ATTR_UPCALL_PID]; 1772 1773 vport = new_vport(&parms); 1774 err = PTR_ERR(vport); 1775 if (IS_ERR(vport)) 1776 goto exit_unlock_free; 1777 1778 err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid, 1779 info->snd_seq, 0, OVS_VPORT_CMD_NEW); 1780 BUG_ON(err < 0); 1781 ovs_unlock(); 1782 1783 ovs_notify(&dp_vport_genl_family, reply, info); 1784 return 0; 1785 1786exit_unlock_free: 1787 ovs_unlock(); 1788 kfree_skb(reply); 1789 return err; 1790} 1791 1792static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info) 1793{ 1794 struct nlattr **a = info->attrs; 1795 struct sk_buff *reply; 1796 struct vport *vport; 1797 int err; 1798 1799 reply = ovs_vport_cmd_alloc_info(); 1800 if (!reply) 1801 return -ENOMEM; 1802 1803 ovs_lock(); 1804 vport = lookup_vport(sock_net(skb->sk), info->userhdr, a); 1805 err = PTR_ERR(vport); 1806 if (IS_ERR(vport)) 1807 goto exit_unlock_free; 1808 1809 if (a[OVS_VPORT_ATTR_TYPE] && 1810 nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) { 1811 err = -EINVAL; 1812 goto exit_unlock_free; 1813 } 1814 1815 if (a[OVS_VPORT_ATTR_OPTIONS]) { 1816 err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]); 1817 if (err) 1818 goto exit_unlock_free; 1819 } 1820 1821 1822 if (a[OVS_VPORT_ATTR_UPCALL_PID]) { 1823 struct nlattr *ids = a[OVS_VPORT_ATTR_UPCALL_PID]; 1824 1825 err = ovs_vport_set_upcall_portids(vport, ids); 1826 if (err) 1827 goto exit_unlock_free; 1828 } 1829 1830 err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid, 1831 info->snd_seq, 0, OVS_VPORT_CMD_NEW); 1832 BUG_ON(err < 0); 1833 1834 ovs_unlock(); 1835 ovs_notify(&dp_vport_genl_family, reply, info); 1836 return 0; 1837 1838exit_unlock_free: 1839 ovs_unlock(); 1840 kfree_skb(reply); 1841 return err; 1842} 1843 1844static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info) 1845{ 1846 struct nlattr **a = info->attrs; 1847 struct sk_buff *reply; 1848 struct vport *vport; 1849 int err; 1850 1851 reply = ovs_vport_cmd_alloc_info(); 1852 if (!reply) 1853 return -ENOMEM; 1854 1855 ovs_lock(); 1856 vport = lookup_vport(sock_net(skb->sk), info->userhdr, a); 1857 err = PTR_ERR(vport); 1858 if (IS_ERR(vport)) 1859 goto exit_unlock_free; 1860 1861 if (vport->port_no == OVSP_LOCAL) { 1862 err = -EINVAL; 1863 goto exit_unlock_free; 1864 } 1865 1866 err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid, 1867 info->snd_seq, 0, OVS_VPORT_CMD_DEL); 1868 BUG_ON(err < 0); 1869 ovs_dp_detach_port(vport); 1870 ovs_unlock(); 1871 1872 ovs_notify(&dp_vport_genl_family, reply, info); 1873 return 0; 1874 1875exit_unlock_free: 1876 ovs_unlock(); 1877 kfree_skb(reply); 1878 return err; 1879} 1880 1881static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info) 1882{ 1883 struct nlattr **a = info->attrs; 1884 struct ovs_header *ovs_header = info->userhdr; 1885 struct sk_buff *reply; 1886 struct vport *vport; 1887 int err; 1888 1889 reply = ovs_vport_cmd_alloc_info(); 1890 if (!reply) 1891 return -ENOMEM; 1892 1893 rcu_read_lock(); 1894 vport = lookup_vport(sock_net(skb->sk), ovs_header, a); 1895 err = PTR_ERR(vport); 1896 if (IS_ERR(vport)) 1897 goto exit_unlock_free; 1898 err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid, 1899 info->snd_seq, 0, OVS_VPORT_CMD_NEW); 1900 BUG_ON(err < 0); 1901 rcu_read_unlock(); 1902 1903 return genlmsg_reply(reply, info); 1904 1905exit_unlock_free: 1906 rcu_read_unlock(); 1907 kfree_skb(reply); 1908 return err; 1909} 1910 1911static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) 1912{ 1913 struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh)); 1914 struct datapath *dp; 1915 int bucket = cb->args[0], skip = cb->args[1]; 1916 int i, j = 0; 1917 1918 rcu_read_lock(); 1919 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); 1920 if (!dp) { 1921 rcu_read_unlock(); 1922 return -ENODEV; 1923 } 1924 for (i = bucket; i < DP_VPORT_HASH_BUCKETS; i++) { 1925 struct vport *vport; 1926 1927 j = 0; 1928 hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) { 1929 if (j >= skip && 1930 ovs_vport_cmd_fill_info(vport, skb, 1931 NETLINK_CB(cb->skb).portid, 1932 cb->nlh->nlmsg_seq, 1933 NLM_F_MULTI, 1934 OVS_VPORT_CMD_NEW) < 0) 1935 goto out; 1936 1937 j++; 1938 } 1939 skip = 0; 1940 } 1941out: 1942 rcu_read_unlock(); 1943 1944 cb->args[0] = i; 1945 cb->args[1] = j; 1946 1947 return skb->len; 1948} 1949 1950static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = { 1951 [OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, 1952 [OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) }, 1953 [OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 }, 1954 [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 }, 1955 [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 }, 1956 [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED }, 1957}; 1958 1959static const struct genl_ops dp_vport_genl_ops[] = { 1960 { .cmd = OVS_VPORT_CMD_NEW, 1961 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ 1962 .policy = vport_policy, 1963 .doit = ovs_vport_cmd_new 1964 }, 1965 { .cmd = OVS_VPORT_CMD_DEL, 1966 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ 1967 .policy = vport_policy, 1968 .doit = ovs_vport_cmd_del 1969 }, 1970 { .cmd = OVS_VPORT_CMD_GET, 1971 .flags = 0, /* OK for unprivileged users. */ 1972 .policy = vport_policy, 1973 .doit = ovs_vport_cmd_get, 1974 .dumpit = ovs_vport_cmd_dump 1975 }, 1976 { .cmd = OVS_VPORT_CMD_SET, 1977 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ 1978 .policy = vport_policy, 1979 .doit = ovs_vport_cmd_set, 1980 }, 1981}; 1982 1983struct genl_family dp_vport_genl_family = { 1984 .id = GENL_ID_GENERATE, 1985 .hdrsize = sizeof(struct ovs_header), 1986 .name = OVS_VPORT_FAMILY, 1987 .version = OVS_VPORT_VERSION, 1988 .maxattr = OVS_VPORT_ATTR_MAX, 1989 .netnsok = true, 1990 .parallel_ops = true, 1991 .ops = dp_vport_genl_ops, 1992 .n_ops = ARRAY_SIZE(dp_vport_genl_ops), 1993 .mcgrps = &ovs_dp_vport_multicast_group, 1994 .n_mcgrps = 1, 1995}; 1996 1997static struct genl_family * const dp_genl_families[] = { 1998 &dp_datapath_genl_family, 1999 &dp_vport_genl_family, 2000 &dp_flow_genl_family, 2001 &dp_packet_genl_family, 2002}; 2003 2004static void dp_unregister_genl(int n_families) 2005{ 2006 int i; 2007 2008 for (i = 0; i < n_families; i++) 2009 genl_unregister_family(dp_genl_families[i]); 2010} 2011 2012static int dp_register_genl(void) 2013{ 2014 int err; 2015 int i; 2016 2017 for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) { 2018 2019 err = genl_register_family(dp_genl_families[i]); 2020 if (err) 2021 goto error; 2022 } 2023 2024 return 0; 2025 2026error: 2027 dp_unregister_genl(i); 2028 return err; 2029} 2030 2031static int __net_init ovs_init_net(struct net *net) 2032{ 2033 struct ovs_net *ovs_net = net_generic(net, ovs_net_id); 2034 2035 INIT_LIST_HEAD(&ovs_net->dps); 2036 INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq); 2037 return 0; 2038} 2039 2040static void __net_exit ovs_exit_net(struct net *net) 2041{ 2042 struct datapath *dp, *dp_next; 2043 struct ovs_net *ovs_net = net_generic(net, ovs_net_id); 2044 2045 ovs_lock(); 2046 list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node) 2047 __dp_destroy(dp); 2048 ovs_unlock(); 2049 2050 cancel_work_sync(&ovs_net->dp_notify_work); 2051} 2052 2053static struct pernet_operations ovs_net_ops = { 2054 .init = ovs_init_net, 2055 .exit = ovs_exit_net, 2056 .id = &ovs_net_id, 2057 .size = sizeof(struct ovs_net), 2058}; 2059 2060static int __init dp_init(void) 2061{ 2062 int err; 2063 2064 BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb)); 2065 2066 pr_info("Open vSwitch switching datapath\n"); 2067 2068 err = action_fifos_init(); 2069 if (err) 2070 goto error; 2071 2072 err = ovs_internal_dev_rtnl_link_register(); 2073 if (err) 2074 goto error_action_fifos_exit; 2075 2076 err = ovs_flow_init(); 2077 if (err) 2078 goto error_unreg_rtnl_link; 2079 2080 err = ovs_vport_init(); 2081 if (err) 2082 goto error_flow_exit; 2083 2084 err = register_pernet_device(&ovs_net_ops); 2085 if (err) 2086 goto error_vport_exit; 2087 2088 err = register_netdevice_notifier(&ovs_dp_device_notifier); 2089 if (err) 2090 goto error_netns_exit; 2091 2092 err = dp_register_genl(); 2093 if (err < 0) 2094 goto error_unreg_notifier; 2095 2096 return 0; 2097 2098error_unreg_notifier: 2099 unregister_netdevice_notifier(&ovs_dp_device_notifier); 2100error_netns_exit: 2101 unregister_pernet_device(&ovs_net_ops); 2102error_vport_exit: 2103 ovs_vport_exit(); 2104error_flow_exit: 2105 ovs_flow_exit(); 2106error_unreg_rtnl_link: 2107 ovs_internal_dev_rtnl_link_unregister(); 2108error_action_fifos_exit: 2109 action_fifos_exit(); 2110error: 2111 return err; 2112} 2113 2114static void dp_cleanup(void) 2115{ 2116 dp_unregister_genl(ARRAY_SIZE(dp_genl_families)); 2117 unregister_netdevice_notifier(&ovs_dp_device_notifier); 2118 unregister_pernet_device(&ovs_net_ops); 2119 rcu_barrier(); 2120 ovs_vport_exit(); 2121 ovs_flow_exit(); 2122 ovs_internal_dev_rtnl_link_unregister(); 2123 action_fifos_exit(); 2124} 2125 2126module_init(dp_init); 2127module_exit(dp_cleanup); 2128 2129MODULE_DESCRIPTION("Open vSwitch switching datapath"); 2130MODULE_LICENSE("GPL"); 2131