flow.c revision 42415c90ceaf50c792e29823e359463bc6d4ee05
1/* 2 * Copyright (c) 2007-2011 Nicira, Inc. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of version 2 of the GNU General Public 6 * License as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, but 9 * WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public License 14 * along with this program; if not, write to the Free Software 15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 16 * 02110-1301, USA 17 */ 18 19#include "flow.h" 20#include "datapath.h" 21#include <linux/uaccess.h> 22#include <linux/netdevice.h> 23#include <linux/etherdevice.h> 24#include <linux/if_ether.h> 25#include <linux/if_vlan.h> 26#include <net/llc_pdu.h> 27#include <linux/kernel.h> 28#include <linux/jhash.h> 29#include <linux/jiffies.h> 30#include <linux/llc.h> 31#include <linux/module.h> 32#include <linux/in.h> 33#include <linux/rcupdate.h> 34#include <linux/if_arp.h> 35#include <linux/ip.h> 36#include <linux/ipv6.h> 37#include <linux/tcp.h> 38#include <linux/udp.h> 39#include <linux/icmp.h> 40#include <linux/icmpv6.h> 41#include <linux/rculist.h> 42#include <net/ip.h> 43#include <net/ip_tunnels.h> 44#include <net/ipv6.h> 45#include <net/ndisc.h> 46 47static struct kmem_cache *flow_cache; 48 49static int check_header(struct sk_buff *skb, int len) 50{ 51 if (unlikely(skb->len < len)) 52 return -EINVAL; 53 if (unlikely(!pskb_may_pull(skb, len))) 54 return -ENOMEM; 55 return 0; 56} 57 58static bool arphdr_ok(struct sk_buff *skb) 59{ 60 return pskb_may_pull(skb, skb_network_offset(skb) + 61 sizeof(struct arp_eth_header)); 62} 63 64static int check_iphdr(struct sk_buff *skb) 65{ 66 unsigned int nh_ofs = skb_network_offset(skb); 67 unsigned int ip_len; 68 int err; 69 70 err = 
check_header(skb, nh_ofs + sizeof(struct iphdr)); 71 if (unlikely(err)) 72 return err; 73 74 ip_len = ip_hdrlen(skb); 75 if (unlikely(ip_len < sizeof(struct iphdr) || 76 skb->len < nh_ofs + ip_len)) 77 return -EINVAL; 78 79 skb_set_transport_header(skb, nh_ofs + ip_len); 80 return 0; 81} 82 83static bool tcphdr_ok(struct sk_buff *skb) 84{ 85 int th_ofs = skb_transport_offset(skb); 86 int tcp_len; 87 88 if (unlikely(!pskb_may_pull(skb, th_ofs + sizeof(struct tcphdr)))) 89 return false; 90 91 tcp_len = tcp_hdrlen(skb); 92 if (unlikely(tcp_len < sizeof(struct tcphdr) || 93 skb->len < th_ofs + tcp_len)) 94 return false; 95 96 return true; 97} 98 99static bool udphdr_ok(struct sk_buff *skb) 100{ 101 return pskb_may_pull(skb, skb_transport_offset(skb) + 102 sizeof(struct udphdr)); 103} 104 105static bool icmphdr_ok(struct sk_buff *skb) 106{ 107 return pskb_may_pull(skb, skb_transport_offset(skb) + 108 sizeof(struct icmphdr)); 109} 110 111u64 ovs_flow_used_time(unsigned long flow_jiffies) 112{ 113 struct timespec cur_ts; 114 u64 cur_ms, idle_ms; 115 116 ktime_get_ts(&cur_ts); 117 idle_ms = jiffies_to_msecs(jiffies - flow_jiffies); 118 cur_ms = (u64)cur_ts.tv_sec * MSEC_PER_SEC + 119 cur_ts.tv_nsec / NSEC_PER_MSEC; 120 121 return cur_ms - idle_ms; 122} 123 124#define SW_FLOW_KEY_OFFSET(field) \ 125 (offsetof(struct sw_flow_key, field) + \ 126 FIELD_SIZEOF(struct sw_flow_key, field)) 127 128static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key, 129 int *key_lenp) 130{ 131 unsigned int nh_ofs = skb_network_offset(skb); 132 unsigned int nh_len; 133 int payload_ofs; 134 struct ipv6hdr *nh; 135 uint8_t nexthdr; 136 __be16 frag_off; 137 int err; 138 139 *key_lenp = SW_FLOW_KEY_OFFSET(ipv6.label); 140 141 err = check_header(skb, nh_ofs + sizeof(*nh)); 142 if (unlikely(err)) 143 return err; 144 145 nh = ipv6_hdr(skb); 146 nexthdr = nh->nexthdr; 147 payload_ofs = (u8 *)(nh + 1) - skb->data; 148 149 key->ip.proto = NEXTHDR_NONE; 150 key->ip.tos = 
ipv6_get_dsfield(nh); 151 key->ip.ttl = nh->hop_limit; 152 key->ipv6.label = *(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL); 153 key->ipv6.addr.src = nh->saddr; 154 key->ipv6.addr.dst = nh->daddr; 155 156 payload_ofs = ipv6_skip_exthdr(skb, payload_ofs, &nexthdr, &frag_off); 157 if (unlikely(payload_ofs < 0)) 158 return -EINVAL; 159 160 if (frag_off) { 161 if (frag_off & htons(~0x7)) 162 key->ip.frag = OVS_FRAG_TYPE_LATER; 163 else 164 key->ip.frag = OVS_FRAG_TYPE_FIRST; 165 } 166 167 nh_len = payload_ofs - nh_ofs; 168 skb_set_transport_header(skb, nh_ofs + nh_len); 169 key->ip.proto = nexthdr; 170 return nh_len; 171} 172 173static bool icmp6hdr_ok(struct sk_buff *skb) 174{ 175 return pskb_may_pull(skb, skb_transport_offset(skb) + 176 sizeof(struct icmp6hdr)); 177} 178 179#define TCP_FLAGS_OFFSET 13 180#define TCP_FLAG_MASK 0x3f 181 182void ovs_flow_used(struct sw_flow *flow, struct sk_buff *skb) 183{ 184 u8 tcp_flags = 0; 185 186 if ((flow->key.eth.type == htons(ETH_P_IP) || 187 flow->key.eth.type == htons(ETH_P_IPV6)) && 188 flow->key.ip.proto == IPPROTO_TCP && 189 likely(skb->len >= skb_transport_offset(skb) + sizeof(struct tcphdr))) { 190 u8 *tcp = (u8 *)tcp_hdr(skb); 191 tcp_flags = *(tcp + TCP_FLAGS_OFFSET) & TCP_FLAG_MASK; 192 } 193 194 spin_lock(&flow->lock); 195 flow->used = jiffies; 196 flow->packet_count++; 197 flow->byte_count += skb->len; 198 flow->tcp_flags |= tcp_flags; 199 spin_unlock(&flow->lock); 200} 201 202struct sw_flow_actions *ovs_flow_actions_alloc(int size) 203{ 204 struct sw_flow_actions *sfa; 205 206 if (size > MAX_ACTIONS_BUFSIZE) 207 return ERR_PTR(-EINVAL); 208 209 sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL); 210 if (!sfa) 211 return ERR_PTR(-ENOMEM); 212 213 sfa->actions_len = 0; 214 return sfa; 215} 216 217struct sw_flow *ovs_flow_alloc(void) 218{ 219 struct sw_flow *flow; 220 221 flow = kmem_cache_alloc(flow_cache, GFP_KERNEL); 222 if (!flow) 223 return ERR_PTR(-ENOMEM); 224 225 spin_lock_init(&flow->lock); 226 flow->sf_acts = 
NULL; 227 228 return flow; 229} 230 231static struct hlist_head *find_bucket(struct flow_table *table, u32 hash) 232{ 233 hash = jhash_1word(hash, table->hash_seed); 234 return flex_array_get(table->buckets, 235 (hash & (table->n_buckets - 1))); 236} 237 238static struct flex_array *alloc_buckets(unsigned int n_buckets) 239{ 240 struct flex_array *buckets; 241 int i, err; 242 243 buckets = flex_array_alloc(sizeof(struct hlist_head), 244 n_buckets, GFP_KERNEL); 245 if (!buckets) 246 return NULL; 247 248 err = flex_array_prealloc(buckets, 0, n_buckets, GFP_KERNEL); 249 if (err) { 250 flex_array_free(buckets); 251 return NULL; 252 } 253 254 for (i = 0; i < n_buckets; i++) 255 INIT_HLIST_HEAD((struct hlist_head *) 256 flex_array_get(buckets, i)); 257 258 return buckets; 259} 260 261static void free_buckets(struct flex_array *buckets) 262{ 263 flex_array_free(buckets); 264} 265 266struct flow_table *ovs_flow_tbl_alloc(int new_size) 267{ 268 struct flow_table *table = kmalloc(sizeof(*table), GFP_KERNEL); 269 270 if (!table) 271 return NULL; 272 273 table->buckets = alloc_buckets(new_size); 274 275 if (!table->buckets) { 276 kfree(table); 277 return NULL; 278 } 279 table->n_buckets = new_size; 280 table->count = 0; 281 table->node_ver = 0; 282 table->keep_flows = false; 283 get_random_bytes(&table->hash_seed, sizeof(u32)); 284 285 return table; 286} 287 288void ovs_flow_tbl_destroy(struct flow_table *table) 289{ 290 int i; 291 292 if (!table) 293 return; 294 295 if (table->keep_flows) 296 goto skip_flows; 297 298 for (i = 0; i < table->n_buckets; i++) { 299 struct sw_flow *flow; 300 struct hlist_head *head = flex_array_get(table->buckets, i); 301 struct hlist_node *n; 302 int ver = table->node_ver; 303 304 hlist_for_each_entry_safe(flow, n, head, hash_node[ver]) { 305 hlist_del_rcu(&flow->hash_node[ver]); 306 ovs_flow_free(flow); 307 } 308 } 309 310skip_flows: 311 free_buckets(table->buckets); 312 kfree(table); 313} 314 315static void flow_tbl_destroy_rcu_cb(struct 
rcu_head *rcu) 316{ 317 struct flow_table *table = container_of(rcu, struct flow_table, rcu); 318 319 ovs_flow_tbl_destroy(table); 320} 321 322void ovs_flow_tbl_deferred_destroy(struct flow_table *table) 323{ 324 if (!table) 325 return; 326 327 call_rcu(&table->rcu, flow_tbl_destroy_rcu_cb); 328} 329 330struct sw_flow *ovs_flow_tbl_next(struct flow_table *table, u32 *bucket, u32 *last) 331{ 332 struct sw_flow *flow; 333 struct hlist_head *head; 334 int ver; 335 int i; 336 337 ver = table->node_ver; 338 while (*bucket < table->n_buckets) { 339 i = 0; 340 head = flex_array_get(table->buckets, *bucket); 341 hlist_for_each_entry_rcu(flow, head, hash_node[ver]) { 342 if (i < *last) { 343 i++; 344 continue; 345 } 346 *last = i + 1; 347 return flow; 348 } 349 (*bucket)++; 350 *last = 0; 351 } 352 353 return NULL; 354} 355 356static void __flow_tbl_insert(struct flow_table *table, struct sw_flow *flow) 357{ 358 struct hlist_head *head; 359 head = find_bucket(table, flow->hash); 360 hlist_add_head_rcu(&flow->hash_node[table->node_ver], head); 361 table->count++; 362} 363 364static void flow_table_copy_flows(struct flow_table *old, struct flow_table *new) 365{ 366 int old_ver; 367 int i; 368 369 old_ver = old->node_ver; 370 new->node_ver = !old_ver; 371 372 /* Insert in new table. 
*/ 373 for (i = 0; i < old->n_buckets; i++) { 374 struct sw_flow *flow; 375 struct hlist_head *head; 376 377 head = flex_array_get(old->buckets, i); 378 379 hlist_for_each_entry(flow, head, hash_node[old_ver]) 380 __flow_tbl_insert(new, flow); 381 } 382 old->keep_flows = true; 383} 384 385static struct flow_table *__flow_tbl_rehash(struct flow_table *table, int n_buckets) 386{ 387 struct flow_table *new_table; 388 389 new_table = ovs_flow_tbl_alloc(n_buckets); 390 if (!new_table) 391 return ERR_PTR(-ENOMEM); 392 393 flow_table_copy_flows(table, new_table); 394 395 return new_table; 396} 397 398struct flow_table *ovs_flow_tbl_rehash(struct flow_table *table) 399{ 400 return __flow_tbl_rehash(table, table->n_buckets); 401} 402 403struct flow_table *ovs_flow_tbl_expand(struct flow_table *table) 404{ 405 return __flow_tbl_rehash(table, table->n_buckets * 2); 406} 407 408void ovs_flow_free(struct sw_flow *flow) 409{ 410 if (unlikely(!flow)) 411 return; 412 413 kfree((struct sf_flow_acts __force *)flow->sf_acts); 414 kmem_cache_free(flow_cache, flow); 415} 416 417/* RCU callback used by ovs_flow_deferred_free. */ 418static void rcu_free_flow_callback(struct rcu_head *rcu) 419{ 420 struct sw_flow *flow = container_of(rcu, struct sw_flow, rcu); 421 422 ovs_flow_free(flow); 423} 424 425/* Schedules 'flow' to be freed after the next RCU grace period. 426 * The caller must hold rcu_read_lock for this to be sensible. */ 427void ovs_flow_deferred_free(struct sw_flow *flow) 428{ 429 call_rcu(&flow->rcu, rcu_free_flow_callback); 430} 431 432/* Schedules 'sf_acts' to be freed after the next RCU grace period. 433 * The caller must hold rcu_read_lock for this to be sensible. 
*/ 434void ovs_flow_deferred_free_acts(struct sw_flow_actions *sf_acts) 435{ 436 kfree_rcu(sf_acts, rcu); 437} 438 439static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key) 440{ 441 struct qtag_prefix { 442 __be16 eth_type; /* ETH_P_8021Q */ 443 __be16 tci; 444 }; 445 struct qtag_prefix *qp; 446 447 if (unlikely(skb->len < sizeof(struct qtag_prefix) + sizeof(__be16))) 448 return 0; 449 450 if (unlikely(!pskb_may_pull(skb, sizeof(struct qtag_prefix) + 451 sizeof(__be16)))) 452 return -ENOMEM; 453 454 qp = (struct qtag_prefix *) skb->data; 455 key->eth.tci = qp->tci | htons(VLAN_TAG_PRESENT); 456 __skb_pull(skb, sizeof(struct qtag_prefix)); 457 458 return 0; 459} 460 461static __be16 parse_ethertype(struct sk_buff *skb) 462{ 463 struct llc_snap_hdr { 464 u8 dsap; /* Always 0xAA */ 465 u8 ssap; /* Always 0xAA */ 466 u8 ctrl; 467 u8 oui[3]; 468 __be16 ethertype; 469 }; 470 struct llc_snap_hdr *llc; 471 __be16 proto; 472 473 proto = *(__be16 *) skb->data; 474 __skb_pull(skb, sizeof(__be16)); 475 476 if (ntohs(proto) >= ETH_P_802_3_MIN) 477 return proto; 478 479 if (skb->len < sizeof(struct llc_snap_hdr)) 480 return htons(ETH_P_802_2); 481 482 if (unlikely(!pskb_may_pull(skb, sizeof(struct llc_snap_hdr)))) 483 return htons(0); 484 485 llc = (struct llc_snap_hdr *) skb->data; 486 if (llc->dsap != LLC_SAP_SNAP || 487 llc->ssap != LLC_SAP_SNAP || 488 (llc->oui[0] | llc->oui[1] | llc->oui[2]) != 0) 489 return htons(ETH_P_802_2); 490 491 __skb_pull(skb, sizeof(struct llc_snap_hdr)); 492 493 if (ntohs(llc->ethertype) >= ETH_P_802_3_MIN) 494 return llc->ethertype; 495 496 return htons(ETH_P_802_2); 497} 498 499static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key, 500 int *key_lenp, int nh_len) 501{ 502 struct icmp6hdr *icmp = icmp6_hdr(skb); 503 int error = 0; 504 int key_len; 505 506 /* The ICMPv6 type and code fields use the 16-bit transport port 507 * fields, so we need to store them in 16-bit network byte order. 
508 */ 509 key->ipv6.tp.src = htons(icmp->icmp6_type); 510 key->ipv6.tp.dst = htons(icmp->icmp6_code); 511 key_len = SW_FLOW_KEY_OFFSET(ipv6.tp); 512 513 if (icmp->icmp6_code == 0 && 514 (icmp->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION || 515 icmp->icmp6_type == NDISC_NEIGHBOUR_ADVERTISEMENT)) { 516 int icmp_len = skb->len - skb_transport_offset(skb); 517 struct nd_msg *nd; 518 int offset; 519 520 key_len = SW_FLOW_KEY_OFFSET(ipv6.nd); 521 522 /* In order to process neighbor discovery options, we need the 523 * entire packet. 524 */ 525 if (unlikely(icmp_len < sizeof(*nd))) 526 goto out; 527 if (unlikely(skb_linearize(skb))) { 528 error = -ENOMEM; 529 goto out; 530 } 531 532 nd = (struct nd_msg *)skb_transport_header(skb); 533 key->ipv6.nd.target = nd->target; 534 key_len = SW_FLOW_KEY_OFFSET(ipv6.nd); 535 536 icmp_len -= sizeof(*nd); 537 offset = 0; 538 while (icmp_len >= 8) { 539 struct nd_opt_hdr *nd_opt = 540 (struct nd_opt_hdr *)(nd->opt + offset); 541 int opt_len = nd_opt->nd_opt_len * 8; 542 543 if (unlikely(!opt_len || opt_len > icmp_len)) 544 goto invalid; 545 546 /* Store the link layer address if the appropriate 547 * option is provided. It is considered an error if 548 * the same link layer option is specified twice. 
549 */ 550 if (nd_opt->nd_opt_type == ND_OPT_SOURCE_LL_ADDR 551 && opt_len == 8) { 552 if (unlikely(!is_zero_ether_addr(key->ipv6.nd.sll))) 553 goto invalid; 554 memcpy(key->ipv6.nd.sll, 555 &nd->opt[offset+sizeof(*nd_opt)], ETH_ALEN); 556 } else if (nd_opt->nd_opt_type == ND_OPT_TARGET_LL_ADDR 557 && opt_len == 8) { 558 if (unlikely(!is_zero_ether_addr(key->ipv6.nd.tll))) 559 goto invalid; 560 memcpy(key->ipv6.nd.tll, 561 &nd->opt[offset+sizeof(*nd_opt)], ETH_ALEN); 562 } 563 564 icmp_len -= opt_len; 565 offset += opt_len; 566 } 567 } 568 569 goto out; 570 571invalid: 572 memset(&key->ipv6.nd.target, 0, sizeof(key->ipv6.nd.target)); 573 memset(key->ipv6.nd.sll, 0, sizeof(key->ipv6.nd.sll)); 574 memset(key->ipv6.nd.tll, 0, sizeof(key->ipv6.nd.tll)); 575 576out: 577 *key_lenp = key_len; 578 return error; 579} 580 581/** 582 * ovs_flow_extract - extracts a flow key from an Ethernet frame. 583 * @skb: sk_buff that contains the frame, with skb->data pointing to the 584 * Ethernet header 585 * @in_port: port number on which @skb was received. 586 * @key: output flow key 587 * @key_lenp: length of output flow key 588 * 589 * The caller must ensure that skb->len >= ETH_HLEN. 590 * 591 * Returns 0 if successful, otherwise a negative errno value. 592 * 593 * Initializes @skb header pointers as follows: 594 * 595 * - skb->mac_header: the Ethernet header. 596 * 597 * - skb->network_header: just past the Ethernet header, or just past the 598 * VLAN header, to the first byte of the Ethernet payload. 599 * 600 * - skb->transport_header: If key->eth.type is ETH_P_IP or ETH_P_IPV6 601 * on output, then just past the IP header, if one is present and 602 * of a correct length, otherwise the same as skb->network_header. 603 * For other key->eth.type values it is left untouched. 
604 */ 605int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key, 606 int *key_lenp) 607{ 608 int error = 0; 609 int key_len = SW_FLOW_KEY_OFFSET(eth); 610 struct ethhdr *eth; 611 612 memset(key, 0, sizeof(*key)); 613 614 key->phy.priority = skb->priority; 615 if (OVS_CB(skb)->tun_key) 616 memcpy(&key->tun_key, OVS_CB(skb)->tun_key, sizeof(key->tun_key)); 617 key->phy.in_port = in_port; 618 key->phy.skb_mark = skb->mark; 619 620 skb_reset_mac_header(skb); 621 622 /* Link layer. We are guaranteed to have at least the 14 byte Ethernet 623 * header in the linear data area. 624 */ 625 eth = eth_hdr(skb); 626 memcpy(key->eth.src, eth->h_source, ETH_ALEN); 627 memcpy(key->eth.dst, eth->h_dest, ETH_ALEN); 628 629 __skb_pull(skb, 2 * ETH_ALEN); 630 /* We are going to push all headers that we pull, so no need to 631 * update skb->csum here. 632 */ 633 634 if (vlan_tx_tag_present(skb)) 635 key->eth.tci = htons(skb->vlan_tci); 636 else if (eth->h_proto == htons(ETH_P_8021Q)) 637 if (unlikely(parse_vlan(skb, key))) 638 return -ENOMEM; 639 640 key->eth.type = parse_ethertype(skb); 641 if (unlikely(key->eth.type == htons(0))) 642 return -ENOMEM; 643 644 skb_reset_network_header(skb); 645 __skb_push(skb, skb->data - skb_mac_header(skb)); 646 647 /* Network layer. 
*/ 648 if (key->eth.type == htons(ETH_P_IP)) { 649 struct iphdr *nh; 650 __be16 offset; 651 652 key_len = SW_FLOW_KEY_OFFSET(ipv4.addr); 653 654 error = check_iphdr(skb); 655 if (unlikely(error)) { 656 if (error == -EINVAL) { 657 skb->transport_header = skb->network_header; 658 error = 0; 659 } 660 goto out; 661 } 662 663 nh = ip_hdr(skb); 664 key->ipv4.addr.src = nh->saddr; 665 key->ipv4.addr.dst = nh->daddr; 666 667 key->ip.proto = nh->protocol; 668 key->ip.tos = nh->tos; 669 key->ip.ttl = nh->ttl; 670 671 offset = nh->frag_off & htons(IP_OFFSET); 672 if (offset) { 673 key->ip.frag = OVS_FRAG_TYPE_LATER; 674 goto out; 675 } 676 if (nh->frag_off & htons(IP_MF) || 677 skb_shinfo(skb)->gso_type & SKB_GSO_UDP) 678 key->ip.frag = OVS_FRAG_TYPE_FIRST; 679 680 /* Transport layer. */ 681 if (key->ip.proto == IPPROTO_TCP) { 682 key_len = SW_FLOW_KEY_OFFSET(ipv4.tp); 683 if (tcphdr_ok(skb)) { 684 struct tcphdr *tcp = tcp_hdr(skb); 685 key->ipv4.tp.src = tcp->source; 686 key->ipv4.tp.dst = tcp->dest; 687 } 688 } else if (key->ip.proto == IPPROTO_UDP) { 689 key_len = SW_FLOW_KEY_OFFSET(ipv4.tp); 690 if (udphdr_ok(skb)) { 691 struct udphdr *udp = udp_hdr(skb); 692 key->ipv4.tp.src = udp->source; 693 key->ipv4.tp.dst = udp->dest; 694 } 695 } else if (key->ip.proto == IPPROTO_ICMP) { 696 key_len = SW_FLOW_KEY_OFFSET(ipv4.tp); 697 if (icmphdr_ok(skb)) { 698 struct icmphdr *icmp = icmp_hdr(skb); 699 /* The ICMP type and code fields use the 16-bit 700 * transport port fields, so we need to store 701 * them in 16-bit network byte order. 
*/ 702 key->ipv4.tp.src = htons(icmp->type); 703 key->ipv4.tp.dst = htons(icmp->code); 704 } 705 } 706 707 } else if ((key->eth.type == htons(ETH_P_ARP) || 708 key->eth.type == htons(ETH_P_RARP)) && arphdr_ok(skb)) { 709 struct arp_eth_header *arp; 710 711 arp = (struct arp_eth_header *)skb_network_header(skb); 712 713 if (arp->ar_hrd == htons(ARPHRD_ETHER) 714 && arp->ar_pro == htons(ETH_P_IP) 715 && arp->ar_hln == ETH_ALEN 716 && arp->ar_pln == 4) { 717 718 /* We only match on the lower 8 bits of the opcode. */ 719 if (ntohs(arp->ar_op) <= 0xff) 720 key->ip.proto = ntohs(arp->ar_op); 721 memcpy(&key->ipv4.addr.src, arp->ar_sip, sizeof(key->ipv4.addr.src)); 722 memcpy(&key->ipv4.addr.dst, arp->ar_tip, sizeof(key->ipv4.addr.dst)); 723 memcpy(key->ipv4.arp.sha, arp->ar_sha, ETH_ALEN); 724 memcpy(key->ipv4.arp.tha, arp->ar_tha, ETH_ALEN); 725 key_len = SW_FLOW_KEY_OFFSET(ipv4.arp); 726 } 727 } else if (key->eth.type == htons(ETH_P_IPV6)) { 728 int nh_len; /* IPv6 Header + Extensions */ 729 730 nh_len = parse_ipv6hdr(skb, key, &key_len); 731 if (unlikely(nh_len < 0)) { 732 if (nh_len == -EINVAL) 733 skb->transport_header = skb->network_header; 734 else 735 error = nh_len; 736 goto out; 737 } 738 739 if (key->ip.frag == OVS_FRAG_TYPE_LATER) 740 goto out; 741 if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP) 742 key->ip.frag = OVS_FRAG_TYPE_FIRST; 743 744 /* Transport layer. 
*/ 745 if (key->ip.proto == NEXTHDR_TCP) { 746 key_len = SW_FLOW_KEY_OFFSET(ipv6.tp); 747 if (tcphdr_ok(skb)) { 748 struct tcphdr *tcp = tcp_hdr(skb); 749 key->ipv6.tp.src = tcp->source; 750 key->ipv6.tp.dst = tcp->dest; 751 } 752 } else if (key->ip.proto == NEXTHDR_UDP) { 753 key_len = SW_FLOW_KEY_OFFSET(ipv6.tp); 754 if (udphdr_ok(skb)) { 755 struct udphdr *udp = udp_hdr(skb); 756 key->ipv6.tp.src = udp->source; 757 key->ipv6.tp.dst = udp->dest; 758 } 759 } else if (key->ip.proto == NEXTHDR_ICMP) { 760 key_len = SW_FLOW_KEY_OFFSET(ipv6.tp); 761 if (icmp6hdr_ok(skb)) { 762 error = parse_icmpv6(skb, key, &key_len, nh_len); 763 if (error < 0) 764 goto out; 765 } 766 } 767 } 768 769out: 770 *key_lenp = key_len; 771 return error; 772} 773 774static u32 ovs_flow_hash(const struct sw_flow_key *key, int key_start, int key_len) 775{ 776 return jhash2((u32 *)((u8 *)key + key_start), 777 DIV_ROUND_UP(key_len - key_start, sizeof(u32)), 0); 778} 779 780static int flow_key_start(struct sw_flow_key *key) 781{ 782 if (key->tun_key.ipv4_dst) 783 return 0; 784 else 785 return offsetof(struct sw_flow_key, phy); 786} 787 788struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *table, 789 struct sw_flow_key *key, int key_len) 790{ 791 struct sw_flow *flow; 792 struct hlist_head *head; 793 u8 *_key; 794 int key_start; 795 u32 hash; 796 797 key_start = flow_key_start(key); 798 hash = ovs_flow_hash(key, key_start, key_len); 799 800 _key = (u8 *) key + key_start; 801 head = find_bucket(table, hash); 802 hlist_for_each_entry_rcu(flow, head, hash_node[table->node_ver]) { 803 804 if (flow->hash == hash && 805 !memcmp((u8 *)&flow->key + key_start, _key, key_len - key_start)) { 806 return flow; 807 } 808 } 809 return NULL; 810} 811 812void ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow, 813 struct sw_flow_key *key, int key_len) 814{ 815 flow->hash = ovs_flow_hash(key, flow_key_start(key), key_len); 816 memcpy(&flow->key, key, sizeof(flow->key)); 817 
__flow_tbl_insert(table, flow); 818} 819 820void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow) 821{ 822 BUG_ON(table->count == 0); 823 hlist_del_rcu(&flow->hash_node[table->node_ver]); 824 table->count--; 825} 826 827/* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */ 828const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { 829 [OVS_KEY_ATTR_ENCAP] = -1, 830 [OVS_KEY_ATTR_PRIORITY] = sizeof(u32), 831 [OVS_KEY_ATTR_IN_PORT] = sizeof(u32), 832 [OVS_KEY_ATTR_SKB_MARK] = sizeof(u32), 833 [OVS_KEY_ATTR_ETHERNET] = sizeof(struct ovs_key_ethernet), 834 [OVS_KEY_ATTR_VLAN] = sizeof(__be16), 835 [OVS_KEY_ATTR_ETHERTYPE] = sizeof(__be16), 836 [OVS_KEY_ATTR_IPV4] = sizeof(struct ovs_key_ipv4), 837 [OVS_KEY_ATTR_IPV6] = sizeof(struct ovs_key_ipv6), 838 [OVS_KEY_ATTR_TCP] = sizeof(struct ovs_key_tcp), 839 [OVS_KEY_ATTR_UDP] = sizeof(struct ovs_key_udp), 840 [OVS_KEY_ATTR_ICMP] = sizeof(struct ovs_key_icmp), 841 [OVS_KEY_ATTR_ICMPV6] = sizeof(struct ovs_key_icmpv6), 842 [OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp), 843 [OVS_KEY_ATTR_ND] = sizeof(struct ovs_key_nd), 844 [OVS_KEY_ATTR_TUNNEL] = -1, 845}; 846 847static int ipv4_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_len, 848 const struct nlattr *a[], u32 *attrs) 849{ 850 const struct ovs_key_icmp *icmp_key; 851 const struct ovs_key_tcp *tcp_key; 852 const struct ovs_key_udp *udp_key; 853 854 switch (swkey->ip.proto) { 855 case IPPROTO_TCP: 856 if (!(*attrs & (1 << OVS_KEY_ATTR_TCP))) 857 return -EINVAL; 858 *attrs &= ~(1 << OVS_KEY_ATTR_TCP); 859 860 *key_len = SW_FLOW_KEY_OFFSET(ipv4.tp); 861 tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]); 862 swkey->ipv4.tp.src = tcp_key->tcp_src; 863 swkey->ipv4.tp.dst = tcp_key->tcp_dst; 864 break; 865 866 case IPPROTO_UDP: 867 if (!(*attrs & (1 << OVS_KEY_ATTR_UDP))) 868 return -EINVAL; 869 *attrs &= ~(1 << OVS_KEY_ATTR_UDP); 870 871 *key_len = SW_FLOW_KEY_OFFSET(ipv4.tp); 872 udp_key = nla_data(a[OVS_KEY_ATTR_UDP]); 873 swkey->ipv4.tp.src = 
udp_key->udp_src; 874 swkey->ipv4.tp.dst = udp_key->udp_dst; 875 break; 876 877 case IPPROTO_ICMP: 878 if (!(*attrs & (1 << OVS_KEY_ATTR_ICMP))) 879 return -EINVAL; 880 *attrs &= ~(1 << OVS_KEY_ATTR_ICMP); 881 882 *key_len = SW_FLOW_KEY_OFFSET(ipv4.tp); 883 icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]); 884 swkey->ipv4.tp.src = htons(icmp_key->icmp_type); 885 swkey->ipv4.tp.dst = htons(icmp_key->icmp_code); 886 break; 887 } 888 889 return 0; 890} 891 892static int ipv6_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_len, 893 const struct nlattr *a[], u32 *attrs) 894{ 895 const struct ovs_key_icmpv6 *icmpv6_key; 896 const struct ovs_key_tcp *tcp_key; 897 const struct ovs_key_udp *udp_key; 898 899 switch (swkey->ip.proto) { 900 case IPPROTO_TCP: 901 if (!(*attrs & (1 << OVS_KEY_ATTR_TCP))) 902 return -EINVAL; 903 *attrs &= ~(1 << OVS_KEY_ATTR_TCP); 904 905 *key_len = SW_FLOW_KEY_OFFSET(ipv6.tp); 906 tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]); 907 swkey->ipv6.tp.src = tcp_key->tcp_src; 908 swkey->ipv6.tp.dst = tcp_key->tcp_dst; 909 break; 910 911 case IPPROTO_UDP: 912 if (!(*attrs & (1 << OVS_KEY_ATTR_UDP))) 913 return -EINVAL; 914 *attrs &= ~(1 << OVS_KEY_ATTR_UDP); 915 916 *key_len = SW_FLOW_KEY_OFFSET(ipv6.tp); 917 udp_key = nla_data(a[OVS_KEY_ATTR_UDP]); 918 swkey->ipv6.tp.src = udp_key->udp_src; 919 swkey->ipv6.tp.dst = udp_key->udp_dst; 920 break; 921 922 case IPPROTO_ICMPV6: 923 if (!(*attrs & (1 << OVS_KEY_ATTR_ICMPV6))) 924 return -EINVAL; 925 *attrs &= ~(1 << OVS_KEY_ATTR_ICMPV6); 926 927 *key_len = SW_FLOW_KEY_OFFSET(ipv6.tp); 928 icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]); 929 swkey->ipv6.tp.src = htons(icmpv6_key->icmpv6_type); 930 swkey->ipv6.tp.dst = htons(icmpv6_key->icmpv6_code); 931 932 if (swkey->ipv6.tp.src == htons(NDISC_NEIGHBOUR_SOLICITATION) || 933 swkey->ipv6.tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) { 934 const struct ovs_key_nd *nd_key; 935 936 if (!(*attrs & (1 << OVS_KEY_ATTR_ND))) 937 return -EINVAL; 938 *attrs &= ~(1 << 
OVS_KEY_ATTR_ND); 939 940 *key_len = SW_FLOW_KEY_OFFSET(ipv6.nd); 941 nd_key = nla_data(a[OVS_KEY_ATTR_ND]); 942 memcpy(&swkey->ipv6.nd.target, nd_key->nd_target, 943 sizeof(swkey->ipv6.nd.target)); 944 memcpy(swkey->ipv6.nd.sll, nd_key->nd_sll, ETH_ALEN); 945 memcpy(swkey->ipv6.nd.tll, nd_key->nd_tll, ETH_ALEN); 946 } 947 break; 948 } 949 950 return 0; 951} 952 953static int parse_flow_nlattrs(const struct nlattr *attr, 954 const struct nlattr *a[], u32 *attrsp) 955{ 956 const struct nlattr *nla; 957 u32 attrs; 958 int rem; 959 960 attrs = 0; 961 nla_for_each_nested(nla, attr, rem) { 962 u16 type = nla_type(nla); 963 int expected_len; 964 965 if (type > OVS_KEY_ATTR_MAX || attrs & (1 << type)) 966 return -EINVAL; 967 968 expected_len = ovs_key_lens[type]; 969 if (nla_len(nla) != expected_len && expected_len != -1) 970 return -EINVAL; 971 972 attrs |= 1 << type; 973 a[type] = nla; 974 } 975 if (rem) 976 return -EINVAL; 977 978 *attrsp = attrs; 979 return 0; 980} 981 982int ovs_ipv4_tun_from_nlattr(const struct nlattr *attr, 983 struct ovs_key_ipv4_tunnel *tun_key) 984{ 985 struct nlattr *a; 986 int rem; 987 bool ttl = false; 988 989 memset(tun_key, 0, sizeof(*tun_key)); 990 991 nla_for_each_nested(a, attr, rem) { 992 int type = nla_type(a); 993 static const u32 ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = { 994 [OVS_TUNNEL_KEY_ATTR_ID] = sizeof(u64), 995 [OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = sizeof(u32), 996 [OVS_TUNNEL_KEY_ATTR_IPV4_DST] = sizeof(u32), 997 [OVS_TUNNEL_KEY_ATTR_TOS] = 1, 998 [OVS_TUNNEL_KEY_ATTR_TTL] = 1, 999 [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0, 1000 [OVS_TUNNEL_KEY_ATTR_CSUM] = 0, 1001 }; 1002 1003 if (type > OVS_TUNNEL_KEY_ATTR_MAX || 1004 ovs_tunnel_key_lens[type] != nla_len(a)) 1005 return -EINVAL; 1006 1007 switch (type) { 1008 case OVS_TUNNEL_KEY_ATTR_ID: 1009 tun_key->tun_id = nla_get_be64(a); 1010 tun_key->tun_flags |= TUNNEL_KEY; 1011 break; 1012 case OVS_TUNNEL_KEY_ATTR_IPV4_SRC: 1013 tun_key->ipv4_src = nla_get_be32(a); 1014 break; 
1015 case OVS_TUNNEL_KEY_ATTR_IPV4_DST: 1016 tun_key->ipv4_dst = nla_get_be32(a); 1017 break; 1018 case OVS_TUNNEL_KEY_ATTR_TOS: 1019 tun_key->ipv4_tos = nla_get_u8(a); 1020 break; 1021 case OVS_TUNNEL_KEY_ATTR_TTL: 1022 tun_key->ipv4_ttl = nla_get_u8(a); 1023 ttl = true; 1024 break; 1025 case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT: 1026 tun_key->tun_flags |= TUNNEL_DONT_FRAGMENT; 1027 break; 1028 case OVS_TUNNEL_KEY_ATTR_CSUM: 1029 tun_key->tun_flags |= TUNNEL_CSUM; 1030 break; 1031 default: 1032 return -EINVAL; 1033 1034 } 1035 } 1036 if (rem > 0) 1037 return -EINVAL; 1038 1039 if (!tun_key->ipv4_dst) 1040 return -EINVAL; 1041 1042 if (!ttl) 1043 return -EINVAL; 1044 1045 return 0; 1046} 1047 1048int ovs_ipv4_tun_to_nlattr(struct sk_buff *skb, 1049 const struct ovs_key_ipv4_tunnel *tun_key) 1050{ 1051 struct nlattr *nla; 1052 1053 nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL); 1054 if (!nla) 1055 return -EMSGSIZE; 1056 1057 if (tun_key->tun_flags & TUNNEL_KEY && 1058 nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, tun_key->tun_id)) 1059 return -EMSGSIZE; 1060 if (tun_key->ipv4_src && 1061 nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, tun_key->ipv4_src)) 1062 return -EMSGSIZE; 1063 if (nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, tun_key->ipv4_dst)) 1064 return -EMSGSIZE; 1065 if (tun_key->ipv4_tos && 1066 nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, tun_key->ipv4_tos)) 1067 return -EMSGSIZE; 1068 if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, tun_key->ipv4_ttl)) 1069 return -EMSGSIZE; 1070 if ((tun_key->tun_flags & TUNNEL_DONT_FRAGMENT) && 1071 nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT)) 1072 return -EMSGSIZE; 1073 if ((tun_key->tun_flags & TUNNEL_CSUM) && 1074 nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM)) 1075 return -EMSGSIZE; 1076 1077 nla_nest_end(skb, nla); 1078 return 0; 1079} 1080 1081/** 1082 * ovs_flow_from_nlattrs - parses Netlink attributes into a flow key. 1083 * @swkey: receives the extracted flow key. 1084 * @key_lenp: number of bytes used in @swkey. 
* @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
 * sequence.
 */
int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
		      const struct nlattr *attr)
{
	/*
	 * a[] and attrs are filled by parse_flow_nlattrs() and used below as
	 * a per-type attribute table and a bitmask with one bit per
	 * OVS_KEY_ATTR_* type.  Each bit is cleared once its attribute has
	 * been consumed; any bit still set at the end is an error.
	 */
	const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
	const struct ovs_key_ethernet *eth_key;
	int key_len;
	u32 attrs;
	int err;

	/* Start from an all-zero key so fields that are not supplied read 0. */
	memset(swkey, 0, sizeof(struct sw_flow_key));
	/* Initial key length; replaced below when an L3 header is parsed. */
	key_len = SW_FLOW_KEY_OFFSET(eth);

	err = parse_flow_nlattrs(attr, a, &attrs);
	if (err)
		return err;

	/* Metadata attributes. */
	if (attrs & (1 << OVS_KEY_ATTR_PRIORITY)) {
		swkey->phy.priority = nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]);
		attrs &= ~(1 << OVS_KEY_ATTR_PRIORITY);
	}
	if (attrs & (1 << OVS_KEY_ATTR_IN_PORT)) {
		u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]);
		if (in_port >= DP_MAX_PORTS)
			return -EINVAL;
		swkey->phy.in_port = in_port;
		attrs &= ~(1 << OVS_KEY_ATTR_IN_PORT);
	} else {
		/* DP_MAX_PORTS is stored when no input port was given. */
		swkey->phy.in_port = DP_MAX_PORTS;
	}
	if (attrs & (1 << OVS_KEY_ATTR_SKB_MARK)) {
		swkey->phy.skb_mark = nla_get_u32(a[OVS_KEY_ATTR_SKB_MARK]);
		attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK);
	}

	if (attrs & (1 << OVS_KEY_ATTR_TUNNEL)) {
		err = ovs_ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], &swkey->tun_key);
		if (err)
			return err;

		attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL);
	}

	/* Data attributes. */
	/* The Ethernet attribute is the only one that is always required. */
	if (!(attrs & (1 << OVS_KEY_ATTR_ETHERNET)))
		return -EINVAL;
	attrs &= ~(1 << OVS_KEY_ATTR_ETHERNET);

	eth_key = nla_data(a[OVS_KEY_ATTR_ETHERNET]);
	memcpy(swkey->eth.src, eth_key->eth_src, ETH_ALEN);
	memcpy(swkey->eth.dst, eth_key->eth_dst, ETH_ALEN);

	/*
	 * 802.1Q: the outer level must then consist of exactly VLAN,
	 * ETHERTYPE and ENCAP (metadata and ETHERNET bits were already
	 * cleared above); the inner key lives inside the ENCAP attribute.
	 */
	if (attrs & (1u << OVS_KEY_ATTR_ETHERTYPE) &&
	    nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]) == htons(ETH_P_8021Q)) {
		const struct nlattr *encap;
		__be16 tci;

		if (attrs != ((1 << OVS_KEY_ATTR_VLAN) |
			      (1 << OVS_KEY_ATTR_ETHERTYPE) |
			      (1 << OVS_KEY_ATTR_ENCAP)))
			return -EINVAL;

		/* Save encap before a[] is overwritten by the nested parse. */
		encap = a[OVS_KEY_ATTR_ENCAP];
		tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
		if (tci & htons(VLAN_TAG_PRESENT)) {
			swkey->eth.tci = tci;

			/*
			 * Re-parse from the nested attributes: a[] and attrs
			 * now describe the encapsulated key, and the code
			 * after this block operates on them.
			 */
			err = parse_flow_nlattrs(encap, a, &attrs);
			if (err)
				return err;
		} else if (!tci) {
			/* Corner case for truncated 802.1Q header. */
			if (nla_len(encap))
				return -EINVAL;

			swkey->eth.type = htons(ETH_P_8021Q);
			*key_lenp = key_len;
			return 0;
		} else {
			/* Non-zero TCI without VLAN_TAG_PRESENT is rejected. */
			return -EINVAL;
		}
	}

	if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) {
		swkey->eth.type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
		if (ntohs(swkey->eth.type) < ETH_P_802_3_MIN)
			return -EINVAL;
		attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
	} else {
		/* No Ethertype attribute: the key is recorded as ETH_P_802_2. */
		swkey->eth.type = htons(ETH_P_802_2);
	}

	if (swkey->eth.type == htons(ETH_P_IP)) {
		const struct ovs_key_ipv4 *ipv4_key;

		if (!(attrs & (1 << OVS_KEY_ATTR_IPV4)))
			return -EINVAL;
		attrs &= ~(1 << OVS_KEY_ATTR_IPV4);

		key_len = SW_FLOW_KEY_OFFSET(ipv4.addr);
		ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]);
		if (ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX)
			return -EINVAL;
		swkey->ip.proto = ipv4_key->ipv4_proto;
		swkey->ip.tos = ipv4_key->ipv4_tos;
		swkey->ip.ttl = ipv4_key->ipv4_ttl;
		swkey->ip.frag = ipv4_key->ipv4_frag;
		swkey->ipv4.addr.src = ipv4_key->ipv4_src;
		swkey->ipv4.addr.dst = ipv4_key->ipv4_dst;

		/*
		 * Transport-layer attributes are not parsed for
		 * OVS_FRAG_TYPE_LATER; if any were supplied they remain set
		 * in attrs and are rejected by the final check.
		 */
		if (swkey->ip.frag != OVS_FRAG_TYPE_LATER) {
			err = ipv4_flow_from_nlattrs(swkey, &key_len, a, &attrs);
			if (err)
				return err;
		}
	} else if (swkey->eth.type == htons(ETH_P_IPV6)) {
		const struct ovs_key_ipv6 *ipv6_key;

		if (!(attrs & (1 << OVS_KEY_ATTR_IPV6)))
			return -EINVAL;
		attrs &= ~(1 << OVS_KEY_ATTR_IPV6);

		key_len = SW_FLOW_KEY_OFFSET(ipv6.label);
		ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]);
		if (ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX)
			return -EINVAL;
		swkey->ipv6.label = ipv6_key->ipv6_label;
		swkey->ip.proto = ipv6_key->ipv6_proto;
		swkey->ip.tos = ipv6_key->ipv6_tclass;
		swkey->ip.ttl = ipv6_key->ipv6_hlimit;
		swkey->ip.frag = ipv6_key->ipv6_frag;
		memcpy(&swkey->ipv6.addr.src, ipv6_key->ipv6_src,
		       sizeof(swkey->ipv6.addr.src));
		memcpy(&swkey->ipv6.addr.dst, ipv6_key->ipv6_dst,
		       sizeof(swkey->ipv6.addr.dst));

		/* Same fragment handling as in the IPv4 branch above. */
		if (swkey->ip.frag != OVS_FRAG_TYPE_LATER) {
			err = ipv6_flow_from_nlattrs(swkey, &key_len, a, &attrs);
			if (err)
				return err;
		}
	} else if (swkey->eth.type == htons(ETH_P_ARP) ||
		   swkey->eth.type == htons(ETH_P_RARP)) {
		const struct ovs_key_arp *arp_key;

		if (!(attrs & (1 << OVS_KEY_ATTR_ARP)))
			return -EINVAL;
		attrs &= ~(1 << OVS_KEY_ATTR_ARP);

		/* ARP reuses the ipv4 address fields and ip.proto of the key. */
		key_len = SW_FLOW_KEY_OFFSET(ipv4.arp);
		arp_key = nla_data(a[OVS_KEY_ATTR_ARP]);
		swkey->ipv4.addr.src = arp_key->arp_sip;
		swkey->ipv4.addr.dst = arp_key->arp_tip;
		/*
		 * Only opcodes whose high byte is zero are accepted, since
		 * the opcode is stored in ip.proto (presumably an 8-bit
		 * field -- confirm against struct sw_flow_key).
		 */
		if (arp_key->arp_op & htons(0xff00))
			return -EINVAL;
		swkey->ip.proto = ntohs(arp_key->arp_op);
		memcpy(swkey->ipv4.arp.sha, arp_key->arp_sha, ETH_ALEN);
		memcpy(swkey->ipv4.arp.tha, arp_key->arp_tha, ETH_ALEN);
	}

	/* Anything not consumed above does not belong in this key. */
	if (attrs)
		return -EINVAL;
	/* *key_lenp is written only on success. */
	*key_lenp = key_len;

	return 0;
}

/**
 * ovs_flow_metadata_from_nlattrs - 
parses Netlink attributes into a flow key. 1257 * @flow: Receives extracted in_port, priority, tun_key and skb_mark. 1258 * @key_len: Length of key in @flow. Used for calculating flow hash. 1259 * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute 1260 * sequence. 1261 * 1262 * This parses a series of Netlink attributes that form a flow key, which must 1263 * take the same form accepted by flow_from_nlattrs(), but only enough of it to 1264 * get the metadata, that is, the parts of the flow key that cannot be 1265 * extracted from the packet itself. 1266 */ 1267int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, int key_len, 1268 const struct nlattr *attr) 1269{ 1270 struct ovs_key_ipv4_tunnel *tun_key = &flow->key.tun_key; 1271 const struct nlattr *nla; 1272 int rem; 1273 1274 flow->key.phy.in_port = DP_MAX_PORTS; 1275 flow->key.phy.priority = 0; 1276 flow->key.phy.skb_mark = 0; 1277 memset(tun_key, 0, sizeof(flow->key.tun_key)); 1278 1279 nla_for_each_nested(nla, attr, rem) { 1280 int type = nla_type(nla); 1281 1282 if (type <= OVS_KEY_ATTR_MAX && ovs_key_lens[type] > 0) { 1283 int err; 1284 1285 if (nla_len(nla) != ovs_key_lens[type]) 1286 return -EINVAL; 1287 1288 switch (type) { 1289 case OVS_KEY_ATTR_PRIORITY: 1290 flow->key.phy.priority = nla_get_u32(nla); 1291 break; 1292 1293 case OVS_KEY_ATTR_TUNNEL: 1294 err = ovs_ipv4_tun_from_nlattr(nla, tun_key); 1295 if (err) 1296 return err; 1297 break; 1298 1299 case OVS_KEY_ATTR_IN_PORT: 1300 if (nla_get_u32(nla) >= DP_MAX_PORTS) 1301 return -EINVAL; 1302 flow->key.phy.in_port = nla_get_u32(nla); 1303 break; 1304 1305 case OVS_KEY_ATTR_SKB_MARK: 1306 flow->key.phy.skb_mark = nla_get_u32(nla); 1307 break; 1308 } 1309 } 1310 } 1311 if (rem) 1312 return -EINVAL; 1313 1314 flow->hash = ovs_flow_hash(&flow->key, 1315 flow_key_start(&flow->key), key_len); 1316 1317 return 0; 1318} 1319 1320int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb) 1321{ 1322 struct 
ovs_key_ethernet *eth_key; 1323 struct nlattr *nla, *encap; 1324 1325 if (swkey->phy.priority && 1326 nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, swkey->phy.priority)) 1327 goto nla_put_failure; 1328 1329 if (swkey->tun_key.ipv4_dst && 1330 ovs_ipv4_tun_to_nlattr(skb, &swkey->tun_key)) 1331 goto nla_put_failure; 1332 1333 if (swkey->phy.in_port != DP_MAX_PORTS && 1334 nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, swkey->phy.in_port)) 1335 goto nla_put_failure; 1336 1337 if (swkey->phy.skb_mark && 1338 nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, swkey->phy.skb_mark)) 1339 goto nla_put_failure; 1340 1341 nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key)); 1342 if (!nla) 1343 goto nla_put_failure; 1344 eth_key = nla_data(nla); 1345 memcpy(eth_key->eth_src, swkey->eth.src, ETH_ALEN); 1346 memcpy(eth_key->eth_dst, swkey->eth.dst, ETH_ALEN); 1347 1348 if (swkey->eth.tci || swkey->eth.type == htons(ETH_P_8021Q)) { 1349 if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, htons(ETH_P_8021Q)) || 1350 nla_put_be16(skb, OVS_KEY_ATTR_VLAN, swkey->eth.tci)) 1351 goto nla_put_failure; 1352 encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP); 1353 if (!swkey->eth.tci) 1354 goto unencap; 1355 } else { 1356 encap = NULL; 1357 } 1358 1359 if (swkey->eth.type == htons(ETH_P_802_2)) 1360 goto unencap; 1361 1362 if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, swkey->eth.type)) 1363 goto nla_put_failure; 1364 1365 if (swkey->eth.type == htons(ETH_P_IP)) { 1366 struct ovs_key_ipv4 *ipv4_key; 1367 1368 nla = nla_reserve(skb, OVS_KEY_ATTR_IPV4, sizeof(*ipv4_key)); 1369 if (!nla) 1370 goto nla_put_failure; 1371 ipv4_key = nla_data(nla); 1372 ipv4_key->ipv4_src = swkey->ipv4.addr.src; 1373 ipv4_key->ipv4_dst = swkey->ipv4.addr.dst; 1374 ipv4_key->ipv4_proto = swkey->ip.proto; 1375 ipv4_key->ipv4_tos = swkey->ip.tos; 1376 ipv4_key->ipv4_ttl = swkey->ip.ttl; 1377 ipv4_key->ipv4_frag = swkey->ip.frag; 1378 } else if (swkey->eth.type == htons(ETH_P_IPV6)) { 1379 struct ovs_key_ipv6 *ipv6_key; 1380 1381 nla = 
nla_reserve(skb, OVS_KEY_ATTR_IPV6, sizeof(*ipv6_key)); 1382 if (!nla) 1383 goto nla_put_failure; 1384 ipv6_key = nla_data(nla); 1385 memcpy(ipv6_key->ipv6_src, &swkey->ipv6.addr.src, 1386 sizeof(ipv6_key->ipv6_src)); 1387 memcpy(ipv6_key->ipv6_dst, &swkey->ipv6.addr.dst, 1388 sizeof(ipv6_key->ipv6_dst)); 1389 ipv6_key->ipv6_label = swkey->ipv6.label; 1390 ipv6_key->ipv6_proto = swkey->ip.proto; 1391 ipv6_key->ipv6_tclass = swkey->ip.tos; 1392 ipv6_key->ipv6_hlimit = swkey->ip.ttl; 1393 ipv6_key->ipv6_frag = swkey->ip.frag; 1394 } else if (swkey->eth.type == htons(ETH_P_ARP) || 1395 swkey->eth.type == htons(ETH_P_RARP)) { 1396 struct ovs_key_arp *arp_key; 1397 1398 nla = nla_reserve(skb, OVS_KEY_ATTR_ARP, sizeof(*arp_key)); 1399 if (!nla) 1400 goto nla_put_failure; 1401 arp_key = nla_data(nla); 1402 memset(arp_key, 0, sizeof(struct ovs_key_arp)); 1403 arp_key->arp_sip = swkey->ipv4.addr.src; 1404 arp_key->arp_tip = swkey->ipv4.addr.dst; 1405 arp_key->arp_op = htons(swkey->ip.proto); 1406 memcpy(arp_key->arp_sha, swkey->ipv4.arp.sha, ETH_ALEN); 1407 memcpy(arp_key->arp_tha, swkey->ipv4.arp.tha, ETH_ALEN); 1408 } 1409 1410 if ((swkey->eth.type == htons(ETH_P_IP) || 1411 swkey->eth.type == htons(ETH_P_IPV6)) && 1412 swkey->ip.frag != OVS_FRAG_TYPE_LATER) { 1413 1414 if (swkey->ip.proto == IPPROTO_TCP) { 1415 struct ovs_key_tcp *tcp_key; 1416 1417 nla = nla_reserve(skb, OVS_KEY_ATTR_TCP, sizeof(*tcp_key)); 1418 if (!nla) 1419 goto nla_put_failure; 1420 tcp_key = nla_data(nla); 1421 if (swkey->eth.type == htons(ETH_P_IP)) { 1422 tcp_key->tcp_src = swkey->ipv4.tp.src; 1423 tcp_key->tcp_dst = swkey->ipv4.tp.dst; 1424 } else if (swkey->eth.type == htons(ETH_P_IPV6)) { 1425 tcp_key->tcp_src = swkey->ipv6.tp.src; 1426 tcp_key->tcp_dst = swkey->ipv6.tp.dst; 1427 } 1428 } else if (swkey->ip.proto == IPPROTO_UDP) { 1429 struct ovs_key_udp *udp_key; 1430 1431 nla = nla_reserve(skb, OVS_KEY_ATTR_UDP, sizeof(*udp_key)); 1432 if (!nla) 1433 goto nla_put_failure; 1434 udp_key = 
nla_data(nla); 1435 if (swkey->eth.type == htons(ETH_P_IP)) { 1436 udp_key->udp_src = swkey->ipv4.tp.src; 1437 udp_key->udp_dst = swkey->ipv4.tp.dst; 1438 } else if (swkey->eth.type == htons(ETH_P_IPV6)) { 1439 udp_key->udp_src = swkey->ipv6.tp.src; 1440 udp_key->udp_dst = swkey->ipv6.tp.dst; 1441 } 1442 } else if (swkey->eth.type == htons(ETH_P_IP) && 1443 swkey->ip.proto == IPPROTO_ICMP) { 1444 struct ovs_key_icmp *icmp_key; 1445 1446 nla = nla_reserve(skb, OVS_KEY_ATTR_ICMP, sizeof(*icmp_key)); 1447 if (!nla) 1448 goto nla_put_failure; 1449 icmp_key = nla_data(nla); 1450 icmp_key->icmp_type = ntohs(swkey->ipv4.tp.src); 1451 icmp_key->icmp_code = ntohs(swkey->ipv4.tp.dst); 1452 } else if (swkey->eth.type == htons(ETH_P_IPV6) && 1453 swkey->ip.proto == IPPROTO_ICMPV6) { 1454 struct ovs_key_icmpv6 *icmpv6_key; 1455 1456 nla = nla_reserve(skb, OVS_KEY_ATTR_ICMPV6, 1457 sizeof(*icmpv6_key)); 1458 if (!nla) 1459 goto nla_put_failure; 1460 icmpv6_key = nla_data(nla); 1461 icmpv6_key->icmpv6_type = ntohs(swkey->ipv6.tp.src); 1462 icmpv6_key->icmpv6_code = ntohs(swkey->ipv6.tp.dst); 1463 1464 if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION || 1465 icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) { 1466 struct ovs_key_nd *nd_key; 1467 1468 nla = nla_reserve(skb, OVS_KEY_ATTR_ND, sizeof(*nd_key)); 1469 if (!nla) 1470 goto nla_put_failure; 1471 nd_key = nla_data(nla); 1472 memcpy(nd_key->nd_target, &swkey->ipv6.nd.target, 1473 sizeof(nd_key->nd_target)); 1474 memcpy(nd_key->nd_sll, swkey->ipv6.nd.sll, ETH_ALEN); 1475 memcpy(nd_key->nd_tll, swkey->ipv6.nd.tll, ETH_ALEN); 1476 } 1477 } 1478 } 1479 1480unencap: 1481 if (encap) 1482 nla_nest_end(skb, encap); 1483 1484 return 0; 1485 1486nla_put_failure: 1487 return -EMSGSIZE; 1488} 1489 1490/* Initializes the flow module. 1491 * Returns zero if successful or a negative error code. 
*/ 1492int ovs_flow_init(void) 1493{ 1494 flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow), 0, 1495 0, NULL); 1496 if (flow_cache == NULL) 1497 return -ENOMEM; 1498 1499 return 0; 1500} 1501 1502/* Uninitializes the flow module. */ 1503void ovs_flow_exit(void) 1504{ 1505 kmem_cache_destroy(flow_cache); 1506} 1507