flow_dissector.c revision 8ed781668dd49b608f1e67a22e3b445fd0c2cd6f
#include <linux/skbuff.h>
#include <linux/export.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/if_vlan.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <linux/if_tunnel.h>
#include <linux/if_pppox.h>
#include <linux/ppp_defs.h>
#include <net/flow_keys.h>

/* copy saddr & daddr, possibly using 64bit load/store
 * Equivalent to :	flow->src = iph->saddr;
 *			flow->dst = iph->daddr;
 */
static void iph_to_flow_copy_addrs(struct flow_keys *flow, const struct iphdr *iph)
{
	BUILD_BUG_ON(offsetof(typeof(*flow), dst) !=
		     offsetof(typeof(*flow), src) + sizeof(flow->src));
	memcpy(&flow->src, &iph->saddr, sizeof(flow->src) + sizeof(flow->dst));
}

bool skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow)
{
	int poff, nhoff = skb_network_offset(skb);
	u8 ip_proto;
	__be16 proto = skb->protocol;

	memset(flow, 0, sizeof(*flow));

again:
	switch (proto) {
	case __constant_htons(ETH_P_IP): {
		const struct iphdr *iph;
		struct iphdr _iph;
ip:
		iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph);
		if (!iph)
			return false;

		if (ip_is_fragment(iph))
			ip_proto = 0;
		else
			ip_proto = iph->protocol;
		iph_to_flow_copy_addrs(flow, iph);
		nhoff += iph->ihl * 4;
		break;
	}
	case __constant_htons(ETH_P_IPV6): {
		const struct ipv6hdr *iph;
		struct ipv6hdr _iph;
ipv6:
		iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph);
		if (!iph)
			return false;

		ip_proto = iph->nexthdr;
		flow->src = (__force __be32)ipv6_addr_hash(&iph->saddr);
		flow->dst = (__force __be32)ipv6_addr_hash(&iph->daddr);
		nhoff += sizeof(struct ipv6hdr);
		break;
	}
	case __constant_htons(ETH_P_8021Q): {
		const struct vlan_hdr *vlan;
		struct vlan_hdr _vlan;

		vlan = skb_header_pointer(skb, nhoff, sizeof(_vlan), &_vlan);
		if (!vlan)
			return false;

		proto = vlan->h_vlan_encapsulated_proto;
		nhoff += sizeof(*vlan);
		goto again;
	}
	case __constant_htons(ETH_P_PPP_SES): {
		struct {
			struct pppoe_hdr hdr;
			__be16 proto;
		} *hdr, _hdr;
		hdr = skb_header_pointer(skb, nhoff, sizeof(_hdr), &_hdr);
		if (!hdr)
			return false;
		proto = hdr->proto;
		nhoff += PPPOE_SES_HLEN;
		switch (proto) {
		case __constant_htons(PPP_IP):
			goto ip;
		case __constant_htons(PPP_IPV6):
			goto ipv6;
		default:
			return false;
		}
	}
	default:
		return false;
	}

	switch (ip_proto) {
	case IPPROTO_GRE: {
		struct gre_hdr {
			__be16 flags;
			__be16 proto;
		} *hdr, _hdr;

		hdr = skb_header_pointer(skb, nhoff, sizeof(_hdr), &_hdr);
		if (!hdr)
			return false;
		/*
		 * Only look inside GRE if version zero and no
		 * routing
		 */
		if (!(hdr->flags & (GRE_VERSION|GRE_ROUTING))) {
			proto = hdr->proto;
			nhoff += 4;
			if (hdr->flags & GRE_CSUM)
				nhoff += 4;
			if (hdr->flags & GRE_KEY)
				nhoff += 4;
			if (hdr->flags & GRE_SEQ)
				nhoff += 4;
			goto again;
		}
		break;
	}
	case IPPROTO_IPIP:
		goto again;
	default:
		break;
	}

	flow->ip_proto = ip_proto;
	poff = proto_ports_offset(ip_proto);
	if (poff >= 0) {
		__be32 *ports, _ports;

		nhoff += poff;
		ports = skb_header_pointer(skb, nhoff, sizeof(_ports), &_ports);
		if (ports)
			flow->ports = *ports;
	}

	flow->thoff = (u16) nhoff;

	return true;
}
EXPORT_SYMBOL(skb_flow_dissect);
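A minimal usage sketch, not part of this revision: a hypothetical caller (the name my_log_flow() is invented) runs the dissector above and prints the flow_keys fields it fills in. Only fields referenced by skb_flow_dissect() are assumed; note that for IPv6 packets src/dst hold folded address hashes rather than printable addresses.

/* Hypothetical illustration only -- not in flow_dissector.c */
static void my_log_flow(const struct sk_buff *skb)
{
	struct flow_keys keys;

	if (!skb_flow_dissect(skb, &keys))
		return;		/* unsupported or truncated packet */

	/* %pI4 is meaningful for IPv4 flows; IPv6 src/dst are folded hashes */
	pr_debug("proto %u src %pI4 dst %pI4 sport %u dport %u thoff %u\n",
		 keys.ip_proto, &keys.src, &keys.dst,
		 ntohs(keys.port16[0]), ntohs(keys.port16[1]), keys.thoff);
}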

static u32 hashrnd __read_mostly;

/*
 * __skb_get_rxhash: calculate a flow hash based on src/dst addresses
 * and src/dst port numbers.  Sets rxhash in skb to a non-zero hash value
 * on success, zero indicates no valid hash.  Also, sets l4_rxhash in skb
 * if hash is a canonical 4-tuple hash over transport ports.
 */
void __skb_get_rxhash(struct sk_buff *skb)
{
	struct flow_keys keys;
	u32 hash;

	if (!skb_flow_dissect(skb, &keys))
		return;

	if (keys.ports)
		skb->l4_rxhash = 1;

	/* get a consistent hash (same value on both flow directions) */
	if (((__force u32)keys.dst < (__force u32)keys.src) ||
	    (((__force u32)keys.dst == (__force u32)keys.src) &&
	     ((__force u16)keys.port16[1] < (__force u16)keys.port16[0]))) {
		swap(keys.dst, keys.src);
		swap(keys.port16[0], keys.port16[1]);
	}

	hash = jhash_3words((__force u32)keys.dst,
			    (__force u32)keys.src,
			    (__force u32)keys.ports, hashrnd);
	if (!hash)
		hash = 1;

	skb->rxhash = hash;
}
EXPORT_SYMBOL(__skb_get_rxhash);

/*
 * Returns a Tx hash based on the given packet descriptor and a Tx queue
 * count to be used as a distribution range.
 */
u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
		  unsigned int num_tx_queues)
{
	u32 hash;
	u16 qoffset = 0;
	u16 qcount = num_tx_queues;

	if (skb_rx_queue_recorded(skb)) {
		hash = skb_get_rx_queue(skb);
		while (unlikely(hash >= num_tx_queues))
			hash -= num_tx_queues;
		return hash;
	}

	if (dev->num_tc) {
		u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
		qoffset = dev->tc_to_txq[tc].offset;
		qcount = dev->tc_to_txq[tc].count;
	}

	if (skb->sk && skb->sk->sk_hash)
		hash = skb->sk->sk_hash;
	else
		hash = (__force u16) skb->protocol;
	hash = jhash_1word(hash, hashrnd);

	return (u16) (((u64) hash * qcount) >> 32) + qoffset;
}
EXPORT_SYMBOL(__skb_tx_hash);
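Both __skb_tx_hash() above and get_xps_queue() below map a 32-bit hash onto a queue range with a multiply-and-shift rather than a modulo. A sketch of that step in isolation (example_scale_hash() is an invented name, not a kernel helper): because the hash is roughly uniform over [0, 2^32), the product shifted right by 32 is roughly uniform over [0, n).

/* Illustrative only -- the multiply-shift range reduction used above */
static inline u32 example_scale_hash(u32 hash, u32 n)
{
	return (u32)(((u64)hash * n) >> 32);	/* result lies in [0, n) */
}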

static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
{
	if (unlikely(queue_index >= dev->real_num_tx_queues)) {
		net_warn_ratelimited("%s selects TX queue %d, but real number of TX queues is %d\n",
				     dev->name, queue_index,
				     dev->real_num_tx_queues);
		return 0;
	}
	return queue_index;
}

static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
{
#ifdef CONFIG_XPS
	struct xps_dev_maps *dev_maps;
	struct xps_map *map;
	int queue_index = -1;

	rcu_read_lock();
	dev_maps = rcu_dereference(dev->xps_maps);
	if (dev_maps) {
		map = rcu_dereference(
		    dev_maps->cpu_map[raw_smp_processor_id()]);
		if (map) {
			if (map->len == 1)
				queue_index = map->queues[0];
			else {
				u32 hash;
				if (skb->sk && skb->sk->sk_hash)
					hash = skb->sk->sk_hash;
				else
					hash = (__force u16) skb->protocol ^
					    skb->rxhash;
				hash = jhash_1word(hash, hashrnd);
				queue_index = map->queues[
				    ((u64)hash * map->len) >> 32];
			}
			if (unlikely(queue_index >= dev->real_num_tx_queues))
				queue_index = -1;
		}
	}
	rcu_read_unlock();

	return queue_index;
#else
	return -1;
#endif
}

u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
{
	struct sock *sk = skb->sk;
	int queue_index = sk_tx_queue_get(sk);

	if (queue_index < 0 || skb->ooo_okay ||
	    queue_index >= dev->real_num_tx_queues) {
		int new_index = get_xps_queue(dev, skb);
		if (new_index < 0)
			new_index = skb_tx_hash(dev, skb);

		if (queue_index != new_index && sk) {
			struct dst_entry *dst =
				    rcu_dereference_check(sk->sk_dst_cache, 1);

			if (dst && skb_dst(skb) == dst)
				sk_tx_queue_set(sk, queue_index);

		}

		queue_index = new_index;
	}

	return queue_index;
}
EXPORT_SYMBOL(__netdev_pick_tx);

struct netdev_queue *netdev_pick_tx(struct net_device *dev,
				    struct sk_buff *skb)
{
	int queue_index = 0;

	if (dev->real_num_tx_queues != 1) {
		const struct net_device_ops *ops = dev->netdev_ops;
		if (ops->ndo_select_queue)
			queue_index = ops->ndo_select_queue(dev, skb);
		else
			queue_index = __netdev_pick_tx(dev, skb);
		queue_index = dev_cap_txqueue(dev, queue_index);
	}

	skb_set_queue_mapping(skb, queue_index);
	return netdev_get_tx_queue(dev, queue_index);
}

static int __init initialize_hashrnd(void)
{
	get_random_bytes(&hashrnd, sizeof(hashrnd));
	return 0;
}

late_initcall_sync(initialize_hashrnd);
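A closing sketch, assuming a hypothetical driver: netdev_pick_tx() falls back to __netdev_pick_tx() only when a driver supplies no .ndo_select_queue hook, so a driver with no special queueing policy could simply delegate to the exported helper. example_ndo_select_queue() is an invented name; the two-argument signature mirrors the ops->ndo_select_queue(dev, skb) call above.

/* Hypothetical driver hook -- not part of this file */
static u16 example_ndo_select_queue(struct net_device *dev, struct sk_buff *skb)
{
	return __netdev_pick_tx(dev, skb);
}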