Blame view
net/core/flow_dissector.c
11.2 KB
0744dd00c
|
1 |
#include <linux/skbuff.h> |
c452ed707
|
2 |
#include <linux/export.h> |
0744dd00c
|
3 4 5 6 |
#include <linux/ip.h> #include <linux/ipv6.h> #include <linux/if_vlan.h> #include <net/ip.h> |
ddbe50320
|
7 |
#include <net/ipv6.h> |
f77668dc2
|
8 9 10 11 |
#include <linux/igmp.h> #include <linux/icmp.h> #include <linux/sctp.h> #include <linux/dccp.h> |
0744dd00c
|
12 13 14 15 |
#include <linux/if_tunnel.h> #include <linux/if_pppox.h> #include <linux/ppp_defs.h> #include <net/flow_keys.h> |
56193d1bc
|
16 |
#include <scsi/fc/fc_fcoe.h> |
0744dd00c
|
17 |
|
/* copy saddr & daddr, possibly using 64bit load/store
 * Equivalent to : flow->src = iph->saddr;
 *                 flow->dst = iph->daddr;
 *
 * The single memcpy of both addresses relies on ->src and ->dst being
 * adjacent in struct flow_keys; the BUILD_BUG_ON below makes that layout
 * assumption a compile-time check.
 */
static void iph_to_flow_copy_addrs(struct flow_keys *flow, const struct iphdr *iph)
{
	BUILD_BUG_ON(offsetof(typeof(*flow), dst) !=
		     offsetof(typeof(*flow), src) + sizeof(flow->src));
	memcpy(&flow->src, &iph->saddr, sizeof(flow->src) + sizeof(flow->dst));
}
0744dd00c
|
28 |
|
/**
 * __skb_flow_get_ports - extract the upper layer ports and return them
 * @skb: sk_buff to extract the ports from
 * @thoff: transport header offset
 * @ip_proto: protocol for which to get port offset
 * @data: raw buffer pointer to the packet, if NULL use skb->data
 * @hlen: packet header length, if @data is NULL use skb_headlen(skb)
 *
 * The function will try to retrieve the ports at offset thoff + poff where poff
 * is the protocol port offset returned from proto_ports_offset
 *
 * Returns both 16-bit ports packed in one __be32 word, or 0 when the
 * protocol has no port concept or the header bytes are not available.
 */
__be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto,
			    void *data, int hlen)
{
	/* proto_ports_offset() < 0 means @ip_proto carries no ports */
	int poff = proto_ports_offset(ip_proto);

	if (!data) {
		data = skb->data;
		hlen = skb_headlen(skb);
	}

	if (poff >= 0) {
		__be32 *ports, _ports;

		/* _ports is the on-stack backing store used only when the
		 * bytes are not contiguous in @data
		 */
		ports = __skb_header_pointer(skb, thoff + poff,
					     sizeof(_ports), data, hlen, &_ports);
		if (ports)
			return *ports;
	}

	return 0;
}
EXPORT_SYMBOL(__skb_flow_get_ports);
357afe9c4
|
59 |
|
/**
 * __skb_flow_dissect - extract the flow_keys struct and return it
 * @skb: sk_buff to extract the flow from, can be NULL if the rest are specified
 * @flow: flow_keys to fill in; always zeroed first
 * @data: raw buffer pointer to the packet, if NULL use skb->data
 * @proto: protocol for which to get the flow, if @data is NULL use skb->protocol
 * @nhoff: network header offset, if @data is NULL use skb_network_offset(skb)
 * @hlen: packet header length, if @data is NULL use skb_headlen(skb)
 *
 * The function will try to retrieve the struct flow_keys from either the skbuff
 * or a raw buffer specified by the rest parameters.
 *
 * Returns false when the packet cannot be dissected (truncated headers,
 * unknown protocol, GRE with routing, ...), true otherwise.
 */
bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow,
			void *data, __be16 proto, int nhoff, int hlen)
{
	u8 ip_proto;

	if (!data) {
		data = skb->data;
		proto = skb->protocol;
		nhoff = skb_network_offset(skb);
		hlen = skb_headlen(skb);
	}

	memset(flow, 0, sizeof(*flow));

	/* Encapsulations (VLAN, PPPoE, GRE, IP-in-IP) loop back to "again"
	 * with @proto and @nhoff advanced past the outer header.
	 */
again:
	switch (proto) {
	case htons(ETH_P_IP): {
		const struct iphdr *iph;
		struct iphdr _iph;
ip:
		iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph);
		/* ihl < 5 is an invalid IPv4 header length */
		if (!iph || iph->ihl < 5)
			return false;
		nhoff += iph->ihl * 4;

		ip_proto = iph->protocol;
		/* fragments (other than the first) carry no L4 header, so
		 * report no transport protocol rather than a bogus one
		 */
		if (ip_is_fragment(iph))
			ip_proto = 0;

		/* skip the address processing if skb is NULL. The assumption
		 * here is that if there is no skb we are not looking for flow
		 * info but lengths and protocols.
		 */
		if (!skb)
			break;

		iph_to_flow_copy_addrs(flow, iph);
		break;
	}
	case htons(ETH_P_IPV6): {
		const struct ipv6hdr *iph;
		struct ipv6hdr _iph;
		__be32 flow_label;

ipv6:
		iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph);
		if (!iph)
			return false;

		ip_proto = iph->nexthdr;
		nhoff += sizeof(struct ipv6hdr);

		/* see comment above in IPv4 section */
		if (!skb)
			break;

		/* 128-bit addresses are folded to 32 bits to fit flow_keys */
		flow->src = (__force __be32)ipv6_addr_hash(&iph->saddr);
		flow->dst = (__force __be32)ipv6_addr_hash(&iph->daddr);

		flow_label = ip6_flowlabel(iph);
		if (flow_label) {
			/* Awesome, IPv6 packet has a flow label so we can
			 * use that to represent the ports without any
			 * further dissection.
			 */
			flow->n_proto = proto;
			flow->ip_proto = ip_proto;
			flow->ports = flow_label;
			flow->thoff = (u16)nhoff;

			return true;
		}

		break;
	}
	case htons(ETH_P_8021AD):
	case htons(ETH_P_8021Q): {
		const struct vlan_hdr *vlan;
		struct vlan_hdr _vlan;

		vlan = __skb_header_pointer(skb, nhoff, sizeof(_vlan), data, hlen, &_vlan);
		if (!vlan)
			return false;

		/* peel the VLAN tag and re-dissect the encapsulated proto */
		proto = vlan->h_vlan_encapsulated_proto;
		nhoff += sizeof(*vlan);
		goto again;
	}
	case htons(ETH_P_PPP_SES): {
		/* PPPoE session header followed by the 16-bit PPP protocol */
		struct {
			struct pppoe_hdr hdr;
			__be16 proto;
		} *hdr, _hdr;
		hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
		if (!hdr)
			return false;
		proto = hdr->proto;
		nhoff += PPPOE_SES_HLEN;
		switch (proto) {
		case htons(PPP_IP):
			goto ip;
		case htons(PPP_IPV6):
			goto ipv6;
		default:
			return false;
		}
	}
	case htons(ETH_P_FCOE):
		/* FCoE has no flow info to extract; record the transport
		 * offset for header-length users, then fail the dissection
		 */
		flow->thoff = (u16)(nhoff + FCOE_HEADER_LEN);
		/* fall through */
	default:
		return false;
	}

	switch (ip_proto) {
	case IPPROTO_GRE: {
		struct gre_hdr {
			__be16 flags;
			__be16 proto;
		} *hdr, _hdr;

		hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
		if (!hdr)
			return false;
		/*
		 * Only look inside GRE if version zero and no
		 * routing
		 */
		if (!(hdr->flags & (GRE_VERSION|GRE_ROUTING))) {
			proto = hdr->proto;
			nhoff += 4;
			/* each optional GRE field present adds 4 bytes */
			if (hdr->flags & GRE_CSUM)
				nhoff += 4;
			if (hdr->flags & GRE_KEY)
				nhoff += 4;
			if (hdr->flags & GRE_SEQ)
				nhoff += 4;
			if (proto == htons(ETH_P_TEB)) {
				/* transparent Ethernet bridging: an inner
				 * Ethernet header follows; take its ethertype
				 */
				const struct ethhdr *eth;
				struct ethhdr _eth;

				eth = __skb_header_pointer(skb, nhoff,
							   sizeof(_eth),
							   data, hlen, &_eth);
				if (!eth)
					return false;
				proto = eth->h_proto;
				nhoff += sizeof(*eth);
			}
			goto again;
		}
		break;
	}
	case IPPROTO_IPIP:
		proto = htons(ETH_P_IP);
		goto ip;
	case IPPROTO_IPV6:
		proto = htons(ETH_P_IPV6);
		goto ipv6;
	default:
		break;
	}

	flow->n_proto = proto;
	flow->ip_proto = ip_proto;
	flow->thoff = (u16) nhoff;

	/* unless skb is set we don't need to record port info */
	if (skb)
		flow->ports = __skb_flow_get_ports(skb, nhoff, ip_proto,
						   data, hlen);

	return true;
}
EXPORT_SYMBOL(__skb_flow_dissect);
/* per-boot random seed for the flow hash, lazily initialized on first use */
static u32 hashrnd __read_mostly;

static __always_inline void __flow_hash_secret_init(void)
{
	/* net_get_random_once() fills hashrnd exactly once, thread-safely */
	net_get_random_once(&hashrnd, sizeof(hashrnd));
}

/* jhash of three words keyed by the per-boot secret, so hash values are
 * not predictable across reboots
 */
static __always_inline u32 __flow_hash_3words(u32 a, u32 b, u32 c)
{
	__flow_hash_secret_init();
	return jhash_3words(a, b, c, hashrnd);
}
/* Hash a dissected flow into a non-zero u32.
 *
 * NOTE: canonicalizes @keys in place (may swap src/dst and the two ports)
 * so that both directions of the same flow hash identically.
 */
static inline u32 __flow_hash_from_keys(struct flow_keys *keys)
{
	u32 hash;

	/* get a consistent hash (same value on both flow directions) */
	if (((__force u32)keys->dst < (__force u32)keys->src) ||
	    (((__force u32)keys->dst == (__force u32)keys->src) &&
	     ((__force u16)keys->port16[1] < (__force u16)keys->port16[0]))) {
		swap(keys->dst, keys->src);
		swap(keys->port16[0], keys->port16[1]);
	}

	hash = __flow_hash_3words((__force u32)keys->dst,
				  (__force u32)keys->src,
				  (__force u32)keys->ports);
	/* 0 is reserved to mean "no valid hash" (see __skb_get_hash) */
	if (!hash)
		hash = 1;

	return hash;
}

u32 flow_hash_from_keys(struct flow_keys *keys)
{
	return __flow_hash_from_keys(keys);
}
EXPORT_SYMBOL(flow_hash_from_keys);
/*
 * __skb_get_hash: calculate a flow hash based on src/dst addresses
 * and src/dst port numbers. Sets hash in skb to non-zero hash value
 * on success, zero indicates no valid hash. Also, sets l4_hash in skb
 * if hash is a canonical 4-tuple hash over transport ports.
 */
void __skb_get_hash(struct sk_buff *skb)
{
	struct flow_keys keys;

	/* dissection failure leaves skb->hash untouched (stays zero) */
	if (!skb_flow_dissect(skb, &keys))
		return;

	/* ports present means this is a full 4-tuple (L4) hash */
	if (keys.ports)
		skb->l4_hash = 1;

	/* mark the hash as software-computed, as opposed to NIC-provided */
	skb->sw_hash = 1;

	skb->hash = __flow_hash_from_keys(&keys);
}
EXPORT_SYMBOL(__skb_get_hash);
/*
 * Returns a Tx hash based on the given packet descriptor and a Tx queues'
 * number to be used as a distribution range.
 */
u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb,
		  unsigned int num_tx_queues)
{
	u32 hash;
	u16 qoffset = 0;
	u16 qcount = num_tx_queues;

	if (skb_rx_queue_recorded(skb)) {
		/* reuse the recorded Rx queue, folded into range by
		 * repeated subtraction (cheaper than div for small excess)
		 */
		hash = skb_get_rx_queue(skb);
		while (unlikely(hash >= num_tx_queues))
			hash -= num_tx_queues;
		return hash;
	}

	if (dev->num_tc) {
		/* restrict the range to the traffic-class queue group */
		u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
		qoffset = dev->tc_to_txq[tc].offset;
		qcount = dev->tc_to_txq[tc].count;
	}

	/* scale the flow hash into [qoffset, qoffset + qcount) */
	return (u16) reciprocal_scale(skb_get_hash(skb), qcount) + qoffset;
}
EXPORT_SYMBOL(__skb_tx_hash);
/* __skb_get_poff - return the offset to the payload
 * @skb: sk_buff to dissect
 * @data: raw buffer pointer to the packet
 * @keys: already-dissected flow keys (thoff/ip_proto are consumed)
 * @hlen: packet header length
 *
 * Starts from the transport header offset and advances past the L4 header
 * for the protocols it knows about; otherwise returns the transport offset
 * unchanged.
 */
u32 __skb_get_poff(const struct sk_buff *skb, void *data,
		   const struct flow_keys *keys, int hlen)
{
	u32 poff = keys->thoff;

	switch (keys->ip_proto) {
	case IPPROTO_TCP: {
		/* access doff as u8 to avoid unaligned access */
		const u8 *doff;
		u8 _doff;

		/* offset 12 in the TCP header holds data-offset in the
		 * high nibble
		 */
		doff = __skb_header_pointer(skb, poff + 12, sizeof(_doff),
					    data, hlen, &_doff);
		if (!doff)
			return poff;

		/* (*doff & 0xF0) >> 2 == data-offset words * 4 bytes;
		 * clamp to at least the minimal TCP header size
		 */
		poff += max_t(u32, sizeof(struct tcphdr), (*doff & 0xF0) >> 2);
		break;
	}
	case IPPROTO_UDP:
	case IPPROTO_UDPLITE:
		poff += sizeof(struct udphdr);
		break;
	/* For the rest, we do not really care about header
	 * extensions at this point for now.
	 */
	case IPPROTO_ICMP:
		poff += sizeof(struct icmphdr);
		break;
	case IPPROTO_ICMPV6:
		poff += sizeof(struct icmp6hdr);
		break;
	case IPPROTO_IGMP:
		poff += sizeof(struct igmphdr);
		break;
	case IPPROTO_DCCP:
		poff += sizeof(struct dccp_hdr);
		break;
	case IPPROTO_SCTP:
		poff += sizeof(struct sctphdr);
		break;
	}

	return poff;
}
56193d1bc
|
363 364 365 366 367 368 369 370 371 372 373 374 375 376 |
/* skb_get_poff() returns the offset to the payload as far as it could * be dissected. The main user is currently BPF, so that we can dynamically * truncate packets without needing to push actual payload to the user * space and can analyze headers only, instead. */ u32 skb_get_poff(const struct sk_buff *skb) { struct flow_keys keys; if (!skb_flow_dissect(skb, &keys)) return 0; return __skb_get_poff(skb, skb->data, &keys, skb_headlen(skb)); } |
/* Pick a Tx queue from the XPS (Transmit Packet Steering) CPU map for the
 * current CPU, or -1 when XPS is compiled out, has no map, or the mapped
 * queue is out of range.
 */
static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
{
#ifdef CONFIG_XPS
	struct xps_dev_maps *dev_maps;
	struct xps_map *map;
	int queue_index = -1;

	/* the XPS maps are RCU-protected; hold the read lock for the
	 * whole lookup
	 */
	rcu_read_lock();
	dev_maps = rcu_dereference(dev->xps_maps);
	if (dev_maps) {
		map = rcu_dereference(
		    dev_maps->cpu_map[raw_smp_processor_id()]);
		if (map) {
			if (map->len == 1)
				queue_index = map->queues[0];
			else
				/* spread flows over this CPU's queues by
				 * flow hash
				 */
				queue_index = map->queues[reciprocal_scale(skb_get_hash(skb),
									   map->len)];
			/* map may be stale vs. a shrunk queue count */
			if (unlikely(queue_index >= dev->real_num_tx_queues))
				queue_index = -1;
		}
	}
	rcu_read_unlock();

	return queue_index;
#else
	return -1;
#endif
}
/* Default Tx queue selection: reuse the socket's cached queue when still
 * valid, otherwise pick via XPS and fall back to the flow-hash spread.
 */
static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
{
	struct sock *sk = skb->sk;
	int queue_index = sk_tx_queue_get(sk);

	/* recompute when there is no cached queue, the socket allows
	 * out-of-order delivery, or the cache is out of range (queue
	 * count may have changed)
	 */
	if (queue_index < 0 || skb->ooo_okay ||
	    queue_index >= dev->real_num_tx_queues) {
		int new_index = get_xps_queue(dev, skb);
		if (new_index < 0)
			new_index = skb_tx_hash(dev, skb);

		/* only cache on sockets with a destination cache, so the
		 * mapping is invalidated alongside the route
		 */
		if (queue_index != new_index && sk &&
		    rcu_access_pointer(sk->sk_dst_cache))
			sk_tx_queue_set(sk, new_index);

		queue_index = new_index;
	}

	return queue_index;
}
/* Select and record the Tx queue for @skb on @dev.
 *
 * Prefers the driver's ndo_select_queue() callback when provided,
 * otherwise uses the generic __netdev_pick_tx(). The chosen index is
 * stored in the skb's queue mapping and the matching netdev_queue is
 * returned.
 */
struct netdev_queue *netdev_pick_tx(struct net_device *dev,
				    struct sk_buff *skb,
				    void *accel_priv)
{
	/* single-queue devices always use queue 0 */
	int queue_index = 0;

	if (dev->real_num_tx_queues != 1) {
		const struct net_device_ops *ops = dev->netdev_ops;
		if (ops->ndo_select_queue)
			queue_index = ops->ndo_select_queue(dev, skb, accel_priv,
							    __netdev_pick_tx);
		else
			queue_index = __netdev_pick_tx(dev, skb);

		/* accel (e.g. macvlan offload) callers handle capping
		 * themselves; otherwise clamp to the valid queue range
		 */
		if (!accel_priv)
			queue_index = netdev_cap_txqueue(dev, queue_index);
	}

	skb_set_queue_mapping(skb, queue_index);
	return netdev_get_tx_queue(dev, queue_index);
}