Commit 5d38a079ce3971f932bbdc0dc5b887806fabd5dc

Authored by Herbert Xu
Committed by David S. Miller
1 parent b530256d2e

gro: Add page frag support

This patch allows GRO to merge page frags (skb_shinfo(skb)->frags)
in one skb, rather than using the less efficient frag_list.

It also adds a new interface, napi_gro_frags to allow drivers
to inject page frags directly into the stack without allocating
an skb.  This is intended to be the GRO equivalent for LRO's
lro_receive_frags interface.

The existing GSO interface can already handle page frags with
or without an appended frag_list so nothing needs to be changed
there.

The merging itself is rather simple.  We store any new frag entries
after the last existing entry, without checking whether the first
new entry can be merged with the last existing entry.  Making this
check would actually be easy but since no existing driver can
produce contiguous frags anyway it would just be mental masturbation.

If the total number of entries would exceed the capacity of a
single skb, we simply resort to using frag_list as we do now.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>

Showing 3 changed files with 114 additions and 7 deletions (side-by-side diff)

include/linux/netdevice.h
... ... @@ -313,10 +313,11 @@
313 313 #ifdef CONFIG_NETPOLL
314 314 spinlock_t poll_lock;
315 315 int poll_owner;
316   - struct net_device *dev;
317 316 #endif
  317 + struct net_device *dev;
318 318 struct list_head dev_list;
319 319 struct sk_buff *gro_list;
  320 + struct sk_buff *skb;
320 321 };
321 322  
322 323 enum
... ... @@ -990,6 +991,9 @@
990 991  
991 992 /* Number of segments aggregated. */
992 993 int count;
  994 +
  995 + /* Free the skb? */
  996 + int free;
993 997 };
994 998  
995 999 #define NAPI_GRO_CB(skb) ((struct napi_gro_cb *)(skb)->cb)
... ... @@ -1011,6 +1015,14 @@
1011 1015 struct list_head list;
1012 1016 };
1013 1017  
  /*
   * struct napi_gro_fraginfo - packet description passed to napi_gro_frags()
   *
   * frags:     page fragment descriptors; napi_gro_frags() copies the whole
   *            array into skb_shinfo(skb)->frags.
   * nr_frags:  number of entries in use; napi_gro_frags() BUG()s if this
   *            exceeds MAX_SKB_FRAGS.
   * ip_summed: copied verbatim into skb->ip_summed.
   * len:       byte count assigned to skb->data_len and added to skb->len
   *            and skb->truesize.
   * csum:      copied verbatim into skb->csum.
   */
  1018 +struct napi_gro_fraginfo {
  1019 + skb_frag_t frags[MAX_SKB_FRAGS];
  1020 + unsigned int nr_frags;
  1021 + unsigned int ip_summed;
  1022 + unsigned int len;
  1023 + __wsum csum;
  1024 +};
  1025 +
1014 1026 #include <linux/interrupt.h>
1015 1027 #include <linux/notifier.h>
1016 1028  
... ... @@ -1363,6 +1375,8 @@
1363 1375 extern void napi_gro_flush(struct napi_struct *napi);
1364 1376 extern int napi_gro_receive(struct napi_struct *napi,
1365 1377 struct sk_buff *skb);
  1378 +extern int napi_gro_frags(struct napi_struct *napi,
  1379 + struct napi_gro_fraginfo *info);
1366 1380 extern void netif_nit_deliver(struct sk_buff *skb);
1367 1381 extern int dev_valid_name(const char *name);
1368 1382 extern int dev_ioctl(struct net *net, unsigned int cmd, void __user *);
net/core/dev.c
... ... @@ -132,6 +132,9 @@
132 132 /* Instead of increasing this, you should create a hash table. */
133 133 #define MAX_GRO_SKBS 8
134 134  
  135 +/* This should be increased if a protocol with a bigger head is added. */
  136 +#define GRO_MAX_HEAD (MAX_HEADER + 128)
  137 +
135 138 /*
136 139 * The list of packet types we will receive (as opposed to discard)
137 140 * and the routines to invoke.
... ... @@ -2345,7 +2348,7 @@
2345 2348 struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
2346 2349 int err = -ENOENT;
2347 2350  
2348   - if (!skb_shinfo(skb)->frag_list)
  2351 + if (NAPI_GRO_CB(skb)->count == 1)
2349 2352 goto out;
2350 2353  
2351 2354 rcu_read_lock();
... ... @@ -2384,7 +2387,7 @@
2384 2387 }
2385 2388 EXPORT_SYMBOL(napi_gro_flush);
2386 2389  
2387   -int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
  2390 +static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
2388 2391 {
2389 2392 struct sk_buff **pp = NULL;
2390 2393 struct packet_type *ptype;
... ... @@ -2393,6 +2396,7 @@
2393 2396 int count = 0;
2394 2397 int same_flow;
2395 2398 int mac_len;
  2399 + int free;
2396 2400  
2397 2401 if (!(skb->dev->features & NETIF_F_GRO))
2398 2402 goto normal;
... ... @@ -2409,6 +2413,7 @@
2409 2413 skb->mac_len = mac_len;
2410 2414 NAPI_GRO_CB(skb)->same_flow = 0;
2411 2415 NAPI_GRO_CB(skb)->flush = 0;
  2416 + NAPI_GRO_CB(skb)->free = 0;
2412 2417  
2413 2418 for (p = napi->gro_list; p; p = p->next) {
2414 2419 count++;
... ... @@ -2428,6 +2433,7 @@
2428 2433 goto normal;
2429 2434  
2430 2435 same_flow = NAPI_GRO_CB(skb)->same_flow;
  2436 + free = NAPI_GRO_CB(skb)->free;
2431 2437  
2432 2438 if (pp) {
2433 2439 struct sk_buff *nskb = *pp;
2434 2440  
2435 2441  
2436 2442  
... ... @@ -2452,13 +2458,86 @@
2452 2458 napi->gro_list = skb;
2453 2459  
2454 2460 ok:
2455   - return NET_RX_SUCCESS;
  2461 + return free;
2456 2462  
2457 2463 normal:
2458   - return netif_receive_skb(skb);
  2464 + return -1;
2459 2465 }
  2466 +
  /*
   * napi_gro_receive - run an skb through GRO and act on the verdict
   *
   * Dispatches on the value returned by __napi_gro_receive(), which in the
   * visible hunks returns -1 at its "normal" label and the value of
   * NAPI_GRO_CB(skb)->free at its "ok" label:
   *   -1     pass the skb to netif_receive_skb() and return its result;
   *    1     release the skb with kfree_skb();
   *   other  nothing further is done with the skb here.
   * Every path except -1 returns NET_RX_SUCCESS.
   *
   * NOTE(review): the path in this diff that sets ->free = 1 copies this
   * skb's frag descriptors into the head skb without clearing
   * skb_shinfo(skb)->nr_frags. Confirm that kfree_skb() below cannot drop
   * page references the head skb still depends on.
   */
  2467 +int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
  2468 +{
  2469 + switch (__napi_gro_receive(napi, skb)) {
  2470 + case -1:
  2471 + return netif_receive_skb(skb);
  2472 +
  2473 + case 1:
  2474 + kfree_skb(skb);
  2475 + break;
  2476 + }
  2477 +
  2478 + return NET_RX_SUCCESS;
  2479 +}
2460 2480 EXPORT_SYMBOL(napi_gro_receive);
2461 2481  
  /*
   * napi_gro_frags - feed a packet described only by page frags into GRO
   * @napi: NAPI context; supplies the device (napi->dev) and a cached skb
   *        (napi->skb)
   * @info: fragment array, fragment count, total length and checksum state
   *
   * Attaches the fragments in @info to an skb (the one cached in napi->skb,
   * or a new one allocated with GRO_MAX_HEAD + NET_IP_ALIGN bytes), asks
   * pskb_may_pull() for ETH_HLEN bytes, sets the protocol via
   * eth_type_trans() and passes the skb to __napi_gro_receive().
   *
   * Returns NET_RX_DROP when no skb could be allocated or pskb_may_pull()
   * fails, the result of netif_receive_skb() when __napi_gro_receive()
   * returns -1, and NET_RX_SUCCESS otherwise.
   */
  2482 +int napi_gro_frags(struct napi_struct *napi, struct napi_gro_fraginfo *info)
  2483 +{
  2484 + struct net_device *dev = napi->dev;
  2485 + struct sk_buff *skb = napi->skb;
  2486 + int err = NET_RX_DROP;
  2487 +
  /* Detach the cached skb; it is stored back only on the "reuse" path. */
  2488 + napi->skb = NULL;
  2489 +
  2490 + if (!skb) {
  2491 + skb = netdev_alloc_skb(dev, GRO_MAX_HEAD + NET_IP_ALIGN);
  2492 + if (!skb)
  2493 + goto out;
  2494 +
  2495 + skb_reserve(skb, NET_IP_ALIGN);
  2496 + }
  2497 +
  2498 + BUG_ON(info->nr_frags > MAX_SKB_FRAGS);
  2499 + skb_shinfo(skb)->nr_frags = info->nr_frags;
  /* Copies the entire array (sizeof(info->frags)), not just nr_frags entries. */
  2500 + memcpy(skb_shinfo(skb)->frags, info->frags, sizeof(info->frags));
  2501 +
  2502 + skb->data_len = info->len;
  2503 + skb->len += info->len;
  2504 + skb->truesize += info->len;
  2505 +
  /*
   * On failure the skb is recycled and NET_RX_DROP is returned.
   * NOTE(review): the reuse path clears nr_frags with no visible release of
   * the pages; presumably the caller keeps ownership of them on drop -
   * confirm.
   */
  2506 + if (!pskb_may_pull(skb, ETH_HLEN))
  2507 + goto reuse;
  2508 +
  2509 + err = NET_RX_SUCCESS;
  2510 +
  2511 + skb->protocol = eth_type_trans(skb, dev);
  2512 +
  2513 + skb->ip_summed = info->ip_summed;
  2514 + skb->csum = info->csum;
  2515 +
  /*
   * -1: fall back to the regular receive path.
   *  0: the skb was not flagged to be freed, so it is not recycled.
   * Any other value drops through to "reuse" and recycles the skb instead
   * of freeing it.
   */
  2516 + switch (__napi_gro_receive(napi, skb)) {
  2517 + case -1:
  2518 + return netif_receive_skb(skb);
  2519 +
  2520 + case 0:
  2521 + goto out;
  2522 + }
  2523 +
  /* Strip the frags and empty the skb so it can be cached for the next call. */
  2524 +reuse:
  2525 + skb_shinfo(skb)->nr_frags = 0;
  2526 +
  /*
   * NOTE(review): if pskb_may_pull() moved bytes from the frags into the
   * linear area, data_len is now smaller than info->len, so truesize would
   * not return to its pre-call value - confirm the accounting.
   */
  2527 + skb->len -= skb->data_len;
  2528 + skb->truesize -= skb->data_len;
  2529 + skb->data_len = 0;
  2530 +
  /*
   * Discard whatever is in the linear area, then adjust the headroom;
   * presumably this restores it to NET_IP_ALIGN as after allocation.
   */
  2531 + __skb_pull(skb, skb_headlen(skb));
  2532 + skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb));
  2533 +
  /*
   * NOTE(review): the cached object is a struct sk_buff, yet the
   * netif_napi_del hunk in this diff releases napi->skb with kfree() -
   * confirm whether kfree_skb() was intended there.
   */
  2534 + napi->skb = skb;
  2535 +
  2536 +out:
  2537 + return err;
  2538 +}
  2539 +EXPORT_SYMBOL(napi_gro_frags);
  2540 +
2462 2541 static int process_backlog(struct napi_struct *napi, int quota)
2463 2542 {
2464 2543 int work = 0;
2465 2544  
2466 2545  
... ... @@ -2537,11 +2616,12 @@
2537 2616 {
2538 2617 INIT_LIST_HEAD(&napi->poll_list);
2539 2618 napi->gro_list = NULL;
  2619 + napi->skb = NULL;
2540 2620 napi->poll = poll;
2541 2621 napi->weight = weight;
2542 2622 list_add(&napi->dev_list, &dev->napi_list);
2543   -#ifdef CONFIG_NETPOLL
2544 2623 napi->dev = dev;
  2624 +#ifdef CONFIG_NETPOLL
2545 2625 spin_lock_init(&napi->poll_lock);
2546 2626 napi->poll_owner = -1;
2547 2627 #endif
... ... @@ -2554,6 +2634,7 @@
2554 2634 struct sk_buff *skb, *next;
2555 2635  
2556 2636 list_del_init(&napi->dev_list);
  2637 + kfree(napi->skb);
2557 2638  
2558 2639 for (skb = napi->gro_list; skb; skb = next) {
2559 2640 next = skb->next;
net/core/skbuff.c
... ... @@ -2594,7 +2594,18 @@
2594 2594  
2595 2595 if (skb_shinfo(p)->frag_list)
2596 2596 goto merge;
  2597 + else if (!skb_headlen(p) && !skb_headlen(skb) &&
  2598 + skb_shinfo(p)->nr_frags + skb_shinfo(skb)->nr_frags <
  2599 + MAX_SKB_FRAGS) {
  2600 + memcpy(skb_shinfo(p)->frags + skb_shinfo(p)->nr_frags,
  2601 + skb_shinfo(skb)->frags,
  2602 + skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t));
2597 2603  
  2604 + skb_shinfo(p)->nr_frags += skb_shinfo(skb)->nr_frags;
  2605 + NAPI_GRO_CB(skb)->free = 1;
  2606 + goto done;
  2607 + }
  2608 +
2598 2609 headroom = skb_headroom(p);
2599 2610 nskb = netdev_alloc_skb(p->dev, headroom);
2600 2611 if (unlikely(!nskb))
2601 2612  
... ... @@ -2628,11 +2639,12 @@
2628 2639 p = nskb;
2629 2640  
2630 2641 merge:
2631   - NAPI_GRO_CB(p)->count++;
2632 2642 p->prev->next = skb;
2633 2643 p->prev = skb;
2634 2644 skb_header_release(skb);
2635 2645  
  2646 +done:
  2647 + NAPI_GRO_CB(p)->count++;
2636 2648 p->data_len += skb->len;
2637 2649 p->truesize += skb->len;
2638 2650 p->len += skb->len;