Commit 87fb4b7b533073eeeaed0b6bf7c2328995f6c075

Authored by Eric Dumazet
Committed by David S. Miller
1 parent 97ba0eb64c

net: more accurate skb truesize

skb truesize currently accounts for the sk_buff struct and only part of
the skb head. kmalloc() roundings are also ignored.

Considering that skb_shared_info is larger than sk_buff, it's time to
take it into account for better memory accounting.

This patch introduces an SKB_TRUESIZE(X) macro to centralize these
assumptions in a single place.
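
For reference, the macro's arithmetic can be checked outside the kernel.
The following minimal userspace sketch assumes a 64-byte cache line and
illustrative struct sizes (232 and 320 bytes); ALIGN_UP stands in for
SKB_DATA_ALIGN, and none of these names or sizes are authoritative.

    #include <stdio.h>

    #define CACHE_LINE 64   /* stand-in for SMP_CACHE_BYTES */
    #define ALIGN_UP(x) (((x) + CACHE_LINE - 1) & ~(size_t)(CACHE_LINE - 1))

    int main(void)
    {
        size_t sk_buff_sz = 232;  /* illustrative sizeof(struct sk_buff) */
        size_t shinfo_sz  = 320;  /* illustrative sizeof(struct skb_shared_info) */
        size_t x = 256;           /* payload bytes */

        /* SKB_TRUESIZE(X) = X + aligned sk_buff + aligned skb_shared_info */
        size_t truesize = x + ALIGN_UP(sk_buff_sz) + ALIGN_UP(shinfo_sz);
        printf("SKB_TRUESIZE(%zu) ~= %zu\n", x, truesize);
        return 0;
    }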

At skb allocation time, we place the skb_shared_info struct at the exact
end of the skb head, to make better use of memory (lowering the number
of reallocations), since kmalloc() gives us power-of-two memory blocks.

Unless SLUB/SLAB debug is active, both skb->head and skb_shared_info are
aligned to cache lines, as before.
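
The placement trick is easy to demonstrate in userspace. Below is a
minimal sketch, assuming glibc, where malloc_usable_size() stands in for
the kernel's ksize() and the struct/macro names are illustrative only:

    #include <malloc.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    struct shinfo_stub { unsigned refcnt; };  /* skb_shared_info stand-in */

    #define ALIGN64(x)       (((x) + 63u) & ~(size_t)63)
    #define WITH_OVERHEAD(x) ((x) - ALIGN64(sizeof(struct shinfo_stub)))

    int main(void)
    {
        size_t size = 1000;                           /* requested head size */
        size += ALIGN64(sizeof(struct shinfo_stub));  /* reserve the tail */

        unsigned char *data = malloc(size);
        if (!data)
            return 1;

        /* malloc may round the request up (kmalloc: power of two).
         * Reclaim the slack so the payload area grows and the trailing
         * struct lands at the very end of the usable block. */
        size = WITH_OVERHEAD(malloc_usable_size(data));

        struct shinfo_stub *shinfo = (struct shinfo_stub *)(data + size);
        memset(shinfo, 0, sizeof(*shinfo));
        shinfo->refcnt = 1;

        printf("payload grew to %zu bytes before the shared info\n", size);
        free(data);
        return 0;
    }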

Note: this patch might trigger performance regressions on misconfigured
protocol stacks, which may now hit per-socket or global memory limits
that were previously not reached. But it's a necessary step toward more
accurate memory accounting.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Andi Kleen <ak@linux.intel.com>
CC: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

Showing 8 changed files with 32 additions and 19 deletions

include/linux/skbuff.h
... ... @@ -46,6 +46,11 @@
46 46 #define SKB_MAX_HEAD(X) (SKB_MAX_ORDER((X), 0))
47 47 #define SKB_MAX_ALLOC (SKB_MAX_ORDER(0, 2))
48 48  
  49 +/* return minimum truesize of one skb containing X bytes of data */
  50 +#define SKB_TRUESIZE(X) ((X) + \
  51 + SKB_DATA_ALIGN(sizeof(struct sk_buff)) + \
  52 + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
  53 +
49 54 /* A. Checksumming of received packets by device.
50 55 *
51 56 * NONE: device failed to checksum this packet.
net/core/skbuff.c
... ... @@ -184,11 +184,20 @@
184 184 goto out;
185 185 prefetchw(skb);
186 186  
187   - size = SKB_DATA_ALIGN(size);
188   - data = kmalloc_node_track_caller(size + sizeof(struct skb_shared_info),
189   - gfp_mask, node);
  187 + /* We do our best to align skb_shared_info on a separate cache
  188 + * line. It usually works because kmalloc(X > SMP_CACHE_BYTES) gives
  189 + * aligned memory blocks, unless SLUB/SLAB debug is enabled.
  190 + * Both skb->head and skb_shared_info are cache line aligned.
  191 + */
  192 + size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
  193 + data = kmalloc_node_track_caller(size, gfp_mask, node);
190 194 if (!data)
191 195 goto nodata;
  196 + /* kmalloc(size) might give us more room than requested.
  197 + * Put skb_shared_info exactly at the end of allocated zone,
  198 + * to allow max possible filling before reallocation.
  199 + */
  200 + size = SKB_WITH_OVERHEAD(ksize(data));
192 201 prefetchw(data + size);
193 202  
194 203 /*
... ... @@ -197,7 +206,8 @@
197 206 * the tail pointer in struct sk_buff!
198 207 */
199 208 memset(skb, 0, offsetof(struct sk_buff, tail));
200   - skb->truesize = size + sizeof(struct sk_buff);
  209 + /* Account for allocated memory : skb + skb->head */
  210 + skb->truesize = SKB_TRUESIZE(size);
201 211 atomic_set(&skb->users, 1);
202 212 skb->head = data;
203 213 skb->data = data;
include/net/sock.h
... ... @@ -207,7 +207,7 @@
207 207 * not depend upon such differences.
208 208 */
209 209 #define _SK_MEM_PACKETS 256
210   -#define _SK_MEM_OVERHEAD (sizeof(struct sk_buff) + 256)
  210 +#define _SK_MEM_OVERHEAD SKB_TRUESIZE(256)
211 211 #define SK_WMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
212 212 #define SK_RMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
213 213  
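
To see the effect on these defaults, here is a rough before/after
computation, assuming illustrative x86_64 sizes (sizeof(struct sk_buff)
== 232, sizeof(struct skb_shared_info) == 320, 64-byte cache lines):

    #include <stdio.h>

    #define ALIGN64(x)  (((x) + 63u) & ~63u)
    #define TRUESIZE(x) ((x) + ALIGN64(232) + ALIGN64(320))  /* assumed sizes */

    int main(void)
    {
        unsigned old_overhead = 232 + 256;      /* old: sizeof(struct sk_buff) + 256 */
        unsigned new_overhead = TRUESIZE(256);  /* new: SKB_TRUESIZE(256) */

        /* _SK_MEM_PACKETS == 256 */
        printf("SK_WMEM_MAX: %u -> %u bytes\n",
               old_overhead * 256, new_overhead * 256);
        return 0;
    }
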
net/ipv4/icmp.c
... ... @@ -1152,10 +1152,9 @@
1152 1152 net->ipv4.icmp_sk[i] = sk;
1153 1153  
1154 1154 /* Enough space for 2 64K ICMP packets, including
1155   - * sk_buff struct overhead.
  1155 + * sk_buff/skb_shared_info struct overhead.
1156 1156 */
1157   - sk->sk_sndbuf =
1158   - (2 * ((64 * 1024) + sizeof(struct sk_buff)));
  1157 + sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
1159 1158  
1160 1159 /*
1161 1160 * Speedup sock_wfree()
net/ipv4/tcp_input.c
... ... @@ -265,8 +265,7 @@
265 265  
266 266 static void tcp_fixup_sndbuf(struct sock *sk)
267 267 {
268   - int sndmem = tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER + 16 +
269   - sizeof(struct sk_buff);
  268 + int sndmem = SKB_TRUESIZE(tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER);
270 269  
271 270 if (sk->sk_sndbuf < 3 * sndmem) {
272 271 sk->sk_sndbuf = 3 * sndmem;
... ... @@ -349,7 +348,7 @@
349 348 static void tcp_fixup_rcvbuf(struct sock *sk)
350 349 {
351 350 struct tcp_sock *tp = tcp_sk(sk);
352   - int rcvmem = tp->advmss + MAX_TCP_HEADER + 16 + sizeof(struct sk_buff);
  351 + int rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER);
353 352  
354 353 /* Try to select rcvbuf so that 4 mss-sized segments
355 354 * will fit to window and corresponding skbs will fit to our rcvbuf.
... ... @@ -540,8 +539,7 @@
540 539 space /= tp->advmss;
541 540 if (!space)
542 541 space = 1;
543   - rcvmem = (tp->advmss + MAX_TCP_HEADER +
544   - 16 + sizeof(struct sk_buff));
  542 + rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER);
545 543 while (tcp_win_from_space(rcvmem) < tp->advmss)
546 544 rcvmem += 128;
547 545 space *= rcvmem;
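
The sizing loop above can be mimicked in userspace. A sketch, assuming
tcp_adv_win_scale == 2 (so three quarters of the space counts as usable
window, as in the kernel's tcp_win_from_space()) and illustrative
overhead sizes:

    #include <stdio.h>

    #define ALIGN64(x)  (((x) + 63u) & ~63u)
    #define TRUESIZE(x) ((x) + ALIGN64(232) + ALIGN64(320))  /* assumed sizes */

    static int win_from_space(int space)
    {
        return space - (space >> 2);  /* tcp_adv_win_scale == 2 */
    }

    int main(void)
    {
        int advmss = 1460;
        int max_tcp_header = 320;     /* illustrative MAX_TCP_HEADER */
        int rcvmem = TRUESIZE(advmss + max_tcp_header);

        /* Grow rcvmem until the window derived from it can actually
         * hold one MSS of payload. */
        while (win_from_space(rcvmem) < advmss)
            rcvmem += 128;

        printf("rcvmem per MSS-sized skb: %d\n", rcvmem);
        return 0;
    }
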
... ... @@ -4950,8 +4948,10 @@
4950 4948 struct tcp_sock *tp = tcp_sk(sk);
4951 4949  
4952 4950 if (tcp_should_expand_sndbuf(sk)) {
4953   - int sndmem = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) +
4954   - MAX_TCP_HEADER + 16 + sizeof(struct sk_buff);
  4951 + int sndmem = SKB_TRUESIZE(max_t(u32,
  4952 + tp->rx_opt.mss_clamp,
  4953 + tp->mss_cache) +
  4954 + MAX_TCP_HEADER);
4955 4955 int demanded = max_t(unsigned int, tp->snd_cwnd,
4956 4956 tp->reordering + 1);
4957 4957 sndmem *= 2 * demanded;
net/ipv6/icmp.c
... ... @@ -835,8 +835,7 @@
835 835 /* Enough space for 2 64K ICMP packets, including
836 836 * sk_buff struct overhead.
837 837 */
838   - sk->sk_sndbuf =
839   - (2 * ((64 * 1024) + sizeof(struct sk_buff)));
  838 + sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
840 839 }
841 840 return 0;
842 841  
net/iucv/af_iucv.c
... ... @@ -1819,7 +1819,7 @@
1819 1819 goto save_message;
1820 1820  
1821 1821 len = atomic_read(&sk->sk_rmem_alloc);
1822   - len += iucv_msg_length(msg) + sizeof(struct sk_buff);
  1822 + len += SKB_TRUESIZE(iucv_msg_length(msg));
1823 1823 if (len > sk->sk_rcvbuf)
1824 1824 goto save_message;
1825 1825  
net/sctp/protocol.c
... ... @@ -1299,7 +1299,7 @@
1299 1299 max_share = min(4UL*1024*1024, limit);
1300 1300  
1301 1301 sysctl_sctp_rmem[0] = SK_MEM_QUANTUM; /* give each asoc 1 page min */
1302   - sysctl_sctp_rmem[1] = (1500 *(sizeof(struct sk_buff) + 1));
  1302 + sysctl_sctp_rmem[1] = 1500 * SKB_TRUESIZE(1);
1303 1303 sysctl_sctp_rmem[2] = max(sysctl_sctp_rmem[1], max_share);
1304 1304  
1305 1305 sysctl_sctp_wmem[0] = SK_MEM_QUANTUM;