Commit 87fb4b7b533073eeeaed0b6bf7c2328995f6c075
Committed by
David S. Miller
1 parent
97ba0eb64c
Exists in
master
and in
38 other branches
net: more accurate skb truesize
skb truesize currently accounts for sk_buff struct and part of skb head. kmalloc() roundings are also ignored. Considering that skb_shared_info is larger than sk_buff, it's time to take it into account for better memory accounting. This patch introduces SKB_TRUESIZE(X) macro to centralize various assumptions into a single place. At skb alloc phase, we put skb_shared_info struct at the exact end of skb head, to allow a better use of memory (lowering number of reallocations), since kmalloc() gives us power-of-two memory blocks. Unless SLUB/SLAB debug is active, both skb->head and skb_shared_info are aligned to cache lines, as before. Note: This patch might trigger performance regressions because of misconfigured protocol stacks, hitting per socket or global memory limits that were previously not reached. But it's a necessary step for a more accurate memory accounting. Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> CC: Andi Kleen <ak@linux.intel.com> CC: Ben Hutchings <bhutchings@solarflare.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Showing 8 changed files with 32 additions and 19 deletions Side-by-side Diff
include/linux/skbuff.h
... | ... | @@ -46,6 +46,11 @@ |
46 | 46 | #define SKB_MAX_HEAD(X) (SKB_MAX_ORDER((X), 0)) |
47 | 47 | #define SKB_MAX_ALLOC (SKB_MAX_ORDER(0, 2)) |
48 | 48 | |
49 | +/* return minimum truesize of one skb containing X bytes of data */ | |
50 | +#define SKB_TRUESIZE(X) ((X) + \ | |
51 | + SKB_DATA_ALIGN(sizeof(struct sk_buff)) + \ | |
52 | + SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) | |
53 | + | |
49 | 54 | /* A. Checksumming of received packets by device. |
50 | 55 | * |
51 | 56 | * NONE: device failed to checksum this packet. |
net/core/skbuff.c
... | ... | @@ -184,11 +184,20 @@ |
184 | 184 | goto out; |
185 | 185 | prefetchw(skb); |
186 | 186 | |
187 | - size = SKB_DATA_ALIGN(size); | |
188 | - data = kmalloc_node_track_caller(size + sizeof(struct skb_shared_info), | |
189 | - gfp_mask, node); | |
187 | + /* We do our best to align skb_shared_info on a separate cache | |
188 | + * line. It usually works because kmalloc(X > SMP_CACHE_BYTES) gives | |
189 | + * aligned memory blocks, unless SLUB/SLAB debug is enabled. | |
190 | + * Both skb->head and skb_shared_info are cache line aligned. | |
191 | + */ | |
192 | + size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); | |
193 | + data = kmalloc_node_track_caller(size, gfp_mask, node); | |
190 | 194 | if (!data) |
191 | 195 | goto nodata; |
196 | + /* kmalloc(size) might give us more room than requested. | |
197 | + * Put skb_shared_info exactly at the end of allocated zone, | |
198 | + * to allow max possible filling before reallocation. | |
199 | + */ | |
200 | + size = SKB_WITH_OVERHEAD(ksize(data)); | |
192 | 201 | prefetchw(data + size); |
193 | 202 | |
194 | 203 | /* |
... | ... | @@ -197,7 +206,8 @@ |
197 | 206 | * the tail pointer in struct sk_buff! |
198 | 207 | */ |
199 | 208 | memset(skb, 0, offsetof(struct sk_buff, tail)); |
200 | - skb->truesize = size + sizeof(struct sk_buff); | |
209 | + /* Account for allocated memory : skb + skb->head */ | |
210 | + skb->truesize = SKB_TRUESIZE(size); | |
201 | 211 | atomic_set(&skb->users, 1); |
202 | 212 | skb->head = data; |
203 | 213 | skb->data = data; |
net/core/sock.c
... | ... | @@ -207,7 +207,7 @@ |
207 | 207 | * not depend upon such differences. |
208 | 208 | */ |
209 | 209 | #define _SK_MEM_PACKETS 256 |
210 | -#define _SK_MEM_OVERHEAD (sizeof(struct sk_buff) + 256) | |
210 | +#define _SK_MEM_OVERHEAD SKB_TRUESIZE(256) | |
211 | 211 | #define SK_WMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS) |
212 | 212 | #define SK_RMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS) |
213 | 213 |
net/ipv4/icmp.c
... | ... | @@ -1152,10 +1152,9 @@ |
1152 | 1152 | net->ipv4.icmp_sk[i] = sk; |
1153 | 1153 | |
1154 | 1154 | /* Enough space for 2 64K ICMP packets, including |
1155 | - * sk_buff struct overhead. | |
1155 | + * sk_buff/skb_shared_info struct overhead. | |
1156 | 1156 | */ |
1157 | - sk->sk_sndbuf = | |
1158 | - (2 * ((64 * 1024) + sizeof(struct sk_buff))); | |
1157 | + sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024); | |
1159 | 1158 | |
1160 | 1159 | /* |
1161 | 1160 | * Speedup sock_wfree() |
net/ipv4/tcp_input.c
... | ... | @@ -265,8 +265,7 @@ |
265 | 265 | |
266 | 266 | static void tcp_fixup_sndbuf(struct sock *sk) |
267 | 267 | { |
268 | - int sndmem = tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER + 16 + | |
269 | - sizeof(struct sk_buff); | |
268 | + int sndmem = SKB_TRUESIZE(tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER); | |
270 | 269 | |
271 | 270 | if (sk->sk_sndbuf < 3 * sndmem) { |
272 | 271 | sk->sk_sndbuf = 3 * sndmem; |
... | ... | @@ -349,7 +348,7 @@ |
349 | 348 | static void tcp_fixup_rcvbuf(struct sock *sk) |
350 | 349 | { |
351 | 350 | struct tcp_sock *tp = tcp_sk(sk); |
352 | - int rcvmem = tp->advmss + MAX_TCP_HEADER + 16 + sizeof(struct sk_buff); | |
351 | + int rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER); | |
353 | 352 | |
354 | 353 | /* Try to select rcvbuf so that 4 mss-sized segments |
355 | 354 | * will fit to window and corresponding skbs will fit to our rcvbuf. |
... | ... | @@ -540,8 +539,7 @@ |
540 | 539 | space /= tp->advmss; |
541 | 540 | if (!space) |
542 | 541 | space = 1; |
543 | - rcvmem = (tp->advmss + MAX_TCP_HEADER + | |
544 | - 16 + sizeof(struct sk_buff)); | |
542 | + rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER); | |
545 | 543 | while (tcp_win_from_space(rcvmem) < tp->advmss) |
546 | 544 | rcvmem += 128; |
547 | 545 | space *= rcvmem; |
... | ... | @@ -4950,8 +4948,10 @@ |
4950 | 4948 | struct tcp_sock *tp = tcp_sk(sk); |
4951 | 4949 | |
4952 | 4950 | if (tcp_should_expand_sndbuf(sk)) { |
4953 | - int sndmem = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) + | |
4954 | - MAX_TCP_HEADER + 16 + sizeof(struct sk_buff); | |
4951 | + int sndmem = SKB_TRUESIZE(max_t(u32, | |
4952 | + tp->rx_opt.mss_clamp, | |
4953 | + tp->mss_cache) + | |
4954 | + MAX_TCP_HEADER); | |
4955 | 4955 | int demanded = max_t(unsigned int, tp->snd_cwnd, |
4956 | 4956 | tp->reordering + 1); |
4957 | 4957 | sndmem *= 2 * demanded; |
net/ipv6/icmp.c
net/iucv/af_iucv.c
... | ... | @@ -1819,7 +1819,7 @@ |
1819 | 1819 | goto save_message; |
1820 | 1820 | |
1821 | 1821 | len = atomic_read(&sk->sk_rmem_alloc); |
1822 | - len += iucv_msg_length(msg) + sizeof(struct sk_buff); | |
1822 | + len += SKB_TRUESIZE(iucv_msg_length(msg)); | |
1823 | 1823 | if (len > sk->sk_rcvbuf) |
1824 | 1824 | goto save_message; |
1825 | 1825 |
net/sctp/protocol.c
... | ... | @@ -1299,7 +1299,7 @@ |
1299 | 1299 | max_share = min(4UL*1024*1024, limit); |
1300 | 1300 | |
1301 | 1301 | sysctl_sctp_rmem[0] = SK_MEM_QUANTUM; /* give each asoc 1 page min */ |
1302 | - sysctl_sctp_rmem[1] = (1500 *(sizeof(struct sk_buff) + 1)); | |
1302 | + sysctl_sctp_rmem[1] = 1500 * SKB_TRUESIZE(1); | |
1303 | 1303 | sysctl_sctp_rmem[2] = max(sysctl_sctp_rmem[1], max_share); |
1304 | 1304 | |
1305 | 1305 | sysctl_sctp_wmem[0] = SK_MEM_QUANTUM; |