Commit dd24c00191d5e4a1ae896aafe33c6b8095ab4bd1
Committed by
David S. Miller
1 parent
1748376b66
Exists in
master
and in
39 other branches
net: Use a percpu_counter for orphan_count
Instead of using one atomic_t per protocol, use a percpu_counter for "orphan_count" to reduce cache-line contention on heavy-duty network servers.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Showing 8 changed files with 24 additions and 18 deletions (side-by-side diff).
include/net/sock.h
include/net/tcp.h
... | ... | @@ -46,7 +46,7 @@ |
46 | 46 | |
47 | 47 | extern struct inet_hashinfo tcp_hashinfo; |
48 | 48 | |
49 | -extern atomic_t tcp_orphan_count; | |
49 | +extern struct percpu_counter tcp_orphan_count; | |
50 | 50 | extern void tcp_time_wait(struct sock *sk, int state, int timeo); |
51 | 51 | |
52 | 52 | #define MAX_TCP_HEADER (128 + MAX_HEADER) |
net/dccp/dccp.h
net/dccp/proto.c
... | ... | @@ -40,8 +40,7 @@ |
40 | 40 | |
41 | 41 | EXPORT_SYMBOL_GPL(dccp_statistics); |
42 | 42 | |
43 | -atomic_t dccp_orphan_count = ATOMIC_INIT(0); | |
44 | - | |
43 | +struct percpu_counter dccp_orphan_count; | |
45 | 44 | EXPORT_SYMBOL_GPL(dccp_orphan_count); |
46 | 45 | |
47 | 46 | struct inet_hashinfo dccp_hashinfo; |
... | ... | @@ -1000,7 +999,7 @@ |
1000 | 999 | state = sk->sk_state; |
1001 | 1000 | sock_hold(sk); |
1002 | 1001 | sock_orphan(sk); |
1003 | - atomic_inc(sk->sk_prot->orphan_count); | |
1002 | + percpu_counter_inc(sk->sk_prot->orphan_count); | |
1004 | 1003 | |
1005 | 1004 | /* |
1006 | 1005 | * It is the last release_sock in its life. It will remove backlog. |
1007 | 1006 | |
1008 | 1007 | |
... | ... | @@ -1064,18 +1063,21 @@ |
1064 | 1063 | { |
1065 | 1064 | unsigned long goal; |
1066 | 1065 | int ehash_order, bhash_order, i; |
1067 | - int rc = -ENOBUFS; | |
1066 | + int rc; | |
1068 | 1067 | |
1069 | 1068 | BUILD_BUG_ON(sizeof(struct dccp_skb_cb) > |
1070 | 1069 | FIELD_SIZEOF(struct sk_buff, cb)); |
1071 | - | |
1070 | + rc = percpu_counter_init(&dccp_orphan_count, 0); | |
1071 | + if (rc) | |
1072 | + goto out; | |
1073 | + rc = -ENOBUFS; | |
1072 | 1074 | inet_hashinfo_init(&dccp_hashinfo); |
1073 | 1075 | dccp_hashinfo.bind_bucket_cachep = |
1074 | 1076 | kmem_cache_create("dccp_bind_bucket", |
1075 | 1077 | sizeof(struct inet_bind_bucket), 0, |
1076 | 1078 | SLAB_HWCACHE_ALIGN, NULL); |
1077 | 1079 | if (!dccp_hashinfo.bind_bucket_cachep) |
1078 | - goto out; | |
1080 | + goto out_free_percpu; | |
1079 | 1081 | |
1080 | 1082 | /* |
1081 | 1083 | * Size and allocate the main established and bind bucket |
... | ... | @@ -1168,6 +1170,8 @@ |
1168 | 1170 | out_free_bind_bucket_cachep: |
1169 | 1171 | kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); |
1170 | 1172 | dccp_hashinfo.bind_bucket_cachep = NULL; |
1173 | +out_free_percpu: | |
1174 | + percpu_counter_destroy(&dccp_orphan_count); | |
1171 | 1175 | goto out; |
1172 | 1176 | } |
1173 | 1177 |
net/ipv4/inet_connection_sock.c
... | ... | @@ -561,7 +561,7 @@ |
561 | 561 | |
562 | 562 | sk_refcnt_debug_release(sk); |
563 | 563 | |
564 | - atomic_dec(sk->sk_prot->orphan_count); | |
564 | + percpu_counter_dec(sk->sk_prot->orphan_count); | |
565 | 565 | sock_put(sk); |
566 | 566 | } |
567 | 567 | |
... | ... | @@ -641,7 +641,7 @@ |
641 | 641 | |
642 | 642 | sock_orphan(child); |
643 | 643 | |
644 | - atomic_inc(sk->sk_prot->orphan_count); | |
644 | + percpu_counter_inc(sk->sk_prot->orphan_count); | |
645 | 645 | |
646 | 646 | inet_csk_destroy_sock(child); |
647 | 647 |
net/ipv4/proc.c
... | ... | @@ -54,7 +54,7 @@ |
54 | 54 | socket_seq_show(seq); |
55 | 55 | seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n", |
56 | 56 | sock_prot_inuse_get(net, &tcp_prot), |
57 | - atomic_read(&tcp_orphan_count), | |
57 | + (int)percpu_counter_sum_positive(&tcp_orphan_count), | |
58 | 58 | tcp_death_row.tw_count, |
59 | 59 | (int)percpu_counter_sum_positive(&tcp_sockets_allocated), |
60 | 60 | atomic_read(&tcp_memory_allocated)); |
net/ipv4/tcp.c
... | ... | @@ -277,8 +277,7 @@ |
277 | 277 | |
278 | 278 | int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT; |
279 | 279 | |
280 | -atomic_t tcp_orphan_count = ATOMIC_INIT(0); | |
281 | - | |
280 | +struct percpu_counter tcp_orphan_count; | |
282 | 281 | EXPORT_SYMBOL_GPL(tcp_orphan_count); |
283 | 282 | |
284 | 283 | int sysctl_tcp_mem[3] __read_mostly; |
... | ... | @@ -1837,7 +1836,7 @@ |
1837 | 1836 | state = sk->sk_state; |
1838 | 1837 | sock_hold(sk); |
1839 | 1838 | sock_orphan(sk); |
1840 | - atomic_inc(sk->sk_prot->orphan_count); | |
1839 | + percpu_counter_inc(sk->sk_prot->orphan_count); | |
1841 | 1840 | |
1842 | 1841 | /* It is the last release_sock in its life. It will remove backlog. */ |
1843 | 1842 | release_sock(sk); |
1844 | 1843 | |
... | ... | @@ -1888,9 +1887,11 @@ |
1888 | 1887 | } |
1889 | 1888 | } |
1890 | 1889 | if (sk->sk_state != TCP_CLOSE) { |
1890 | + int orphan_count = percpu_counter_read_positive( | |
1891 | + sk->sk_prot->orphan_count); | |
1892 | + | |
1891 | 1893 | sk_mem_reclaim(sk); |
1892 | - if (tcp_too_many_orphans(sk, | |
1893 | - atomic_read(sk->sk_prot->orphan_count))) { | |
1894 | + if (tcp_too_many_orphans(sk, orphan_count)) { | |
1894 | 1895 | if (net_ratelimit()) |
1895 | 1896 | printk(KERN_INFO "TCP: too many of orphaned " |
1896 | 1897 | "sockets\n"); |
... | ... | @@ -2689,6 +2690,7 @@ |
2689 | 2690 | BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)); |
2690 | 2691 | |
2691 | 2692 | percpu_counter_init(&tcp_sockets_allocated, 0); |
2693 | + percpu_counter_init(&tcp_orphan_count, 0); | |
2692 | 2694 | tcp_hashinfo.bind_bucket_cachep = |
2693 | 2695 | kmem_cache_create("tcp_bind_bucket", |
2694 | 2696 | sizeof(struct inet_bind_bucket), 0, |
net/ipv4/tcp_timer.c
... | ... | @@ -65,7 +65,7 @@ |
65 | 65 | static int tcp_out_of_resources(struct sock *sk, int do_reset) |
66 | 66 | { |
67 | 67 | struct tcp_sock *tp = tcp_sk(sk); |
68 | - int orphans = atomic_read(&tcp_orphan_count); | |
68 | + int orphans = percpu_counter_read_positive(&tcp_orphan_count); | |
69 | 69 | |
70 | 70 | /* If peer does not open window for long time, or did not transmit |
71 | 71 | * anything for long time, penalize it. */ |