Commit a8ddac7e53e89cb877965097d05adfeb1c91def3
Committed by: Linus Torvalds
1 parent: 5a439c5657
Exists in: master and in 39 other branches
mutex: speed up generic mutex implementations
- atomic operations which both modify the variable and return something imply
  full smp memory barriers before and after the memory operations involved
  (failing atomic_cmpxchg, atomic_add_unless, etc don't imply a barrier because
  they don't modify the target). See Documentation/atomic_ops.txt.
  So remove extra barriers and branches.

- All architectures support atomic_cmpxchg. This has no relation to
  __HAVE_ARCH_CMPXCHG. We can just take the atomic_cmpxchg path unconditionally

This reduces a simple single threaded fastpath lock+unlock test from 590 cycles
to 203 cycles on a ppc970 system.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
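To make the barrier reasoning concrete, here is a minimal userspace sketch of the dec/inc fastpath, built on the GCC/Clang __atomic builtins rather than the kernel's atomic_t API; the demo_* names are invented for illustration and the slowpath stubs do nothing. The point it shows is the one the commit relies on: a fully ordered read-modify-write (the analogue of atomic_dec_return()/atomic_inc_return()) both modifies the counter and returns a value, so no separate smp_mb() is needed on either side of the fastpath.

/* Illustrative userspace analogue of asm-generic/mutex-dec.h (not kernel code). */
#include <stdio.h>

struct demo_mutex {
        int count;      /* 1: unlocked, 0: locked, negative: locked, waiters queued */
};

/* Hypothetical slowpath stand-ins; the real kernel would block or wake waiters here. */
static void demo_lock_slowpath(struct demo_mutex *m)   { (void)m; }
static void demo_unlock_slowpath(struct demo_mutex *m) { (void)m; }

static void demo_lock(struct demo_mutex *m)
{
        /*
         * Fully ordered fetch-and-decrement: like atomic_dec_return(), it
         * modifies the variable and returns the new value, so it already
         * provides the ordering the lock needs - no extra fence.
         */
        if (__atomic_sub_fetch(&m->count, 1, __ATOMIC_SEQ_CST) < 0)
                demo_lock_slowpath(m);
}

static void demo_unlock(struct demo_mutex *m)
{
        /* Likewise, the increment itself orders the critical section. */
        if (__atomic_add_fetch(&m->count, 1, __ATOMIC_SEQ_CST) <= 0)
                demo_unlock_slowpath(m);
}

int main(void)
{
        struct demo_mutex m = { .count = 1 };

        demo_lock(&m);
        printf("locked, count = %d\n", m.count);        /* 0 */
        demo_unlock(&m);
        printf("unlocked, count = %d\n", m.count);      /* 1 */
        return 0;
}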
Showing 2 changed files with 3 additions and 32 deletions
include/asm-generic/mutex-dec.h
@@ -22,8 +22,6 @@
 {
        if (unlikely(atomic_dec_return(count) < 0))
                fail_fn(count);
-       else
-               smp_mb();
 }
 
 /**
@@ -41,10 +39,7 @@
 {
        if (unlikely(atomic_dec_return(count) < 0))
                return fail_fn(count);
-       else {
-               smp_mb();
-               return 0;
-       }
+       return 0;
 }
 
 /**
@@ -63,7 +58,6 @@
 static inline void
 __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
 {
-       smp_mb();
        if (unlikely(atomic_inc_return(count) <= 0))
                fail_fn(count);
 }
@@ -88,25 +82,9 @@
 static inline int
 __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
 {
-       /*
-        * We have two variants here. The cmpxchg based one is the best one
-        * because it never induce a false contention state. It is included
-        * here because architectures using the inc/dec algorithms over the
-        * xchg ones are much more likely to support cmpxchg natively.
-        *
-        * If not we fall back to the spinlock based variant - that is
-        * just as efficient (and simpler) as a 'destructive' probing of
-        * the mutex state would be.
-        */
-#ifdef __HAVE_ARCH_CMPXCHG
-       if (likely(atomic_cmpxchg(count, 1, 0) == 1)) {
-               smp_mb();
+       if (likely(atomic_cmpxchg(count, 1, 0) == 1))
                return 1;
-       }
        return 0;
-#else
-       return fail_fn(count);
-#endif
 }
 
 #endif
include/asm-generic/mutex-xchg.h
@@ -27,8 +27,6 @@
 {
        if (unlikely(atomic_xchg(count, 0) != 1))
                fail_fn(count);
-       else
-               smp_mb();
 }
 
 /**
@@ -46,10 +44,7 @@
 {
        if (unlikely(atomic_xchg(count, 0) != 1))
                return fail_fn(count);
-       else {
-               smp_mb();
-               return 0;
-       }
+       return 0;
 }
 
 /**
@@ -67,7 +62,6 @@
 static inline void
 __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
 {
-       smp_mb();
        if (unlikely(atomic_xchg(count, 1) != 0))
                fail_fn(count);
 }
@@ -110,7 +104,6 @@
                if (prev < 0)
                        prev = 0;
        }
-       smp_mb();
 
        return prev;
 }
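The trylock touched by this last hunk is the subtlest of the xchg fastpaths: if the exchange swaps out a "contended" value rather than a free lock, it has to put that value back. As a minimal sketch only (userspace, GCC/Clang __atomic builtins, invented demo_* names, not the kernel source), the post-patch behaviour looks roughly like this, with the fully ordered exchange itself providing the ordering that the removed smp_mb() used to supply:

/* Illustrative userspace analogue of the xchg-based trylock (not kernel code). */
#include <stdio.h>

/* 1: unlocked, 0: locked, negative: locked with waiters queued. */
static int demo_count = 1;

/* Returns 1 if the lock was taken, 0 otherwise. */
static int demo_trylock(int *count)
{
        int prev = __atomic_exchange_n(count, 0, __ATOMIC_SEQ_CST);

        if (prev < 0) {
                /*
                 * We swapped out a "contended" marker, not a free lock:
                 * restore it so the waiter bookkeeping stays intact. If
                 * the lock happened to become free meanwhile, we own it.
                 */
                prev = __atomic_exchange_n(count, prev, __ATOMIC_SEQ_CST);
                if (prev < 0)
                        prev = 0;
        }
        /* No trailing barrier: the exchange is already fully ordered. */
        return prev;
}

int main(void)
{
        printf("first trylock:  %d\n", demo_trylock(&demo_count));     /* 1 */
        printf("second trylock: %d\n", demo_trylock(&demo_count));     /* 0 */
        return 0;
}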