Commit a8ddac7e53e89cb877965097d05adfeb1c91def3

Authored by Nick Piggin
Committed by Linus Torvalds
1 parent 5a439c5657

mutex: speed up generic mutex implementations

- Atomic operations that both modify the variable and return a value imply
  full SMP memory barriers before and after the memory operations involved
  (a failing atomic_cmpxchg, atomic_add_unless, etc. does not imply a
  barrier, because it does not modify the target); see
  Documentation/atomic_ops.txt. So the extra barriers and branches can be
  removed (a sketch of the resulting fastpaths follows this list).

- All architectures support atomic_cmpxchg(); this has no relation to
  __HAVE_ARCH_CMPXCHG. We can therefore take the atomic_cmpxchg() path
  unconditionally.
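
For illustration, this is the shape of the resulting fastpaths in
include/asm-generic/mutex-dec.h (the comments are editorial, not part of
the patch): the value-returning atomic RMW already provides the ordering,
and atomic_cmpxchg() needs no __HAVE_ARCH_CMPXCHG guard.

static inline void
__mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *))
{
        if (unlikely(atomic_dec_return(count) < 0))
                fail_fn(count);
        /* no smp_mb() needed: atomic_dec_return() implies a full
         * barrier before and after the decrement */
}

static inline int
__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
{
        /* atomic_cmpxchg() is available everywhere, so there is no
         * #ifdef __HAVE_ARCH_CMPXCHG fallback anymore */
        if (likely(atomic_cmpxchg(count, 1, 0) == 1))
                return 1;
        return 0;
}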

This reduces a simple single-threaded fastpath lock+unlock test from 590
cycles to 203 cycles on a ppc970 system.
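
A minimal sketch of that kind of single-threaded test (an assumed example,
not the code used for the numbers above; the module and symbol names are
made up) could be a module that times uncontended lock/unlock pairs:

#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/timex.h>        /* get_cycles() */

static DEFINE_MUTEX(test_mutex);

static int __init mutex_fastpath_bench_init(void)
{
        const unsigned long iters = 1000000;
        cycles_t start, end;
        unsigned long i;

        start = get_cycles();
        for (i = 0; i < iters; i++) {
                mutex_lock(&test_mutex);        /* uncontended: fastpath only */
                mutex_unlock(&test_mutex);
        }
        end = get_cycles();

        printk(KERN_INFO "lock+unlock: ~%llu cycles per iteration\n",
               (unsigned long long)(end - start) / iters);
        return 0;
}

static void __exit mutex_fastpath_bench_exit(void)
{
}

module_init(mutex_fastpath_bench_init);
module_exit(mutex_fastpath_bench_exit);
MODULE_LICENSE("GPL");

Note that get_cycles() may count timebase ticks rather than core cycles,
depending on the architecture, so the absolute numbers are only indicative.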

Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 2 changed files with 3 additions and 32 deletions

include/asm-generic/mutex-dec.h
@@ -22,8 +22,6 @@
 {
        if (unlikely(atomic_dec_return(count) < 0))
                fail_fn(count);
-       else
-               smp_mb();
 }
 
 /**
@@ -41,10 +39,7 @@
 {
        if (unlikely(atomic_dec_return(count) < 0))
                return fail_fn(count);
-       else {
-               smp_mb();
-               return 0;
-       }
+       return 0;
 }
 
 /**
@@ -63,7 +58,6 @@
 static inline void
 __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
 {
-       smp_mb();
        if (unlikely(atomic_inc_return(count) <= 0))
                fail_fn(count);
 }
@@ -88,25 +82,9 @@
 static inline int
 __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
 {
-       /*
-        * We have two variants here. The cmpxchg based one is the best one
-        * because it never induce a false contention state. It is included
-        * here because architectures using the inc/dec algorithms over the
-        * xchg ones are much more likely to support cmpxchg natively.
-        *
-        * If not we fall back to the spinlock based variant - that is
-        * just as efficient (and simpler) as a 'destructive' probing of
-        * the mutex state would be.
-        */
-#ifdef __HAVE_ARCH_CMPXCHG
-       if (likely(atomic_cmpxchg(count, 1, 0) == 1)) {
-               smp_mb();
+       if (likely(atomic_cmpxchg(count, 1, 0) == 1))
                return 1;
-       }
        return 0;
-#else
-       return fail_fn(count);
-#endif
 }
 
 #endif
include/asm-generic/mutex-xchg.h
@@ -27,8 +27,6 @@
 {
        if (unlikely(atomic_xchg(count, 0) != 1))
                fail_fn(count);
-       else
-               smp_mb();
 }
 
 /**
@@ -46,10 +44,7 @@
 {
        if (unlikely(atomic_xchg(count, 0) != 1))
                return fail_fn(count);
-       else {
-               smp_mb();
-               return 0;
-       }
+       return 0;
 }
 
 /**
@@ -67,7 +62,6 @@
 static inline void
 __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
 {
-       smp_mb();
        if (unlikely(atomic_xchg(count, 1) != 0))
                fail_fn(count);
 }
@@ -110,7 +104,6 @@
                if (prev < 0)
                        prev = 0;
        }
-       smp_mb();
 
        return prev;
 }