Commit d84b6728c54dcf73bcef3e3f7cf6767e2d224e39
Committed by: Ingo Molnar
1 parent: 4bd19084fa
Exists in: ti-lsk-linux-4.1.y and 10 other branches
locking/mcs: Better differentiate between MCS variants
We have two flavors of the MCS spinlock: standard and cancelable (OSQ). While each one is independent of the other, we currently mix and match them. This patch:

- Moves the OSQ code out of mcs_spinlock.h (which only deals with the traditional version) into include/linux/osq_lock.h. No unnecessary code is added to the more global header file; any locks that make use of OSQ must include it anyway.

- Renames mcs_spinlock.c to osq_lock.c. This file only contains osq code.

- Introduces CONFIG_LOCK_SPIN_ON_OWNER in order to only build osq_lock if there is support for it.

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Jason Low <jason.low2@hp.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mikulas Patocka <mpatocka@redhat.com>
Cc: Waiman Long <Waiman.Long@hp.com>
Link: http://lkml.kernel.org/r/1420573509-24774-5-git-send-email-dave@stgolabs.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
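For context, a minimal kernel-style sketch of how a sleeping-lock slowpath might consume the relocated API. The lock type and helpers here (struct my_lock, my_lock_spin) are hypothetical names invented for illustration; only struct optimistic_spin_queue, osq_lock() and osq_unlock() come from the header this patch introduces.

    /* Illustrative sketch only; not part of this commit. */
    #include <linux/atomic.h>
    #include <linux/osq_lock.h>
    #include <linux/sched.h>

    struct my_lock {                         /* hypothetical sleeping lock */
            atomic_t                        count;  /* 1 == unlocked */
            struct optimistic_spin_queue    osq;    /* spinner queue */
    };

    static bool my_lock_spin(struct my_lock *lock)
    {
            bool acquired = false;

            /* Only one CPU at a time spins on the lock word; the rest queue. */
            if (!osq_lock(&lock->osq))
                    return false;           /* need_resched() hit while queued */

            while (!need_resched()) {
                    if (atomic_cmpxchg(&lock->count, 1, 0) == 1) {
                            acquired = true;
                            break;
                    }
                    cpu_relax_lowlatency();
            }

            osq_unlock(&lock->osq);
            return acquired;
    }

With the split done by this patch, such a user includes <linux/osq_lock.h> directly and is only built when CONFIG_LOCK_SPIN_ON_OWNER provides osq_lock.o.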
Showing 6 changed files with 219 additions and 227 deletions
include/linux/osq_lock.h
... | ... | @@ -5,9 +5,12 @@ |
5 | 5 | * An MCS like lock especially tailored for optimistic spinning for sleeping |
6 | 6 | * lock implementations (mutex, rwsem, etc). |
7 | 7 | */ |
8 | +struct optimistic_spin_node { | |
9 | + struct optimistic_spin_node *next, *prev; | |
10 | + int locked; /* 1 if lock acquired */ | |
11 | + int cpu; /* encoded CPU # + 1 value */ | |
12 | +}; | |
8 | 13 | |
9 | -#define OSQ_UNLOCKED_VAL (0) | |
10 | - | |
11 | 14 | struct optimistic_spin_queue { |
12 | 15 | /* |
13 | 16 | * Stores an encoded value of the CPU # of the tail node in the queue. |
... | ... | @@ -16,6 +19,8 @@ |
16 | 19 | atomic_t tail; |
17 | 20 | }; |
18 | 21 | |
22 | +#define OSQ_UNLOCKED_VAL (0) | |
23 | + | |
19 | 24 | /* Init macro and function. */ |
20 | 25 | #define OSQ_LOCK_UNLOCKED { ATOMIC_INIT(OSQ_UNLOCKED_VAL) } |
21 | 26 | |
... | ... | @@ -23,6 +28,9 @@ |
23 | 28 | { |
24 | 29 | atomic_set(&lock->tail, OSQ_UNLOCKED_VAL); |
25 | 30 | } |
31 | + | |
32 | +extern bool osq_lock(struct optimistic_spin_queue *lock); | |
33 | +extern void osq_unlock(struct optimistic_spin_queue *lock); | |
26 | 34 | |
27 | 35 | #endif |
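As a usage note on the header above: a lock that embeds an OSQ would typically use one of the two initializers it exports. The example_* names below are made up for illustration, and the runtime form assumes the inline function shown in the hunk (which sets tail to OSQ_UNLOCKED_VAL) is osq_lock_init().

    /* Illustrative sketch only; not part of this commit. */
    #include <linux/osq_lock.h>

    /* Static form, e.g. inside a lock's own initializer macro. */
    static struct optimistic_spin_queue example_osq = OSQ_LOCK_UNLOCKED;

    /* Runtime form, e.g. called from a lock's *_init() function. */
    static void example_osq_init(struct optimistic_spin_queue *osq)
    {
            osq_lock_init(osq);
    }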
kernel/Kconfig.locks
... | ... | @@ -231,6 +231,10 @@ |
231 | 231 | def_bool y |
232 | 232 | depends on SMP && RWSEM_XCHGADD_ALGORITHM && ARCH_SUPPORTS_ATOMIC_RMW |
233 | 233 | |
234 | +config LOCK_SPIN_ON_OWNER | |
235 | + def_bool y | |
236 | + depends on MUTEX_SPIN_ON_OWNER || RWSEM_SPIN_ON_OWNER | |
237 | + | |
234 | 238 | config ARCH_USE_QUEUE_RWLOCK |
235 | 239 | bool |
236 | 240 |
kernel/locking/Makefile
1 | 1 | |
2 | -obj-y += mutex.o semaphore.o rwsem.o mcs_spinlock.o | |
2 | +obj-y += mutex.o semaphore.o rwsem.o | |
3 | 3 | |
4 | 4 | ifdef CONFIG_FUNCTION_TRACER |
5 | 5 | CFLAGS_REMOVE_lockdep.o = -pg |
... | ... | @@ -14,6 +14,7 @@ |
14 | 14 | obj-$(CONFIG_LOCKDEP) += lockdep_proc.o |
15 | 15 | endif |
16 | 16 | obj-$(CONFIG_SMP) += spinlock.o |
17 | +obj-$(CONFIG_LOCK_SPIN_ON_OWNER) += osq_lock.o | |
17 | 18 | obj-$(CONFIG_SMP) += lglock.o |
18 | 19 | obj-$(CONFIG_PROVE_LOCKING) += spinlock.o |
19 | 20 | obj-$(CONFIG_RT_MUTEXES) += rtmutex.o |
kernel/locking/mcs_spinlock.c
1 | -#include <linux/percpu.h> | |
2 | -#include <linux/sched.h> | |
3 | -#include "mcs_spinlock.h" | |
4 | - | |
5 | -#ifdef CONFIG_SMP | |
6 | - | |
7 | -/* | |
8 | - * An MCS like lock especially tailored for optimistic spinning for sleeping | |
9 | - * lock implementations (mutex, rwsem, etc). | |
10 | - * | |
11 | - * Using a single mcs node per CPU is safe because sleeping locks should not be | |
12 | - * called from interrupt context and we have preemption disabled while | |
13 | - * spinning. | |
14 | - */ | |
15 | -static DEFINE_PER_CPU_SHARED_ALIGNED(struct optimistic_spin_node, osq_node); | |
16 | - | |
17 | -/* | |
18 | - * We use the value 0 to represent "no CPU", thus the encoded value | |
19 | - * will be the CPU number incremented by 1. | |
20 | - */ | |
21 | -static inline int encode_cpu(int cpu_nr) | |
22 | -{ | |
23 | - return cpu_nr + 1; | |
24 | -} | |
25 | - | |
26 | -static inline struct optimistic_spin_node *decode_cpu(int encoded_cpu_val) | |
27 | -{ | |
28 | - int cpu_nr = encoded_cpu_val - 1; | |
29 | - | |
30 | - return per_cpu_ptr(&osq_node, cpu_nr); | |
31 | -} | |
32 | - | |
33 | -/* | |
34 | - * Get a stable @node->next pointer, either for unlock() or unqueue() purposes. | |
35 | - * Can return NULL in case we were the last queued and we updated @lock instead. | |
36 | - */ | |
37 | -static inline struct optimistic_spin_node * | |
38 | -osq_wait_next(struct optimistic_spin_queue *lock, | |
39 | - struct optimistic_spin_node *node, | |
40 | - struct optimistic_spin_node *prev) | |
41 | -{ | |
42 | - struct optimistic_spin_node *next = NULL; | |
43 | - int curr = encode_cpu(smp_processor_id()); | |
44 | - int old; | |
45 | - | |
46 | - /* | |
47 | - * If there is a prev node in queue, then the 'old' value will be | |
48 | - * the prev node's CPU #, else it's set to OSQ_UNLOCKED_VAL since if | |
49 | - * we're currently last in queue, then the queue will then become empty. | |
50 | - */ | |
51 | - old = prev ? prev->cpu : OSQ_UNLOCKED_VAL; | |
52 | - | |
53 | - for (;;) { | |
54 | - if (atomic_read(&lock->tail) == curr && | |
55 | - atomic_cmpxchg(&lock->tail, curr, old) == curr) { | |
56 | - /* | |
57 | - * We were the last queued, we moved @lock back. @prev | |
58 | - * will now observe @lock and will complete its | |
59 | - * unlock()/unqueue(). | |
60 | - */ | |
61 | - break; | |
62 | - } | |
63 | - | |
64 | - /* | |
65 | - * We must xchg() the @node->next value, because if we were to | |
66 | - * leave it in, a concurrent unlock()/unqueue() from | |
67 | - * @node->next might complete Step-A and think its @prev is | |
68 | - * still valid. | |
69 | - * | |
70 | - * If the concurrent unlock()/unqueue() wins the race, we'll | |
71 | - * wait for either @lock to point to us, through its Step-B, or | |
72 | - * wait for a new @node->next from its Step-C. | |
73 | - */ | |
74 | - if (node->next) { | |
75 | - next = xchg(&node->next, NULL); | |
76 | - if (next) | |
77 | - break; | |
78 | - } | |
79 | - | |
80 | - cpu_relax_lowlatency(); | |
81 | - } | |
82 | - | |
83 | - return next; | |
84 | -} | |
85 | - | |
86 | -bool osq_lock(struct optimistic_spin_queue *lock) | |
87 | -{ | |
88 | - struct optimistic_spin_node *node = this_cpu_ptr(&osq_node); | |
89 | - struct optimistic_spin_node *prev, *next; | |
90 | - int curr = encode_cpu(smp_processor_id()); | |
91 | - int old; | |
92 | - | |
93 | - node->locked = 0; | |
94 | - node->next = NULL; | |
95 | - node->cpu = curr; | |
96 | - | |
97 | - old = atomic_xchg(&lock->tail, curr); | |
98 | - if (old == OSQ_UNLOCKED_VAL) | |
99 | - return true; | |
100 | - | |
101 | - prev = decode_cpu(old); | |
102 | - node->prev = prev; | |
103 | - ACCESS_ONCE(prev->next) = node; | |
104 | - | |
105 | - /* | |
106 | - * Normally @prev is untouchable after the above store; because at that | |
107 | - * moment unlock can proceed and wipe the node element from stack. | |
108 | - * | |
109 | - * However, since our nodes are static per-cpu storage, we're | |
110 | - * guaranteed their existence -- this allows us to apply | |
111 | - * cmpxchg in an attempt to undo our queueing. | |
112 | - */ | |
113 | - | |
114 | - while (!smp_load_acquire(&node->locked)) { | |
115 | - /* | |
116 | - * If we need to reschedule bail... so we can block. | |
117 | - */ | |
118 | - if (need_resched()) | |
119 | - goto unqueue; | |
120 | - | |
121 | - cpu_relax_lowlatency(); | |
122 | - } | |
123 | - return true; | |
124 | - | |
125 | -unqueue: | |
126 | - /* | |
127 | - * Step - A -- stabilize @prev | |
128 | - * | |
129 | - * Undo our @prev->next assignment; this will make @prev's | |
130 | - * unlock()/unqueue() wait for a next pointer since @lock points to us | |
131 | - * (or later). | |
132 | - */ | |
133 | - | |
134 | - for (;;) { | |
135 | - if (prev->next == node && | |
136 | - cmpxchg(&prev->next, node, NULL) == node) | |
137 | - break; | |
138 | - | |
139 | - /* | |
140 | - * We can only fail the cmpxchg() racing against an unlock(), | |
141 | - * in which case we should observe @node->locked becomming | |
142 | - * true. | |
143 | - */ | |
144 | - if (smp_load_acquire(&node->locked)) | |
145 | - return true; | |
146 | - | |
147 | - cpu_relax_lowlatency(); | |
148 | - | |
149 | - /* | |
150 | - * Or we race against a concurrent unqueue()'s step-B, in which | |
151 | - * case its step-C will write us a new @node->prev pointer. | |
152 | - */ | |
153 | - prev = ACCESS_ONCE(node->prev); | |
154 | - } | |
155 | - | |
156 | - /* | |
157 | - * Step - B -- stabilize @next | |
158 | - * | |
159 | - * Similar to unlock(), wait for @node->next or move @lock from @node | |
160 | - * back to @prev. | |
161 | - */ | |
162 | - | |
163 | - next = osq_wait_next(lock, node, prev); | |
164 | - if (!next) | |
165 | - return false; | |
166 | - | |
167 | - /* | |
168 | - * Step - C -- unlink | |
169 | - * | |
170 | - * @prev is stable because its still waiting for a new @prev->next | |
171 | - * pointer, @next is stable because our @node->next pointer is NULL and | |
172 | - * it will wait in Step-A. | |
173 | - */ | |
174 | - | |
175 | - ACCESS_ONCE(next->prev) = prev; | |
176 | - ACCESS_ONCE(prev->next) = next; | |
177 | - | |
178 | - return false; | |
179 | -} | |
180 | - | |
181 | -void osq_unlock(struct optimistic_spin_queue *lock) | |
182 | -{ | |
183 | - struct optimistic_spin_node *node, *next; | |
184 | - int curr = encode_cpu(smp_processor_id()); | |
185 | - | |
186 | - /* | |
187 | - * Fast path for the uncontended case. | |
188 | - */ | |
189 | - if (likely(atomic_cmpxchg(&lock->tail, curr, OSQ_UNLOCKED_VAL) == curr)) | |
190 | - return; | |
191 | - | |
192 | - /* | |
193 | - * Second most likely case. | |
194 | - */ | |
195 | - node = this_cpu_ptr(&osq_node); | |
196 | - next = xchg(&node->next, NULL); | |
197 | - if (next) { | |
198 | - ACCESS_ONCE(next->locked) = 1; | |
199 | - return; | |
200 | - } | |
201 | - | |
202 | - next = osq_wait_next(lock, node, NULL); | |
203 | - if (next) | |
204 | - ACCESS_ONCE(next->locked) = 1; | |
205 | -} | |
206 | - | |
207 | -#endif |
kernel/locking/mcs_spinlock.h
... | ... | @@ -108,21 +108,5 @@ |
108 | 108 | arch_mcs_spin_unlock_contended(&next->locked); |
109 | 109 | } |
110 | 110 | |
111 | -/* | |
112 | - * Cancellable version of the MCS lock above. | |
113 | - * | |
114 | - * Intended for adaptive spinning of sleeping locks: | |
115 | - * mutex_lock()/rwsem_down_{read,write}() etc. | |
116 | - */ | |
117 | - | |
118 | -struct optimistic_spin_node { | |
119 | - struct optimistic_spin_node *next, *prev; | |
120 | - int locked; /* 1 if lock acquired */ | |
121 | - int cpu; /* encoded CPU # value */ | |
122 | -}; | |
123 | - | |
124 | -extern bool osq_lock(struct optimistic_spin_queue *lock); | |
125 | -extern void osq_unlock(struct optimistic_spin_queue *lock); | |
126 | - | |
127 | 111 | #endif /* __LINUX_MCS_SPINLOCK_H */ |
kernel/locking/osq_lock.c
1 | +#include <linux/percpu.h> | |
2 | +#include <linux/sched.h> | |
3 | +#include <linux/osq_lock.h> | |
4 | + | |
5 | +/* | |
6 | + * An MCS like lock especially tailored for optimistic spinning for sleeping | |
7 | + * lock implementations (mutex, rwsem, etc). | |
8 | + * | |
9 | + * Using a single mcs node per CPU is safe because sleeping locks should not be | |
10 | + * called from interrupt context and we have preemption disabled while | |
11 | + * spinning. | |
12 | + */ | |
13 | +static DEFINE_PER_CPU_SHARED_ALIGNED(struct optimistic_spin_node, osq_node); | |
14 | + | |
15 | +/* | |
16 | + * We use the value 0 to represent "no CPU", thus the encoded value | |
17 | + * will be the CPU number incremented by 1. | |
18 | + */ | |
19 | +static inline int encode_cpu(int cpu_nr) | |
20 | +{ | |
21 | + return cpu_nr + 1; | |
22 | +} | |
23 | + | |
24 | +static inline struct optimistic_spin_node *decode_cpu(int encoded_cpu_val) | |
25 | +{ | |
26 | + int cpu_nr = encoded_cpu_val - 1; | |
27 | + | |
28 | + return per_cpu_ptr(&osq_node, cpu_nr); | |
29 | +} | |
30 | + | |
31 | +/* | |
32 | + * Get a stable @node->next pointer, either for unlock() or unqueue() purposes. | |
33 | + * Can return NULL in case we were the last queued and we updated @lock instead. | |
34 | + */ | |
35 | +static inline struct optimistic_spin_node * | |
36 | +osq_wait_next(struct optimistic_spin_queue *lock, | |
37 | + struct optimistic_spin_node *node, | |
38 | + struct optimistic_spin_node *prev) | |
39 | +{ | |
40 | + struct optimistic_spin_node *next = NULL; | |
41 | + int curr = encode_cpu(smp_processor_id()); | |
42 | + int old; | |
43 | + | |
44 | + /* | |
45 | + * If there is a prev node in queue, then the 'old' value will be | |
46 | + * the prev node's CPU #, else it's set to OSQ_UNLOCKED_VAL since if | |
47 | + * we're currently last in queue, then the queue will then become empty. | |
48 | + */ | |
49 | + old = prev ? prev->cpu : OSQ_UNLOCKED_VAL; | |
50 | + | |
51 | + for (;;) { | |
52 | + if (atomic_read(&lock->tail) == curr && | |
53 | + atomic_cmpxchg(&lock->tail, curr, old) == curr) { | |
54 | + /* | |
55 | + * We were the last queued, we moved @lock back. @prev | |
56 | + * will now observe @lock and will complete its | |
57 | + * unlock()/unqueue(). | |
58 | + */ | |
59 | + break; | |
60 | + } | |
61 | + | |
62 | + /* | |
63 | + * We must xchg() the @node->next value, because if we were to | |
64 | + * leave it in, a concurrent unlock()/unqueue() from | |
65 | + * @node->next might complete Step-A and think its @prev is | |
66 | + * still valid. | |
67 | + * | |
68 | + * If the concurrent unlock()/unqueue() wins the race, we'll | |
69 | + * wait for either @lock to point to us, through its Step-B, or | |
70 | + * wait for a new @node->next from its Step-C. | |
71 | + */ | |
72 | + if (node->next) { | |
73 | + next = xchg(&node->next, NULL); | |
74 | + if (next) | |
75 | + break; | |
76 | + } | |
77 | + | |
78 | + cpu_relax_lowlatency(); | |
79 | + } | |
80 | + | |
81 | + return next; | |
82 | +} | |
83 | + | |
84 | +bool osq_lock(struct optimistic_spin_queue *lock) | |
85 | +{ | |
86 | + struct optimistic_spin_node *node = this_cpu_ptr(&osq_node); | |
87 | + struct optimistic_spin_node *prev, *next; | |
88 | + int curr = encode_cpu(smp_processor_id()); | |
89 | + int old; | |
90 | + | |
91 | + node->locked = 0; | |
92 | + node->next = NULL; | |
93 | + node->cpu = curr; | |
94 | + | |
95 | + old = atomic_xchg(&lock->tail, curr); | |
96 | + if (old == OSQ_UNLOCKED_VAL) | |
97 | + return true; | |
98 | + | |
99 | + prev = decode_cpu(old); | |
100 | + node->prev = prev; | |
101 | + ACCESS_ONCE(prev->next) = node; | |
102 | + | |
103 | + /* | |
104 | + * Normally @prev is untouchable after the above store; because at that | |
105 | + * moment unlock can proceed and wipe the node element from stack. | |
106 | + * | |
107 | + * However, since our nodes are static per-cpu storage, we're | |
108 | + * guaranteed their existence -- this allows us to apply | |
109 | + * cmpxchg in an attempt to undo our queueing. | |
110 | + */ | |
111 | + | |
112 | + while (!smp_load_acquire(&node->locked)) { | |
113 | + /* | |
114 | + * If we need to reschedule bail... so we can block. | |
115 | + */ | |
116 | + if (need_resched()) | |
117 | + goto unqueue; | |
118 | + | |
119 | + cpu_relax_lowlatency(); | |
120 | + } | |
121 | + return true; | |
122 | + | |
123 | +unqueue: | |
124 | + /* | |
125 | + * Step - A -- stabilize @prev | |
126 | + * | |
127 | + * Undo our @prev->next assignment; this will make @prev's | |
128 | + * unlock()/unqueue() wait for a next pointer since @lock points to us | |
129 | + * (or later). | |
130 | + */ | |
131 | + | |
132 | + for (;;) { | |
133 | + if (prev->next == node && | |
134 | + cmpxchg(&prev->next, node, NULL) == node) | |
135 | + break; | |
136 | + | |
137 | + /* | |
138 | + * We can only fail the cmpxchg() racing against an unlock(), | |
139 | + * in which case we should observe @node->locked becomming | |
140 | + * true. | |
141 | + */ | |
142 | + if (smp_load_acquire(&node->locked)) | |
143 | + return true; | |
144 | + | |
145 | + cpu_relax_lowlatency(); | |
146 | + | |
147 | + /* | |
148 | + * Or we race against a concurrent unqueue()'s step-B, in which | |
149 | + * case its step-C will write us a new @node->prev pointer. | |
150 | + */ | |
151 | + prev = ACCESS_ONCE(node->prev); | |
152 | + } | |
153 | + | |
154 | + /* | |
155 | + * Step - B -- stabilize @next | |
156 | + * | |
157 | + * Similar to unlock(), wait for @node->next or move @lock from @node | |
158 | + * back to @prev. | |
159 | + */ | |
160 | + | |
161 | + next = osq_wait_next(lock, node, prev); | |
162 | + if (!next) | |
163 | + return false; | |
164 | + | |
165 | + /* | |
166 | + * Step - C -- unlink | |
167 | + * | |
168 | + * @prev is stable because its still waiting for a new @prev->next | |
169 | + * pointer, @next is stable because our @node->next pointer is NULL and | |
170 | + * it will wait in Step-A. | |
171 | + */ | |
172 | + | |
173 | + ACCESS_ONCE(next->prev) = prev; | |
174 | + ACCESS_ONCE(prev->next) = next; | |
175 | + | |
176 | + return false; | |
177 | +} | |
178 | + | |
179 | +void osq_unlock(struct optimistic_spin_queue *lock) | |
180 | +{ | |
181 | + struct optimistic_spin_node *node, *next; | |
182 | + int curr = encode_cpu(smp_processor_id()); | |
183 | + | |
184 | + /* | |
185 | + * Fast path for the uncontended case. | |
186 | + */ | |
187 | + if (likely(atomic_cmpxchg(&lock->tail, curr, OSQ_UNLOCKED_VAL) == curr)) | |
188 | + return; | |
189 | + | |
190 | + /* | |
191 | + * Second most likely case. | |
192 | + */ | |
193 | + node = this_cpu_ptr(&osq_node); | |
194 | + next = xchg(&node->next, NULL); | |
195 | + if (next) { | |
196 | + ACCESS_ONCE(next->locked) = 1; | |
197 | + return; | |
198 | + } | |
199 | + | |
200 | + next = osq_wait_next(lock, node, NULL); | |
201 | + if (next) | |
202 | + ACCESS_ONCE(next->locked) = 1; | |
203 | +} |
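Finally, a standalone userspace sketch (not part of the patch) of the tail-encoding convention used in osq_lock.c above, assuming only what its comments state: CPU numbers are stored as cpu + 1 so that 0 (OSQ_UNLOCKED_VAL) can mean "no CPU / queue empty" while CPU 0 remains representable.

    /* Userspace demo of the OSQ tail encoding; compiles with any C compiler. */
    #include <assert.h>
    #include <stdio.h>

    #define OSQ_UNLOCKED_VAL 0

    static int encode_cpu(int cpu_nr)      { return cpu_nr + 1; }
    static int decode_cpu(int encoded_val) { return encoded_val - 1; }

    int main(void)
    {
            /* CPU 0 must not collide with the "empty queue" value. */
            assert(encode_cpu(0) != OSQ_UNLOCKED_VAL);

            /* Encoding and decoding round-trip. */
            assert(decode_cpu(encode_cpu(5)) == 5);

            printf("tail=%d means empty, tail=%d means CPU 0 is the tail\n",
                   OSQ_UNLOCKED_VAL, encode_cpu(0));
            return 0;
    }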