Commit d84b6728c54dcf73bcef3e3f7cf6767e2d224e39

Authored by Davidlohr Bueso
Committed by Ingo Molnar
1 parent 4bd19084fa

locking/mcs: Better differentiate between MCS variants

We have two flavors of the MCS spinlock: standard and cancelable (OSQ).
While each one is independent of the other, we currently mix and match
them. This patch:

  - Moves the OSQ code out of mcs_spinlock.h (which only deals with the traditional
    version) into include/linux/osq_lock.h. No unnecessary code is added to the
    more global header file; any locks that make use of OSQ must include
    it anyway.

  - Renames mcs_spinlock.c to osq_lock.c. This file now contains only the OSQ code.

  - Introduces CONFIG_LOCK_SPIN_ON_OWNER so that osq_lock.o is built only
    when there is support for it.

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Jason Low <jason.low2@hp.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mikulas Patocka <mpatocka@redhat.com>
Cc: Waiman Long <Waiman.Long@hp.com>
Link: http://lkml.kernel.org/r/1420573509-24774-5-git-send-email-dave@stgolabs.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>

Showing 6 changed files with 219 additions and 227 deletions

include/linux/osq_lock.h
... ... @@ -5,9 +5,12 @@
5 5 * An MCS like lock especially tailored for optimistic spinning for sleeping
6 6 * lock implementations (mutex, rwsem, etc).
7 7 */
  8 +struct optimistic_spin_node {
  9 + struct optimistic_spin_node *next, *prev;
  10 + int locked; /* 1 if lock acquired */
  11 + int cpu; /* encoded CPU # + 1 value */
  12 +};
8 13  
9   -#define OSQ_UNLOCKED_VAL (0)
10   -
11 14 struct optimistic_spin_queue {
12 15 /*
13 16 * Stores an encoded value of the CPU # of the tail node in the queue.
... ... @@ -16,6 +19,8 @@
16 19 atomic_t tail;
17 20 };
18 21  
  22 +#define OSQ_UNLOCKED_VAL (0)
  23 +
19 24 /* Init macro and function. */
20 25 #define OSQ_LOCK_UNLOCKED { ATOMIC_INIT(OSQ_UNLOCKED_VAL) }
21 26  
... ... @@ -23,6 +28,9 @@
23 28 {
24 29 atomic_set(&lock->tail, OSQ_UNLOCKED_VAL);
25 30 }
  31 +
  32 +extern bool osq_lock(struct optimistic_spin_queue *lock);
  33 +extern void osq_unlock(struct optimistic_spin_queue *lock);
26 34  
27 35 #endif
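
For orientation, the two entry points declared above are all a sleeping-lock
implementation needs from OSQ: embed an optimistic_spin_queue, initialize it
with osq_lock_init() (or OSQ_LOCK_UNLOCKED), and bracket the optimistic-spin
phase with osq_lock()/osq_unlock(). Below is a minimal sketch of that usage
pattern; the my_sleeping_lock type and my_lock_optimistic_spin() helper are
illustrative stand-ins for mutex/rwsem and are not part of this commit.

    #include <linux/osq_lock.h>

    struct my_sleeping_lock {
            /* ... owner tracking, wait list, etc. ... */
            struct optimistic_spin_queue osq;   /* tail of the spinner queue */
    };

    static bool my_lock_optimistic_spin(struct my_sleeping_lock *lock)
    {
            /* Join this lock's spinner queue; false means we should block. */
            if (!osq_lock(&lock->osq))
                    return false;

            /* ... spin while the lock owner is still running on a CPU ... */

            osq_unlock(&lock->osq);
            return true;
    }
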
kernel/Kconfig.locks
... ... @@ -231,6 +231,10 @@
231 231 def_bool y
232 232 depends on SMP && RWSEM_XCHGADD_ALGORITHM && ARCH_SUPPORTS_ATOMIC_RMW
233 233  
  234 +config LOCK_SPIN_ON_OWNER
  235 + def_bool y
  236 + depends on MUTEX_SPIN_ON_OWNER || RWSEM_SPIN_ON_OWNER
  237 +
234 238 config ARCH_USE_QUEUE_RWLOCK
235 239 bool
236 240  
kernel/locking/Makefile
1 1  
2   -obj-y += mutex.o semaphore.o rwsem.o mcs_spinlock.o
  2 +obj-y += mutex.o semaphore.o rwsem.o
3 3  
4 4 ifdef CONFIG_FUNCTION_TRACER
5 5 CFLAGS_REMOVE_lockdep.o = -pg
... ... @@ -14,6 +14,7 @@
14 14 obj-$(CONFIG_LOCKDEP) += lockdep_proc.o
15 15 endif
16 16 obj-$(CONFIG_SMP) += spinlock.o
  17 +obj-$(CONFIG_LOCK_SPIN_ON_OWNER) += osq_lock.o
17 18 obj-$(CONFIG_SMP) += lglock.o
18 19 obj-$(CONFIG_PROVE_LOCKING) += spinlock.o
19 20 obj-$(CONFIG_RT_MUTEXES) += rtmutex.o
kernel/locking/mcs_spinlock.c
1   -#include <linux/percpu.h>
2   -#include <linux/sched.h>
3   -#include "mcs_spinlock.h"
4   -
5   -#ifdef CONFIG_SMP
6   -
7   -/*
8   - * An MCS like lock especially tailored for optimistic spinning for sleeping
9   - * lock implementations (mutex, rwsem, etc).
10   - *
11   - * Using a single mcs node per CPU is safe because sleeping locks should not be
12   - * called from interrupt context and we have preemption disabled while
13   - * spinning.
14   - */
15   -static DEFINE_PER_CPU_SHARED_ALIGNED(struct optimistic_spin_node, osq_node);
16   -
17   -/*
18   - * We use the value 0 to represent "no CPU", thus the encoded value
19   - * will be the CPU number incremented by 1.
20   - */
21   -static inline int encode_cpu(int cpu_nr)
22   -{
23   - return cpu_nr + 1;
24   -}
25   -
26   -static inline struct optimistic_spin_node *decode_cpu(int encoded_cpu_val)
27   -{
28   - int cpu_nr = encoded_cpu_val - 1;
29   -
30   - return per_cpu_ptr(&osq_node, cpu_nr);
31   -}
32   -
33   -/*
34   - * Get a stable @node->next pointer, either for unlock() or unqueue() purposes.
35   - * Can return NULL in case we were the last queued and we updated @lock instead.
36   - */
37   -static inline struct optimistic_spin_node *
38   -osq_wait_next(struct optimistic_spin_queue *lock,
39   - struct optimistic_spin_node *node,
40   - struct optimistic_spin_node *prev)
41   -{
42   - struct optimistic_spin_node *next = NULL;
43   - int curr = encode_cpu(smp_processor_id());
44   - int old;
45   -
46   - /*
47   - * If there is a prev node in queue, then the 'old' value will be
48   - * the prev node's CPU #, else it's set to OSQ_UNLOCKED_VAL since if
49   - * we're currently last in queue, then the queue will then become empty.
50   - */
51   - old = prev ? prev->cpu : OSQ_UNLOCKED_VAL;
52   -
53   - for (;;) {
54   - if (atomic_read(&lock->tail) == curr &&
55   - atomic_cmpxchg(&lock->tail, curr, old) == curr) {
56   - /*
57   - * We were the last queued, we moved @lock back. @prev
58   - * will now observe @lock and will complete its
59   - * unlock()/unqueue().
60   - */
61   - break;
62   - }
63   -
64   - /*
65   - * We must xchg() the @node->next value, because if we were to
66   - * leave it in, a concurrent unlock()/unqueue() from
67   - * @node->next might complete Step-A and think its @prev is
68   - * still valid.
69   - *
70   - * If the concurrent unlock()/unqueue() wins the race, we'll
71   - * wait for either @lock to point to us, through its Step-B, or
72   - * wait for a new @node->next from its Step-C.
73   - */
74   - if (node->next) {
75   - next = xchg(&node->next, NULL);
76   - if (next)
77   - break;
78   - }
79   -
80   - cpu_relax_lowlatency();
81   - }
82   -
83   - return next;
84   -}
85   -
86   -bool osq_lock(struct optimistic_spin_queue *lock)
87   -{
88   - struct optimistic_spin_node *node = this_cpu_ptr(&osq_node);
89   - struct optimistic_spin_node *prev, *next;
90   - int curr = encode_cpu(smp_processor_id());
91   - int old;
92   -
93   - node->locked = 0;
94   - node->next = NULL;
95   - node->cpu = curr;
96   -
97   - old = atomic_xchg(&lock->tail, curr);
98   - if (old == OSQ_UNLOCKED_VAL)
99   - return true;
100   -
101   - prev = decode_cpu(old);
102   - node->prev = prev;
103   - ACCESS_ONCE(prev->next) = node;
104   -
105   - /*
106   - * Normally @prev is untouchable after the above store; because at that
107   - * moment unlock can proceed and wipe the node element from stack.
108   - *
109   - * However, since our nodes are static per-cpu storage, we're
110   - * guaranteed their existence -- this allows us to apply
111   - * cmpxchg in an attempt to undo our queueing.
112   - */
113   -
114   - while (!smp_load_acquire(&node->locked)) {
115   - /*
116   - * If we need to reschedule bail... so we can block.
117   - */
118   - if (need_resched())
119   - goto unqueue;
120   -
121   - cpu_relax_lowlatency();
122   - }
123   - return true;
124   -
125   -unqueue:
126   - /*
127   - * Step - A -- stabilize @prev
128   - *
129   - * Undo our @prev->next assignment; this will make @prev's
130   - * unlock()/unqueue() wait for a next pointer since @lock points to us
131   - * (or later).
132   - */
133   -
134   - for (;;) {
135   - if (prev->next == node &&
136   - cmpxchg(&prev->next, node, NULL) == node)
137   - break;
138   -
139   - /*
140   - * We can only fail the cmpxchg() racing against an unlock(),
141   - * in which case we should observe @node->locked becomming
142   - * true.
143   - */
144   - if (smp_load_acquire(&node->locked))
145   - return true;
146   -
147   - cpu_relax_lowlatency();
148   -
149   - /*
150   - * Or we race against a concurrent unqueue()'s step-B, in which
151   - * case its step-C will write us a new @node->prev pointer.
152   - */
153   - prev = ACCESS_ONCE(node->prev);
154   - }
155   -
156   - /*
157   - * Step - B -- stabilize @next
158   - *
159   - * Similar to unlock(), wait for @node->next or move @lock from @node
160   - * back to @prev.
161   - */
162   -
163   - next = osq_wait_next(lock, node, prev);
164   - if (!next)
165   - return false;
166   -
167   - /*
168   - * Step - C -- unlink
169   - *
170   - * @prev is stable because its still waiting for a new @prev->next
171   - * pointer, @next is stable because our @node->next pointer is NULL and
172   - * it will wait in Step-A.
173   - */
174   -
175   - ACCESS_ONCE(next->prev) = prev;
176   - ACCESS_ONCE(prev->next) = next;
177   -
178   - return false;
179   -}
180   -
181   -void osq_unlock(struct optimistic_spin_queue *lock)
182   -{
183   - struct optimistic_spin_node *node, *next;
184   - int curr = encode_cpu(smp_processor_id());
185   -
186   - /*
187   - * Fast path for the uncontended case.
188   - */
189   - if (likely(atomic_cmpxchg(&lock->tail, curr, OSQ_UNLOCKED_VAL) == curr))
190   - return;
191   -
192   - /*
193   - * Second most likely case.
194   - */
195   - node = this_cpu_ptr(&osq_node);
196   - next = xchg(&node->next, NULL);
197   - if (next) {
198   - ACCESS_ONCE(next->locked) = 1;
199   - return;
200   - }
201   -
202   - next = osq_wait_next(lock, node, NULL);
203   - if (next)
204   - ACCESS_ONCE(next->locked) = 1;
205   -}
206   -
207   -#endif
kernel/locking/mcs_spinlock.h
... ... @@ -108,21 +108,5 @@
108 108 arch_mcs_spin_unlock_contended(&next->locked);
109 109 }
110 110  
111   -/*
112   - * Cancellable version of the MCS lock above.
113   - *
114   - * Intended for adaptive spinning of sleeping locks:
115   - * mutex_lock()/rwsem_down_{read,write}() etc.
116   - */
117   -
118   -struct optimistic_spin_node {
119   - struct optimistic_spin_node *next, *prev;
120   - int locked; /* 1 if lock acquired */
121   - int cpu; /* encoded CPU # value */
122   -};
123   -
124   -extern bool osq_lock(struct optimistic_spin_queue *lock);
125   -extern void osq_unlock(struct optimistic_spin_queue *lock);
126   -
127 111 #endif /* __LINUX_MCS_SPINLOCK_H */
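
By contrast, the traditional MCS lock that remains in this header takes an
explicit node from the caller, typically on the stack, since it never needs
to unqueue; OSQ needs static per-CPU nodes precisely so that it can cancel.
A minimal sketch of the retained API, assuming the mcs_spin_lock() and
mcs_spin_unlock() helpers kept in mcs_spinlock.h (the example_tail variable
and its critical section are illustrative):

    #include "mcs_spinlock.h"

    static struct mcs_spinlock *example_tail;   /* NULL when the lock is free */

    static void example_critical_section(void)
    {
            struct mcs_spinlock node;   /* one node per acquisition, on the stack */

            mcs_spin_lock(&example_tail, &node);
            /* ... critical section ... */
            mcs_spin_unlock(&example_tail, &node);
    }
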
kernel/locking/osq_lock.c
  1 +#include <linux/percpu.h>
  2 +#include <linux/sched.h>
  3 +#include <linux/osq_lock.h>
  4 +
  5 +/*
  6 + * An MCS like lock especially tailored for optimistic spinning for sleeping
  7 + * lock implementations (mutex, rwsem, etc).
  8 + *
  9 + * Using a single mcs node per CPU is safe because sleeping locks should not be
  10 + * called from interrupt context and we have preemption disabled while
  11 + * spinning.
  12 + */
  13 +static DEFINE_PER_CPU_SHARED_ALIGNED(struct optimistic_spin_node, osq_node);
  14 +
  15 +/*
  16 + * We use the value 0 to represent "no CPU", thus the encoded value
  17 + * will be the CPU number incremented by 1.
  18 + */
  19 +static inline int encode_cpu(int cpu_nr)
  20 +{
  21 + return cpu_nr + 1;
  22 +}
  23 +
  24 +static inline struct optimistic_spin_node *decode_cpu(int encoded_cpu_val)
  25 +{
  26 + int cpu_nr = encoded_cpu_val - 1;
  27 +
  28 + return per_cpu_ptr(&osq_node, cpu_nr);
  29 +}
  30 +
  31 +/*
  32 + * Get a stable @node->next pointer, either for unlock() or unqueue() purposes.
  33 + * Can return NULL in case we were the last queued and we updated @lock instead.
  34 + */
  35 +static inline struct optimistic_spin_node *
  36 +osq_wait_next(struct optimistic_spin_queue *lock,
  37 + struct optimistic_spin_node *node,
  38 + struct optimistic_spin_node *prev)
  39 +{
  40 + struct optimistic_spin_node *next = NULL;
  41 + int curr = encode_cpu(smp_processor_id());
  42 + int old;
  43 +
  44 + /*
  45 + * If there is a prev node in queue, then the 'old' value will be
  46 + * the prev node's CPU #, else it's set to OSQ_UNLOCKED_VAL since if
  47 + * we're currently last in queue, then the queue will then become empty.
  48 + */
  49 + old = prev ? prev->cpu : OSQ_UNLOCKED_VAL;
  50 +
  51 + for (;;) {
  52 + if (atomic_read(&lock->tail) == curr &&
  53 + atomic_cmpxchg(&lock->tail, curr, old) == curr) {
  54 + /*
  55 + * We were the last queued, we moved @lock back. @prev
  56 + * will now observe @lock and will complete its
  57 + * unlock()/unqueue().
  58 + */
  59 + break;
  60 + }
  61 +
  62 + /*
  63 + * We must xchg() the @node->next value, because if we were to
  64 + * leave it in, a concurrent unlock()/unqueue() from
  65 + * @node->next might complete Step-A and think its @prev is
  66 + * still valid.
  67 + *
  68 + * If the concurrent unlock()/unqueue() wins the race, we'll
  69 + * wait for either @lock to point to us, through its Step-B, or
  70 + * wait for a new @node->next from its Step-C.
  71 + */
  72 + if (node->next) {
  73 + next = xchg(&node->next, NULL);
  74 + if (next)
  75 + break;
  76 + }
  77 +
  78 + cpu_relax_lowlatency();
  79 + }
  80 +
  81 + return next;
  82 +}
  83 +
  84 +bool osq_lock(struct optimistic_spin_queue *lock)
  85 +{
  86 + struct optimistic_spin_node *node = this_cpu_ptr(&osq_node);
  87 + struct optimistic_spin_node *prev, *next;
  88 + int curr = encode_cpu(smp_processor_id());
  89 + int old;
  90 +
  91 + node->locked = 0;
  92 + node->next = NULL;
  93 + node->cpu = curr;
  94 +
  95 + old = atomic_xchg(&lock->tail, curr);
  96 + if (old == OSQ_UNLOCKED_VAL)
  97 + return true;
  98 +
  99 + prev = decode_cpu(old);
  100 + node->prev = prev;
  101 + ACCESS_ONCE(prev->next) = node;
  102 +
  103 + /*
  104 + * Normally @prev is untouchable after the above store; because at that
  105 + * moment unlock can proceed and wipe the node element from stack.
  106 + *
  107 + * However, since our nodes are static per-cpu storage, we're
  108 + * guaranteed their existence -- this allows us to apply
  109 + * cmpxchg in an attempt to undo our queueing.
  110 + */
  111 +
  112 + while (!smp_load_acquire(&node->locked)) {
  113 + /*
  114 + * If we need to reschedule bail... so we can block.
  115 + */
  116 + if (need_resched())
  117 + goto unqueue;
  118 +
  119 + cpu_relax_lowlatency();
  120 + }
  121 + return true;
  122 +
  123 +unqueue:
  124 + /*
  125 + * Step - A -- stabilize @prev
  126 + *
  127 + * Undo our @prev->next assignment; this will make @prev's
  128 + * unlock()/unqueue() wait for a next pointer since @lock points to us
  129 + * (or later).
  130 + */
  131 +
  132 + for (;;) {
  133 + if (prev->next == node &&
  134 + cmpxchg(&prev->next, node, NULL) == node)
  135 + break;
  136 +
  137 + /*
  138 + * We can only fail the cmpxchg() racing against an unlock(),
  139 + * in which case we should observe @node->locked becomming
  140 + * true.
  141 + */
  142 + if (smp_load_acquire(&node->locked))
  143 + return true;
  144 +
  145 + cpu_relax_lowlatency();
  146 +
  147 + /*
  148 + * Or we race against a concurrent unqueue()'s step-B, in which
  149 + * case its step-C will write us a new @node->prev pointer.
  150 + */
  151 + prev = ACCESS_ONCE(node->prev);
  152 + }
  153 +
  154 + /*
  155 + * Step - B -- stabilize @next
  156 + *
  157 + * Similar to unlock(), wait for @node->next or move @lock from @node
  158 + * back to @prev.
  159 + */
  160 +
  161 + next = osq_wait_next(lock, node, prev);
  162 + if (!next)
  163 + return false;
  164 +
  165 + /*
  166 + * Step - C -- unlink
  167 + *
  168 + * @prev is stable because its still waiting for a new @prev->next
  169 + * pointer, @next is stable because our @node->next pointer is NULL and
  170 + * it will wait in Step-A.
  171 + */
  172 +
  173 + ACCESS_ONCE(next->prev) = prev;
  174 + ACCESS_ONCE(prev->next) = next;
  175 +
  176 + return false;
  177 +}
  178 +
  179 +void osq_unlock(struct optimistic_spin_queue *lock)
  180 +{
  181 + struct optimistic_spin_node *node, *next;
  182 + int curr = encode_cpu(smp_processor_id());
  183 +
  184 + /*
  185 + * Fast path for the uncontended case.
  186 + */
  187 + if (likely(atomic_cmpxchg(&lock->tail, curr, OSQ_UNLOCKED_VAL) == curr))
  188 + return;
  189 +
  190 + /*
  191 + * Second most likely case.
  192 + */
  193 + node = this_cpu_ptr(&osq_node);
  194 + next = xchg(&node->next, NULL);
  195 + if (next) {
  196 + ACCESS_ONCE(next->locked) = 1;
  197 + return;
  198 + }
  199 +
  200 + next = osq_wait_next(lock, node, NULL);
  201 + if (next)
  202 + ACCESS_ONCE(next->locked) = 1;
  203 +}
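
One detail worth spelling out from the file above: lock->tail stores an
encoded CPU number rather than a pointer, and the +1 in encode_cpu() exists
so that 0 (OSQ_UNLOCKED_VAL) can unambiguously mean "queue empty". A worked
example of the encoding (illustrative, not part of the commit):

    /*
     * On a 4-CPU machine with only CPU 2 queued:
     *
     *   encode_cpu(2) == 3                          -> stored in lock->tail
     *   decode_cpu(3) == per_cpu_ptr(&osq_node, 2)
     *
     * An empty queue is lock->tail == OSQ_UNLOCKED_VAL == 0; without the +1,
     * CPU 0 would be indistinguishable from "no CPU".
     */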