Commit d84b6728c54dcf73bcef3e3f7cf6767e2d224e39
Committed by: Ingo Molnar
1 parent: 4bd19084fa
Exists in: ti-lsk-linux-4.1.y and 10 other branches
locking/mcs: Better differentiate between MCS variants
We have two flavors of the MCS spinlock: standard and cancelable (OSQ). While each one is independent of the other, we currently mix and match them. This patch:

- Moves the OSQ code out of mcs_spinlock.h (which only deals with the traditional version) into include/linux/osq_lock.h. No unnecessary code is added to the more global header file; any locks that make use of OSQ must include it anyway.

- Renames mcs_spinlock.c to osq_lock.c. This file only contains osq code.

- Introduces CONFIG_LOCK_SPIN_ON_OWNER in order to only build osq_lock if there is support for it.

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Jason Low <jason.low2@hp.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mikulas Patocka <mpatocka@redhat.com>
Cc: Waiman Long <Waiman.Long@hp.com>
Link: http://lkml.kernel.org/r/1420573509-24774-5-git-send-email-dave@stgolabs.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
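For context, a minimal kernel-style sketch of how a sleeping-lock slowpath might consume the relocated API. The lock type and helpers here (struct my_lock, my_lock_spin) are hypothetical names invented for illustration; only struct optimistic_spin_queue, osq_lock() and osq_unlock() come from the header this patch introduces.

    /* Illustrative sketch only; not part of this commit. */
    #include <linux/atomic.h>
    #include <linux/osq_lock.h>
    #include <linux/sched.h>

    struct my_lock {                         /* hypothetical sleeping lock */
            atomic_t                        count;  /* 1 == unlocked */
            struct optimistic_spin_queue    osq;    /* spinner queue */
    };

    static bool my_lock_spin(struct my_lock *lock)
    {
            bool acquired = false;

            /* Only one CPU at a time spins on the lock word; the rest queue. */
            if (!osq_lock(&lock->osq))
                    return false;           /* need_resched() hit while queued */

            while (!need_resched()) {
                    if (atomic_cmpxchg(&lock->count, 1, 0) == 1) {
                            acquired = true;
                            break;
                    }
                    cpu_relax_lowlatency();
            }

            osq_unlock(&lock->osq);
            return acquired;
    }

With the split done by this patch, such a user includes <linux/osq_lock.h> directly and is only built when CONFIG_LOCK_SPIN_ON_OWNER provides osq_lock.o.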
Showing 6 changed files with 219 additions and 227 deletions
include/linux/osq_lock.h
... | ... | @@ -5,9 +5,12 @@ |
5 | 5 | * An MCS like lock especially tailored for optimistic spinning for sleeping |
6 | 6 | * lock implementations (mutex, rwsem, etc). |
7 | 7 | */ |
8 | +struct optimistic_spin_node { | |
9 | + struct optimistic_spin_node *next, *prev; | |
10 | + int locked; /* 1 if lock acquired */ | |
11 | + int cpu; /* encoded CPU # + 1 value */ | |
12 | +}; | |
8 | 13 | |
9 | -#define OSQ_UNLOCKED_VAL (0) | |
10 | - | |
11 | 14 | struct optimistic_spin_queue { |
12 | 15 | /* |
13 | 16 | * Stores an encoded value of the CPU # of the tail node in the queue. |
... | ... | @@ -16,6 +19,8 @@ |
16 | 19 | atomic_t tail; |
17 | 20 | }; |
18 | 21 | |
22 | +#define OSQ_UNLOCKED_VAL (0) | |
23 | + | |
19 | 24 | /* Init macro and function. */ |
20 | 25 | #define OSQ_LOCK_UNLOCKED { ATOMIC_INIT(OSQ_UNLOCKED_VAL) } |
21 | 26 | |
... | ... | @@ -23,6 +28,9 @@ |
23 | 28 | { |
24 | 29 | atomic_set(&lock->tail, OSQ_UNLOCKED_VAL); |
25 | 30 | } |
31 | + | |
32 | +extern bool osq_lock(struct optimistic_spin_queue *lock); | |
33 | +extern void osq_unlock(struct optimistic_spin_queue *lock); | |
26 | 34 | |
27 | 35 | #endif |
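As a usage note on the header above: a lock that embeds an OSQ would typically use one of the two initializers it exports. The example_* names below are made up for illustration, and the runtime form assumes the inline function shown in the hunk (which sets tail to OSQ_UNLOCKED_VAL) is osq_lock_init().

    /* Illustrative sketch only; not part of this commit. */
    #include <linux/osq_lock.h>

    /* Static form, e.g. inside a lock's own initializer macro. */
    static struct optimistic_spin_queue example_osq = OSQ_LOCK_UNLOCKED;

    /* Runtime form, e.g. called from a lock's *_init() function. */
    static void example_osq_init(struct optimistic_spin_queue *osq)
    {
            osq_lock_init(osq);
    }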
kernel/Kconfig.locks
... | ... | @@ -231,6 +231,10 @@ |
231 | 231 | def_bool y |
232 | 232 | depends on SMP && RWSEM_XCHGADD_ALGORITHM && ARCH_SUPPORTS_ATOMIC_RMW |
233 | 233 | |
234 | +config LOCK_SPIN_ON_OWNER | |
235 | + def_bool y | |
236 | + depends on MUTEX_SPIN_ON_OWNER || RWSEM_SPIN_ON_OWNER | |
237 | + | |
234 | 238 | config ARCH_USE_QUEUE_RWLOCK |
235 | 239 | bool |
236 | 240 |
kernel/locking/Makefile
1 | 1 | |
2 | -obj-y += mutex.o semaphore.o rwsem.o mcs_spinlock.o | |
2 | +obj-y += mutex.o semaphore.o rwsem.o | |
3 | 3 | |
4 | 4 | ifdef CONFIG_FUNCTION_TRACER |
5 | 5 | CFLAGS_REMOVE_lockdep.o = -pg |
... | ... | @@ -14,6 +14,7 @@ |
14 | 14 | obj-$(CONFIG_LOCKDEP) += lockdep_proc.o |
15 | 15 | endif |
16 | 16 | obj-$(CONFIG_SMP) += spinlock.o |
17 | +obj-$(CONFIG_LOCK_SPIN_ON_OWNER) += osq_lock.o | |
17 | 18 | obj-$(CONFIG_SMP) += lglock.o |
18 | 19 | obj-$(CONFIG_PROVE_LOCKING) += spinlock.o |
19 | 20 | obj-$(CONFIG_RT_MUTEXES) += rtmutex.o |
kernel/locking/mcs_spinlock.c
1 | -#include <linux/percpu.h> | |
2 | -#include <linux/sched.h> | |
3 | -#include "mcs_spinlock.h" | |
4 | - | |
5 | -#ifdef CONFIG_SMP | |
6 | - | |
7 | -/* | |
8 | - * An MCS like lock especially tailored for optimistic spinning for sleeping | |
9 | - * lock implementations (mutex, rwsem, etc). | |
10 | - * | |
11 | - * Using a single mcs node per CPU is safe because sleeping locks should not be | |
12 | - * called from interrupt context and we have preemption disabled while | |
13 | - * spinning. | |
14 | - */ | |
15 | -static DEFINE_PER_CPU_SHARED_ALIGNED(struct optimistic_spin_node, osq_node); | |
16 | - | |
17 | -/* | |
18 | - * We use the value 0 to represent "no CPU", thus the encoded value | |
19 | - * will be the CPU number incremented by 1. | |
20 | - */ | |
21 | -static inline int encode_cpu(int cpu_nr) | |
22 | -{ | |
23 | - return cpu_nr + 1; | |
24 | -} | |
25 | - | |
26 | -static inline struct optimistic_spin_node *decode_cpu(int encoded_cpu_val) | |
27 | -{ | |
28 | - int cpu_nr = encoded_cpu_val - 1; | |
29 | - | |
30 | - return per_cpu_ptr(&osq_node, cpu_nr); | |
31 | -} | |
32 | - | |
33 | -/* | |
34 | - * Get a stable @node->next pointer, either for unlock() or unqueue() purposes. | |
35 | - * Can return NULL in case we were the last queued and we updated @lock instead. | |
36 | - */ | |
37 | -static inline struct optimistic_spin_node * | |
38 | -osq_wait_next(struct optimistic_spin_queue *lock, | |
39 | - struct optimistic_spin_node *node, | |
40 | - struct optimistic_spin_node *prev) | |
41 | -{ | |
42 | - struct optimistic_spin_node *next = NULL; | |
43 | - int curr = encode_cpu(smp_processor_id()); | |
44 | - int old; | |
45 | - | |
46 | - /* | |
47 | - * If there is a prev node in queue, then the 'old' value will be | |
48 | - * the prev node's CPU #, else it's set to OSQ_UNLOCKED_VAL since if | |
49 | - * we're currently last in queue, then the queue will then become empty. | |
50 | - */ | |
51 | - old = prev ? prev->cpu : OSQ_UNLOCKED_VAL; | |
52 | - | |
53 | - for (;;) { | |
54 | - if (atomic_read(&lock->tail) == curr && | |
55 | - atomic_cmpxchg(&lock->tail, curr, old) == curr) { | |
56 | - /* | |
57 | - * We were the last queued, we moved @lock back. @prev | |
58 | - * will now observe @lock and will complete its | |
59 | - * unlock()/unqueue(). | |
60 | - */ | |
61 | - break; | |
62 | - } | |
63 | - | |
64 | - /* | |
65 | - * We must xchg() the @node->next value, because if we were to | |
66 | - * leave it in, a concurrent unlock()/unqueue() from | |
67 | - * @node->next might complete Step-A and think its @prev is | |
68 | - * still valid. | |
69 | - * | |
70 | - * If the concurrent unlock()/unqueue() wins the race, we'll | |
71 | - * wait for either @lock to point to us, through its Step-B, or | |
72 | - * wait for a new @node->next from its Step-C. | |
73 | - */ | |
74 | - if (node->next) { | |
75 | - next = xchg(&node->next, NULL); | |
76 | - if (next) | |
77 | - break; | |
78 | - } | |
79 | - | |
80 | - cpu_relax_lowlatency(); | |
81 | - } | |
82 | - | |
83 | - return next; | |
84 | -} | |
85 | - | |
86 | -bool osq_lock(struct optimistic_spin_queue *lock) | |
87 | -{ | |
88 | - struct optimistic_spin_node *node = this_cpu_ptr(&osq_node); | |
89 | - struct optimistic_spin_node *prev, *next; | |
90 | - int curr = encode_cpu(smp_processor_id()); | |
91 | - int old; | |
92 | - | |
93 | - node->locked = 0; | |
94 | - node->next = NULL; | |
95 | - node->cpu = curr; | |
96 | - | |
97 | - old = atomic_xchg(&lock->tail, curr); | |
98 | - if (old == OSQ_UNLOCKED_VAL) | |
99 | - return true; | |
100 | - | |
101 | - prev = decode_cpu(old); | |
102 | - node->prev = prev; | |
103 | - ACCESS_ONCE(prev->next) = node; | |
104 | - | |
105 | - /* | |
106 | - * Normally @prev is untouchable after the above store; because at that | |
107 | - * moment unlock can proceed and wipe the node element from stack. | |
108 | - * | |
109 | - * However, since our nodes are static per-cpu storage, we're | |
110 | - * guaranteed their existence -- this allows us to apply | |
111 | - * cmpxchg in an attempt to undo our queueing. | |
112 | - */ | |
113 | - | |
114 | - while (!smp_load_acquire(&node->locked)) { | |
115 | - /* | |
116 | - * If we need to reschedule bail... so we can block. | |
117 | - */ | |
118 | - if (need_resched()) | |
119 | - goto unqueue; | |
120 | - | |
121 | - cpu_relax_lowlatency(); | |
122 | - } | |
123 | - return true; | |
124 | - | |
125 | -unqueue: | |
126 | - /* | |
127 | - * Step - A -- stabilize @prev | |
128 | - * | |
129 | - * Undo our @prev->next assignment; this will make @prev's | |
130 | - * unlock()/unqueue() wait for a next pointer since @lock points to us | |
131 | - * (or later). | |
132 | - */ | |
133 | - | |
134 | - for (;;) { | |
135 | - if (prev->next == node && | |
136 | - cmpxchg(&prev->next, node, NULL) == node) | |
137 | - break; | |
138 | - | |
139 | - /* | |
140 | - * We can only fail the cmpxchg() racing against an unlock(), | |
141 | - * in which case we should observe @node->locked becomming | |
142 | - * true. | |
143 | - */ | |
144 | - if (smp_load_acquire(&node->locked)) | |
145 | - return true; | |
146 | - | |
147 | - cpu_relax_lowlatency(); | |
148 | - | |
149 | - /* | |
150 | - * Or we race against a concurrent unqueue()'s step-B, in which | |
151 | - * case its step-C will write us a new @node->prev pointer. | |
152 | - */ | |
153 | - prev = ACCESS_ONCE(node->prev); | |
154 | - } | |
155 | - | |
156 | - /* | |
157 | - * Step - B -- stabilize @next | |
158 | - * | |
159 | - * Similar to unlock(), wait for @node->next or move @lock from @node | |
160 | - * back to @prev. | |
161 | - */ | |
162 | - | |
163 | - next = osq_wait_next(lock, node, prev); | |
164 | - if (!next) | |
165 | - return false; | |
166 | - | |
167 | - /* | |
168 | - * Step - C -- unlink | |
169 | - * | |
170 | - * @prev is stable because its still waiting for a new @prev->next | |
171 | - * pointer, @next is stable because our @node->next pointer is NULL and | |
172 | - * it will wait in Step-A. | |
173 | - */ | |
174 | - | |
175 | - ACCESS_ONCE(next->prev) = prev; | |
176 | - ACCESS_ONCE(prev->next) = next; | |
177 | - | |
178 | - return false; | |
179 | -} | |
180 | - | |
181 | -void osq_unlock(struct optimistic_spin_queue *lock) | |
182 | -{ | |
183 | - struct optimistic_spin_node *node, *next; | |
184 | - int curr = encode_cpu(smp_processor_id()); | |
185 | - | |
186 | - /* | |
187 | - * Fast path for the uncontended case. | |
188 | - */ | |
189 | - if (likely(atomic_cmpxchg(&lock->tail, curr, OSQ_UNLOCKED_VAL) == curr)) | |
190 | - return; | |
191 | - | |
192 | - /* | |
193 | - * Second most likely case. | |
194 | - */ | |
195 | - node = this_cpu_ptr(&osq_node); | |
196 | - next = xchg(&node->next, NULL); | |
197 | - if (next) { | |
198 | - ACCESS_ONCE(next->locked) = 1; | |
199 | - return; | |
200 | - } | |
201 | - | |
202 | - next = osq_wait_next(lock, node, NULL); | |
203 | - if (next) | |
204 | - ACCESS_ONCE(next->locked) = 1; | |
205 | -} | |
206 | - | |
207 | -#endif |
kernel/locking/mcs_spinlock.h
... | ... | @@ -108,21 +108,5 @@ |
108 | 108 | arch_mcs_spin_unlock_contended(&next->locked); |
109 | 109 | } |
110 | 110 | |
111 | -/* | |
112 | - * Cancellable version of the MCS lock above. | |
113 | - * | |
114 | - * Intended for adaptive spinning of sleeping locks: | |
115 | - * mutex_lock()/rwsem_down_{read,write}() etc. | |
116 | - */ | |
117 | - | |
118 | -struct optimistic_spin_node { | |
119 | - struct optimistic_spin_node *next, *prev; | |
120 | - int locked; /* 1 if lock acquired */ | |
121 | - int cpu; /* encoded CPU # value */ | |
122 | -}; | |
123 | - | |
124 | -extern bool osq_lock(struct optimistic_spin_queue *lock); | |
125 | -extern void osq_unlock(struct optimistic_spin_queue *lock); | |
126 | - | |
127 | 111 | #endif /* __LINUX_MCS_SPINLOCK_H */ |
kernel/locking/osq_lock.c
1 | +#include <linux/percpu.h> | |
2 | +#include <linux/sched.h> | |
3 | +#include <linux/osq_lock.h> | |
4 | + | |
5 | +/* | |
6 | + * An MCS like lock especially tailored for optimistic spinning for sleeping | |
7 | + * lock implementations (mutex, rwsem, etc). | |
8 | + * | |
9 | + * Using a single mcs node per CPU is safe because sleeping locks should not be | |
10 | + * called from interrupt context and we have preemption disabled while | |
11 | + * spinning. | |
12 | + */ | |
13 | +static DEFINE_PER_CPU_SHARED_ALIGNED(struct optimistic_spin_node, osq_node); | |
14 | + | |
15 | +/* | |
16 | + * We use the value 0 to represent "no CPU", thus the encoded value | |
17 | + * will be the CPU number incremented by 1. | |
18 | + */ | |
19 | +static inline int encode_cpu(int cpu_nr) | |
20 | +{ | |
21 | + return cpu_nr + 1; | |
22 | +} | |
23 | + | |
24 | +static inline struct optimistic_spin_node *decode_cpu(int encoded_cpu_val) | |
25 | +{ | |
26 | + int cpu_nr = encoded_cpu_val - 1; | |
27 | + | |
28 | + return per_cpu_ptr(&osq_node, cpu_nr); | |
29 | +} | |
30 | + | |
31 | +/* | |
32 | + * Get a stable @node->next pointer, either for unlock() or unqueue() purposes. | |
33 | + * Can return NULL in case we were the last queued and we updated @lock instead. | |
34 | + */ | |
35 | +static inline struct optimistic_spin_node * | |
36 | +osq_wait_next(struct optimistic_spin_queue *lock, | |
37 | + struct optimistic_spin_node *node, | |
38 | + struct optimistic_spin_node *prev) | |
39 | +{ | |
40 | + struct optimistic_spin_node *next = NULL; | |
41 | + int curr = encode_cpu(smp_processor_id()); | |
42 | + int old; | |
43 | + | |
44 | + /* | |
45 | + * If there is a prev node in queue, then the 'old' value will be | |
46 | + * the prev node's CPU #, else it's set to OSQ_UNLOCKED_VAL since if | |
47 | + * we're currently last in queue, then the queue will then become empty. | |
48 | + */ | |
49 | + old = prev ? prev->cpu : OSQ_UNLOCKED_VAL; | |
50 | + | |
51 | + for (;;) { | |
52 | + if (atomic_read(&lock->tail) == curr && | |
53 | + atomic_cmpxchg(&lock->tail, curr, old) == curr) { | |
54 | + /* | |
55 | + * We were the last queued, we moved @lock back. @prev | |
56 | + * will now observe @lock and will complete its | |
57 | + * unlock()/unqueue(). | |
58 | + */ | |
59 | + break; | |
60 | + } | |
61 | + | |
62 | + /* | |
63 | + * We must xchg() the @node->next value, because if we were to | |
64 | + * leave it in, a concurrent unlock()/unqueue() from | |
65 | + * @node->next might complete Step-A and think its @prev is | |
66 | + * still valid. | |
67 | + * | |
68 | + * If the concurrent unlock()/unqueue() wins the race, we'll | |
69 | + * wait for either @lock to point to us, through its Step-B, or | |
70 | + * wait for a new @node->next from its Step-C. | |
71 | + */ | |
72 | + if (node->next) { | |
73 | + next = xchg(&node->next, NULL); | |
74 | + if (next) | |
75 | + break; | |
76 | + } | |
77 | + | |
78 | + cpu_relax_lowlatency(); | |
79 | + } | |
80 | + | |
81 | + return next; | |
82 | +} | |
83 | + | |
84 | +bool osq_lock(struct optimistic_spin_queue *lock) | |
85 | +{ | |
86 | + struct optimistic_spin_node *node = this_cpu_ptr(&osq_node); | |
87 | + struct optimistic_spin_node *prev, *next; | |
88 | + int curr = encode_cpu(smp_processor_id()); | |
89 | + int old; | |
90 | + | |
91 | + node->locked = 0; | |
92 | + node->next = NULL; | |
93 | + node->cpu = curr; | |
94 | + | |
95 | + old = atomic_xchg(&lock->tail, curr); | |
96 | + if (old == OSQ_UNLOCKED_VAL) | |
97 | + return true; | |
98 | + | |
99 | + prev = decode_cpu(old); | |
100 | + node->prev = prev; | |
101 | + ACCESS_ONCE(prev->next) = node; | |
102 | + | |
103 | + /* | |
104 | + * Normally @prev is untouchable after the above store; because at that | |
105 | + * moment unlock can proceed and wipe the node element from stack. | |
106 | + * | |
107 | + * However, since our nodes are static per-cpu storage, we're | |
108 | + * guaranteed their existence -- this allows us to apply | |
109 | + * cmpxchg in an attempt to undo our queueing. | |
110 | + */ | |
111 | + | |
112 | + while (!smp_load_acquire(&node->locked)) { | |
113 | + /* | |
114 | + * If we need to reschedule bail... so we can block. | |
115 | + */ | |
116 | + if (need_resched()) | |
117 | + goto unqueue; | |
118 | + | |
119 | + cpu_relax_lowlatency(); | |
120 | + } | |
121 | + return true; | |
122 | + | |
123 | +unqueue: | |
124 | + /* | |
125 | + * Step - A -- stabilize @prev | |
126 | + * | |
127 | + * Undo our @prev->next assignment; this will make @prev's | |
128 | + * unlock()/unqueue() wait for a next pointer since @lock points to us | |
129 | + * (or later). | |
130 | + */ | |
131 | + | |
132 | + for (;;) { | |
133 | + if (prev->next == node && | |
134 | + cmpxchg(&prev->next, node, NULL) == node) | |
135 | + break; | |
136 | + | |
137 | + /* | |
138 | + * We can only fail the cmpxchg() racing against an unlock(), | |
139 | + * in which case we should observe @node->locked becomming | |
140 | + * true. | |
141 | + */ | |
142 | + if (smp_load_acquire(&node->locked)) | |
143 | + return true; | |
144 | + | |
145 | + cpu_relax_lowlatency(); | |
146 | + | |
147 | + /* | |
148 | + * Or we race against a concurrent unqueue()'s step-B, in which | |
149 | + * case its step-C will write us a new @node->prev pointer. | |
150 | + */ | |
151 | + prev = ACCESS_ONCE(node->prev); | |
152 | + } | |
153 | + | |
154 | + /* | |
155 | + * Step - B -- stabilize @next | |
156 | + * | |
157 | + * Similar to unlock(), wait for @node->next or move @lock from @node | |
158 | + * back to @prev. | |
159 | + */ | |
160 | + | |
161 | + next = osq_wait_next(lock, node, prev); | |
162 | + if (!next) | |
163 | + return false; | |
164 | + | |
165 | + /* | |
166 | + * Step - C -- unlink | |
167 | + * | |
168 | + * @prev is stable because its still waiting for a new @prev->next | |
169 | + * pointer, @next is stable because our @node->next pointer is NULL and | |
170 | + * it will wait in Step-A. | |
171 | + */ | |
172 | + | |
173 | + ACCESS_ONCE(next->prev) = prev; | |
174 | + ACCESS_ONCE(prev->next) = next; | |
175 | + | |
176 | + return false; | |
177 | +} | |
178 | + | |
179 | +void osq_unlock(struct optimistic_spin_queue *lock) | |
180 | +{ | |
181 | + struct optimistic_spin_node *node, *next; | |
182 | + int curr = encode_cpu(smp_processor_id()); | |
183 | + | |
184 | + /* | |
185 | + * Fast path for the uncontended case. | |
186 | + */ | |
187 | + if (likely(atomic_cmpxchg(&lock->tail, curr, OSQ_UNLOCKED_VAL) == curr)) | |
188 | + return; | |
189 | + | |
190 | + /* | |
191 | + * Second most likely case. | |
192 | + */ | |
193 | + node = this_cpu_ptr(&osq_node); | |
194 | + next = xchg(&node->next, NULL); | |
195 | + if (next) { | |
196 | + ACCESS_ONCE(next->locked) = 1; | |
197 | + return; | |
198 | + } | |
199 | + | |
200 | + next = osq_wait_next(lock, node, NULL); | |
201 | + if (next) | |
202 | + ACCESS_ONCE(next->locked) = 1; | |
203 | +} |
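Finally, a standalone userspace sketch (not part of the patch) of the tail-encoding convention used in osq_lock.c above, assuming only what its comments state: CPU numbers are stored as cpu + 1 so that 0 (OSQ_UNLOCKED_VAL) can mean "no CPU / queue empty" while CPU 0 remains representable.

    /* Userspace demo of the OSQ tail encoding; compiles with any C compiler. */
    #include <assert.h>
    #include <stdio.h>

    #define OSQ_UNLOCKED_VAL 0

    static int encode_cpu(int cpu_nr)      { return cpu_nr + 1; }
    static int decode_cpu(int encoded_val) { return encoded_val - 1; }

    int main(void)
    {
            /* CPU 0 must not collide with the "empty queue" value. */
            assert(encode_cpu(0) != OSQ_UNLOCKED_VAL);

            /* Encoding and decoding round-trip. */
            assert(decode_cpu(encode_cpu(5)) == 5);

            printf("tail=%d means empty, tail=%d means CPU 0 is the tail\n",
                   OSQ_UNLOCKED_VAL, encode_cpu(0));
            return 0;
    }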