kernel/locking/qspinlock.c

// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Queued spinlock
 *
 * (C) Copyright 2013-2015 Hewlett-Packard Development Company, L.P.
 * (C) Copyright 2013-2014,2018 Red Hat, Inc.
 * (C) Copyright 2015 Intel Corp.
 * (C) Copyright 2015 Hewlett-Packard Enterprise Development LP
 *
 * Authors: Waiman Long <longman@redhat.com>
 *          Peter Zijlstra <peterz@infradead.org>
 */

#ifndef _GEN_PV_LOCK_SLOWPATH

#include <linux/smp.h>
#include <linux/bug.h>
#include <linux/cpumask.h>
#include <linux/percpu.h>
#include <linux/hardirq.h>
#include <linux/mutex.h>
#include <linux/prefetch.h>
#include <asm/byteorder.h>
#include <asm/qspinlock.h>

/*
 * Include queued spinlock statistics code
 */
#include "qspinlock_stat.h"

/*
 * The basic principle of a queue-based spinlock can best be understood
 * by studying a classic queue-based spinlock implementation called the
 * MCS lock. The paper below provides a good description for this kind
 * of lock.
 *
 * http://www.cise.ufl.edu/tr/DOC/REP-1992-71.pdf
 *
 * This queued spinlock implementation is based on the MCS lock, however to make
 * it fit the 4 bytes we assume spinlock_t to be, and preserve its existing
 * API, we must modify it somehow.
 *
 * In particular; where the traditional MCS lock consists of a tail pointer
 * (8 bytes) and needs the next pointer (another 8 bytes) of its own node to
 * unlock the next pending (next->locked), we compress both these: {tail,
 * next->locked} into a single u32 value.
 *
 * Since a spinlock disables recursion of its own context and there is a limit
 * to the contexts that can nest; namely: task, softirq, hardirq, nmi. As there
 * are at most 4 nesting levels, it can be encoded by a 2-bit number. Now
 * we can encode the tail by combining the 2-bit nesting level with the cpu
 * number. With one byte for the lock value and 3 bytes for the tail, only a
 * 32-bit word is now needed. Even though we only need 1 bit for the lock,
 * we extend it to a full byte to achieve better performance for architectures
 * that support atomic byte write.
 *
 * We also change the first spinner to spin on the lock bit instead of its
 * node; whereby avoiding the need to carry a node from lock to unlock, and
 * preserving existing lock API. This also makes the unlock code simpler and
 * faster.
 *
 * N.B. The current implementation only supports architectures that allow
 * atomic operations on smaller 8-bit and 16-bit data types.
 *
 */

#include "mcs_spinlock.h"
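
/*
 * For reference, an illustrative sketch of the 32-bit lock word layout; the
 * authoritative definition lives in include/asm-generic/qspinlock_types.h.
 * Shown here for the common NR_CPUS < 16K configuration, i.e.
 * _Q_PENDING_BITS == 8:
 *
 *  0- 7: locked byte
 *     8: pending
 *  9-15: not used
 * 16-17: tail index (nesting level)
 * 18-31: tail cpu (+ 1)
 */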

#define MAX_NODES	4

/*
 * On 64-bit architectures, the mcs_spinlock structure will be 16 bytes in
 * size and four of them will fit nicely in one 64-byte cacheline. For
 * pvqspinlock, however, we need more space for extra data. To accommodate
 * that, we insert two more long words to pad it up to 32 bytes. IOW, only
 * two of them can fit in a cacheline in this case. That is OK as it is rare
 * to have more than 2 levels of slowpath nesting in actual use. We don't
 * want to penalize pvqspinlocks to optimize for a rare case in native
 * qspinlocks.
 */
struct qnode {
	struct mcs_spinlock mcs;
#ifdef CONFIG_PARAVIRT_SPINLOCKS
	long reserved[2];
#endif
};

/*
 * The pending bit spinning loop count.
 * This heuristic is used to limit the number of lockword accesses
 * made by atomic_cond_read_relaxed when waiting for the lock to
 * transition out of the "== _Q_PENDING_VAL" state. We don't spin
 * indefinitely because there's no guarantee that we'll make forward
 * progress.
 */
#ifndef _Q_PENDING_LOOPS
#define _Q_PENDING_LOOPS	1
#endif

/*
 * Per-CPU queue node structures; we can never have more than 4 nested
 * contexts: task, softirq, hardirq, nmi.
 *
 * Exactly fits one 64-byte cacheline on a 64-bit architecture.
 *
 * PV doubles the storage and uses the second cacheline for PV state.
 */
static DEFINE_PER_CPU_ALIGNED(struct qnode, qnodes[MAX_NODES]);

/*
 * We must be able to distinguish between no-tail and the tail at 0:0,
 * therefore increment the cpu number by one.
 */
static inline __pure u32 encode_tail(int cpu, int idx)
{
	u32 tail;

	tail  = (cpu + 1) << _Q_TAIL_CPU_OFFSET;
	tail |= idx << _Q_TAIL_IDX_OFFSET; /* assume < 4 */

	return tail;
}

static inline __pure struct mcs_spinlock *decode_tail(u32 tail)
{
	int cpu = (tail >> _Q_TAIL_CPU_OFFSET) - 1;
	int idx = (tail &  _Q_TAIL_IDX_MASK) >> _Q_TAIL_IDX_OFFSET;

	return per_cpu_ptr(&qnodes[idx].mcs, cpu);
}

static inline __pure
struct mcs_spinlock *grab_mcs_node(struct mcs_spinlock *base, int idx)
{
	return &((struct qnode *)base + idx)->mcs;
}
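
/*
 * Worked example (illustrative only, assuming the NR_CPUS < 16K layout where
 * _Q_TAIL_IDX_OFFSET == 16 and _Q_TAIL_CPU_OFFSET == 18):
 *
 *   encode_tail(cpu = 2, idx = 1) == ((2 + 1) << 18) | (1 << 16)
 *
 * decode_tail() of that value recovers cpu = 2, idx = 1, i.e. it returns
 * per_cpu_ptr(&qnodes[1].mcs, 2), the second-level queue node of CPU 2.
 */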

#define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK)

#if _Q_PENDING_BITS == 8
/**
 * clear_pending - clear the pending bit.
 * @lock: Pointer to queued spinlock structure
 *
 * *,1,* -> *,0,*
 */
static __always_inline void clear_pending(struct qspinlock *lock)
{
	WRITE_ONCE(lock->pending, 0);
}

/**
 * clear_pending_set_locked - take ownership and clear the pending bit.
 * @lock: Pointer to queued spinlock structure
 *
 * *,1,0 -> *,0,1
 *
 * Lock stealing is not allowed if this function is used.
 */
static __always_inline void clear_pending_set_locked(struct qspinlock *lock)
{
	WRITE_ONCE(lock->locked_pending, _Q_LOCKED_VAL);
}

/*
 * xchg_tail - Put in the new queue tail code word & retrieve previous one
 * @lock : Pointer to queued spinlock structure
 * @tail : The new queue tail code word
 * Return: The previous queue tail code word
 *
 * xchg(lock, tail), which heads an address dependency
 *
 * p,*,* -> n,*,* ; prev = xchg(lock, node)
 */
static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
{
	/*
	 * We can use relaxed semantics since the caller ensures that the
	 * MCS node is properly initialized before updating the tail.
	 */
	return (u32)xchg_relaxed(&lock->tail,
				 tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET;
}

#else /* _Q_PENDING_BITS == 8 */

/**
 * clear_pending - clear the pending bit.
 * @lock: Pointer to queued spinlock structure
 *
 * *,1,* -> *,0,*
 */
static __always_inline void clear_pending(struct qspinlock *lock)
{
	atomic_andnot(_Q_PENDING_VAL, &lock->val);
}

/**
 * clear_pending_set_locked - take ownership and clear the pending bit.
 * @lock: Pointer to queued spinlock structure
 *
 * *,1,0 -> *,0,1
 */
static __always_inline void clear_pending_set_locked(struct qspinlock *lock)
{
	atomic_add(-_Q_PENDING_VAL + _Q_LOCKED_VAL, &lock->val);
}

/**
 * xchg_tail - Put in the new queue tail code word & retrieve previous one
 * @lock : Pointer to queued spinlock structure
 * @tail : The new queue tail code word
 * Return: The previous queue tail code word
 *
 * xchg(lock, tail)
 *
 * p,*,* -> n,*,* ; prev = xchg(lock, node)
 */
static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
{
	u32 old, new, val = atomic_read(&lock->val);

	for (;;) {
		new = (val & _Q_LOCKED_PENDING_MASK) | tail;
		/*
		 * We can use relaxed semantics since the caller ensures that
		 * the MCS node is properly initialized before updating the
		 * tail.
		 */
		old = atomic_cmpxchg_relaxed(&lock->val, val, new);
		if (old == val)
			break;

		val = old;
	}
	return old;
}
#endif /* _Q_PENDING_BITS == 8 */
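
/*
 * A short note on the two variants above: when _Q_PENDING_BITS == 8 (the
 * NR_CPUS < 16K layout), the pending bit owns a whole byte and the tail
 * occupies the upper 16 bits of the lock word, so clear_pending() and
 * clear_pending_set_locked() can use plain byte/halfword stores and
 * xchg_tail() a 16-bit xchg. In the fallback layout the pending bit and tail
 * share bits within the word, so the fields must be updated with
 * read-modify-write atomics on the full 32-bit value.
 */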

/**
 * queued_fetch_set_pending_acquire - fetch the whole lock value and set pending
 * @lock : Pointer to queued spinlock structure
 * Return: The previous lock value
 *
 * *,*,* -> *,1,*
 */
#ifndef queued_fetch_set_pending_acquire
static __always_inline u32 queued_fetch_set_pending_acquire(struct qspinlock *lock)
{
	return atomic_fetch_or_acquire(_Q_PENDING_VAL, &lock->val);
}
#endif

/**
 * set_locked - Set the lock bit and own the lock
 * @lock: Pointer to queued spinlock structure
 *
 * *,*,0 -> *,0,1
 */
static __always_inline void set_locked(struct qspinlock *lock)
{
	WRITE_ONCE(lock->locked, _Q_LOCKED_VAL);
}

/*
 * Generate the native code for queued_spin_unlock_slowpath(); provide NOPs for
 * all the PV callbacks.
 */

static __always_inline void __pv_init_node(struct mcs_spinlock *node) { }
static __always_inline void __pv_wait_node(struct mcs_spinlock *node,
					   struct mcs_spinlock *prev) { }
static __always_inline void __pv_kick_node(struct qspinlock *lock,
					   struct mcs_spinlock *node) { }
static __always_inline u32  __pv_wait_head_or_lock(struct qspinlock *lock,
						   struct mcs_spinlock *node)
						   { return 0; }

#define pv_enabled()		false

#define pv_init_node		__pv_init_node
#define pv_wait_node		__pv_wait_node
#define pv_kick_node		__pv_kick_node
#define pv_wait_head_or_lock	__pv_wait_head_or_lock

#ifdef CONFIG_PARAVIRT_SPINLOCKS
#define queued_spin_lock_slowpath	native_queued_spin_lock_slowpath
#endif

#endif /* _GEN_PV_LOCK_SLOWPATH */
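
/*
 * Context note: queued_spin_lock_slowpath() below is only the contended-path
 * entry point. The uncontended fast path lives in the generic header
 * (queued_spin_lock() in include/asm-generic/qspinlock.h), which attempts a
 * single try-cmpxchg of 0 -> _Q_LOCKED_VAL and falls back here, passing the
 * lock value it observed, only when that fails.
 */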

/**
 * queued_spin_lock_slowpath - acquire the queued spinlock
 * @lock: Pointer to queued spinlock structure
 * @val: Current value of the queued spinlock 32-bit word
 *
 * (queue tail, pending bit, lock value)
 *
 *              fast     :    slow                                  :    unlock
 *                       :                                          :
 * uncontended  (0,0,0) -:--> (0,0,1) ------------------------------:--> (*,*,0)
 *                       :       | ^--------.------.             /  :
 *                       :       v           \      \            |  :
 * pending               :    (0,1,1) +--> (0,1,0)   \           |  :
 *                       :       | ^--'              |           |  :
 *                       :       v                   |           |  :
 * uncontended           :    (n,x,y) +--> (n,0,0) --'            |  :
 *   queue               :       | ^--'                           |  :
 *                       :       v                                |  :
 * contended             :    (*,x,y) +--> (*,0,0) ---> (*,0,1) -'  :
 *   queue               :         ^--'                             :
 */
void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
{
	struct mcs_spinlock *prev, *next, *node;
	u32 old, tail;
	int idx;

	BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));

	if (pv_enabled())
		goto pv_queue;

	if (virt_spin_lock(lock))
		return;

	/*
	 * Wait for in-progress pending->locked hand-overs with a bounded
	 * number of spins so that we guarantee forward progress.
	 *
	 * 0,1,0 -> 0,0,1
	 */
	if (val == _Q_PENDING_VAL) {
		int cnt = _Q_PENDING_LOOPS;
		val = atomic_cond_read_relaxed(&lock->val,
					       (VAL != _Q_PENDING_VAL) || !cnt--);
	}

	/*
	 * If we observe any contention; queue.
	 */
	if (val & ~_Q_LOCKED_MASK)
		goto queue;

	/*
	 * trylock || pending
	 *
	 * 0,0,* -> 0,1,* -> 0,0,1 pending, trylock
	 */
	val = queued_fetch_set_pending_acquire(lock);

	/*
	 * If we observe contention, there is a concurrent locker.
	 *
	 * Undo and queue; our setting of PENDING might have made the
	 * n,0,0 -> 0,0,0 transition fail and it will now be waiting
	 * on @next to become !NULL.
	 */
	if (unlikely(val & ~_Q_LOCKED_MASK)) {

		/* Undo PENDING if we set it. */
		if (!(val & _Q_PENDING_MASK))
			clear_pending(lock);

		goto queue;
	}

	/*
	 * We're pending, wait for the owner to go away.
	 *
	 * 0,1,1 -> 0,1,0
	 *
	 * this wait loop must be a load-acquire such that we match the
	 * store-release that clears the locked bit and create lock
	 * sequentiality; this is because not all
	 * clear_pending_set_locked() implementations imply full
	 * barriers.
	 */
	if (val & _Q_LOCKED_MASK)
		atomic_cond_read_acquire(&lock->val, !(VAL & _Q_LOCKED_MASK));

	/*
	 * take ownership and clear the pending bit.
	 *
	 * 0,1,0 -> 0,0,1
	 */
	clear_pending_set_locked(lock);
	lockevent_inc(lock_pending);
	return;

	/*
	 * End of pending bit optimistic spinning and beginning of MCS
	 * queuing.
	 */
queue:
	lockevent_inc(lock_slowpath);
pv_queue:
	node = this_cpu_ptr(&qnodes[0].mcs);
	idx = node->count++;
	tail = encode_tail(smp_processor_id(), idx);

	/*
	 * 4 nodes are allocated based on the assumption that there will
	 * not be nested NMIs taking spinlocks. That may not be true in
	 * some architectures even though the chance of needing more than
	 * 4 nodes will still be extremely unlikely. When that happens,
	 * we fall back to spinning on the lock directly without using
	 * any MCS node. This is not the most elegant solution, but is
	 * simple enough.
	 */
	if (unlikely(idx >= MAX_NODES)) {
		lockevent_inc(lock_no_node);
		while (!queued_spin_trylock(lock))
			cpu_relax();
		goto release;
	}

	node = grab_mcs_node(node, idx);

	/*
	 * Keep counts of non-zero index values:
	 */
	lockevent_cond_inc(lock_use_node2 + idx - 1, idx);

	/*
	 * Ensure that we increment the head node->count before initialising
	 * the actual node. If the compiler is kind enough to reorder these
	 * stores, then an IRQ could overwrite our assignments.
	 */
	barrier();

	node->locked = 0;
	node->next = NULL;
	pv_init_node(node);

	/*
	 * We touched a (possibly) cold cacheline in the per-cpu queue node;
	 * attempt the trylock once more in the hope someone let go while we
	 * weren't watching.
	 */
	if (queued_spin_trylock(lock))
		goto release;

	/*
	 * Ensure that the initialisation of @node is complete before we
	 * publish the updated tail via xchg_tail() and potentially link
	 * @node into the waitqueue via WRITE_ONCE(prev->next, node) below.
	 */
	smp_wmb();

	/*
	 * Publish the updated tail.
	 * We have already touched the queueing cacheline; don't bother with
	 * pending stuff.
	 *
	 * p,*,* -> n,*,*
	 */
	old = xchg_tail(lock, tail);
	next = NULL;

	/*
	 * if there was a previous node; link it and wait until reaching the
	 * head of the waitqueue.
	 */
	if (old & _Q_TAIL_MASK) {
		prev = decode_tail(old);

		/* Link @node into the waitqueue. */
		WRITE_ONCE(prev->next, node);

		pv_wait_node(node, prev);
		arch_mcs_spin_lock_contended(&node->locked);

		/*
		 * While waiting for the MCS lock, the next pointer may have
		 * been set by another lock waiter. We optimistically load
		 * the next pointer & prefetch the cacheline for writing
		 * to reduce latency in the upcoming MCS unlock operation.
		 */
		next = READ_ONCE(node->next);
		if (next)
			prefetchw(next);
	}

	/*
	 * we're at the head of the waitqueue, wait for the owner & pending to
	 * go away.
	 *
	 * *,x,y -> *,0,0
	 *
	 * this wait loop must use a load-acquire such that we match the
	 * store-release that clears the locked bit and create lock
	 * sequentiality; this is because the set_locked() function below
	 * does not imply a full barrier.
	 *
	 * The PV pv_wait_head_or_lock function, if active, will acquire
	 * the lock and return a non-zero value. So we have to skip the
	 * atomic_cond_read_acquire() call. As the next PV queue head hasn't
	 * been designated yet, there is no way for the locked value to become
	 * _Q_SLOW_VAL. So both the set_locked() and the
	 * atomic_cmpxchg_relaxed() calls will be safe.
	 *
	 * If PV isn't active, 0 will be returned instead.
	 *
	 */
	if ((val = pv_wait_head_or_lock(lock, node)))
		goto locked;

	val = atomic_cond_read_acquire(&lock->val, !(VAL & _Q_LOCKED_PENDING_MASK));

locked:
	/*
	 * claim the lock:
	 *
	 * n,0,0 -> 0,0,1 : lock, uncontended
	 * *,*,0 -> *,*,1 : lock, contended
	 *
	 * If the queue head is the only one in the queue (lock value == tail)
	 * and nobody is pending, clear the tail code and grab the lock.
	 * Otherwise, we only need to grab the lock.
	 */

	/*
	 * In the PV case we might already have _Q_LOCKED_VAL set, because
	 * of lock stealing; therefore we must also allow:
	 *
	 * n,0,1 -> 0,0,1
	 *
	 * Note: at this point: (val & _Q_PENDING_MASK) == 0, because of the
	 *       above wait condition, therefore any concurrent setting of
	 *       PENDING will make the uncontended transition fail.
	 */
	if ((val & _Q_TAIL_MASK) == tail) {
		if (atomic_try_cmpxchg_relaxed(&lock->val, &val, _Q_LOCKED_VAL))
			goto release; /* No contention */
	}

	/*
	 * Either somebody is queued behind us or _Q_PENDING_VAL got set
	 * which will then detect the remaining tail and queue behind us
	 * ensuring we'll see a @next.
	 */
	set_locked(lock);

	/*
	 * contended path; wait for next if not observed yet, release.
	 */
	if (!next)
		next = smp_cond_load_relaxed(&node->next, (VAL));

	arch_mcs_spin_unlock_contended(&next->locked);
	pv_kick_node(lock, next);

release:
	/*
	 * release the node
	 */
	__this_cpu_dec(qnodes[0].mcs.count);
}
EXPORT_SYMBOL(queued_spin_lock_slowpath);

/*
 * Generate the paravirt code for queued_spin_unlock_slowpath().
 */
#if !defined(_GEN_PV_LOCK_SLOWPATH) && defined(CONFIG_PARAVIRT_SPINLOCKS)
#define _GEN_PV_LOCK_SLOWPATH

#undef  pv_enabled
#define pv_enabled()	true

#undef pv_init_node
#undef pv_wait_node
#undef pv_kick_node
#undef pv_wait_head_or_lock

#undef  queued_spin_lock_slowpath
#define queued_spin_lock_slowpath	__pv_queued_spin_lock_slowpath

#include "qspinlock_paravirt.h"
#include "qspinlock.c"

#endif
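
/*
 * How the block above works: the #include "qspinlock.c" compiles this file a
 * second time with _GEN_PV_LOCK_SLOWPATH defined, so the #ifndef at the top
 * skips the shared definitions and only the slowpath body is emitted again,
 * this time as __pv_queued_spin_lock_slowpath() with the pv_*() hooks
 * resolved to the paravirt implementations from qspinlock_paravirt.h instead
 * of the NOP stubs used for the native build.
 */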