Eric Lee / smarc-fsl-linux-kernel

Blame view

kernel/locking/qspinlock.c 16.6 KB

c942fddf8 Thomas Gleixner treewide: Replace...	1	// SPDX-License-Identifier: GPL-2.0-or-later
a33fda35e Waiman Long locking/qspinlock...	2 3 4	/* * Queued spinlock *
a33fda35e Waiman Long locking/qspinlock...	5	* (C) Copyright 2013-2015 Hewlett-Packard Development Company, L.P.
81d3dc9a3 Waiman Long locking/qspinlock...	6	* (C) Copyright 2013-2014,2018 Red Hat, Inc.
a33fda35e Waiman Long locking/qspinlock...	7	* (C) Copyright 2015 Intel Corp.
64d816cba Waiman Long locking/qspinlock...	8	* (C) Copyright 2015 Hewlett-Packard Enterprise Development LP
a33fda35e Waiman Long locking/qspinlock...	9	*
81d3dc9a3 Waiman Long locking/qspinlock...	10	* Authors: Waiman Long <longman@redhat.com>
a33fda35e Waiman Long locking/qspinlock...	11 12	* Peter Zijlstra <peterz@infradead.org> */
a23db284f Waiman Long locking/pvqspinlo...	13 14	#ifndef _GEN_PV_LOCK_SLOWPATH
a33fda35e Waiman Long locking/qspinlock...	15 16 17 18 19 20	#include <linux/smp.h> #include <linux/bug.h> #include <linux/cpumask.h> #include <linux/percpu.h> #include <linux/hardirq.h> #include <linux/mutex.h>
5671360f2 Stafford Horne locking/qspinlock...	21	#include <linux/prefetch.h>
69f9cae90 Peter Zijlstra (Intel) locking/qspinlock...	22	#include <asm/byteorder.h>
a33fda35e Waiman Long locking/qspinlock...	23 24 25	#include <asm/qspinlock.h> /*
81d3dc9a3 Waiman Long locking/qspinlock...	26 27 28 29 30	* Include queued spinlock statistics code / #include "qspinlock_stat.h" /
a33fda35e Waiman Long locking/qspinlock...	31 32	* The basic principle of a queue-based spinlock can best be understood * by studying a classic queue-based spinlock implementation called the
57097124c Waiman Long locking/qspinlock...	33 34 35	* MCS lock. A copy of the original MCS lock paper ("Algorithms for Scalable * Synchronization on Shared-Memory Multiprocessors by Mellor-Crummey and * Scott") is available at
a33fda35e Waiman Long locking/qspinlock...	36	*
57097124c Waiman Long locking/qspinlock...	37	* https://bugzilla.kernel.org/show_bug.cgi?id=206115
a33fda35e Waiman Long locking/qspinlock...	38	*
57097124c Waiman Long locking/qspinlock...	39 40 41	* This queued spinlock implementation is based on the MCS lock, however to * make it fit the 4 bytes we assume spinlock_t to be, and preserve its * existing API, we must modify it somehow.
a33fda35e Waiman Long locking/qspinlock...	42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60	* * In particular; where the traditional MCS lock consists of a tail pointer * (8 bytes) and needs the next pointer (another 8 bytes) of its own node to * unlock the next pending (next->locked), we compress both these: {tail, * next->locked} into a single u32 value. * * Since a spinlock disables recursion of its own context and there is a limit * to the contexts that can nest; namely: task, softirq, hardirq, nmi. As there * are at most 4 nesting levels, it can be encoded by a 2-bit number. Now * we can encode the tail by combining the 2-bit nesting level with the cpu * number. With one byte for the lock value and 3 bytes for the tail, only a * 32-bit word is now needed. Even though we only need 1 bit for the lock, * we extend it to a full byte to achieve better performance for architectures * that support atomic byte write. * * We also change the first spinner to spin on the lock bit instead of its * node; whereby avoiding the need to carry a node from lock to unlock, and * preserving existing lock API. This also makes the unlock code simpler and * faster.
69f9cae90 Peter Zijlstra (Intel) locking/qspinlock...	61 62 63 64	* * N.B. The current implementation only supports architectures that allow * atomic operations on smaller 8-bit and 16-bit data types. *
a33fda35e Waiman Long locking/qspinlock...	65 66 67	*/ #include "mcs_spinlock.h"
0fa809ca7 Waiman Long locking/pvqspinlo...	68	#define MAX_NODES 4
a33fda35e Waiman Long locking/qspinlock...	69
0fa809ca7 Waiman Long locking/pvqspinlo...	70 71 72 73 74 75 76 77 78 79 80 81	/* * On 64-bit architectures, the mcs_spinlock structure will be 16 bytes in * size and four of them will fit nicely in one 64-byte cacheline. For * pvqspinlock, however, we need more space for extra data. To accommodate * that, we insert two more long words to pad it up to 32 bytes. IOW, only * two of them can fit in a cacheline in this case. That is OK as it is rare * to have more than 2 levels of slowpath nesting in actual use. We don't * want to penalize pvqspinlocks to optimize for a rare case in native * qspinlocks. */ struct qnode { struct mcs_spinlock mcs;
a23db284f Waiman Long locking/pvqspinlo...	82	#ifdef CONFIG_PARAVIRT_SPINLOCKS
0fa809ca7 Waiman Long locking/pvqspinlo...	83	long reserved[2];
a23db284f Waiman Long locking/pvqspinlo...	84	#endif
0fa809ca7 Waiman Long locking/pvqspinlo...	85	};
a23db284f Waiman Long locking/pvqspinlo...	86
a33fda35e Waiman Long locking/qspinlock...	87	/*
6512276d9 Will Deacon locking/qspinlock...	88 89 90 91 92 93 94 95 96 97 98 99	* The pending bit spinning loop count. * This heuristic is used to limit the number of lockword accesses * made by atomic_cond_read_relaxed when waiting for the lock to * transition out of the "== _Q_PENDING_VAL" state. We don't spin * indefinitely because there's no guarantee that we'll make forward * progress. / #ifndef _Q_PENDING_LOOPS #define _Q_PENDING_LOOPS 1 #endif /
a33fda35e Waiman Long locking/qspinlock...	100 101 102 103	* Per-CPU queue node structures; we can never have more than 4 nested * contexts: task, softirq, hardirq, nmi. * * Exactly fits one 64-byte cacheline on a 64-bit architecture.
a23db284f Waiman Long locking/pvqspinlo...	104 105	* * PV doubles the storage and uses the second cacheline for PV state.
a33fda35e Waiman Long locking/qspinlock...	106	*/
0fa809ca7 Waiman Long locking/pvqspinlo...	107	static DEFINE_PER_CPU_ALIGNED(struct qnode, qnodes[MAX_NODES]);
a33fda35e Waiman Long locking/qspinlock...	108 109 110 111 112	/* * We must be able to distinguish between no-tail and the tail at 0:0, * therefore increment the cpu number by one. */
8d53fa190 Peter Zijlstra locking/qspinlock...	113	static inline __pure u32 encode_tail(int cpu, int idx)
a33fda35e Waiman Long locking/qspinlock...	114 115	{ u32 tail;
a33fda35e Waiman Long locking/qspinlock...	116 117 118 119 120	tail = (cpu + 1) << _Q_TAIL_CPU_OFFSET; tail \|= idx << _Q_TAIL_IDX_OFFSET; /* assume < 4 */ return tail; }
8d53fa190 Peter Zijlstra locking/qspinlock...	121	static inline __pure struct mcs_spinlock *decode_tail(u32 tail)
a33fda35e Waiman Long locking/qspinlock...	122 123 124	{ int cpu = (tail >> _Q_TAIL_CPU_OFFSET) - 1; int idx = (tail & _Q_TAIL_IDX_MASK) >> _Q_TAIL_IDX_OFFSET;
0fa809ca7 Waiman Long locking/pvqspinlo...	125 126 127 128 129 130 131	return per_cpu_ptr(&qnodes[idx].mcs, cpu); } static inline __pure struct mcs_spinlock grab_mcs_node(struct mcs_spinlock base, int idx) { return &((struct qnode *)base + idx)->mcs;
a33fda35e Waiman Long locking/qspinlock...	132	}
c1fb159db Peter Zijlstra (Intel) locking/qspinlock...	133	#define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK \| _Q_PENDING_MASK)
2c83e8e94 Waiman Long locking/qspinlock...	134	#if _Q_PENDING_BITS == 8
69f9cae90 Peter Zijlstra (Intel) locking/qspinlock...	135	/**
59fb586b4 Will Deacon locking/qspinlock...	136 137 138 139 140 141 142 143 144 145 146	* clear_pending - clear the pending bit. * @lock: Pointer to queued spinlock structure * * ,1, -> ,0, / static __always_inline void clear_pending(struct qspinlock lock) { WRITE_ONCE(lock->pending, 0); } /**
69f9cae90 Peter Zijlstra (Intel) locking/qspinlock...	147 148 149 150 151 152 153 154 155	* clear_pending_set_locked - take ownership and clear the pending bit. * @lock: Pointer to queued spinlock structure * * ,1,0 -> ,0,1 * * Lock stealing is not allowed if this function is used. / static __always_inline void clear_pending_set_locked(struct qspinlock lock) {
625e88be1 Will Deacon locking/qspinlock...	156	WRITE_ONCE(lock->locked_pending, _Q_LOCKED_VAL);
69f9cae90 Peter Zijlstra (Intel) locking/qspinlock...	157 158 159 160 161 162 163 164	} /* * xchg_tail - Put in the new queue tail code word & retrieve previous one * @lock : Pointer to queued spinlock structure * @tail : The new queue tail code word * Return: The previous queue tail code word *
548095dea Paul E. McKenney locking: Remove s...	165	* xchg(lock, tail), which heads an address dependency
69f9cae90 Peter Zijlstra (Intel) locking/qspinlock...	166 167 168 169 170	* * p,, -> n,, ; prev = xchg(lock, node) / static __always_inline u32 xchg_tail(struct qspinlock lock, u32 tail) {
64d816cba Waiman Long locking/qspinlock...	171	/*
9d4646d14 Will Deacon locking/qspinlock...	172 173	* We can use relaxed semantics since the caller ensures that the * MCS node is properly initialized before updating the tail.
64d816cba Waiman Long locking/qspinlock...	174	*/
9d4646d14 Will Deacon locking/qspinlock...	175	return (u32)xchg_relaxed(&lock->tail,
64d816cba Waiman Long locking/qspinlock...	176	tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET;
69f9cae90 Peter Zijlstra (Intel) locking/qspinlock...	177 178 179	} #else /* _Q_PENDING_BITS == 8 */
a33fda35e Waiman Long locking/qspinlock...	180	/**
59fb586b4 Will Deacon locking/qspinlock...	181 182 183 184 185 186 187 188 189 190 191	* clear_pending - clear the pending bit. * @lock: Pointer to queued spinlock structure * * ,1, -> ,0, / static __always_inline void clear_pending(struct qspinlock lock) { atomic_andnot(_Q_PENDING_VAL, &lock->val); } /**
6403bd7d0 Waiman Long locking/qspinlock...	192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217	* clear_pending_set_locked - take ownership and clear the pending bit. * @lock: Pointer to queued spinlock structure * * ,1,0 -> ,0,1 / static __always_inline void clear_pending_set_locked(struct qspinlock lock) { atomic_add(-_Q_PENDING_VAL + _Q_LOCKED_VAL, &lock->val); } /** * xchg_tail - Put in the new queue tail code word & retrieve previous one * @lock : Pointer to queued spinlock structure * @tail : The new queue tail code word * Return: The previous queue tail code word * * xchg(lock, tail) * * p,, -> n,, ; prev = xchg(lock, node) / static __always_inline u32 xchg_tail(struct qspinlock lock, u32 tail) { u32 old, new, val = atomic_read(&lock->val); for (;;) { new = (val & _Q_LOCKED_PENDING_MASK) \| tail;
64d816cba Waiman Long locking/qspinlock...	218	/*
9d4646d14 Will Deacon locking/qspinlock...	219 220 221	* We can use relaxed semantics since the caller ensures that * the MCS node is properly initialized before updating the * tail.
64d816cba Waiman Long locking/qspinlock...	222	*/
9d4646d14 Will Deacon locking/qspinlock...	223	old = atomic_cmpxchg_relaxed(&lock->val, val, new);
6403bd7d0 Waiman Long locking/qspinlock...	224 225 226 227 228 229 230	if (old == val) break; val = old; } return old; }
69f9cae90 Peter Zijlstra (Intel) locking/qspinlock...	231	#endif /* _Q_PENDING_BITS == 8 */
6403bd7d0 Waiman Long locking/qspinlock...	232 233	/**
7aa54be29 Peter Zijlstra locking/qspinlock...	234 235 236 237 238 239 240 241 242 243 244 245 246 247	* queued_fetch_set_pending_acquire - fetch the whole lock value and set pending * @lock : Pointer to queued spinlock structure * Return: The previous lock value * * ,,* -> ,1, / #ifndef queued_fetch_set_pending_acquire static __always_inline u32 queued_fetch_set_pending_acquire(struct qspinlock lock) { return atomic_fetch_or_acquire(_Q_PENDING_VAL, &lock->val); } #endif /**
2c83e8e94 Waiman Long locking/qspinlock...	248 249 250 251 252 253 254	* set_locked - Set the lock bit and own the lock * @lock: Pointer to queued spinlock structure * * ,,0 -> ,0,1 / static __always_inline void set_locked(struct qspinlock *lock) {
625e88be1 Will Deacon locking/qspinlock...	255	WRITE_ONCE(lock->locked, _Q_LOCKED_VAL);
2c83e8e94 Waiman Long locking/qspinlock...	256	}
a23db284f Waiman Long locking/pvqspinlo...	257 258 259 260 261 262 263	/* * Generate the native code for queued_spin_unlock_slowpath(); provide NOPs for * all the PV callbacks. / static __always_inline void __pv_init_node(struct mcs_spinlock node) { }
cd0272fab Waiman Long locking/pvqspinlo...	264 265	static __always_inline void __pv_wait_node(struct mcs_spinlock node, struct mcs_spinlock prev) { }
75d227028 Waiman Long locking/pvqspinlo...	266 267	static __always_inline void __pv_kick_node(struct qspinlock lock, struct mcs_spinlock node) { }
1c4941fd5 Waiman Long locking/pvqspinlo...	268 269 270	static __always_inline u32 __pv_wait_head_or_lock(struct qspinlock lock, struct mcs_spinlock node) { return 0; }
a23db284f Waiman Long locking/pvqspinlo...	271 272 273 274 275 276	#define pv_enabled() false #define pv_init_node __pv_init_node #define pv_wait_node __pv_wait_node #define pv_kick_node __pv_kick_node
1c4941fd5 Waiman Long locking/pvqspinlo...	277	#define pv_wait_head_or_lock __pv_wait_head_or_lock
a23db284f Waiman Long locking/pvqspinlo...	278 279 280 281 282 283	#ifdef CONFIG_PARAVIRT_SPINLOCKS #define queued_spin_lock_slowpath native_queued_spin_lock_slowpath #endif #endif /* _GEN_PV_LOCK_SLOWPATH */
2c83e8e94 Waiman Long locking/qspinlock...	284	/**
a33fda35e Waiman Long locking/qspinlock...	285 286 287 288	* queued_spin_lock_slowpath - acquire the queued spinlock * @lock: Pointer to queued spinlock structure * @val: Current value of the queued spinlock 32-bit word *
c1fb159db Peter Zijlstra (Intel) locking/qspinlock...	289	* (queue tail, pending bit, lock value)
a33fda35e Waiman Long locking/qspinlock...	290	*
c1fb159db Peter Zijlstra (Intel) locking/qspinlock...	291 292 293 294 295 296 297 298 299 300 301 302 303	* fast : slow : unlock * : : * uncontended (0,0,0) -:--> (0,0,1) ------------------------------:--> (,,0) * : \| ^--------.------. / : * : v \ \ \| : * pending : (0,1,1) +--> (0,1,0) \ \| : * : \| ^--' \| \| : * : v \| \| : * uncontended : (n,x,y) +--> (n,0,0) --' \| : * queue : \| ^--' \| : * : v \| : * contended : (,x,y) +--> (,0,0) ---> (,0,1) -' : queue : ^--' :
a33fda35e Waiman Long locking/qspinlock...	304 305 306 307	/ void queued_spin_lock_slowpath(struct qspinlock lock, u32 val) { struct mcs_spinlock prev, next, *node;
59fb586b4 Will Deacon locking/qspinlock...	308	u32 old, tail;
a33fda35e Waiman Long locking/qspinlock...	309 310 311	int idx; BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));
a23db284f Waiman Long locking/pvqspinlo...	312	if (pv_enabled())
81d3dc9a3 Waiman Long locking/qspinlock...	313	goto pv_queue;
a23db284f Waiman Long locking/pvqspinlo...	314
43b3f0289 Peter Zijlstra locking/qspinlock...	315	if (virt_spin_lock(lock))
2aa79af64 Peter Zijlstra (Intel) locking/qspinlock...	316	return;
c1fb159db Peter Zijlstra (Intel) locking/qspinlock...	317	/*
6512276d9 Will Deacon locking/qspinlock...	318 319	* Wait for in-progress pending->locked hand-overs with a bounded * number of spins so that we guarantee forward progress.
c1fb159db Peter Zijlstra (Intel) locking/qspinlock...	320 321 322 323	* * 0,1,0 -> 0,0,1 */ if (val == _Q_PENDING_VAL) {
6512276d9 Will Deacon locking/qspinlock...	324 325 326	int cnt = _Q_PENDING_LOOPS; val = atomic_cond_read_relaxed(&lock->val, (VAL != _Q_PENDING_VAL) \|\| !cnt--);
c1fb159db Peter Zijlstra (Intel) locking/qspinlock...	327 328 329	} /*
59fb586b4 Will Deacon locking/qspinlock...	330 331 332 333 334 335	* If we observe any contention; queue. / if (val & ~_Q_LOCKED_MASK) goto queue; /
c1fb159db Peter Zijlstra (Intel) locking/qspinlock...	336 337	* trylock \|\| pending *
756b1df4c Peter Zijlstra locking/qspinlock...	338	* 0,0,* -> 0,1,* -> 0,0,1 pending, trylock
c1fb159db Peter Zijlstra (Intel) locking/qspinlock...	339	*/
7aa54be29 Peter Zijlstra locking/qspinlock...	340	val = queued_fetch_set_pending_acquire(lock);
756b1df4c Peter Zijlstra locking/qspinlock...	341
53bf57fab Peter Zijlstra locking/qspinlock...	342	/*
756b1df4c Peter Zijlstra locking/qspinlock...	343 344 345 346 347	* If we observe contention, there is a concurrent locker. * * Undo and queue; our setting of PENDING might have made the * n,0,0 -> 0,0,0 transition fail and it will now be waiting * on @next to become !NULL.
53bf57fab Peter Zijlstra locking/qspinlock...	348 349	*/ if (unlikely(val & ~_Q_LOCKED_MASK)) {
756b1df4c Peter Zijlstra locking/qspinlock...	350 351	/* Undo PENDING if we set it. */
53bf57fab Peter Zijlstra locking/qspinlock...	352 353	if (!(val & _Q_PENDING_MASK)) clear_pending(lock);
756b1df4c Peter Zijlstra locking/qspinlock...	354
53bf57fab Peter Zijlstra locking/qspinlock...	355	goto queue;
59fb586b4 Will Deacon locking/qspinlock...	356	}
c1fb159db Peter Zijlstra (Intel) locking/qspinlock...	357 358	/*
53bf57fab Peter Zijlstra locking/qspinlock...	359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375	* We're pending, wait for the owner to go away. * * 0,1,1 -> 0,1,0 * * this wait loop must be a load-acquire such that we match the * store-release that clears the locked bit and create lock * sequentiality; this is because not all * clear_pending_set_locked() implementations imply full * barriers. / if (val & _Q_LOCKED_MASK) atomic_cond_read_acquire(&lock->val, !(VAL & _Q_LOCKED_MASK)); / * take ownership and clear the pending bit. * * 0,1,0 -> 0,0,1
c1fb159db Peter Zijlstra (Intel) locking/qspinlock...	376	*/
53bf57fab Peter Zijlstra locking/qspinlock...	377	clear_pending_set_locked(lock);
ad53fa10f Waiman Long locking/qspinlock...	378	lockevent_inc(lock_pending);
53bf57fab Peter Zijlstra locking/qspinlock...	379	return;
c1fb159db Peter Zijlstra (Intel) locking/qspinlock...	380 381 382 383 384 385	/* * End of pending bit optimistic spinning and beginning of MCS * queuing. */ queue:
ad53fa10f Waiman Long locking/qspinlock...	386	lockevent_inc(lock_slowpath);
81d3dc9a3 Waiman Long locking/qspinlock...	387	pv_queue:
0fa809ca7 Waiman Long locking/pvqspinlo...	388	node = this_cpu_ptr(&qnodes[0].mcs);
a33fda35e Waiman Long locking/qspinlock...	389 390	idx = node->count++; tail = encode_tail(smp_processor_id(), idx);
d682b596d Waiman Long locking/qspinlock...	391 392 393 394 395 396 397 398 399 400	/* * 4 nodes are allocated based on the assumption that there will * not be nested NMIs taking spinlocks. That may not be true in * some architectures even though the chance of needing more than * 4 nodes will still be extremely unlikely. When that happens, * we fall back to spinning on the lock directly without using * any MCS node. This is not the most elegant solution, but is * simple enough. */ if (unlikely(idx >= MAX_NODES)) {
ad53fa10f Waiman Long locking/qspinlock...	401	lockevent_inc(lock_no_node);
d682b596d Waiman Long locking/qspinlock...	402 403 404 405	while (!queued_spin_trylock(lock)) cpu_relax(); goto release; }
0fa809ca7 Waiman Long locking/pvqspinlo...	406	node = grab_mcs_node(node, idx);
11dc13224 Will Deacon locking/qspinlock...	407 408	/*
1222109a5 Waiman Long locking/qspinlock...	409 410	* Keep counts of non-zero index values: */
ad53fa10f Waiman Long locking/qspinlock...	411	lockevent_cond_inc(lock_use_node2 + idx - 1, idx);
1222109a5 Waiman Long locking/qspinlock...	412 413	/*
11dc13224 Will Deacon locking/qspinlock...	414 415 416 417 418	* Ensure that we increment the head node->count before initialising * the actual node. If the compiler is kind enough to reorder these * stores, then an IRQ could overwrite our assignments. */ barrier();
a33fda35e Waiman Long locking/qspinlock...	419 420	node->locked = 0; node->next = NULL;
a23db284f Waiman Long locking/pvqspinlo...	421	pv_init_node(node);
a33fda35e Waiman Long locking/qspinlock...	422 423	/*
6403bd7d0 Waiman Long locking/qspinlock...	424 425 426	* We touched a (possibly) cold cacheline in the per-cpu queue node; * attempt the trylock once more in the hope someone let go while we * weren't watching.
a33fda35e Waiman Long locking/qspinlock...	427	*/
6403bd7d0 Waiman Long locking/qspinlock...	428 429	if (queued_spin_trylock(lock)) goto release;
a33fda35e Waiman Long locking/qspinlock...	430 431	/*
9d4646d14 Will Deacon locking/qspinlock...	432 433 434 435 436 437 438 439	* Ensure that the initialisation of @node is complete before we * publish the updated tail via xchg_tail() and potentially link * @node into the waitqueue via WRITE_ONCE(prev->next, node) below. / smp_wmb(); / * Publish the updated tail.
6403bd7d0 Waiman Long locking/qspinlock...	440 441 442 443	* We have already touched the queueing cacheline; don't bother with * pending stuff. * * p,, -> n,,
a33fda35e Waiman Long locking/qspinlock...	444	*/
6403bd7d0 Waiman Long locking/qspinlock...	445	old = xchg_tail(lock, tail);
aa68744f8 Waiman Long locking/qspinlock...	446	next = NULL;
a33fda35e Waiman Long locking/qspinlock...	447 448 449 450 451	/* * if there was a previous node; link it and wait until reaching the * head of the waitqueue. */
6403bd7d0 Waiman Long locking/qspinlock...	452	if (old & _Q_TAIL_MASK) {
a33fda35e Waiman Long locking/qspinlock...	453	prev = decode_tail(old);
95bcade33 Will Deacon locking/qspinlock...	454
9d4646d14 Will Deacon locking/qspinlock...	455 456	/* Link @node into the waitqueue. */ WRITE_ONCE(prev->next, node);
a33fda35e Waiman Long locking/qspinlock...	457
cd0272fab Waiman Long locking/pvqspinlo...	458	pv_wait_node(node, prev);
a33fda35e Waiman Long locking/qspinlock...	459	arch_mcs_spin_lock_contended(&node->locked);
81b559866 Waiman Long locking/qspinlock...	460 461 462 463 464 465 466 467 468 469	/* * While waiting for the MCS lock, the next pointer may have * been set by another lock waiter. We optimistically load * the next pointer & prefetch the cacheline for writing * to reduce latency in the upcoming MCS unlock operation. */ next = READ_ONCE(node->next); if (next) prefetchw(next);
a33fda35e Waiman Long locking/qspinlock...	470 471 472	} /*
c1fb159db Peter Zijlstra (Intel) locking/qspinlock...	473 474	* we're at the head of the waitqueue, wait for the owner & pending to * go away.
a33fda35e Waiman Long locking/qspinlock...	475	*
c1fb159db Peter Zijlstra (Intel) locking/qspinlock...	476	* ,x,y -> ,0,0
2c83e8e94 Waiman Long locking/qspinlock...	477 478 479 480 481 482	* * this wait loop must use a load-acquire such that we match the * store-release that clears the locked bit and create lock * sequentiality; this is because the set_locked() function below * does not imply a full barrier. *
1c4941fd5 Waiman Long locking/pvqspinlo...	483 484	* The PV pv_wait_head_or_lock function, if active, will acquire * the lock and return a non-zero value. So we have to skip the
f9c811fac Will Deacon locking/qspinlock...	485 486	* atomic_cond_read_acquire() call. As the next PV queue head hasn't * been designated yet, there is no way for the locked value to become
1c4941fd5 Waiman Long locking/pvqspinlo...	487 488 489 490 491	* _Q_SLOW_VAL. So both the set_locked() and the * atomic_cmpxchg_relaxed() calls will be safe. * * If PV isn't active, 0 will be returned instead. *
a33fda35e Waiman Long locking/qspinlock...	492	*/
1c4941fd5 Waiman Long locking/pvqspinlo...	493 494	if ((val = pv_wait_head_or_lock(lock, node))) goto locked;
f9c811fac Will Deacon locking/qspinlock...	495	val = atomic_cond_read_acquire(&lock->val, !(VAL & _Q_LOCKED_PENDING_MASK));
a33fda35e Waiman Long locking/qspinlock...	496
1c4941fd5 Waiman Long locking/pvqspinlo...	497	locked:
a33fda35e Waiman Long locking/qspinlock...	498 499 500	/* * claim the lock: *
c1fb159db Peter Zijlstra (Intel) locking/qspinlock...	501	* n,0,0 -> 0,0,1 : lock, uncontended
59fb586b4 Will Deacon locking/qspinlock...	502	* ,,0 -> ,,1 : lock, contended
2c83e8e94 Waiman Long locking/qspinlock...	503	*
59fb586b4 Will Deacon locking/qspinlock...	504 505 506	* If the queue head is the only one in the queue (lock value == tail) * and nobody is pending, clear the tail code and grab the lock. * Otherwise, we only need to grab the lock.
a33fda35e Waiman Long locking/qspinlock...	507	*/
c61da58d8 Will Deacon locking/qspinlock...	508
ae75d9089 Will Deacon locking/qspinlock...	509	/*
756b1df4c Peter Zijlstra locking/qspinlock...	510 511	* In the PV case we might already have _Q_LOCKED_VAL set, because * of lock stealing; therefore we must also allow:
ae75d9089 Will Deacon locking/qspinlock...	512	*
756b1df4c Peter Zijlstra locking/qspinlock...	513 514 515 516 517	* n,0,1 -> 0,0,1 * * Note: at this point: (val & _Q_PENDING_MASK) == 0, because of the * above wait condition, therefore any concurrent setting of * PENDING will make the uncontended transition fail.
ae75d9089 Will Deacon locking/qspinlock...	518	*/
756b1df4c Peter Zijlstra locking/qspinlock...	519 520 521 522	if ((val & _Q_TAIL_MASK) == tail) { if (atomic_try_cmpxchg_relaxed(&lock->val, &val, _Q_LOCKED_VAL)) goto release; /* No contention */ }
a33fda35e Waiman Long locking/qspinlock...	523
756b1df4c Peter Zijlstra locking/qspinlock...	524 525 526 527 528	/* * Either somebody is queued behind us or _Q_PENDING_VAL got set * which will then detect the remaining tail and queue behind us * ensuring we'll see a @next. */
c61da58d8 Will Deacon locking/qspinlock...	529	set_locked(lock);
a33fda35e Waiman Long locking/qspinlock...	530	/*
aa68744f8 Waiman Long locking/qspinlock...	531	* contended path; wait for next if not observed yet, release.
a33fda35e Waiman Long locking/qspinlock...	532	*/
c131a198c Will Deacon locking/qspinlock...	533 534	if (!next) next = smp_cond_load_relaxed(&node->next, (VAL));
a33fda35e Waiman Long locking/qspinlock...	535
2c83e8e94 Waiman Long locking/qspinlock...	536	arch_mcs_spin_unlock_contended(&next->locked);
75d227028 Waiman Long locking/pvqspinlo...	537	pv_kick_node(lock, next);
a33fda35e Waiman Long locking/qspinlock...	538 539 540 541 542	release: /* * release the node */
0fa809ca7 Waiman Long locking/pvqspinlo...	543	__this_cpu_dec(qnodes[0].mcs.count);
a33fda35e Waiman Long locking/qspinlock...	544 545	} EXPORT_SYMBOL(queued_spin_lock_slowpath);
/*
 * Generate the paravirt code for queued_spin_lock_slowpath().
 *
 * This file includes itself once more with _GEN_PV_LOCK_SLOWPATH defined,
 * pv_enabled() forced to true and the slowpath renamed, so the same function
 * body is compiled a second time as __pv_queued_spin_lock_slowpath.
 */
#if !defined(_GEN_PV_LOCK_SLOWPATH) && defined(CONFIG_PARAVIRT_SPINLOCKS)
#define _GEN_PV_LOCK_SLOWPATH

#undef  pv_enabled
#define pv_enabled()	true

#undef pv_init_node
#undef pv_wait_node
#undef pv_kick_node
#undef pv_wait_head_or_lock

#undef  queued_spin_lock_slowpath
#define queued_spin_lock_slowpath	__pv_queued_spin_lock_slowpath

#include "qspinlock_paravirt.h"
#include "qspinlock.c"

bool nopvspin __initdata;

/*
 * Early-parameter handler for "nopvspin": records that the option was given.
 * @arg is not examined. Always returns 0.
 */
static __init int parse_nopvspin(char *arg)
{
	nopvspin = true;
	return 0;
}
early_param("nopvspin", parse_nopvspin);
#endif