Commit 47667fa1502e4d759df87e9cc7fbc0f202483361

Authored by Jason Low
Committed by Ingo Molnar
1 parent 46af29e479

locking/mutexes: Modify the way optimistic spinners are queued

The mutex->spin_mlock was introduced in order to ensure that only 1 thread
spins for lock acquisition at a time to reduce cache line contention. When
lock->owner is NULL and lock->count is still not 1, the spinner(s) will
continually acquire and release the lock->spin_mlock. This generates
quite a bit of overhead/contention, and can also delay the spinner
from getting the lock.
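
For illustration, a condensed sketch of the pre-patch loop structure (an
approximation only; the ww_mutex handling and the details of the acquired
path are omitted):

    for (;;) {
        struct mcs_spinlock node;
        struct task_struct *owner;

        /* every iteration queues on the MCS lock... */
        mcs_spin_lock(&lock->mcs_lock, &node);

        owner = ACCESS_ONCE(lock->owner);
        if (owner && !mutex_spin_on_owner(lock, owner)) {
            mcs_spin_unlock(&lock->mcs_lock, &node);
            goto slowpath;
        }

        if ((atomic_read(&lock->count) == 1) &&
            (atomic_cmpxchg(&lock->count, 1, 0) == 1))
            return 0;       /* acquired; real code drops the MCS lock first */

        /* ...and drops it again before the next spin */
        mcs_spin_unlock(&lock->mcs_lock, &node);

        if (!owner && (need_resched() || rt_task(task)))
            goto slowpath;

        arch_mutex_cpu_relax();
    }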

This patch modifies the way optimistic spinners are queued: the spinner
queues on the MCS lock once before entering the optimistic spinning loop,
as opposed to acquiring it before every call to mutex_spin_on_owner(). In
situations where the spinner requires a few extra spins before obtaining
the lock, there is then only 1 spinner trying to get the lock, and the
overhead of unnecessarily unlocking and locking the spin_mlock is avoided.
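
Correspondingly, a condensed sketch of the structure after this patch
(again only an approximation of the real code):

    /* queue on the MCS lock once, before entering the loop */
    mcs_spin_lock(&lock->mcs_lock, &node);
    for (;;) {
        struct task_struct *owner;

        owner = ACCESS_ONCE(lock->owner);
        if (owner && !mutex_spin_on_owner(lock, owner))
            break;          /* fall through to the slowpath */

        if ((atomic_read(&lock->count) == 1) &&
            (atomic_cmpxchg(&lock->count, 1, 0) == 1))
            return 0;       /* acquired; real code drops the MCS lock first */

        if (!owner && (need_resched() || rt_task(task)))
            break;

        arch_mutex_cpu_relax();
    }
    /* single unlock on the way to the slowpath */
    mcs_spin_unlock(&lock->mcs_lock, &node);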

Signed-off-by: Jason Low <jason.low2@hp.com>
Cc: tglx@linutronix.de
Cc: riel@redhat.com
Cc: akpm@linux-foundation.org
Cc: davidlohr@hp.com
Cc: hpa@zytor.com
Cc: andi@firstfloor.org
Cc: aswin@hp.com
Cc: scott.norton@hp.com
Cc: chegu_vinod@hp.com
Cc: Waiman.Long@hp.com
Cc: paulmck@linux.vnet.ibm.com
Cc: torvalds@linux-foundation.org
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1390936396-3962-3-git-send-email-jason.low2@hp.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>

Showing 1 changed file with 6 additions and 9 deletions

kernel/locking/mutex.c
@@ -403,9 +403,9 @@
 	if (!mutex_can_spin_on_owner(lock))
 		goto slowpath;
 
+	mcs_spin_lock(&lock->mcs_lock, &node);
 	for (;;) {
 		struct task_struct *owner;
-		struct mcs_spinlock node;
 
 		if (use_ww_ctx && ww_ctx->acquired > 0) {
 			struct ww_mutex *ww;
@@ -420,19 +420,16 @@
 			 * performed the optimistic spinning cannot be done.
 			 */
 			if (ACCESS_ONCE(ww->ctx))
-				goto slowpath;
+				break;
 		}
 
 		/*
 		 * If there's an owner, wait for it to either
 		 * release the lock or go to sleep.
 		 */
-		mcs_spin_lock(&lock->mcs_lock, &node);
 		owner = ACCESS_ONCE(lock->owner);
-		if (owner && !mutex_spin_on_owner(lock, owner)) {
-			mcs_spin_unlock(&lock->mcs_lock, &node);
-			goto slowpath;
-		}
+		if (owner && !mutex_spin_on_owner(lock, owner))
+			break;
 
 		if ((atomic_read(&lock->count) == 1) &&
 		    (atomic_cmpxchg(&lock->count, 1, 0) == 1)) {
@@ -449,7 +446,6 @@
 			preempt_enable();
 			return 0;
 		}
-		mcs_spin_unlock(&lock->mcs_lock, &node);
 
 		/*
 		 * When there's no owner, we might have preempted between the
@@ -458,7 +454,7 @@
 		 * the owner complete.
 		 */
 		if (!owner && (need_resched() || rt_task(task)))
-			goto slowpath;
+			break;
 
 		/*
 		 * The cpu_relax() call is a compiler barrier which forces
@@ -468,6 +464,7 @@
 		 */
 		arch_mutex_cpu_relax();
 	}
+	mcs_spin_unlock(&lock->mcs_lock, &node);
 slowpath:
 #endif
 	spin_lock_mutex(&lock->wait_lock, flags);