Commit 3960c8c0c7891dfc0f7be687cbdabb0d6916d614

Authored by Peter Zijlstra
Committed by Ingo Molnar
1 parent 74b8a4cb6c

sched: Make dl_task_timer() use task_rq_lock()

Kirill reported that a dl task can be throttled and dequeued at the
same time. This happens when the task becomes throttled in schedule(),
which is called to go to sleep:

current->state = TASK_INTERRUPTIBLE;
schedule()
    deactivate_task()
        dequeue_task_dl()
            update_curr_dl()
                start_dl_timer()
            __dequeue_task_dl()
    prev->on_rq = 0;

This invalidates the assumption from commit 0f397f2c90ce ("sched/dl:
Fix race in dl_task_timer()"):

  "The only reason we don't strictly need ->pi_lock now is because
   we're guaranteed to have p->state == TASK_RUNNING here and are
   thus free of ttwu races".

Therefore we have to use the full task_rq_lock() here; it takes
->pi_lock as well, which restores the protection against ttwu races.
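
For reference, here is a condensed sketch of the timer callback after the
change, assembled from the kernel/sched/deadline.c hunks in the diff below.
The race handling inside the callback is elided, so this is an illustration
rather than compilable code:

static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
{
	struct sched_dl_entity *dl_se = container_of(timer,
						     struct sched_dl_entity,
						     dl_timer);
	struct task_struct *p = dl_task_of(dl_se);
	unsigned long flags;
	struct rq *rq;

	rq = task_rq_lock(current, &flags);	/* ->pi_lock + rq->lock */

	/* ... replenishment / enqueue / push logic, unchanged ... */

	task_rq_unlock(rq, current, &flags);

	return HRTIMER_NORESTART;
}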

This also makes up for the fact that the open-coded rq lock loop in
dl_task_timer() was never updated for TASK_ON_RQ_MIGRATING, introduced
by commit cca26e8009d1 ("sched: Teach scheduler to understand
TASK_ON_RQ_MIGRATING state").
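
As an aside, the lock-and-recheck pattern that task_rq_lock() implements
(lock the rq the task appears to be on, then verify under the lock that it
neither moved nor is in the middle of a migration) can be shown in plain
userspace C. The following is a minimal, self-contained sketch with
hypothetical names (fake_rq, fake_task, fake_task_rq_lock); it only mirrors
the shape of the retry loop and is not kernel code:

/*
 * Hypothetical userspace analogue of the task_rq_lock() retry loop:
 * lock the queue we believe the task is on, then recheck that it did
 * not move and is not marked as migrating; otherwise drop the lock,
 * wait for the migration to finish and try again.
 */
#include <pthread.h>
#include <sched.h>
#include <stdatomic.h>
#include <stdbool.h>

struct fake_rq {
	pthread_mutex_t lock;
};

struct fake_task {
	_Atomic(struct fake_rq *) rq;	/* stands in for task_rq(p) */
	atomic_bool migrating;		/* stands in for TASK_ON_RQ_MIGRATING */
};

static struct fake_rq *fake_task_rq_lock(struct fake_task *p)
{
	struct fake_rq *rq;

	for (;;) {
		rq = atomic_load(&p->rq);
		pthread_mutex_lock(&rq->lock);
		/* Recheck under the lock: did the task move or start migrating? */
		if (rq == atomic_load(&p->rq) && !atomic_load(&p->migrating))
			return rq;
		pthread_mutex_unlock(&rq->lock);

		/* Wait for the migration to complete, then retry. */
		while (atomic_load(&p->migrating))
			sched_yield();
	}
}

int main(void)
{
	struct fake_rq rq0 = { .lock = PTHREAD_MUTEX_INITIALIZER };
	struct fake_task t = { .rq = &rq0, .migrating = false };
	struct fake_rq *rq = fake_task_rq_lock(&t);

	pthread_mutex_unlock(&rq->lock);
	return 0;
}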

Reported-by: Kirill Tkhai <ktkhai@parallels.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Juri Lelli <juri.lelli@arm.com>
Link: http://lkml.kernel.org/r/20150217123139.GN5029@twins.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>

Showing 3 changed files with 79 additions and 85 deletions

kernel/sched/core.c
... ... @@ -307,82 +307,6 @@
307 307 int sysctl_sched_rt_runtime = 950000;
308 308  
309 309 /*
310   - * __task_rq_lock - lock the rq @p resides on.
311   - */
312   -static inline struct rq *__task_rq_lock(struct task_struct *p)
313   - __acquires(rq->lock)
314   -{
315   - struct rq *rq;
316   -
317   - lockdep_assert_held(&p->pi_lock);
318   -
319   - for (;;) {
320   - rq = task_rq(p);
321   - raw_spin_lock(&rq->lock);
322   - if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
323   - return rq;
324   - raw_spin_unlock(&rq->lock);
325   -
326   - while (unlikely(task_on_rq_migrating(p)))
327   - cpu_relax();
328   - }
329   -}
330   -
331   -/*
332   - * task_rq_lock - lock p->pi_lock and lock the rq @p resides on.
333   - */
334   -static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
335   - __acquires(p->pi_lock)
336   - __acquires(rq->lock)
337   -{
338   - struct rq *rq;
339   -
340   - for (;;) {
341   - raw_spin_lock_irqsave(&p->pi_lock, *flags);
342   - rq = task_rq(p);
343   - raw_spin_lock(&rq->lock);
344   - /*
345   - * move_queued_task() task_rq_lock()
346   - *
347   - * ACQUIRE (rq->lock)
348   - * [S] ->on_rq = MIGRATING [L] rq = task_rq()
349   - * WMB (__set_task_cpu()) ACQUIRE (rq->lock);
350   - * [S] ->cpu = new_cpu [L] task_rq()
351   - * [L] ->on_rq
352   - * RELEASE (rq->lock)
353   - *
354   - * If we observe the old cpu in task_rq_lock, the acquire of
355   - * the old rq->lock will fully serialize against the stores.
356   - *
357   - * If we observe the new cpu in task_rq_lock, the acquire will
358   - * pair with the WMB to ensure we must then also see migrating.
359   - */
360   - if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
361   - return rq;
362   - raw_spin_unlock(&rq->lock);
363   - raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
364   -
365   - while (unlikely(task_on_rq_migrating(p)))
366   - cpu_relax();
367   - }
368   -}
369   -
370   -static void __task_rq_unlock(struct rq *rq)
371   - __releases(rq->lock)
372   -{
373   - raw_spin_unlock(&rq->lock);
374   -}
375   -
376   -static inline void
377   -task_rq_unlock(struct rq *rq, struct task_struct *p, unsigned long *flags)
378   - __releases(rq->lock)
379   - __releases(p->pi_lock)
380   -{
381   - raw_spin_unlock(&rq->lock);
382   - raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
383   -}
384   -
385   -/*
386 310 * this_rq_lock - lock this runqueue and disable interrupts.
387 311 */
388 312 static struct rq *this_rq_lock(void)
kernel/sched/deadline.c
... ... @@ -511,16 +511,10 @@
511 511 struct sched_dl_entity,
512 512 dl_timer);
513 513 struct task_struct *p = dl_task_of(dl_se);
  514 + unsigned long flags;
514 515 struct rq *rq;
515   -again:
516   - rq = task_rq(p);
517   - raw_spin_lock(&rq->lock);
518 516  
519   - if (rq != task_rq(p)) {
520   - /* Task was moved, retrying. */
521   - raw_spin_unlock(&rq->lock);
522   - goto again;
523   - }
  517 + rq = task_rq_lock(current, &flags);
524 518  
525 519 /*
526 520 * We need to take care of several possible races here:
... ... @@ -555,7 +549,7 @@
555 549 push_dl_task(rq);
556 550 #endif
557 551 unlock:
558   - raw_spin_unlock(&rq->lock);
  552 + task_rq_unlock(rq, current, &flags);
559 553  
560 554 return HRTIMER_NORESTART;
561 555 }
kernel/sched/sched.h
... ... @@ -1380,6 +1380,82 @@
1380 1380  
1381 1381 extern void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period);
1382 1382  
  1383 +/*
  1384 + * __task_rq_lock - lock the rq @p resides on.
  1385 + */
  1386 +static inline struct rq *__task_rq_lock(struct task_struct *p)
  1387 + __acquires(rq->lock)
  1388 +{
  1389 + struct rq *rq;
  1390 +
  1391 + lockdep_assert_held(&p->pi_lock);
  1392 +
  1393 + for (;;) {
  1394 + rq = task_rq(p);
  1395 + raw_spin_lock(&rq->lock);
  1396 + if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
  1397 + return rq;
  1398 + raw_spin_unlock(&rq->lock);
  1399 +
  1400 + while (unlikely(task_on_rq_migrating(p)))
  1401 + cpu_relax();
  1402 + }
  1403 +}
  1404 +
  1405 +/*
  1406 + * task_rq_lock - lock p->pi_lock and lock the rq @p resides on.
  1407 + */
  1408 +static inline struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
  1409 + __acquires(p->pi_lock)
  1410 + __acquires(rq->lock)
  1411 +{
  1412 + struct rq *rq;
  1413 +
  1414 + for (;;) {
  1415 + raw_spin_lock_irqsave(&p->pi_lock, *flags);
  1416 + rq = task_rq(p);
  1417 + raw_spin_lock(&rq->lock);
  1418 + /*
  1419 + * move_queued_task() task_rq_lock()
  1420 + *
  1421 + * ACQUIRE (rq->lock)
  1422 + * [S] ->on_rq = MIGRATING [L] rq = task_rq()
  1423 + * WMB (__set_task_cpu()) ACQUIRE (rq->lock);
  1424 + * [S] ->cpu = new_cpu [L] task_rq()
  1425 + * [L] ->on_rq
  1426 + * RELEASE (rq->lock)
  1427 + *
  1428 + * If we observe the old cpu in task_rq_lock, the acquire of
  1429 + * the old rq->lock will fully serialize against the stores.
  1430 + *
  1431 + * If we observe the new cpu in task_rq_lock, the acquire will
  1432 + * pair with the WMB to ensure we must then also see migrating.
  1433 + */
  1434 + if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
  1435 + return rq;
  1436 + raw_spin_unlock(&rq->lock);
  1437 + raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
  1438 +
  1439 + while (unlikely(task_on_rq_migrating(p)))
  1440 + cpu_relax();
  1441 + }
  1442 +}
  1443 +
  1444 +static inline void __task_rq_unlock(struct rq *rq)
  1445 + __releases(rq->lock)
  1446 +{
  1447 + raw_spin_unlock(&rq->lock);
  1448 +}
  1449 +
  1450 +static inline void
  1451 +task_rq_unlock(struct rq *rq, struct task_struct *p, unsigned long *flags)
  1452 + __releases(rq->lock)
  1453 + __releases(p->pi_lock)
  1454 +{
  1455 + raw_spin_unlock(&rq->lock);
  1456 + raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
  1457 +}
  1458 +
1383 1459 #ifdef CONFIG_SMP
1384 1460 #ifdef CONFIG_PREEMPT
1385 1461