Commit 3960c8c0c7891dfc0f7be687cbdabb0d6916d614

Authored by Peter Zijlstra
Committed by Ingo Molnar
1 parent 74b8a4cb6c

sched: Make dl_task_timer() use task_rq_lock()

Kirill reported that a dl task can be throttled and dequeued at the
same time. This happens when the task becomes throttled in schedule(),
which is called to go to sleep:

current->state = TASK_INTERRUPTIBLE;
schedule()
    deactivate_task()
        dequeue_task_dl()
            update_curr_dl()
                start_dl_timer()
            __dequeue_task_dl()
    prev->on_rq = 0;

This invalidates the assumption from commit 0f397f2c90ce ("sched/dl:
Fix race in dl_task_timer()"):

  "The only reason we don't strictly need ->pi_lock now is because
   we're guaranteed to have p->state == TASK_RUNNING here and are
   thus free of ttwu races".

Therefore we have to use the full task_rq_lock() here; it takes
->pi_lock as well, which restores the protection against ttwu races.
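
For reference, here is a condensed sketch of the timer callback after the
change, assembled from the kernel/sched/deadline.c hunks in the diff below.
The race handling inside the callback is elided, so this is an illustration
rather than compilable code:

static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
{
	struct sched_dl_entity *dl_se = container_of(timer,
						     struct sched_dl_entity,
						     dl_timer);
	struct task_struct *p = dl_task_of(dl_se);
	unsigned long flags;
	struct rq *rq;

	rq = task_rq_lock(current, &flags);	/* ->pi_lock + rq->lock */

	/* ... replenishment / enqueue / push logic, unchanged ... */

	task_rq_unlock(rq, current, &flags);

	return HRTIMER_NORESTART;
}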

This also makes up for the fact that the open-coded rq lock loop in
dl_task_timer() was never updated for TASK_ON_RQ_MIGRATING, introduced
by commit cca26e8009d1 ("sched: Teach scheduler to understand
TASK_ON_RQ_MIGRATING state").
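
As an aside, the lock-and-recheck pattern that task_rq_lock() implements
(lock the rq the task appears to be on, then verify under the lock that it
neither moved nor is in the middle of a migration) can be shown in plain
userspace C. The following is a minimal, self-contained sketch with
hypothetical names (fake_rq, fake_task, fake_task_rq_lock); it only mirrors
the shape of the retry loop and is not kernel code:

/*
 * Hypothetical userspace analogue of the task_rq_lock() retry loop:
 * lock the queue we believe the task is on, then recheck that it did
 * not move and is not marked as migrating; otherwise drop the lock,
 * wait for the migration to finish and try again.
 */
#include <pthread.h>
#include <sched.h>
#include <stdatomic.h>
#include <stdbool.h>

struct fake_rq {
	pthread_mutex_t lock;
};

struct fake_task {
	_Atomic(struct fake_rq *) rq;	/* stands in for task_rq(p) */
	atomic_bool migrating;		/* stands in for TASK_ON_RQ_MIGRATING */
};

static struct fake_rq *fake_task_rq_lock(struct fake_task *p)
{
	struct fake_rq *rq;

	for (;;) {
		rq = atomic_load(&p->rq);
		pthread_mutex_lock(&rq->lock);
		/* Recheck under the lock: did the task move or start migrating? */
		if (rq == atomic_load(&p->rq) && !atomic_load(&p->migrating))
			return rq;
		pthread_mutex_unlock(&rq->lock);

		/* Wait for the migration to complete, then retry. */
		while (atomic_load(&p->migrating))
			sched_yield();
	}
}

int main(void)
{
	struct fake_rq rq0 = { .lock = PTHREAD_MUTEX_INITIALIZER };
	struct fake_task t = { .rq = &rq0, .migrating = false };
	struct fake_rq *rq = fake_task_rq_lock(&t);

	pthread_mutex_unlock(&rq->lock);
	return 0;
}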

Reported-by: Kirill Tkhai <ktkhai@parallels.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Juri Lelli <juri.lelli@arm.com>
Link: http://lkml.kernel.org/r/20150217123139.GN5029@twins.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>

Showing 3 changed files with 79 additions and 85 deletions

kernel/sched/core.c
... ... @@ -307,82 +307,6 @@
307 307 int sysctl_sched_rt_runtime = 950000;
308 308  
309 309 /*
310   - * __task_rq_lock - lock the rq @p resides on.
311   - */
312   -static inline struct rq *__task_rq_lock(struct task_struct *p)
313   - __acquires(rq->lock)
314   -{
315   - struct rq *rq;
316   -
317   - lockdep_assert_held(&p->pi_lock);
318   -
319   - for (;;) {
320   - rq = task_rq(p);
321   - raw_spin_lock(&rq->lock);
322   - if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
323   - return rq;
324   - raw_spin_unlock(&rq->lock);
325   -
326   - while (unlikely(task_on_rq_migrating(p)))
327   - cpu_relax();
328   - }
329   -}
330   -
331   -/*
332   - * task_rq_lock - lock p->pi_lock and lock the rq @p resides on.
333   - */
334   -static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
335   - __acquires(p->pi_lock)
336   - __acquires(rq->lock)
337   -{
338   - struct rq *rq;
339   -
340   - for (;;) {
341   - raw_spin_lock_irqsave(&p->pi_lock, *flags);
342   - rq = task_rq(p);
343   - raw_spin_lock(&rq->lock);
344   - /*
345   - * move_queued_task() task_rq_lock()
346   - *
347   - * ACQUIRE (rq->lock)
348   - * [S] ->on_rq = MIGRATING [L] rq = task_rq()
349   - * WMB (__set_task_cpu()) ACQUIRE (rq->lock);
350   - * [S] ->cpu = new_cpu [L] task_rq()
351   - * [L] ->on_rq
352   - * RELEASE (rq->lock)
353   - *
354   - * If we observe the old cpu in task_rq_lock, the acquire of
355   - * the old rq->lock will fully serialize against the stores.
356   - *
357   - * If we observe the new cpu in task_rq_lock, the acquire will
358   - * pair with the WMB to ensure we must then also see migrating.
359   - */
360   - if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
361   - return rq;
362   - raw_spin_unlock(&rq->lock);
363   - raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
364   -
365   - while (unlikely(task_on_rq_migrating(p)))
366   - cpu_relax();
367   - }
368   -}
369   -
370   -static void __task_rq_unlock(struct rq *rq)
371   - __releases(rq->lock)
372   -{
373   - raw_spin_unlock(&rq->lock);
374   -}
375   -
376   -static inline void
377   -task_rq_unlock(struct rq *rq, struct task_struct *p, unsigned long *flags)
378   - __releases(rq->lock)
379   - __releases(p->pi_lock)
380   -{
381   - raw_spin_unlock(&rq->lock);
382   - raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
383   -}
384   -
385   -/*
386 310 * this_rq_lock - lock this runqueue and disable interrupts.
387 311 */
388 312 static struct rq *this_rq_lock(void)
kernel/sched/deadline.c
... ... @@ -511,16 +511,10 @@
511 511 struct sched_dl_entity,
512 512 dl_timer);
513 513 struct task_struct *p = dl_task_of(dl_se);
  514 + unsigned long flags;
514 515 struct rq *rq;
515   -again:
516   - rq = task_rq(p);
517   - raw_spin_lock(&rq->lock);
518 516  
519   - if (rq != task_rq(p)) {
520   - /* Task was moved, retrying. */
521   - raw_spin_unlock(&rq->lock);
522   - goto again;
523   - }
  517 + rq = task_rq_lock(current, &flags);
524 518  
525 519 /*
526 520 * We need to take care of several possible races here:
... ... @@ -555,7 +549,7 @@
555 549 push_dl_task(rq);
556 550 #endif
557 551 unlock:
558   - raw_spin_unlock(&rq->lock);
  552 + task_rq_unlock(rq, current, &flags);
559 553  
560 554 return HRTIMER_NORESTART;
561 555 }
kernel/sched/sched.h
... ... @@ -1380,6 +1380,82 @@
1380 1380  
1381 1381 extern void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period);
1382 1382  
  1383 +/*
  1384 + * __task_rq_lock - lock the rq @p resides on.
  1385 + */
  1386 +static inline struct rq *__task_rq_lock(struct task_struct *p)
  1387 + __acquires(rq->lock)
  1388 +{
  1389 + struct rq *rq;
  1390 +
  1391 + lockdep_assert_held(&p->pi_lock);
  1392 +
  1393 + for (;;) {
  1394 + rq = task_rq(p);
  1395 + raw_spin_lock(&rq->lock);
  1396 + if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
  1397 + return rq;
  1398 + raw_spin_unlock(&rq->lock);
  1399 +
  1400 + while (unlikely(task_on_rq_migrating(p)))
  1401 + cpu_relax();
  1402 + }
  1403 +}
  1404 +
  1405 +/*
  1406 + * task_rq_lock - lock p->pi_lock and lock the rq @p resides on.
  1407 + */
  1408 +static inline struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
  1409 + __acquires(p->pi_lock)
  1410 + __acquires(rq->lock)
  1411 +{
  1412 + struct rq *rq;
  1413 +
  1414 + for (;;) {
  1415 + raw_spin_lock_irqsave(&p->pi_lock, *flags);
  1416 + rq = task_rq(p);
  1417 + raw_spin_lock(&rq->lock);
  1418 + /*
  1419 + * move_queued_task() task_rq_lock()
  1420 + *
  1421 + * ACQUIRE (rq->lock)
  1422 + * [S] ->on_rq = MIGRATING [L] rq = task_rq()
  1423 + * WMB (__set_task_cpu()) ACQUIRE (rq->lock);
  1424 + * [S] ->cpu = new_cpu [L] task_rq()
  1425 + * [L] ->on_rq
  1426 + * RELEASE (rq->lock)
  1427 + *
  1428 + * If we observe the old cpu in task_rq_lock, the acquire of
  1429 + * the old rq->lock will fully serialize against the stores.
  1430 + *
  1431 + * If we observe the new cpu in task_rq_lock, the acquire will
  1432 + * pair with the WMB to ensure we must then also see migrating.
  1433 + */
  1434 + if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
  1435 + return rq;
  1436 + raw_spin_unlock(&rq->lock);
  1437 + raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
  1438 +
  1439 + while (unlikely(task_on_rq_migrating(p)))
  1440 + cpu_relax();
  1441 + }
  1442 +}
  1443 +
  1444 +static inline void __task_rq_unlock(struct rq *rq)
  1445 + __releases(rq->lock)
  1446 +{
  1447 + raw_spin_unlock(&rq->lock);
  1448 +}
  1449 +
  1450 +static inline void
  1451 +task_rq_unlock(struct rq *rq, struct task_struct *p, unsigned long *flags)
  1452 + __releases(rq->lock)
  1453 + __releases(p->pi_lock)
  1454 +{
  1455 + raw_spin_unlock(&rq->lock);
  1456 + raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
  1457 +}
  1458 +
1383 1459 #ifdef CONFIG_SMP
1384 1460 #ifdef CONFIG_PREEMPT
1385 1461