Commit 60e69eed85bb7b5198ef70643b5895c26ad76ef7

Authored by Mike Galbraith
Committed by Ingo Molnar
1 parent 467cbd207a

sched/numa: Fix task_numa_free() lockdep splat

Sasha reported that lockdep claims that the following commit:

  156654f491dd ("sched/numa: Move task_numa_free() to __put_task_struct()")

made numa_group.lock interrupt unsafe.

While I don't see how that could be, given that the commit in question
only moved task_numa_free() from one irq-enabled region to another, the
change below does make both the lockdep gripes and the lockups that
follow them (seen with numa=fake=4) go away.

Reported-by: Sasha Levin <sasha.levin@oracle.com>
Fixes: 156654f491dd ("sched/numa: Move task_numa_free() to __put_task_struct()")
Signed-off-by: Mike Galbraith <bitbucket@online.de>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: torvalds@linux-foundation.org
Cc: mgorman@suse.com
Cc: akpm@linux-foundation.org
Cc: Dave Jones <davej@redhat.com>
Link: http://lkml.kernel.org/r/1396860915.5170.5.camel@marge.simpson.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
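For readers unfamiliar with this splat class: lockdep flags a spinlock that is taken both from interrupt context and, elsewhere, with interrupts enabled, because an interrupt arriving while the irq-enabled holder sits in the critical section would deadlock that CPU trying to retake the lock. Below is a minimal sketch of the offending pattern, not taken from the report (example_lock and both functions are illustrative):

static DEFINE_SPINLOCK(example_lock);    /* stands in for numa_group->lock */

static void process_context_path(void)   /* e.g. task_numa_placement() */
{
        spin_lock(&example_lock);         /* irqs remain enabled while held */
        /* ... update group statistics ... */
        spin_unlock(&example_lock);
}

static void interrupt_context_path(void) /* hypothetical handler in irq context */
{
        spin_lock(&example_lock);         /* same lock, now from interrupt context */
        /* ... */
        spin_unlock(&example_lock);
}

Switching the process-context side to spin_lock_irq()/spin_unlock_irq(), as this patch does, makes every acquisition irq-safe and the two usages consistent.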

Showing 2 changed files with 16 additions and 6 deletions

kernel/sched/fair.c

@@ -1497,7 +1497,7 @@
         /* If the task is part of a group prevent parallel updates to group stats */
         if (p->numa_group) {
                 group_lock = &p->numa_group->lock;
-                spin_lock(group_lock);
+                spin_lock_irq(group_lock);
         }
 
         /* Find the node with the highest number of faults */
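An editorial note on the variant chosen: spin_lock_irq() is appropriate because task_numa_placement() is known to run with interrupts enabled, so there is no caller irq state worth preserving; spin_lock_irqsave() would also be correct but saves and restores flags needlessly. A hedged sketch of the difference (the lock and the function are hypothetical):

static DEFINE_SPINLOCK(lock);                   /* hypothetical */

static void variants(void)
{
        unsigned long flags;

        spin_lock_irq(&lock);                   /* caller must have irqs enabled */
        /* ... */
        spin_unlock_irq(&lock);                 /* unconditionally re-enables irqs */

        spin_lock_irqsave(&lock, flags);        /* safe from any prior irq state */
        /* ... */
        spin_unlock_irqrestore(&lock, flags);   /* restores the saved state */
}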
@@ -1572,7 +1572,7 @@
                 }
         }
 
-        spin_unlock(group_lock);
+        spin_unlock_irq(group_lock);
 }
 
 /* Preferred node as the node with the most faults */
@@ -1677,7 +1677,8 @@
         if (!join)
                 return;
 
-        double_lock(&my_grp->lock, &grp->lock);
+        BUG_ON(irqs_disabled());
+        double_lock_irq(&my_grp->lock, &grp->lock);
 
         for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++) {
                 my_grp->faults[i] -= p->numa_faults_memory[i];
@@ -1691,7 +1692,7 @@
         grp->nr_tasks++;
 
         spin_unlock(&my_grp->lock);
-        spin_unlock(&grp->lock);
+        spin_unlock_irq(&grp->lock);
 
         rcu_assign_pointer(p->numa_group, grp);
 
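Note the deliberately asymmetric unlock above: double_lock_irq() disables interrupts exactly once, via spin_lock_irq() on the first lock, so my_grp->lock is dropped with a plain spin_unlock() and only the final spin_unlock_irq() re-enables interrupts. The BUG_ON(irqs_disabled()) added earlier states the matching entry condition, since spin_unlock_irq() enables interrupts unconditionally and would silently turn them on for a caller that arrived with them off.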
@@ -1710,14 +1711,14 @@
         void *numa_faults = p->numa_faults_memory;
 
         if (grp) {
-                spin_lock(&grp->lock);
+                spin_lock_irq(&grp->lock);
                 for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++)
                         grp->faults[i] -= p->numa_faults_memory[i];
                 grp->total_faults -= p->total_numa_faults;
 
                 list_del(&p->numa_entry);
                 grp->nr_tasks--;
-                spin_unlock(&grp->lock);
+                spin_unlock_irq(&grp->lock);
                 rcu_assign_pointer(p->numa_group, NULL);
                 put_numa_group(grp);
         }
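One plausible mechanism for the splat, offered with the same hedging as the commit message itself: after 156654f491dd, task_numa_free() runs from __put_task_struct(), which can also be reached from delayed_put_task_struct(), an RCU callback executing in softirq context. Taking numa_group->lock there, while task_numa_placement() takes it with interrupts enabled, is exactly the inconsistent usage lockdep reports, and the irq-disabling variants above close the window either way.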
kernel/sched/sched.h
@@ -1385,6 +1385,15 @@
         spin_lock_nested(l2, SINGLE_DEPTH_NESTING);
 }
 
+static inline void double_lock_irq(spinlock_t *l1, spinlock_t *l2)
+{
+        if (l1 > l2)
+                swap(l1, l2);
+
+        spin_lock_irq(l1);
+        spin_lock_nested(l2, SINGLE_DEPTH_NESTING);
+}
+
 static inline void double_raw_lock(raw_spinlock_t *l1, raw_spinlock_t *l2)
 {
         if (l1 > l2)
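Like the existing double_lock() just above it, the new helper orders the two locks by address so that every caller acquires them in the same order, which rules out ABBA deadlocks when two tasks concurrently try to merge each other's groups; spin_lock_nested(..., SINGLE_DEPTH_NESTING) tells lockdep that taking a second lock of the same class is intentional. A hedged usage sketch modeled on task_numa_group() (merge_groups and its struct are illustrative, not kernel code):

struct numa_group_example {             /* stand-in for struct numa_group */
        spinlock_t lock;
        /* ... fault statistics ... */
};

static void merge_groups(struct numa_group_example *my_grp,
                         struct numa_group_example *grp)
{
        /*
         * spin_unlock_irq() below enables irqs unconditionally,
         * so the caller must arrive with them enabled.
         */
        BUG_ON(irqs_disabled());

        double_lock_irq(&my_grp->lock, &grp->lock);

        /* ... move fault counts from my_grp to grp ... */

        spin_unlock(&my_grp->lock);     /* irqs stay disabled here ...   */
        spin_unlock_irq(&grp->lock);    /* ... and come back on here     */
}

Because double_lock_irq() may swap its arguments internally, the caller cannot know which lock was taken with spin_lock_irq(); that is harmless, since interrupts are simply off for the whole critical section and any one of the two unlocks may be the irq-enabling spin_unlock_irq(), as long as it is the last.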