Commit 35cfa2b0b491c37e23527822bf365610dbb188e5

Authored by Gavin Shan
Committed by Linus Torvalds
1 parent fc314d0a4a

mm/mmu_notifier: allocate mmu_notifier in advance

When the mmu_notifier is allocated with GFP_KERNEL, reclaim (swap) may kick
in if available memory is tight.  Because the allocation was done with the
mm locks held, that could eventually lead to a deadlock while the swap
daemon swaps out anonymous pages.  The problem was introduced by commit
e0f3c3f78da29b ("mm/mmu_notifier: init notifier if necessary").

  =================================
  [ INFO: inconsistent lock state ]
  3.7.0-rc1+ #518 Not tainted
  ---------------------------------
  inconsistent {RECLAIM_FS-ON-W} -> {IN-RECLAIM_FS-W} usage.
  kswapd0/35 [HC0[0]:SC0[0]:HE1:SE1] takes:
   (&mapping->i_mmap_mutex){+.+.?.}, at: page_referenced+0x9c/0x2e0
  {RECLAIM_FS-ON-W} state was registered at:
     mark_held_locks+0x86/0x150
     lockdep_trace_alloc+0x67/0xc0
     kmem_cache_alloc_trace+0x33/0x230
     do_mmu_notifier_register+0x87/0x180
     mmu_notifier_register+0x13/0x20
     kvm_dev_ioctl+0x428/0x510
     do_vfs_ioctl+0x98/0x570
     sys_ioctl+0x91/0xb0
     system_call_fastpath+0x16/0x1b
  irq event stamp: 825
  hardirqs last  enabled at (825): _raw_spin_unlock_irq+0x30/0x60
  hardirqs last disabled at (824): _raw_spin_lock_irq+0x19/0x80
  softirqs last  enabled at (0): copy_process+0x630/0x17c0
  softirqs last disabled at (0): (null)
  ...

Simply back out the above commit, which was a small performance
optimization.

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
Reported-by: Andrea Righi <andrea@betterlinux.com>
Tested-by: Andrea Righi <andrea@betterlinux.com>
Cc: Wanpeng Li <liwanp@linux.vnet.ibm.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Avi Kivity <avi@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Cc: Sagi Grimberg <sagig@mellanox.co.il>
Cc: Haggai Eran <haggaie@mellanox.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 1 changed file with 13 additions and 13 deletions Side-by-side Diff

... ... @@ -196,28 +196,28 @@
196 196 BUG_ON(atomic_read(&mm->mm_users) <= 0);
197 197  
198 198 /*
199   - * Verify that mmu_notifier_init() already run and the global srcu is
200   - * initialized.
201   - */
  199 + * Verify that mmu_notifier_init() already run and the global srcu is
  200 + * initialized.
  201 + */
202 202 BUG_ON(!srcu.per_cpu_ref);
203 203  
  204 + ret = -ENOMEM;
  205 + mmu_notifier_mm = kmalloc(sizeof(struct mmu_notifier_mm), GFP_KERNEL);
  206 + if (unlikely(!mmu_notifier_mm))
  207 + goto out;
  208 +
204 209 if (take_mmap_sem)
205 210 down_write(&mm->mmap_sem);
206 211 ret = mm_take_all_locks(mm);
207 212 if (unlikely(ret))
208   - goto out;
  213 + goto out_clean;
209 214  
210 215 if (!mm_has_notifiers(mm)) {
211   - mmu_notifier_mm = kmalloc(sizeof(struct mmu_notifier_mm),
212   - GFP_KERNEL);
213   - if (unlikely(!mmu_notifier_mm)) {
214   - ret = -ENOMEM;
215   - goto out_of_mem;
216   - }
217 216 INIT_HLIST_HEAD(&mmu_notifier_mm->list);
218 217 spin_lock_init(&mmu_notifier_mm->lock);
219 218  
220 219 mm->mmu_notifier_mm = mmu_notifier_mm;
  220 + mmu_notifier_mm = NULL;
221 221 }
222 222 atomic_inc(&mm->mm_count);
223 223  
224 224  
225 225  
... ... @@ -233,12 +233,12 @@
233 233 hlist_add_head(&mn->hlist, &mm->mmu_notifier_mm->list);
234 234 spin_unlock(&mm->mmu_notifier_mm->lock);
235 235  
236   -out_of_mem:
237 236 mm_drop_all_locks(mm);
238   -out:
  237 +out_clean:
239 238 if (take_mmap_sem)
240 239 up_write(&mm->mmap_sem);
241   -
  240 + kfree(mmu_notifier_mm);
  241 +out:
242 242 BUG_ON(atomic_read(&mm->mm_users) <= 0);
243 243 return ret;
244 244 }