Commit bc3e53f682d93df677dbd5006a404722b3adfe18

Authored by Christoph Lameter
Committed by Linus Torvalds
1 parent f11c0ca501

mm: distinguish between mlocked and pinned pages

Some kernel components pin user space memory (infiniband and perf) (by
increasing the page count) and account that memory as "mlocked".

The difference between mlocking and pinning is:

A. mlocked pages are marked with PG_mlocked and are exempt from
   swapping. Page migration may move them around though.
   They are kept on a special LRU list.

B. Pinned pages cannot be moved because something needs to
   directly access physical memory. They may not be on any
   LRU list.

I recently saw an mlocked process where mm->locked_vm became
bigger than the virtual size of the process (!) because some
memory was accounted for twice:

Once when the page was mlocked and once when the Infiniband
layer increased the refcount because it needed to pin the RDMA
memory.

This patch introduces a separate counter for pinned pages and
accounts them separately.

Signed-off-by: Christoph Lameter <cl@linux.com>
Cc: Mike Marciniszyn <infinipath@qlogic.com>
Cc: Roland Dreier <roland@kernel.org>
Cc: Sean Hefty <sean.hefty@intel.com>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 6 changed files with 14 additions and 12 deletions Side-by-side Diff

drivers/infiniband/core/umem.c
... ... @@ -136,7 +136,7 @@
136 136  
137 137 down_write(&current->mm->mmap_sem);
138 138  
139   - locked = npages + current->mm->locked_vm;
  139 + locked = npages + current->mm->pinned_vm;
140 140 lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
141 141  
142 142 if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
... ... @@ -206,7 +206,7 @@
206 206 __ib_umem_release(context->device, umem, 0);
207 207 kfree(umem);
208 208 } else
209   - current->mm->locked_vm = locked;
  209 + current->mm->pinned_vm = locked;
210 210  
211 211 up_write(&current->mm->mmap_sem);
212 212 if (vma_list)
... ... @@ -222,7 +222,7 @@
222 222 struct ib_umem *umem = container_of(work, struct ib_umem, work);
223 223  
224 224 down_write(&umem->mm->mmap_sem);
225   - umem->mm->locked_vm -= umem->diff;
  225 + umem->mm->pinned_vm -= umem->diff;
226 226 up_write(&umem->mm->mmap_sem);
227 227 mmput(umem->mm);
228 228 kfree(umem);
drivers/infiniband/hw/ipath/ipath_user_pages.c
... ... @@ -79,7 +79,7 @@
79 79 goto bail_release;
80 80 }
81 81  
82   - current->mm->locked_vm += num_pages;
  82 + current->mm->pinned_vm += num_pages;
83 83  
84 84 ret = 0;
85 85 goto bail;
... ... @@ -178,7 +178,7 @@
178 178  
179 179 __ipath_release_user_pages(p, num_pages, 1);
180 180  
181   - current->mm->locked_vm -= num_pages;
  181 + current->mm->pinned_vm -= num_pages;
182 182  
183 183 up_write(&current->mm->mmap_sem);
184 184 }
... ... @@ -195,7 +195,7 @@
195 195 container_of(_work, struct ipath_user_pages_work, work);
196 196  
197 197 down_write(&work->mm->mmap_sem);
198   - work->mm->locked_vm -= work->num_pages;
  198 + work->mm->pinned_vm -= work->num_pages;
199 199 up_write(&work->mm->mmap_sem);
200 200 mmput(work->mm);
201 201 kfree(work);
drivers/infiniband/hw/qib/qib_user_pages.c
... ... @@ -74,7 +74,7 @@
74 74 goto bail_release;
75 75 }
76 76  
77   - current->mm->locked_vm += num_pages;
  77 + current->mm->pinned_vm += num_pages;
78 78  
79 79 ret = 0;
80 80 goto bail;
... ... @@ -151,7 +151,7 @@
151 151 __qib_release_user_pages(p, num_pages, 1);
152 152  
153 153 if (current->mm) {
154   - current->mm->locked_vm -= num_pages;
  154 + current->mm->pinned_vm -= num_pages;
155 155 up_write(&current->mm->mmap_sem);
156 156 }
157 157 }
... ... @@ -44,6 +44,7 @@
44 44 "VmPeak:\t%8lu kB\n"
45 45 "VmSize:\t%8lu kB\n"
46 46 "VmLck:\t%8lu kB\n"
  47 + "VmPin:\t%8lu kB\n"
47 48 "VmHWM:\t%8lu kB\n"
48 49 "VmRSS:\t%8lu kB\n"
49 50 "VmData:\t%8lu kB\n"
... ... @@ -55,6 +56,7 @@
55 56 hiwater_vm << (PAGE_SHIFT-10),
56 57 (total_vm - mm->reserved_vm) << (PAGE_SHIFT-10),
57 58 mm->locked_vm << (PAGE_SHIFT-10),
  59 + mm->pinned_vm << (PAGE_SHIFT-10),
58 60 hiwater_rss << (PAGE_SHIFT-10),
59 61 total_rss << (PAGE_SHIFT-10),
60 62 data << (PAGE_SHIFT-10),
include/linux/mm_types.h
... ... @@ -304,7 +304,7 @@
304 304 unsigned long hiwater_rss; /* High-watermark of RSS usage */
305 305 unsigned long hiwater_vm; /* High-water virtual memory usage */
306 306  
307   - unsigned long total_vm, locked_vm, shared_vm, exec_vm;
  307 + unsigned long total_vm, locked_vm, pinned_vm, shared_vm, exec_vm;
308 308 unsigned long stack_vm, reserved_vm, def_flags, nr_ptes;
309 309 unsigned long start_code, end_code, start_data, end_data;
310 310 unsigned long start_brk, brk, start_stack;
kernel/events/core.c
... ... @@ -3544,7 +3544,7 @@
3544 3544 struct ring_buffer *rb = event->rb;
3545 3545  
3546 3546 atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm);
3547   - vma->vm_mm->locked_vm -= event->mmap_locked;
  3547 + vma->vm_mm->pinned_vm -= event->mmap_locked;
3548 3548 rcu_assign_pointer(event->rb, NULL);
3549 3549 mutex_unlock(&event->mmap_mutex);
3550 3550  
... ... @@ -3625,7 +3625,7 @@
3625 3625  
3626 3626 lock_limit = rlimit(RLIMIT_MEMLOCK);
3627 3627 lock_limit >>= PAGE_SHIFT;
3628   - locked = vma->vm_mm->locked_vm + extra;
  3628 + locked = vma->vm_mm->pinned_vm + extra;
3629 3629  
3630 3630 if ((locked > lock_limit) && perf_paranoid_tracepoint_raw() &&
3631 3631 !capable(CAP_IPC_LOCK)) {
... ... @@ -3651,7 +3651,7 @@
3651 3651 atomic_long_add(user_extra, &user->locked_vm);
3652 3652 event->mmap_locked = extra;
3653 3653 event->mmap_user = get_current_user();
3654   - vma->vm_mm->locked_vm += event->mmap_locked;
  3654 + vma->vm_mm->pinned_vm += event->mmap_locked;
3655 3655  
3656 3656 unlock:
3657 3657 if (!ret)