  /*
   *	linux/mm/mlock.c
   *
   *  (C) Copyright 1995 Linus Torvalds
   *  (C) Copyright 2002 Christoph Hellwig
   */
  #include <linux/capability.h>
  #include <linux/mman.h>
  #include <linux/mm.h>
  #include <linux/swap.h>
  #include <linux/swapops.h>
  #include <linux/pagemap.h>
  #include <linux/pagevec.h>
  #include <linux/mempolicy.h>
  #include <linux/syscalls.h>
  #include <linux/sched.h>
  #include <linux/export.h>
  #include <linux/rmap.h>
  #include <linux/mmzone.h>
  #include <linux/hugetlb.h>
  #include <linux/memcontrol.h>
  #include <linux/mm_inline.h>
  
  #include "internal.h"

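  /*
   * Can the calling task mlock memory?  Yes if it has CAP_IPC_LOCK
   * or a non-zero RLIMIT_MEMLOCK limit.
   */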
  int can_do_mlock(void)
  {
  	if (capable(CAP_IPC_LOCK))
  		return 1;
  	if (rlimit(RLIMIT_MEMLOCK) != 0)
  		return 1;
  	return 0;
  }
  EXPORT_SYMBOL(can_do_mlock);

  /*
   * Mlocked pages are marked with PageMlocked() flag for efficient testing
   * in vmscan and, possibly, the fault path; and to support semi-accurate
   * statistics.
   *
   * An mlocked page [PageMlocked(page)] is unevictable.  As such, it will
   * be placed on the LRU "unevictable" list, rather than the [in]active lists.
   * The unevictable list is an LRU sibling list to the [in]active lists.
   * PageUnevictable is set to indicate the unevictable state.
   *
   * When lazy mlocking via vmscan, it is important to ensure that the
   * vma's VM_LOCKED status is not concurrently being modified, otherwise we
   * may have mlocked a page that is being munlocked. So lazy mlock must take
   * the mmap_sem for read, and verify that the vma really is locked
   * (see mm/rmap.c).
   */
  
  /*
   *  LRU accounting for clear_page_mlock()
   */
  void clear_page_mlock(struct page *page)
  {
  	if (!TestClearPageMlocked(page))
  		return;

  	mod_zone_page_state(page_zone(page), NR_MLOCK,
  			    -hpage_nr_pages(page));
  	count_vm_event(UNEVICTABLE_PGCLEARED);
  	if (!isolate_lru_page(page)) {
  		putback_lru_page(page);
  	} else {
  		/*
  		 * We lost the race. The page already moved to the evictable list.
  		 */
  		if (PageUnevictable(page))
  			count_vm_event(UNEVICTABLE_PGSTRANDED);
  	}
  }
  
  /*
   * Mark page as mlocked if not already.
   * If page on LRU, isolate and putback to move to unevictable list.
   */
  void mlock_vma_page(struct page *page)
  {
  	/* Serialize with page migration */
  	BUG_ON(!PageLocked(page));
  	if (!TestSetPageMlocked(page)) {
  		mod_zone_page_state(page_zone(page), NR_MLOCK,
  				    hpage_nr_pages(page));
  		count_vm_event(UNEVICTABLE_PGMLOCKED);
  		if (!isolate_lru_page(page))
  			putback_lru_page(page);
  	}
  }

  /*
   * Isolate a page from LRU with optional get_page() pin.
   * Assumes lru_lock already held and page already pinned.
   */
  static bool __munlock_isolate_lru_page(struct page *page, bool getpage)
  {
  	if (PageLRU(page)) {
  		struct lruvec *lruvec;
  
  		lruvec = mem_cgroup_page_lruvec(page, page_zone(page));
  		if (getpage)
  			get_page(page);
  		ClearPageLRU(page);
  		del_page_from_lru_list(page, lruvec, page_lru(page));
  		return true;
  	}
  
  	return false;
  }
  
  /*
   * Finish munlock after successful page isolation
   *
   * Page must be locked. This is a wrapper for try_to_munlock()
   * and putback_lru_page() with munlock accounting.
   */
  static void __munlock_isolated_page(struct page *page)
  {
  	int ret = SWAP_AGAIN;
  
  	/*
  	 * Optimization: if the page was mapped just once, that's our mapping
  	 * and we don't need to check all the other vmas.
  	 */
  	if (page_mapcount(page) > 1)
  		ret = try_to_munlock(page);
  
  	/* Did try_to_munlock() succeed or punt? */
  	if (ret != SWAP_MLOCK)
  		count_vm_event(UNEVICTABLE_PGMUNLOCKED);
  
  	putback_lru_page(page);
  }
  
  /*
   * Accounting for page isolation fail during munlock
   *
   * Performs accounting when page isolation fails in munlock. There is nothing
   * else to do because it means some other task has already removed the page
   * from the LRU. putback_lru_page() will take care of removing the page from
   * the unevictable list, if necessary. vmscan [page_referenced()] will move
   * the page back to the unevictable list if some other vma has it mlocked.
   */
  static void __munlock_isolation_failed(struct page *page)
  {
  	if (PageUnevictable(page))
  		__count_vm_event(UNEVICTABLE_PGSTRANDED);
  	else
  		__count_vm_event(UNEVICTABLE_PGMUNLOCKED);
  }

  /**
   * munlock_vma_page - munlock a vma page
   * @page - page to be unlocked, either a normal page or THP page head
   *
   * returns the size of the page as a page mask (0 for normal page,
   *         HPAGE_PMD_NR - 1 for THP head page)
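   *         (e.g. 511 when HPAGE_PMD_NR is 512, so the caller can skip
   *         over the whole huge page)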
   *
   * called from munlock()/munmap() path with page supposedly on the LRU.
   * When we munlock a page, because the vma where we found the page is being
   * munlock()ed or munmap()ed, we want to check whether other vmas hold the
   * page locked so that we can leave it on the unevictable lru list and not
   * bother vmscan with it.  However, to walk the page's rmap list in
   * try_to_munlock() we must isolate the page from the LRU.  If some other
   * task has removed the page from the LRU, we won't be able to do that.
   * So we clear the PageMlocked as we might not get another chance.  If we
   * can't isolate the page, we leave it for putback_lru_page() and vmscan
   * [page_referenced()/try_to_unmap()] to deal with.
   */
  unsigned int munlock_vma_page(struct page *page)
  {
  	unsigned int nr_pages;
  	struct zone *zone = page_zone(page);

  	/* For try_to_munlock() and to serialize with page migration */
  	BUG_ON(!PageLocked(page));
  	/*
  	 * Serialize with any parallel __split_huge_page_refcount() which
  	 * might otherwise copy PageMlocked to part of the tail pages before
  	 * we clear it in the head page. It also stabilizes hpage_nr_pages().
  	 */
  	spin_lock_irq(&zone->lru_lock);
  
  	nr_pages = hpage_nr_pages(page);
  	if (!TestClearPageMlocked(page))
  		goto unlock_out;
  
  	__mod_zone_page_state(zone, NR_MLOCK, -nr_pages);
  
  	if (__munlock_isolate_lru_page(page, true)) {
  		spin_unlock_irq(&zone->lru_lock);
  		__munlock_isolated_page(page);
  		goto out;
  	}
  	__munlock_isolation_failed(page);
  
  unlock_out:
  	spin_unlock_irq(&zone->lru_lock);
  
  out:
  	return nr_pages - 1;
  }

  /**
   * __mlock_vma_pages_range() -  mlock a range of pages in the vma.
   * @vma:   target vma
   * @start: start address
   * @end:   end address
   *
   * This takes care of making the pages present too.
   *
   * return 0 on success, negative error code on error.
   *
   * vma->vm_mm->mmap_sem must be held for at least read.
   */
  long __mlock_vma_pages_range(struct vm_area_struct *vma,
  		unsigned long start, unsigned long end, int *nonblocking)
  {
  	struct mm_struct *mm = vma->vm_mm;
  	unsigned long nr_pages = (end - start) / PAGE_SIZE;
  	int gup_flags;
  
  	VM_BUG_ON(start & ~PAGE_MASK);
  	VM_BUG_ON(end   & ~PAGE_MASK);
  	VM_BUG_ON(start < vma->vm_start);
  	VM_BUG_ON(end   > vma->vm_end);
  	VM_BUG_ON(!rwsem_is_locked(&mm->mmap_sem));

  	gup_flags = FOLL_TOUCH | FOLL_MLOCK;
  	/*
  	 * We want to touch writable mappings with a write fault in order
  	 * to break COW, except for shared mappings because these don't COW
  	 * and we would not want to dirty them for nothing.
  	 */
  	if ((vma->vm_flags & (VM_WRITE | VM_SHARED)) == VM_WRITE)
  		gup_flags |= FOLL_WRITE;

  	/*
  	 * We want mlock to succeed for regions that have any permissions
  	 * other than PROT_NONE.
  	 */
  	if (vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC))
  		gup_flags |= FOLL_FORCE;
  	/*
  	 * We made sure addr is within a VMA, so the following will
  	 * not result in a stack expansion that recurses back here.
  	 */
  	return __get_user_pages(current, mm, start, nr_pages, gup_flags,
  				NULL, NULL, nonblocking);
  }
  
  /*
   * Convert get_user_pages() return value to posix mlock() error:
   * -EFAULT (range not fully mapped) becomes -ENOMEM, and -ENOMEM
   * (out of memory) becomes -EAGAIN, as POSIX specifies for mlock().
   */
  static int __mlock_posix_error_return(long retval)
  {
  	if (retval == -EFAULT)
  		retval = -ENOMEM;
  	else if (retval == -ENOMEM)
  		retval = -EAGAIN;
  	return retval;
  }

  /*
   * Prepare page for fast batched LRU putback via __putback_lru_fast()
   *
   * The fast path is available only for evictable pages with a single mapping.
   * Then we can bypass the per-cpu pvec and get better performance.
   * When mapcount > 1 we need try_to_munlock(), which can fail.
   * When !page_evictable(), we need the full redo logic of putback_lru_page to
   * avoid leaving an evictable page on the unevictable list.
   *
   * In case of success, @page is added to @pvec and @pgrescued is incremented
   * in case that the page was previously unevictable. @page is also unlocked.
   */
  static bool __putback_lru_fast_prepare(struct page *page, struct pagevec *pvec,
  		int *pgrescued)
  {
  	VM_BUG_ON_PAGE(PageLRU(page), page);
  	VM_BUG_ON_PAGE(!PageLocked(page), page);
  
  	if (page_mapcount(page) <= 1 && page_evictable(page)) {
  		pagevec_add(pvec, page);
  		if (TestClearPageUnevictable(page))
  			(*pgrescued)++;
  		unlock_page(page);
  		return true;
  	}
  
  	return false;
  }
  
  /*
   * Putback multiple evictable pages to the LRU
   *
   * Batched putback of evictable pages that bypasses the per-cpu pvec. Some of
   * the pages might have meanwhile become unevictable but that is OK.
   */
  static void __putback_lru_fast(struct pagevec *pvec, int pgrescued)
  {
  	count_vm_events(UNEVICTABLE_PGMUNLOCKED, pagevec_count(pvec));
  	/*
  	 *__pagevec_lru_add() calls release_pages() so we don't call
  	 * put_page() explicitly
  	 */
  	__pagevec_lru_add(pvec);
  	count_vm_events(UNEVICTABLE_PGRESCUED, pgrescued);
  }
  
  /*
   * Munlock a batch of pages from the same zone
   *
   * The work is split into two main phases. The first phase clears the Mlocked
   * flag and attempts to isolate the pages, all under a single zone lru lock.
   * The second phase finishes the munlock only for pages where isolation
   * succeeded.
   *
   * Note that the pagevec may be modified during the process.
   */
  static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
  {
  	int i;
  	int nr = pagevec_count(pvec);
  	int delta_munlocked;
  	struct pagevec pvec_putback;
  	int pgrescued = 0;

  	pagevec_init(&pvec_putback, 0);
  	/* Phase 1: page isolation */
  	spin_lock_irq(&zone->lru_lock);
  	for (i = 0; i < nr; i++) {
  		struct page *page = pvec->pages[i];
  
  		if (TestClearPageMlocked(page)) {
  			/*
  			 * We already have pin from follow_page_mask()
  			 * so we can spare the get_page() here.
  			 */
  			if (__munlock_isolate_lru_page(page, false))
  				continue;
  			else
  				__munlock_isolation_failed(page);
  		}
  
  		/*
  		 * We won't be munlocking this page in the next phase
  		 * but we still need to release the follow_page_mask()
  		 * pin. We cannot do it under lru_lock however. If it's
  		 * the last pin, __page_cache_release() would deadlock.
  		 */
  		pagevec_add(&pvec_putback, pvec->pages[i]);
  		pvec->pages[i] = NULL;
  	}
  	delta_munlocked = -nr + pagevec_count(&pvec_putback);
  	__mod_zone_page_state(zone, NR_MLOCK, delta_munlocked);
  	spin_unlock_irq(&zone->lru_lock);
  	/* Now we can release pins of pages that we are not munlocking */
  	pagevec_release(&pvec_putback);
  	/* Phase 2: page munlock */
  	for (i = 0; i < nr; i++) {
  		struct page *page = pvec->pages[i];
  
  		if (page) {
  			lock_page(page);
  			if (!__putback_lru_fast_prepare(page, &pvec_putback,
  					&pgrescued)) {
  				/*
  				 * Slow path. We don't want to lose the last
  				 * pin before unlock_page()
  				 */
  				get_page(page); /* for putback_lru_page() */
  				__munlock_isolated_page(page);
  				unlock_page(page);
  				put_page(page); /* from follow_page_mask() */
  			}
  		}
  	}

  	/*
  	 * Phase 3: page putback for pages that qualified for the fast path
  	 * This will also call put_page() to return pin from follow_page_mask()
  	 */
  	if (pagevec_count(&pvec_putback))
  		__putback_lru_fast(&pvec_putback, pgrescued);
  }
  
  /*
   * Fill up pagevec for __munlock_pagevec using pte walk
   *
   * The function expects that the struct page corresponding to the @start
   * address is a non-THP page already pinned and in the @pvec, and that it
   * belongs to the zone identified by @zoneid.
   *
   * The rest of @pvec is filled by subsequent pages within the same pmd and same
   * zone, as long as the ptes are present and vm_normal_page() succeeds. These
   * pages also get pinned.
   *
   * Returns the address of the next page that should be scanned. This equals
   * @start + PAGE_SIZE when no page could be added by the pte walk.
   */
  static unsigned long __munlock_pagevec_fill(struct pagevec *pvec,
  		struct vm_area_struct *vma, int zoneid,	unsigned long start,
  		unsigned long end)
  {
  	pte_t *pte;
  	spinlock_t *ptl;
  
  	/*
  	 * Initialize pte walk starting at the already pinned page where we
  	 * are sure that there is a pte, as it was pinned under the same
  	 * mmap_sem write op.
  	 */
  	pte = get_locked_pte(vma->vm_mm, start,	&ptl);
  	/* Make sure we do not cross the page table boundary */
  	end = pgd_addr_end(start, end);
  	end = pud_addr_end(start, end);
  	end = pmd_addr_end(start, end);
  
  	/* The page next to the pinned page is the first we will try to get */
  	start += PAGE_SIZE;
  	while (start < end) {
  		struct page *page = NULL;
  		pte++;
  		if (pte_present(*pte))
  			page = vm_normal_page(vma, start, *pte);
  		/*
  		 * Break if page could not be obtained or the page's node+zone does not
  		 * match
  		 */
  		if (!page || page_zone_id(page) != zoneid)
  			break;

  		get_page(page);
  		/*
  		 * Increase the address that will be returned *before* the
  		 * eventual break due to pvec becoming full by adding the page
  		 */
  		start += PAGE_SIZE;
  		if (pagevec_add(pvec, page) == 0)
  			break;
  	}
  	pte_unmap_unlock(pte, ptl);
  	return start;
  }
  
  /*
   * munlock_vma_pages_range() - munlock all pages in the vma range.
   * @vma - vma containing range to be munlock()ed.
   * @start - start address in @vma of the range
   * @end - end of range in @vma.
   *
   *  For mremap(), munmap() and exit().
   *
   * Called with @vma VM_LOCKED.
   *
   * Returns with VM_LOCKED cleared.  Callers must be prepared to
   * deal with this.
   *
   * We don't save and restore VM_LOCKED here because pages are
   * still on lru.  In unmap path, pages might otherwise be scanned
   * by reclaim and re-mlocked by try_to_{munlock|unmap} before we
   * unmap and free them, which would result in freeing mlocked pages.
   */
  void munlock_vma_pages_range(struct vm_area_struct *vma,
  			     unsigned long start, unsigned long end)
  {
  	vma->vm_flags &= ~VM_LOCKED;

  	while (start < end) {
  		struct page *page = NULL;
  		unsigned int page_mask;
  		unsigned long page_increm;
  		struct pagevec pvec;
  		struct zone *zone;
  		int zoneid;

  		pagevec_init(&pvec, 0);
  		/*
  		 * Although FOLL_DUMP is intended for get_dump_page(),
  		 * it just so happens that its special treatment of the
  		 * ZERO_PAGE (returning an error instead of doing get_page)
  		 * suits munlock very well (and if somehow an abnormal page
  		 * has sneaked into the range, we won't oops here: great).
  		 */
  		page = follow_page_mask(vma, start, FOLL_GET | FOLL_DUMP,
  				&page_mask);
  		if (page && !IS_ERR(page)) {
  			if (PageTransHuge(page)) {
  				lock_page(page);
  				/*
  				 * Any THP page found by follow_page_mask() may
  				 * have gotten split before reaching
  				 * munlock_vma_page(), so we need to recompute
  				 * the page_mask here.
  				 */
  				page_mask = munlock_vma_page(page);
  				unlock_page(page);
  				put_page(page); /* follow_page_mask() */
  			} else {
  				/*
  				 * Non-huge pages are handled in batches via
  				 * pagevec. The pin from follow_page_mask()
  				 * prevents them from being collapsed into a THP.
  				 */
  				pagevec_add(&pvec, page);
  				zone = page_zone(page);
  				zoneid = page_zone_id(page);
  
  				/*
  				 * Try to fill the rest of pagevec using fast
  				 * pte walk. This will also update start to
  				 * the next page to process. Then munlock the
  				 * pagevec.
  				 */
  				start = __munlock_pagevec_fill(&pvec, vma,
  						zoneid, start, end);
  				__munlock_pagevec(&pvec, zone);
  				goto next;
  			}
  		}
  		/* It's a bug to munlock in the middle of a THP page */
  		VM_BUG_ON((start >> PAGE_SHIFT) & page_mask);
  		page_increm = 1 + page_mask;
  		start += page_increm * PAGE_SIZE;
  next:
  		cond_resched();
  	}
  }
  
  /*
   * mlock_fixup  - handle mlock[all]/munlock[all] requests.
   *
   * Filters out "special" vmas -- VM_LOCKED never gets set for these, and
   * munlock is a no-op.  However, for some special vmas, we go ahead and
   * populate the ptes.
   *
   * For vmas that pass the filters, merge/split as appropriate.
   */
  static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
  	unsigned long start, unsigned long end, vm_flags_t newflags)
  {
  	struct mm_struct *mm = vma->vm_mm;
  	pgoff_t pgoff;
  	int nr_pages;
  	int ret = 0;
  	int lock = !!(newflags & VM_LOCKED);

  	if (newflags == vma->vm_flags || (vma->vm_flags & VM_SPECIAL) ||
  	    is_vm_hugetlb_page(vma) || vma == get_gate_vma(current->mm))
  		goto out;	/* don't set VM_LOCKED,  don't count */
  	pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
  	*prev = vma_merge(mm, *prev, start, end, newflags, vma->anon_vma,
  			  vma->vm_file, pgoff, vma_policy(vma));
  	if (*prev) {
  		vma = *prev;
  		goto success;
  	}
  	if (start != vma->vm_start) {
  		ret = split_vma(mm, vma, start, 1);
  		if (ret)
  			goto out;
  	}
  
  	if (end != vma->vm_end) {
  		ret = split_vma(mm, vma, end, 0);
  		if (ret)
  			goto out;
  	}
  
  success:
  	/*
  	 * Keep track of amount of locked VM.
  	 */
  	nr_pages = (end - start) >> PAGE_SHIFT;
  	if (!lock)
  		nr_pages = -nr_pages;
  	mm->locked_vm += nr_pages;
  
  	/*
  	 * vm_flags is protected by the mmap_sem held in write mode.
  	 * It's okay if try_to_unmap_one unmaps a page just after we
  	 * set VM_LOCKED, __mlock_vma_pages_range will bring it back.
  	 */

  	if (lock)
  		vma->vm_flags = newflags;
  	else
  		munlock_vma_pages_range(vma, start, end);

  out:
  	*prev = vma;
  	return ret;
  }
  
  static int do_mlock(unsigned long start, size_t len, int on)
  {
  	unsigned long nstart, end, tmp;
  	struct vm_area_struct * vma, * prev;
  	int error;
  	VM_BUG_ON(start & ~PAGE_MASK);
  	VM_BUG_ON(len != PAGE_ALIGN(len));
  	end = start + len;
  	if (end < start)
  		return -EINVAL;
  	if (end == start)
  		return 0;
  	vma = find_vma(current->mm, start);
  	if (!vma || vma->vm_start > start)
  		return -ENOMEM;
  	prev = vma->vm_prev;
  	if (start > vma->vm_start)
  		prev = vma;
  
  	for (nstart = start ; ; ) {
  		vm_flags_t newflags;
  
  		/* Here we know that  vma->vm_start <= nstart < vma->vm_end. */
  		newflags = vma->vm_flags & ~VM_LOCKED;
  		if (on)
  			newflags |= VM_LOCKED;
  
  		tmp = vma->vm_end;
  		if (tmp > end)
  			tmp = end;
  		error = mlock_fixup(vma, &prev, nstart, tmp, newflags);
  		if (error)
  			break;
  		nstart = tmp;
  		if (nstart < prev->vm_end)
  			nstart = prev->vm_end;
  		if (nstart >= end)
  			break;
  
  		vma = prev->vm_next;
  		if (!vma || vma->vm_start != nstart) {
  			error = -ENOMEM;
  			break;
  		}
  	}
  	return error;
  }

  /*
   * __mm_populate - populate and/or mlock pages within a range of address space.
   *
   * This is used to implement mlock() and the MAP_POPULATE / MAP_LOCKED mmap
   * flags. VMAs must be already marked with the desired vm_flags, and
   * mmap_sem must not be held.
   */
  int __mm_populate(unsigned long start, unsigned long len, int ignore_errors)
  {
  	struct mm_struct *mm = current->mm;
  	unsigned long end, nstart, nend;
  	struct vm_area_struct *vma = NULL;
  	int locked = 0;
  	long ret = 0;
  
  	VM_BUG_ON(start & ~PAGE_MASK);
  	VM_BUG_ON(len != PAGE_ALIGN(len));
  	end = start + len;
  	for (nstart = start; nstart < end; nstart = nend) {
  		/*
  		 * We want to fault in pages for [nstart; end) address range.
  		 * Find first corresponding VMA.
  		 */
  		if (!locked) {
  			locked = 1;
  			down_read(&mm->mmap_sem);
  			vma = find_vma(mm, nstart);
  		} else if (nstart >= vma->vm_end)
  			vma = vma->vm_next;
  		if (!vma || vma->vm_start >= end)
  			break;
  		/*
  		 * Set [nstart; nend) to intersection of desired address
  		 * range with the first VMA. Also, skip undesirable VMA types.
  		 */
  		nend = min(end, vma->vm_end);
  		if (vma->vm_flags & (VM_IO | VM_PFNMAP))
  			continue;
  		if (nstart < vma->vm_start)
  			nstart = vma->vm_start;
  		/*
  		 * Now fault in a range of pages. __mlock_vma_pages_range()
  		 * double checks the vma flags, so that it won't mlock pages
  		 * if the vma was already munlocked.
  		 */
  		ret = __mlock_vma_pages_range(vma, nstart, nend, &locked);
  		if (ret < 0) {
  			if (ignore_errors) {
  				ret = 0;
  				continue;	/* continue at next VMA */
  			}
  			ret = __mlock_posix_error_return(ret);
  			break;
  		}
  		nend = nstart + ret * PAGE_SIZE;
  		ret = 0;
  	}
  	if (locked)
  		up_read(&mm->mmap_sem);
  	return ret;	/* 0 or negative error code */
  }

  SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len)
  {
  	unsigned long locked;
  	unsigned long lock_limit;
  	int error = -ENOMEM;
  
  	if (!can_do_mlock())
  		return -EPERM;
  	lru_add_drain_all();	/* flush pagevec */
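  	/*
  	 * Round len up and start down to page boundaries; e.g. with 4K
  	 * pages, start 0x12345 and len 0x1000 become start 0x12000 and
  	 * len 0x2000, covering every page the original range touched.
  	 */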
  	len = PAGE_ALIGN(len + (start & ~PAGE_MASK));
  	start &= PAGE_MASK;
  	lock_limit = rlimit(RLIMIT_MEMLOCK);
  	lock_limit >>= PAGE_SHIFT;
  	locked = len >> PAGE_SHIFT;
  
  	down_write(&current->mm->mmap_sem);
  
  	locked += current->mm->locked_vm;
  
  	/* check against resource limits */
  	if ((locked <= lock_limit) || capable(CAP_IPC_LOCK))
  		error = do_mlock(start, len, 1);

  	up_write(&current->mm->mmap_sem);
  	if (!error)
  		error = __mm_populate(start, len, 0);
  	return error;
  }

  SYSCALL_DEFINE2(munlock, unsigned long, start, size_t, len)
  {
  	int ret;
  	len = PAGE_ALIGN(len + (start & ~PAGE_MASK));
  	start &= PAGE_MASK;
  
  	down_write(&current->mm->mmap_sem);
  	ret = do_mlock(start, len, 0);
  	up_write(&current->mm->mmap_sem);

  	return ret;
  }
  
  static int do_mlockall(int flags)
  {
  	struct vm_area_struct * vma, * prev = NULL;
  
  	if (flags & MCL_FUTURE)
  		current->mm->def_flags |= VM_LOCKED;
  	else
  		current->mm->def_flags &= ~VM_LOCKED;
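  	/*
  	 * MCL_FUTURE on its own only changes def_flags for mappings that
  	 * are created later, so there is nothing left to do for existing vmas.
  	 */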
  	if (flags == MCL_FUTURE)
  		goto out;
  
  	for (vma = current->mm->mmap; vma ; vma = prev->vm_next) {
  		vm_flags_t newflags;

  		newflags = vma->vm_flags & ~VM_LOCKED;
  		if (flags & MCL_CURRENT)
  			newflags |= VM_LOCKED;
  
  		/* Ignore errors */
  		mlock_fixup(vma, &prev, vma->vm_start, vma->vm_end, newflags);
  		cond_resched();
  	}
  out:
  	return 0;
  }

  SYSCALL_DEFINE1(mlockall, int, flags)
  {
  	unsigned long lock_limit;
  	int ret = -EINVAL;
  
  	if (!flags || (flags & ~(MCL_CURRENT | MCL_FUTURE)))
  		goto out;
  
  	ret = -EPERM;
  	if (!can_do_mlock())
  		goto out;
  	if (flags & MCL_CURRENT)
  		lru_add_drain_all();	/* flush pagevec */

  	lock_limit = rlimit(RLIMIT_MEMLOCK);
  	lock_limit >>= PAGE_SHIFT;
  
  	ret = -ENOMEM;
  	down_write(&current->mm->mmap_sem);
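  	/*
  	 * Only MCL_CURRENT is checked against RLIMIT_MEMLOCK here; with
  	 * MCL_FUTURE the limit is applied when the future mappings are created.
  	 */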
  	if (!(flags & MCL_CURRENT) || (current->mm->total_vm <= lock_limit) ||
  	    capable(CAP_IPC_LOCK))
  		ret = do_mlockall(flags);
  	up_write(&current->mm->mmap_sem);
  	if (!ret && (flags & MCL_CURRENT))
  		mm_populate(0, TASK_SIZE);
  out:
  	return ret;
  }

  SYSCALL_DEFINE0(munlockall)
  {
  	int ret;
  
  	down_write(&current->mm->mmap_sem);
  	ret = do_mlockall(0);
  	up_write(&current->mm->mmap_sem);
  	return ret;
  }
  
  /*
   * Objects with different lifetime than processes (SHM_LOCK and SHM_HUGETLB
   * shm segments) get accounted against the user_struct instead.
   */
  static DEFINE_SPINLOCK(shmlock_user_lock);
  
  int user_shm_lock(size_t size, struct user_struct *user)
  {
  	unsigned long lock_limit, locked;
  	int allowed = 0;
  
  	locked = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
  	lock_limit = rlimit(RLIMIT_MEMLOCK);
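  	/*
  	 * RLIM_INFINITY means no limit: skip the limit check below, but
  	 * still account the locked pages against the user.
  	 */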
  	if (lock_limit == RLIM_INFINITY)
  		allowed = 1;
  	lock_limit >>= PAGE_SHIFT;
  	spin_lock(&shmlock_user_lock);
  	if (!allowed &&
  	    locked + user->locked_shm > lock_limit && !capable(CAP_IPC_LOCK))
  		goto out;
  	get_uid(user);
  	user->locked_shm += locked;
  	allowed = 1;
  out:
  	spin_unlock(&shmlock_user_lock);
  	return allowed;
  }
  
  void user_shm_unlock(size_t size, struct user_struct *user)
  {
  	spin_lock(&shmlock_user_lock);
  	user->locked_shm -= (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
  	spin_unlock(&shmlock_user_lock);
  	free_uid(user);
  }