  /*
   *	linux/mm/mlock.c
   *
   *  (C) Copyright 1995 Linus Torvalds
   *  (C) Copyright 2002 Christoph Hellwig
   */
  #include <linux/capability.h>
  #include <linux/mman.h>
  #include <linux/mm.h>
  #include <linux/swap.h>
  #include <linux/swapops.h>
  #include <linux/pagemap.h>
  #include <linux/pagevec.h>
  #include <linux/mempolicy.h>
  #include <linux/syscalls.h>
  #include <linux/sched.h>
  #include <linux/export.h>
  #include <linux/rmap.h>
  #include <linux/mmzone.h>
  #include <linux/hugetlb.h>
  #include <linux/memcontrol.h>
  #include <linux/mm_inline.h>
  
  #include "internal.h"

  int can_do_mlock(void)
  {
  	if (capable(CAP_IPC_LOCK))
  		return 1;
  	if (rlimit(RLIMIT_MEMLOCK) != 0)
  		return 1;
  	return 0;
  }
  EXPORT_SYMBOL(can_do_mlock);

  /*
   * Mlocked pages are marked with PageMlocked() flag for efficient testing
   * in vmscan and, possibly, the fault path; and to support semi-accurate
   * statistics.
   *
   * An mlocked page [PageMlocked(page)] is unevictable.  As such, it will
   * be placed on the LRU "unevictable" list, rather than the [in]active lists.
   * The unevictable list is an LRU sibling list to the [in]active lists.
   * PageUnevictable is set to indicate the unevictable state.
   *
   * When lazy mlocking via vmscan, it is important to ensure that the
   * vma's VM_LOCKED status is not concurrently being modified, otherwise we
   * may have mlocked a page that is being munlocked. So lazy mlock must take
   * the mmap_sem for read, and verify that the vma really is locked
   * (see mm/rmap.c).
   */
  
  /*
   *  LRU accounting for clear_page_mlock()
   */
  void clear_page_mlock(struct page *page)
  {
  	if (!TestClearPageMlocked(page))
  		return;

  	mod_zone_page_state(page_zone(page), NR_MLOCK,
  			    -hpage_nr_pages(page));
  	count_vm_event(UNEVICTABLE_PGCLEARED);
  	if (!isolate_lru_page(page)) {
  		putback_lru_page(page);
  	} else {
  		/*
  		 * We lost the race. The page has already moved to an evictable list.
  		 */
  		if (PageUnevictable(page))
  			count_vm_event(UNEVICTABLE_PGSTRANDED);
  	}
  }
  
  /*
   * Mark page as mlocked if not already.
   * If page on LRU, isolate and putback to move to unevictable list.
   */
  void mlock_vma_page(struct page *page)
  {
  	/* Serialize with page migration */
  	BUG_ON(!PageLocked(page));
  	if (!TestSetPageMlocked(page)) {
  		mod_zone_page_state(page_zone(page), NR_MLOCK,
  				    hpage_nr_pages(page));
  		count_vm_event(UNEVICTABLE_PGMLOCKED);
  		if (!isolate_lru_page(page))
  			putback_lru_page(page);
  	}
  }
  /*
   * Isolate a page from LRU with optional get_page() pin.
   * Assumes lru_lock already held and page already pinned.
   */
  static bool __munlock_isolate_lru_page(struct page *page, bool getpage)
  {
  	if (PageLRU(page)) {
  		struct lruvec *lruvec;
  
  		lruvec = mem_cgroup_page_lruvec(page, page_zone(page));
  		if (getpage)
  			get_page(page);
  		ClearPageLRU(page);
  		del_page_from_lru_list(page, lruvec, page_lru(page));
  		return true;
  	}
  
  	return false;
  }
  
  /*
   * Finish munlock after successful page isolation
   *
   * Page must be locked. This is a wrapper for try_to_munlock()
   * and putback_lru_page() with munlock accounting.
   */
  static void __munlock_isolated_page(struct page *page)
  {
  	int ret = SWAP_AGAIN;
  
  	/*
  	 * Optimization: if the page was mapped just once, that's our mapping
  	 * and we don't need to check all the other vmas.
  	 */
  	if (page_mapcount(page) > 1)
  		ret = try_to_munlock(page);
  
  	/* Did try_to_munlock() succeed or punt? */
  	if (ret != SWAP_MLOCK)
  		count_vm_event(UNEVICTABLE_PGMUNLOCKED);
  
  	putback_lru_page(page);
  }
  
  /*
   * Accounting for page isolation fail during munlock
   *
   * Performs accounting when page isolation fails in munlock. There is nothing
   * else to do because it means some other task has already removed the page
   * from the LRU. putback_lru_page() will take care of removing the page from
   * the unevictable list, if necessary. vmscan [page_referenced()] will move
   * the page back to the unevictable list if some other vma has it mlocked.
   */
  static void __munlock_isolation_failed(struct page *page)
  {
  	if (PageUnevictable(page))
  		__count_vm_event(UNEVICTABLE_PGSTRANDED);
  	else
  		__count_vm_event(UNEVICTABLE_PGMUNLOCKED);
  }
  /**
   * munlock_vma_page - munlock a vma page
   * @page - page to be unlocked, either a normal page or THP page head
   *
   * returns the size of the page as a page mask (0 for normal page,
   *         HPAGE_PMD_NR - 1 for THP head page)
   *
   * called from munlock()/munmap() path with page supposedly on the LRU.
   * When we munlock a page, because the vma where we found the page is being
   * munlock()ed or munmap()ed, we want to check whether other vmas hold the
   * page locked so that we can leave it on the unevictable lru list and not
   * bother vmscan with it.  However, to walk the page's rmap list in
   * try_to_munlock() we must isolate the page from the LRU.  If some other
   * task has removed the page from the LRU, we won't be able to do that.
   * So we clear the PageMlocked as we might not get another chance.  If we
   * can't isolate the page, we leave it for putback_lru_page() and vmscan
   * [page_referenced()/try_to_unmap()] to deal with.
   */
  unsigned int munlock_vma_page(struct page *page)
  {
  	unsigned int nr_pages;
  	struct zone *zone = page_zone(page);

  	/* For try_to_munlock() and to serialize with page migration */
  	BUG_ON(!PageLocked(page));
  	/*
  	 * Serialize with any parallel __split_huge_page_refcount() which
  	 * might otherwise copy PageMlocked to part of the tail pages before
  	 * we clear it in the head page. It also stabilizes hpage_nr_pages().
  	 */
  	spin_lock_irq(&zone->lru_lock);
  
  	nr_pages = hpage_nr_pages(page);
  	if (!TestClearPageMlocked(page))
  		goto unlock_out;
  
  	__mod_zone_page_state(zone, NR_MLOCK, -nr_pages);
  
  	if (__munlock_isolate_lru_page(page, true)) {
  		spin_unlock_irq(&zone->lru_lock);
  		__munlock_isolated_page(page);
  		goto out;
  	}
  	__munlock_isolation_failed(page);
  
  unlock_out:
  	spin_unlock_irq(&zone->lru_lock);
  
  out:
  	return nr_pages - 1;
  }
  /**
   * __mlock_vma_pages_range() -  mlock a range of pages in the vma.
   * @vma:   target vma
   * @start: start address
   * @end:   end address
   * @nonblocking:
   *
   * This takes care of making the pages present too.
   *
   * return 0 on success, negative error code on error.
   *
   * vma->vm_mm->mmap_sem must be held.
   *
   * If @nonblocking is NULL, it may be held for read or write and will
   * be unperturbed.
   *
   * If @nonblocking is non-NULL, it must be held for read only and may be
   * released.  If it's released, *@nonblocking will be set to 0.
   */
  long __mlock_vma_pages_range(struct vm_area_struct *vma,
  		unsigned long start, unsigned long end, int *nonblocking)
  {
  	struct mm_struct *mm = vma->vm_mm;
  	unsigned long nr_pages = (end - start) / PAGE_SIZE;
  	int gup_flags;
  
  	VM_BUG_ON(start & ~PAGE_MASK);
  	VM_BUG_ON(end   & ~PAGE_MASK);
  	VM_BUG_ON_VMA(start < vma->vm_start, vma);
  	VM_BUG_ON_VMA(end   > vma->vm_end, vma);
  	VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_sem), mm);

  	gup_flags = FOLL_TOUCH | FOLL_MLOCK;
  	/*
  	 * We want to touch writable mappings with a write fault in order
  	 * to break COW, except for shared mappings because these don't COW
  	 * and we would not want to dirty them for nothing.
  	 */
  	if ((vma->vm_flags & (VM_WRITE | VM_SHARED)) == VM_WRITE)
  		gup_flags |= FOLL_WRITE;

  	/*
  	 * We want mlock to succeed for regions that have any permissions
  	 * other than PROT_NONE.
  	 */
  	if (vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC))
  		gup_flags |= FOLL_FORCE;
  	/*
  	 * We made sure addr is within a VMA, so the following will
  	 * not result in a stack expansion that recurses back here.
  	 */
  	return __get_user_pages(current, mm, start, nr_pages, gup_flags,
  				NULL, NULL, nonblocking);
  }
  
  /*
   * convert get_user_pages() return value to posix mlock() error
   */
  static int __mlock_posix_error_return(long retval)
  {
  	if (retval == -EFAULT)
  		retval = -ENOMEM;
  	else if (retval == -ENOMEM)
  		retval = -EAGAIN;
  	return retval;
  }
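  
  /*
   * Illustrative sketch, not part of this file: from userspace, the mapping
   * above is what makes a failed lock surface with the POSIX error codes,
   * e.g. (assuming <sys/mman.h> and <errno.h> are included):
   *
   *	if (mlock(addr, len) != 0) {
   *		if (errno == ENOMEM)
   *			;	// part of the range is unmapped, or over the limit
   *		else if (errno == EAGAIN)
   *			;	// some pages could not be locked right now
   *	}
   */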
  /*
   * Prepare page for fast batched LRU putback via putback_lru_evictable_pagevec()
   *
   * The fast path is available only for evictable pages with a single mapping.
   * Then we can bypass the per-cpu pvec and get better performance:
   * when mapcount > 1 we need try_to_munlock(), which can fail, and
   * when !page_evictable() we need the full redo logic of putback_lru_page to
   * avoid leaving an evictable page on the unevictable list.
   *
   * In case of success, @page is added to @pvec and @pgrescued is incremented
   * in case that the page was previously unevictable. @page is also unlocked.
   */
  static bool __putback_lru_fast_prepare(struct page *page, struct pagevec *pvec,
  		int *pgrescued)
  {
  	VM_BUG_ON_PAGE(PageLRU(page), page);
  	VM_BUG_ON_PAGE(!PageLocked(page), page);
  
  	if (page_mapcount(page) <= 1 && page_evictable(page)) {
  		pagevec_add(pvec, page);
  		if (TestClearPageUnevictable(page))
  			(*pgrescued)++;
  		unlock_page(page);
  		return true;
  	}
  
  	return false;
  }
  
  /*
   * Putback multiple evictable pages to the LRU
   *
   * Batched putback of evictable pages that bypasses the per-cpu pvec. Some of
   * the pages might have meanwhile become unevictable but that is OK.
   */
  static void __putback_lru_fast(struct pagevec *pvec, int pgrescued)
  {
  	count_vm_events(UNEVICTABLE_PGMUNLOCKED, pagevec_count(pvec));
  	/*
  	 * __pagevec_lru_add() calls release_pages() so we don't call
  	 * put_page() explicitly
  	 */
  	__pagevec_lru_add(pvec);
  	count_vm_events(UNEVICTABLE_PGRESCUED, pgrescued);
  }
  
  /*
   * Munlock a batch of pages from the same zone
   *
   * The work is split to two main phases. First phase clears the Mlocked flag
   * and attempts to isolate the pages, all under a single zone lru lock.
   * The second phase finishes the munlock only for pages where isolation
   * succeeded.
   *
   * Note that the pagevec may be modified during the process.
   */
  static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
  {
  	int i;
  	int nr = pagevec_count(pvec);
  	int delta_munlocked;
  	struct pagevec pvec_putback;
  	int pgrescued = 0;

  	pagevec_init(&pvec_putback, 0);
  	/* Phase 1: page isolation */
  	spin_lock_irq(&zone->lru_lock);
  	for (i = 0; i < nr; i++) {
  		struct page *page = pvec->pages[i];
  
  		if (TestClearPageMlocked(page)) {
  			/*
  			 * We already have pin from follow_page_mask()
  			 * so we can spare the get_page() here.
  			 */
  			if (__munlock_isolate_lru_page(page, false))
  				continue;
  			else
  				__munlock_isolation_failed(page);
  		}
  
  		/*
  		 * We won't be munlocking this page in the next phase
  		 * but we still need to release the follow_page_mask()
  		 * pin. We cannot do it under lru_lock however. If it's
  		 * the last pin, __page_cache_release() would deadlock.
  		 */
  		pagevec_add(&pvec_putback, pvec->pages[i]);
  		pvec->pages[i] = NULL;
  	}
  	delta_munlocked = -nr + pagevec_count(&pvec_putback);
  	__mod_zone_page_state(zone, NR_MLOCK, delta_munlocked);
  	spin_unlock_irq(&zone->lru_lock);
  	/* Now we can release pins of pages that we are not munlocking */
  	pagevec_release(&pvec_putback);
  	/* Phase 2: page munlock */
  	for (i = 0; i < nr; i++) {
  		struct page *page = pvec->pages[i];
  
  		if (page) {
  			lock_page(page);
  			if (!__putback_lru_fast_prepare(page, &pvec_putback,
  					&pgrescued)) {
  				/*
  				 * Slow path. We don't want to lose the last
  				 * pin before unlock_page()
  				 */
  				get_page(page); /* for putback_lru_page() */
  				__munlock_isolated_page(page);
  				unlock_page(page);
  				put_page(page); /* from follow_page_mask() */
  			}
  		}
  	}

  	/*
  	 * Phase 3: page putback for pages that qualified for the fast path
  	 * This will also call put_page() to return pin from follow_page_mask()
  	 */
  	if (pagevec_count(&pvec_putback))
  		__putback_lru_fast(&pvec_putback, pgrescued);
  }
  
  /*
   * Fill up pagevec for __munlock_pagevec using pte walk
   *
   * The function expects that the struct page corresponding to @start address is
   * a non-THP page already pinned and in the @pvec, and that it belongs to @zone.
   *
   * The rest of @pvec is filled by subsequent pages within the same pmd and same
   * zone, as long as the pte's are present and vm_normal_page() succeeds. These
   * pages also get pinned.
   *
   * Returns the address of the next page that should be scanned. This equals
   * @start + PAGE_SIZE when no page could be added by the pte walk.
   */
  static unsigned long __munlock_pagevec_fill(struct pagevec *pvec,
  		struct vm_area_struct *vma, int zoneid,	unsigned long start,
  		unsigned long end)
  {
  	pte_t *pte;
  	spinlock_t *ptl;
  
  	/*
  	 * Initialize pte walk starting at the already pinned page where we
  	 * are sure that there is a pte, as it was pinned under the same
  	 * mmap_sem write op.
  	 */
  	pte = get_locked_pte(vma->vm_mm, start,	&ptl);
  	/* Make sure we do not cross the page table boundary */
  	end = pgd_addr_end(start, end);
  	end = pud_addr_end(start, end);
  	end = pmd_addr_end(start, end);
  
  	/* The page next to the pinned page is the first we will try to get */
  	start += PAGE_SIZE;
  	while (start < end) {
  		struct page *page = NULL;
  		pte++;
  		if (pte_present(*pte))
  			page = vm_normal_page(vma, start, *pte);
  		/*
  		 * Break if page could not be obtained or the page's node+zone does not
  		 * match
  		 */
  		if (!page || page_zone_id(page) != zoneid)
  			break;

  		get_page(page);
  		/*
  		 * Increase the address that will be returned *before* the
  		 * eventual break due to pvec becoming full by adding the page
  		 */
  		start += PAGE_SIZE;
  		if (pagevec_add(pvec, page) == 0)
  			break;
  	}
  	pte_unmap_unlock(pte, ptl);
  	return start;
  }
  
  /*
   * munlock_vma_pages_range() - munlock all pages in the vma range.
   * @vma - vma containing range to be munlock()ed.
   * @start - start address in @vma of the range
   * @end - end of range in @vma.
   *
   *  For mremap(), munmap() and exit().
   *
   * Called with @vma VM_LOCKED.
   *
   * Returns with VM_LOCKED cleared.  Callers must be prepared to
   * deal with this.
   *
   * We don't save and restore VM_LOCKED here because pages are
   * still on lru.  In unmap path, pages might be scanned by reclaim
   * and re-mlocked by try_to_{munlock|unmap} before we unmap and
   * free them.  This will result in freeing mlocked pages.
   */
  void munlock_vma_pages_range(struct vm_area_struct *vma,
  			     unsigned long start, unsigned long end)
  {
  	vma->vm_flags &= ~VM_LOCKED;

  	while (start < end) {
  		struct page *page = NULL;
  		unsigned int page_mask;
  		unsigned long page_increm;
  		struct pagevec pvec;
  		struct zone *zone;
  		int zoneid;

  		pagevec_init(&pvec, 0);
  		/*
  		 * Although FOLL_DUMP is intended for get_dump_page(),
  		 * it just so happens that its special treatment of the
  		 * ZERO_PAGE (returning an error instead of doing get_page)
  		 * suits munlock very well (and if somehow an abnormal page
  		 * has sneaked into the range, we won't oops here: great).
  		 */
  		page = follow_page_mask(vma, start, FOLL_GET | FOLL_DUMP,
  				&page_mask);
  		if (page && !IS_ERR(page)) {
  			if (PageTransHuge(page)) {
  				lock_page(page);
  				/*
  				 * Any THP page found by follow_page_mask() may
  				 * have gotten split before reaching
  				 * munlock_vma_page(), so we need to recompute
  				 * the page_mask here.
  				 */
  				page_mask = munlock_vma_page(page);
  				unlock_page(page);
  				put_page(page); /* follow_page_mask() */
  			} else {
  				/*
  				 * Non-huge pages are handled in batches via
  				 * pagevec. The pin from follow_page_mask()
  				 * prevents them from being collapsed by THP.
  				 */
  				pagevec_add(&pvec, page);
  				zone = page_zone(page);
  				zoneid = page_zone_id(page);
  
  				/*
  				 * Try to fill the rest of pagevec using fast
  				 * pte walk. This will also update start to
  				 * the next page to process. Then munlock the
  				 * pagevec.
  				 */
  				start = __munlock_pagevec_fill(&pvec, vma,
  						zoneid, start, end);
  				__munlock_pagevec(&pvec, zone);
  				goto next;
  			}
  		}
  		/* It's a bug to munlock in the middle of a THP page */
  		VM_BUG_ON((start >> PAGE_SHIFT) & page_mask);
  		page_increm = 1 + page_mask;
  		start += page_increm * PAGE_SIZE;
  next:
  		cond_resched();
  	}
  }
  
  /*
   * mlock_fixup  - handle mlock[all]/munlock[all] requests.
   *
   * Filters out "special" vmas -- VM_LOCKED never gets set for these, and
   * munlock is a no-op.  However, for some special vmas, we go ahead and
   * populate the ptes.
   *
   * For vmas that pass the filters, merge/split as appropriate.
   */
  static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
  	unsigned long start, unsigned long end, vm_flags_t newflags)
  {
  	struct mm_struct *mm = vma->vm_mm;
  	pgoff_t pgoff;
  	int nr_pages;
  	int ret = 0;
  	int lock = !!(newflags & VM_LOCKED);

  	if (newflags == vma->vm_flags || (vma->vm_flags & VM_SPECIAL) ||
  	    is_vm_hugetlb_page(vma) || vma == get_gate_vma(current->mm))
  		goto out;	/* don't set VM_LOCKED,  don't count */
  	pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
  	*prev = vma_merge(mm, *prev, start, end, newflags, vma->anon_vma,
  			  vma->vm_file, pgoff, vma_policy(vma));
  	if (*prev) {
  		vma = *prev;
  		goto success;
  	}
  	if (start != vma->vm_start) {
  		ret = split_vma(mm, vma, start, 1);
  		if (ret)
  			goto out;
  	}
  
  	if (end != vma->vm_end) {
  		ret = split_vma(mm, vma, end, 0);
  		if (ret)
  			goto out;
  	}
  
  success:
  	/*
  	 * Keep track of amount of locked VM.
  	 */
  	nr_pages = (end - start) >> PAGE_SHIFT;
  	if (!lock)
  		nr_pages = -nr_pages;
  	mm->locked_vm += nr_pages;
  
  	/*
  	 * vm_flags is protected by the mmap_sem held in write mode.
  	 * It's okay if try_to_unmap_one unmaps a page just after we
  	 * set VM_LOCKED, __mlock_vma_pages_range will bring it back.
  	 */

  	if (lock)
  		vma->vm_flags = newflags;
  	else
  		munlock_vma_pages_range(vma, start, end);

  out:
  	*prev = vma;
  	return ret;
  }
  
  static int do_mlock(unsigned long start, size_t len, int on)
  {
  	unsigned long nstart, end, tmp;
  	struct vm_area_struct * vma, * prev;
  	int error;
  	VM_BUG_ON(start & ~PAGE_MASK);
  	VM_BUG_ON(len != PAGE_ALIGN(len));
  	end = start + len;
  	if (end < start)
  		return -EINVAL;
  	if (end == start)
  		return 0;
  	vma = find_vma(current->mm, start);
  	if (!vma || vma->vm_start > start)
  		return -ENOMEM;
  	prev = vma->vm_prev;
  	if (start > vma->vm_start)
  		prev = vma;
  
  	for (nstart = start ; ; ) {
  		vm_flags_t newflags;
  
  		/* Here we know that  vma->vm_start <= nstart < vma->vm_end. */
  		newflags = vma->vm_flags & ~VM_LOCKED;
  		if (on)
  			newflags |= VM_LOCKED;
  
  		tmp = vma->vm_end;
  		if (tmp > end)
  			tmp = end;
  		error = mlock_fixup(vma, &prev, nstart, tmp, newflags);
  		if (error)
  			break;
  		nstart = tmp;
  		if (nstart < prev->vm_end)
  			nstart = prev->vm_end;
  		if (nstart >= end)
  			break;
  
  		vma = prev->vm_next;
  		if (!vma || vma->vm_start != nstart) {
  			error = -ENOMEM;
  			break;
  		}
  	}
  	return error;
  }
  /*
   * __mm_populate - populate and/or mlock pages within a range of address space.
   *
   * This is used to implement mlock() and the MAP_POPULATE / MAP_LOCKED mmap
   * flags. VMAs must be already marked with the desired vm_flags, and
   * mmap_sem must not be held.
   */
  int __mm_populate(unsigned long start, unsigned long len, int ignore_errors)
  {
  	struct mm_struct *mm = current->mm;
  	unsigned long end, nstart, nend;
  	struct vm_area_struct *vma = NULL;
  	int locked = 0;
  	long ret = 0;
  
  	VM_BUG_ON(start & ~PAGE_MASK);
  	VM_BUG_ON(len != PAGE_ALIGN(len));
  	end = start + len;
  	for (nstart = start; nstart < end; nstart = nend) {
  		/*
  		 * We want to fault in pages for [nstart; end) address range.
  		 * Find first corresponding VMA.
  		 */
  		if (!locked) {
  			locked = 1;
  			down_read(&mm->mmap_sem);
  			vma = find_vma(mm, nstart);
  		} else if (nstart >= vma->vm_end)
  			vma = vma->vm_next;
  		if (!vma || vma->vm_start >= end)
  			break;
  		/*
  		 * Set [nstart; nend) to intersection of desired address
  		 * range with the first VMA. Also, skip undesirable VMA types.
  		 */
  		nend = min(end, vma->vm_end);
  		if (vma->vm_flags & (VM_IO | VM_PFNMAP))
  			continue;
  		if (nstart < vma->vm_start)
  			nstart = vma->vm_start;
  		/*
  		 * Now fault in a range of pages. __mlock_vma_pages_range()
  		 * double checks the vma flags, so that it won't mlock pages
  		 * if the vma was already munlocked.
  		 */
  		ret = __mlock_vma_pages_range(vma, nstart, nend, &locked);
  		if (ret < 0) {
  			if (ignore_errors) {
  				ret = 0;
  				continue;	/* continue at next VMA */
  			}
  			ret = __mlock_posix_error_return(ret);
  			break;
  		}
  		nend = nstart + ret * PAGE_SIZE;
  		ret = 0;
  	}
  	if (locked)
  		up_read(&mm->mmap_sem);
  	return ret;	/* 0 or negative error code */
  }
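  
  /*
   * Illustrative sketch, not part of this file: __mm_populate() is what ends
   * up prefaulting pages for mlock() and for mmap() with MAP_POPULATE /
   * MAP_LOCKED (via mm_populate(), which asks for errors to be ignored). A
   * hypothetical userspace caller sees the mapping touched before mmap()
   * returns:
   *
   *	#include <sys/mman.h>
   *
   *	void *p = mmap(NULL, 1 << 20, PROT_READ | PROT_WRITE,
   *		       MAP_PRIVATE | MAP_ANONYMOUS | MAP_LOCKED | MAP_POPULATE,
   *		       -1, 0);
   *	if (p == MAP_FAILED)
   *		;	// e.g. EAGAIN when too much memory would be locked
   */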
  SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len)
  {
  	unsigned long locked;
  	unsigned long lock_limit;
  	int error = -ENOMEM;
  
  	if (!can_do_mlock())
  		return -EPERM;
  	lru_add_drain_all();	/* flush pagevec */
  	len = PAGE_ALIGN(len + (start & ~PAGE_MASK));
  	start &= PAGE_MASK;
  	lock_limit = rlimit(RLIMIT_MEMLOCK);
  	lock_limit >>= PAGE_SHIFT;
  	locked = len >> PAGE_SHIFT;
  
  	down_write(&current->mm->mmap_sem);
  
  	locked += current->mm->locked_vm;
  
  	/* check against resource limits */
  	if ((locked <= lock_limit) || capable(CAP_IPC_LOCK))
  		error = do_mlock(start, len, 1);

  	up_write(&current->mm->mmap_sem);
  	if (!error)
  		error = __mm_populate(start, len, 0);
  	return error;
  }
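  
  /*
   * Illustrative sketch, not part of this file: because sys_mlock() rounds
   * @start down and @len up to page boundaries (see above), locking any byte
   * of a page locks the whole page. A hypothetical userspace helper:
   *
   *	#include <sys/mman.h>
   *
   *	static int lock_buffer(void *buf, size_t size)
   *	{
   *		// The kernel aligns the range itself, so an unaligned
   *		// buf/size is fine; every page the buffer touches is locked.
   *		return mlock(buf, size);	// 0 on success, -1 + errno on failure
   *	}
   */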
  SYSCALL_DEFINE2(munlock, unsigned long, start, size_t, len)
  {
  	int ret;
  	len = PAGE_ALIGN(len + (start & ~PAGE_MASK));
  	start &= PAGE_MASK;
  
  	down_write(&current->mm->mmap_sem);
  	ret = do_mlock(start, len, 0);
  	up_write(&current->mm->mmap_sem);

  	return ret;
  }
  
  static int do_mlockall(int flags)
  {
  	struct vm_area_struct * vma, * prev = NULL;
  
  	if (flags & MCL_FUTURE)
  		current->mm->def_flags |= VM_LOCKED;
  	else
  		current->mm->def_flags &= ~VM_LOCKED;
  	if (flags == MCL_FUTURE)
  		goto out;
  
  	for (vma = current->mm->mmap; vma ; vma = prev->vm_next) {
  		vm_flags_t newflags;

  		newflags = vma->vm_flags & ~VM_LOCKED;
  		if (flags & MCL_CURRENT)
  			newflags |= VM_LOCKED;
  
  		/* Ignore errors */
  		mlock_fixup(vma, &prev, vma->vm_start, vma->vm_end, newflags);
  		cond_resched_rcu_qs();
  	}
  out:
  	return 0;
  }
  SYSCALL_DEFINE1(mlockall, int, flags)
  {
  	unsigned long lock_limit;
  	int ret = -EINVAL;
  
  	if (!flags || (flags & ~(MCL_CURRENT | MCL_FUTURE)))
  		goto out;
  
  	ret = -EPERM;
  	if (!can_do_mlock())
  		goto out;
  	if (flags & MCL_CURRENT)
  		lru_add_drain_all();	/* flush pagevec */

  	lock_limit = rlimit(RLIMIT_MEMLOCK);
  	lock_limit >>= PAGE_SHIFT;
  
  	ret = -ENOMEM;
  	down_write(&current->mm->mmap_sem);
  	if (!(flags & MCL_CURRENT) || (current->mm->total_vm <= lock_limit) ||
  	    capable(CAP_IPC_LOCK))
  		ret = do_mlockall(flags);
  	up_write(&current->mm->mmap_sem);
  	if (!ret && (flags & MCL_CURRENT))
  		mm_populate(0, TASK_SIZE);
  out:
  	return ret;
  }
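  
  /*
   * Illustrative sketch, not part of this file: mlockall() accepts only
   * MCL_CURRENT and/or MCL_FUTURE, and without CAP_IPC_LOCK the caller's
   * RLIMIT_MEMLOCK must cover current->mm->total_vm (checked above):
   *
   *	#include <sys/mman.h>
   *
   *	if (mlockall(MCL_CURRENT | MCL_FUTURE) != 0)
   *		;	// EINVAL, EPERM or ENOMEM, per the checks above
   */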
  SYSCALL_DEFINE0(munlockall)
  {
  	int ret;
  
  	down_write(&current->mm->mmap_sem);
  	ret = do_mlockall(0);
  	up_write(&current->mm->mmap_sem);
  	return ret;
  }
  
  /*
   * Objects with different lifetime than processes (SHM_LOCK and SHM_HUGETLB
   * shm segments) get accounted against the user_struct instead.
   */
  static DEFINE_SPINLOCK(shmlock_user_lock);
  
  int user_shm_lock(size_t size, struct user_struct *user)
  {
  	unsigned long lock_limit, locked;
  	int allowed = 0;
  
  	locked = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
  	lock_limit = rlimit(RLIMIT_MEMLOCK);
  	if (lock_limit == RLIM_INFINITY)
  		allowed = 1;
  	lock_limit >>= PAGE_SHIFT;
  	spin_lock(&shmlock_user_lock);
  	if (!allowed &&
  	    locked + user->locked_shm > lock_limit && !capable(CAP_IPC_LOCK))
  		goto out;
  	get_uid(user);
  	user->locked_shm += locked;
  	allowed = 1;
  out:
  	spin_unlock(&shmlock_user_lock);
  	return allowed;
  }
  
  void user_shm_unlock(size_t size, struct user_struct *user)
  {
  	spin_lock(&shmlock_user_lock);
  	user->locked_shm -= (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
  	spin_unlock(&shmlock_user_lock);
  	free_uid(user);
  }
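  
  /*
   * Illustrative sketch, not part of this file: user_shm_lock() and
   * user_shm_unlock() are the accounting hooks behind SysV shared memory
   * locking, which userspace reaches through shmctl(2) for some segment id
   * shmid (a hypothetical variable here):
   *
   *	#include <sys/shm.h>
   *
   *	// Lock the whole segment; accounted against a user_struct (see the
   *	// comment above) rather than the calling process, unless the limit
   *	// is infinite or the caller has CAP_IPC_LOCK.
   *	if (shmctl(shmid, SHM_LOCK, NULL) != 0)
   *		;	// e.g. EPERM or ENOMEM
   */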