/*
 *	linux/mm/mlock.c
 *
 *  (C) Copyright 1995 Linus Torvalds
 *  (C) Copyright 2002 Christoph Hellwig
 */

#include <linux/capability.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/mempolicy.h>
#include <linux/syscalls.h>
#include <linux/sched.h>
#include <linux/export.h>
#include <linux/rmap.h>
#include <linux/mmzone.h>
#include <linux/hugetlb.h>
#include <linux/memcontrol.h>
#include <linux/mm_inline.h>

#include "internal.h"

bool can_do_mlock(void)
{
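	/* Allowed if RLIMIT_MEMLOCK is non-zero or the caller has CAP_IPC_LOCK. */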
	if (rlimit(RLIMIT_MEMLOCK) != 0)
		return true;
	if (capable(CAP_IPC_LOCK))
		return true;
	return false;
}
EXPORT_SYMBOL(can_do_mlock);

/*
 * Mlocked pages are marked with PageMlocked() flag for efficient testing
 * in vmscan and, possibly, the fault path; and to support semi-accurate
 * statistics.
 *
 * An mlocked page [PageMlocked(page)] is unevictable.  As such, it will
 * be placed on the LRU "unevictable" list, rather than the [in]active lists.
 * The unevictable list is an LRU sibling list to the [in]active lists.
 * PageUnevictable is set to indicate the unevictable state.
 *
 * When lazy mlocking via vmscan, it is important to ensure that the
 * vma's VM_LOCKED status is not concurrently being modified, otherwise we
 * may have mlocked a page that is being munlocked. So lazy mlock must take
 * the mmap_sem for read, and verify that the vma really is locked
 * (see mm/rmap.c).
 */

/*
 *  LRU accounting for clear_page_mlock()
 */
void clear_page_mlock(struct page *page)
{
	if (!TestClearPageMlocked(page))
		return;

	mod_zone_page_state(page_zone(page), NR_MLOCK,
			    -hpage_nr_pages(page));
	count_vm_event(UNEVICTABLE_PGCLEARED);
	if (!isolate_lru_page(page)) {
		putback_lru_page(page);
	} else {
		/*
		 * We lost the race. The page already moved to the evictable list.
		 */
		if (PageUnevictable(page))
			count_vm_event(UNEVICTABLE_PGSTRANDED);
	}
}

/*
 * Mark page as mlocked if not already.
 * If page on LRU, isolate and putback to move to unevictable list.
 */
void mlock_vma_page(struct page *page)
{
	/* Serialize with page migration */
	BUG_ON(!PageLocked(page));

	VM_BUG_ON_PAGE(PageTail(page), page);
	VM_BUG_ON_PAGE(PageCompound(page) && PageDoubleMap(page), page);

	if (!TestSetPageMlocked(page)) {
		mod_zone_page_state(page_zone(page), NR_MLOCK,
				    hpage_nr_pages(page));
		count_vm_event(UNEVICTABLE_PGMLOCKED);
		if (!isolate_lru_page(page))
			putback_lru_page(page);
	}
}

/*
 * Isolate a page from LRU with optional get_page() pin.
 * Assumes lru_lock already held and page already pinned.
 */
static bool __munlock_isolate_lru_page(struct page *page, bool getpage)
{
	if (PageLRU(page)) {
		struct lruvec *lruvec;

		lruvec = mem_cgroup_page_lruvec(page, page_pgdat(page));
		if (getpage)
			get_page(page);
		ClearPageLRU(page);
		del_page_from_lru_list(page, lruvec, page_lru(page));
		return true;
	}

	return false;
}

/*
 * Finish munlock after successful page isolation
 *
 * Page must be locked. This is a wrapper for try_to_munlock()
 * and putback_lru_page() with munlock accounting.
 */
static void __munlock_isolated_page(struct page *page)
{
	int ret = SWAP_AGAIN;

	/*
	 * Optimization: if the page was mapped just once, that's our mapping
	 * and we don't need to check all the other vmas.
	 */
	if (page_mapcount(page) > 1)
		ret = try_to_munlock(page);

	/* Did try_to_munlock() succeed or punt? */
	if (ret != SWAP_MLOCK)
		count_vm_event(UNEVICTABLE_PGMUNLOCKED);

	putback_lru_page(page);
}

/*
 * Accounting for page isolation fail during munlock
 *
 * Performs accounting when page isolation fails in munlock. There is nothing
 * else to do because it means some other task has already removed the page
 * from the LRU. putback_lru_page() will take care of removing the page from
 * the unevictable list, if necessary. vmscan [page_referenced()] will move
 * the page back to the unevictable list if some other vma has it mlocked.
 */
static void __munlock_isolation_failed(struct page *page)
{
	if (PageUnevictable(page))
		__count_vm_event(UNEVICTABLE_PGSTRANDED);
	else
		__count_vm_event(UNEVICTABLE_PGMUNLOCKED);
}

/**
 * munlock_vma_page - munlock a vma page
 * @page - page to be unlocked, either a normal page or THP page head
 *
 * returns the size of the page as a page mask (0 for normal page,
 *         HPAGE_PMD_NR - 1 for THP head page)
 *
 * called from munlock()/munmap() path with page supposedly on the LRU.
 * When we munlock a page, because the vma where we found the page is being
 * munlock()ed or munmap()ed, we want to check whether other vmas hold the
 * page locked so that we can leave it on the unevictable lru list and not
 * bother vmscan with it.  However, to walk the page's rmap list in
 * try_to_munlock() we must isolate the page from the LRU.  If some other
 * task has removed the page from the LRU, we won't be able to do that.
 * So we clear the PageMlocked as we might not get another chance.  If we
 * can't isolate the page, we leave it for putback_lru_page() and vmscan
 * [page_referenced()/try_to_unmap()] to deal with.
 */
unsigned int munlock_vma_page(struct page *page)
{
	int nr_pages;
	struct zone *zone = page_zone(page);

	/* For try_to_munlock() and to serialize with page migration */
	BUG_ON(!PageLocked(page));

	VM_BUG_ON_PAGE(PageTail(page), page);

	/*
	 * Serialize with any parallel __split_huge_page_refcount() which
	 * might otherwise copy PageMlocked to part of the tail pages before
	 * we clear it in the head page. It also stabilizes hpage_nr_pages().
	 */
	spin_lock_irq(zone_lru_lock(zone));

	if (!TestClearPageMlocked(page)) {
		/* Potentially, PTE-mapped THP: do not skip the rest PTEs */
		nr_pages = 1;
		goto unlock_out;
	}

	nr_pages = hpage_nr_pages(page);
	__mod_zone_page_state(zone, NR_MLOCK, -nr_pages);

	if (__munlock_isolate_lru_page(page, true)) {
		spin_unlock_irq(zone_lru_lock(zone));
		__munlock_isolated_page(page);
		goto out;
	}
	__munlock_isolation_failed(page);

unlock_out:
	spin_unlock_irq(zone_lru_lock(zone));

out:
	return nr_pages - 1;
}

/*
 * convert get_user_pages() return value to posix mlock() error
 */
static int __mlock_posix_error_return(long retval)
{
	if (retval == -EFAULT)
		retval = -ENOMEM;
	else if (retval == -ENOMEM)
		retval = -EAGAIN;
	return retval;
}

/*
 * Prepare page for fast batched LRU putback via putback_lru_evictable_pagevec()
 *
 * The fast path is available only for evictable pages with single mapping.
 * Then we can bypass the per-cpu pvec and get better performance.
 * When mapcount > 1 we need try_to_munlock(), which can fail.
 * When !page_evictable(), we need the full redo logic of putback_lru_page()
 * to avoid leaving an evictable page on the unevictable list.
 *
 * In case of success, @page is added to @pvec and @pgrescued is incremented
 * in case that the page was previously unevictable. @page is also unlocked.
 */
static bool __putback_lru_fast_prepare(struct page *page, struct pagevec *pvec,
		int *pgrescued)
{
	VM_BUG_ON_PAGE(PageLRU(page), page);
	VM_BUG_ON_PAGE(!PageLocked(page), page);

	if (page_mapcount(page) <= 1 && page_evictable(page)) {
		pagevec_add(pvec, page);
		if (TestClearPageUnevictable(page))
			(*pgrescued)++;
		unlock_page(page);
		return true;
	}

	return false;
}

/*
 * Putback multiple evictable pages to the LRU
 *
 * Batched putback of evictable pages that bypasses the per-cpu pvec. Some of
 * the pages might have meanwhile become unevictable but that is OK.
 */
static void __putback_lru_fast(struct pagevec *pvec, int pgrescued)
{
	count_vm_events(UNEVICTABLE_PGMUNLOCKED, pagevec_count(pvec));
	/*
	 * __pagevec_lru_add() calls release_pages() so we don't call
	 * put_page() explicitly
	 */
	__pagevec_lru_add(pvec);
	count_vm_events(UNEVICTABLE_PGRESCUED, pgrescued);
}

/*
 * Munlock a batch of pages from the same zone
 *
 * The work is split into two main phases. First phase clears the Mlocked flag
 * and attempts to isolate the pages, all under a single zone lru lock.
 * The second phase finishes the munlock only for pages where isolation
 * succeeded.
 *
 * Note that the pagevec may be modified during the process.
 */
static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
{
	int i;
	int nr = pagevec_count(pvec);
	int delta_munlocked;
	struct pagevec pvec_putback;
	int pgrescued = 0;

	pagevec_init(&pvec_putback, 0);
	/* Phase 1: page isolation */
	spin_lock_irq(zone_lru_lock(zone));
	for (i = 0; i < nr; i++) {
		struct page *page = pvec->pages[i];

		if (TestClearPageMlocked(page)) {
			/*
			 * We already have pin from follow_page_mask()
			 * so we can spare the get_page() here.
			 */
			if (__munlock_isolate_lru_page(page, false))
				continue;
			else
				__munlock_isolation_failed(page);
		}

		/*
		 * We won't be munlocking this page in the next phase
		 * but we still need to release the follow_page_mask()
		 * pin. We cannot do it under lru_lock however. If it's
		 * the last pin, __page_cache_release() would deadlock.
		 */
		pagevec_add(&pvec_putback, pvec->pages[i]);
		pvec->pages[i] = NULL;
	}
	delta_munlocked = -nr + pagevec_count(&pvec_putback);
	__mod_zone_page_state(zone, NR_MLOCK, delta_munlocked);
	spin_unlock_irq(zone_lru_lock(zone));

	/* Now we can release pins of pages that we are not munlocking */
	pagevec_release(&pvec_putback);

	/* Phase 2: page munlock */
	for (i = 0; i < nr; i++) {
		struct page *page = pvec->pages[i];

		if (page) {
			lock_page(page);
			if (!__putback_lru_fast_prepare(page, &pvec_putback,
					&pgrescued)) {
				/*
				 * Slow path. We don't want to lose the last
				 * pin before unlock_page()
				 */
				get_page(page); /* for putback_lru_page() */
				__munlock_isolated_page(page);
				unlock_page(page);
				put_page(page); /* from follow_page_mask() */
			}
		}
	}

	/*
	 * Phase 3: page putback for pages that qualified for the fast path
	 * This will also call put_page() to return pin from follow_page_mask()
	 */
	if (pagevec_count(&pvec_putback))
		__putback_lru_fast(&pvec_putback, pgrescued);
}

/*
 * Fill up pagevec for __munlock_pagevec using pte walk
 *
 * The function expects that the struct page corresponding to @start address is
 * a non-THP page already pinned and in the @pvec, and that it belongs to @zone.
 *
 * The rest of @pvec is filled by subsequent pages within the same pmd and same
 * zone, as long as the pte's are present and vm_normal_page() succeeds. These
 * pages also get pinned.
 *
 * Returns the address of the next page that should be scanned. This equals
 * @start + PAGE_SIZE when no page could be added by the pte walk.
 */
static unsigned long __munlock_pagevec_fill(struct pagevec *pvec,
		struct vm_area_struct *vma, int zoneid, unsigned long start,
		unsigned long end)
{
	pte_t *pte;
	spinlock_t *ptl;

	/*
	 * Initialize pte walk starting at the already pinned page where we
	 * are sure that there is a pte, as it was pinned under the same
	 * mmap_sem write op.
	 */
	pte = get_locked_pte(vma->vm_mm, start, &ptl);
	/* Make sure we do not cross the page table boundary */
	end = pgd_addr_end(start, end);
	end = pud_addr_end(start, end);
	end = pmd_addr_end(start, end);

	/* The page next to the pinned page is the first we will try to get */
	start += PAGE_SIZE;
	while (start < end) {
		struct page *page = NULL;
		pte++;
		if (pte_present(*pte))
			page = vm_normal_page(vma, start, *pte);
		/*
		 * Break if page could not be obtained or the page's node+zone does not
		 * match
		 */
		if (!page || page_zone_id(page) != zoneid)
			break;

		/*
		 * Do not use pagevec for PTE-mapped THP,
		 * munlock_vma_pages_range() will handle them.
		 */
		if (PageTransCompound(page))
			break;

		get_page(page);
		/*
		 * Increase the address that will be returned *before* the
		 * eventual break due to pvec becoming full by adding the page
		 */
		start += PAGE_SIZE;
		if (pagevec_add(pvec, page) == 0)
			break;
	}
	pte_unmap_unlock(pte, ptl);
	return start;
}

/*
 * munlock_vma_pages_range() - munlock all pages in the vma range.
 * @vma - vma containing range to be munlock()ed.
 * @start - start address in @vma of the range
 * @end - end of range in @vma.
 *
 *  For mremap(), munmap() and exit().
 *
 * Called with @vma VM_LOCKED.
 *
 * Returns with VM_LOCKED cleared.  Callers must be prepared to
 * deal with this.
 *
 * We don't save and restore VM_LOCKED here because pages are
 * still on lru.  In unmap path, pages might be scanned by reclaim
 * and re-mlocked by try_to_{munlock|unmap} before we unmap and
 * free them.  This will result in freeing mlocked pages.
 */
void munlock_vma_pages_range(struct vm_area_struct *vma,
			     unsigned long start, unsigned long end)
{
	vma->vm_flags &= VM_LOCKED_CLEAR_MASK;

	while (start < end) {
		struct page *page;
		unsigned int page_mask;
		unsigned long page_increm;
		struct pagevec pvec;
		struct zone *zone;
		int zoneid;

		pagevec_init(&pvec, 0);
		/*
		 * Although FOLL_DUMP is intended for get_dump_page(),
		 * it just so happens that its special treatment of the
		 * ZERO_PAGE (returning an error instead of doing get_page)
		 * suits munlock very well (and if somehow an abnormal page
		 * has sneaked into the range, we won't oops here: great).
		 */
		page = follow_page_mask(vma, start, FOLL_GET | FOLL_DUMP,
				&page_mask);

		if (page && !IS_ERR(page)) {
			if (PageTransTail(page)) {
				VM_BUG_ON_PAGE(PageMlocked(page), page);
				put_page(page); /* follow_page_mask() */
			} else if (PageTransHuge(page)) {
				lock_page(page);
				/*
				 * Any THP page found by follow_page_mask() may
				 * have gotten split before reaching
				 * munlock_vma_page(), so we need to recompute
				 * the page_mask here.
				 */
				page_mask = munlock_vma_page(page);
				unlock_page(page);
				put_page(page); /* follow_page_mask() */
			} else {
				/*
				 * Non-huge pages are handled in batches via
				 * pagevec. The pin from follow_page_mask()
				 * prevents them from being collapsed by THP.
				 */
				pagevec_add(&pvec, page);
				zone = page_zone(page);
				zoneid = page_zone_id(page);

				/*
				 * Try to fill the rest of pagevec using fast
				 * pte walk. This will also update start to
				 * the next page to process. Then munlock the
				 * pagevec.
				 */
				start = __munlock_pagevec_fill(&pvec, vma,
						zoneid, start, end);
				__munlock_pagevec(&pvec, zone);
				goto next;
			}
		}
		page_increm = 1 + page_mask;
		start += page_increm * PAGE_SIZE;
next:
		cond_resched();
	}
}

/*
 * mlock_fixup  - handle mlock[all]/munlock[all] requests.
 *
 * Filters out "special" vmas -- VM_LOCKED never gets set for these, and
 * munlock is a no-op.  However, for some special vmas, we go ahead and
 * populate the ptes.
 *
 * For vmas that pass the filters, merge/split as appropriate.
 */
static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
	unsigned long start, unsigned long end, vm_flags_t newflags)
{
	struct mm_struct *mm = vma->vm_mm;
	pgoff_t pgoff;
	int nr_pages;
	int ret = 0;
	int lock = !!(newflags & VM_LOCKED);
	vm_flags_t old_flags = vma->vm_flags;

	if (newflags == vma->vm_flags || (vma->vm_flags & VM_SPECIAL) ||
	    is_vm_hugetlb_page(vma) || vma == get_gate_vma(current->mm))
		/* don't set VM_LOCKED or VM_LOCKONFAULT and don't count */
		goto out;

	pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
	*prev = vma_merge(mm, *prev, start, end, newflags, vma->anon_vma,
			  vma->vm_file, pgoff, vma_policy(vma),
			  vma->vm_userfaultfd_ctx);
	if (*prev) {
		vma = *prev;
		goto success;
	}

	if (start != vma->vm_start) {
		ret = split_vma(mm, vma, start, 1);
		if (ret)
			goto out;
	}

	if (end != vma->vm_end) {
		ret = split_vma(mm, vma, end, 0);
		if (ret)
			goto out;
	}

success:
	/*
	 * Keep track of amount of locked VM.
	 */
	nr_pages = (end - start) >> PAGE_SHIFT;
	if (!lock)
		nr_pages = -nr_pages;
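	/* The vma was already VM_LOCKED; don't account its pages a second time. */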
	else if (old_flags & VM_LOCKED)
		nr_pages = 0;
	mm->locked_vm += nr_pages;

	/*
	 * vm_flags is protected by the mmap_sem held in write mode.
	 * It's okay if try_to_unmap_one unmaps a page just after we
	 * set VM_LOCKED, populate_vma_page_range will bring it back.
	 */

	if (lock)
		vma->vm_flags = newflags;
	else
		munlock_vma_pages_range(vma, start, end);

out:
	*prev = vma;
	return ret;
}

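/*
 * Apply the given mlock flags (VM_LOCKED and/or VM_LOCKONFAULT, or 0 for
 * munlock) to every vma overlapping [start, start + len), splitting or
 * merging vmas via mlock_fixup() as needed.
 */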
static int apply_vma_lock_flags(unsigned long start, size_t len,
				vm_flags_t flags)
{
	unsigned long nstart, end, tmp;
	struct vm_area_struct * vma, * prev;
	int error;

	VM_BUG_ON(offset_in_page(start));
	VM_BUG_ON(len != PAGE_ALIGN(len));
	end = start + len;
	if (end < start)
		return -EINVAL;
	if (end == start)
		return 0;
	vma = find_vma(current->mm, start);
	if (!vma || vma->vm_start > start)
		return -ENOMEM;
	prev = vma->vm_prev;
	if (start > vma->vm_start)
		prev = vma;

	for (nstart = start ; ; ) {
		vm_flags_t newflags = vma->vm_flags & VM_LOCKED_CLEAR_MASK;

		newflags |= flags;

		/* Here we know that vma->vm_start <= nstart < vma->vm_end. */
		tmp = vma->vm_end;
		if (tmp > end)
			tmp = end;
		error = mlock_fixup(vma, &prev, nstart, tmp, newflags);
		if (error)
			break;
		nstart = tmp;
		if (nstart < prev->vm_end)
			nstart = prev->vm_end;
		if (nstart >= end)
			break;

		vma = prev->vm_next;
		if (!vma || vma->vm_start != nstart) {
			error = -ENOMEM;
			break;
		}
	}
	return error;
}

/*
 * Go through the vma areas and sum the size of the mlocked
 * vma pages, as the return value.
 * Note that the deferred memory locking case (mlock2() with MLOCK_ONFAULT)
 * is also counted.
 * Return value: count of previously mlocked pages
 */
static int count_mm_mlocked_page_nr(struct mm_struct *mm,
		unsigned long start, size_t len)
{
	struct vm_area_struct *vma;
	int count = 0;

	if (mm == NULL)
		mm = current->mm;

	vma = find_vma(mm, start);
	if (vma == NULL)
		vma = mm->mmap;

	for (; vma ; vma = vma->vm_next) {
		if (start >= vma->vm_end)
			continue;
		if (start + len <= vma->vm_start)
			break;
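		/*
		 * Only the part of a VM_LOCKED vma that overlaps
		 * [start, start + len) contributes to the count.
		 */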
		if (vma->vm_flags & VM_LOCKED) {
			if (start > vma->vm_start)
				count -= (start - vma->vm_start);
			if (start + len < vma->vm_end) {
				count += start + len - vma->vm_start;
				break;
			}
			count += vma->vm_end - vma->vm_start;
		}
	}

	return count >> PAGE_SHIFT;
}

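/*
 * Common implementation for mlock() and mlock2(): check the request against
 * RLIMIT_MEMLOCK, apply the VM_LOCKED/VM_LOCKONFAULT flags to the vmas in the
 * range, then populate the range via __mm_populate().
 */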
static __must_check int do_mlock(unsigned long start, size_t len, vm_flags_t flags)
{
	unsigned long locked;
	unsigned long lock_limit;
	int error = -ENOMEM;

	if (!can_do_mlock())
		return -EPERM;

	lru_add_drain_all();	/* flush pagevec */

	len = PAGE_ALIGN(len + (offset_in_page(start)));
	start &= PAGE_MASK;

	lock_limit = rlimit(RLIMIT_MEMLOCK);
	lock_limit >>= PAGE_SHIFT;
	locked = len >> PAGE_SHIFT;

	if (down_write_killable(&current->mm->mmap_sem))
		return -EINTR;

	locked += current->mm->locked_vm;
	if ((locked > lock_limit) && (!capable(CAP_IPC_LOCK))) {
		/*
		 * It is possible that the regions requested intersect with
		 * previously mlocked areas; that part of the area is already
		 * accounted in "mm->locked_vm" and should not be counted
		 * again toward the new mlock increment. So check and adjust
		 * the locked count if necessary.
		 */
		locked -= count_mm_mlocked_page_nr(current->mm,
				start, len);
	}

	/* check against resource limits */
	if ((locked <= lock_limit) || capable(CAP_IPC_LOCK))
		error = apply_vma_lock_flags(start, len, flags);

	up_write(&current->mm->mmap_sem);
	if (error)
		return error;

	error = __mm_populate(start, len, 0);
	if (error)
		return __mlock_posix_error_return(error);
	return 0;
}

SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len)
{
	return do_mlock(start, len, VM_LOCKED);
}

SYSCALL_DEFINE3(mlock2, unsigned long, start, size_t, len, int, flags)
{
	vm_flags_t vm_flags = VM_LOCKED;

	if (flags & ~MLOCK_ONFAULT)
		return -EINVAL;

	if (flags & MLOCK_ONFAULT)
		vm_flags |= VM_LOCKONFAULT;

	return do_mlock(start, len, vm_flags);
}

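/*
 * munlock(2): undo mlock()/mlock2() on the given range; VM_LOCKED and
 * VM_LOCKONFAULT are cleared on the vmas and the pages can return to the
 * evictable LRU lists unless another vma still holds them mlocked.
 */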
SYSCALL_DEFINE2(munlock, unsigned long, start, size_t, len)
{
	int ret;

	len = PAGE_ALIGN(len + (offset_in_page(start)));
	start &= PAGE_MASK;

	if (down_write_killable(&current->mm->mmap_sem))
		return -EINTR;
	ret = apply_vma_lock_flags(start, len, 0);
	up_write(&current->mm->mmap_sem);

	return ret;
}

/*
 * Take the MCL_* flags passed into mlockall (or 0 if called from munlockall)
 * and translate into the appropriate modifications to mm->def_flags and/or the
 * flags for all current VMAs.
 *
 * There are a couple of subtleties with this.  If mlockall() is called multiple
 * times with different flags, the values do not necessarily stack.  If mlockall
 * is called once including the MCL_FUTURE flag and then a second time without
 * it, VM_LOCKED and VM_LOCKONFAULT will be cleared from mm->def_flags.
 */
static int apply_mlockall_flags(int flags)
{
	struct vm_area_struct * vma, * prev = NULL;
	vm_flags_t to_add = 0;

	current->mm->def_flags &= VM_LOCKED_CLEAR_MASK;
	if (flags & MCL_FUTURE) {
		current->mm->def_flags |= VM_LOCKED;

		if (flags & MCL_ONFAULT)
			current->mm->def_flags |= VM_LOCKONFAULT;

		if (!(flags & MCL_CURRENT))
			goto out;
	}

	if (flags & MCL_CURRENT) {
		to_add |= VM_LOCKED;
		if (flags & MCL_ONFAULT)
			to_add |= VM_LOCKONFAULT;
	}

	for (vma = current->mm->mmap; vma ; vma = prev->vm_next) {
		vm_flags_t newflags;

		newflags = vma->vm_flags & VM_LOCKED_CLEAR_MASK;
		newflags |= to_add;

		/* Ignore errors */
		mlock_fixup(vma, &prev, vma->vm_start, vma->vm_end, newflags);
		cond_resched_rcu_qs();
	}
out:
	return 0;
}

SYSCALL_DEFINE1(mlockall, int, flags)
{
	unsigned long lock_limit;
	int ret;

	if (!flags || (flags & ~(MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT)))
		return -EINVAL;

	if (!can_do_mlock())
		return -EPERM;

	if (flags & MCL_CURRENT)
		lru_add_drain_all();	/* flush pagevec */

	lock_limit = rlimit(RLIMIT_MEMLOCK);
	lock_limit >>= PAGE_SHIFT;

	if (down_write_killable(&current->mm->mmap_sem))
		return -EINTR;

	ret = -ENOMEM;
	if (!(flags & MCL_CURRENT) || (current->mm->total_vm <= lock_limit) ||
	    capable(CAP_IPC_LOCK))
		ret = apply_mlockall_flags(flags);
	up_write(&current->mm->mmap_sem);
	if (!ret && (flags & MCL_CURRENT))
		mm_populate(0, TASK_SIZE);

	return ret;
}

SYSCALL_DEFINE0(munlockall)
{
	int ret;

	if (down_write_killable(&current->mm->mmap_sem))
		return -EINTR;
	ret = apply_mlockall_flags(0);
	up_write(&current->mm->mmap_sem);
	return ret;
}

/*
 * Objects with different lifetime than processes (SHM_LOCK and SHM_HUGETLB
 * shm segments) get accounted against the user_struct instead.
 */
static DEFINE_SPINLOCK(shmlock_user_lock);

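/*
 * Returns 1 (and takes a reference on @user, charging the locked pages to
 * @user->locked_shm) if the segment may be locked, 0 otherwise.  An infinite
 * RLIMIT_MEMLOCK or CAP_IPC_LOCK bypasses the limit check.
 */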
int user_shm_lock(size_t size, struct user_struct *user)
{
	unsigned long lock_limit, locked;
	int allowed = 0;

	locked = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
	lock_limit = rlimit(RLIMIT_MEMLOCK);
	if (lock_limit == RLIM_INFINITY)
		allowed = 1;
	lock_limit >>= PAGE_SHIFT;
	spin_lock(&shmlock_user_lock);
	if (!allowed &&
	    locked + user->locked_shm > lock_limit && !capable(CAP_IPC_LOCK))
		goto out;
	get_uid(user);
	user->locked_shm += locked;
	allowed = 1;
out:
	spin_unlock(&shmlock_user_lock);
	return allowed;
}

void user_shm_unlock(size_t size, struct user_struct *user)
{
	spin_lock(&shmlock_user_lock);
	user->locked_shm -= (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
	spin_unlock(&shmlock_user_lock);
	free_uid(user);
}