mm/mlock.c
  // SPDX-License-Identifier: GPL-2.0
  /*
   *	linux/mm/mlock.c
   *
   *  (C) Copyright 1995 Linus Torvalds
   *  (C) Copyright 2002 Christoph Hellwig
   */
  #include <linux/capability.h>
  #include <linux/mman.h>
  #include <linux/mm.h>
  #include <linux/sched/user.h>
  #include <linux/swap.h>
  #include <linux/swapops.h>
  #include <linux/pagemap.h>
  #include <linux/pagevec.h>
  #include <linux/mempolicy.h>
  #include <linux/syscalls.h>
  #include <linux/sched.h>
  #include <linux/export.h>
  #include <linux/rmap.h>
  #include <linux/mmzone.h>
  #include <linux/hugetlb.h>
  #include <linux/memcontrol.h>
  #include <linux/mm_inline.h>
  
  #include "internal.h"

  bool can_do_mlock(void)
  {
  	if (rlimit(RLIMIT_MEMLOCK) != 0)
  		return true;
  	if (capable(CAP_IPC_LOCK))
  		return true;
  	return false;
  }
  EXPORT_SYMBOL(can_do_mlock);
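
  /*
   * Note: either a nonzero RLIMIT_MEMLOCK or CAP_IPC_LOCK is enough to
   * pass this check; how much memory may actually be locked is enforced
   * later, in do_mlock() and user_shm_lock() below.
   */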

  /*
   * Mlocked pages are marked with PageMlocked() flag for efficient testing
   * in vmscan and, possibly, the fault path; and to support semi-accurate
   * statistics.
   *
   * An mlocked page [PageMlocked(page)] is unevictable.  As such, it will
   * be placed on the LRU "unevictable" list, rather than the [in]active lists.
   * The unevictable list is an LRU sibling list to the [in]active lists.
   * PageUnevictable is set to indicate the unevictable state.
   *
   * When lazy mlocking via vmscan, it is important to ensure that the
   * vma's VM_LOCKED status is not concurrently being modified, otherwise we
   * may have mlocked a page that is being munlocked. So lazy mlock must take
   * the mmap_sem for read, and verify that the vma really is locked
   * (see mm/rmap.c).
   */
  
  /*
   *  LRU accounting for clear_page_mlock()
   */
  void clear_page_mlock(struct page *page)
  {
  	if (!TestClearPageMlocked(page))
  		return;

  	mod_zone_page_state(page_zone(page), NR_MLOCK,
  			    -hpage_nr_pages(page));
  	count_vm_event(UNEVICTABLE_PGCLEARED);
  	/*
  	 * The previous TestClearPageMlocked() corresponds to the smp_mb()
  	 * in __pagevec_lru_add_fn().
  	 *
  	 * See __pagevec_lru_add_fn for more explanation.
  	 */
  	if (!isolate_lru_page(page)) {
  		putback_lru_page(page);
  	} else {
  		/*
  		 * We lost the race. The page already moved to the evictable list.
  		 */
  		if (PageUnevictable(page))
  			count_vm_event(UNEVICTABLE_PGSTRANDED);
  	}
  }
  
  /*
   * Mark page as mlocked if not already.
   * If page on LRU, isolate and putback to move to unevictable list.
   */
  void mlock_vma_page(struct page *page)
  {
  	/* Serialize with page migration */
  	BUG_ON(!PageLocked(page));
  	VM_BUG_ON_PAGE(PageTail(page), page);
  	VM_BUG_ON_PAGE(PageCompound(page) && PageDoubleMap(page), page);
  	if (!TestSetPageMlocked(page)) {
  		mod_zone_page_state(page_zone(page), NR_MLOCK,
  				    hpage_nr_pages(page));
  		count_vm_event(UNEVICTABLE_PGMLOCKED);
  		if (!isolate_lru_page(page))
  			putback_lru_page(page);
  	}
  }
  /*
   * Isolate a page from LRU with optional get_page() pin.
   * Assumes lru_lock already held and page already pinned.
   */
  static bool __munlock_isolate_lru_page(struct page *page, bool getpage)
  {
  	if (PageLRU(page)) {
  		struct lruvec *lruvec;
  		lruvec = mem_cgroup_page_lruvec(page, page_pgdat(page));
  		if (getpage)
  			get_page(page);
  		ClearPageLRU(page);
  		del_page_from_lru_list(page, lruvec, page_lru(page));
  		return true;
  	}
  
  	return false;
  }
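
  /*
   * The @getpage argument above mirrors the two callers:
   * munlock_vma_page() passes true because it needs its own page
   * reference for the later putback, while __munlock_pagevec() passes
   * false because it already holds the follow_page_mask() pin on each
   * page.
   */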
  
  /*
   * Finish munlock after successful page isolation
   *
   * Page must be locked. This is a wrapper for try_to_munlock()
   * and putback_lru_page() with munlock accounting.
   */
  static void __munlock_isolated_page(struct page *page)
  {
  	/*
  	 * Optimization: if the page was mapped just once, that's our mapping
  	 * and we don't need to check all the other vmas.
  	 */
  	if (page_mapcount(page) > 1)
  		try_to_munlock(page);
  
  	/* Did try_to_munlock() succeed or punt? */
  	if (!PageMlocked(page))
  		count_vm_event(UNEVICTABLE_PGMUNLOCKED);
  
  	putback_lru_page(page);
  }
  
  /*
   * Accounting for page isolation fail during munlock
   *
   * Performs accounting when page isolation fails in munlock. There is nothing
   * else to do because it means some other task has already removed the page
   * from the LRU. putback_lru_page() will take care of removing the page from
   * the unevictable list, if necessary. vmscan [page_referenced()] will move
   * the page back to the unevictable list if some other vma has it mlocked.
   */
  static void __munlock_isolation_failed(struct page *page)
  {
  	if (PageUnevictable(page))
  		__count_vm_event(UNEVICTABLE_PGSTRANDED);
  	else
  		__count_vm_event(UNEVICTABLE_PGMUNLOCKED);
  }
  /**
   * munlock_vma_page - munlock a vma page
   * @page: page to be unlocked, either a normal page or THP page head
   *
   * returns the size of the page as a page mask (0 for normal page,
   *         HPAGE_PMD_NR - 1 for THP head page)
   *
   * called from munlock()/munmap() path with page supposedly on the LRU.
   * When we munlock a page, because the vma where we found the page is being
   * munlock()ed or munmap()ed, we want to check whether other vmas hold the
   * page locked so that we can leave it on the unevictable lru list and not
   * bother vmscan with it.  However, to walk the page's rmap list in
   * try_to_munlock() we must isolate the page from the LRU.  If some other
   * task has removed the page from the LRU, we won't be able to do that.
   * So we clear the PageMlocked as we might not get another chance.  If we
   * can't isolate the page, we leave it for putback_lru_page() and vmscan
   * [page_referenced()/try_to_unmap()] to deal with.
   */
  unsigned int munlock_vma_page(struct page *page)
  {
  	int nr_pages;
  	struct zone *zone = page_zone(page);

  	/* For try_to_munlock() and to serialize with page migration */
  	BUG_ON(!PageLocked(page));
  	VM_BUG_ON_PAGE(PageTail(page), page);
  	/*
  	 * Serialize with any parallel __split_huge_page_refcount() which
  	 * might otherwise copy PageMlocked to part of the tail pages before
  	 * we clear it in the head page. It also stabilizes hpage_nr_pages().
  	 */
  	spin_lock_irq(zone_lru_lock(zone));

  	if (!TestClearPageMlocked(page)) {
  		/* Potentially, PTE-mapped THP: do not skip the rest PTEs */
  		nr_pages = 1;
  		goto unlock_out;
  	}

  	nr_pages = hpage_nr_pages(page);
  	__mod_zone_page_state(zone, NR_MLOCK, -nr_pages);
  
  	if (__munlock_isolate_lru_page(page, true)) {
  		spin_unlock_irq(zone_lru_lock(zone));
  		__munlock_isolated_page(page);
  		goto out;
  	}
  	__munlock_isolation_failed(page);
  
  unlock_out:
  	spin_unlock_irq(zone_lru_lock(zone));
  
  out:
  	return nr_pages - 1;
  }
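
  /*
   * The page-mask return value above lets the caller skip a THP's tail
   * pages in one step: munlock_vma_pages_range() below advances by
   * (page_mask + 1) pages after each call.
   */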
  /*
   * convert get_user_pages() return value to posix mlock() error
   */
  static int __mlock_posix_error_return(long retval)
  {
  	if (retval == -EFAULT)
  		retval = -ENOMEM;
  	else if (retval == -ENOMEM)
  		retval = -EAGAIN;
  	return retval;
  }
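
  /*
   * For example, an mlock() that touches an unmapped address sees
   * get_user_pages() fail with -EFAULT, which becomes the POSIX-mandated
   * -ENOMEM above, while an allocation failure during fault-in (-ENOMEM)
   * is reported as -EAGAIN.
   */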
  /*
   * Prepare page for fast batched LRU putback via __putback_lru_fast()
   *
   * The fast path is available only for evictable pages with single mapping.
   * Then we can bypass the per-cpu pvec and get better performance.
   * When mapcount > 1 we need try_to_munlock() which can fail.
   * When !page_evictable(), we need the full redo logic of putback_lru_page to
   * avoid leaving an evictable page on the unevictable list.
   *
   * In case of success, @page is added to @pvec and @pgrescued is incremented
   * in case that the page was previously unevictable. @page is also unlocked.
   */
  static bool __putback_lru_fast_prepare(struct page *page, struct pagevec *pvec,
  		int *pgrescued)
  {
  	VM_BUG_ON_PAGE(PageLRU(page), page);
  	VM_BUG_ON_PAGE(!PageLocked(page), page);
  
  	if (page_mapcount(page) <= 1 && page_evictable(page)) {
  		pagevec_add(pvec, page);
  		if (TestClearPageUnevictable(page))
  			(*pgrescued)++;
  		unlock_page(page);
  		return true;
  	}
  
  	return false;
  }
  
  /*
   * Putback multiple evictable pages to the LRU
   *
   * Batched putback of evictable pages that bypasses the per-cpu pvec. Some of
   * the pages might have meanwhile become unevictable but that is OK.
   */
  static void __putback_lru_fast(struct pagevec *pvec, int pgrescued)
  {
  	count_vm_events(UNEVICTABLE_PGMUNLOCKED, pagevec_count(pvec));
  	/*
  	 * __pagevec_lru_add() calls release_pages() so we don't call
  	 * put_page() explicitly
  	 */
  	__pagevec_lru_add(pvec);
  	count_vm_events(UNEVICTABLE_PGRESCUED, pgrescued);
  }
  
  /*
   * Munlock a batch of pages from the same zone
   *
   * The work is split to two main phases. First phase clears the Mlocked flag
   * and attempts to isolate the pages, all under a single zone lru lock.
   * The second phase finishes the munlock only for pages where isolation
   * succeeded.
   *
   * Note that the pagevec may be modified during the process.
   */
  static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
  {
  	int i;
  	int nr = pagevec_count(pvec);
  	int delta_munlocked = -nr;
  	struct pagevec pvec_putback;
  	int pgrescued = 0;

  	pagevec_init(&pvec_putback);

  	/* Phase 1: page isolation */
  	spin_lock_irq(zone_lru_lock(zone));
  	for (i = 0; i < nr; i++) {
  		struct page *page = pvec->pages[i];
  
  		if (TestClearPageMlocked(page)) {
  			/*
  			 * We already have pin from follow_page_mask()
  			 * so we can spare the get_page() here.
  			 */
  			if (__munlock_isolate_lru_page(page, false))
  				continue;
  			else
  				__munlock_isolation_failed(page);
  		} else {
  			delta_munlocked++;
  		}
  
  		/*
  		 * We won't be munlocking this page in the next phase
  		 * but we still need to release the follow_page_mask()
  		 * pin. We cannot do it under lru_lock however. If it's
  		 * the last pin, __page_cache_release() would deadlock.
  		 */
  		pagevec_add(&pvec_putback, pvec->pages[i]);
  		pvec->pages[i] = NULL;
  	}
  	__mod_zone_page_state(zone, NR_MLOCK, delta_munlocked);
  	spin_unlock_irq(zone_lru_lock(zone));

  	/* Now we can release pins of pages that we are not munlocking */
  	pagevec_release(&pvec_putback);
  	/* Phase 2: page munlock */
  	for (i = 0; i < nr; i++) {
  		struct page *page = pvec->pages[i];
  
  		if (page) {
  			lock_page(page);
  			if (!__putback_lru_fast_prepare(page, &pvec_putback,
  					&pgrescued)) {
  				/*
  				 * Slow path. We don't want to lose the last
  				 * pin before unlock_page()
  				 */
  				get_page(page); /* for putback_lru_page() */
  				__munlock_isolated_page(page);
  				unlock_page(page);
  				put_page(page); /* from follow_page_mask() */
  			}
  		}
  	}

  	/*
  	 * Phase 3: page putback for pages that qualified for the fast path
  	 * This will also call put_page() to return pin from follow_page_mask()
  	 */
  	if (pagevec_count(&pvec_putback))
  		__putback_lru_fast(&pvec_putback, pgrescued);
  }
  
  /*
   * Fill up pagevec for __munlock_pagevec using pte walk
   *
   * The function expects that the struct page corresponding to @start address is
   * a non-THP page already pinned and in the @pvec, and that it belongs to @zone.
   *
   * The rest of @pvec is filled by subsequent pages within the same pmd and same
   * zone, as long as the pte's are present and vm_normal_page() succeeds. These
   * pages also get pinned.
   *
   * Returns the address of the next page that should be scanned. This equals
   * @start + PAGE_SIZE when no page could be added by the pte walk.
   */
  static unsigned long __munlock_pagevec_fill(struct pagevec *pvec,
  			struct vm_area_struct *vma, struct zone *zone,
  			unsigned long start, unsigned long end)
  {
  	pte_t *pte;
  	spinlock_t *ptl;
  
  	/*
  	 * Initialize pte walk starting at the already pinned page where we
  	 * are sure that there is a pte, as it was pinned under the same
  	 * mmap_sem write op.
  	 */
  	pte = get_locked_pte(vma->vm_mm, start, &ptl);
  	/* Make sure we do not cross the page table boundary */
  	end = pgd_addr_end(start, end);
  	end = p4d_addr_end(start, end);
  	end = pud_addr_end(start, end);
  	end = pmd_addr_end(start, end);
  
  	/* The page next to the pinned page is the first we will try to get */
  	start += PAGE_SIZE;
  	while (start < end) {
  		struct page *page = NULL;
  		pte++;
  		if (pte_present(*pte))
  			page = vm_normal_page(vma, start, *pte);
  		/*
  		 * Break if page could not be obtained or the page's node+zone does not
  		 * match
  		 */
  		if (!page || page_zone(page) != zone)
  			break;

  		/*
  		 * Do not use pagevec for PTE-mapped THP,
  		 * munlock_vma_pages_range() will handle them.
  		 */
  		if (PageTransCompound(page))
  			break;
  		get_page(page);
  		/*
  		 * Increase the address that will be returned *before* the
  		 * eventual break due to pvec becoming full by adding the page
  		 */
  		start += PAGE_SIZE;
  		if (pagevec_add(pvec, page) == 0)
  			break;
  	}
  	pte_unmap_unlock(pte, ptl);
  	return start;
  }
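
  /*
   * Example of the contract above: if the pte next to the pinned page is
   * not present, no extra page is added and @start + PAGE_SIZE is
   * returned, so the caller munlocks only the single page already in the
   * pagevec and continues the walk from there.
   */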
  
  /*
   * munlock_vma_pages_range() - munlock all pages in the vma range.
   * @vma - vma containing range to be munlock()ed.
   * @start - start address in @vma of the range
   * @end - end of range in @vma.
   *
   *  For mremap(), munmap() and exit().
   *
   * Called with @vma VM_LOCKED.
   *
   * Returns with VM_LOCKED cleared.  Callers must be prepared to
   * deal with this.
   *
   * We don't save and restore VM_LOCKED here because pages are
   * still on lru.  In unmap path, pages might be scanned by reclaim
   * and re-mlocked by try_to_{munlock|unmap} before we unmap and
   * free them.  This will result in freeing mlocked pages.
   */
  void munlock_vma_pages_range(struct vm_area_struct *vma,
  			     unsigned long start, unsigned long end)
  {
  	vma->vm_flags &= VM_LOCKED_CLEAR_MASK;

  	while (start < end) {
  		struct page *page;
  		unsigned int page_mask = 0;
  		unsigned long page_increm;
  		struct pagevec pvec;
  		struct zone *zone;

  		pagevec_init(&pvec);
  		/*
  		 * Although FOLL_DUMP is intended for get_dump_page(),
  		 * it just so happens that its special treatment of the
  		 * ZERO_PAGE (returning an error instead of doing get_page)
  		 * suits munlock very well (and if somehow an abnormal page
  		 * has sneaked into the range, we won't oops here: great).
  		 */
  		page = follow_page(vma, start, FOLL_GET | FOLL_DUMP);

  		if (page && !IS_ERR(page)) {
  			if (PageTransTail(page)) {
  				VM_BUG_ON_PAGE(PageMlocked(page), page);
  				put_page(page); /* follow_page_mask() */
  			} else if (PageTransHuge(page)) {
  				lock_page(page);
  				/*
  				 * Any THP page found by follow_page_mask() may
  				 * have gotten split before reaching
  				 * munlock_vma_page(), so we need to compute
  				 * the page_mask here instead.
  				 */
  				page_mask = munlock_vma_page(page);
  				unlock_page(page);
  				put_page(page); /* follow_page_mask() */
  			} else {
  				/*
  				 * Non-huge pages are handled in batches via
  				 * pagevec. The pin from follow_page_mask()
  				 * prevents them from being collapsed into a THP.
  				 */
  				pagevec_add(&pvec, page);
  				zone = page_zone(page);

  				/*
  				 * Try to fill the rest of pagevec using fast
  				 * pte walk. This will also update start to
  				 * the next page to process. Then munlock the
  				 * pagevec.
  				 */
  				start = __munlock_pagevec_fill(&pvec, vma,
  						zone, start, end);
  				__munlock_pagevec(&pvec, zone);
  				goto next;
  			}
  		}
  		page_increm = 1 + page_mask;
  		start += page_increm * PAGE_SIZE;
  next:
  		cond_resched();
  	}
  }
  
  /*
   * mlock_fixup  - handle mlock[all]/munlock[all] requests.
   *
   * Filters out "special" vmas -- VM_LOCKED never gets set for these, and
   * munlock is a no-op.  However, for some special vmas, we go ahead and
   * populate the ptes.
   *
   * For vmas that pass the filters, merge/split as appropriate.
   */
  static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
  	unsigned long start, unsigned long end, vm_flags_t newflags)
  {
  	struct mm_struct *mm = vma->vm_mm;
  	pgoff_t pgoff;
  	int nr_pages;
  	int ret = 0;
  	int lock = !!(newflags & VM_LOCKED);
  	vm_flags_t old_flags = vma->vm_flags;

  	if (newflags == vma->vm_flags || (vma->vm_flags & VM_SPECIAL) ||
  	    is_vm_hugetlb_page(vma) || vma == get_gate_vma(current->mm) ||
  	    vma_is_dax(vma))
  		/* don't set VM_LOCKED or VM_LOCKONFAULT and don't count */
  		goto out;

  	pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
  	*prev = vma_merge(mm, *prev, start, end, newflags, vma->anon_vma,
  			  vma->vm_file, pgoff, vma_policy(vma),
  			  vma->vm_userfaultfd_ctx);
  	if (*prev) {
  		vma = *prev;
  		goto success;
  	}
  	if (start != vma->vm_start) {
  		ret = split_vma(mm, vma, start, 1);
  		if (ret)
  			goto out;
  	}
  
  	if (end != vma->vm_end) {
  		ret = split_vma(mm, vma, end, 0);
  		if (ret)
  			goto out;
  	}
  
  success:
  	/*
  	 * Keep track of amount of locked VM.
  	 */
  	nr_pages = (end - start) >> PAGE_SHIFT;
  	if (!lock)
  		nr_pages = -nr_pages;
  	else if (old_flags & VM_LOCKED)
  		nr_pages = 0;
  	mm->locked_vm += nr_pages;
  
  	/*
  	 * vm_flags is protected by the mmap_sem held in write mode.
  	 * It's okay if try_to_unmap_one unmaps a page just after we
  	 * set VM_LOCKED, populate_vma_page_range will bring it back.
  	 */

  	if (lock)
  		vma->vm_flags = newflags;
  	else
  		munlock_vma_pages_range(vma, start, end);

  out:
  	*prev = vma;
  	return ret;
  }
  static int apply_vma_lock_flags(unsigned long start, size_t len,
  				vm_flags_t flags)
  {
  	unsigned long nstart, end, tmp;
  	struct vm_area_struct *vma, *prev;
  	int error;
  	VM_BUG_ON(offset_in_page(start));
  	VM_BUG_ON(len != PAGE_ALIGN(len));
  	end = start + len;
  	if (end < start)
  		return -EINVAL;
  	if (end == start)
  		return 0;
  	vma = find_vma(current->mm, start);
  	if (!vma || vma->vm_start > start)
  		return -ENOMEM;
  	prev = vma->vm_prev;
  	if (start > vma->vm_start)
  		prev = vma;
  
  	for (nstart = start ; ; ) {
  		vm_flags_t newflags = vma->vm_flags & VM_LOCKED_CLEAR_MASK;

  		newflags |= flags;

  		/* Here we know that vma->vm_start <= nstart < vma->vm_end. */
  		tmp = vma->vm_end;
  		if (tmp > end)
  			tmp = end;
  		error = mlock_fixup(vma, &prev, nstart, tmp, newflags);
  		if (error)
  			break;
  		nstart = tmp;
  		if (nstart < prev->vm_end)
  			nstart = prev->vm_end;
  		if (nstart >= end)
  			break;
  
  		vma = prev->vm_next;
  		if (!vma || vma->vm_start != nstart) {
  			error = -ENOMEM;
  			break;
  		}
  	}
  	return error;
  }
  /*
   * Go through the vma areas and sum the size of the mlocked pages
   * within them, to be used as the return value.
   * Note that the deferred memory-locking case (mlock2() with
   * MLOCK_ONFAULT) is also counted.
   * Return value: count of previously mlocked pages
   */
  static int count_mm_mlocked_page_nr(struct mm_struct *mm,
  		unsigned long start, size_t len)
  {
  	struct vm_area_struct *vma;
  	int count = 0;
  
  	if (mm == NULL)
  		mm = current->mm;
  
  	vma = find_vma(mm, start);
  	if (vma == NULL)
  		vma = mm->mmap;
  
  	for (; vma ; vma = vma->vm_next) {
  		if (start >= vma->vm_end)
  			continue;
  		if (start + len <= vma->vm_start)
  			break;
  		if (vma->vm_flags & VM_LOCKED) {
  			if (start > vma->vm_start)
  				count -= (start - vma->vm_start);
  			if (start + len < vma->vm_end) {
  				count += start + len - vma->vm_start;
  				break;
  			}
  			count += vma->vm_end - vma->vm_start;
  		}
  	}
  
  	return count >> PAGE_SHIFT;
  }
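
  /*
   * Worked example (illustrative, assuming 4K pages): one VM_LOCKED vma
   * spans [0x1000, 0x5000) and the request is start = 0x2000,
   * len = 0x2000. The loop subtracts the 0x1000 bytes of the vma that
   * precede @start and adds the overlap up to start + len, leaving
   * count = 0x2000, i.e. 2 pages reported as already locked.
   */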
  static __must_check int do_mlock(unsigned long start, size_t len, vm_flags_t flags)
  {
  	unsigned long locked;
  	unsigned long lock_limit;
  	int error = -ENOMEM;
  
  	if (!can_do_mlock())
  		return -EPERM;
  	len = PAGE_ALIGN(len + (offset_in_page(start)));
  	start &= PAGE_MASK;
  	lock_limit = rlimit(RLIMIT_MEMLOCK);
  	lock_limit >>= PAGE_SHIFT;
  	locked = len >> PAGE_SHIFT;
  	if (down_write_killable(&current->mm->mmap_sem))
  		return -EINTR;
  
  	locked += current->mm->locked_vm;
  	if ((locked > lock_limit) && (!capable(CAP_IPC_LOCK))) {
  		/*
  		 * It is possible that the regions requested intersect with
  		 * previously mlocked areas, that part area in "mm->locked_vm"
  		 * should not be counted to new mlock increment count. So check
  		 * and adjust locked count if necessary.
  		 */
  		locked -= count_mm_mlocked_page_nr(current->mm,
  				start, len);
  	}
  
  	/* check against resource limits */
  	if ((locked <= lock_limit) || capable(CAP_IPC_LOCK))
  		error = apply_vma_lock_flags(start, len, flags);

  	up_write(&current->mm->mmap_sem);
  	if (error)
  		return error;
  
  	error = __mm_populate(start, len, 0);
  	if (error)
  		return __mlock_posix_error_return(error);
  	return 0;
  }
  SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len)
  {
  	return do_mlock(start, len, VM_LOCKED);
  }
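
  /*
   * Typical userspace usage (illustrative only, not part of this file):
   *
   *	if (mlock(addr, length))
   *		perror("mlock");	// EPERM, EAGAIN or ENOMEM
   *	...
   *	munlock(addr, length);
   */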
  SYSCALL_DEFINE3(mlock2, unsigned long, start, size_t, len, int, flags)
  {
  	vm_flags_t vm_flags = VM_LOCKED;
  
  	if (flags & ~MLOCK_ONFAULT)
  		return -EINVAL;
  	if (flags & MLOCK_ONFAULT)
  		vm_flags |= VM_LOCKONFAULT;
  
  	return do_mlock(start, len, vm_flags);
  }
  SYSCALL_DEFINE2(munlock, unsigned long, start, size_t, len)
  {
  	int ret;
  	len = PAGE_ALIGN(len + (offset_in_page(start)));
  	start &= PAGE_MASK;

  	if (down_write_killable(&current->mm->mmap_sem))
  		return -EINTR;
  	ret = apply_vma_lock_flags(start, len, 0);
  	up_write(&current->mm->mmap_sem);

  	return ret;
  }
  /*
   * Take the MCL_* flags passed into mlockall (or 0 if called from munlockall)
   * and translate into the appropriate modifications to mm->def_flags and/or the
   * flags for all current VMAs.
   *
   * There are a couple of subtleties with this.  If mlockall() is called multiple
   * times with different flags, the values do not necessarily stack.  If mlockall
   * is called once including the MCL_FUTURE flag and then a second time without
   * it, VM_LOCKED and VM_LOCKONFAULT will be cleared from mm->def_flags.
   */
  static int apply_mlockall_flags(int flags)
  {
  	struct vm_area_struct *vma, *prev = NULL;
  	vm_flags_t to_add = 0;

  	current->mm->def_flags &= VM_LOCKED_CLEAR_MASK;
  	if (flags & MCL_FUTURE) {
  		current->mm->def_flags |= VM_LOCKED;

  		if (flags & MCL_ONFAULT)
  			current->mm->def_flags |= VM_LOCKONFAULT;
  
  		if (!(flags & MCL_CURRENT))
  			goto out;
  	}
  
  	if (flags & MCL_CURRENT) {
  		to_add |= VM_LOCKED;
  		if (flags & MCL_ONFAULT)
  			to_add |= VM_LOCKONFAULT;
  	}
  
  	for (vma = current->mm->mmap; vma ; vma = prev->vm_next) {
  		vm_flags_t newflags;

  		newflags = vma->vm_flags & VM_LOCKED_CLEAR_MASK;
  		newflags |= to_add;
  
  		/* Ignore errors */
  		mlock_fixup(vma, &prev, vma->vm_start, vma->vm_end, newflags);
  		cond_resched();
  	}
  out:
  	return 0;
  }
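
  /*
   * Example of the non-stacking behaviour described above:
   * mlockall(MCL_CURRENT | MCL_FUTURE) followed by mlockall(MCL_CURRENT)
   * disables MCL_FUTURE again, because each call first clears VM_LOCKED
   * and VM_LOCKONFAULT from mm->def_flags.
   */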
  SYSCALL_DEFINE1(mlockall, int, flags)
  {
  	unsigned long lock_limit;
  	int ret;

  	if (!flags || (flags & ~(MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT)))
  		return -EINVAL;

  	if (!can_do_mlock())
  		return -EPERM;

  	lock_limit = rlimit(RLIMIT_MEMLOCK);
  	lock_limit >>= PAGE_SHIFT;
  	if (down_write_killable(&current->mm->mmap_sem))
  		return -EINTR;

  	ret = -ENOMEM;
  	if (!(flags & MCL_CURRENT) || (current->mm->total_vm <= lock_limit) ||
  	    capable(CAP_IPC_LOCK))
  		ret = apply_mlockall_flags(flags);
  	up_write(&current->mm->mmap_sem);
  	if (!ret && (flags & MCL_CURRENT))
  		mm_populate(0, TASK_SIZE);

  	return ret;
  }
  SYSCALL_DEFINE0(munlockall)
  {
  	int ret;
  	if (down_write_killable(&current->mm->mmap_sem))
  		return -EINTR;
  	ret = apply_mlockall_flags(0);
  	up_write(&current->mm->mmap_sem);
  	return ret;
  }
  
  /*
   * Objects with different lifetime than processes (SHM_LOCK and SHM_HUGETLB
   * shm segments) get accounted against the user_struct instead.
   */
  static DEFINE_SPINLOCK(shmlock_user_lock);
  
  int user_shm_lock(size_t size, struct user_struct *user)
  {
  	unsigned long lock_limit, locked;
  	int allowed = 0;
  
  	locked = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
  	lock_limit = rlimit(RLIMIT_MEMLOCK);
  	if (lock_limit == RLIM_INFINITY)
  		allowed = 1;
  	lock_limit >>= PAGE_SHIFT;
  	spin_lock(&shmlock_user_lock);
  	if (!allowed &&
  	    locked + user->locked_shm > lock_limit && !capable(CAP_IPC_LOCK))
  		goto out;
  	get_uid(user);
  	user->locked_shm += locked;
  	allowed = 1;
  out:
  	spin_unlock(&shmlock_user_lock);
  	return allowed;
  }
  
  void user_shm_unlock(size_t size, struct user_struct *user)
  {
  	spin_lock(&shmlock_user_lock);
  	user->locked_shm -= (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
  	spin_unlock(&shmlock_user_lock);
  	free_uid(user);
  }