mm/mlock.c

// SPDX-License-Identifier: GPL-2.0
/*
 *	linux/mm/mlock.c
 *
 *  (C) Copyright 1995 Linus Torvalds
 *  (C) Copyright 2002 Christoph Hellwig
 */
#include <linux/capability.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/sched/user.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/mempolicy.h>
#include <linux/syscalls.h>
#include <linux/sched.h>
#include <linux/export.h>
#include <linux/rmap.h>
#include <linux/mmzone.h>
#include <linux/hugetlb.h>
#include <linux/memcontrol.h>
#include <linux/mm_inline.h>

#include "internal.h"

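/*
 * Return true if the calling process may mlock memory: either the
 * RLIMIT_MEMLOCK rlimit is non-zero or the task has CAP_IPC_LOCK.
 */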
bool can_do_mlock(void)
{
	if (rlimit(RLIMIT_MEMLOCK) != 0)
		return true;
	if (capable(CAP_IPC_LOCK))
		return true;
	return false;
}
EXPORT_SYMBOL(can_do_mlock);

  /*
   * Mlocked pages are marked with PageMlocked() flag for efficient testing
   * in vmscan and, possibly, the fault path; and to support semi-accurate
   * statistics.
   *
   * An mlocked page [PageMlocked(page)] is unevictable.  As such, it will
   * be placed on the LRU "unevictable" list, rather than the [in]active lists.
   * The unevictable list is an LRU sibling list to the [in]active lists.
   * PageUnevictable is set to indicate the unevictable state.
   *
   * When lazy mlocking via vmscan, it is important to ensure that the
   * vma's VM_LOCKED status is not concurrently being modified, otherwise we
   * may have mlocked a page that is being munlocked. So lazy mlock must take
 * the mmap_lock for read, and verify that the vma really is locked
   * (see mm/rmap.c).
   */
  
  /*
   *  LRU accounting for clear_page_mlock()
   */
void clear_page_mlock(struct page *page)
{
	if (!TestClearPageMlocked(page))
		return;

	mod_zone_page_state(page_zone(page), NR_MLOCK, -thp_nr_pages(page));
	count_vm_event(UNEVICTABLE_PGCLEARED);
	/*
	 * The previous TestClearPageMlocked() corresponds to the smp_mb()
	 * in __pagevec_lru_add_fn().
	 *
	 * See __pagevec_lru_add_fn for more explanation.
	 */
	if (!isolate_lru_page(page)) {
		putback_lru_page(page);
	} else {
		/*
		 * We lost the race. The page already moved to the evictable list.
		 */
		if (PageUnevictable(page))
			count_vm_event(UNEVICTABLE_PGSTRANDED);
  	}
  }
  
  /*
   * Mark page as mlocked if not already.
   * If page on LRU, isolate and putback to move to unevictable list.
   */
  void mlock_vma_page(struct page *page)
  {
	/* Serialize with page migration */
	BUG_ON(!PageLocked(page));
	VM_BUG_ON_PAGE(PageTail(page), page);
	VM_BUG_ON_PAGE(PageCompound(page) && PageDoubleMap(page), page);
	if (!TestSetPageMlocked(page)) {
		mod_zone_page_state(page_zone(page), NR_MLOCK,
				    thp_nr_pages(page));
		count_vm_event(UNEVICTABLE_PGMLOCKED);
		if (!isolate_lru_page(page))
			putback_lru_page(page);
	}
}

/*
   * Isolate a page from LRU with optional get_page() pin.
   * Assumes lru_lock already held and page already pinned.
   */
  static bool __munlock_isolate_lru_page(struct page *page, bool getpage)
  {
  	if (PageLRU(page)) {
  		struct lruvec *lruvec;
		lruvec = mem_cgroup_page_lruvec(page, page_pgdat(page));
  		if (getpage)
  			get_page(page);
  		ClearPageLRU(page);
  		del_page_from_lru_list(page, lruvec, page_lru(page));
  		return true;
  	}
  
  	return false;
  }
  
  /*
   * Finish munlock after successful page isolation
   *
   * Page must be locked. This is a wrapper for try_to_munlock()
   * and putback_lru_page() with munlock accounting.
   */
  static void __munlock_isolated_page(struct page *page)
  {
  	/*
  	 * Optimization: if the page was mapped just once, that's our mapping
  	 * and we don't need to check all the other vmas.
  	 */
  	if (page_mapcount(page) > 1)
		try_to_munlock(page);

	/* Did try_to_munlock() succeed or punt? */
	if (!PageMlocked(page))
  		count_vm_event(UNEVICTABLE_PGMUNLOCKED);
  
  	putback_lru_page(page);
  }
  
  /*
   * Accounting for page isolation fail during munlock
   *
   * Performs accounting when page isolation fails in munlock. There is nothing
   * else to do because it means some other task has already removed the page
   * from the LRU. putback_lru_page() will take care of removing the page from
   * the unevictable list, if necessary. vmscan [page_referenced()] will move
   * the page back to the unevictable list if some other vma has it mlocked.
   */
  static void __munlock_isolation_failed(struct page *page)
  {
  	if (PageUnevictable(page))
		__count_vm_event(UNEVICTABLE_PGSTRANDED);
	else
		__count_vm_event(UNEVICTABLE_PGMUNLOCKED);
}

/**
 * munlock_vma_page - munlock a vma page
 * @page: page to be unlocked, either a normal page or THP page head
 *
 * returns the size of the page as a page mask (0 for normal page,
 *         HPAGE_PMD_NR - 1 for THP head page)
 *
   * called from munlock()/munmap() path with page supposedly on the LRU.
   * When we munlock a page, because the vma where we found the page is being
   * munlock()ed or munmap()ed, we want to check whether other vmas hold the
   * page locked so that we can leave it on the unevictable lru list and not
   * bother vmscan with it.  However, to walk the page's rmap list in
   * try_to_munlock() we must isolate the page from the LRU.  If some other
   * task has removed the page from the LRU, we won't be able to do that.
   * So we clear the PageMlocked as we might not get another chance.  If we
   * can't isolate the page, we leave it for putback_lru_page() and vmscan
   * [page_referenced()/try_to_unmap()] to deal with.
 */
unsigned int munlock_vma_page(struct page *page)
{
	int nr_pages;
	pg_data_t *pgdat = page_pgdat(page);

	/* For try_to_munlock() and to serialize with page migration */
	BUG_ON(!PageLocked(page));
	VM_BUG_ON_PAGE(PageTail(page), page);
	/*
	 * Serialize with any parallel __split_huge_page_refcount() which
	 * might otherwise copy PageMlocked to part of the tail pages before
	 * we clear it in the head page. It also stabilizes thp_nr_pages().
	 */
	spin_lock_irq(&pgdat->lru_lock);

	if (!TestClearPageMlocked(page)) {
		/* Potentially, PTE-mapped THP: do not skip the rest PTEs */
		nr_pages = 1;
		goto unlock_out;
	}

	nr_pages = thp_nr_pages(page);
	__mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages);

	if (__munlock_isolate_lru_page(page, true)) {
		spin_unlock_irq(&pgdat->lru_lock);
		__munlock_isolated_page(page);
		goto out;
	}
	__munlock_isolation_failed(page);

unlock_out:
	spin_unlock_irq(&pgdat->lru_lock);

out:
	return nr_pages - 1;
}

  /*
   * convert get_user_pages() return value to posix mlock() error
   */
  static int __mlock_posix_error_return(long retval)
  {
  	if (retval == -EFAULT)
  		retval = -ENOMEM;
  	else if (retval == -ENOMEM)
  		retval = -EAGAIN;
  	return retval;
}

/*
   * Prepare page for fast batched LRU putback via putback_lru_evictable_pagevec()
   *
   * The fast path is available only for evictable pages with single mapping.
   * Then we can bypass the per-cpu pvec and get better performance.
   * when mapcount > 1 we need try_to_munlock() which can fail.
   * when !page_evictable(), we need the full redo logic of putback_lru_page to
   * avoid leaving evictable page in unevictable list.
   *
   * In case of success, @page is added to @pvec and @pgrescued is incremented
   * in case that the page was previously unevictable. @page is also unlocked.
   */
  static bool __putback_lru_fast_prepare(struct page *page, struct pagevec *pvec,
  		int *pgrescued)
  {
	VM_BUG_ON_PAGE(PageLRU(page), page);
	VM_BUG_ON_PAGE(!PageLocked(page), page);
  
  	if (page_mapcount(page) <= 1 && page_evictable(page)) {
  		pagevec_add(pvec, page);
  		if (TestClearPageUnevictable(page))
  			(*pgrescued)++;
  		unlock_page(page);
  		return true;
  	}
  
  	return false;
  }
  
  /*
   * Putback multiple evictable pages to the LRU
   *
   * Batched putback of evictable pages that bypasses the per-cpu pvec. Some of
   * the pages might have meanwhile become unevictable but that is OK.
   */
  static void __putback_lru_fast(struct pagevec *pvec, int pgrescued)
  {
  	count_vm_events(UNEVICTABLE_PGMUNLOCKED, pagevec_count(pvec));
  	/*
  	 *__pagevec_lru_add() calls release_pages() so we don't call
  	 * put_page() explicitly
  	 */
  	__pagevec_lru_add(pvec);
  	count_vm_events(UNEVICTABLE_PGRESCUED, pgrescued);
  }
  
  /*
 * Munlock a batch of pages from the same zone
 *
 * The work is split to two main phases. First phase clears the Mlocked flag
 * and attempts to isolate the pages, all under a single zone lru lock.
 * The second phase finishes the munlock only for pages where isolation
 * succeeded.
 *
 * Note that the pagevec may be modified during the process.
 */
static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
{
	int i;
	int nr = pagevec_count(pvec);
	int delta_munlocked = -nr;
	struct pagevec pvec_putback;
	int pgrescued = 0;

	pagevec_init(&pvec_putback);

	/* Phase 1: page isolation */
	spin_lock_irq(&zone->zone_pgdat->lru_lock);
	for (i = 0; i < nr; i++) {
		struct page *page = pvec->pages[i];

		if (TestClearPageMlocked(page)) {
			/*
			 * We already have pin from follow_page_mask()
			 * so we can spare the get_page() here.
			 */
			if (__munlock_isolate_lru_page(page, false))
				continue;
			else
				__munlock_isolation_failed(page);
		} else {
			delta_munlocked++;
		}
  
  		/*
  		 * We won't be munlocking this page in the next phase
  		 * but we still need to release the follow_page_mask()
  		 * pin. We cannot do it under lru_lock however. If it's
  		 * the last pin, __page_cache_release() would deadlock.
  		 */
  		pagevec_add(&pvec_putback, pvec->pages[i]);
  		pvec->pages[i] = NULL;
	}
	__mod_zone_page_state(zone, NR_MLOCK, delta_munlocked);
	spin_unlock_irq(&zone->zone_pgdat->lru_lock);

	/* Now we can release pins of pages that we are not munlocking */
	pagevec_release(&pvec_putback);
	/* Phase 2: page munlock */
	for (i = 0; i < nr; i++) {
		struct page *page = pvec->pages[i];

		if (page) {
			lock_page(page);
			if (!__putback_lru_fast_prepare(page, &pvec_putback,
					&pgrescued)) {
				/*
				 * Slow path. We don't want to lose the last
				 * pin before unlock_page()
				 */
				get_page(page); /* for putback_lru_page() */
				__munlock_isolated_page(page);
				unlock_page(page);
				put_page(page); /* from follow_page_mask() */
			}
		}
	}

	/*
	 * Phase 3: page putback for pages that qualified for the fast path
	 * This will also call put_page() to return pin from follow_page_mask()
	 */
	if (pagevec_count(&pvec_putback))
		__putback_lru_fast(&pvec_putback, pgrescued);
  }
  
  /*
   * Fill up pagevec for __munlock_pagevec using pte walk
   *
   * The function expects that the struct page corresponding to @start address is
 * a non-THP page already pinned and in the @pvec, and that it belongs to @zone.
   *
   * The rest of @pvec is filled by subsequent pages within the same pmd and same
   * zone, as long as the pte's are present and vm_normal_page() succeeds. These
   * pages also get pinned.
   *
   * Returns the address of the next page that should be scanned. This equals
   * @start + PAGE_SIZE when no page could be added by the pte walk.
   */
  static unsigned long __munlock_pagevec_fill(struct pagevec *pvec,
			struct vm_area_struct *vma, struct zone *zone,
			unsigned long start, unsigned long end)
{
	pte_t *pte;
	spinlock_t *ptl;

	/*
	 * Initialize pte walk starting at the already pinned page where we
	 * are sure that there is a pte, as it was pinned under the same
	 * mmap_lock write op.
	 */
	pte = get_locked_pte(vma->vm_mm, start, &ptl);
	/* Make sure we do not cross the page table boundary */
	end = pgd_addr_end(start, end);
	end = p4d_addr_end(start, end);
	end = pud_addr_end(start, end);
	end = pmd_addr_end(start, end);
  
  	/* The page next to the pinned page is the first we will try to get */
  	start += PAGE_SIZE;
  	while (start < end) {
  		struct page *page = NULL;
  		pte++;
  		if (pte_present(*pte))
  			page = vm_normal_page(vma, start, *pte);
  		/*
  		 * Break if page could not be obtained or the page's node+zone does not
  		 * match
  		 */
		if (!page || page_zone(page) != zone)
			break;

		/*
		 * Do not use pagevec for PTE-mapped THP,
		 * munlock_vma_pages_range() will handle them.
		 */
		if (PageTransCompound(page))
			break;

  		get_page(page);
  		/*
  		 * Increase the address that will be returned *before* the
  		 * eventual break due to pvec becoming full by adding the page
  		 */
  		start += PAGE_SIZE;
  		if (pagevec_add(pvec, page) == 0)
  			break;
  	}
  	pte_unmap_unlock(pte, ptl);
  	return start;
}

/*
 * munlock_vma_pages_range() - munlock all pages in the vma range.
   * @vma - vma containing range to be munlock()ed.
   * @start - start address in @vma of the range
   * @end - end of range in @vma.
   *
   *  For mremap(), munmap() and exit().
   *
   * Called with @vma VM_LOCKED.
   *
   * Returns with VM_LOCKED cleared.  Callers must be prepared to
   * deal with this.
   *
   * We don't save and restore VM_LOCKED here because pages are
   * still on lru.  In unmap path, pages might be scanned by reclaim
   * and re-mlocked by try_to_{munlock|unmap} before we unmap and
   * free them.  This will result in freeing mlocked pages.
 */
void munlock_vma_pages_range(struct vm_area_struct *vma,
			     unsigned long start, unsigned long end)
{
	vma->vm_flags &= VM_LOCKED_CLEAR_MASK;

	while (start < end) {
		struct page *page;
		unsigned int page_mask = 0;
		unsigned long page_increm;
		struct pagevec pvec;
		struct zone *zone;

		pagevec_init(&pvec);
		/*
		 * Although FOLL_DUMP is intended for get_dump_page(),
		 * it just so happens that its special treatment of the
		 * ZERO_PAGE (returning an error instead of doing get_page)
		 * suits munlock very well (and if somehow an abnormal page
		 * has sneaked into the range, we won't oops here: great).
		 */
		page = follow_page(vma, start, FOLL_GET | FOLL_DUMP);

  		if (page && !IS_ERR(page)) {
  			if (PageTransTail(page)) {
  				VM_BUG_ON_PAGE(PageMlocked(page), page);
  				put_page(page); /* follow_page_mask() */
  			} else if (PageTransHuge(page)) {
  				lock_page(page);
  				/*
  				 * Any THP page found by follow_page_mask() may
  				 * have gotten split before reaching
				 * munlock_vma_page(), so we need to compute
				 * the page_mask here instead.
  				 */
  				page_mask = munlock_vma_page(page);
  				unlock_page(page);
  				put_page(page); /* follow_page_mask() */
  			} else {
  				/*
  				 * Non-huge pages are handled in batches via
  				 * pagevec. The pin from follow_page_mask()
  				 * prevents them from collapsing by THP.
  				 */
  				pagevec_add(&pvec, page);
  				zone = page_zone(page);

  				/*
  				 * Try to fill the rest of pagevec using fast
  				 * pte walk. This will also update start to
  				 * the next page to process. Then munlock the
  				 * pagevec.
  				 */
  				start = __munlock_pagevec_fill(&pvec, vma,
						zone, start, end);
				__munlock_pagevec(&pvec, zone);
				goto next;
			}
		}
		page_increm = 1 + page_mask;
		start += page_increm * PAGE_SIZE;
next:
		cond_resched();
	}
  }
  
  /*
   * mlock_fixup  - handle mlock[all]/munlock[all] requests.
   *
   * Filters out "special" vmas -- VM_LOCKED never gets set for these, and
   * munlock is a no-op.  However, for some special vmas, we go ahead and
 * populate the ptes.
 *
 * For vmas that pass the filters, merge/split as appropriate.
 */
static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
	unsigned long start, unsigned long end, vm_flags_t newflags)
{
	struct mm_struct *mm = vma->vm_mm;
	pgoff_t pgoff;
	int nr_pages;
	int ret = 0;
	int lock = !!(newflags & VM_LOCKED);
	vm_flags_t old_flags = vma->vm_flags;

	if (newflags == vma->vm_flags || (vma->vm_flags & VM_SPECIAL) ||
	    is_vm_hugetlb_page(vma) || vma == get_gate_vma(current->mm) ||
	    vma_is_dax(vma))
		/* don't set VM_LOCKED or VM_LOCKONFAULT and don't count */
		goto out;

	pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
	*prev = vma_merge(mm, *prev, start, end, newflags, vma->anon_vma,
			  vma->vm_file, pgoff, vma_policy(vma),
			  vma->vm_userfaultfd_ctx);
	if (*prev) {
		vma = *prev;
		goto success;
	}

  	if (start != vma->vm_start) {
  		ret = split_vma(mm, vma, start, 1);
  		if (ret)
  			goto out;
  	}
  
  	if (end != vma->vm_end) {
  		ret = split_vma(mm, vma, end, 0);
  		if (ret)
  			goto out;
  	}
  
  success:
  	/*
	 * Keep track of amount of locked VM.
	 */
	nr_pages = (end - start) >> PAGE_SHIFT;
	if (!lock)
		nr_pages = -nr_pages;
	else if (old_flags & VM_LOCKED)
		nr_pages = 0;
	mm->locked_vm += nr_pages;

	/*
	 * vm_flags is protected by the mmap_lock held in write mode.
	 * It's okay if try_to_unmap_one unmaps a page just after we
	 * set VM_LOCKED, populate_vma_page_range will bring it back.
	 */

	if (lock)
		vma->vm_flags = newflags;
	else
		munlock_vma_pages_range(vma, start, end);

out:
	*prev = vma;
  	return ret;
  }
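
/*
 * Walk the VMAs covering [start, start + len) and apply the requested
 * VM_LOCKED/VM_LOCKONFAULT flags (or clear them when @flags is 0) via
 * mlock_fixup(), splitting VMAs at the range boundaries as needed.
 * Returns 0 on success or a negative error code (e.g. -ENOMEM when the
 * range is not fully covered by VMAs).
 */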
static int apply_vma_lock_flags(unsigned long start, size_t len,
				vm_flags_t flags)
{
	unsigned long nstart, end, tmp;
	struct vm_area_struct * vma, * prev;
	int error;

	VM_BUG_ON(offset_in_page(start));
	VM_BUG_ON(len != PAGE_ALIGN(len));
	end = start + len;
	if (end < start)
		return -EINVAL;
	if (end == start)
		return 0;
	vma = find_vma(current->mm, start);
	if (!vma || vma->vm_start > start)
		return -ENOMEM;
	prev = vma->vm_prev;
	if (start > vma->vm_start)
		prev = vma;

	for (nstart = start ; ; ) {
		vm_flags_t newflags = vma->vm_flags & VM_LOCKED_CLEAR_MASK;

		newflags |= flags;

		/* Here we know that vma->vm_start <= nstart < vma->vm_end. */
  		tmp = vma->vm_end;
  		if (tmp > end)
  			tmp = end;
  		error = mlock_fixup(vma, &prev, nstart, tmp, newflags);
  		if (error)
  			break;
  		nstart = tmp;
  		if (nstart < prev->vm_end)
  			nstart = prev->vm_end;
  		if (nstart >= end)
  			break;
  
  		vma = prev->vm_next;
  		if (!vma || vma->vm_start != nstart) {
  			error = -ENOMEM;
  			break;
  		}
  	}
  	return error;
  }
  /*
   * Go through vma areas and sum size of mlocked
   * vma pages, as return value.
 * Note that the deferred memory locking case (mlock2(,,MLOCK_ONFAULT))
   * is also counted.
   * Return value: previously mlocked page counts
   */
static unsigned long count_mm_mlocked_page_nr(struct mm_struct *mm,
		unsigned long start, size_t len)
{
	struct vm_area_struct *vma;
	unsigned long count = 0;
  
  	if (mm == NULL)
  		mm = current->mm;
  
  	vma = find_vma(mm, start);
  	if (vma == NULL)
  		vma = mm->mmap;
  
  	for (; vma ; vma = vma->vm_next) {
  		if (start >= vma->vm_end)
  			continue;
  		if (start + len <=  vma->vm_start)
  			break;
  		if (vma->vm_flags & VM_LOCKED) {
  			if (start > vma->vm_start)
  				count -= (start - vma->vm_start);
  			if (start + len < vma->vm_end) {
  				count += start + len - vma->vm_start;
  				break;
  			}
  			count += vma->vm_end - vma->vm_start;
  		}
  	}
  
  	return count >> PAGE_SHIFT;
  }
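
/*
 * Common implementation of mlock() and mlock2(): charge the request against
 * RLIMIT_MEMLOCK, apply the VM_LOCKED/VM_LOCKONFAULT flags to the VMAs under
 * mmap_lock, then populate the range as needed.
 */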
static __must_check int do_mlock(unsigned long start, size_t len, vm_flags_t flags)
{
	unsigned long locked;
	unsigned long lock_limit;
	int error = -ENOMEM;

	start = untagged_addr(start);

	if (!can_do_mlock())
		return -EPERM;

	len = PAGE_ALIGN(len + (offset_in_page(start)));
	start &= PAGE_MASK;

	lock_limit = rlimit(RLIMIT_MEMLOCK);
	lock_limit >>= PAGE_SHIFT;
	locked = len >> PAGE_SHIFT;

	if (mmap_write_lock_killable(current->mm))
		return -EINTR;

	locked += current->mm->locked_vm;
  	if ((locked > lock_limit) && (!capable(CAP_IPC_LOCK))) {
  		/*
  		 * It is possible that the regions requested intersect with
  		 * previously mlocked areas, that part area in "mm->locked_vm"
  		 * should not be counted to new mlock increment count. So check
  		 * and adjust locked count if necessary.
  		 */
  		locked -= count_mm_mlocked_page_nr(current->mm,
  				start, len);
  	}
  
	/* check against resource limits */
	if ((locked <= lock_limit) || capable(CAP_IPC_LOCK))
		error = apply_vma_lock_flags(start, len, flags);

	mmap_write_unlock(current->mm);
	if (error)
		return error;

	error = __mm_populate(start, len, 0);
	if (error)
		return __mlock_posix_error_return(error);
	return 0;
}
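
/* mlock(2): lock the given address range into memory (VM_LOCKED). */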
  SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len)
  {
  	return do_mlock(start, len, VM_LOCKED);
  }
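
/*
 * mlock2(2): like mlock(), but MLOCK_ONFAULT requests VM_LOCKONFAULT
 * semantics, i.e. pages are locked as they are faulted in rather than
 * being populated up front.  Any other flag bit is rejected with -EINVAL.
 */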
  SYSCALL_DEFINE3(mlock2, unsigned long, start, size_t, len, int, flags)
  {
	vm_flags_t vm_flags = VM_LOCKED;

	if (flags & ~MLOCK_ONFAULT)
		return -EINVAL;

	if (flags & MLOCK_ONFAULT)
		vm_flags |= VM_LOCKONFAULT;

	return do_mlock(start, len, vm_flags);
  }
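
/* munlock(2): undo mlock()/mlock2() for the given address range. */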
  SYSCALL_DEFINE2(munlock, unsigned long, start, size_t, len)
{
	int ret;

	start = untagged_addr(start);

	len = PAGE_ALIGN(len + (offset_in_page(start)));
	start &= PAGE_MASK;

	if (mmap_write_lock_killable(current->mm))
		return -EINTR;
	ret = apply_vma_lock_flags(start, len, 0);
	mmap_write_unlock(current->mm);

	return ret;
}

  /*
   * Take the MCL_* flags passed into mlockall (or 0 if called from munlockall)
   * and translate into the appropriate modifications to mm->def_flags and/or the
   * flags for all current VMAs.
   *
   * There are a couple of subtleties with this.  If mlockall() is called multiple
   * times with different flags, the values do not necessarily stack.  If mlockall
   * is called once including the MCL_FUTURE flag and then a second time without
   * it, VM_LOCKED and VM_LOCKONFAULT will be cleared from mm->def_flags.
   */
static int apply_mlockall_flags(int flags)
{
	struct vm_area_struct * vma, * prev = NULL;
	vm_flags_t to_add = 0;

	current->mm->def_flags &= VM_LOCKED_CLEAR_MASK;
	if (flags & MCL_FUTURE) {
		current->mm->def_flags |= VM_LOCKED;

  		if (flags & MCL_ONFAULT)
  			current->mm->def_flags |= VM_LOCKONFAULT;
  
  		if (!(flags & MCL_CURRENT))
  			goto out;
  	}
  
  	if (flags & MCL_CURRENT) {
  		to_add |= VM_LOCKED;
  		if (flags & MCL_ONFAULT)
  			to_add |= VM_LOCKONFAULT;
  	}

	for (vma = current->mm->mmap; vma ; vma = prev->vm_next) {
		vm_flags_t newflags;

		newflags = vma->vm_flags & VM_LOCKED_CLEAR_MASK;
		newflags |= to_add;

		/* Ignore errors */
		mlock_fixup(vma, &prev, vma->vm_start, vma->vm_end, newflags);
		cond_resched();
	}
out:
	return 0;
}
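
/*
 * mlockall(2): flags must include at least one of MCL_CURRENT and MCL_FUTURE;
 * MCL_ONFAULT on its own (or any unknown flag) is rejected.  With MCL_CURRENT
 * the whole address space is populated after the flags have been applied.
 */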
  SYSCALL_DEFINE1(mlockall, int, flags)
{
	unsigned long lock_limit;
	int ret;

	if (!flags || (flags & ~(MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT)) ||
	    flags == MCL_ONFAULT)
		return -EINVAL;

	if (!can_do_mlock())
		return -EPERM;

	lock_limit = rlimit(RLIMIT_MEMLOCK);
	lock_limit >>= PAGE_SHIFT;
	if (mmap_write_lock_killable(current->mm))
		return -EINTR;

	ret = -ENOMEM;
	if (!(flags & MCL_CURRENT) || (current->mm->total_vm <= lock_limit) ||
	    capable(CAP_IPC_LOCK))
		ret = apply_mlockall_flags(flags);
	mmap_write_unlock(current->mm);
	if (!ret && (flags & MCL_CURRENT))
		mm_populate(0, TASK_SIZE);

	return ret;
}
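
/*
 * munlockall(2): clear VM_LOCKED and VM_LOCKONFAULT on every VMA and on
 * mm->def_flags.
 */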
  SYSCALL_DEFINE0(munlockall)
{
	int ret;

	if (mmap_write_lock_killable(current->mm))
		return -EINTR;
	ret = apply_mlockall_flags(0);
	mmap_write_unlock(current->mm);
  	return ret;
  }
  
  /*
   * Objects with different lifetime than processes (SHM_LOCK and SHM_HUGETLB
   * shm segments) get accounted against the user_struct instead.
   */
  static DEFINE_SPINLOCK(shmlock_user_lock);
  
  int user_shm_lock(size_t size, struct user_struct *user)
  {
  	unsigned long lock_limit, locked;
  	int allowed = 0;
  
  	locked = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
	lock_limit = rlimit(RLIMIT_MEMLOCK);
	if (lock_limit == RLIM_INFINITY)
		allowed = 1;
	lock_limit >>= PAGE_SHIFT;
	spin_lock(&shmlock_user_lock);
	if (!allowed &&
	    locked + user->locked_shm > lock_limit && !capable(CAP_IPC_LOCK))
  		goto out;
  	get_uid(user);
  	user->locked_shm += locked;
  	allowed = 1;
  out:
  	spin_unlock(&shmlock_user_lock);
  	return allowed;
  }
  
  void user_shm_unlock(size_t size, struct user_struct *user)
  {
  	spin_lock(&shmlock_user_lock);
  	user->locked_shm -= (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
  	spin_unlock(&shmlock_user_lock);
  	free_uid(user);
  }