mm/mlock.c 22.9 KB
  // SPDX-License-Identifier: GPL-2.0
  /*
   *	linux/mm/mlock.c
   *
   *  (C) Copyright 1995 Linus Torvalds
   *  (C) Copyright 2002 Christoph Hellwig
   */
  #include <linux/capability.h>
  #include <linux/mman.h>
  #include <linux/mm.h>
  #include <linux/sched/user.h>
  #include <linux/swap.h>
  #include <linux/swapops.h>
  #include <linux/pagemap.h>
  #include <linux/pagevec.h>
  #include <linux/mempolicy.h>
  #include <linux/syscalls.h>
  #include <linux/sched.h>
  #include <linux/export.h>
  #include <linux/rmap.h>
  #include <linux/mmzone.h>
  #include <linux/hugetlb.h>
  #include <linux/memcontrol.h>
  #include <linux/mm_inline.h>
  
  #include "internal.h"

  bool can_do_mlock(void)
  {
  	if (rlimit(RLIMIT_MEMLOCK) != 0)
  		return true;
  	if (capable(CAP_IPC_LOCK))
  		return true;
  	return false;
  }
  EXPORT_SYMBOL(can_do_mlock);
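
  /*
   * Illustrative userspace sketch (not part of the kernel): whether an
   * unprivileged task may mlock() at all depends on RLIMIT_MEMLOCK, which
   * can be inspected with getrlimit(); raising the hard limit needs
   * CAP_SYS_RESOURCE, while CAP_IPC_LOCK bypasses the limit entirely.
   *
   *	#include <stdio.h>
   *	#include <sys/resource.h>
   *
   *	int main(void)
   *	{
   *		struct rlimit rl;
   *
   *		if (getrlimit(RLIMIT_MEMLOCK, &rl) == 0)
   *			printf("RLIMIT_MEMLOCK: cur=%llu max=%llu\n",
   *			       (unsigned long long)rl.rlim_cur,
   *			       (unsigned long long)rl.rlim_max);
   *		return 0;
   *	}
   */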

  /*
   * Mlocked pages are marked with PageMlocked() flag for efficient testing
   * in vmscan and, possibly, the fault path; and to support semi-accurate
   * statistics.
   *
   * An mlocked page [PageMlocked(page)] is unevictable.  As such, it will
   * be placed on the LRU "unevictable" list, rather than the [in]active lists.
   * The unevictable list is an LRU sibling list to the [in]active lists.
   * PageUnevictable is set to indicate the unevictable state.
   *
   * When lazy mlocking via vmscan, it is important to ensure that the
   * vma's VM_LOCKED status is not concurrently being modified, otherwise we
   * may have mlocked a page that is being munlocked. So lazy mlock must take
   * the mmap_lock for read, and verify that the vma really is locked
   * (see mm/rmap.c).
   */
  
  /*
   *  LRU accounting for clear_page_mlock()
   */
  void clear_page_mlock(struct page *page)
  {
  	int nr_pages;
  	if (!TestClearPageMlocked(page))
  		return;

  	nr_pages = thp_nr_pages(page);
  	mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages);
  	count_vm_events(UNEVICTABLE_PGCLEARED, nr_pages);
  	/*
  	 * The previous TestClearPageMlocked() corresponds to the smp_mb()
  	 * in __pagevec_lru_add_fn().
  	 *
  	 * See __pagevec_lru_add_fn for more explanation.
  	 */
  	if (!isolate_lru_page(page)) {
  		putback_lru_page(page);
  	} else {
  		/*
  		 * We lost the race. The page has already been moved to the evictable list.
  		 */
  		if (PageUnevictable(page))
  			count_vm_events(UNEVICTABLE_PGSTRANDED, nr_pages);
  	}
  }
  
  /*
   * Mark page as mlocked if not already.
   * If page on LRU, isolate and putback to move to unevictable list.
   */
  void mlock_vma_page(struct page *page)
  {
  	/* Serialize with page migration */
  	BUG_ON(!PageLocked(page));
  	VM_BUG_ON_PAGE(PageTail(page), page);
  	VM_BUG_ON_PAGE(PageCompound(page) && PageDoubleMap(page), page);
  	if (!TestSetPageMlocked(page)) {
  		int nr_pages = thp_nr_pages(page);
  
  		mod_zone_page_state(page_zone(page), NR_MLOCK, nr_pages);
  		count_vm_events(UNEVICTABLE_PGMLOCKED, nr_pages);
  		if (!isolate_lru_page(page))
  			putback_lru_page(page);
  	}
  }
  /*
   * Isolate a page from LRU with optional get_page() pin.
   * Assumes lru_lock already held and page already pinned.
   */
  static bool __munlock_isolate_lru_page(struct page *page, bool getpage)
  {
  	if (PageLRU(page)) {
  		struct lruvec *lruvec;
  		lruvec = mem_cgroup_page_lruvec(page, page_pgdat(page));
  		if (getpage)
  			get_page(page);
  		ClearPageLRU(page);
  		del_page_from_lru_list(page, lruvec, page_lru(page));
  		return true;
  	}
  
  	return false;
  }
  
  /*
   * Finish munlock after successful page isolation
   *
   * Page must be locked. This is a wrapper for try_to_munlock()
   * and putback_lru_page() with munlock accounting.
   */
  static void __munlock_isolated_page(struct page *page)
  {
  	/*
  	 * Optimization: if the page was mapped just once, that's our mapping
  	 * and we don't need to check all the other vmas.
  	 */
  	if (page_mapcount(page) > 1)
  		try_to_munlock(page);
  
  	/* Did try_to_munlock() succeed or punt? */
  	if (!PageMlocked(page))
  		count_vm_events(UNEVICTABLE_PGMUNLOCKED, thp_nr_pages(page));
  
  	putback_lru_page(page);
  }
  
  /*
   * Accounting for page isolation fail during munlock
   *
   * Performs accounting when page isolation fails in munlock. There is nothing
   * else to do because it means some other task has already removed the page
   * from the LRU. putback_lru_page() will take care of removing the page from
   * the unevictable list, if necessary. vmscan [page_referenced()] will move
   * the page back to the unevictable list if some other vma has it mlocked.
   */
  static void __munlock_isolation_failed(struct page *page)
  {
  	int nr_pages = thp_nr_pages(page);
  	if (PageUnevictable(page))
  		__count_vm_events(UNEVICTABLE_PGSTRANDED, nr_pages);
  	else
  		__count_vm_events(UNEVICTABLE_PGMUNLOCKED, nr_pages);
  }
  /**
   * munlock_vma_page - munlock a vma page
   * @page: page to be unlocked, either a normal page or THP page head
   *
   * returns the size of the page as a page mask (0 for normal page,
   *         HPAGE_PMD_NR - 1 for THP head page)
   *
   * called from munlock()/munmap() path with page supposedly on the LRU.
   * When we munlock a page, because the vma where we found the page is being
   * munlock()ed or munmap()ed, we want to check whether other vmas hold the
   * page locked so that we can leave it on the unevictable lru list and not
   * bother vmscan with it.  However, to walk the page's rmap list in
   * try_to_munlock() we must isolate the page from the LRU.  If some other
   * task has removed the page from the LRU, we won't be able to do that.
   * So we clear the PageMlocked as we might not get another chance.  If we
   * can't isolate the page, we leave it for putback_lru_page() and vmscan
   * [page_referenced()/try_to_unmap()] to deal with.
   */
  unsigned int munlock_vma_page(struct page *page)
  {
  	int nr_pages;
  	pg_data_t *pgdat = page_pgdat(page);

  	/* For try_to_munlock() and to serialize with page migration */
  	BUG_ON(!PageLocked(page));
  	VM_BUG_ON_PAGE(PageTail(page), page);
  	/*
  	 * Serialize with any parallel __split_huge_page_refcount() which
  	 * might otherwise copy PageMlocked to part of the tail pages before
  	 * we clear it in the head page. It also stabilizes thp_nr_pages().
  	 */
  	spin_lock_irq(&pgdat->lru_lock);

  	if (!TestClearPageMlocked(page)) {
  		/* Potentially, PTE-mapped THP: do not skip the rest PTEs */
  		nr_pages = 1;
  		goto unlock_out;
  	}

  	nr_pages = thp_nr_pages(page);
  	__mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages);
  
  	if (__munlock_isolate_lru_page(page, true)) {
  		spin_unlock_irq(&pgdat->lru_lock);
  		__munlock_isolated_page(page);
  		goto out;
  	}
  	__munlock_isolation_failed(page);
  
  unlock_out:
  	spin_unlock_irq(&pgdat->lru_lock);
  
  out:
  	return nr_pages - 1;
  }
  /*
   * convert get_user_pages() return value to posix mlock() error
   */
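  /*
   * Rationale: POSIX specifies ENOMEM when part of the requested range is
   * not a valid mapping (which get_user_pages() reports as -EFAULT), and
   * EAGAIN when the pages could not be locked for resource reasons (which
   * reaches us here as -ENOMEM from the fault/allocation path).
   */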
  static int __mlock_posix_error_return(long retval)
  {
  	if (retval == -EFAULT)
  		retval = -ENOMEM;
  	else if (retval == -ENOMEM)
  		retval = -EAGAIN;
  	return retval;
  }
  /*
   * Prepare page for fast batched LRU putback via __putback_lru_fast()
   *
   * The fast path is available only for evictable pages with single mapping.
   * Then we can bypass the per-cpu pvec and get better performance.
   * When mapcount > 1 we need try_to_munlock(), which can fail.
   * When !page_evictable(), we need the full redo logic of putback_lru_page to
   * avoid leaving an evictable page on the unevictable list.
   *
   * In case of success, @page is added to @pvec and @pgrescued is incremented
   * in case that the page was previously unevictable. @page is also unlocked.
   */
  static bool __putback_lru_fast_prepare(struct page *page, struct pagevec *pvec,
  		int *pgrescued)
  {
  	VM_BUG_ON_PAGE(PageLRU(page), page);
  	VM_BUG_ON_PAGE(!PageLocked(page), page);
  
  	if (page_mapcount(page) <= 1 && page_evictable(page)) {
  		pagevec_add(pvec, page);
  		if (TestClearPageUnevictable(page))
  			(*pgrescued)++;
  		unlock_page(page);
  		return true;
  	}
  
  	return false;
  }
  
  /*
   * Putback multiple evictable pages to the LRU
   *
   * Batched putback of evictable pages that bypasses the per-cpu pvec. Some of
   * the pages might have meanwhile become unevictable but that is OK.
   */
  static void __putback_lru_fast(struct pagevec *pvec, int pgrescued)
  {
  	count_vm_events(UNEVICTABLE_PGMUNLOCKED, pagevec_count(pvec));
  	/*
  	 * __pagevec_lru_add() calls release_pages() so we don't call
  	 * put_page() explicitly
  	 */
  	__pagevec_lru_add(pvec);
  	count_vm_events(UNEVICTABLE_PGRESCUED, pgrescued);
  }
  
  /*
   * Munlock a batch of pages from the same zone
   *
   * The work is split to two main phases. First phase clears the Mlocked flag
   * and attempts to isolate the pages, all under a single zone lru lock.
   * The second phase finishes the munlock only for pages where isolation
   * succeeded.
   *
   * Note that the pagevec may be modified during the process.
   */
  static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
  {
  	int i;
  	int nr = pagevec_count(pvec);
  	int delta_munlocked = -nr;
  	struct pagevec pvec_putback;
  	int pgrescued = 0;

  	pagevec_init(&pvec_putback);

  	/* Phase 1: page isolation */
  	spin_lock_irq(&zone->zone_pgdat->lru_lock);
  	for (i = 0; i < nr; i++) {
  		struct page *page = pvec->pages[i];
  
  		if (TestClearPageMlocked(page)) {
  			/*
  			 * We already have pin from follow_page_mask()
  			 * so we can spare the get_page() here.
  			 */
  			if (__munlock_isolate_lru_page(page, false))
  				continue;
  			else
  				__munlock_isolation_failed(page);
  		} else {
  			delta_munlocked++;
  		}
  
  		/*
  		 * We won't be munlocking this page in the next phase
  		 * but we still need to release the follow_page_mask()
  		 * pin. We cannot do it under lru_lock however. If it's
  		 * the last pin, __page_cache_release() would deadlock.
  		 */
  		pagevec_add(&pvec_putback, pvec->pages[i]);
  		pvec->pages[i] = NULL;
  	}
  	__mod_zone_page_state(zone, NR_MLOCK, delta_munlocked);
  	spin_unlock_irq(&zone->zone_pgdat->lru_lock);

  	/* Now we can release pins of pages that we are not munlocking */
  	pagevec_release(&pvec_putback);
  	/* Phase 2: page munlock */
  	for (i = 0; i < nr; i++) {
  		struct page *page = pvec->pages[i];
  
  		if (page) {
  			lock_page(page);
  			if (!__putback_lru_fast_prepare(page, &pvec_putback,
  					&pgrescued)) {
  				/*
  				 * Slow path. We don't want to lose the last
  				 * pin before unlock_page()
  				 */
  				get_page(page); /* for putback_lru_page() */
  				__munlock_isolated_page(page);
  				unlock_page(page);
  				put_page(page); /* from follow_page_mask() */
  			}
  		}
  	}

  	/*
  	 * Phase 3: page putback for pages that qualified for the fast path
  	 * This will also call put_page() to return pin from follow_page_mask()
  	 */
  	if (pagevec_count(&pvec_putback))
  		__putback_lru_fast(&pvec_putback, pgrescued);
  }
  
  /*
   * Fill up pagevec for __munlock_pagevec using pte walk
   *
   * The function expects that the struct page corresponding to @start address is
   * a non-THP page already pinned and in the @pvec, and that it belongs to @zone.
   *
   * The rest of @pvec is filled by subsequent pages within the same pmd and same
   * zone, as long as the pte's are present and vm_normal_page() succeeds. These
   * pages also get pinned.
   *
   * Returns the address of the next page that should be scanned. This equals
   * @start + PAGE_SIZE when no page could be added by the pte walk.
   */
  static unsigned long __munlock_pagevec_fill(struct pagevec *pvec,
  			struct vm_area_struct *vma, struct zone *zone,
  			unsigned long start, unsigned long end)
  {
  	pte_t *pte;
  	spinlock_t *ptl;
  
  	/*
  	 * Initialize pte walk starting at the already pinned page where we
  	 * are sure that there is a pte, as it was pinned under the same
  	 * mmap_lock write op.
  	 */
  	pte = get_locked_pte(vma->vm_mm, start,	&ptl);
  	/* Make sure we do not cross the page table boundary */
  	end = pgd_addr_end(start, end);
  	end = p4d_addr_end(start, end);
  	end = pud_addr_end(start, end);
  	end = pmd_addr_end(start, end);
  
  	/* The page next to the pinned page is the first we will try to get */
  	start += PAGE_SIZE;
  	while (start < end) {
  		struct page *page = NULL;
  		pte++;
  		if (pte_present(*pte))
  			page = vm_normal_page(vma, start, *pte);
  		/*
  		 * Break if page could not be obtained or the page's node+zone does not
  		 * match
  		 */
  		if (!page || page_zone(page) != zone)
  			break;

  		/*
  		 * Do not use pagevec for PTE-mapped THP,
  		 * munlock_vma_pages_range() will handle them.
  		 */
  		if (PageTransCompound(page))
  			break;
  		get_page(page);
  		/*
  		 * Increase the address that will be returned *before* the
  		 * eventual break due to pvec becoming full by adding the page
  		 */
  		start += PAGE_SIZE;
  		if (pagevec_add(pvec, page) == 0)
  			break;
  	}
  	pte_unmap_unlock(pte, ptl);
  	return start;
  }
  
  /*
   * munlock_vma_pages_range() - munlock all pages in the vma range.
   * @vma - vma containing range to be munlock()ed.
   * @start - start address in @vma of the range
   * @end - end of range in @vma.
   *
   *  For mremap(), munmap() and exit().
   *
   * Called with @vma VM_LOCKED.
   *
   * Returns with VM_LOCKED cleared.  Callers must be prepared to
   * deal with this.
   *
   * We don't save and restore VM_LOCKED here because pages are
   * still on lru.  In unmap path, pages might be scanned by reclaim
   * and re-mlocked by try_to_{munlock|unmap} before we unmap and
   * free them.  This will result in freeing mlocked pages.
   */
  void munlock_vma_pages_range(struct vm_area_struct *vma,
  			     unsigned long start, unsigned long end)
  {
  	vma->vm_flags &= VM_LOCKED_CLEAR_MASK;

  	while (start < end) {
  		struct page *page;
  		unsigned int page_mask = 0;
  		unsigned long page_increm;
  		struct pagevec pvec;
  		struct zone *zone;

  		pagevec_init(&pvec);
  		/*
  		 * Although FOLL_DUMP is intended for get_dump_page(),
  		 * it just so happens that its special treatment of the
  		 * ZERO_PAGE (returning an error instead of doing get_page)
  		 * suits munlock very well (and if somehow an abnormal page
  		 * has sneaked into the range, we won't oops here: great).
  		 */
  		page = follow_page(vma, start, FOLL_GET | FOLL_DUMP);

  		if (page && !IS_ERR(page)) {
  			if (PageTransTail(page)) {
  				VM_BUG_ON_PAGE(PageMlocked(page), page);
  				put_page(page); /* follow_page_mask() */
  			} else if (PageTransHuge(page)) {
  				lock_page(page);
  				/*
  				 * Any THP page found by follow_page_mask() may
  				 * have gotten split before reaching
  				 * munlock_vma_page(), so we need to compute
  				 * the page_mask here instead.
  				 */
  				page_mask = munlock_vma_page(page);
  				unlock_page(page);
  				put_page(page); /* follow_page_mask() */
  			} else {
  				/*
  				 * Non-huge pages are handled in batches via
  				 * pagevec. The pin from follow_page_mask()
  				 * prevents them from collapsing by THP.
  				 */
  				pagevec_add(&pvec, page);
  				zone = page_zone(page);

  				/*
  				 * Try to fill the rest of pagevec using fast
  				 * pte walk. This will also update start to
  				 * the next page to process. Then munlock the
  				 * pagevec.
  				 */
  				start = __munlock_pagevec_fill(&pvec, vma,
  						zone, start, end);
  				__munlock_pagevec(&pvec, zone);
  				goto next;
  			}
  		}
  		page_increm = 1 + page_mask;
  		start += page_increm * PAGE_SIZE;
  next:
  		cond_resched();
  	}
  }
  
  /*
   * mlock_fixup  - handle mlock[all]/munlock[all] requests.
   *
   * Filters out "special" vmas -- VM_LOCKED never gets set for these, and
   * munlock is a no-op.  However, for some special vmas, we go ahead and
   * populate the ptes.
   *
   * For vmas that pass the filters, merge/split as appropriate.
   */
  static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
  	unsigned long start, unsigned long end, vm_flags_t newflags)
  {
  	struct mm_struct *mm = vma->vm_mm;
  	pgoff_t pgoff;
  	int nr_pages;
  	int ret = 0;
  	int lock = !!(newflags & VM_LOCKED);
  	vm_flags_t old_flags = vma->vm_flags;

  	if (newflags == vma->vm_flags || (vma->vm_flags & VM_SPECIAL) ||
  	    is_vm_hugetlb_page(vma) || vma == get_gate_vma(current->mm) ||
  	    vma_is_dax(vma))
  		/* don't set VM_LOCKED or VM_LOCKONFAULT and don't count */
  		goto out;

  	pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
  	*prev = vma_merge(mm, *prev, start, end, newflags, vma->anon_vma,
  			  vma->vm_file, pgoff, vma_policy(vma),
  			  vma->vm_userfaultfd_ctx);
  	if (*prev) {
  		vma = *prev;
  		goto success;
  	}
  	if (start != vma->vm_start) {
  		ret = split_vma(mm, vma, start, 1);
  		if (ret)
  			goto out;
  	}
  
  	if (end != vma->vm_end) {
  		ret = split_vma(mm, vma, end, 0);
  		if (ret)
  			goto out;
  	}
  
  success:
  	/*
  	 * Keep track of amount of locked VM.
  	 */
  	nr_pages = (end - start) >> PAGE_SHIFT;
  	if (!lock)
  		nr_pages = -nr_pages;
  	else if (old_flags & VM_LOCKED)
  		nr_pages = 0;
  	mm->locked_vm += nr_pages;
  
  	/*
  	 * vm_flags is protected by the mmap_lock held in write mode.
  	 * It's okay if try_to_unmap_one unmaps a page just after we
  	 * set VM_LOCKED, populate_vma_page_range will bring it back.
  	 */

  	if (lock)
  		vma->vm_flags = newflags;
  	else
  		munlock_vma_pages_range(vma, start, end);

  out:
  	*prev = vma;
  	return ret;
  }
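
  /*
   * Apply the VM_LOCKED/VM_LOCKONFAULT bits in @flags to every vma
   * overlapping [start, start + len), splitting or merging vmas as needed
   * via mlock_fixup().  A @flags of 0 clears the lock bits (munlock).
   * Returns 0 on success, or -ENOMEM if the range contains an unmapped gap.
   */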
  static int apply_vma_lock_flags(unsigned long start, size_t len,
  				vm_flags_t flags)
  {
  	unsigned long nstart, end, tmp;
  	struct vm_area_struct * vma, * prev;
  	int error;
  	VM_BUG_ON(offset_in_page(start));
  	VM_BUG_ON(len != PAGE_ALIGN(len));
  	end = start + len;
  	if (end < start)
  		return -EINVAL;
  	if (end == start)
  		return 0;
  	vma = find_vma(current->mm, start);
  	if (!vma || vma->vm_start > start)
  		return -ENOMEM;
  	prev = vma->vm_prev;
  	if (start > vma->vm_start)
  		prev = vma;
  
  	for (nstart = start ; ; ) {
  		vm_flags_t newflags = vma->vm_flags & VM_LOCKED_CLEAR_MASK;

  		newflags |= flags;

  		/* Here we know that  vma->vm_start <= nstart < vma->vm_end. */
  		tmp = vma->vm_end;
  		if (tmp > end)
  			tmp = end;
  		error = mlock_fixup(vma, &prev, nstart, tmp, newflags);
  		if (error)
  			break;
  		nstart = tmp;
  		if (nstart < prev->vm_end)
  			nstart = prev->vm_end;
  		if (nstart >= end)
  			break;
  
  		vma = prev->vm_next;
  		if (!vma || vma->vm_start != nstart) {
  			error = -ENOMEM;
  			break;
  		}
  	}
  	return error;
  }
  /*
   * Go through the vma areas and sum the size of the mlocked
   * vma pages, as the return value.
   * Note that the deferred memory locking case (mlock2() with MLOCK_ONFAULT)
   * is also counted.
   * Return value: previously mlocked page count
   */
  static unsigned long count_mm_mlocked_page_nr(struct mm_struct *mm,
  		unsigned long start, size_t len)
  {
  	struct vm_area_struct *vma;
  	unsigned long count = 0;
  
  	if (mm == NULL)
  		mm = current->mm;
  
  	vma = find_vma(mm, start);
  	if (vma == NULL)
  		vma = mm->mmap;
  
  	for (; vma ; vma = vma->vm_next) {
  		if (start >= vma->vm_end)
  			continue;
  		if (start + len <=  vma->vm_start)
  			break;
  		if (vma->vm_flags & VM_LOCKED) {
  			if (start > vma->vm_start)
  				count -= (start - vma->vm_start);
  			if (start + len < vma->vm_end) {
  				count += start + len - vma->vm_start;
  				break;
  			}
  			count += vma->vm_end - vma->vm_start;
  		}
  	}
  
  	return count >> PAGE_SHIFT;
  }
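
  /*
   * Common implementation behind the mlock()/mlock2() syscalls: check
   * permissions, charge the request against RLIMIT_MEMLOCK (discounting any
   * part of the range that is already mlocked), apply the vma flags under
   * the mmap write lock, then fault the pages in with __mm_populate() and
   * translate errors to the POSIX codes.
   */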
  static __must_check int do_mlock(unsigned long start, size_t len, vm_flags_t flags)
  {
  	unsigned long locked;
  	unsigned long lock_limit;
  	int error = -ENOMEM;
  	start = untagged_addr(start);
  	if (!can_do_mlock())
  		return -EPERM;
  	len = PAGE_ALIGN(len + (offset_in_page(start)));
  	start &= PAGE_MASK;
  	lock_limit = rlimit(RLIMIT_MEMLOCK);
  	lock_limit >>= PAGE_SHIFT;
  	locked = len >> PAGE_SHIFT;
  	if (mmap_write_lock_killable(current->mm))
  		return -EINTR;
  
  	locked += current->mm->locked_vm;
  	if ((locked > lock_limit) && (!capable(CAP_IPC_LOCK))) {
  		/*
  		 * It is possible that the regions requested intersect with
  		 * previously mlocked areas; that already-locked part is
  		 * accounted in "mm->locked_vm" and must not be counted again
  		 * towards this mlock request. So check and adjust the locked
  		 * count if necessary.
  		 */
  		locked -= count_mm_mlocked_page_nr(current->mm,
  				start, len);
  	}
  
  	/* check against resource limits */
  	if ((locked <= lock_limit) || capable(CAP_IPC_LOCK))
  		error = apply_vma_lock_flags(start, len, flags);

  	mmap_write_unlock(current->mm);
  	if (error)
  		return error;
  
  	error = __mm_populate(start, len, 0);
  	if (error)
  		return __mlock_posix_error_return(error);
  	return 0;
  }
  SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len)
  {
  	return do_mlock(start, len, VM_LOCKED);
  }
  SYSCALL_DEFINE3(mlock2, unsigned long, start, size_t, len, int, flags)
  {
  	vm_flags_t vm_flags = VM_LOCKED;
  
  	if (flags & ~MLOCK_ONFAULT)
  		return -EINVAL;
  	if (flags & MLOCK_ONFAULT)
  		vm_flags |= VM_LOCKONFAULT;
  
  	return do_mlock(start, len, vm_flags);
  }
  SYSCALL_DEFINE2(munlock, unsigned long, start, size_t, len)
  {
  	int ret;
  	start = untagged_addr(start);
  	len = PAGE_ALIGN(len + (offset_in_page(start)));
  	start &= PAGE_MASK;

  	if (mmap_write_lock_killable(current->mm))
  		return -EINTR;
  	ret = apply_vma_lock_flags(start, len, 0);
  	mmap_write_unlock(current->mm);

  	return ret;
  }
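
  /*
   * Illustrative userspace sketch (not part of the kernel) of the syscalls
   * defined above, assuming a libc that provides the mlock2() wrapper and
   * MLOCK_ONFAULT (glibc 2.27 and later).  The kernel rounds the range to
   * page granularity, as seen in do_mlock() above.
   *
   *	#define _GNU_SOURCE
   *	#include <stdlib.h>
   *	#include <sys/mman.h>
   *
   *	int main(void)
   *	{
   *		size_t len = 16 * 4096;
   *		char *buf = malloc(len);
   *
   *		if (!buf)
   *			return 1;
   *		if (mlock(buf, len))			/* lock and populate now */
   *			return 1;
   *		munlock(buf, len);
   *		if (mlock2(buf, len, MLOCK_ONFAULT))	/* lock pages as they fault in */
   *			return 1;
   *		munlock(buf, len);
   *		free(buf);
   *		return 0;
   *	}
   */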
  /*
   * Take the MCL_* flags passed into mlockall (or 0 if called from munlockall)
   * and translate into the appropriate modifications to mm->def_flags and/or the
   * flags for all current VMAs.
   *
   * There are a couple of subtleties with this.  If mlockall() is called multiple
   * times with different flags, the values do not necessarily stack.  If mlockall
   * is called once including the MCL_FUTURE flag and then a second time without
   * it, VM_LOCKED and VM_LOCKONFAULT will be cleared from mm->def_flags.
   */
  static int apply_mlockall_flags(int flags)
  {
  	struct vm_area_struct * vma, * prev = NULL;
  	vm_flags_t to_add = 0;

  	current->mm->def_flags &= VM_LOCKED_CLEAR_MASK;
  	if (flags & MCL_FUTURE) {
  		current->mm->def_flags |= VM_LOCKED;

  		if (flags & MCL_ONFAULT)
  			current->mm->def_flags |= VM_LOCKONFAULT;
  
  		if (!(flags & MCL_CURRENT))
  			goto out;
  	}
  
  	if (flags & MCL_CURRENT) {
  		to_add |= VM_LOCKED;
  		if (flags & MCL_ONFAULT)
  			to_add |= VM_LOCKONFAULT;
  	}
  
  	for (vma = current->mm->mmap; vma ; vma = prev->vm_next) {
  		vm_flags_t newflags;

  		newflags = vma->vm_flags & VM_LOCKED_CLEAR_MASK;
  		newflags |= to_add;
  
  		/* Ignore errors */
  		mlock_fixup(vma, &prev, vma->vm_start, vma->vm_end, newflags);
  		cond_resched();
  	}
  out:
  	return 0;
  }
  SYSCALL_DEFINE1(mlockall, int, flags)
  {
  	unsigned long lock_limit;
  	int ret;

  	if (!flags || (flags & ~(MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT)) ||
  	    flags == MCL_ONFAULT)
  		return -EINVAL;

  	if (!can_do_mlock())
  		return -EPERM;

  	lock_limit = rlimit(RLIMIT_MEMLOCK);
  	lock_limit >>= PAGE_SHIFT;
  	if (mmap_write_lock_killable(current->mm))
  		return -EINTR;

  	ret = -ENOMEM;
  	if (!(flags & MCL_CURRENT) || (current->mm->total_vm <= lock_limit) ||
  	    capable(CAP_IPC_LOCK))
  		ret = apply_mlockall_flags(flags);
  	mmap_write_unlock(current->mm);
  	if (!ret && (flags & MCL_CURRENT))
  		mm_populate(0, TASK_SIZE);

  	return ret;
  }
  SYSCALL_DEFINE0(munlockall)
  {
  	int ret;
  	if (mmap_write_lock_killable(current->mm))
  		return -EINTR;
  	ret = apply_mlockall_flags(0);
  	mmap_write_unlock(current->mm);
  	return ret;
  }
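
  /*
   * Illustrative userspace sketch (not part of the kernel): a
   * latency-sensitive process typically locks both its current and future
   * mappings, which ends up in apply_mlockall_flags(MCL_CURRENT | MCL_FUTURE)
   * above.
   *
   *	#include <sys/mman.h>
   *
   *	int main(void)
   *	{
   *		if (mlockall(MCL_CURRENT | MCL_FUTURE))
   *			return 1;	/* typically EPERM or ENOMEM */
   *		/* ... do work without major page faults ... */
   *		return munlockall();
   *	}
   */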
  
  /*
   * Objects with different lifetime than processes (SHM_LOCK and SHM_HUGETLB
   * shm segments) get accounted against the user_struct instead.
   */
  static DEFINE_SPINLOCK(shmlock_user_lock);
  
  int user_shm_lock(size_t size, struct user_struct *user)
  {
  	unsigned long lock_limit, locked;
  	int allowed = 0;
  
  	locked = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
  	lock_limit = rlimit(RLIMIT_MEMLOCK);
  	if (lock_limit == RLIM_INFINITY)
  		allowed = 1;
  	lock_limit >>= PAGE_SHIFT;
  	spin_lock(&shmlock_user_lock);
  	if (!allowed &&
  	    locked + user->locked_shm > lock_limit && !capable(CAP_IPC_LOCK))
  		goto out;
  	get_uid(user);
  	user->locked_shm += locked;
  	allowed = 1;
  out:
  	spin_unlock(&shmlock_user_lock);
  	return allowed;
  }
  
  void user_shm_unlock(size_t size, struct user_struct *user)
  {
  	spin_lock(&shmlock_user_lock);
  	user->locked_shm -= (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
  	spin_unlock(&shmlock_user_lock);
  	free_uid(user);
  }