  // SPDX-License-Identifier: GPL-2.0
  /*
   *	linux/mm/mincore.c
   *
   * Copyright (C) 1994-2006  Linus Torvalds
   */
  
  /*
   * The mincore() system call.
   */
  #include <linux/pagemap.h>
  #include <linux/gfp.h>
  #include <linux/pagewalk.h>
  #include <linux/mman.h>
  #include <linux/syscalls.h>
  #include <linux/swap.h>
  #include <linux/swapops.h>
  #include <linux/shmem_fs.h>
  #include <linux/hugetlb.h>

  #include <linux/uaccess.h>
  #include <asm/pgtable.h>
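  /*
   * Page-walk callback for hugetlb VMAs: every page in the range shares
   * the residency state of the single huge PTE that covers it.
   */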
  static int mincore_hugetlb(pte_t *pte, unsigned long hmask, unsigned long addr,
  			unsigned long end, struct mm_walk *walk)
  {
  #ifdef CONFIG_HUGETLB_PAGE
  	unsigned char present;
  	unsigned char *vec = walk->private;

  	/*
  	 * Huge pages mapped into a user process are always resident in RAM
  	 * and never swapped out, but in theory that still needs to be checked.
  	 */
  	present = pte && !huge_pte_none(huge_ptep_get(pte));
  	for (; addr != end; vec++, addr += PAGE_SIZE)
  		*vec = present;
  	walk->private = vec;
  #else
  	BUG();
  #endif
  	return 0;
  }
  /*
   * Later we can get more picky about what "in core" means precisely.
   * For now, simply check to see if the page is in the page cache,
   * and is up to date; i.e. that no page-in operation would be required
   * at this time if an application were to map and access this page.
   */
  static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff)
  {
  	unsigned char present = 0;
  	struct page *page;
  
  	/*
  	 * When tmpfs swaps out a page from a file, any process mapping that
  	 * file will not get a swp_entry_t in its pte, but rather it is like
  	 * any other file mapping (ie. marked !present and faulted in with
  	 * tmpfs's .fault). So swapped out tmpfs mappings are tested here.
  	 */
  #ifdef CONFIG_SWAP
  	if (shmem_mapping(mapping)) {
  		page = find_get_entry(mapping, pgoff);
  		/*
  		 * shmem/tmpfs may return swap: account for swapcache
  		 * page too.
  		 */
  		if (xa_is_value(page)) {
  			swp_entry_t swp = radix_to_swp_entry(page);
  			struct swap_info_struct *si;
  
  			/* Prevent the swap device from being swapped off under us */
  			si = get_swap_device(swp);
  			if (si) {
  				page = find_get_page(swap_address_space(swp),
  						     swp_offset(swp));
  				put_swap_device(si);
  			} else
  				page = NULL;
  		}
  	} else
  		page = find_get_page(mapping, pgoff);
  #else
  	page = find_get_page(mapping, pgoff);
  #endif
  	if (page) {
  		present = PageUptodate(page);
  		put_page(page);
  	}
  
  	return present;
  }
  
  static int __mincore_unmapped_range(unsigned long addr, unsigned long end,
  				struct vm_area_struct *vma, unsigned char *vec)
  {
  	unsigned long nr = (end - addr) >> PAGE_SHIFT;
  	int i;

  	if (vma->vm_file) {
  		pgoff_t pgoff;
  
  		pgoff = linear_page_index(vma, addr);
  		for (i = 0; i < nr; i++, pgoff++)
  			vec[i] = mincore_page(vma->vm_file->f_mapping, pgoff);
  	} else {
  		for (i = 0; i < nr; i++)
  			vec[i] = 0;
  	}
  	return nr;
  }
  
  static int mincore_unmapped_range(unsigned long addr, unsigned long end,
  				   struct mm_walk *walk)
  {
  	walk->private += __mincore_unmapped_range(addr, end,
  						  walk->vma, walk->private);
  	return 0;
  }
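  /*
   * Page-walk callback for mapped page tables: a transparent huge PMD is
   * reported as fully resident, an unstable PMD as unmapped, and otherwise
   * each PTE is examined in turn (none, present, or a swap entry).
   */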
  static int mincore_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
  			struct mm_walk *walk)
  {
  	spinlock_t *ptl;
  	struct vm_area_struct *vma = walk->vma;
  	pte_t *ptep;
  	unsigned char *vec = walk->private;
  	int nr = (end - addr) >> PAGE_SHIFT;
  	ptl = pmd_trans_huge_lock(pmd, vma);
  	if (ptl) {
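  		/* The range is covered by a single huge PMD; report it all as in core. */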
  		memset(vec, 1, nr);
  		spin_unlock(ptl);
  		goto out;
  	}

  	if (pmd_trans_unstable(pmd)) {
  		__mincore_unmapped_range(addr, end, vma, vec);
  		goto out;
  	}
  
  	ptep = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
  	for (; addr != end; ptep++, addr += PAGE_SIZE) {
  		pte_t pte = *ptep;
  
  		if (pte_none(pte))
  			__mincore_unmapped_range(addr, addr + PAGE_SIZE,
  						 vma, vec);
  		else if (pte_present(pte))
  			*vec = 1;
  		else { /* pte is a swap entry */
  			swp_entry_t entry = pte_to_swp_entry(pte);
  			if (non_swap_entry(entry)) {
  				/*
  				 * migration or hwpoison entries are always
  				 * uptodate
  				 */
  				*vec = 1;
  			} else {
  #ifdef CONFIG_SWAP
  				*vec = mincore_page(swap_address_space(entry),
  						    swp_offset(entry));
  #else
  				WARN_ON(1);
  				*vec = 1;
  #endif
  			}
  		}
  		vec++;
  	}
  	pte_unmap_unlock(ptep - 1, ptl);
  out:
  	walk->private += nr;
  	cond_resched();
  	return 0;
  }
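  /*
   * Decide whether the caller may learn pagecache residency for this VMA;
   * when it may not, do_mincore() reports every page as resident instead.
   */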
  static inline bool can_do_mincore(struct vm_area_struct *vma)
  {
  	if (vma_is_anonymous(vma))
  		return true;
  	if (!vma->vm_file)
  		return false;
  	/*
  	 * Reveal pagecache information only for non-anonymous mappings that
  	 * correspond to the files the calling process could (if tried) open
  	 * for writing; otherwise we'd be including shared non-exclusive
  	 * mappings, which opens a side channel.
  	 */
  	return inode_owner_or_capable(file_inode(vma->vm_file)) ||
  		inode_permission(file_inode(vma->vm_file), MAY_WRITE) == 0;
  }
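  /* Callbacks used by walk_page_range() from do_mincore(). */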
  static const struct mm_walk_ops mincore_walk_ops = {
  	.pmd_entry		= mincore_pte_range,
  	.pte_hole		= mincore_unmapped_range,
  	.hugetlb_entry		= mincore_hugetlb,
  };
  /*
   * Do a chunk of "sys_mincore()". We've already checked
   * all the arguments, we hold the mmap semaphore: we should
   * just return the amount of info we're asked for.
   */
  static long do_mincore(unsigned long addr, unsigned long pages, unsigned char *vec)
  {
  	struct vm_area_struct *vma;
  	unsigned long end;
  	int err;

  	vma = find_vma(current->mm, addr);
  	if (!vma || addr < vma->vm_start)
  		return -ENOMEM;
  	end = min(vma->vm_end, addr + (pages << PAGE_SHIFT));
  	if (!can_do_mincore(vma)) {
  		unsigned long pages = DIV_ROUND_UP(end - addr, PAGE_SIZE);
  		memset(vec, 1, pages);
  		return pages;
  	}
  	err = walk_page_range(vma->vm_mm, addr, end, &mincore_walk_ops, vec);
  	if (err < 0)
  		return err;
  	return (end - addr) >> PAGE_SHIFT;
  }
  
  /*
   * The mincore(2) system call.
   *
   * mincore() returns the memory residency status of the pages in the
   * current process's address space specified by [addr, addr + len).
   * The status is returned in a vector of bytes.  The least significant
   * bit of each byte is 1 if the referenced page is in memory, otherwise
   * it is zero.
   *
   * Because the status of a page can change after mincore() checks it
   * but before it returns to the application, the returned vector may
   * contain stale information.  Only locked pages are guaranteed to
   * remain in memory.
   *
   * return values:
   *  zero    - success
   *  -EFAULT - vec points to an illegal address
   *  -EINVAL - addr is not a multiple of PAGE_SIZE
   *  -ENOMEM - Addresses in the range [addr, addr + len] are
   *		invalid for the address space of this process, or
   *		specify one or more pages which are not currently
   *		mapped
   *  -EAGAIN - A kernel resource was temporarily unavailable.
   */
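
  /*
   * Illustrative userspace usage (a sketch only; it assumes the standard
   * libc mmap()/mincore() wrappers and a four-page anonymous mapping):
   *
   *	size_t page = sysconf(_SC_PAGESIZE);
   *	unsigned char vec[4];
   *	void *buf = mmap(NULL, 4 * page, PROT_READ | PROT_WRITE,
   *			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
   *
   *	if (mincore(buf, 4 * page, vec) == 0)
   *		printf("first page resident: %d\n", vec[0] & 1);
   */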
  SYSCALL_DEFINE3(mincore, unsigned long, start, size_t, len,
  		unsigned char __user *, vec)
  {
  	long retval;
  	unsigned long pages;
  	unsigned char *tmp;

  	start = untagged_addr(start);
  	/* Check the start address: needs to be page-aligned.. */
  	if (start & ~PAGE_MASK)
  		return -EINVAL;

  	/* ..and we need to be passed a valid user-space range */
  	if (!access_ok((void __user *) start, len))
  		return -ENOMEM;

  	/* This also avoids any overflows on PAGE_ALIGN */
  	pages = len >> PAGE_SHIFT;
  	pages += (offset_in_page(len)) != 0;
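  	/* e.g. with 4 KiB pages, len = 5000 yields pages = 1 + 1 = 2 */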

  	if (!access_ok(vec, pages))
  		return -EFAULT;

  	tmp = (void *) __get_free_page(GFP_USER);
  	if (!tmp)
  		return -EAGAIN;
  
  	retval = 0;
  	while (pages) {
  		/*
  		 * Do at most PAGE_SIZE entries per iteration, due to
  		 * the temporary buffer size.
  		 */
  		down_read(&current->mm->mmap_sem);
  		retval = do_mincore(start, min(pages, PAGE_SIZE), tmp);
  		up_read(&current->mm->mmap_sem);
  
  		if (retval <= 0)
  			break;
  		if (copy_to_user(vec, tmp, retval)) {
  			retval = -EFAULT;
  			break;
  		}
  		pages -= retval;
  		vec += retval;
  		start += retval << PAGE_SHIFT;
  		retval = 0;
  	}
  	free_page((unsigned long) tmp);
  	return retval;
  }