Blame view

mm/mincore.c 7.5 KB
b24413180   Greg Kroah-Hartman   License cleanup: ...
1
  // SPDX-License-Identifier: GPL-2.0
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2
3
4
  /*
   *	linux/mm/mincore.c
   *
2f77d1070   Linus Torvalds   Fix incorrect use...
5
   * Copyright (C) 1994-2006  Linus Torvalds
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
6
7
8
9
10
   */
  
  /*
   * The mincore() system call.
   */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
11
  #include <linux/pagemap.h>
5a0e3ad6a   Tejun Heo   include cleanup: ...
12
  #include <linux/gfp.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
13
14
15
  #include <linux/mm.h>
  #include <linux/mman.h>
  #include <linux/syscalls.h>
42da9cbd3   Nick Piggin   [PATCH] mm: minco...
16
17
  #include <linux/swap.h>
  #include <linux/swapops.h>
3a4f8a0b3   Hugh Dickins   mm: remove shmem_...
18
  #include <linux/shmem_fs.h>
4f16fc107   Naoya Horiguchi   mm: hugetlb: fix ...
19
  #include <linux/hugetlb.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
20

7c0f6ba68   Linus Torvalds   Replace <asm/uacc...
21
  #include <linux/uaccess.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
22
  #include <asm/pgtable.h>
1e25a271c   Naoya Horiguchi   mincore: apply pa...
23
24
  /*
   * hugetlb_entry callback of the mincore page walk.
   *
   * Writes one status byte per PAGE_SIZE step of [addr, end) into the result
   * vector found in walk->private, all carrying the same value derived from
   * the huge pte, then stores the advanced vector position back into
   * walk->private.  Always returns 0.
   */
  static int mincore_hugetlb(pte_t *pte, unsigned long hmask, unsigned long addr,
  			unsigned long end, struct mm_walk *walk)
  {
  #ifdef CONFIG_HUGETLB_PAGE
  	unsigned char *out = walk->private;
  	unsigned char resident = 0;
  
  	/*
  	 * Huge pages mapped by a user process stay in RAM and are never
  	 * swapped out, but check the entry anyway rather than assume it.
  	 */
  	if (pte && !huge_pte_none(huge_ptep_get(pte)))
  		resident = 1;
  
  	while (addr != end) {
  		*out++ = resident;
  		addr += PAGE_SIZE;
  	}
  	walk->private = out;
  #else
  	/* Without CONFIG_HUGETLB_PAGE this callback must never run. */
  	BUG();
  #endif
  	return 0;
  }
30bac164a   Linus Torvalds   Revert "Change mi...
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
  /*
   * Report whether the page at @pgoff of @mapping counts as "in core".
   *
   * The definition may become stricter later.  For now a page counts when it
   * is found in the page cache and is up to date, i.e. no page-in would be
   * needed if an application mapped and touched it right now.
   *
   * Returns the PageUptodate() result for a page that was found, 0 otherwise.
   */
  static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff)
  {
  	struct page *page;
  	unsigned char uptodate;
  
  	/*
  	 * A tmpfs page that was swapped out does not leave a swp_entry_t in
  	 * the ptes of processes mapping the file; the pte simply looks like
  	 * any other non-present file pte and is refaulted through tmpfs's
  	 * .fault.  Swapped-out tmpfs pages therefore have to be looked up
  	 * here.
  	 */
  #ifdef CONFIG_SWAP
  	if (!shmem_mapping(mapping)) {
  		page = find_get_page(mapping, pgoff);
  	} else {
  		page = find_get_entry(mapping, pgoff);
  		/*
  		 * shmem/tmpfs may hand back a swap entry instead of a page:
  		 * in that case look for the page in the swap cache.
  		 */
  		if (xa_is_value(page)) {
  			swp_entry_t swp = radix_to_swp_entry(page);
  			struct swap_info_struct *si = get_swap_device(swp);
  
  			page = NULL;
  			/* Keep the swap device from being swapped off under us */
  			if (si) {
  				page = find_get_page(swap_address_space(swp),
  						     swp_offset(swp));
  				put_swap_device(si);
  			}
  		}
  	}
  #else
  	page = find_get_page(mapping, pgoff);
  #endif
  	if (!page)
  		return 0;
  
  	uptodate = PageUptodate(page);
  	put_page(page);
  
  	return uptodate;
  }
  
  static int __mincore_unmapped_range(unsigned long addr, unsigned long end,
  				struct vm_area_struct *vma, unsigned char *vec)
1e25a271c   Naoya Horiguchi   mincore: apply pa...
95
  {
574823bfa   Linus Torvalds   Change mincore() ...
96
  	unsigned long nr = (end - addr) >> PAGE_SHIFT;
30bac164a   Linus Torvalds   Revert "Change mi...
97
  	int i;
574823bfa   Linus Torvalds   Change mincore() ...
98

30bac164a   Linus Torvalds   Revert "Change mi...
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
  	if (vma->vm_file) {
  		pgoff_t pgoff;
  
  		pgoff = linear_page_index(vma, addr);
  		for (i = 0; i < nr; i++, pgoff++)
  			vec[i] = mincore_page(vma->vm_file->f_mapping, pgoff);
  	} else {
  		for (i = 0; i < nr; i++)
  			vec[i] = 0;
  	}
  	return nr;
  }
  
  /*
   * pte_hole callback of the mincore page walk: fill the result vector for
   * [addr, end) through __mincore_unmapped_range() and move walk->private
   * forward by the number of entries it reports.  Always returns 0.
   */
  static int mincore_unmapped_range(unsigned long addr, unsigned long end,
  				   struct mm_walk *walk)
  {
  	unsigned char *vec = walk->private;
  	int filled;
  
  	filled = __mincore_unmapped_range(addr, end, walk->vma, vec);
  	walk->private = vec + filled;
  
  	return 0;
  }
1e25a271c   Naoya Horiguchi   mincore: apply pa...
119
120
  /*
   * pmd_entry callback of the mincore page walk.
   *
   * Produces one status byte per page of [addr, end) in the vector found in
   * walk->private, then advances walk->private by that many bytes and gives
   * the scheduler a chance to run.  Always returns 0.
   */
  static int mincore_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
  			struct mm_walk *walk)
  {
  	struct vm_area_struct *vma = walk->vma;
  	unsigned char *vec = walk->private;
  	int nr = (end - addr) >> PAGE_SHIFT;
  	spinlock_t *ptl;
  	pte_t *ptep;
  
  	ptl = pmd_trans_huge_lock(pmd, vma);
  	if (ptl) {
  		/* pmd_trans_huge_lock() took the lock: report all pages resident */
  		memset(vec, 1, nr);
  		spin_unlock(ptl);
  	} else if (pmd_trans_unstable(pmd)) {
  		/* No stable pte table to walk: treat the range as unmapped */
  		__mincore_unmapped_range(addr, end, vma, vec);
  	} else {
  		ptep = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
  		while (addr != end) {
  			pte_t pte = *ptep;
  
  			if (pte_none(pte)) {
  				__mincore_unmapped_range(addr, addr + PAGE_SIZE,
  							 vma, vec);
  			} else if (pte_present(pte)) {
  				*vec = 1;
  			} else {
  				/* neither none nor present: a swap entry */
  				swp_entry_t entry = pte_to_swp_entry(pte);
  
  				if (!non_swap_entry(entry)) {
  #ifdef CONFIG_SWAP
  					*vec = mincore_page(swap_address_space(entry),
  							    swp_offset(entry));
  #else
  					WARN_ON(1);
  					*vec = 1;
  #endif
  				} else {
  					/*
  					 * migration or hwpoison entries are
  					 * always uptodate
  					 */
  					*vec = 1;
  				}
  			}
  
  			vec++;
  			ptep++;
  			addr += PAGE_SIZE;
  		}
  		/* ptep is one past the last entry visited; unmap that last one */
  		pte_unmap_unlock(ptep - 1, ptl);
  	}
  
  	walk->private += nr;
  	cond_resched();
  	return 0;
  }
134fca906   Jiri Kosina   mm/mincore.c: mak...
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
  /*
   * Decide whether real residency information may be reported for @vma.
   *
   * Anonymous mappings are always allowed.  A non-anonymous vma without a
   * file is refused.  For file mappings, page cache state is revealed only
   * when the caller owns the inode (or is suitably capable) or would be
   * permitted to write to it; exposing it for other shared, non-exclusive
   * mappings would open a side channel.
   */
  static inline bool can_do_mincore(struct vm_area_struct *vma)
  {
  	struct inode *inode;
  
  	if (vma_is_anonymous(vma))
  		return true;
  	if (!vma->vm_file)
  		return false;
  
  	inode = file_inode(vma->vm_file);
  	if (inode_owner_or_capable(inode))
  		return true;
  
  	return inode_permission(inode, MAY_WRITE) == 0;
  }
2f77d1070   Linus Torvalds   Fix incorrect use...
189
190
191
192
193
  /*
   * Do a chunk of "sys_mincore()". We've already checked
   * all the arguments, we hold the mmap semaphore: we should
   * just return the amount of info we're asked for.
   */
6a60f1b35   Johannes Weiner   mincore: cleanups
194
  static long do_mincore(unsigned long addr, unsigned long pages, unsigned char *vec)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
195
  {
6a60f1b35   Johannes Weiner   mincore: cleanups
196
  	struct vm_area_struct *vma;
25ef0e50c   Johannes Weiner   mincore: pass ran...
197
  	unsigned long end;
1e25a271c   Naoya Horiguchi   mincore: apply pa...
198
199
200
201
202
203
204
  	int err;
  	struct mm_walk mincore_walk = {
  		.pmd_entry = mincore_pte_range,
  		.pte_hole = mincore_unmapped_range,
  		.hugetlb_entry = mincore_hugetlb,
  		.private = vec,
  	};
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
205

6a60f1b35   Johannes Weiner   mincore: cleanups
206
  	vma = find_vma(current->mm, addr);
4fb23e439   Linus Torvalds   Fix up mm/mincore...
207
208
  	if (!vma || addr < vma->vm_start)
  		return -ENOMEM;
25ef0e50c   Johannes Weiner   mincore: pass ran...
209
  	end = min(vma->vm_end, addr + (pages << PAGE_SHIFT));
134fca906   Jiri Kosina   mm/mincore.c: mak...
210
211
212
213
214
215
  	if (!can_do_mincore(vma)) {
  		unsigned long pages = DIV_ROUND_UP(end - addr, PAGE_SIZE);
  		memset(vec, 1, pages);
  		return pages;
  	}
  	mincore_walk.mm = vma->vm_mm;
1e25a271c   Naoya Horiguchi   mincore: apply pa...
216
217
218
  	err = walk_page_range(addr, end, &mincore_walk);
  	if (err < 0)
  		return err;
25ef0e50c   Johannes Weiner   mincore: pass ran...
219
  	return (end - addr) >> PAGE_SHIFT;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
  }
  
  /*
   * The mincore(2) system call.
   *
   * mincore() returns the memory residency status of the pages in the
   * current process's address space specified by [addr, addr + len).
   * The status is returned in a vector of bytes.  The least significant
   * bit of each byte is 1 if the referenced page is in memory, otherwise
   * it is zero.
   *
   * Because the status of a page can change after mincore() checks it
   * but before it returns to the application, the returned vector may
   * contain stale information.  Only locked pages are guaranteed to
   * remain in memory.
   *
   * return values:
   *  zero    - success
   *  -EFAULT - vec points to an illegal address
ea1754a08   Kirill A. Shutemov   mm, fs: remove re...
239
   *  -EINVAL - addr is not a multiple of PAGE_SIZE
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
240
241
242
243
244
245
   *  -ENOMEM - Addresses in the range [addr, addr + len] are
   *		invalid for the address space of this process, or
   *		specify one or more pages which are not currently
   *		mapped
   *  -EAGAIN - A kernel resource was temporarily unavailable.
   */
3480b2574   Heiko Carstens   [CVE-2009-0029] S...
246
247
  SYSCALL_DEFINE3(mincore, unsigned long, start, size_t, len,
  		unsigned char __user *, vec)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
248
  {
2f77d1070   Linus Torvalds   Fix incorrect use...
249
250
251
  	long retval;
  	unsigned long pages;
  	unsigned char *tmp;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
252

2f77d1070   Linus Torvalds   Fix incorrect use...
253
  	/* Check the start address: needs to be page-aligned.. */
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
254
  	if (start & ~PAGE_MASK)
2f77d1070   Linus Torvalds   Fix incorrect use...
255
  		return -EINVAL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
256

2f77d1070   Linus Torvalds   Fix incorrect use...
257
  	/* ..and we need to be passed a valid user-space range */
96d4f267e   Linus Torvalds   Remove 'type' arg...
258
  	if (!access_ok((void __user *) start, len))
2f77d1070   Linus Torvalds   Fix incorrect use...
259
  		return -ENOMEM;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
260

ea1754a08   Kirill A. Shutemov   mm, fs: remove re...
261
  	/* This also avoids any overflows on PAGE_ALIGN */
2f77d1070   Linus Torvalds   Fix incorrect use...
262
  	pages = len >> PAGE_SHIFT;
e7bbdd071   Alexander Kuleshov   mm/mincore: use o...
263
  	pages += (offset_in_page(len)) != 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
264

96d4f267e   Linus Torvalds   Remove 'type' arg...
265
  	if (!access_ok(vec, pages))
2f77d1070   Linus Torvalds   Fix incorrect use...
266
  		return -EFAULT;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
267

2f77d1070   Linus Torvalds   Fix incorrect use...
268
269
  	tmp = (void *) __get_free_page(GFP_USER);
  	if (!tmp)
4fb23e439   Linus Torvalds   Fix up mm/mincore...
270
  		return -EAGAIN;
2f77d1070   Linus Torvalds   Fix incorrect use...
271
272
273
274
275
276
277
278
  
  	retval = 0;
  	while (pages) {
  		/*
  		 * Do at most PAGE_SIZE entries per iteration, due to
  		 * the temporary buffer size.
  		 */
  		down_read(&current->mm->mmap_sem);
6a60f1b35   Johannes Weiner   mincore: cleanups
279
  		retval = do_mincore(start, min(pages, PAGE_SIZE), tmp);
2f77d1070   Linus Torvalds   Fix incorrect use...
280
281
282
283
284
285
286
  		up_read(&current->mm->mmap_sem);
  
  		if (retval <= 0)
  			break;
  		if (copy_to_user(vec, tmp, retval)) {
  			retval = -EFAULT;
  			break;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
287
  		}
2f77d1070   Linus Torvalds   Fix incorrect use...
288
289
290
291
  		pages -= retval;
  		vec += retval;
  		start += retval << PAGE_SHIFT;
  		retval = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
292
  	}
2f77d1070   Linus Torvalds   Fix incorrect use...
293
294
  	free_page((unsigned long) tmp);
  	return retval;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
295
  }