Blame view

mm/mincore.c 7.84 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
2
3
  /*
   *	linux/mm/mincore.c
   *
2f77d1070   Linus Torvalds   Fix incorrect use...
4
   * Copyright (C) 1994-2006  Linus Torvalds
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
5
6
7
8
9
   */
  
  /*
   * The mincore() system call.
   */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
10
  #include <linux/pagemap.h>
5a0e3ad6a   Tejun Heo   include cleanup: ...
11
  #include <linux/gfp.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
12
13
14
  #include <linux/mm.h>
  #include <linux/mman.h>
  #include <linux/syscalls.h>
42da9cbd3   Nick Piggin   [PATCH] mm: minco...
15
16
  #include <linux/swap.h>
  #include <linux/swapops.h>
4f16fc107   Naoya Horiguchi   mm: hugetlb: fix ...
17
  #include <linux/hugetlb.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
18
19
20
  
  #include <asm/uaccess.h>
  #include <asm/pgtable.h>
f48840107   Johannes Weiner   mincore: break do...
21
  /*
   * Fill @vec for a hugetlb vma, one byte per PAGE_SIZE step in [addr, end).
   *
   * The huge pte covering @addr is looked up once per huge page and the
   * resulting 0/1 value is replicated for every base page until either the
   * next huge page boundary or @end is reached.  The walk only stops when
   * addr becomes exactly equal to @end, and at least one entry is always
   * written.
   *
   * Built without CONFIG_HUGETLB_PAGE, getting here is a BUG().
   */
  static void mincore_hugetlb_page_range(struct vm_area_struct *vma,
  				unsigned long addr, unsigned long end,
  				unsigned char *vec)
  {
  #ifdef CONFIG_HUGETLB_PAGE
  	struct hstate *hs = hstate_vma(vma);

  	for (;;) {
  		pte_t *hpte;
  		unsigned char resident;

  		/*
  		 * Huge pages are always in RAM for now, but
  		 * theoretically it needs to be checked.
  		 */
  		hpte = huge_pte_offset(current->mm,
  				       addr & huge_page_mask(hs));
  		resident = hpte && !huge_pte_none(huge_ptep_get(hpte));

  		/* Same answer for every base page of this huge page. */
  		do {
  			*vec++ = resident;
  			addr += PAGE_SIZE;
  			if (addr == end)
  				return;
  		} while (addr & ~huge_page_mask(hs));
  	}
  #else
  	BUG();
  #endif
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
54
55
56
57
58
59
  /*
   * Later we can get more picky about what "in core" means precisely.
   * For now, simply check to see if the page is in the page cache,
   * and is up to date; i.e. that no page-in operation would be required
   * at this time if an application were to map and access this page.
   */
42da9cbd3   Nick Piggin   [PATCH] mm: minco...
60
  static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
61
62
  {
  	unsigned char present = 0;
42da9cbd3   Nick Piggin   [PATCH] mm: minco...
63
  	struct page *page;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
64

42da9cbd3   Nick Piggin   [PATCH] mm: minco...
65
66
67
68
  	/*
  	 * When tmpfs swaps out a page from a file, any process mapping that
  	 * file will not get a swp_entry_t in its pte, but rather it is like
  	 * any other file mapping (ie. marked !present and faulted in with
3c18ddd16   Nick Piggin   mm: remove nopage
69
  	 * tmpfs's .fault). So swapped out tmpfs mappings are tested here.
42da9cbd3   Nick Piggin   [PATCH] mm: minco...
70
  	 */
31475dd61   Hugh Dickins   mm: a few small u...
71
  #ifdef CONFIG_SWAP
0cd6144aa   Johannes Weiner   mm + fs: prepare ...
72
73
74
75
76
77
78
79
80
81
82
83
84
85
  	if (shmem_mapping(mapping)) {
  		page = find_get_entry(mapping, pgoff);
  		/*
  		 * shmem/tmpfs may return swap: account for swapcache
  		 * page too.
  		 */
  		if (radix_tree_exceptional_entry(page)) {
  			swp_entry_t swp = radix_to_swp_entry(page);
  			page = find_get_page(swap_address_space(swp), swp.val);
  		}
  	} else
  		page = find_get_page(mapping, pgoff);
  #else
  	page = find_get_page(mapping, pgoff);
31475dd61   Hugh Dickins   mm: a few small u...
86
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
87
88
89
90
91
92
93
  	if (page) {
  		present = PageUptodate(page);
  		page_cache_release(page);
  	}
  
  	return present;
  }
f48840107   Johannes Weiner   mincore: break do...
94
  static void mincore_unmapped_range(struct vm_area_struct *vma,
25ef0e50c   Johannes Weiner   mincore: pass ran...
95
  				unsigned long addr, unsigned long end,
f48840107   Johannes Weiner   mincore: break do...
96
97
  				unsigned char *vec)
  {
25ef0e50c   Johannes Weiner   mincore: pass ran...
98
  	unsigned long nr = (end - addr) >> PAGE_SHIFT;
f48840107   Johannes Weiner   mincore: break do...
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
  	int i;
  
  	if (vma->vm_file) {
  		pgoff_t pgoff;
  
  		pgoff = linear_page_index(vma, addr);
  		for (i = 0; i < nr; i++, pgoff++)
  			vec[i] = mincore_page(vma->vm_file->f_mapping, pgoff);
  	} else {
  		for (i = 0; i < nr; i++)
  			vec[i] = 0;
  	}
  }
  
  /*
   * Fill @vec for [addr, end) by walking the ptes under @pmd with the pte
   * lock held.  One byte is written per pte:
   *
   *   - none pte:      answered by mincore_unmapped_range() for that page
   *   - present pte:   1
   *   - file pte:      mincore_page() on the vma's file mapping
   *   - non-swap swap entry (migration, hwpoison): 1
   *   - real swap entry: mincore_page() on the swap address space, or,
   *     without CONFIG_SWAP, WARN and report 1
   */
  static void mincore_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
  			unsigned long addr, unsigned long end,
  			unsigned char *vec)
  {
  	spinlock_t *ptl;
  	pte_t *ptep;

  	ptep = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
  	do {
  		pte_t pte = *ptep;

  		if (pte_none(pte)) {
  			mincore_unmapped_range(vma, addr, addr + PAGE_SIZE, vec);
  		} else if (pte_present(pte)) {
  			*vec = 1;
  		} else if (pte_file(pte)) {
  			*vec = mincore_page(vma->vm_file->f_mapping,
  					    pte_to_pgoff(pte));
  		} else {
  			/* pte is a swap entry */
  			swp_entry_t entry = pte_to_swp_entry(pte);

  			if (non_swap_entry(entry)) {
  				/*
  				 * migration or hwpoison entries are always
  				 * uptodate
  				 */
  				*vec = 1;
  			} else {
  #ifdef CONFIG_SWAP
  				*vec = mincore_page(swap_address_space(entry),
  						    entry.val);
  #else
  				WARN_ON(1);
  				*vec = 1;
  #endif
  			}
  		}

  		vec++;
  		ptep++;
  		addr += PAGE_SIZE;
  	} while (addr != end);

  	/* ptep is one past the last pte visited; unmap the last one. */
  	pte_unmap_unlock(ptep - 1, ptl);
  }
e48293fd7   Johannes Weiner   mincore: do neste...
156
157
158
159
160
161
162
163
164
165
  /*
   * Fill @vec for [addr, end) by walking the pmd entries under @pud.
   *
   * A transparent huge pmd is first offered to mincore_huge_pmd(); when
   * that returns non-zero the chunk is considered done.  Otherwise the
   * chunk is handled either as an unmapped range or by descending to the
   * pte level.  @vec advances by the number of pages in each chunk.
   */
  static void mincore_pmd_range(struct vm_area_struct *vma, pud_t *pud,
  			unsigned long addr, unsigned long end,
  			unsigned char *vec)
  {
  	unsigned long next;
  	pmd_t *pmd;

  	pmd = pmd_offset(pud, addr);
  	do {
  		int huge_done;

  		next = pmd_addr_end(addr, end);

  		huge_done = pmd_trans_huge(*pmd) &&
  			    mincore_huge_pmd(vma, pmd, addr, next, vec);
  		if (!huge_done) {
  			/* not huge, or the huge pmd could not be handled */
  			if (pmd_none_or_trans_huge_or_clear_bad(pmd))
  				mincore_unmapped_range(vma, addr, next, vec);
  			else
  				mincore_pte_range(vma, pmd, addr, next, vec);
  		}

  		vec += (next - addr) >> PAGE_SHIFT;
  		pmd++;
  		addr = next;
  	} while (addr != end);
  }
  
  /*
   * Fill @vec for [addr, end) by walking the pud entries under @pgd.
   * Entries rejected by pud_none_or_clear_bad() are reported through
   * mincore_unmapped_range(); all others are walked at the pmd level.
   */
  static void mincore_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
  			unsigned long addr, unsigned long end,
  			unsigned char *vec)
  {
  	pud_t *pud = pud_offset(pgd, addr);
  	unsigned long next;

  	do {
  		next = pud_addr_end(addr, end);

  		if (!pud_none_or_clear_bad(pud))
  			mincore_pmd_range(vma, pud, addr, next, vec);
  		else
  			mincore_unmapped_range(vma, addr, next, vec);

  		vec += (next - addr) >> PAGE_SHIFT;
  		pud++;
  		addr = next;
  	} while (addr != end);
  }
  
  /*
   * Fill @vec for [addr, end) of a non-hugetlb vma, starting the page
   * table walk at the pgd level of vma->vm_mm.  Entries rejected by
   * pgd_none_or_clear_bad() are reported through mincore_unmapped_range();
   * all others are walked at the pud level.
   */
  static void mincore_page_range(struct vm_area_struct *vma,
  			unsigned long addr, unsigned long end,
  			unsigned char *vec)
  {
  	pgd_t *pgd = pgd_offset(vma->vm_mm, addr);
  	unsigned long next;

  	do {
  		next = pgd_addr_end(addr, end);

  		if (!pgd_none_or_clear_bad(pgd))
  			mincore_pud_range(vma, pgd, addr, next, vec);
  		else
  			mincore_unmapped_range(vma, addr, next, vec);

  		vec += (next - addr) >> PAGE_SHIFT;
  		pgd++;
  		addr = next;
  	} while (addr != end);
  }
2f77d1070   Linus Torvalds   Fix incorrect use...
216
217
218
219
220
  /*
   * Do a chunk of "sys_mincore()". We've already checked
   * all the arguments, we hold the mmap semaphore: we should
   * just return the amount of info we're asked for.
   */
6a60f1b35   Johannes Weiner   mincore: cleanups
221
  static long do_mincore(unsigned long addr, unsigned long pages, unsigned char *vec)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
222
  {
6a60f1b35   Johannes Weiner   mincore: cleanups
223
  	struct vm_area_struct *vma;
25ef0e50c   Johannes Weiner   mincore: pass ran...
224
  	unsigned long end;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
225

6a60f1b35   Johannes Weiner   mincore: cleanups
226
  	vma = find_vma(current->mm, addr);
4fb23e439   Linus Torvalds   Fix up mm/mincore...
227
228
  	if (!vma || addr < vma->vm_start)
  		return -ENOMEM;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
229

25ef0e50c   Johannes Weiner   mincore: pass ran...
230
  	end = min(vma->vm_end, addr + (pages << PAGE_SHIFT));
6a60f1b35   Johannes Weiner   mincore: cleanups
231

e48293fd7   Johannes Weiner   mincore: do neste...
232
233
234
235
  	if (is_vm_hugetlb_page(vma))
  		mincore_hugetlb_page_range(vma, addr, end, vec);
  	else
  		mincore_page_range(vma, addr, end, vec);
42da9cbd3   Nick Piggin   [PATCH] mm: minco...
236

25ef0e50c   Johannes Weiner   mincore: pass ran...
237
  	return (end - addr) >> PAGE_SHIFT;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
  }
  
  /*
   * The mincore(2) system call.
   *
   * mincore() returns the memory residency status of the pages in the
   * current process's address space specified by [addr, addr + len).
   * The status is returned in a vector of bytes.  The least significant
   * bit of each byte is 1 if the referenced page is in memory, otherwise
   * it is zero.
   *
   * Because the status of a page can change after mincore() checks it
   * but before it returns to the application, the returned vector may
   * contain stale information.  Only locked pages are guaranteed to
   * remain in memory.
   *
   * return values:
   *  zero    - success
   *  -EFAULT - vec points to an illegal address
   *  -EINVAL - addr is not a multiple of PAGE_CACHE_SIZE
   *  -ENOMEM - Addresses in the range [addr, addr + len] are
   *		invalid for the address space of this process, or
   *		specify one or more pages which are not currently
   *		mapped
   *  -EAGAIN - A kernel resource was temporarily unavailable.
   */
3480b2574   Heiko Carstens   [CVE-2009-0029] S...
264
265
  SYSCALL_DEFINE3(mincore, unsigned long, start, size_t, len,
  		unsigned char __user *, vec)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
266
  {
2f77d1070   Linus Torvalds   Fix incorrect use...
267
268
269
  	long retval;
  	unsigned long pages;
  	unsigned char *tmp;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
270

2f77d1070   Linus Torvalds   Fix incorrect use...
271
272
273
  	/* Check the start address: needs to be page-aligned.. */
   	if (start & ~PAGE_CACHE_MASK)
  		return -EINVAL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
274

2f77d1070   Linus Torvalds   Fix incorrect use...
275
276
277
  	/* ..and we need to be passed a valid user-space range */
  	if (!access_ok(VERIFY_READ, (void __user *) start, len))
  		return -ENOMEM;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
278

2f77d1070   Linus Torvalds   Fix incorrect use...
279
280
281
  	/* This also avoids any overflows on PAGE_CACHE_ALIGN */
  	pages = len >> PAGE_SHIFT;
  	pages += (len & ~PAGE_MASK) != 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
282

2f77d1070   Linus Torvalds   Fix incorrect use...
283
284
  	if (!access_ok(VERIFY_WRITE, vec, pages))
  		return -EFAULT;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
285

2f77d1070   Linus Torvalds   Fix incorrect use...
286
287
  	tmp = (void *) __get_free_page(GFP_USER);
  	if (!tmp)
4fb23e439   Linus Torvalds   Fix up mm/mincore...
288
  		return -EAGAIN;
2f77d1070   Linus Torvalds   Fix incorrect use...
289
290
291
292
293
294
295
296
  
  	retval = 0;
  	while (pages) {
  		/*
  		 * Do at most PAGE_SIZE entries per iteration, due to
  		 * the temporary buffer size.
  		 */
  		down_read(&current->mm->mmap_sem);
6a60f1b35   Johannes Weiner   mincore: cleanups
297
  		retval = do_mincore(start, min(pages, PAGE_SIZE), tmp);
2f77d1070   Linus Torvalds   Fix incorrect use...
298
299
300
301
302
303
304
  		up_read(&current->mm->mmap_sem);
  
  		if (retval <= 0)
  			break;
  		if (copy_to_user(vec, tmp, retval)) {
  			retval = -EFAULT;
  			break;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
305
  		}
2f77d1070   Linus Torvalds   Fix incorrect use...
306
307
308
309
  		pages -= retval;
  		vec += retval;
  		start += retval << PAGE_SHIFT;
  		retval = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
310
  	}
2f77d1070   Linus Torvalds   Fix incorrect use...
311
312
  	free_page((unsigned long) tmp);
  	return retval;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
313
  }