Blame view

mm/mincore.c 7.58 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
2
3
  /*
   *	linux/mm/mincore.c
   *
2f77d1070   Linus Torvalds   Fix incorrect use...
4
   * Copyright (C) 1994-2006  Linus Torvalds
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
5
6
7
8
9
   */
  
  /*
   * The mincore() system call.
   */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
10
  #include <linux/pagemap.h>
5a0e3ad6a   Tejun Heo   include cleanup: ...
11
  #include <linux/gfp.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
12
13
14
  #include <linux/mm.h>
  #include <linux/mman.h>
  #include <linux/syscalls.h>
42da9cbd3   Nick Piggin   [PATCH] mm: minco...
15
16
  #include <linux/swap.h>
  #include <linux/swapops.h>
4f16fc107   Naoya Horiguchi   mm: hugetlb: fix ...
17
  #include <linux/hugetlb.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
18
19
20
  
  #include <asm/uaccess.h>
  #include <asm/pgtable.h>
f48840107   Johannes Weiner   mincore: break do...
21
  static void mincore_hugetlb_page_range(struct vm_area_struct *vma,
25ef0e50c   Johannes Weiner   mincore: pass ran...
22
  				unsigned long addr, unsigned long end,
f48840107   Johannes Weiner   mincore: break do...
23
24
25
26
  				unsigned char *vec)
  {
  #ifdef CONFIG_HUGETLB_PAGE
  	struct hstate *h;
f48840107   Johannes Weiner   mincore: break do...
27

f48840107   Johannes Weiner   mincore: break do...
28
29
30
31
32
33
34
35
36
37
38
39
  	h = hstate_vma(vma);
  	while (1) {
  		unsigned char present;
  		pte_t *ptep;
  		/*
  		 * Huge pages are always in RAM for now, but
  		 * theoretically it needs to be checked.
  		 */
  		ptep = huge_pte_offset(current->mm,
  				       addr & huge_page_mask(h));
  		present = ptep && !huge_pte_none(huge_ptep_get(ptep));
  		while (1) {
25ef0e50c   Johannes Weiner   mincore: pass ran...
40
41
  			*vec = present;
  			vec++;
f48840107   Johannes Weiner   mincore: break do...
42
  			addr += PAGE_SIZE;
25ef0e50c   Johannes Weiner   mincore: pass ran...
43
  			if (addr == end)
f48840107   Johannes Weiner   mincore: break do...
44
45
46
47
48
49
50
51
52
53
  				return;
  			/* check hugepage border */
  			if (!(addr & ~huge_page_mask(h)))
  				break;
  		}
  	}
  #else
  	BUG();
  #endif
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
54
55
56
57
58
59
  /*
   * Later we can get more picky about what "in core" means precisely.
   * For now, simply check to see if the page is in the page cache,
   * and is up to date; i.e. that no page-in operation would be required
   * at this time if an application were to map and access this page.
   */
42da9cbd3   Nick Piggin   [PATCH] mm: minco...
60
  static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
61
62
  {
  	unsigned char present = 0;
42da9cbd3   Nick Piggin   [PATCH] mm: minco...
63
  	struct page *page;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
64

42da9cbd3   Nick Piggin   [PATCH] mm: minco...
65
66
67
68
  	/*
  	 * When tmpfs swaps out a page from a file, any process mapping that
  	 * file will not get a swp_entry_t in its pte, but rather it is like
  	 * any other file mapping (ie. marked !present and faulted in with
3c18ddd16   Nick Piggin   mm: remove nopage
69
  	 * tmpfs's .fault). So swapped out tmpfs mappings are tested here.
42da9cbd3   Nick Piggin   [PATCH] mm: minco...
70
71
72
73
74
75
  	 *
  	 * However when tmpfs moves the page from pagecache and into swapcache,
  	 * it is still in core, but the find_get_page below won't find it.
  	 * No big deal, but make a note of it.
  	 */
  	page = find_get_page(mapping, pgoff);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
76
77
78
79
80
81
82
  	if (page) {
  		present = PageUptodate(page);
  		page_cache_release(page);
  	}
  
  	return present;
  }
f48840107   Johannes Weiner   mincore: break do...
83
  static void mincore_unmapped_range(struct vm_area_struct *vma,
25ef0e50c   Johannes Weiner   mincore: pass ran...
84
  				unsigned long addr, unsigned long end,
f48840107   Johannes Weiner   mincore: break do...
85
86
  				unsigned char *vec)
  {
25ef0e50c   Johannes Weiner   mincore: pass ran...
87
  	unsigned long nr = (end - addr) >> PAGE_SHIFT;
f48840107   Johannes Weiner   mincore: break do...
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
  	int i;
  
  	if (vma->vm_file) {
  		pgoff_t pgoff;
  
  		pgoff = linear_page_index(vma, addr);
  		for (i = 0; i < nr; i++, pgoff++)
  			vec[i] = mincore_page(vma->vm_file->f_mapping, pgoff);
  	} else {
  		for (i = 0; i < nr; i++)
  			vec[i] = 0;
  	}
  }
  
/*
 * Walk the ptes under one pmd entry for [addr, end) and record one
 * residency byte per page into the vector.
 */
static void mincore_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
			unsigned long addr, unsigned long end,
			unsigned char *vec)
{
	unsigned long next;
	spinlock_t *ptl;
	pte_t *ptep;

	/* map the pte page and take its lock so entries can't change under us */
	ptep = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
	do {
		pte_t pte = *ptep;
		pgoff_t pgoff;

		next = addr + PAGE_SIZE;
		if (pte_none(pte))
			/* no pte at all: page cache may still hold file pages */
			mincore_unmapped_range(vma, addr, next, vec);
		else if (pte_present(pte))
			/* mapped and present: trivially in core */
			*vec = 1;
		else if (pte_file(pte)) {
			/* nonlinear file pte: the file offset is encoded in the pte */
			pgoff = pte_to_pgoff(pte);
			*vec = mincore_page(vma->vm_file->f_mapping, pgoff);
		} else { /* pte is a swap entry */
			swp_entry_t entry = pte_to_swp_entry(pte);

			if (is_migration_entry(entry)) {
				/* migration entries are always uptodate */
				*vec = 1;
			} else {
#ifdef CONFIG_SWAP
				/* probe the swap cache for the swapped-out page */
				pgoff = entry.val;
				*vec = mincore_page(&swapper_space, pgoff);
#else
				/* a swap pte without CONFIG_SWAP should not exist */
				WARN_ON(1);
				*vec = 1;
#endif
			}
		}
		vec++;
	} while (ptep++, addr = next, addr != end);
	/* ptep was post-incremented past the last entry; unlock via ptep - 1 */
	pte_unmap_unlock(ptep - 1, ptl);
}
e48293fd7   Johannes Weiner   mincore: do neste...
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
  static void mincore_pmd_range(struct vm_area_struct *vma, pud_t *pud,
  			unsigned long addr, unsigned long end,
  			unsigned char *vec)
  {
  	unsigned long next;
  	pmd_t *pmd;
  
  	pmd = pmd_offset(pud, addr);
  	do {
  		next = pmd_addr_end(addr, end);
  		if (pmd_none_or_clear_bad(pmd))
  			mincore_unmapped_range(vma, addr, next, vec);
  		else
  			mincore_pte_range(vma, pmd, addr, next, vec);
  		vec += (next - addr) >> PAGE_SHIFT;
  	} while (pmd++, addr = next, addr != end);
  }
  
  static void mincore_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
  			unsigned long addr, unsigned long end,
  			unsigned char *vec)
  {
  	unsigned long next;
  	pud_t *pud;
  
  	pud = pud_offset(pgd, addr);
  	do {
  		next = pud_addr_end(addr, end);
  		if (pud_none_or_clear_bad(pud))
  			mincore_unmapped_range(vma, addr, next, vec);
  		else
  			mincore_pmd_range(vma, pud, addr, next, vec);
  		vec += (next - addr) >> PAGE_SHIFT;
  	} while (pud++, addr = next, addr != end);
  }
  
  static void mincore_page_range(struct vm_area_struct *vma,
  			unsigned long addr, unsigned long end,
  			unsigned char *vec)
  {
  	unsigned long next;
  	pgd_t *pgd;
  
  	pgd = pgd_offset(vma->vm_mm, addr);
  	do {
  		next = pgd_addr_end(addr, end);
  		if (pgd_none_or_clear_bad(pgd))
  			mincore_unmapped_range(vma, addr, next, vec);
  		else
  			mincore_pud_range(vma, pgd, addr, next, vec);
  		vec += (next - addr) >> PAGE_SHIFT;
  	} while (pgd++, addr = next, addr != end);
  }
2f77d1070   Linus Torvalds   Fix incorrect use...
195
196
197
198
199
  /*
   * Do a chunk of "sys_mincore()". We've already checked
   * all the arguments, we hold the mmap semaphore: we should
   * just return the amount of info we're asked for.
   */
6a60f1b35   Johannes Weiner   mincore: cleanups
200
  static long do_mincore(unsigned long addr, unsigned long pages, unsigned char *vec)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
201
  {
6a60f1b35   Johannes Weiner   mincore: cleanups
202
  	struct vm_area_struct *vma;
25ef0e50c   Johannes Weiner   mincore: pass ran...
203
  	unsigned long end;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
204

6a60f1b35   Johannes Weiner   mincore: cleanups
205
  	vma = find_vma(current->mm, addr);
4fb23e439   Linus Torvalds   Fix up mm/mincore...
206
207
  	if (!vma || addr < vma->vm_start)
  		return -ENOMEM;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
208

25ef0e50c   Johannes Weiner   mincore: pass ran...
209
  	end = min(vma->vm_end, addr + (pages << PAGE_SHIFT));
6a60f1b35   Johannes Weiner   mincore: cleanups
210

4f16fc107   Naoya Horiguchi   mm: hugetlb: fix ...
211
  	if (is_vm_hugetlb_page(vma)) {
25ef0e50c   Johannes Weiner   mincore: pass ran...
212
213
  		mincore_hugetlb_page_range(vma, addr, end, vec);
  		return (end - addr) >> PAGE_SHIFT;
4f16fc107   Naoya Horiguchi   mm: hugetlb: fix ...
214
  	}
4f16fc107   Naoya Horiguchi   mm: hugetlb: fix ...
215

25ef0e50c   Johannes Weiner   mincore: pass ran...
216
  	end = pmd_addr_end(addr, end);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
217

e48293fd7   Johannes Weiner   mincore: do neste...
218
219
220
221
  	if (is_vm_hugetlb_page(vma))
  		mincore_hugetlb_page_range(vma, addr, end, vec);
  	else
  		mincore_page_range(vma, addr, end, vec);
42da9cbd3   Nick Piggin   [PATCH] mm: minco...
222

25ef0e50c   Johannes Weiner   mincore: pass ran...
223
  	return (end - addr) >> PAGE_SHIFT;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
  }
  
  /*
   * The mincore(2) system call.
   *
   * mincore() returns the memory residency status of the pages in the
   * current process's address space specified by [addr, addr + len).
   * The status is returned in a vector of bytes.  The least significant
   * bit of each byte is 1 if the referenced page is in memory, otherwise
   * it is zero.
   *
   * Because the status of a page can change after mincore() checks it
   * but before it returns to the application, the returned vector may
   * contain stale information.  Only locked pages are guaranteed to
   * remain in memory.
   *
   * return values:
   *  zero    - success
   *  -EFAULT - vec points to an illegal address
   *  -EINVAL - addr is not a multiple of PAGE_CACHE_SIZE
   *  -ENOMEM - Addresses in the range [addr, addr + len] are
   *		invalid for the address space of this process, or
   *		specify one or more pages which are not currently
   *		mapped
   *  -EAGAIN - A kernel resource was temporarily unavailable.
   */
3480b2574   Heiko Carstens   [CVE-2009-0029] S...
250
251
  SYSCALL_DEFINE3(mincore, unsigned long, start, size_t, len,
  		unsigned char __user *, vec)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
252
  {
2f77d1070   Linus Torvalds   Fix incorrect use...
253
254
255
  	long retval;
  	unsigned long pages;
  	unsigned char *tmp;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
256

2f77d1070   Linus Torvalds   Fix incorrect use...
257
258
259
  	/* Check the start address: needs to be page-aligned.. */
   	if (start & ~PAGE_CACHE_MASK)
  		return -EINVAL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
260

2f77d1070   Linus Torvalds   Fix incorrect use...
261
262
263
  	/* ..and we need to be passed a valid user-space range */
  	if (!access_ok(VERIFY_READ, (void __user *) start, len))
  		return -ENOMEM;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
264

2f77d1070   Linus Torvalds   Fix incorrect use...
265
266
267
  	/* This also avoids any overflows on PAGE_CACHE_ALIGN */
  	pages = len >> PAGE_SHIFT;
  	pages += (len & ~PAGE_MASK) != 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
268

2f77d1070   Linus Torvalds   Fix incorrect use...
269
270
  	if (!access_ok(VERIFY_WRITE, vec, pages))
  		return -EFAULT;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
271

2f77d1070   Linus Torvalds   Fix incorrect use...
272
273
  	tmp = (void *) __get_free_page(GFP_USER);
  	if (!tmp)
4fb23e439   Linus Torvalds   Fix up mm/mincore...
274
  		return -EAGAIN;
2f77d1070   Linus Torvalds   Fix incorrect use...
275
276
277
278
279
280
281
282
  
  	retval = 0;
  	while (pages) {
  		/*
  		 * Do at most PAGE_SIZE entries per iteration, due to
  		 * the temporary buffer size.
  		 */
  		down_read(&current->mm->mmap_sem);
6a60f1b35   Johannes Weiner   mincore: cleanups
283
  		retval = do_mincore(start, min(pages, PAGE_SIZE), tmp);
2f77d1070   Linus Torvalds   Fix incorrect use...
284
285
286
287
288
289
290
  		up_read(&current->mm->mmap_sem);
  
  		if (retval <= 0)
  			break;
  		if (copy_to_user(vec, tmp, retval)) {
  			retval = -EFAULT;
  			break;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
291
  		}
2f77d1070   Linus Torvalds   Fix incorrect use...
292
293
294
295
  		pages -= retval;
  		vec += retval;
  		start += retval << PAGE_SHIFT;
  		retval = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
296
  	}
2f77d1070   Linus Torvalds   Fix incorrect use...
297
298
  	free_page((unsigned long) tmp);
  	return retval;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
299
  }