mm/mincore.c

  /*
   *	linux/mm/mincore.c
   *
 * Copyright (C) 1994-2006  Linus Torvalds
   */
  
  /*
   * The mincore() system call.
   */
#include <linux/pagemap.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/syscalls.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/hugetlb.h>
  
  #include <asm/uaccess.h>
  #include <asm/pgtable.h>
static void mincore_hugetlb_page_range(struct vm_area_struct *vma,
				unsigned long addr, unsigned long end,
				unsigned char *vec)
  {
  #ifdef CONFIG_HUGETLB_PAGE
  	struct hstate *h;

  	h = hstate_vma(vma);
  	while (1) {
  		unsigned char present;
  		pte_t *ptep;
  		/*
  		 * Huge pages are always in RAM for now, but
  		 * theoretically it needs to be checked.
  		 */
  		ptep = huge_pte_offset(current->mm,
  				       addr & huge_page_mask(h));
  		present = ptep && !huge_pte_none(huge_ptep_get(ptep));
  		while (1) {
			*vec = present;
			vec++;
			addr += PAGE_SIZE;
			if (addr == end)
  				return;
  			/* check hugepage border */
  			if (!(addr & ~huge_page_mask(h)))
  				break;
  		}
  	}
  #else
  	BUG();
  #endif
  }
  /*
   * Later we can get more picky about what "in core" means precisely.
   * For now, simply check to see if the page is in the page cache,
   * and is up to date; i.e. that no page-in operation would be required
   * at this time if an application were to map and access this page.
   */
static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff)
{
	unsigned char present = 0;
	struct page *page;

	/*
	 * When tmpfs swaps out a page from a file, any process mapping that
	 * file will not get a swp_entry_t in its pte, but rather it is like
	 * any other file mapping (ie. marked !present and faulted in with
	 * tmpfs's .fault). So swapped out tmpfs mappings are tested here.
	 */
	page = find_get_page(mapping, pgoff);
#ifdef CONFIG_SWAP
	/* shmem/tmpfs may return swap: account for swapcache page too. */
	if (radix_tree_exceptional_entry(page)) {
		swp_entry_t swap = radix_to_swp_entry(page);
		page = find_get_page(&swapper_space, swap.val);
	}
#endif
  	if (page) {
  		present = PageUptodate(page);
  		page_cache_release(page);
  	}
  
  	return present;
  }
static void mincore_unmapped_range(struct vm_area_struct *vma,
				unsigned long addr, unsigned long end,
				unsigned char *vec)
{
	unsigned long nr = (end - addr) >> PAGE_SHIFT;
  	int i;
  
  	if (vma->vm_file) {
  		pgoff_t pgoff;
  
  		pgoff = linear_page_index(vma, addr);
  		for (i = 0; i < nr; i++, pgoff++)
  			vec[i] = mincore_page(vma->vm_file->f_mapping, pgoff);
  	} else {
  		for (i = 0; i < nr; i++)
  			vec[i] = 0;
  	}
  }
  
  static void mincore_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
			unsigned long addr, unsigned long end,
			unsigned char *vec)
{
	unsigned long next;
	spinlock_t *ptl;
	pte_t *ptep;

	ptep = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
	do {
		pte_t pte = *ptep;
		pgoff_t pgoff;
		next = addr + PAGE_SIZE;
		if (pte_none(pte))
			mincore_unmapped_range(vma, addr, next, vec);
		else if (pte_present(pte))
			*vec = 1;
		else if (pte_file(pte)) {
			pgoff = pte_to_pgoff(pte);
			*vec = mincore_page(vma->vm_file->f_mapping, pgoff);
		} else { /* pte is a swap entry */
			swp_entry_t entry = pte_to_swp_entry(pte);

			if (is_migration_entry(entry)) {
				/* migration entries are always uptodate */
				*vec = 1;
			} else {
#ifdef CONFIG_SWAP
				pgoff = entry.val;
				*vec = mincore_page(&swapper_space, pgoff);
#else
				WARN_ON(1);
				*vec = 1;
#endif
			}
		}
		vec++;
	} while (ptep++, addr = next, addr != end);
  	pte_unmap_unlock(ptep - 1, ptl);
  }
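
/*
 * The walkers below visit the page tables with the usual nested loops:
 * mincore_page_range() iterates over pgd entries and hands each present
 * range down through mincore_pud_range() and mincore_pmd_range() to
 * mincore_pte_range(), while holes at any level go straight to
 * mincore_unmapped_range().  Transparent huge pmds are resolved via
 * mincore_huge_pmd() before falling back to the pte walk.
 */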
  static void mincore_pmd_range(struct vm_area_struct *vma, pud_t *pud,
  			unsigned long addr, unsigned long end,
  			unsigned char *vec)
  {
  	unsigned long next;
  	pmd_t *pmd;
  
  	pmd = pmd_offset(pud, addr);
  	do {
  		next = pmd_addr_end(addr, end);
  		if (pmd_trans_huge(*pmd)) {
  			if (mincore_huge_pmd(vma, pmd, addr, next, vec)) {
  				vec += (next - addr) >> PAGE_SHIFT;
  				continue;
  			}
  			/* fall through */
  		}
  		if (pmd_none_or_clear_bad(pmd))
  			mincore_unmapped_range(vma, addr, next, vec);
  		else
  			mincore_pte_range(vma, pmd, addr, next, vec);
  		vec += (next - addr) >> PAGE_SHIFT;
  	} while (pmd++, addr = next, addr != end);
  }
  
  static void mincore_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
  			unsigned long addr, unsigned long end,
  			unsigned char *vec)
  {
  	unsigned long next;
  	pud_t *pud;
  
  	pud = pud_offset(pgd, addr);
  	do {
  		next = pud_addr_end(addr, end);
  		if (pud_none_or_clear_bad(pud))
  			mincore_unmapped_range(vma, addr, next, vec);
  		else
  			mincore_pmd_range(vma, pud, addr, next, vec);
  		vec += (next - addr) >> PAGE_SHIFT;
  	} while (pud++, addr = next, addr != end);
  }
  
  static void mincore_page_range(struct vm_area_struct *vma,
  			unsigned long addr, unsigned long end,
  			unsigned char *vec)
  {
  	unsigned long next;
  	pgd_t *pgd;
  
  	pgd = pgd_offset(vma->vm_mm, addr);
  	do {
  		next = pgd_addr_end(addr, end);
  		if (pgd_none_or_clear_bad(pgd))
  			mincore_unmapped_range(vma, addr, next, vec);
  		else
  			mincore_pud_range(vma, pgd, addr, next, vec);
  		vec += (next - addr) >> PAGE_SHIFT;
  	} while (pgd++, addr = next, addr != end);
  }
  /*
   * Do a chunk of "sys_mincore()". We've already checked
   * all the arguments, we hold the mmap semaphore: we should
   * just return the amount of info we're asked for.
   */
static long do_mincore(unsigned long addr, unsigned long pages, unsigned char *vec)
{
	struct vm_area_struct *vma;
	unsigned long end;

	vma = find_vma(current->mm, addr);
	if (!vma || addr < vma->vm_start)
		return -ENOMEM;

	end = min(vma->vm_end, addr + (pages << PAGE_SHIFT));

	if (is_vm_hugetlb_page(vma)) {
		mincore_hugetlb_page_range(vma, addr, end, vec);
		return (end - addr) >> PAGE_SHIFT;
	}

	end = pmd_addr_end(addr, end);

	if (is_vm_hugetlb_page(vma))
		mincore_hugetlb_page_range(vma, addr, end, vec);
	else
		mincore_page_range(vma, addr, end, vec);

	return (end - addr) >> PAGE_SHIFT;
  }
  
  /*
   * The mincore(2) system call.
   *
   * mincore() returns the memory residency status of the pages in the
   * current process's address space specified by [addr, addr + len).
   * The status is returned in a vector of bytes.  The least significant
   * bit of each byte is 1 if the referenced page is in memory, otherwise
   * it is zero.
   *
   * Because the status of a page can change after mincore() checks it
   * but before it returns to the application, the returned vector may
   * contain stale information.  Only locked pages are guaranteed to
   * remain in memory.
   *
   * return values:
   *  zero    - success
   *  -EFAULT - vec points to an illegal address
   *  -EINVAL - addr is not a multiple of PAGE_CACHE_SIZE
   *  -ENOMEM - Addresses in the range [addr, addr + len] are
   *		invalid for the address space of this process, or
   *		specify one or more pages which are not currently
   *		mapped
   *  -EAGAIN - A kernel resource was temporarily unavailable.
   */
SYSCALL_DEFINE3(mincore, unsigned long, start, size_t, len,
		unsigned char __user *, vec)
{
	long retval;
	unsigned long pages;
	unsigned char *tmp;

	/* Check the start address: needs to be page-aligned.. */
	if (start & ~PAGE_CACHE_MASK)
		return -EINVAL;

	/* ..and we need to be passed a valid user-space range */
	if (!access_ok(VERIFY_READ, (void __user *) start, len))
		return -ENOMEM;

	/* This also avoids any overflows on PAGE_CACHE_ALIGN */
	pages = len >> PAGE_SHIFT;
	pages += (len & ~PAGE_MASK) != 0;

	if (!access_ok(VERIFY_WRITE, vec, pages))
		return -EFAULT;

	tmp = (void *) __get_free_page(GFP_USER);
	if (!tmp)
		return -EAGAIN;

	retval = 0;
	while (pages) {
		/*
		 * Do at most PAGE_SIZE entries per iteration, due to
		 * the temporary buffer size.
		 */
		down_read(&current->mm->mmap_sem);
		retval = do_mincore(start, min(pages, PAGE_SIZE), tmp);
		up_read(&current->mm->mmap_sem);

		if (retval <= 0)
			break;
		if (copy_to_user(vec, tmp, retval)) {
			retval = -EFAULT;
			break;
		}
		pages -= retval;
		vec += retval;
		start += retval << PAGE_SHIFT;
		retval = 0;
	}
	free_page((unsigned long) tmp);
	return retval;
  }
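
For reference, below is a minimal userspace sketch (not part of mm/mincore.c) showing how the mincore(2) interface documented above is typically called through the libc wrapper. The file path, buffer handling and error handling are illustrative assumptions, not taken from the kernel source.

#define _DEFAULT_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/stat.h>

int main(int argc, char **argv)
{
	/* Map a file read-only and ask the kernel which pages are resident. */
	const char *path = argc > 1 ? argv[1] : "/etc/hostname"; /* example path */
	long page = sysconf(_SC_PAGESIZE);
	struct stat st;
	int fd = open(path, O_RDONLY);

	if (fd < 0 || fstat(fd, &st) < 0 || st.st_size == 0)
		return 1;

	size_t len = st.st_size;
	size_t pages = (len + page - 1) / page;
	void *map = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
	unsigned char *vec = malloc(pages);

	if (map == MAP_FAILED || !vec)
		return 1;

	/* One byte per page; bit 0 is set if the page is resident (see above). */
	if (mincore(map, len, vec) == 0) {
		size_t resident = 0;
		for (size_t i = 0; i < pages; i++)
			resident += vec[i] & 1;
		printf("%zu of %zu pages of %s resident\n", resident, pages, path);
	}

	free(vec);
	munmap(map, len);
	close(fd);
	return 0;
}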