Blame view

mm/mincore.c 7.82 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
2
3
  /*
   *	linux/mm/mincore.c
   *
2f77d1070   Linus Torvalds   Fix incorrect use...
4
   * Copyright (C) 1994-2006  Linus Torvalds
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
5
6
7
8
9
   */
  
  /*
   * The mincore() system call.
   */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
10
  #include <linux/pagemap.h>
5a0e3ad6a   Tejun Heo   include cleanup: ...
11
  #include <linux/gfp.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
12
13
14
  #include <linux/mm.h>
  #include <linux/mman.h>
  #include <linux/syscalls.h>
42da9cbd3   Nick Piggin   [PATCH] mm: minco...
15
16
  #include <linux/swap.h>
  #include <linux/swapops.h>
4f16fc107   Naoya Horiguchi   mm: hugetlb: fix ...
17
  #include <linux/hugetlb.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
18
19
20
  
  #include <asm/uaccess.h>
  #include <asm/pgtable.h>
f48840107   Johannes Weiner   mincore: break do...
21
  /*
   * Report residency for a hugetlb mapping.
   *
   * The huge pte is looked up once per huge page and the resulting answer
   * is written to vec for every PAGE_SIZE step that falls inside that huge
   * page.  Both loops terminate only when addr lands exactly on end, so the
   * range handed in must be non-empty.
   *
   * Without CONFIG_HUGETLB_PAGE this function must never be reached.
   */
  static void mincore_hugetlb_page_range(struct vm_area_struct *vma,
  				unsigned long addr, unsigned long end,
  				unsigned char *vec)
  {
  #ifdef CONFIG_HUGETLB_PAGE
  	struct hstate *hs = hstate_vma(vma);
  
  	for (;;) {
  		pte_t *hpte;
  		unsigned char resident;
  
  		/*
  		 * Huge pages are always in RAM for now, but
  		 * theoretically it needs to be checked.
  		 */
  		hpte = huge_pte_offset(current->mm, addr & huge_page_mask(hs));
  		resident = hpte && !huge_pte_none(huge_ptep_get(hpte));
  
  		/* One byte per small page until the huge page border. */
  		do {
  			*vec++ = resident;
  			addr += PAGE_SIZE;
  			if (addr == end)
  				return;
  		} while (addr & ~huge_page_mask(hs));
  	}
  #else
  	BUG();
  #endif
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
54
55
56
57
58
59
  /*
   * Later we can get more picky about what "in core" means precisely.
   * For now, simply check to see if the page is in the page cache,
   * and is up to date; i.e. that no page-in operation would be required
   * at this time if an application were to map and access this page.
   */
42da9cbd3   Nick Piggin   [PATCH] mm: minco...
60
  static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
61
62
  {
  	unsigned char present = 0;
42da9cbd3   Nick Piggin   [PATCH] mm: minco...
63
  	struct page *page;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
64

42da9cbd3   Nick Piggin   [PATCH] mm: minco...
65
66
67
68
  	/*
  	 * When tmpfs swaps out a page from a file, any process mapping that
  	 * file will not get a swp_entry_t in its pte, but rather it is like
  	 * any other file mapping (ie. marked !present and faulted in with
3c18ddd16   Nick Piggin   mm: remove nopage
69
  	 * tmpfs's .fault). So swapped out tmpfs mappings are tested here.
42da9cbd3   Nick Piggin   [PATCH] mm: minco...
70
  	 */
31475dd61   Hugh Dickins   mm: a few small u...
71
  #ifdef CONFIG_SWAP
0cd6144aa   Johannes Weiner   mm + fs: prepare ...
72
73
74
75
76
77
78
79
80
81
82
83
84
85
  	if (shmem_mapping(mapping)) {
  		page = find_get_entry(mapping, pgoff);
  		/*
  		 * shmem/tmpfs may return swap: account for swapcache
  		 * page too.
  		 */
  		if (radix_tree_exceptional_entry(page)) {
  			swp_entry_t swp = radix_to_swp_entry(page);
  			page = find_get_page(swap_address_space(swp), swp.val);
  		}
  	} else
  		page = find_get_page(mapping, pgoff);
  #else
  	page = find_get_page(mapping, pgoff);
31475dd61   Hugh Dickins   mm: a few small u...
86
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
87
88
89
90
91
92
93
  	if (page) {
  		present = PageUptodate(page);
  		page_cache_release(page);
  	}
  
  	return present;
  }
f48840107   Johannes Weiner   mincore: break do...
94
  static void mincore_unmapped_range(struct vm_area_struct *vma,
25ef0e50c   Johannes Weiner   mincore: pass ran...
95
  				unsigned long addr, unsigned long end,
f48840107   Johannes Weiner   mincore: break do...
96
97
  				unsigned char *vec)
  {
25ef0e50c   Johannes Weiner   mincore: pass ran...
98
  	unsigned long nr = (end - addr) >> PAGE_SHIFT;
f48840107   Johannes Weiner   mincore: break do...
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
  	int i;
  
  	if (vma->vm_file) {
  		pgoff_t pgoff;
  
  		pgoff = linear_page_index(vma, addr);
  		for (i = 0; i < nr; i++, pgoff++)
  			vec[i] = mincore_page(vma->vm_file->f_mapping, pgoff);
  	} else {
  		for (i = 0; i < nr; i++)
  			vec[i] = 0;
  	}
  }
  
  /*
   * Walk the ptes under @pmd that cover [addr, end) and write one
   * residency byte per page into @vec.  The pte table is mapped and
   * locked for the whole walk and released before returning.
   */
  static void mincore_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
25ef0e50c   Johannes Weiner   mincore: pass ran...
114
  			unsigned long addr, unsigned long end,
f48840107   Johannes Weiner   mincore: break do...
115
116
  			unsigned char *vec)
  {
25ef0e50c   Johannes Weiner   mincore: pass ran...
117
  	unsigned long next;
f48840107   Johannes Weiner   mincore: break do...
118
119
  	spinlock_t *ptl;
  	pte_t *ptep;
f48840107   Johannes Weiner   mincore: break do...
120
121
  
  	/* Map the pte for addr and take its lock; ptl is needed to unlock. */
  	ptep = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
25ef0e50c   Johannes Weiner   mincore: pass ran...
122
  	do {
f48840107   Johannes Weiner   mincore: break do...
123
124
  		pte_t pte = *ptep;
  		pgoff_t pgoff;
25ef0e50c   Johannes Weiner   mincore: pass ran...
125
  		next = addr + PAGE_SIZE;
f48840107   Johannes Weiner   mincore: break do...
126
  		/* Empty pte: answer for this single page as an unmapped range. */
  		if (pte_none(pte))
25ef0e50c   Johannes Weiner   mincore: pass ran...
127
  			mincore_unmapped_range(vma, addr, next, vec);
f48840107   Johannes Weiner   mincore: break do...
128
  		/* A present pte is reported as resident. */
  		else if (pte_present(pte))
25ef0e50c   Johannes Weiner   mincore: pass ran...
129
  			*vec = 1;
f48840107   Johannes Weiner   mincore: break do...
130
131
  		/*
  		 * File pte: the pte encodes a file offset, which is looked up
  		 * in the backing file's mapping.
  		 * NOTE(review): relies on vma->vm_file being non-NULL whenever
  		 * a file pte is found - confirm.
  		 */
  		else if (pte_file(pte)) {
  			pgoff = pte_to_pgoff(pte);
25ef0e50c   Johannes Weiner   mincore: pass ran...
132
  			*vec = mincore_page(vma->vm_file->f_mapping, pgoff);
f48840107   Johannes Weiner   mincore: break do...
133
134
135
136
137
  		} else { /* pte is a swap entry */
  			swp_entry_t entry = pte_to_swp_entry(pte);
  
  			if (is_migration_entry(entry)) {
  				/* migration entries are always uptodate */
25ef0e50c   Johannes Weiner   mincore: pass ran...
138
  				*vec = 1;
f48840107   Johannes Weiner   mincore: break do...
139
140
141
  			} else {
  #ifdef CONFIG_SWAP
  				/* The raw entry value is the index used for the lookup. */
  				pgoff = entry.val;
33806f06d   Shaohua Li   swap: make each s...
142
143
  				*vec = mincore_page(swap_address_space(entry),
  					pgoff);
f48840107   Johannes Weiner   mincore: break do...
144
145
  #else
  				/* Swap entry without CONFIG_SWAP: warn, report resident. */
  				WARN_ON(1);
25ef0e50c   Johannes Weiner   mincore: pass ran...
146
  				*vec = 1;
f48840107   Johannes Weiner   mincore: break do...
147
148
149
  #endif
  			}
  		}
25ef0e50c   Johannes Weiner   mincore: pass ran...
150
151
  		/* One output byte per pte visited. */
  		vec++;
  	} while (ptep++, addr = next, addr != end);
f48840107   Johannes Weiner   mincore: break do...
152
153
  	/* ptep was advanced once past the last pte visited, hence the - 1. */
  	pte_unmap_unlock(ptep - 1, ptl);
  }
e48293fd7   Johannes Weiner   mincore: do neste...
154
155
156
157
158
159
160
161
162
163
  /*
   * Walk the pmd entries under @pud that cover [addr, end), filling @vec
   * with one byte per page.  Each pmd-sized piece is handled either as a
   * transparent huge page, as an unmapped range, or by descending to the
   * pte level.
   */
  static void mincore_pmd_range(struct vm_area_struct *vma, pud_t *pud,
  			unsigned long addr, unsigned long end,
  			unsigned char *vec)
  {
  	unsigned long next;
  	pmd_t *pmd;
  
  	pmd = pmd_offset(pud, addr);
  	do {
  		/* End of the piece covered by this pmd, clamped to end. */
  		next = pmd_addr_end(addr, end);
0ca1634d4   Johannes Weiner   thp: mincore tran...
164
165
166
167
168
169
170
  		if (pmd_trans_huge(*pmd)) {
  			/*
  			 * A non-zero return is treated as "vec already filled
  			 * for this piece": advance vec here, because continue
  			 * skips the advance at the bottom of the loop body but
  			 * still evaluates the loop condition (pmd++, addr = next).
  			 */
  			if (mincore_huge_pmd(vma, pmd, addr, next, vec)) {
  				vec += (next - addr) >> PAGE_SHIFT;
  				continue;
  			}
  			/* fall through */
  		}
1a5a9906d   Andrea Arcangeli   mm: thp: fix pmd_...
171
  		/*
  		 * NOTE(review): presumably this re-check also covers a pmd that
  		 * is (or just became) huge, so the pte walk is only entered for
  		 * a regular page table - confirm against the helper.
  		 */
  		if (pmd_none_or_trans_huge_or_clear_bad(pmd))
e48293fd7   Johannes Weiner   mincore: do neste...
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
  			mincore_unmapped_range(vma, addr, next, vec);
  		else
  			mincore_pte_range(vma, pmd, addr, next, vec);
  		/* Skip the bytes just written for [addr, next). */
  		vec += (next - addr) >> PAGE_SHIFT;
  	} while (pmd++, addr = next, addr != end);
  }
  
  /*
   * Walk the pud entries under @pgd that cover [addr, end).  A pud that
   * pud_none_or_clear_bad() rejects is reported as an unmapped range;
   * otherwise the walk descends to the pmd level.  @vec receives one byte
   * per page.
   */
  static void mincore_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
  			unsigned long addr, unsigned long end,
  			unsigned char *vec)
  {
  	pud_t *pud = pud_offset(pgd, addr);
  	unsigned long stop;
  
  	do {
  		stop = pud_addr_end(addr, end);
  
  		if (!pud_none_or_clear_bad(pud))
  			mincore_pmd_range(vma, pud, addr, stop, vec);
  		else
  			mincore_unmapped_range(vma, addr, stop, vec);
  
  		/* Step past the bytes produced for [addr, stop). */
  		vec += (stop - addr) >> PAGE_SHIFT;
  		pud++;
  		addr = stop;
  	} while (addr != end);
  }
  
  /*
   * Top of the page table walk: visit the pgd entries of the vma's mm that
   * cover [addr, end).  A pgd that pgd_none_or_clear_bad() rejects is
   * reported as an unmapped range; otherwise the walk descends to the pud
   * level.  @vec receives one byte per page.
   */
  static void mincore_page_range(struct vm_area_struct *vma,
  			unsigned long addr, unsigned long end,
  			unsigned char *vec)
  {
  	pgd_t *pgd = pgd_offset(vma->vm_mm, addr);
  	unsigned long stop;
  
  	do {
  		stop = pgd_addr_end(addr, end);
  
  		if (!pgd_none_or_clear_bad(pgd))
  			mincore_pud_range(vma, pgd, addr, stop, vec);
  		else
  			mincore_unmapped_range(vma, addr, stop, vec);
  
  		/* Step past the bytes produced for [addr, stop). */
  		vec += (stop - addr) >> PAGE_SHIFT;
  		pgd++;
  		addr = stop;
  	} while (addr != end);
  }
2f77d1070   Linus Torvalds   Fix incorrect use...
214
215
216
217
218
  /*
   * Do a chunk of "sys_mincore()". We've already checked
   * all the arguments, we hold the mmap semaphore: we should
   * just return the amount of info we're asked for.
   */
6a60f1b35   Johannes Weiner   mincore: cleanups
219
  static long do_mincore(unsigned long addr, unsigned long pages, unsigned char *vec)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
220
  {
6a60f1b35   Johannes Weiner   mincore: cleanups
221
  	struct vm_area_struct *vma;
25ef0e50c   Johannes Weiner   mincore: pass ran...
222
  	unsigned long end;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
223

6a60f1b35   Johannes Weiner   mincore: cleanups
224
  	vma = find_vma(current->mm, addr);
4fb23e439   Linus Torvalds   Fix up mm/mincore...
225
226
  	if (!vma || addr < vma->vm_start)
  		return -ENOMEM;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
227

25ef0e50c   Johannes Weiner   mincore: pass ran...
228
  	end = min(vma->vm_end, addr + (pages << PAGE_SHIFT));
6a60f1b35   Johannes Weiner   mincore: cleanups
229

e48293fd7   Johannes Weiner   mincore: do neste...
230
231
232
233
  	if (is_vm_hugetlb_page(vma))
  		mincore_hugetlb_page_range(vma, addr, end, vec);
  	else
  		mincore_page_range(vma, addr, end, vec);
42da9cbd3   Nick Piggin   [PATCH] mm: minco...
234

25ef0e50c   Johannes Weiner   mincore: pass ran...
235
  	return (end - addr) >> PAGE_SHIFT;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
  }
  
  /*
   * The mincore(2) system call.
   *
   * mincore() returns the memory residency status of the pages in the
   * current process's address space specified by [addr, addr + len).
   * The status is returned in a vector of bytes.  The least significant
   * bit of each byte is 1 if the referenced page is in memory, otherwise
   * it is zero.
   *
   * Because the status of a page can change after mincore() checks it
   * but before it returns to the application, the returned vector may
   * contain stale information.  Only locked pages are guaranteed to
   * remain in memory.
   *
   * return values:
   *  zero    - success
   *  -EFAULT - vec points to an illegal address
   *  -EINVAL - addr is not a multiple of PAGE_CACHE_SIZE
   *  -ENOMEM - Addresses in the range [addr, addr + len] are
   *		invalid for the address space of this process, or
   *		specify one or more pages which are not currently
   *		mapped
   *  -EAGAIN - A kernel resource was temporarily unavailable.
   */
3480b2574   Heiko Carstens   [CVE-2009-0029] S...
262
263
  SYSCALL_DEFINE3(mincore, unsigned long, start, size_t, len,
  		unsigned char __user *, vec)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
264
  {
2f77d1070   Linus Torvalds   Fix incorrect use...
265
266
267
  	long retval;
  	unsigned long pages;
  	unsigned char *tmp;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
268

2f77d1070   Linus Torvalds   Fix incorrect use...
269
270
271
  	/* Check the start address: needs to be page-aligned.. */
   	if (start & ~PAGE_CACHE_MASK)
  		return -EINVAL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
272

2f77d1070   Linus Torvalds   Fix incorrect use...
273
274
275
  	/* ..and we need to be passed a valid user-space range */
  	if (!access_ok(VERIFY_READ, (void __user *) start, len))
  		return -ENOMEM;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
276

2f77d1070   Linus Torvalds   Fix incorrect use...
277
278
279
  	/* This also avoids any overflows on PAGE_CACHE_ALIGN */
  	pages = len >> PAGE_SHIFT;
  	pages += (len & ~PAGE_MASK) != 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
280

2f77d1070   Linus Torvalds   Fix incorrect use...
281
282
  	if (!access_ok(VERIFY_WRITE, vec, pages))
  		return -EFAULT;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
283

2f77d1070   Linus Torvalds   Fix incorrect use...
284
285
  	tmp = (void *) __get_free_page(GFP_USER);
  	if (!tmp)
4fb23e439   Linus Torvalds   Fix up mm/mincore...
286
  		return -EAGAIN;
2f77d1070   Linus Torvalds   Fix incorrect use...
287
288
289
290
291
292
293
294
  
  	retval = 0;
  	while (pages) {
  		/*
  		 * Do at most PAGE_SIZE entries per iteration, due to
  		 * the temporary buffer size.
  		 */
  		down_read(&current->mm->mmap_sem);
6a60f1b35   Johannes Weiner   mincore: cleanups
295
  		retval = do_mincore(start, min(pages, PAGE_SIZE), tmp);
2f77d1070   Linus Torvalds   Fix incorrect use...
296
297
298
299
300
301
302
  		up_read(&current->mm->mmap_sem);
  
  		if (retval <= 0)
  			break;
  		if (copy_to_user(vec, tmp, retval)) {
  			retval = -EFAULT;
  			break;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
303
  		}
2f77d1070   Linus Torvalds   Fix incorrect use...
304
305
306
307
  		pages -= retval;
  		vec += retval;
  		start += retval << PAGE_SHIFT;
  		retval = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
308
  	}
2f77d1070   Linus Torvalds   Fix incorrect use...
309
310
  	free_page((unsigned long) tmp);
  	return retval;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
311
  }