Commit 479db0bf408e65baa14d2a9821abfcbc0804b847

Authored by Nick Piggin
Committed by Linus Torvalds
Parent: 2d70b68d42

mm: dirty page tracking race fix

There is a race with dirty page accounting where a page may not be
properly accounted for.

clear_page_dirty_for_io() calls page_mkclean; then TestClearPageDirty.

page_mkclean walks the rmaps for that page, and for each one it cleans
and write-protects the pte if it was dirty.  It uses page_check_address
to find the pte.  That function has a shortcut to avoid taking the ptl
if the pte is not present.  Unfortunately, the pte can be switched to
not-present and then back to present by other code that holds the page
table lock -- this should not be a signal for page_mkclean to ignore
that pte, because it may be dirty.

For example, powerpc64's set_pte_at will clear a previously present pte
before setting it to the desired value.  There may also be other code
in core mm or in the architectures that does similar things.
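
To make the window concrete, here is a purely illustrative userspace
analogue (not part of the patch, and not tied to any real architecture):
one thread mimics a clear-then-set pte update done under the lock, the
other mimics the old unlocked pte_present() shortcut.

	#include <pthread.h>
	#include <stdatomic.h>
	#include <stdio.h>

	#define PTE_PRESENT	0x1u
	#define PTE_DIRTY	0x2u

	static _Atomic unsigned int pte = PTE_PRESENT | PTE_DIRTY;
	static pthread_mutex_t ptl = PTHREAD_MUTEX_INITIALIZER;
	static _Atomic int stop;
	static long missed;

	/* Plays set_pte_at() on an arch that clears before setting: under the
	 * lock the pte is logically dirty the whole time, but it is transiently
	 * not-present to an unlocked observer. */
	static void *updater(void *arg)
	{
		while (!atomic_load(&stop)) {
			pthread_mutex_lock(&ptl);
			atomic_store(&pte, 0);				/* transiently !present */
			atomic_store(&pte, PTE_PRESENT | PTE_DIRTY);	/* new value, still dirty */
			pthread_mutex_unlock(&ptl);
		}
		return NULL;
	}

	/* Plays the old page_check_address() fast path: give up without the
	 * lock if the pte looks not-present. */
	static void *cleaner(void *arg)
	{
		for (int i = 0; i < 1000000; i++) {
			if (!(atomic_load(&pte) & PTE_PRESENT)) {
				missed++;		/* a dirty pte was skipped */
				continue;
			}
			pthread_mutex_lock(&ptl);	/* would clean/write-protect here */
			pthread_mutex_unlock(&ptl);
		}
		atomic_store(&stop, 1);
		return NULL;
	}

	int main(void)
	{
		pthread_t a, b;

		pthread_create(&a, NULL, updater, NULL);
		pthread_create(&b, NULL, cleaner, NULL);
		pthread_join(b, NULL);
		pthread_join(a, NULL);
		printf("dirty ptes skipped by the unlocked check: %ld\n", missed);
		return 0;
	}

Built with cc -pthread, the skip count is usually non-zero even though
the simulated pte is dirty at every point where the lock is held --
which is exactly why page_mkclean must not trust the unlocked check.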

The consequences of the bug are loss of data integrity through msync (a
dirty pte that page_mkclean misses is never written back), and loss of
dirty page accounting accuracy.  XIP's __xip_unmap could easily also be
unreliable (depending on the exact XIP locking scheme), which can lead
to data corruption.

Fix this by adding an option to page_check_address to always take the
ptl before checking the pte.

It is still possible to retain the unlocked-check optimization for
page_referenced and try_to_unmap.
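
Condensed, the fix gives page_check_address a sync argument: when it is
set, the unlocked pte_present() shortcut is skipped and the pte is only
examined under the page table lock.  This simply mirrors the diff below
rather than adding anything new:

	/* mm/rmap.c: the quick check becomes conditional */
	pte = pte_offset_map(pmd, address);
	if (!sync && !pte_present(*pte)) {
		pte_unmap(pte);
		return NULL;
	}
	ptl = pte_lockptr(mm, pmd);
	spin_lock(ptl);

	/* callers that must not miss a dirty pte pass sync=1 ... */
	pte = page_check_address(page, mm, address, &ptl, 1);	/* page_mkclean_one, __xip_unmap */

	/* ... while reclaim keeps the cheap racy check with sync=0 */
	pte = page_check_address(page, mm, address, &ptl, 0);	/* page_referenced_one */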

Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Jared Hulbert <jaredeh@gmail.com>
Cc: Carsten Otte <cotte@freenet.de>
Cc: Hugh Dickins <hugh@veritas.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

3 changed files with 11 additions and 7 deletions:

include/linux/rmap.h
#ifndef _LINUX_RMAP_H
#define _LINUX_RMAP_H
/*
 * Declarations for Reverse Mapping functions in mm/rmap.c
 */

#include <linux/list.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/spinlock.h>
#include <linux/memcontrol.h>

/*
 * The anon_vma heads a list of private "related" vmas, to scan if
 * an anonymous page pointing to this anon_vma needs to be unmapped:
 * the vmas on the list will be related by forking, or by splitting.
 *
 * Since vmas come and go as they are split and merged (particularly
 * in mprotect), the mapping field of an anonymous page cannot point
 * directly to a vma: instead it points to an anon_vma, on whose list
 * the related vmas can be easily linked or unlinked.
 *
 * After unlinking the last vma on the list, we must garbage collect
 * the anon_vma object itself: we're guaranteed no page can be
 * pointing to this anon_vma once its vma list is empty.
 */
struct anon_vma {
	spinlock_t lock;	/* Serialize access to vma list */
	/*
	 * NOTE: the LSB of the head.next is set by
	 * mm_take_all_locks() _after_ taking the above lock. So the
	 * head must only be read/written after taking the above lock
	 * to be sure to see a valid next pointer. The LSB bit itself
	 * is serialized by a system wide lock only visible to
	 * mm_take_all_locks() (mm_all_locks_mutex).
	 */
	struct list_head head;	/* List of private "related" vmas */
};

#ifdef CONFIG_MMU

extern struct kmem_cache *anon_vma_cachep;

static inline struct anon_vma *anon_vma_alloc(void)
{
	return kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL);
}

static inline void anon_vma_free(struct anon_vma *anon_vma)
{
	kmem_cache_free(anon_vma_cachep, anon_vma);
}

static inline void anon_vma_lock(struct vm_area_struct *vma)
{
	struct anon_vma *anon_vma = vma->anon_vma;
	if (anon_vma)
		spin_lock(&anon_vma->lock);
}

static inline void anon_vma_unlock(struct vm_area_struct *vma)
{
	struct anon_vma *anon_vma = vma->anon_vma;
	if (anon_vma)
		spin_unlock(&anon_vma->lock);
}

/*
 * anon_vma helper functions.
 */
void anon_vma_init(void);	/* create anon_vma_cachep */
int anon_vma_prepare(struct vm_area_struct *);
void __anon_vma_merge(struct vm_area_struct *, struct vm_area_struct *);
void anon_vma_unlink(struct vm_area_struct *);
void anon_vma_link(struct vm_area_struct *);
void __anon_vma_link(struct vm_area_struct *);

/*
 * rmap interfaces called when adding or removing pte of page
 */
void page_add_anon_rmap(struct page *, struct vm_area_struct *, unsigned long);
void page_add_new_anon_rmap(struct page *, struct vm_area_struct *, unsigned long);
void page_add_file_rmap(struct page *);
void page_remove_rmap(struct page *, struct vm_area_struct *);

#ifdef CONFIG_DEBUG_VM
void page_dup_rmap(struct page *page, struct vm_area_struct *vma, unsigned long address);
#else
static inline void page_dup_rmap(struct page *page, struct vm_area_struct *vma, unsigned long address)
{
	atomic_inc(&page->_mapcount);
}
#endif

/*
 * Called from mm/vmscan.c to handle paging out
 */
int page_referenced(struct page *, int is_locked, struct mem_cgroup *cnt);
int try_to_unmap(struct page *, int ignore_refs);

/*
 * Called from mm/filemap_xip.c to unmap empty zero page
 */
pte_t *page_check_address(struct page *, struct mm_struct *,
-			unsigned long, spinlock_t **);
+			unsigned long, spinlock_t **, int);

/*
 * Used by swapoff to help locate where page is expected in vma.
 */
unsigned long page_address_in_vma(struct page *, struct vm_area_struct *);

/*
 * Cleans the PTEs of shared mappings.
 * (and since clean PTEs should also be readonly, write protects them too)
 *
 * returns the number of cleaned PTEs.
 */
int page_mkclean(struct page *);

#else	/* !CONFIG_MMU */

#define anon_vma_init()		do {} while (0)
#define anon_vma_prepare(vma)	(0)
#define anon_vma_link(vma)	do {} while (0)

#define page_referenced(page,l,cnt) TestClearPageReferenced(page)
#define try_to_unmap(page, refs) SWAP_FAIL

static inline int page_mkclean(struct page *page)
{
	return 0;
}


#endif	/* CONFIG_MMU */

/*
 * Return values of try_to_unmap
 */
#define SWAP_SUCCESS	0
#define SWAP_AGAIN	1
#define SWAP_FAIL	2

#endif	/* _LINUX_RMAP_H */

mm/filemap_xip.c
/*
 * linux/mm/filemap_xip.c
 *
 * Copyright (C) 2005 IBM Corporation
 *	Author: Carsten Otte <cotte@de.ibm.com>
 *
 * derived from linux/mm/filemap.c - Copyright (C) Linus Torvalds
 *
 */

#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/module.h>
#include <linux/uio.h>
#include <linux/rmap.h>
#include <linux/mmu_notifier.h>
#include <linux/sched.h>
#include <asm/tlbflush.h>
#include <asm/io.h>

/*
 * We do use our own empty page to avoid interference with other users
 * of ZERO_PAGE(), such as /dev/zero
 */
static struct page *__xip_sparse_page;

static struct page *xip_sparse_page(void)
{
	if (!__xip_sparse_page) {
		struct page *page = alloc_page(GFP_HIGHUSER | __GFP_ZERO);

		if (page) {
			static DEFINE_SPINLOCK(xip_alloc_lock);
			spin_lock(&xip_alloc_lock);
			if (!__xip_sparse_page)
				__xip_sparse_page = page;
			else
				__free_page(page);
			spin_unlock(&xip_alloc_lock);
		}
	}
	return __xip_sparse_page;
}

/*
 * This is a file read routine for execute in place files, and uses
 * the mapping->a_ops->get_xip_mem() function for the actual low-level
 * stuff.
 *
 * Note the struct file* is not used at all.  It may be NULL.
 */
static ssize_t
do_xip_mapping_read(struct address_space *mapping,
		    struct file_ra_state *_ra,
		    struct file *filp,
		    char __user *buf,
		    size_t len,
		    loff_t *ppos)
{
	struct inode *inode = mapping->host;
	pgoff_t index, end_index;
	unsigned long offset;
	loff_t isize, pos;
	size_t copied = 0, error = 0;

	BUG_ON(!mapping->a_ops->get_xip_mem);

	pos = *ppos;
	index = pos >> PAGE_CACHE_SHIFT;
	offset = pos & ~PAGE_CACHE_MASK;

	isize = i_size_read(inode);
	if (!isize)
		goto out;

	end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
	do {
		unsigned long nr, left;
		void *xip_mem;
		unsigned long xip_pfn;
		int zero = 0;

		/* nr is the maximum number of bytes to copy from this page */
		nr = PAGE_CACHE_SIZE;
		if (index >= end_index) {
			if (index > end_index)
				goto out;
			nr = ((isize - 1) & ~PAGE_CACHE_MASK) + 1;
			if (nr <= offset) {
				goto out;
			}
		}
		nr = nr - offset;
		if (nr > len)
			nr = len;

		error = mapping->a_ops->get_xip_mem(mapping, index, 0,
							&xip_mem, &xip_pfn);
		if (unlikely(error)) {
			if (error == -ENODATA) {
				/* sparse */
				zero = 1;
			} else
				goto out;
		}

		/* If users can be writing to this page using arbitrary
		 * virtual addresses, take care about potential aliasing
		 * before reading the page on the kernel side.
		 */
		if (mapping_writably_mapped(mapping))
			/* address based flush */ ;

		/*
		 * Ok, we have the mem, so now we can copy it to user space...
		 *
		 * The actor routine returns how many bytes were actually used..
		 * NOTE! This may not be the same as how much of a user buffer
		 * we filled up (we may be padding etc), so we can only update
		 * "pos" here (the actor routine has to update the user buffer
		 * pointers and the remaining count).
		 */
		if (!zero)
			left = __copy_to_user(buf+copied, xip_mem+offset, nr);
		else
			left = __clear_user(buf + copied, nr);

		if (left) {
			error = -EFAULT;
			goto out;
		}

		copied += (nr - left);
		offset += (nr - left);
		index += offset >> PAGE_CACHE_SHIFT;
		offset &= ~PAGE_CACHE_MASK;
	} while (copied < len);

out:
	*ppos = pos + copied;
	if (filp)
		file_accessed(filp);

	return (copied ? copied : error);
}

ssize_t
xip_file_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
{
	if (!access_ok(VERIFY_WRITE, buf, len))
		return -EFAULT;

	return do_xip_mapping_read(filp->f_mapping, &filp->f_ra, filp,
			    buf, len, ppos);
}
EXPORT_SYMBOL_GPL(xip_file_read);

/*
 * __xip_unmap is invoked from xip_unmap and
 * xip_write
 *
 * This function walks all vmas of the address_space and unmaps the
 * __xip_sparse_page when found at pgoff.
 */
static void
__xip_unmap (struct address_space * mapping,
		     unsigned long pgoff)
{
	struct vm_area_struct *vma;
	struct mm_struct *mm;
	struct prio_tree_iter iter;
	unsigned long address;
	pte_t *pte;
	pte_t pteval;
	spinlock_t *ptl;
	struct page *page;

	page = __xip_sparse_page;
	if (!page)
		return;

	spin_lock(&mapping->i_mmap_lock);
	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
		mm = vma->vm_mm;
		address = vma->vm_start +
			((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
		BUG_ON(address < vma->vm_start || address >= vma->vm_end);
-		pte = page_check_address(page, mm, address, &ptl);
+		pte = page_check_address(page, mm, address, &ptl, 1);
		if (pte) {
			/* Nuke the page table entry. */
			flush_cache_page(vma, address, pte_pfn(*pte));
			pteval = ptep_clear_flush_notify(vma, address, pte);
			page_remove_rmap(page, vma);
			dec_mm_counter(mm, file_rss);
			BUG_ON(pte_dirty(pteval));
			pte_unmap_unlock(pte, ptl);
			page_cache_release(page);
		}
	}
	spin_unlock(&mapping->i_mmap_lock);
}

/*
 * xip_fault() is invoked via the vma operations vector for a
 * mapped memory region to read in file data during a page fault.
 *
 * This function is derived from filemap_fault, but used for execute in place
 */
static int xip_file_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct file *file = vma->vm_file;
	struct address_space *mapping = file->f_mapping;
	struct inode *inode = mapping->host;
	pgoff_t size;
	void *xip_mem;
	unsigned long xip_pfn;
	struct page *page;
	int error;

	/* XXX: are VM_FAULT_ codes OK? */

	size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
	if (vmf->pgoff >= size)
		return VM_FAULT_SIGBUS;

	error = mapping->a_ops->get_xip_mem(mapping, vmf->pgoff, 0,
						&xip_mem, &xip_pfn);
	if (likely(!error))
		goto found;
	if (error != -ENODATA)
		return VM_FAULT_OOM;

	/* sparse block */
	if ((vma->vm_flags & (VM_WRITE | VM_MAYWRITE)) &&
	    (vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) &&
	    (!(mapping->host->i_sb->s_flags & MS_RDONLY))) {
		int err;

		/* maybe shared writable, allocate new block */
		error = mapping->a_ops->get_xip_mem(mapping, vmf->pgoff, 1,
							&xip_mem, &xip_pfn);
		if (error)
			return VM_FAULT_SIGBUS;
		/* unmap sparse mappings at pgoff from all other vmas */
		__xip_unmap(mapping, vmf->pgoff);

	found:
		err = vm_insert_mixed(vma, (unsigned long)vmf->virtual_address,
							xip_pfn);
		if (err == -ENOMEM)
			return VM_FAULT_OOM;
		BUG_ON(err);
		return VM_FAULT_NOPAGE;
	} else {
		/* not shared and writable, use xip_sparse_page() */
		page = xip_sparse_page();
		if (!page)
			return VM_FAULT_OOM;

		page_cache_get(page);
		vmf->page = page;
		return 0;
	}
}

static struct vm_operations_struct xip_file_vm_ops = {
	.fault	= xip_file_fault,
};

int xip_file_mmap(struct file * file, struct vm_area_struct * vma)
{
	BUG_ON(!file->f_mapping->a_ops->get_xip_mem);

	file_accessed(file);
	vma->vm_ops = &xip_file_vm_ops;
	vma->vm_flags |= VM_CAN_NONLINEAR | VM_MIXEDMAP;
	return 0;
}
EXPORT_SYMBOL_GPL(xip_file_mmap);

static ssize_t
__xip_file_write(struct file *filp, const char __user *buf,
		  size_t count, loff_t pos, loff_t *ppos)
{
	struct address_space * mapping = filp->f_mapping;
	const struct address_space_operations *a_ops = mapping->a_ops;
	struct inode *inode = mapping->host;
	long status = 0;
	size_t bytes;
	ssize_t written = 0;

	BUG_ON(!mapping->a_ops->get_xip_mem);

	do {
		unsigned long index;
		unsigned long offset;
		size_t copied;
		void *xip_mem;
		unsigned long xip_pfn;

		offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
		index = pos >> PAGE_CACHE_SHIFT;
		bytes = PAGE_CACHE_SIZE - offset;
		if (bytes > count)
			bytes = count;

		status = a_ops->get_xip_mem(mapping, index, 0,
						&xip_mem, &xip_pfn);
		if (status == -ENODATA) {
			/* we allocate a new page unmap it */
			status = a_ops->get_xip_mem(mapping, index, 1,
							&xip_mem, &xip_pfn);
			if (!status)
				/* unmap page at pgoff from all other vmas */
				__xip_unmap(mapping, index);
		}

		if (status)
			break;

		copied = bytes -
			__copy_from_user_nocache(xip_mem + offset, buf, bytes);

		if (likely(copied > 0)) {
			status = copied;

			if (status >= 0) {
				written += status;
				count -= status;
				pos += status;
				buf += status;
			}
		}
		if (unlikely(copied != bytes))
			if (status >= 0)
				status = -EFAULT;
		if (status < 0)
			break;
	} while (count);
	*ppos = pos;
	/*
	 * No need to use i_size_read() here, the i_size
	 * cannot change under us because we hold i_mutex.
	 */
	if (pos > inode->i_size) {
		i_size_write(inode, pos);
		mark_inode_dirty(inode);
	}

	return written ? written : status;
}

ssize_t
xip_file_write(struct file *filp, const char __user *buf, size_t len,
	       loff_t *ppos)
{
	struct address_space *mapping = filp->f_mapping;
	struct inode *inode = mapping->host;
	size_t count;
	loff_t pos;
	ssize_t ret;

	mutex_lock(&inode->i_mutex);

	if (!access_ok(VERIFY_READ, buf, len)) {
		ret=-EFAULT;
		goto out_up;
	}

	pos = *ppos;
	count = len;

	vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);

	/* We can write back this queue in page reclaim */
	current->backing_dev_info = mapping->backing_dev_info;

	ret = generic_write_checks(filp, &pos, &count, S_ISBLK(inode->i_mode));
	if (ret)
		goto out_backing;
	if (count == 0)
		goto out_backing;

	ret = file_remove_suid(filp);
	if (ret)
		goto out_backing;

	file_update_time(filp);

	ret = __xip_file_write (filp, buf, count, pos, ppos);

 out_backing:
	current->backing_dev_info = NULL;
 out_up:
	mutex_unlock(&inode->i_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(xip_file_write);

/*
 * truncate a page used for execute in place
 * functionality is analog to block_truncate_page but does use get_xip_mem
 * to get the page instead of page cache
 */
int
xip_truncate_page(struct address_space *mapping, loff_t from)
{
	pgoff_t index = from >> PAGE_CACHE_SHIFT;
	unsigned offset = from & (PAGE_CACHE_SIZE-1);
	unsigned blocksize;
	unsigned length;
	void *xip_mem;
	unsigned long xip_pfn;
	int err;

	BUG_ON(!mapping->a_ops->get_xip_mem);

	blocksize = 1 << mapping->host->i_blkbits;
	length = offset & (blocksize - 1);

	/* Block boundary? Nothing to do */
	if (!length)
		return 0;

	length = blocksize - length;

	err = mapping->a_ops->get_xip_mem(mapping, index, 0,
						&xip_mem, &xip_pfn);
	if (unlikely(err)) {
		if (err == -ENODATA)
			/* Hole? No need to truncate */
			return 0;
		else
			return err;
	}
	memset(xip_mem + offset, 0, length);
	return 0;
}
EXPORT_SYMBOL_GPL(xip_truncate_page);
1 /* 1 /*
2 * mm/rmap.c - physical to virtual reverse mappings 2 * mm/rmap.c - physical to virtual reverse mappings
3 * 3 *
4 * Copyright 2001, Rik van Riel <riel@conectiva.com.br> 4 * Copyright 2001, Rik van Riel <riel@conectiva.com.br>
5 * Released under the General Public License (GPL). 5 * Released under the General Public License (GPL).
6 * 6 *
7 * Simple, low overhead reverse mapping scheme. 7 * Simple, low overhead reverse mapping scheme.
8 * Please try to keep this thing as modular as possible. 8 * Please try to keep this thing as modular as possible.
9 * 9 *
10 * Provides methods for unmapping each kind of mapped page: 10 * Provides methods for unmapping each kind of mapped page:
11 * the anon methods track anonymous pages, and 11 * the anon methods track anonymous pages, and
12 * the file methods track pages belonging to an inode. 12 * the file methods track pages belonging to an inode.
13 * 13 *
14 * Original design by Rik van Riel <riel@conectiva.com.br> 2001 14 * Original design by Rik van Riel <riel@conectiva.com.br> 2001
15 * File methods by Dave McCracken <dmccr@us.ibm.com> 2003, 2004 15 * File methods by Dave McCracken <dmccr@us.ibm.com> 2003, 2004
16 * Anonymous methods by Andrea Arcangeli <andrea@suse.de> 2004 16 * Anonymous methods by Andrea Arcangeli <andrea@suse.de> 2004
17 * Contributions by Hugh Dickins <hugh@veritas.com> 2003, 2004 17 * Contributions by Hugh Dickins <hugh@veritas.com> 2003, 2004
18 */ 18 */
19 19
20 /* 20 /*
21 * Lock ordering in mm: 21 * Lock ordering in mm:
22 * 22 *
23 * inode->i_mutex (while writing or truncating, not reading or faulting) 23 * inode->i_mutex (while writing or truncating, not reading or faulting)
24 * inode->i_alloc_sem (vmtruncate_range) 24 * inode->i_alloc_sem (vmtruncate_range)
25 * mm->mmap_sem 25 * mm->mmap_sem
26 * page->flags PG_locked (lock_page) 26 * page->flags PG_locked (lock_page)
27 * mapping->i_mmap_lock 27 * mapping->i_mmap_lock
28 * anon_vma->lock 28 * anon_vma->lock
29 * mm->page_table_lock or pte_lock 29 * mm->page_table_lock or pte_lock
30 * zone->lru_lock (in mark_page_accessed, isolate_lru_page) 30 * zone->lru_lock (in mark_page_accessed, isolate_lru_page)
31 * swap_lock (in swap_duplicate, swap_info_get) 31 * swap_lock (in swap_duplicate, swap_info_get)
32 * mmlist_lock (in mmput, drain_mmlist and others) 32 * mmlist_lock (in mmput, drain_mmlist and others)
33 * mapping->private_lock (in __set_page_dirty_buffers) 33 * mapping->private_lock (in __set_page_dirty_buffers)
34 * inode_lock (in set_page_dirty's __mark_inode_dirty) 34 * inode_lock (in set_page_dirty's __mark_inode_dirty)
35 * sb_lock (within inode_lock in fs/fs-writeback.c) 35 * sb_lock (within inode_lock in fs/fs-writeback.c)
36 * mapping->tree_lock (widely used, in set_page_dirty, 36 * mapping->tree_lock (widely used, in set_page_dirty,
37 * in arch-dependent flush_dcache_mmap_lock, 37 * in arch-dependent flush_dcache_mmap_lock,
38 * within inode_lock in __sync_single_inode) 38 * within inode_lock in __sync_single_inode)
39 */ 39 */
40 40
41 #include <linux/mm.h> 41 #include <linux/mm.h>
42 #include <linux/pagemap.h> 42 #include <linux/pagemap.h>
43 #include <linux/swap.h> 43 #include <linux/swap.h>
44 #include <linux/swapops.h> 44 #include <linux/swapops.h>
45 #include <linux/slab.h> 45 #include <linux/slab.h>
46 #include <linux/init.h> 46 #include <linux/init.h>
47 #include <linux/rmap.h> 47 #include <linux/rmap.h>
48 #include <linux/rcupdate.h> 48 #include <linux/rcupdate.h>
49 #include <linux/module.h> 49 #include <linux/module.h>
50 #include <linux/kallsyms.h> 50 #include <linux/kallsyms.h>
51 #include <linux/memcontrol.h> 51 #include <linux/memcontrol.h>
52 #include <linux/mmu_notifier.h> 52 #include <linux/mmu_notifier.h>
53 53
54 #include <asm/tlbflush.h> 54 #include <asm/tlbflush.h>
55 55
56 struct kmem_cache *anon_vma_cachep; 56 struct kmem_cache *anon_vma_cachep;
57 57
58 /* This must be called under the mmap_sem. */ 58 /* This must be called under the mmap_sem. */
59 int anon_vma_prepare(struct vm_area_struct *vma) 59 int anon_vma_prepare(struct vm_area_struct *vma)
60 { 60 {
61 struct anon_vma *anon_vma = vma->anon_vma; 61 struct anon_vma *anon_vma = vma->anon_vma;
62 62
63 might_sleep(); 63 might_sleep();
64 if (unlikely(!anon_vma)) { 64 if (unlikely(!anon_vma)) {
65 struct mm_struct *mm = vma->vm_mm; 65 struct mm_struct *mm = vma->vm_mm;
66 struct anon_vma *allocated, *locked; 66 struct anon_vma *allocated, *locked;
67 67
68 anon_vma = find_mergeable_anon_vma(vma); 68 anon_vma = find_mergeable_anon_vma(vma);
69 if (anon_vma) { 69 if (anon_vma) {
70 allocated = NULL; 70 allocated = NULL;
71 locked = anon_vma; 71 locked = anon_vma;
72 spin_lock(&locked->lock); 72 spin_lock(&locked->lock);
73 } else { 73 } else {
74 anon_vma = anon_vma_alloc(); 74 anon_vma = anon_vma_alloc();
75 if (unlikely(!anon_vma)) 75 if (unlikely(!anon_vma))
76 return -ENOMEM; 76 return -ENOMEM;
77 allocated = anon_vma; 77 allocated = anon_vma;
78 locked = NULL; 78 locked = NULL;
79 } 79 }
80 80
81 /* page_table_lock to protect against threads */ 81 /* page_table_lock to protect against threads */
82 spin_lock(&mm->page_table_lock); 82 spin_lock(&mm->page_table_lock);
83 if (likely(!vma->anon_vma)) { 83 if (likely(!vma->anon_vma)) {
84 vma->anon_vma = anon_vma; 84 vma->anon_vma = anon_vma;
85 list_add_tail(&vma->anon_vma_node, &anon_vma->head); 85 list_add_tail(&vma->anon_vma_node, &anon_vma->head);
86 allocated = NULL; 86 allocated = NULL;
87 } 87 }
88 spin_unlock(&mm->page_table_lock); 88 spin_unlock(&mm->page_table_lock);
89 89
90 if (locked) 90 if (locked)
91 spin_unlock(&locked->lock); 91 spin_unlock(&locked->lock);
92 if (unlikely(allocated)) 92 if (unlikely(allocated))
93 anon_vma_free(allocated); 93 anon_vma_free(allocated);
94 } 94 }
95 return 0; 95 return 0;
96 } 96 }
97 97
98 void __anon_vma_merge(struct vm_area_struct *vma, struct vm_area_struct *next) 98 void __anon_vma_merge(struct vm_area_struct *vma, struct vm_area_struct *next)
99 { 99 {
100 BUG_ON(vma->anon_vma != next->anon_vma); 100 BUG_ON(vma->anon_vma != next->anon_vma);
101 list_del(&next->anon_vma_node); 101 list_del(&next->anon_vma_node);
102 } 102 }
103 103
104 void __anon_vma_link(struct vm_area_struct *vma) 104 void __anon_vma_link(struct vm_area_struct *vma)
105 { 105 {
106 struct anon_vma *anon_vma = vma->anon_vma; 106 struct anon_vma *anon_vma = vma->anon_vma;
107 107
108 if (anon_vma) 108 if (anon_vma)
109 list_add_tail(&vma->anon_vma_node, &anon_vma->head); 109 list_add_tail(&vma->anon_vma_node, &anon_vma->head);
110 } 110 }
111 111
112 void anon_vma_link(struct vm_area_struct *vma) 112 void anon_vma_link(struct vm_area_struct *vma)
113 { 113 {
114 struct anon_vma *anon_vma = vma->anon_vma; 114 struct anon_vma *anon_vma = vma->anon_vma;
115 115
116 if (anon_vma) { 116 if (anon_vma) {
117 spin_lock(&anon_vma->lock); 117 spin_lock(&anon_vma->lock);
118 list_add_tail(&vma->anon_vma_node, &anon_vma->head); 118 list_add_tail(&vma->anon_vma_node, &anon_vma->head);
119 spin_unlock(&anon_vma->lock); 119 spin_unlock(&anon_vma->lock);
120 } 120 }
121 } 121 }
122 122
123 void anon_vma_unlink(struct vm_area_struct *vma) 123 void anon_vma_unlink(struct vm_area_struct *vma)
124 { 124 {
125 struct anon_vma *anon_vma = vma->anon_vma; 125 struct anon_vma *anon_vma = vma->anon_vma;
126 int empty; 126 int empty;
127 127
128 if (!anon_vma) 128 if (!anon_vma)
129 return; 129 return;
130 130
131 spin_lock(&anon_vma->lock); 131 spin_lock(&anon_vma->lock);
132 list_del(&vma->anon_vma_node); 132 list_del(&vma->anon_vma_node);
133 133
134 /* We must garbage collect the anon_vma if it's empty */ 134 /* We must garbage collect the anon_vma if it's empty */
135 empty = list_empty(&anon_vma->head); 135 empty = list_empty(&anon_vma->head);
136 spin_unlock(&anon_vma->lock); 136 spin_unlock(&anon_vma->lock);
137 137
138 if (empty) 138 if (empty)
139 anon_vma_free(anon_vma); 139 anon_vma_free(anon_vma);
140 } 140 }
141 141
142 static void anon_vma_ctor(void *data) 142 static void anon_vma_ctor(void *data)
143 { 143 {
144 struct anon_vma *anon_vma = data; 144 struct anon_vma *anon_vma = data;
145 145
146 spin_lock_init(&anon_vma->lock); 146 spin_lock_init(&anon_vma->lock);
147 INIT_LIST_HEAD(&anon_vma->head); 147 INIT_LIST_HEAD(&anon_vma->head);
148 } 148 }
149 149
150 void __init anon_vma_init(void) 150 void __init anon_vma_init(void)
151 { 151 {
152 anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma), 152 anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma),
153 0, SLAB_DESTROY_BY_RCU|SLAB_PANIC, anon_vma_ctor); 153 0, SLAB_DESTROY_BY_RCU|SLAB_PANIC, anon_vma_ctor);
154 } 154 }
155 155
156 /* 156 /*
157 * Getting a lock on a stable anon_vma from a page off the LRU is 157 * Getting a lock on a stable anon_vma from a page off the LRU is
158 * tricky: page_lock_anon_vma rely on RCU to guard against the races. 158 * tricky: page_lock_anon_vma rely on RCU to guard against the races.
159 */ 159 */
160 static struct anon_vma *page_lock_anon_vma(struct page *page) 160 static struct anon_vma *page_lock_anon_vma(struct page *page)
161 { 161 {
162 struct anon_vma *anon_vma; 162 struct anon_vma *anon_vma;
163 unsigned long anon_mapping; 163 unsigned long anon_mapping;
164 164
165 rcu_read_lock(); 165 rcu_read_lock();
166 anon_mapping = (unsigned long) page->mapping; 166 anon_mapping = (unsigned long) page->mapping;
167 if (!(anon_mapping & PAGE_MAPPING_ANON)) 167 if (!(anon_mapping & PAGE_MAPPING_ANON))
168 goto out; 168 goto out;
169 if (!page_mapped(page)) 169 if (!page_mapped(page))
170 goto out; 170 goto out;
171 171
172 anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON); 172 anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
173 spin_lock(&anon_vma->lock); 173 spin_lock(&anon_vma->lock);
174 return anon_vma; 174 return anon_vma;
175 out: 175 out:
176 rcu_read_unlock(); 176 rcu_read_unlock();
177 return NULL; 177 return NULL;
178 } 178 }
179 179
180 static void page_unlock_anon_vma(struct anon_vma *anon_vma) 180 static void page_unlock_anon_vma(struct anon_vma *anon_vma)
181 { 181 {
182 spin_unlock(&anon_vma->lock); 182 spin_unlock(&anon_vma->lock);
183 rcu_read_unlock(); 183 rcu_read_unlock();
184 } 184 }
185 185
186 /* 186 /*
187 * At what user virtual address is page expected in @vma? 187 * At what user virtual address is page expected in @vma?
188 * Returns virtual address or -EFAULT if page's index/offset is not 188 * Returns virtual address or -EFAULT if page's index/offset is not
189 * within the range mapped the @vma. 189 * within the range mapped the @vma.
190 */ 190 */
191 static inline unsigned long 191 static inline unsigned long
192 vma_address(struct page *page, struct vm_area_struct *vma) 192 vma_address(struct page *page, struct vm_area_struct *vma)
193 { 193 {
194 pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); 194 pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
195 unsigned long address; 195 unsigned long address;
196 196
197 address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT); 197 address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
198 if (unlikely(address < vma->vm_start || address >= vma->vm_end)) { 198 if (unlikely(address < vma->vm_start || address >= vma->vm_end)) {
199 /* page should be within @vma mapping range */ 199 /* page should be within @vma mapping range */
200 return -EFAULT; 200 return -EFAULT;
201 } 201 }
202 return address; 202 return address;
203 } 203 }
204 204
205 /* 205 /*
206 * At what user virtual address is page expected in vma? checking that the 206 * At what user virtual address is page expected in vma? checking that the
207 * page matches the vma: currently only used on anon pages, by unuse_vma; 207 * page matches the vma: currently only used on anon pages, by unuse_vma;
208 */ 208 */
209 unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma) 209 unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
210 { 210 {
211 if (PageAnon(page)) { 211 if (PageAnon(page)) {
212 if ((void *)vma->anon_vma != 212 if ((void *)vma->anon_vma !=
213 (void *)page->mapping - PAGE_MAPPING_ANON) 213 (void *)page->mapping - PAGE_MAPPING_ANON)
214 return -EFAULT; 214 return -EFAULT;
215 } else if (page->mapping && !(vma->vm_flags & VM_NONLINEAR)) { 215 } else if (page->mapping && !(vma->vm_flags & VM_NONLINEAR)) {
216 if (!vma->vm_file || 216 if (!vma->vm_file ||
217 vma->vm_file->f_mapping != page->mapping) 217 vma->vm_file->f_mapping != page->mapping)
218 return -EFAULT; 218 return -EFAULT;
219 } else 219 } else
220 return -EFAULT; 220 return -EFAULT;
221 return vma_address(page, vma); 221 return vma_address(page, vma);
222 } 222 }
223 223
224 /* 224 /*
225 * Check that @page is mapped at @address into @mm. 225 * Check that @page is mapped at @address into @mm.
226 * 226 *
227 * If @sync is false, page_check_address may perform a racy check to avoid
228 * the page table lock when the pte is not present (helpful when reclaiming
229 * highly shared pages).
230 *
227 * On success returns with pte mapped and locked. 231 * On success returns with pte mapped and locked.
228 */ 232 */
229 pte_t *page_check_address(struct page *page, struct mm_struct *mm, 233 pte_t *page_check_address(struct page *page, struct mm_struct *mm,
230 unsigned long address, spinlock_t **ptlp) 234 unsigned long address, spinlock_t **ptlp, int sync)
231 { 235 {
232 pgd_t *pgd; 236 pgd_t *pgd;
233 pud_t *pud; 237 pud_t *pud;
234 pmd_t *pmd; 238 pmd_t *pmd;
235 pte_t *pte; 239 pte_t *pte;
236 spinlock_t *ptl; 240 spinlock_t *ptl;
237 241
238 pgd = pgd_offset(mm, address); 242 pgd = pgd_offset(mm, address);
239 if (!pgd_present(*pgd)) 243 if (!pgd_present(*pgd))
240 return NULL; 244 return NULL;
241 245
242 pud = pud_offset(pgd, address); 246 pud = pud_offset(pgd, address);
243 if (!pud_present(*pud)) 247 if (!pud_present(*pud))
244 return NULL; 248 return NULL;
245 249
246 pmd = pmd_offset(pud, address); 250 pmd = pmd_offset(pud, address);
247 if (!pmd_present(*pmd)) 251 if (!pmd_present(*pmd))
248 return NULL; 252 return NULL;
249 253
250 pte = pte_offset_map(pmd, address); 254 pte = pte_offset_map(pmd, address);
251 /* Make a quick check before getting the lock */ 255 /* Make a quick check before getting the lock */
252 if (!pte_present(*pte)) { 256 if (!sync && !pte_present(*pte)) {
253 pte_unmap(pte); 257 pte_unmap(pte);
254 return NULL; 258 return NULL;
255 } 259 }
256 260
257 ptl = pte_lockptr(mm, pmd); 261 ptl = pte_lockptr(mm, pmd);
258 spin_lock(ptl); 262 spin_lock(ptl);
259 if (pte_present(*pte) && page_to_pfn(page) == pte_pfn(*pte)) { 263 if (pte_present(*pte) && page_to_pfn(page) == pte_pfn(*pte)) {
260 *ptlp = ptl; 264 *ptlp = ptl;
261 return pte; 265 return pte;
262 } 266 }
263 pte_unmap_unlock(pte, ptl); 267 pte_unmap_unlock(pte, ptl);
264 return NULL; 268 return NULL;
265 } 269 }
266 270
267 /* 271 /*
268 * Subfunctions of page_referenced: page_referenced_one called 272 * Subfunctions of page_referenced: page_referenced_one called
269 * repeatedly from either page_referenced_anon or page_referenced_file. 273 * repeatedly from either page_referenced_anon or page_referenced_file.
270 */ 274 */
271 static int page_referenced_one(struct page *page, 275 static int page_referenced_one(struct page *page,
272 struct vm_area_struct *vma, unsigned int *mapcount) 276 struct vm_area_struct *vma, unsigned int *mapcount)
273 { 277 {
274 struct mm_struct *mm = vma->vm_mm; 278 struct mm_struct *mm = vma->vm_mm;
275 unsigned long address; 279 unsigned long address;
276 pte_t *pte; 280 pte_t *pte;
277 spinlock_t *ptl; 281 spinlock_t *ptl;
278 int referenced = 0; 282 int referenced = 0;
279 283
280 address = vma_address(page, vma); 284 address = vma_address(page, vma);
281 if (address == -EFAULT) 285 if (address == -EFAULT)
282 goto out; 286 goto out;
283 287
284 pte = page_check_address(page, mm, address, &ptl); 288 pte = page_check_address(page, mm, address, &ptl, 0);
285 if (!pte) 289 if (!pte)
286 goto out; 290 goto out;
287 291
288 if (vma->vm_flags & VM_LOCKED) { 292 if (vma->vm_flags & VM_LOCKED) {
289 referenced++; 293 referenced++;
290 *mapcount = 1; /* break early from loop */ 294 *mapcount = 1; /* break early from loop */
291 } else if (ptep_clear_flush_young_notify(vma, address, pte)) 295 } else if (ptep_clear_flush_young_notify(vma, address, pte))
292 referenced++; 296 referenced++;
293 297
294 /* Pretend the page is referenced if the task has the 298 /* Pretend the page is referenced if the task has the
295 swap token and is in the middle of a page fault. */ 299 swap token and is in the middle of a page fault. */
296 if (mm != current->mm && has_swap_token(mm) && 300 if (mm != current->mm && has_swap_token(mm) &&
297 rwsem_is_locked(&mm->mmap_sem)) 301 rwsem_is_locked(&mm->mmap_sem))
298 referenced++; 302 referenced++;
299 303
300 (*mapcount)--; 304 (*mapcount)--;
301 pte_unmap_unlock(pte, ptl); 305 pte_unmap_unlock(pte, ptl);
302 out: 306 out:
303 return referenced; 307 return referenced;
304 } 308 }
305 309
306 static int page_referenced_anon(struct page *page, 310 static int page_referenced_anon(struct page *page,
307 struct mem_cgroup *mem_cont) 311 struct mem_cgroup *mem_cont)
308 { 312 {
309 unsigned int mapcount; 313 unsigned int mapcount;
310 struct anon_vma *anon_vma; 314 struct anon_vma *anon_vma;
311 struct vm_area_struct *vma; 315 struct vm_area_struct *vma;
312 int referenced = 0; 316 int referenced = 0;
313 317
314 anon_vma = page_lock_anon_vma(page); 318 anon_vma = page_lock_anon_vma(page);
315 if (!anon_vma) 319 if (!anon_vma)
316 return referenced; 320 return referenced;
317 321
318 mapcount = page_mapcount(page); 322 mapcount = page_mapcount(page);
319 list_for_each_entry(vma, &anon_vma->head, anon_vma_node) { 323 list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
320 /* 324 /*
321 * If we are reclaiming on behalf of a cgroup, skip 325 * If we are reclaiming on behalf of a cgroup, skip
322 * counting on behalf of references from different 326 * counting on behalf of references from different
323 * cgroups 327 * cgroups
324 */ 328 */
325 if (mem_cont && !mm_match_cgroup(vma->vm_mm, mem_cont)) 329 if (mem_cont && !mm_match_cgroup(vma->vm_mm, mem_cont))
326 continue; 330 continue;
327 referenced += page_referenced_one(page, vma, &mapcount); 331 referenced += page_referenced_one(page, vma, &mapcount);
328 if (!mapcount) 332 if (!mapcount)
329 break; 333 break;
330 } 334 }
331 335
332 page_unlock_anon_vma(anon_vma); 336 page_unlock_anon_vma(anon_vma);
333 return referenced; 337 return referenced;
334 } 338 }
335 339
336 /** 340 /**
337 * page_referenced_file - referenced check for object-based rmap 341 * page_referenced_file - referenced check for object-based rmap
338 * @page: the page we're checking references on. 342 * @page: the page we're checking references on.
339 * @mem_cont: target memory controller 343 * @mem_cont: target memory controller
340 * 344 *
341 * For an object-based mapped page, find all the places it is mapped and 345 * For an object-based mapped page, find all the places it is mapped and
342 * check/clear the referenced flag. This is done by following the page->mapping 346 * check/clear the referenced flag. This is done by following the page->mapping
343 * pointer, then walking the chain of vmas it holds. It returns the number 347 * pointer, then walking the chain of vmas it holds. It returns the number
344 * of references it found. 348 * of references it found.
345 * 349 *
346 * This function is only called from page_referenced for object-based pages. 350 * This function is only called from page_referenced for object-based pages.
347 */ 351 */
348 static int page_referenced_file(struct page *page, 352 static int page_referenced_file(struct page *page,
349 struct mem_cgroup *mem_cont) 353 struct mem_cgroup *mem_cont)
350 { 354 {
351 unsigned int mapcount; 355 unsigned int mapcount;
352 struct address_space *mapping = page->mapping; 356 struct address_space *mapping = page->mapping;
353 pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); 357 pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
354 struct vm_area_struct *vma; 358 struct vm_area_struct *vma;
355 struct prio_tree_iter iter; 359 struct prio_tree_iter iter;
356 int referenced = 0; 360 int referenced = 0;
357 361
358 /* 362 /*
359 * The caller's checks on page->mapping and !PageAnon have made 363 * The caller's checks on page->mapping and !PageAnon have made
360 * sure that this is a file page: the check for page->mapping 364 * sure that this is a file page: the check for page->mapping
361 * excludes the case just before it gets set on an anon page. 365 * excludes the case just before it gets set on an anon page.
362 */ 366 */
363 BUG_ON(PageAnon(page)); 367 BUG_ON(PageAnon(page));
364 368
365 /* 369 /*
366 * The page lock not only makes sure that page->mapping cannot 370 * The page lock not only makes sure that page->mapping cannot
367 * suddenly be NULLified by truncation, it makes sure that the 371 * suddenly be NULLified by truncation, it makes sure that the
368 * structure at mapping cannot be freed and reused yet, 372 * structure at mapping cannot be freed and reused yet,
369 * so we can safely take mapping->i_mmap_lock. 373 * so we can safely take mapping->i_mmap_lock.
370 */ 374 */
371 BUG_ON(!PageLocked(page)); 375 BUG_ON(!PageLocked(page));
372 376
373 spin_lock(&mapping->i_mmap_lock); 377 spin_lock(&mapping->i_mmap_lock);
374 378
375 /* 379 /*
376 * i_mmap_lock does not stabilize mapcount at all, but mapcount 380 * i_mmap_lock does not stabilize mapcount at all, but mapcount
377 * is more likely to be accurate if we note it after spinning. 381 * is more likely to be accurate if we note it after spinning.
378 */ 382 */
379 mapcount = page_mapcount(page); 383 mapcount = page_mapcount(page);
380 384
381 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { 385 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
382 /* 386 /*
383 * If we are reclaiming on behalf of a cgroup, skip 387 * If we are reclaiming on behalf of a cgroup, skip
384 * counting on behalf of references from different 388 * counting on behalf of references from different
385 * cgroups 389 * cgroups
386 */ 390 */
387 if (mem_cont && !mm_match_cgroup(vma->vm_mm, mem_cont)) 391 if (mem_cont && !mm_match_cgroup(vma->vm_mm, mem_cont))
388 continue; 392 continue;
389 if ((vma->vm_flags & (VM_LOCKED|VM_MAYSHARE)) 393 if ((vma->vm_flags & (VM_LOCKED|VM_MAYSHARE))
390 == (VM_LOCKED|VM_MAYSHARE)) { 394 == (VM_LOCKED|VM_MAYSHARE)) {
391 referenced++; 395 referenced++;
392 break; 396 break;
393 } 397 }
394 referenced += page_referenced_one(page, vma, &mapcount); 398 referenced += page_referenced_one(page, vma, &mapcount);
395 if (!mapcount) 399 if (!mapcount)
396 break; 400 break;
397 } 401 }
398 402
399 spin_unlock(&mapping->i_mmap_lock); 403 spin_unlock(&mapping->i_mmap_lock);
400 return referenced; 404 return referenced;
401 } 405 }
402 406
403 /** 407 /**
404 * page_referenced - test if the page was referenced 408 * page_referenced - test if the page was referenced
405 * @page: the page to test 409 * @page: the page to test
406 * @is_locked: caller holds lock on the page 410 * @is_locked: caller holds lock on the page
407 * @mem_cont: target memory controller 411 * @mem_cont: target memory controller
408 * 412 *
409 * Quick test_and_clear_referenced for all mappings to a page, 413 * Quick test_and_clear_referenced for all mappings to a page,
410 * returns the number of ptes which referenced the page. 414 * returns the number of ptes which referenced the page.
411 */ 415 */
412 int page_referenced(struct page *page, int is_locked, 416 int page_referenced(struct page *page, int is_locked,
413 struct mem_cgroup *mem_cont) 417 struct mem_cgroup *mem_cont)
414 { 418 {
415 int referenced = 0; 419 int referenced = 0;
416 420
417 if (TestClearPageReferenced(page)) 421 if (TestClearPageReferenced(page))
418 referenced++; 422 referenced++;
419 423
420 if (page_mapped(page) && page->mapping) { 424 if (page_mapped(page) && page->mapping) {
421 if (PageAnon(page)) 425 if (PageAnon(page))
422 referenced += page_referenced_anon(page, mem_cont); 426 referenced += page_referenced_anon(page, mem_cont);
423 else if (is_locked) 427 else if (is_locked)
424 referenced += page_referenced_file(page, mem_cont); 428 referenced += page_referenced_file(page, mem_cont);
425 else if (!trylock_page(page)) 429 else if (!trylock_page(page))
426 referenced++; 430 referenced++;
427 else { 431 else {
428 if (page->mapping) 432 if (page->mapping)
429 referenced += 433 referenced +=
430 page_referenced_file(page, mem_cont); 434 page_referenced_file(page, mem_cont);
431 unlock_page(page); 435 unlock_page(page);
432 } 436 }
433 } 437 }
434 438
435 if (page_test_and_clear_young(page)) 439 if (page_test_and_clear_young(page))
436 referenced++; 440 referenced++;
437 441
438 return referenced; 442 return referenced;
439 } 443 }
440 444
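A minimal caller sketch, not part of this patch (the helper name is hypothetical): reclaim-style code typically treats a non-zero page_referenced() count as "recently used" and keeps the page on that basis.

	static int page_recently_used(struct page *page, struct mem_cgroup *memcg)
	{
		/* non-zero means the page flag or some mapping pte was
		 * referenced since the last scan: keep the page around */
		return page_referenced(page, 0, memcg) != 0;
	}
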
441 static int page_mkclean_one(struct page *page, struct vm_area_struct *vma) 445 static int page_mkclean_one(struct page *page, struct vm_area_struct *vma)
442 { 446 {
443 struct mm_struct *mm = vma->vm_mm; 447 struct mm_struct *mm = vma->vm_mm;
444 unsigned long address; 448 unsigned long address;
445 pte_t *pte; 449 pte_t *pte;
446 spinlock_t *ptl; 450 spinlock_t *ptl;
447 int ret = 0; 451 int ret = 0;
448 452
449 address = vma_address(page, vma); 453 address = vma_address(page, vma);
450 if (address == -EFAULT) 454 if (address == -EFAULT)
451 goto out; 455 goto out;
452 456
453 pte = page_check_address(page, mm, address, &ptl); 457 pte = page_check_address(page, mm, address, &ptl, 1);
454 if (!pte) 458 if (!pte)
455 goto out; 459 goto out;
456 460
457 if (pte_dirty(*pte) || pte_write(*pte)) { 461 if (pte_dirty(*pte) || pte_write(*pte)) {
458 pte_t entry; 462 pte_t entry;
459 463
460 flush_cache_page(vma, address, pte_pfn(*pte)); 464 flush_cache_page(vma, address, pte_pfn(*pte));
461 entry = ptep_clear_flush_notify(vma, address, pte); 465 entry = ptep_clear_flush_notify(vma, address, pte);
462 entry = pte_wrprotect(entry); 466 entry = pte_wrprotect(entry);
463 entry = pte_mkclean(entry); 467 entry = pte_mkclean(entry);
464 set_pte_at(mm, address, pte, entry); 468 set_pte_at(mm, address, pte, entry);
465 ret = 1; 469 ret = 1;
466 } 470 }
467 471
468 pte_unmap_unlock(pte, ptl); 472 pte_unmap_unlock(pte, ptl);
469 out: 473 out:
470 return ret; 474 return ret;
471 } 475 }
472 476
473 static int page_mkclean_file(struct address_space *mapping, struct page *page) 477 static int page_mkclean_file(struct address_space *mapping, struct page *page)
474 { 478 {
475 pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); 479 pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
476 struct vm_area_struct *vma; 480 struct vm_area_struct *vma;
477 struct prio_tree_iter iter; 481 struct prio_tree_iter iter;
478 int ret = 0; 482 int ret = 0;
479 483
480 BUG_ON(PageAnon(page)); 484 BUG_ON(PageAnon(page));
481 485
482 spin_lock(&mapping->i_mmap_lock); 486 spin_lock(&mapping->i_mmap_lock);
483 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { 487 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
484 if (vma->vm_flags & VM_SHARED) 488 if (vma->vm_flags & VM_SHARED)
485 ret += page_mkclean_one(page, vma); 489 ret += page_mkclean_one(page, vma);
486 } 490 }
487 spin_unlock(&mapping->i_mmap_lock); 491 spin_unlock(&mapping->i_mmap_lock);
488 return ret; 492 return ret;
489 } 493 }
490 494
491 int page_mkclean(struct page *page) 495 int page_mkclean(struct page *page)
492 { 496 {
493 int ret = 0; 497 int ret = 0;
494 498
495 BUG_ON(!PageLocked(page)); 499 BUG_ON(!PageLocked(page));
496 500
497 if (page_mapped(page)) { 501 if (page_mapped(page)) {
498 struct address_space *mapping = page_mapping(page); 502 struct address_space *mapping = page_mapping(page);
499 if (mapping) { 503 if (mapping) {
500 ret = page_mkclean_file(mapping, page); 504 ret = page_mkclean_file(mapping, page);
501 if (page_test_dirty(page)) { 505 if (page_test_dirty(page)) {
502 page_clear_dirty(page); 506 page_clear_dirty(page);
503 ret = 1; 507 ret = 1;
504 } 508 }
505 } 509 }
506 } 510 }
507 511
508 return ret; 512 return ret;
509 } 513 }
510 EXPORT_SYMBOL_GPL(page_mkclean); 514 EXPORT_SYMBOL_GPL(page_mkclean);
511 515
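In the hunk above, page_mkclean_one() now passes 1 as the new final page_check_address() argument ("&ptl, 1"), whereas try_to_unmap_one() further down passes 0 ("&ptl, 0"), so only the pte-cleaning path pays for the stricter locked check. A hedged sketch, not taken from this patch, of the pattern a dirty-tracking caller builds on page_mkclean():

	/* Illustrative only: write-protect and clean every pte mapping the
	 * locked page; a non-zero return means some pte was dirty or
	 * writable, so keep the page's own dirty state in sync. */
	BUG_ON(!PageLocked(page));
	if (page_mkclean(page))
		set_page_dirty(page);
	/* a test-and-clear of the page dirty bit can now follow, knowing
	 * that any later store must fault and re-dirty the page first */
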
512 /** 516 /**
513 * __page_set_anon_rmap - set up new anonymous rmap 517 * __page_set_anon_rmap - set up new anonymous rmap
514 * @page: the page to add the mapping to 518 * @page: the page to add the mapping to
515 * @vma: the vm area in which the mapping is added 519 * @vma: the vm area in which the mapping is added
516 * @address: the user virtual address mapped 520 * @address: the user virtual address mapped
517 */ 521 */
518 static void __page_set_anon_rmap(struct page *page, 522 static void __page_set_anon_rmap(struct page *page,
519 struct vm_area_struct *vma, unsigned long address) 523 struct vm_area_struct *vma, unsigned long address)
520 { 524 {
521 struct anon_vma *anon_vma = vma->anon_vma; 525 struct anon_vma *anon_vma = vma->anon_vma;
522 526
523 BUG_ON(!anon_vma); 527 BUG_ON(!anon_vma);
524 anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON; 528 anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
525 page->mapping = (struct address_space *) anon_vma; 529 page->mapping = (struct address_space *) anon_vma;
526 530
527 page->index = linear_page_index(vma, address); 531 page->index = linear_page_index(vma, address);
528 532
529 /* 533 /*
530 * nr_mapped state can be updated without turning off 534 * nr_mapped state can be updated without turning off
531 * interrupts because it is not modified via interrupt. 535 * interrupts because it is not modified via interrupt.
532 */ 536 */
533 __inc_zone_page_state(page, NR_ANON_PAGES); 537 __inc_zone_page_state(page, NR_ANON_PAGES);
534 } 538 }
535 539
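As a reading aid for the pointer arithmetic above, a sketch (not from this patch; the helper names are made up) of how the anon_vma pointer and the "anonymous" flag share the single page->mapping field, assuming PAGE_MAPPING_ANON is the low bit as in the mm headers of this era:

	static inline int page_is_anon(struct page *page)	/* what PageAnon() tests */
	{
		return (unsigned long)page->mapping & PAGE_MAPPING_ANON;
	}

	static inline struct anon_vma *page_anon_vma_of(struct page *page)
	{
		return (struct anon_vma *)
			((void *)page->mapping - PAGE_MAPPING_ANON);
	}
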
536 /** 540 /**
537 * __page_check_anon_rmap - sanity check anonymous rmap addition 541 * __page_check_anon_rmap - sanity check anonymous rmap addition
538 * @page: the page to add the mapping to 542 * @page: the page to add the mapping to
539 * @vma: the vm area in which the mapping is added 543 * @vma: the vm area in which the mapping is added
540 * @address: the user virtual address mapped 544 * @address: the user virtual address mapped
541 */ 545 */
542 static void __page_check_anon_rmap(struct page *page, 546 static void __page_check_anon_rmap(struct page *page,
543 struct vm_area_struct *vma, unsigned long address) 547 struct vm_area_struct *vma, unsigned long address)
544 { 548 {
545 #ifdef CONFIG_DEBUG_VM 549 #ifdef CONFIG_DEBUG_VM
546 /* 550 /*
547 * The page's anon-rmap details (mapping and index) are guaranteed to 551 * The page's anon-rmap details (mapping and index) are guaranteed to
548 * be set up correctly at this point. 552 * be set up correctly at this point.
549 * 553 *
550 * We have exclusion against page_add_anon_rmap because the caller 554 * We have exclusion against page_add_anon_rmap because the caller
551 * always holds the page locked, except if called from page_dup_rmap, 555 * always holds the page locked, except if called from page_dup_rmap,
552 * in which case the page is already known to be set up. 556 * in which case the page is already known to be set up.
553 * 557 *
554 * We have exclusion against page_add_new_anon_rmap because those pages 558 * We have exclusion against page_add_new_anon_rmap because those pages
555 * are initially only visible via the pagetables, and the pte is locked 559 * are initially only visible via the pagetables, and the pte is locked
556 * over the call to page_add_new_anon_rmap. 560 * over the call to page_add_new_anon_rmap.
557 */ 561 */
558 struct anon_vma *anon_vma = vma->anon_vma; 562 struct anon_vma *anon_vma = vma->anon_vma;
559 anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON; 563 anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
560 BUG_ON(page->mapping != (struct address_space *)anon_vma); 564 BUG_ON(page->mapping != (struct address_space *)anon_vma);
561 BUG_ON(page->index != linear_page_index(vma, address)); 565 BUG_ON(page->index != linear_page_index(vma, address));
562 #endif 566 #endif
563 } 567 }
564 568
565 /** 569 /**
566 * page_add_anon_rmap - add pte mapping to an anonymous page 570 * page_add_anon_rmap - add pte mapping to an anonymous page
567 * @page: the page to add the mapping to 571 * @page: the page to add the mapping to
568 * @vma: the vm area in which the mapping is added 572 * @vma: the vm area in which the mapping is added
569 * @address: the user virtual address mapped 573 * @address: the user virtual address mapped
570 * 574 *
571 * The caller needs to hold the pte lock and the page must be locked. 575 * The caller needs to hold the pte lock and the page must be locked.
572 */ 576 */
573 void page_add_anon_rmap(struct page *page, 577 void page_add_anon_rmap(struct page *page,
574 struct vm_area_struct *vma, unsigned long address) 578 struct vm_area_struct *vma, unsigned long address)
575 { 579 {
576 VM_BUG_ON(!PageLocked(page)); 580 VM_BUG_ON(!PageLocked(page));
577 VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end); 581 VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end);
578 if (atomic_inc_and_test(&page->_mapcount)) 582 if (atomic_inc_and_test(&page->_mapcount))
579 __page_set_anon_rmap(page, vma, address); 583 __page_set_anon_rmap(page, vma, address);
580 else 584 else
581 __page_check_anon_rmap(page, vma, address); 585 __page_check_anon_rmap(page, vma, address);
582 } 586 }
583 587
584 /** 588 /**
585 * page_add_new_anon_rmap - add pte mapping to a new anonymous page 589 * page_add_new_anon_rmap - add pte mapping to a new anonymous page
586 * @page: the page to add the mapping to 590 * @page: the page to add the mapping to
587 * @vma: the vm area in which the mapping is added 591 * @vma: the vm area in which the mapping is added
588 * @address: the user virtual address mapped 592 * @address: the user virtual address mapped
589 * 593 *
590 * Same as page_add_anon_rmap but must only be called on *new* pages. 594 * Same as page_add_anon_rmap but must only be called on *new* pages.
591 * This means the inc-and-test can be bypassed. 595 * This means the inc-and-test can be bypassed.
592 * Page does not have to be locked. 596 * Page does not have to be locked.
593 */ 597 */
594 void page_add_new_anon_rmap(struct page *page, 598 void page_add_new_anon_rmap(struct page *page,
595 struct vm_area_struct *vma, unsigned long address) 599 struct vm_area_struct *vma, unsigned long address)
596 { 600 {
597 BUG_ON(address < vma->vm_start || address >= vma->vm_end); 601 BUG_ON(address < vma->vm_start || address >= vma->vm_end);
598 atomic_set(&page->_mapcount, 0); /* elevate count by 1 (starts at -1) */ 602 atomic_set(&page->_mapcount, 0); /* elevate count by 1 (starts at -1) */
599 __page_set_anon_rmap(page, vma, address); 603 __page_set_anon_rmap(page, vma, address);
600 } 604 }
601 605
602 /** 606 /**
603 * page_add_file_rmap - add pte mapping to a file page 607 * page_add_file_rmap - add pte mapping to a file page
604 * @page: the page to add the mapping to 608 * @page: the page to add the mapping to
605 * 609 *
606 * The caller needs to hold the pte lock. 610 * The caller needs to hold the pte lock.
607 */ 611 */
608 void page_add_file_rmap(struct page *page) 612 void page_add_file_rmap(struct page *page)
609 { 613 {
610 if (atomic_inc_and_test(&page->_mapcount)) 614 if (atomic_inc_and_test(&page->_mapcount))
611 __inc_zone_page_state(page, NR_FILE_MAPPED); 615 __inc_zone_page_state(page, NR_FILE_MAPPED);
612 } 616 }
613 617
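A tiny illustration, not part of the patch, of the _mapcount convention that page_add_*_rmap() above and page_remove_rmap() below rely on: the count starts at -1, so only the first mapper and the last unmapper see the atomic helpers return true and perform the accounting.

	atomic_t mapcount = ATOMIC_INIT(-1);		/* freshly allocated page       */

	BUG_ON(!atomic_inc_and_test(&mapcount));	/* 1st pte:   -1 -> 0, account  */
	BUG_ON(atomic_inc_and_test(&mapcount));		/* 2nd pte:    0 -> 1, no-op    */
	BUG_ON(atomic_add_negative(-1, &mapcount));	/* unmap one:  1 -> 0, no-op    */
	BUG_ON(!atomic_add_negative(-1, &mapcount));	/* unmap last: 0 -> -1, undo    */
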
614 #ifdef CONFIG_DEBUG_VM 618 #ifdef CONFIG_DEBUG_VM
615 /** 619 /**
616 * page_dup_rmap - duplicate pte mapping to a page 620 * page_dup_rmap - duplicate pte mapping to a page
617 * @page: the page to add the mapping to 621 * @page: the page to add the mapping to
618 * @vma: the vm area being duplicated 622 * @vma: the vm area being duplicated
619 * @address: the user virtual address mapped 623 * @address: the user virtual address mapped
620 * 624 *
621 * For copy_page_range only: minimal extract from page_add_file_rmap / 625 * For copy_page_range only: minimal extract from page_add_file_rmap /
622 * page_add_anon_rmap, avoiding unnecessary tests (already checked) so it's 626 * page_add_anon_rmap, avoiding unnecessary tests (already checked) so it's
623 * quicker. 627 * quicker.
624 * 628 *
625 * The caller needs to hold the pte lock. 629 * The caller needs to hold the pte lock.
626 */ 630 */
627 void page_dup_rmap(struct page *page, struct vm_area_struct *vma, unsigned long address) 631 void page_dup_rmap(struct page *page, struct vm_area_struct *vma, unsigned long address)
628 { 632 {
629 BUG_ON(page_mapcount(page) == 0); 633 BUG_ON(page_mapcount(page) == 0);
630 if (PageAnon(page)) 634 if (PageAnon(page))
631 __page_check_anon_rmap(page, vma, address); 635 __page_check_anon_rmap(page, vma, address);
632 atomic_inc(&page->_mapcount); 636 atomic_inc(&page->_mapcount);
633 } 637 }
634 #endif 638 #endif
635 639
636 /** 640 /**
637 * page_remove_rmap - take down pte mapping from a page 641 * page_remove_rmap - take down pte mapping from a page
638 * @page: page to remove mapping from 642 * @page: page to remove mapping from
639 * @vma: the vm area in which the mapping is removed 643 * @vma: the vm area in which the mapping is removed
640 * 644 *
641 * The caller needs to hold the pte lock. 645 * The caller needs to hold the pte lock.
642 */ 646 */
643 void page_remove_rmap(struct page *page, struct vm_area_struct *vma) 647 void page_remove_rmap(struct page *page, struct vm_area_struct *vma)
644 { 648 {
645 if (atomic_add_negative(-1, &page->_mapcount)) { 649 if (atomic_add_negative(-1, &page->_mapcount)) {
646 if (unlikely(page_mapcount(page) < 0)) { 650 if (unlikely(page_mapcount(page) < 0)) {
647 printk (KERN_EMERG "Eeek! page_mapcount(page) went negative! (%d)\n", page_mapcount(page)); 651 printk (KERN_EMERG "Eeek! page_mapcount(page) went negative! (%d)\n", page_mapcount(page));
648 printk (KERN_EMERG " page pfn = %lx\n", page_to_pfn(page)); 652 printk (KERN_EMERG " page pfn = %lx\n", page_to_pfn(page));
649 printk (KERN_EMERG " page->flags = %lx\n", page->flags); 653 printk (KERN_EMERG " page->flags = %lx\n", page->flags);
650 printk (KERN_EMERG " page->count = %x\n", page_count(page)); 654 printk (KERN_EMERG " page->count = %x\n", page_count(page));
651 printk (KERN_EMERG " page->mapping = %p\n", page->mapping); 655 printk (KERN_EMERG " page->mapping = %p\n", page->mapping);
652 print_symbol (KERN_EMERG " vma->vm_ops = %s\n", (unsigned long)vma->vm_ops); 656 print_symbol (KERN_EMERG " vma->vm_ops = %s\n", (unsigned long)vma->vm_ops);
653 if (vma->vm_ops) { 657 if (vma->vm_ops) {
654 print_symbol (KERN_EMERG " vma->vm_ops->fault = %s\n", (unsigned long)vma->vm_ops->fault); 658 print_symbol (KERN_EMERG " vma->vm_ops->fault = %s\n", (unsigned long)vma->vm_ops->fault);
655 } 659 }
656 if (vma->vm_file && vma->vm_file->f_op) 660 if (vma->vm_file && vma->vm_file->f_op)
657 print_symbol (KERN_EMERG " vma->vm_file->f_op->mmap = %s\n", (unsigned long)vma->vm_file->f_op->mmap); 661 print_symbol (KERN_EMERG " vma->vm_file->f_op->mmap = %s\n", (unsigned long)vma->vm_file->f_op->mmap);
658 BUG(); 662 BUG();
659 } 663 }
660 664
661 /* 665 /*
662 * Now that the last pte has gone, s390 must transfer dirty 666 * Now that the last pte has gone, s390 must transfer dirty
663 * flag from storage key to struct page. We can usually skip 667 * flag from storage key to struct page. We can usually skip
664 * this if the page is anon, so about to be freed; but perhaps 668 * this if the page is anon, so about to be freed; but perhaps
665 * not if it's in swapcache - there might be another pte slot 669 * not if it's in swapcache - there might be another pte slot
666 * containing the swap entry, but page not yet written to swap. 670 * containing the swap entry, but page not yet written to swap.
667 */ 671 */
668 if ((!PageAnon(page) || PageSwapCache(page)) && 672 if ((!PageAnon(page) || PageSwapCache(page)) &&
669 page_test_dirty(page)) { 673 page_test_dirty(page)) {
670 page_clear_dirty(page); 674 page_clear_dirty(page);
671 set_page_dirty(page); 675 set_page_dirty(page);
672 } 676 }
673 677
674 mem_cgroup_uncharge_page(page); 678 mem_cgroup_uncharge_page(page);
675 __dec_zone_page_state(page, 679 __dec_zone_page_state(page,
676 PageAnon(page) ? NR_ANON_PAGES : NR_FILE_MAPPED); 680 PageAnon(page) ? NR_ANON_PAGES : NR_FILE_MAPPED);
677 /* 681 /*
678 * It would be tidy to reset the PageAnon mapping here, 682 * It would be tidy to reset the PageAnon mapping here,
679 * but that might overwrite a racing page_add_anon_rmap 683 * but that might overwrite a racing page_add_anon_rmap
680 * which increments mapcount after us but sets mapping 684 * which increments mapcount after us but sets mapping
681 * before us: so leave the reset to free_hot_cold_page, 685 * before us: so leave the reset to free_hot_cold_page,
682 * and remember that it's only reliable while mapped. 686 * and remember that it's only reliable while mapped.
683 * Leaving it set also helps swapoff to reinstate ptes 687 * Leaving it set also helps swapoff to reinstate ptes
684 * faster for those pages still in swapcache. 688 * faster for those pages still in swapcache.
685 */ 689 */
686 } 690 }
687 } 691 }
688 692
689 /* 693 /*
690 * Subfunctions of try_to_unmap: try_to_unmap_one called 694 * Subfunctions of try_to_unmap: try_to_unmap_one called
691 * repeatedly from either try_to_unmap_anon or try_to_unmap_file. 695 * repeatedly from either try_to_unmap_anon or try_to_unmap_file.
692 */ 696 */
693 static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, 697 static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
694 int migration) 698 int migration)
695 { 699 {
696 struct mm_struct *mm = vma->vm_mm; 700 struct mm_struct *mm = vma->vm_mm;
697 unsigned long address; 701 unsigned long address;
698 pte_t *pte; 702 pte_t *pte;
699 pte_t pteval; 703 pte_t pteval;
700 spinlock_t *ptl; 704 spinlock_t *ptl;
701 int ret = SWAP_AGAIN; 705 int ret = SWAP_AGAIN;
702 706
703 address = vma_address(page, vma); 707 address = vma_address(page, vma);
704 if (address == -EFAULT) 708 if (address == -EFAULT)
705 goto out; 709 goto out;
706 710
707 pte = page_check_address(page, mm, address, &ptl); 711 pte = page_check_address(page, mm, address, &ptl, 0);
708 if (!pte) 712 if (!pte)
709 goto out; 713 goto out;
710 714
711 /* 715 /*
712 * If the page is mlock()d, we cannot swap it out. 716 * If the page is mlock()d, we cannot swap it out.
713 * If it's recently referenced (perhaps page_referenced 717 * If it's recently referenced (perhaps page_referenced
714 * skipped over this mm) then we should reactivate it. 718 * skipped over this mm) then we should reactivate it.
715 */ 719 */
716 if (!migration && ((vma->vm_flags & VM_LOCKED) || 720 if (!migration && ((vma->vm_flags & VM_LOCKED) ||
717 (ptep_clear_flush_young_notify(vma, address, pte)))) { 721 (ptep_clear_flush_young_notify(vma, address, pte)))) {
718 ret = SWAP_FAIL; 722 ret = SWAP_FAIL;
719 goto out_unmap; 723 goto out_unmap;
720 } 724 }
721 725
722 /* Nuke the page table entry. */ 726 /* Nuke the page table entry. */
723 flush_cache_page(vma, address, page_to_pfn(page)); 727 flush_cache_page(vma, address, page_to_pfn(page));
724 pteval = ptep_clear_flush_notify(vma, address, pte); 728 pteval = ptep_clear_flush_notify(vma, address, pte);
725 729
726 /* Move the dirty bit to the physical page now the pte is gone. */ 730 /* Move the dirty bit to the physical page now the pte is gone. */
727 if (pte_dirty(pteval)) 731 if (pte_dirty(pteval))
728 set_page_dirty(page); 732 set_page_dirty(page);
729 733
730 /* Update high watermark before we lower rss */ 734 /* Update high watermark before we lower rss */
731 update_hiwater_rss(mm); 735 update_hiwater_rss(mm);
732 736
733 if (PageAnon(page)) { 737 if (PageAnon(page)) {
734 swp_entry_t entry = { .val = page_private(page) }; 738 swp_entry_t entry = { .val = page_private(page) };
735 739
736 if (PageSwapCache(page)) { 740 if (PageSwapCache(page)) {
737 /* 741 /*
738 * Store the swap location in the pte. 742 * Store the swap location in the pte.
739 * See handle_pte_fault() ... 743 * See handle_pte_fault() ...
740 */ 744 */
741 swap_duplicate(entry); 745 swap_duplicate(entry);
742 if (list_empty(&mm->mmlist)) { 746 if (list_empty(&mm->mmlist)) {
743 spin_lock(&mmlist_lock); 747 spin_lock(&mmlist_lock);
744 if (list_empty(&mm->mmlist)) 748 if (list_empty(&mm->mmlist))
745 list_add(&mm->mmlist, &init_mm.mmlist); 749 list_add(&mm->mmlist, &init_mm.mmlist);
746 spin_unlock(&mmlist_lock); 750 spin_unlock(&mmlist_lock);
747 } 751 }
748 dec_mm_counter(mm, anon_rss); 752 dec_mm_counter(mm, anon_rss);
749 #ifdef CONFIG_MIGRATION 753 #ifdef CONFIG_MIGRATION
750 } else { 754 } else {
751 /* 755 /*
752 * Store the pfn of the page in a special migration 756 * Store the pfn of the page in a special migration
753 * pte. do_swap_page() will wait until the migration 757 * pte. do_swap_page() will wait until the migration
754 * pte is removed and then restart fault handling. 758 * pte is removed and then restart fault handling.
755 */ 759 */
756 BUG_ON(!migration); 760 BUG_ON(!migration);
757 entry = make_migration_entry(page, pte_write(pteval)); 761 entry = make_migration_entry(page, pte_write(pteval));
758 #endif 762 #endif
759 } 763 }
760 set_pte_at(mm, address, pte, swp_entry_to_pte(entry)); 764 set_pte_at(mm, address, pte, swp_entry_to_pte(entry));
761 BUG_ON(pte_file(*pte)); 765 BUG_ON(pte_file(*pte));
762 } else 766 } else
763 #ifdef CONFIG_MIGRATION 767 #ifdef CONFIG_MIGRATION
764 if (migration) { 768 if (migration) {
765 /* Establish migration entry for a file page */ 769 /* Establish migration entry for a file page */
766 swp_entry_t entry; 770 swp_entry_t entry;
767 entry = make_migration_entry(page, pte_write(pteval)); 771 entry = make_migration_entry(page, pte_write(pteval));
768 set_pte_at(mm, address, pte, swp_entry_to_pte(entry)); 772 set_pte_at(mm, address, pte, swp_entry_to_pte(entry));
769 } else 773 } else
770 #endif 774 #endif
771 dec_mm_counter(mm, file_rss); 775 dec_mm_counter(mm, file_rss);
772 776
773 777
774 page_remove_rmap(page, vma); 778 page_remove_rmap(page, vma);
775 page_cache_release(page); 779 page_cache_release(page);
776 780
777 out_unmap: 781 out_unmap:
778 pte_unmap_unlock(pte, ptl); 782 pte_unmap_unlock(pte, ptl);
779 out: 783 out:
780 return ret; 784 return ret;
781 } 785 }
782 786
783 /* 787 /*
784 * objrmap doesn't work for nonlinear VMAs because the assumption that 788 * objrmap doesn't work for nonlinear VMAs because the assumption that
785 * offset-into-file correlates with offset-into-virtual-addresses does not hold. 789 * offset-into-file correlates with offset-into-virtual-addresses does not hold.
786 * Consequently, given a particular page and its ->index, we cannot locate the 790 * Consequently, given a particular page and its ->index, we cannot locate the
787 * ptes which are mapping that page without an exhaustive linear search. 791 * ptes which are mapping that page without an exhaustive linear search.
788 * 792 *
789 * So what this code does is a mini "virtual scan" of each nonlinear VMA which 793 * So what this code does is a mini "virtual scan" of each nonlinear VMA which
790 * maps the file to which the target page belongs. The ->vm_private_data field 794 * maps the file to which the target page belongs. The ->vm_private_data field
791 * holds the current cursor into that scan. Successive searches will circulate 795 * holds the current cursor into that scan. Successive searches will circulate
792 * around the vma's virtual address space. 796 * around the vma's virtual address space.
793 * 797 *
794 * So as more replacement pressure is applied to the pages in a nonlinear VMA, 798 * So as more replacement pressure is applied to the pages in a nonlinear VMA,
795 * more scanning pressure is placed against them as well. Eventually pages 799 * more scanning pressure is placed against them as well. Eventually pages
796 * will become fully unmapped and are eligible for eviction. 800 * will become fully unmapped and are eligible for eviction.
797 * 801 *
798 * For very sparsely populated VMAs this is a little inefficient - chances are 802 * For very sparsely populated VMAs this is a little inefficient - chances are
799 * there won't be many ptes located within the scan cluster. In this case 803 * there won't be many ptes located within the scan cluster. In this case
800 * maybe we could scan further - to the end of the pte page, perhaps. 804 * maybe we could scan further - to the end of the pte page, perhaps.
801 */ 805 */
802 #define CLUSTER_SIZE min(32*PAGE_SIZE, PMD_SIZE) 806 #define CLUSTER_SIZE min(32*PAGE_SIZE, PMD_SIZE)
803 #define CLUSTER_MASK (~(CLUSTER_SIZE - 1)) 807 #define CLUSTER_MASK (~(CLUSTER_SIZE - 1))
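A worked example of the two macros above, assuming 4K pages (the exact numbers are architecture dependent):

	/*
	 *   CLUSTER_SIZE = min(32 * 4K, PMD_SIZE) = 128K  ->  32 ptes per pass
	 *   CLUSTER_MASK = ~(128K - 1) = ~0x1ffff         ->  128K alignment
	 * so each try_to_unmap_cluster() call scans one aligned 32-pte window,
	 * and the cursor kept in vma->vm_private_data advances by CLUSTER_SIZE.
	 */
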
804 808
805 static void try_to_unmap_cluster(unsigned long cursor, 809 static void try_to_unmap_cluster(unsigned long cursor,
806 unsigned int *mapcount, struct vm_area_struct *vma) 810 unsigned int *mapcount, struct vm_area_struct *vma)
807 { 811 {
808 struct mm_struct *mm = vma->vm_mm; 812 struct mm_struct *mm = vma->vm_mm;
809 pgd_t *pgd; 813 pgd_t *pgd;
810 pud_t *pud; 814 pud_t *pud;
811 pmd_t *pmd; 815 pmd_t *pmd;
812 pte_t *pte; 816 pte_t *pte;
813 pte_t pteval; 817 pte_t pteval;
814 spinlock_t *ptl; 818 spinlock_t *ptl;
815 struct page *page; 819 struct page *page;
816 unsigned long address; 820 unsigned long address;
817 unsigned long end; 821 unsigned long end;
818 822
819 address = (vma->vm_start + cursor) & CLUSTER_MASK; 823 address = (vma->vm_start + cursor) & CLUSTER_MASK;
820 end = address + CLUSTER_SIZE; 824 end = address + CLUSTER_SIZE;
821 if (address < vma->vm_start) 825 if (address < vma->vm_start)
822 address = vma->vm_start; 826 address = vma->vm_start;
823 if (end > vma->vm_end) 827 if (end > vma->vm_end)
824 end = vma->vm_end; 828 end = vma->vm_end;
825 829
826 pgd = pgd_offset(mm, address); 830 pgd = pgd_offset(mm, address);
827 if (!pgd_present(*pgd)) 831 if (!pgd_present(*pgd))
828 return; 832 return;
829 833
830 pud = pud_offset(pgd, address); 834 pud = pud_offset(pgd, address);
831 if (!pud_present(*pud)) 835 if (!pud_present(*pud))
832 return; 836 return;
833 837
834 pmd = pmd_offset(pud, address); 838 pmd = pmd_offset(pud, address);
835 if (!pmd_present(*pmd)) 839 if (!pmd_present(*pmd))
836 return; 840 return;
837 841
838 pte = pte_offset_map_lock(mm, pmd, address, &ptl); 842 pte = pte_offset_map_lock(mm, pmd, address, &ptl);
839 843
840 /* Update high watermark before we lower rss */ 844 /* Update high watermark before we lower rss */
841 update_hiwater_rss(mm); 845 update_hiwater_rss(mm);
842 846
843 for (; address < end; pte++, address += PAGE_SIZE) { 847 for (; address < end; pte++, address += PAGE_SIZE) {
844 if (!pte_present(*pte)) 848 if (!pte_present(*pte))
845 continue; 849 continue;
846 page = vm_normal_page(vma, address, *pte); 850 page = vm_normal_page(vma, address, *pte);
847 BUG_ON(!page || PageAnon(page)); 851 BUG_ON(!page || PageAnon(page));
848 852
849 if (ptep_clear_flush_young_notify(vma, address, pte)) 853 if (ptep_clear_flush_young_notify(vma, address, pte))
850 continue; 854 continue;
851 855
852 /* Nuke the page table entry. */ 856 /* Nuke the page table entry. */
853 flush_cache_page(vma, address, pte_pfn(*pte)); 857 flush_cache_page(vma, address, pte_pfn(*pte));
854 pteval = ptep_clear_flush_notify(vma, address, pte); 858 pteval = ptep_clear_flush_notify(vma, address, pte);
855 859
856 /* If nonlinear, store the file page offset in the pte. */ 860 /* If nonlinear, store the file page offset in the pte. */
857 if (page->index != linear_page_index(vma, address)) 861 if (page->index != linear_page_index(vma, address))
858 set_pte_at(mm, address, pte, pgoff_to_pte(page->index)); 862 set_pte_at(mm, address, pte, pgoff_to_pte(page->index));
859 863
860 /* Move the dirty bit to the physical page now the pte is gone. */ 864 /* Move the dirty bit to the physical page now the pte is gone. */
861 if (pte_dirty(pteval)) 865 if (pte_dirty(pteval))
862 set_page_dirty(page); 866 set_page_dirty(page);
863 867
864 page_remove_rmap(page, vma); 868 page_remove_rmap(page, vma);
865 page_cache_release(page); 869 page_cache_release(page);
866 dec_mm_counter(mm, file_rss); 870 dec_mm_counter(mm, file_rss);
867 (*mapcount)--; 871 (*mapcount)--;
868 } 872 }
869 pte_unmap_unlock(pte - 1, ptl); 873 pte_unmap_unlock(pte - 1, ptl);
870 } 874 }
871 875
872 static int try_to_unmap_anon(struct page *page, int migration) 876 static int try_to_unmap_anon(struct page *page, int migration)
873 { 877 {
874 struct anon_vma *anon_vma; 878 struct anon_vma *anon_vma;
875 struct vm_area_struct *vma; 879 struct vm_area_struct *vma;
876 int ret = SWAP_AGAIN; 880 int ret = SWAP_AGAIN;
877 881
878 anon_vma = page_lock_anon_vma(page); 882 anon_vma = page_lock_anon_vma(page);
879 if (!anon_vma) 883 if (!anon_vma)
880 return ret; 884 return ret;
881 885
882 list_for_each_entry(vma, &anon_vma->head, anon_vma_node) { 886 list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
883 ret = try_to_unmap_one(page, vma, migration); 887 ret = try_to_unmap_one(page, vma, migration);
884 if (ret == SWAP_FAIL || !page_mapped(page)) 888 if (ret == SWAP_FAIL || !page_mapped(page))
885 break; 889 break;
886 } 890 }
887 891
888 page_unlock_anon_vma(anon_vma); 892 page_unlock_anon_vma(anon_vma);
889 return ret; 893 return ret;
890 } 894 }
891 895
892 /** 896 /**
893 * try_to_unmap_file - unmap file page using the object-based rmap method 897 * try_to_unmap_file - unmap file page using the object-based rmap method
894 * @page: the page to unmap 898 * @page: the page to unmap
895 * @migration: migration flag 899 * @migration: migration flag
896 * 900 *
897 * Find all the mappings of a page using the mapping pointer and the vma chains 901 * Find all the mappings of a page using the mapping pointer and the vma chains
898 * contained in the address_space struct it points to. 902 * contained in the address_space struct it points to.
899 * 903 *
900 * This function is only called from try_to_unmap for object-based pages. 904 * This function is only called from try_to_unmap for object-based pages.
901 */ 905 */
902 static int try_to_unmap_file(struct page *page, int migration) 906 static int try_to_unmap_file(struct page *page, int migration)
903 { 907 {
904 struct address_space *mapping = page->mapping; 908 struct address_space *mapping = page->mapping;
905 pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); 909 pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
906 struct vm_area_struct *vma; 910 struct vm_area_struct *vma;
907 struct prio_tree_iter iter; 911 struct prio_tree_iter iter;
908 int ret = SWAP_AGAIN; 912 int ret = SWAP_AGAIN;
909 unsigned long cursor; 913 unsigned long cursor;
910 unsigned long max_nl_cursor = 0; 914 unsigned long max_nl_cursor = 0;
911 unsigned long max_nl_size = 0; 915 unsigned long max_nl_size = 0;
912 unsigned int mapcount; 916 unsigned int mapcount;
913 917
914 spin_lock(&mapping->i_mmap_lock); 918 spin_lock(&mapping->i_mmap_lock);
915 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { 919 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
916 ret = try_to_unmap_one(page, vma, migration); 920 ret = try_to_unmap_one(page, vma, migration);
917 if (ret == SWAP_FAIL || !page_mapped(page)) 921 if (ret == SWAP_FAIL || !page_mapped(page))
918 goto out; 922 goto out;
919 } 923 }
920 924
921 if (list_empty(&mapping->i_mmap_nonlinear)) 925 if (list_empty(&mapping->i_mmap_nonlinear))
922 goto out; 926 goto out;
923 927
924 list_for_each_entry(vma, &mapping->i_mmap_nonlinear, 928 list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
925 shared.vm_set.list) { 929 shared.vm_set.list) {
926 if ((vma->vm_flags & VM_LOCKED) && !migration) 930 if ((vma->vm_flags & VM_LOCKED) && !migration)
927 continue; 931 continue;
928 cursor = (unsigned long) vma->vm_private_data; 932 cursor = (unsigned long) vma->vm_private_data;
929 if (cursor > max_nl_cursor) 933 if (cursor > max_nl_cursor)
930 max_nl_cursor = cursor; 934 max_nl_cursor = cursor;
931 cursor = vma->vm_end - vma->vm_start; 935 cursor = vma->vm_end - vma->vm_start;
932 if (cursor > max_nl_size) 936 if (cursor > max_nl_size)
933 max_nl_size = cursor; 937 max_nl_size = cursor;
934 } 938 }
935 939
936 if (max_nl_size == 0) { /* any nonlinears locked or reserved */ 940 if (max_nl_size == 0) { /* any nonlinears locked or reserved */
937 ret = SWAP_FAIL; 941 ret = SWAP_FAIL;
938 goto out; 942 goto out;
939 } 943 }
940 944
941 /* 945 /*
942 * We don't try to search for this page in the nonlinear vmas, 946 * We don't try to search for this page in the nonlinear vmas,
943 * and page_referenced wouldn't have found it anyway. Instead 947 * and page_referenced wouldn't have found it anyway. Instead
944 * just walk the nonlinear vmas trying to age and unmap some. 948 * just walk the nonlinear vmas trying to age and unmap some.
945 * The mapcount of the page we came in with is irrelevant, 949 * The mapcount of the page we came in with is irrelevant,
946 * but even so use it as a guide to how hard we should try? 950 * but even so use it as a guide to how hard we should try?
947 */ 951 */
948 mapcount = page_mapcount(page); 952 mapcount = page_mapcount(page);
949 if (!mapcount) 953 if (!mapcount)
950 goto out; 954 goto out;
951 cond_resched_lock(&mapping->i_mmap_lock); 955 cond_resched_lock(&mapping->i_mmap_lock);
952 956
953 max_nl_size = (max_nl_size + CLUSTER_SIZE - 1) & CLUSTER_MASK; 957 max_nl_size = (max_nl_size + CLUSTER_SIZE - 1) & CLUSTER_MASK;
954 if (max_nl_cursor == 0) 958 if (max_nl_cursor == 0)
955 max_nl_cursor = CLUSTER_SIZE; 959 max_nl_cursor = CLUSTER_SIZE;
956 960
957 do { 961 do {
958 list_for_each_entry(vma, &mapping->i_mmap_nonlinear, 962 list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
959 shared.vm_set.list) { 963 shared.vm_set.list) {
960 if ((vma->vm_flags & VM_LOCKED) && !migration) 964 if ((vma->vm_flags & VM_LOCKED) && !migration)
961 continue; 965 continue;
962 cursor = (unsigned long) vma->vm_private_data; 966 cursor = (unsigned long) vma->vm_private_data;
963 while ( cursor < max_nl_cursor && 967 while ( cursor < max_nl_cursor &&
964 cursor < vma->vm_end - vma->vm_start) { 968 cursor < vma->vm_end - vma->vm_start) {
965 try_to_unmap_cluster(cursor, &mapcount, vma); 969 try_to_unmap_cluster(cursor, &mapcount, vma);
966 cursor += CLUSTER_SIZE; 970 cursor += CLUSTER_SIZE;
967 vma->vm_private_data = (void *) cursor; 971 vma->vm_private_data = (void *) cursor;
968 if ((int)mapcount <= 0) 972 if ((int)mapcount <= 0)
969 goto out; 973 goto out;
970 } 974 }
971 vma->vm_private_data = (void *) max_nl_cursor; 975 vma->vm_private_data = (void *) max_nl_cursor;
972 } 976 }
973 cond_resched_lock(&mapping->i_mmap_lock); 977 cond_resched_lock(&mapping->i_mmap_lock);
974 max_nl_cursor += CLUSTER_SIZE; 978 max_nl_cursor += CLUSTER_SIZE;
975 } while (max_nl_cursor <= max_nl_size); 979 } while (max_nl_cursor <= max_nl_size);
976 980
977 /* 981 /*
978 * Don't loop forever (perhaps all the remaining pages are 982 * Don't loop forever (perhaps all the remaining pages are
979 * in locked vmas). Reset cursor on all unreserved nonlinear 983 * in locked vmas). Reset cursor on all unreserved nonlinear
980 * vmas, now forgetting on which ones it had fallen behind. 984 * vmas, now forgetting on which ones it had fallen behind.
981 */ 985 */
982 list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.vm_set.list) 986 list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.vm_set.list)
983 vma->vm_private_data = NULL; 987 vma->vm_private_data = NULL;
984 out: 988 out:
985 spin_unlock(&mapping->i_mmap_lock); 989 spin_unlock(&mapping->i_mmap_lock);
986 return ret; 990 return ret;
987 } 991 }
988 992
989 /** 993 /**
990 * try_to_unmap - try to remove all page table mappings to a page 994 * try_to_unmap - try to remove all page table mappings to a page
991 * @page: the page to get unmapped 995 * @page: the page to get unmapped
992 * @migration: migration flag 996 * @migration: migration flag
993 * 997 *
994 * Tries to remove all the page table entries which are mapping this 998 * Tries to remove all the page table entries which are mapping this
995 * page, used in the pageout path. Caller must hold the page lock. 999 * page, used in the pageout path. Caller must hold the page lock.
996 * Return values are: 1000 * Return values are:
997 * 1001 *
998 * SWAP_SUCCESS - we succeeded in removing all mappings 1002 * SWAP_SUCCESS - we succeeded in removing all mappings
999 * SWAP_AGAIN - we missed a mapping, try again later 1003 * SWAP_AGAIN - we missed a mapping, try again later
1000 * SWAP_FAIL - the page is unswappable 1004 * SWAP_FAIL - the page is unswappable
1001 */ 1005 */
1002 int try_to_unmap(struct page *page, int migration) 1006 int try_to_unmap(struct page *page, int migration)
1003 { 1007 {
1004 int ret; 1008 int ret;
1005 1009
1006 BUG_ON(!PageLocked(page)); 1010 BUG_ON(!PageLocked(page));
1007 1011
1008 if (PageAnon(page)) 1012 if (PageAnon(page))
1009 ret = try_to_unmap_anon(page, migration); 1013 ret = try_to_unmap_anon(page, migration);
1010 else 1014 else
1011 ret = try_to_unmap_file(page, migration); 1015 ret = try_to_unmap_file(page, migration);
1012 1016
1013 if (!page_mapped(page)) 1017 if (!page_mapped(page))
1014 ret = SWAP_SUCCESS; 1018 ret = SWAP_SUCCESS;
1015 return ret; 1019 return ret;
1016 } 1020 }
1017 1021
1018 1022
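To make the return-value contract of try_to_unmap() concrete, a hedged caller sketch in the style of a pageout loop (the labels are hypothetical and not taken from this patch):

	switch (try_to_unmap(page, 0)) {
	case SWAP_FAIL:		/* unswappable (e.g. mlocked): reactivate it    */
		goto activate_locked;
	case SWAP_AGAIN:	/* missed a mapping: try again on a later pass  */
		goto keep_locked;
	case SWAP_SUCCESS:	/* all ptes gone: safe to write out or free     */
		break;
	}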