Commit 4c21e2f2441dc5fbb957b030333f5a3f2d02dea7
Committed by: Linus Torvalds
1 parent: b38c6845b6
Exists in: master and 4 other branches
[PATCH] mm: split page table lock
Christoph Lameter demonstrated very poor scalability on the SGI 512-way, with a many-threaded application which concurrently initializes different parts of a large anonymous area. This patch corrects that, by using a separate spinlock per page table page, to guard the page table entries in that page, instead of using the mm's single page_table_lock. (But even then, page_table_lock is still used to guard page table allocation, and anon_vma allocation.)

In this implementation, the spinlock is tucked inside the struct page of the page table page: with a BUILD_BUG_ON in case it overflows - which it would in the case of 32-bit PA-RISC with spinlock debugging enabled.

Splitting the lock is not quite for free: another cacheline access. Ideally, I suppose we would use split ptlock only for multi-threaded processes on multi-cpu machines; but deciding that dynamically would have its own costs. So for now enable it by config, at some number of cpus - since the Kconfig language doesn't support inequalities, let preprocessor compare that with NR_CPUS. But I don't think it's worth being user-configurable: for good testing of both split and unsplit configs, split now at 4 cpus, and perhaps change that to 8 later.

There is a benefit even for singly threaded processes: kswapd can be attacking one part of the mm while another part is busy faulting.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
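To illustrate the locking model described above (this sketch is not part of the patch): each page table page carries its own lock for the entries it holds, while a single mm-wide lock remains for allocation. The minimal userspace analogue below models that with pthread spinlocks; `struct fake_mm`, `struct pt_page`, and the sizes are invented for illustration and do not correspond to the kernel structures touched by this diff.

```c
/* Userspace model of the split page table lock: each "page table page"
 * has its own spinlock, so threads touching different parts of the
 * address space do not serialize on one mm-wide lock.
 * Illustrative only; build with: gcc -pthread split_ptl_model.c
 */
#include <pthread.h>
#include <stdio.h>

#define NPTPAGES     64    /* page-table pages in this model */
#define PTRS_PER_PTE 512   /* entries guarded by each page's lock */

struct pt_page {
	pthread_spinlock_t ptl;            /* per-page-table-page lock */
	unsigned long pte[PTRS_PER_PTE];   /* the entries it guards */
};

struct fake_mm {
	pthread_spinlock_t page_table_lock; /* still guards allocation */
	struct pt_page pt[NPTPAGES];
};

/* "Fault in" one entry under the lock of its own page table page. */
static void set_pte(struct fake_mm *mm, unsigned long addr, unsigned long val)
{
	struct pt_page *p = &mm->pt[(addr / PTRS_PER_PTE) % NPTPAGES];

	pthread_spin_lock(&p->ptl);
	p->pte[addr % PTRS_PER_PTE] = val;
	pthread_spin_unlock(&p->ptl);
}

int main(void)
{
	static struct fake_mm mm;

	pthread_spin_init(&mm.page_table_lock, PTHREAD_PROCESS_PRIVATE);
	for (int i = 0; i < NPTPAGES; i++)
		pthread_spin_init(&mm.pt[i].ptl, PTHREAD_PROCESS_PRIVATE);

	/* Threads working on different pages take different ptl locks. */
	set_pte(&mm, 12345, 1);
	printf("pte set under its page's own lock\n");
	return 0;
}
```

In the patch itself the equivalent of the per-page lock is taken through pte_offset_map_lock()/pte_lockptr(), as the include/linux/mm.h and mm/memory.c hunks below show.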
Showing 23 changed files with 138 additions and 79 deletions
- arch/arm/mm/mm-armv.c
- arch/frv/mm/pgalloc.c
- arch/i386/mm/pgtable.c
- arch/um/kernel/skas/mmu.c
- fs/afs/file.c
- fs/buffer.c
- fs/jfs/jfs_metapage.c
- fs/xfs/linux-2.6/xfs_buf.c
- include/linux/buffer_head.h
- include/linux/mm.h
- kernel/kexec.c
- mm/Kconfig
- mm/filemap.c
- mm/memory.c
- mm/mremap.c
- mm/page_alloc.c
- mm/page_io.c
- mm/rmap.c
- mm/shmem.c
- mm/swap.c
- mm/swap_state.c
- mm/swapfile.c
- mm/vmscan.c
arch/arm/mm/mm-armv.c
arch/frv/mm/pgalloc.c
... | ... | @@ -87,14 +87,14 @@ |
87 | 87 | if (pgd_list) |
88 | 88 | pgd_list->private = (unsigned long) &page->index; |
89 | 89 | pgd_list = page; |
90 | - page->private = (unsigned long) &pgd_list; | |
90 | + set_page_private(page, (unsigned long)&pgd_list); | |
91 | 91 | } |
92 | 92 | |
93 | 93 | static inline void pgd_list_del(pgd_t *pgd) |
94 | 94 | { |
95 | 95 | struct page *next, **pprev, *page = virt_to_page(pgd); |
96 | 96 | next = (struct page *) page->index; |
97 | - pprev = (struct page **) page->private; | |
97 | + pprev = (struct page **)page_private(page); | |
98 | 98 | *pprev = next; |
99 | 99 | if (next) |
100 | 100 | next->private = (unsigned long) pprev; |
arch/i386/mm/pgtable.c
... | ... | @@ -188,19 +188,19 @@ |
188 | 188 | struct page *page = virt_to_page(pgd); |
189 | 189 | page->index = (unsigned long)pgd_list; |
190 | 190 | if (pgd_list) |
191 | - pgd_list->private = (unsigned long)&page->index; | |
191 | + set_page_private(pgd_list, (unsigned long)&page->index); | |
192 | 192 | pgd_list = page; |
193 | - page->private = (unsigned long)&pgd_list; | |
193 | + set_page_private(page, (unsigned long)&pgd_list); | |
194 | 194 | } |
195 | 195 | |
196 | 196 | static inline void pgd_list_del(pgd_t *pgd) |
197 | 197 | { |
198 | 198 | struct page *next, **pprev, *page = virt_to_page(pgd); |
199 | 199 | next = (struct page *)page->index; |
200 | - pprev = (struct page **)page->private; | |
200 | + pprev = (struct page **)page_private(page); | |
201 | 201 | *pprev = next; |
202 | 202 | if (next) |
203 | - next->private = (unsigned long)pprev; | |
203 | + set_page_private(next, (unsigned long)pprev); | |
204 | 204 | } |
205 | 205 | |
206 | 206 | void pgd_ctor(void *pgd, kmem_cache_t *cache, unsigned long unused) |
arch/um/kernel/skas/mmu.c
... | ... | @@ -144,6 +144,7 @@ |
144 | 144 | |
145 | 145 | if(!proc_mm || !ptrace_faultinfo){ |
146 | 146 | free_page(mmu->id.stack); |
147 | + pte_lock_deinit(virt_to_page(mmu->last_page_table)); | |
147 | 148 | pte_free_kernel((pte_t *) mmu->last_page_table); |
148 | 149 | dec_page_state(nr_page_table_pages); |
149 | 150 | #ifdef CONFIG_3_LEVEL_PGTABLES |
fs/afs/file.c
... | ... | @@ -291,8 +291,8 @@ |
291 | 291 | cachefs_uncache_page(vnode->cache, page); |
292 | 292 | #endif |
293 | 293 | |
294 | - pageio = (struct cachefs_page *) page->private; | |
295 | - page->private = 0; | |
294 | + pageio = (struct cachefs_page *) page_private(page); | |
295 | + set_page_private(page, 0); | |
296 | 296 | ClearPagePrivate(page); |
297 | 297 | |
298 | 298 | if (pageio) |
fs/buffer.c
fs/jfs/jfs_metapage.c
... | ... | @@ -86,7 +86,7 @@ |
86 | 86 | atomic_t io_count; |
87 | 87 | struct metapage *mp[MPS_PER_PAGE]; |
88 | 88 | }; |
89 | -#define mp_anchor(page) ((struct meta_anchor *)page->private) | |
89 | +#define mp_anchor(page) ((struct meta_anchor *)page_private(page)) | |
90 | 90 | |
91 | 91 | static inline struct metapage *page_to_mp(struct page *page, uint offset) |
92 | 92 | { |
... | ... | @@ -108,7 +108,7 @@ |
108 | 108 | if (!a) |
109 | 109 | return -ENOMEM; |
110 | 110 | memset(a, 0, sizeof(struct meta_anchor)); |
111 | - page->private = (unsigned long)a; | |
111 | + set_page_private(page, (unsigned long)a); | |
112 | 112 | SetPagePrivate(page); |
113 | 113 | kmap(page); |
114 | 114 | } |
... | ... | @@ -136,7 +136,7 @@ |
136 | 136 | a->mp[index] = NULL; |
137 | 137 | if (--a->mp_count == 0) { |
138 | 138 | kfree(a); |
139 | - page->private = 0; | |
139 | + set_page_private(page, 0); | |
140 | 140 | ClearPagePrivate(page); |
141 | 141 | kunmap(page); |
142 | 142 | } |
143 | 143 | |
... | ... | @@ -156,13 +156,13 @@ |
156 | 156 | #else |
157 | 157 | static inline struct metapage *page_to_mp(struct page *page, uint offset) |
158 | 158 | { |
159 | - return PagePrivate(page) ? (struct metapage *)page->private : NULL; | |
159 | + return PagePrivate(page) ? (struct metapage *)page_private(page) : NULL; | |
160 | 160 | } |
161 | 161 | |
162 | 162 | static inline int insert_metapage(struct page *page, struct metapage *mp) |
163 | 163 | { |
164 | 164 | if (mp) { |
165 | - page->private = (unsigned long)mp; | |
165 | + set_page_private(page, (unsigned long)mp); | |
166 | 166 | SetPagePrivate(page); |
167 | 167 | kmap(page); |
168 | 168 | } |
... | ... | @@ -171,7 +171,7 @@ |
171 | 171 | |
172 | 172 | static inline void remove_metapage(struct page *page, struct metapage *mp) |
173 | 173 | { |
174 | - page->private = 0; | |
174 | + set_page_private(page, 0); | |
175 | 175 | ClearPagePrivate(page); |
176 | 176 | kunmap(page); |
177 | 177 | } |
fs/xfs/linux-2.6/xfs_buf.c
... | ... | @@ -181,8 +181,9 @@ |
181 | 181 | size_t offset, |
182 | 182 | size_t length) |
183 | 183 | { |
184 | - page->private |= page_region_mask(offset, length); | |
185 | - if (page->private == ~0UL) | |
184 | + set_page_private(page, | |
185 | + page_private(page) | page_region_mask(offset, length)); | |
186 | + if (page_private(page) == ~0UL) | |
186 | 187 | SetPageUptodate(page); |
187 | 188 | } |
188 | 189 | |
... | ... | @@ -194,7 +195,7 @@ |
194 | 195 | { |
195 | 196 | unsigned long mask = page_region_mask(offset, length); |
196 | 197 | |
197 | - return (mask && (page->private & mask) == mask); | |
198 | + return (mask && (page_private(page) & mask) == mask); | |
198 | 199 | } |
199 | 200 | |
200 | 201 | /* |
include/linux/buffer_head.h
... | ... | @@ -126,8 +126,8 @@ |
126 | 126 | /* If we *know* page->private refers to buffer_heads */ |
127 | 127 | #define page_buffers(page) \ |
128 | 128 | ({ \ |
129 | - BUG_ON(!PagePrivate(page)); \ | |
130 | - ((struct buffer_head *)(page)->private); \ | |
129 | + BUG_ON(!PagePrivate(page)); \ | |
130 | + ((struct buffer_head *)page_private(page)); \ | |
131 | 131 | }) |
132 | 132 | #define page_has_buffers(page) PagePrivate(page) |
133 | 133 | |
... | ... | @@ -219,7 +219,7 @@ |
219 | 219 | { |
220 | 220 | page_cache_get(page); |
221 | 221 | SetPagePrivate(page); |
222 | - page->private = (unsigned long)head; | |
222 | + set_page_private(page, (unsigned long)head); | |
223 | 223 | } |
224 | 224 | |
225 | 225 | static inline void get_bh(struct buffer_head *bh) |
include/linux/mm.h
... | ... | @@ -226,13 +226,18 @@ |
226 | 226 | * to show when page is mapped |
227 | 227 | * & limit reverse map searches. |
228 | 228 | */ |
229 | - unsigned long private; /* Mapping-private opaque data: | |
229 | + union { | |
230 | + unsigned long private; /* Mapping-private opaque data: | |
230 | 231 | * usually used for buffer_heads |
231 | 232 | * if PagePrivate set; used for |
232 | 233 | * swp_entry_t if PageSwapCache |
233 | 234 | * When page is free, this indicates |
234 | 235 | * order in the buddy system. |
235 | 236 | */ |
237 | +#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS | |
238 | + spinlock_t ptl; | |
239 | +#endif | |
240 | + } u; | |
236 | 241 | struct address_space *mapping; /* If low bit clear, points to |
237 | 242 | * inode address_space, or NULL. |
238 | 243 | * If page mapped as anonymous |
... | ... | @@ -260,6 +265,9 @@ |
260 | 265 | #endif /* WANT_PAGE_VIRTUAL */ |
261 | 266 | }; |
262 | 267 | |
268 | +#define page_private(page) ((page)->u.private) | |
269 | +#define set_page_private(page, v) ((page)->u.private = (v)) | |
270 | + | |
263 | 271 | /* |
264 | 272 | * FIXME: take this include out, include page-flags.h in |
265 | 273 | * files which need it (119 of them) |
266 | 274 | |
267 | 275 | |
... | ... | @@ -311,17 +319,17 @@ |
311 | 319 | |
312 | 320 | #ifdef CONFIG_HUGETLB_PAGE |
313 | 321 | |
314 | -static inline int page_count(struct page *p) | |
322 | +static inline int page_count(struct page *page) | |
315 | 323 | { |
316 | - if (PageCompound(p)) | |
317 | - p = (struct page *)p->private; | |
318 | - return atomic_read(&(p)->_count) + 1; | |
324 | + if (PageCompound(page)) | |
325 | + page = (struct page *)page_private(page); | |
326 | + return atomic_read(&page->_count) + 1; | |
319 | 327 | } |
320 | 328 | |
321 | 329 | static inline void get_page(struct page *page) |
322 | 330 | { |
323 | 331 | if (unlikely(PageCompound(page))) |
324 | - page = (struct page *)page->private; | |
332 | + page = (struct page *)page_private(page); | |
325 | 333 | atomic_inc(&page->_count); |
326 | 334 | } |
327 | 335 | |
... | ... | @@ -587,7 +595,7 @@ |
587 | 595 | static inline pgoff_t page_index(struct page *page) |
588 | 596 | { |
589 | 597 | if (unlikely(PageSwapCache(page))) |
590 | - return page->private; | |
598 | + return page_private(page); | |
591 | 599 | return page->index; |
592 | 600 | } |
593 | 601 | |
594 | 602 | |
... | ... | @@ -779,9 +787,31 @@ |
779 | 787 | } |
780 | 788 | #endif /* CONFIG_MMU && !__ARCH_HAS_4LEVEL_HACK */ |
781 | 789 | |
790 | +#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS | |
791 | +/* | |
792 | + * We tuck a spinlock to guard each pagetable page into its struct page, | |
793 | + * at page->private, with BUILD_BUG_ON to make sure that this will not | |
794 | + * overflow into the next struct page (as it might with DEBUG_SPINLOCK). | |
795 | + * When freeing, reset page->mapping so free_pages_check won't complain. | |
796 | + */ | |
797 | +#define __pte_lockptr(page) &((page)->u.ptl) | |
798 | +#define pte_lock_init(_page) do { \ | |
799 | + spin_lock_init(__pte_lockptr(_page)); \ | |
800 | +} while (0) | |
801 | +#define pte_lock_deinit(page) ((page)->mapping = NULL) | |
802 | +#define pte_lockptr(mm, pmd) ({(void)(mm); __pte_lockptr(pmd_page(*(pmd)));}) | |
803 | +#else | |
804 | +/* | |
805 | + * We use mm->page_table_lock to guard all pagetable pages of the mm. | |
806 | + */ | |
807 | +#define pte_lock_init(page) do {} while (0) | |
808 | +#define pte_lock_deinit(page) do {} while (0) | |
809 | +#define pte_lockptr(mm, pmd) ({(void)(pmd); &(mm)->page_table_lock;}) | |
810 | +#endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */ | |
811 | + | |
782 | 812 | #define pte_offset_map_lock(mm, pmd, address, ptlp) \ |
783 | 813 | ({ \ |
784 | - spinlock_t *__ptl = &(mm)->page_table_lock; \ | |
814 | + spinlock_t *__ptl = pte_lockptr(mm, pmd); \ | |
785 | 815 | pte_t *__pte = pte_offset_map(pmd, address); \ |
786 | 816 | *(ptlp) = __ptl; \ |
787 | 817 | spin_lock(__ptl); \ |
kernel/kexec.c
... | ... | @@ -334,7 +334,7 @@ |
334 | 334 | if (pages) { |
335 | 335 | unsigned int count, i; |
336 | 336 | pages->mapping = NULL; |
337 | - pages->private = order; | |
337 | + set_page_private(pages, order); | |
338 | 338 | count = 1 << order; |
339 | 339 | for (i = 0; i < count; i++) |
340 | 340 | SetPageReserved(pages + i); |
... | ... | @@ -347,7 +347,7 @@ |
347 | 347 | { |
348 | 348 | unsigned int order, count, i; |
349 | 349 | |
350 | - order = page->private; | |
350 | + order = page_private(page); | |
351 | 351 | count = 1 << order; |
352 | 352 | for (i = 0; i < count; i++) |
353 | 353 | ClearPageReserved(page + i); |
mm/Kconfig
... | ... | @@ -111,4 +111,17 @@ |
111 | 111 | config SPARSEMEM_EXTREME |
112 | 112 | def_bool y |
113 | 113 | depends on SPARSEMEM && !SPARSEMEM_STATIC |
114 | + | |
115 | +# Heavily threaded applications may benefit from splitting the mm-wide | |
116 | +# page_table_lock, so that faults on different parts of the user address | |
117 | +# space can be handled with less contention: split it at this NR_CPUS. | |
118 | +# Default to 4 for wider testing, though 8 might be more appropriate. | |
119 | +# ARM's adjust_pte (unused if VIPT) depends on mm-wide page_table_lock. | |
120 | +# PA-RISC's debug spinlock_t is too large for the 32-bit struct page. | |
121 | +# | |
122 | +config SPLIT_PTLOCK_CPUS | |
123 | + int | |
124 | + default "4096" if ARM && !CPU_CACHE_VIPT | |
125 | + default "4096" if PARISC && DEBUG_SPINLOCK && !64BIT | |
126 | + default "4" |
mm/filemap.c
... | ... | @@ -152,7 +152,7 @@ |
152 | 152 | * in the ->sync_page() methods make essential use of the |
153 | 153 | * page_mapping(), merely passing the page down to the backing |
154 | 154 | * device's unplug functions when it's non-NULL, which in turn |
155 | - * ignore it for all cases but swap, where only page->private is | |
155 | + * ignore it for all cases but swap, where only page_private(page) is | |
156 | 156 | * of interest. When page_mapping() does go NULL, the entire |
157 | 157 | * call stack gracefully ignores the page and returns. |
158 | 158 | * -- wli |
mm/memory.c
... | ... | @@ -114,6 +114,7 @@ |
114 | 114 | { |
115 | 115 | struct page *page = pmd_page(*pmd); |
116 | 116 | pmd_clear(pmd); |
117 | + pte_lock_deinit(page); | |
117 | 118 | pte_free_tlb(tlb, page); |
118 | 119 | dec_page_state(nr_page_table_pages); |
119 | 120 | tlb->mm->nr_ptes--; |
120 | 121 | |
121 | 122 | |
... | ... | @@ -294,10 +295,12 @@ |
294 | 295 | if (!new) |
295 | 296 | return -ENOMEM; |
296 | 297 | |
298 | + pte_lock_init(new); | |
297 | 299 | spin_lock(&mm->page_table_lock); |
298 | - if (pmd_present(*pmd)) /* Another has populated it */ | |
300 | + if (pmd_present(*pmd)) { /* Another has populated it */ | |
301 | + pte_lock_deinit(new); | |
299 | 302 | pte_free(new); |
300 | - else { | |
303 | + } else { | |
301 | 304 | mm->nr_ptes++; |
302 | 305 | inc_page_state(nr_page_table_pages); |
303 | 306 | pmd_populate(mm, pmd, new); |
... | ... | @@ -432,7 +435,7 @@ |
432 | 435 | if (!dst_pte) |
433 | 436 | return -ENOMEM; |
434 | 437 | src_pte = pte_offset_map_nested(src_pmd, addr); |
435 | - src_ptl = &src_mm->page_table_lock; | |
438 | + src_ptl = pte_lockptr(src_mm, src_pmd); | |
436 | 439 | spin_lock(src_ptl); |
437 | 440 | |
438 | 441 | do { |
439 | 442 | |
440 | 443 | |
... | ... | @@ -1194,15 +1197,16 @@ |
1194 | 1197 | * (but do_wp_page is only called after already making such a check; |
1195 | 1198 | * and do_anonymous_page and do_no_page can safely check later on). |
1196 | 1199 | */ |
1197 | -static inline int pte_unmap_same(struct mm_struct *mm, | |
1200 | +static inline int pte_unmap_same(struct mm_struct *mm, pmd_t *pmd, | |
1198 | 1201 | pte_t *page_table, pte_t orig_pte) |
1199 | 1202 | { |
1200 | 1203 | int same = 1; |
1201 | 1204 | #if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT) |
1202 | 1205 | if (sizeof(pte_t) > sizeof(unsigned long)) { |
1203 | - spin_lock(&mm->page_table_lock); | |
1206 | + spinlock_t *ptl = pte_lockptr(mm, pmd); | |
1207 | + spin_lock(ptl); | |
1204 | 1208 | same = pte_same(*page_table, orig_pte); |
1205 | - spin_unlock(&mm->page_table_lock); | |
1209 | + spin_unlock(ptl); | |
1206 | 1210 | } |
1207 | 1211 | #endif |
1208 | 1212 | pte_unmap(page_table); |
... | ... | @@ -1655,7 +1659,7 @@ |
1655 | 1659 | pte_t pte; |
1656 | 1660 | int ret = VM_FAULT_MINOR; |
1657 | 1661 | |
1658 | - if (!pte_unmap_same(mm, page_table, orig_pte)) | |
1662 | + if (!pte_unmap_same(mm, pmd, page_table, orig_pte)) | |
1659 | 1663 | goto out; |
1660 | 1664 | |
1661 | 1665 | entry = pte_to_swp_entry(orig_pte); |
... | ... | @@ -1773,7 +1777,7 @@ |
1773 | 1777 | page_cache_get(page); |
1774 | 1778 | entry = mk_pte(page, vma->vm_page_prot); |
1775 | 1779 | |
1776 | - ptl = &mm->page_table_lock; | |
1780 | + ptl = pte_lockptr(mm, pmd); | |
1777 | 1781 | spin_lock(ptl); |
1778 | 1782 | if (!pte_none(*page_table)) |
1779 | 1783 | goto release; |
... | ... | @@ -1934,7 +1938,7 @@ |
1934 | 1938 | pgoff_t pgoff; |
1935 | 1939 | int err; |
1936 | 1940 | |
1937 | - if (!pte_unmap_same(mm, page_table, orig_pte)) | |
1941 | + if (!pte_unmap_same(mm, pmd, page_table, orig_pte)) | |
1938 | 1942 | return VM_FAULT_MINOR; |
1939 | 1943 | |
1940 | 1944 | if (unlikely(!(vma->vm_flags & VM_NONLINEAR))) { |
... | ... | @@ -1992,7 +1996,7 @@ |
1992 | 1996 | pte, pmd, write_access, entry); |
1993 | 1997 | } |
1994 | 1998 | |
1995 | - ptl = &mm->page_table_lock; | |
1999 | + ptl = pte_lockptr(mm, pmd); | |
1996 | 2000 | spin_lock(ptl); |
1997 | 2001 | if (unlikely(!pte_same(*pte, entry))) |
1998 | 2002 | goto unlock; |
mm/mremap.c
... | ... | @@ -72,7 +72,7 @@ |
72 | 72 | struct address_space *mapping = NULL; |
73 | 73 | struct mm_struct *mm = vma->vm_mm; |
74 | 74 | pte_t *old_pte, *new_pte, pte; |
75 | - spinlock_t *old_ptl; | |
75 | + spinlock_t *old_ptl, *new_ptl; | |
76 | 76 | |
77 | 77 | if (vma->vm_file) { |
78 | 78 | /* |
79 | 79 | |
... | ... | @@ -88,8 +88,15 @@ |
88 | 88 | new_vma->vm_truncate_count = 0; |
89 | 89 | } |
90 | 90 | |
91 | + /* | |
92 | + * We don't have to worry about the ordering of src and dst | |
93 | + * pte locks because exclusive mmap_sem prevents deadlock. | |
94 | + */ | |
91 | 95 | old_pte = pte_offset_map_lock(mm, old_pmd, old_addr, &old_ptl); |
92 | 96 | new_pte = pte_offset_map_nested(new_pmd, new_addr); |
97 | + new_ptl = pte_lockptr(mm, new_pmd); | |
98 | + if (new_ptl != old_ptl) | |
99 | + spin_lock(new_ptl); | |
93 | 100 | |
94 | 101 | for (; old_addr < old_end; old_pte++, old_addr += PAGE_SIZE, |
95 | 102 | new_pte++, new_addr += PAGE_SIZE) { |
... | ... | @@ -101,6 +108,8 @@ |
101 | 108 | set_pte_at(mm, new_addr, new_pte, pte); |
102 | 109 | } |
103 | 110 | |
111 | + if (new_ptl != old_ptl) | |
112 | + spin_unlock(new_ptl); | |
104 | 113 | pte_unmap_nested(new_pte - 1); |
105 | 114 | pte_unmap_unlock(old_pte - 1, old_ptl); |
106 | 115 | if (mapping) |
mm/page_alloc.c
... | ... | @@ -154,7 +154,7 @@ |
154 | 154 | struct page *p = page + i; |
155 | 155 | |
156 | 156 | SetPageCompound(p); |
157 | - p->private = (unsigned long)page; | |
157 | + set_page_private(p, (unsigned long)page); | |
158 | 158 | } |
159 | 159 | } |
160 | 160 | |
... | ... | @@ -174,7 +174,7 @@ |
174 | 174 | |
175 | 175 | if (!PageCompound(p)) |
176 | 176 | bad_page(__FUNCTION__, page); |
177 | - if (p->private != (unsigned long)page) | |
177 | + if (page_private(p) != (unsigned long)page) | |
178 | 178 | bad_page(__FUNCTION__, page); |
179 | 179 | ClearPageCompound(p); |
180 | 180 | } |
181 | 181 | |
182 | 182 | |
... | ... | @@ -187,18 +187,18 @@ |
187 | 187 | * So, we don't need atomic page->flags operations here. |
188 | 188 | */ |
189 | 189 | static inline unsigned long page_order(struct page *page) { |
190 | - return page->private; | |
190 | + return page_private(page); | |
191 | 191 | } |
192 | 192 | |
193 | 193 | static inline void set_page_order(struct page *page, int order) { |
194 | - page->private = order; | |
194 | + set_page_private(page, order); | |
195 | 195 | __SetPagePrivate(page); |
196 | 196 | } |
197 | 197 | |
198 | 198 | static inline void rmv_page_order(struct page *page) |
199 | 199 | { |
200 | 200 | __ClearPagePrivate(page); |
201 | - page->private = 0; | |
201 | + set_page_private(page, 0); | |
202 | 202 | } |
203 | 203 | |
204 | 204 | /* |
... | ... | @@ -238,7 +238,7 @@ |
238 | 238 | * (a) the buddy is free && |
239 | 239 | * (b) the buddy is on the buddy system && |
240 | 240 | * (c) a page and its buddy have the same order. |
241 | - * for recording page's order, we use page->private and PG_private. | |
241 | + * for recording page's order, we use page_private(page) and PG_private. | |
242 | 242 | * |
243 | 243 | */ |
244 | 244 | static inline int page_is_buddy(struct page *page, int order) |
... | ... | @@ -264,7 +264,7 @@ |
264 | 264 | * parts of the VM system. |
265 | 265 | * At each level, we keep a list of pages, which are heads of continuous |
266 | 266 | * free pages of length of (1 << order) and marked with PG_Private.Page's |
267 | - * order is recorded in page->private field. | |
267 | + * order is recorded in page_private(page) field. | |
268 | 268 | * So when we are allocating or freeing one, we can derive the state of the |
269 | 269 | * other. That is, if we allocate a small block, and both were |
270 | 270 | * free, the remainder of the region must be split into blocks. |
... | ... | @@ -463,7 +463,7 @@ |
463 | 463 | page->flags &= ~(1 << PG_uptodate | 1 << PG_error | |
464 | 464 | 1 << PG_referenced | 1 << PG_arch_1 | |
465 | 465 | 1 << PG_checked | 1 << PG_mappedtodisk); |
466 | - page->private = 0; | |
466 | + set_page_private(page, 0); | |
467 | 467 | set_page_refs(page, order); |
468 | 468 | kernel_map_pages(page, 1 << order, 1); |
469 | 469 | } |
mm/page_io.c
... | ... | @@ -91,7 +91,8 @@ |
91 | 91 | unlock_page(page); |
92 | 92 | goto out; |
93 | 93 | } |
94 | - bio = get_swap_bio(GFP_NOIO, page->private, page, end_swap_bio_write); | |
94 | + bio = get_swap_bio(GFP_NOIO, page_private(page), page, | |
95 | + end_swap_bio_write); | |
95 | 96 | if (bio == NULL) { |
96 | 97 | set_page_dirty(page); |
97 | 98 | unlock_page(page); |
... | ... | @@ -115,7 +116,8 @@ |
115 | 116 | |
116 | 117 | BUG_ON(!PageLocked(page)); |
117 | 118 | ClearPageUptodate(page); |
118 | - bio = get_swap_bio(GFP_KERNEL, page->private, page, end_swap_bio_read); | |
119 | + bio = get_swap_bio(GFP_KERNEL, page_private(page), page, | |
120 | + end_swap_bio_read); | |
119 | 121 | if (bio == NULL) { |
120 | 122 | unlock_page(page); |
121 | 123 | ret = -ENOMEM; |
mm/rmap.c
... | ... | @@ -274,7 +274,7 @@ |
274 | 274 | return NULL; |
275 | 275 | } |
276 | 276 | |
277 | - ptl = &mm->page_table_lock; | |
277 | + ptl = pte_lockptr(mm, pmd); | |
278 | 278 | spin_lock(ptl); |
279 | 279 | if (pte_present(*pte) && page_to_pfn(page) == pte_pfn(*pte)) { |
280 | 280 | *ptlp = ptl; |
... | ... | @@ -550,7 +550,7 @@ |
550 | 550 | update_hiwater_rss(mm); |
551 | 551 | |
552 | 552 | if (PageAnon(page)) { |
553 | - swp_entry_t entry = { .val = page->private }; | |
553 | + swp_entry_t entry = { .val = page_private(page) }; | |
554 | 554 | /* |
555 | 555 | * Store the swap location in the pte. |
556 | 556 | * See handle_pte_fault() ... |
mm/shmem.c
... | ... | @@ -71,9 +71,6 @@ |
71 | 71 | /* Pretend that each entry is of this size in directory's i_size */ |
72 | 72 | #define BOGO_DIRENT_SIZE 20 |
73 | 73 | |
74 | -/* Keep swapped page count in private field of indirect struct page */ | |
75 | -#define nr_swapped private | |
76 | - | |
77 | 74 | /* Flag allocation requirements to shmem_getpage and shmem_swp_alloc */ |
78 | 75 | enum sgp_type { |
79 | 76 | SGP_QUICK, /* don't try more than file page cache lookup */ |
... | ... | @@ -324,8 +321,10 @@ |
324 | 321 | |
325 | 322 | entry->val = value; |
326 | 323 | info->swapped += incdec; |
327 | - if ((unsigned long)(entry - info->i_direct) >= SHMEM_NR_DIRECT) | |
328 | - kmap_atomic_to_page(entry)->nr_swapped += incdec; | |
324 | + if ((unsigned long)(entry - info->i_direct) >= SHMEM_NR_DIRECT) { | |
325 | + struct page *page = kmap_atomic_to_page(entry); | |
326 | + set_page_private(page, page_private(page) + incdec); | |
327 | + } | |
329 | 328 | } |
330 | 329 | |
331 | 330 | /* |
... | ... | @@ -368,9 +367,8 @@ |
368 | 367 | |
369 | 368 | spin_unlock(&info->lock); |
370 | 369 | page = shmem_dir_alloc(mapping_gfp_mask(inode->i_mapping) | __GFP_ZERO); |
371 | - if (page) { | |
372 | - page->nr_swapped = 0; | |
373 | - } | |
370 | + if (page) | |
371 | + set_page_private(page, 0); | |
374 | 372 | spin_lock(&info->lock); |
375 | 373 | |
376 | 374 | if (!page) { |
... | ... | @@ -561,7 +559,7 @@ |
561 | 559 | diroff = 0; |
562 | 560 | } |
563 | 561 | subdir = dir[diroff]; |
564 | - if (subdir && subdir->nr_swapped) { | |
562 | + if (subdir && page_private(subdir)) { | |
565 | 563 | size = limit - idx; |
566 | 564 | if (size > ENTRIES_PER_PAGE) |
567 | 565 | size = ENTRIES_PER_PAGE; |
568 | 566 | |
... | ... | @@ -572,10 +570,10 @@ |
572 | 570 | nr_swaps_freed += freed; |
573 | 571 | if (offset) |
574 | 572 | spin_lock(&info->lock); |
575 | - subdir->nr_swapped -= freed; | |
573 | + set_page_private(subdir, page_private(subdir) - freed); | |
576 | 574 | if (offset) |
577 | 575 | spin_unlock(&info->lock); |
578 | - BUG_ON(subdir->nr_swapped > offset); | |
576 | + BUG_ON(page_private(subdir) > offset); | |
579 | 577 | } |
580 | 578 | if (offset) |
581 | 579 | offset = 0; |
... | ... | @@ -743,7 +741,7 @@ |
743 | 741 | dir = shmem_dir_map(subdir); |
744 | 742 | } |
745 | 743 | subdir = *dir; |
746 | - if (subdir && subdir->nr_swapped) { | |
744 | + if (subdir && page_private(subdir)) { | |
747 | 745 | ptr = shmem_swp_map(subdir); |
748 | 746 | size = limit - idx; |
749 | 747 | if (size > ENTRIES_PER_PAGE) |
mm/swap.c
mm/swap_state.c
... | ... | @@ -83,7 +83,7 @@ |
83 | 83 | page_cache_get(page); |
84 | 84 | SetPageLocked(page); |
85 | 85 | SetPageSwapCache(page); |
86 | - page->private = entry.val; | |
86 | + set_page_private(page, entry.val); | |
87 | 87 | total_swapcache_pages++; |
88 | 88 | pagecache_acct(1); |
89 | 89 | } |
... | ... | @@ -126,8 +126,8 @@ |
126 | 126 | BUG_ON(PageWriteback(page)); |
127 | 127 | BUG_ON(PagePrivate(page)); |
128 | 128 | |
129 | - radix_tree_delete(&swapper_space.page_tree, page->private); | |
130 | - page->private = 0; | |
129 | + radix_tree_delete(&swapper_space.page_tree, page_private(page)); | |
130 | + set_page_private(page, 0); | |
131 | 131 | ClearPageSwapCache(page); |
132 | 132 | total_swapcache_pages--; |
133 | 133 | pagecache_acct(-1); |
... | ... | @@ -197,7 +197,7 @@ |
197 | 197 | { |
198 | 198 | swp_entry_t entry; |
199 | 199 | |
200 | - entry.val = page->private; | |
200 | + entry.val = page_private(page); | |
201 | 201 | |
202 | 202 | write_lock_irq(&swapper_space.tree_lock); |
203 | 203 | __delete_from_swap_cache(page); |
mm/swapfile.c
... | ... | @@ -61,7 +61,7 @@ |
61 | 61 | swp_entry_t entry; |
62 | 62 | |
63 | 63 | down_read(&swap_unplug_sem); |
64 | - entry.val = page->private; | |
64 | + entry.val = page_private(page); | |
65 | 65 | if (PageSwapCache(page)) { |
66 | 66 | struct block_device *bdev = swap_info[swp_type(entry)].bdev; |
67 | 67 | struct backing_dev_info *bdi; |
... | ... | @@ -69,8 +69,8 @@ |
69 | 69 | /* |
70 | 70 | * If the page is removed from swapcache from under us (with a |
71 | 71 | * racy try_to_unuse/swapoff) we need an additional reference |
72 | - * count to avoid reading garbage from page->private above. If | |
73 | - * the WARN_ON triggers during a swapoff it maybe the race | |
72 | + * count to avoid reading garbage from page_private(page) above. | |
73 | + * If the WARN_ON triggers during a swapoff it maybe the race | |
74 | 74 | * condition and it's harmless. However if it triggers without |
75 | 75 | * swapoff it signals a problem. |
76 | 76 | */ |
... | ... | @@ -294,7 +294,7 @@ |
294 | 294 | struct swap_info_struct *p; |
295 | 295 | swp_entry_t entry; |
296 | 296 | |
297 | - entry.val = page->private; | |
297 | + entry.val = page_private(page); | |
298 | 298 | p = swap_info_get(entry); |
299 | 299 | if (p) { |
300 | 300 | /* Subtract the 1 for the swap cache itself */ |
... | ... | @@ -339,7 +339,7 @@ |
339 | 339 | if (page_count(page) != 2) /* 2: us + cache */ |
340 | 340 | return 0; |
341 | 341 | |
342 | - entry.val = page->private; | |
342 | + entry.val = page_private(page); | |
343 | 343 | p = swap_info_get(entry); |
344 | 344 | if (!p) |
345 | 345 | return 0; |
... | ... | @@ -1042,7 +1042,7 @@ |
1042 | 1042 | BUG_ON(!PageLocked(page)); /* It pins the swap_info_struct */ |
1043 | 1043 | |
1044 | 1044 | if (PageSwapCache(page)) { |
1045 | - swp_entry_t entry = { .val = page->private }; | |
1045 | + swp_entry_t entry = { .val = page_private(page) }; | |
1046 | 1046 | struct swap_info_struct *sis; |
1047 | 1047 | |
1048 | 1048 | sis = get_swap_info_struct(swp_type(entry)); |
mm/vmscan.c
... | ... | @@ -521,7 +521,7 @@ |
521 | 521 | |
522 | 522 | #ifdef CONFIG_SWAP |
523 | 523 | if (PageSwapCache(page)) { |
524 | - swp_entry_t swap = { .val = page->private }; | |
524 | + swp_entry_t swap = { .val = page_private(page) }; | |
525 | 525 | __delete_from_swap_cache(page); |
526 | 526 | write_unlock_irq(&mapping->tree_lock); |
527 | 527 | swap_free(swap); |