Commit 89f5b7da2a6bad2e84670422ab8192382a5aeb9f
1 parent
9bedbcb207
Exists in
master
and in
4 other branches
Reinstate ZERO_PAGE optimization in 'get_user_pages()' and fix XIP
KAMEZAWA Hiroyuki and Oleg Nesterov point out that since the commit 557ed1fa2620dc119adb86b34c614e152a629a80 ("remove ZERO_PAGE") removed the ZERO_PAGE from the VM mappings, any users of get_user_pages() will generally now populate the VM with real empty pages needlessly.

We used to get the ZERO_PAGE when we did the "handle_mm_fault()", but since fault handling no longer uses ZERO_PAGE for new anonymous pages, we now need to handle that special case in follow_page() instead.

In particular, the removal of ZERO_PAGE effectively removed the core file writing optimization where we would skip writing pages that had not been populated at all, and increased memory pressure a lot by allocating all those useless newly zeroed pages.

This reinstates the optimization by making the unmapped PTE case the same as for a non-existent page table, which already did this correctly.

While at it, this also fixes the XIP case for follow_page(), where the caller could not differentiate between the case of a page that simply could not be used (because it had no "struct page" associated with it) and a page that just wasn't mapped. We do that by simply returning an error pointer for pages that could not be turned into a "struct page *". The error is arbitrarily picked to be EFAULT, since that was what get_user_pages() already used for the equivalent IO-mapped page case.

[ Also removed an impossible test for pte_offset_map_lock() failing: that's not how that function works ]

Acked-by: Oleg Nesterov <oleg@tv-sign.ru>
Acked-by: Nick Piggin <npiggin@suse.de>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Roland McGrath <roland@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Showing 3 changed files with 24 additions and 5 deletions Side-by-side Diff
arch/powerpc/kernel/vdso.c
... | ... | @@ -142,7 +142,7 @@ |
142 | 142 | printk("kpg: %p (c:%d,f:%08lx)", __va(page_to_pfn(pg) << PAGE_SHIFT), |
143 | 143 | page_count(pg), |
144 | 144 | pg->flags); |
145 | - if (upg/* && pg != upg*/) { | |
145 | + if (upg && !IS_ERR(upg) /* && pg != upg*/) { | |
146 | 146 | printk(" upg: %p (c:%d,f:%08lx)", __va(page_to_pfn(upg) |
147 | 147 | << PAGE_SHIFT), |
148 | 148 | page_count(upg), |
mm/memory.c
... | ... | @@ -999,17 +999,15 @@ |
999 | 999 | goto no_page_table; |
1000 | 1000 | |
1001 | 1001 | ptep = pte_offset_map_lock(mm, pmd, address, &ptl); |
1002 | - if (!ptep) | |
1003 | - goto out; | |
1004 | 1002 | |
1005 | 1003 | pte = *ptep; |
1006 | 1004 | if (!pte_present(pte)) |
1007 | - goto unlock; | |
1005 | + goto no_page; | |
1008 | 1006 | if ((flags & FOLL_WRITE) && !pte_write(pte)) |
1009 | 1007 | goto unlock; |
1010 | 1008 | page = vm_normal_page(vma, address, pte); |
1011 | 1009 | if (unlikely(!page)) |
1012 | - goto unlock; | |
1010 | + goto bad_page; | |
1013 | 1011 | |
1014 | 1012 | if (flags & FOLL_GET) |
1015 | 1013 | get_page(page); |
... | ... | @@ -1024,6 +1022,15 @@ |
1024 | 1022 | out: |
1025 | 1023 | return page; |
1026 | 1024 | |
1025 | +bad_page: | |
1026 | + pte_unmap_unlock(ptep, ptl); | |
1027 | + return ERR_PTR(-EFAULT); | |
1028 | + | |
1029 | +no_page: | |
1030 | + pte_unmap_unlock(ptep, ptl); | |
1031 | + if (!pte_none(pte)) | |
1032 | + return page; | |
1033 | + /* Fall through to ZERO_PAGE handling */ | |
1027 | 1034 | no_page_table: |
1028 | 1035 | /* |
1029 | 1036 | * When core dumping an enormous anonymous area that nobody |
... | ... | @@ -1159,6 +1166,8 @@ |
1159 | 1166 | |
1160 | 1167 | cond_resched(); |
1161 | 1168 | } |
1169 | + if (IS_ERR(page)) | |
1170 | + return i ? i : PTR_ERR(page); | |
1162 | 1171 | if (pages) { |
1163 | 1172 | pages[i] = page; |
1164 | 1173 |
mm/migrate.c
... | ... | @@ -865,6 +865,11 @@ |
865 | 865 | goto set_status; |
866 | 866 | |
867 | 867 | page = follow_page(vma, pp->addr, FOLL_GET); |
868 | + | |
869 | + err = PTR_ERR(page); | |
870 | + if (IS_ERR(page)) | |
871 | + goto set_status; | |
872 | + | |
868 | 873 | err = -ENOENT; |
869 | 874 | if (!page) |
870 | 875 | goto set_status; |
... | ... | @@ -928,6 +933,11 @@ |
928 | 933 | goto set_status; |
929 | 934 | |
930 | 935 | page = follow_page(vma, pm->addr, 0); |
936 | + | |
937 | + err = PTR_ERR(page); | |
938 | + if (IS_ERR(page)) | |
939 | + goto set_status; | |
940 | + | |
931 | 941 | err = -ENOENT; |
932 | 942 | /* Use PageReserved to check for zero page */ |
933 | 943 | if (!page || PageReserved(page)) |