Commit 3bf5ee95648c694bac4d13529563c230cd4fe5f2

Authored by Hugh Dickins
Committed by Linus Torvalds
Parent: ee39b37b23

[PATCH] freepgt: hugetlb_free_pgd_range

ia64 and ppc64 had hugetlb_free_pgtables functions which were no longer being
called, and it wasn't obvious what to do about them.

The ppc64 case turns out to be easy: the associated tables are noted elsewhere
and freed later, so it is safe either to skip its hugetlb areas or to go
through the motions of freeing nothing.  Since ia64 does need a special case,
restore to ppc64 the special case of skipping them.

The ia64 hugetlb case has been broken since pgd_addr_end went in, though it
probably appeared to work okay if you just had one such area; in fact it's
been broken much longer if you consider a long munmap spanning from another
region into the hugetlb region.

In the ia64 hugetlb region, more virtual address bits are available than in
the other regions, yet the page tables are structured the same way: the page
at the bottom is simply larger.  Here we need to scale down each addr before
passing it to the standard free_pgd_range.  I was about to write a
hugely_scaled_down macro, but found that htlbpage_to_page already exists for
just this purpose.  Also fixed an off-by-one in ia64's is_hugepage_only_range:
a range ending exactly at the top of the hugetlb region used to fail the
check, because (addr)+(len) is the first address of the next region; testing
(addr)+(len)-1 looks at the last byte actually inside the range.
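
For illustration, a minimal user-space sketch of that scaling under made-up
constants (the REGION_* values, the shifts and the scale_down() helper below
are simplified stand-ins, not the kernel's actual macros or htlbpage_to_page
itself; assumes 64-bit unsigned long).  It only shows the arithmetic: keep the
region bits, divide the offset within the region by HPAGE_SIZE/PAGE_SIZE, so
the nth huge page lands on the nth base page.

#include <stdio.h>

/* Toy stand-ins for the ia64 values; illustration only. */
#define PAGE_SHIFT	14UL		/* 16K base pages */
#define HPAGE_SHIFT	28UL		/* 256M huge pages */
#define REGION_SHIFT	61UL
#define REGION_HPAGE	4UL

#define REGION_NUMBER(a)	((unsigned long)(a) >> REGION_SHIFT)
#define REGION_OFFSET(a)	((unsigned long)(a) & ((1UL << REGION_SHIFT) - 1))

/*
 * Keep the region bits, shrink the offset by HPAGE_SIZE/PAGE_SIZE:
 * the same idea as htlbpage_to_page(), not its exact definition.
 */
static unsigned long scale_down(unsigned long addr)
{
	return (REGION_NUMBER(addr) << REGION_SHIFT) |
	       (REGION_OFFSET(addr) >> (HPAGE_SHIFT - PAGE_SHIFT));
}

int main(void)
{
	unsigned long base = REGION_HPAGE << REGION_SHIFT;
	unsigned long addr = base + (3UL << HPAGE_SHIFT);	/* 3rd huge page */

	/* The 3rd huge page scales down to the 3rd base page's offset. */
	printf("offset %#lx -> %#lx\n", addr - base, scale_down(addr) - base);
	return 0;
}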

Uninline free_pgd_range to make it available to ia64.  Make sure the
vma-gathering loop in free_pgtables cannot join a hugepage_only_range to any
other (joining adjacent huge ranges would probably be safe, but don't bother).
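
In outline, the reworked loop behaves like this toy user-space model (the
toy_vma type, TOY_PMD_SIZE and the printed names are made up; the real change
is in the free_pgtables() hunk at the end of the diff): runs of ordinary vmas
are still batched into one free_pgd_range call, while a hugepage-only vma
always goes to hugetlb_free_pgd_range on its own and is never joined to a
neighbour.

#include <stdio.h>
#include <stdbool.h>

/* Toy model of the free_pgtables() gathering logic; types are made up. */
struct toy_vma {
	unsigned long vm_start, vm_end;
	bool huge;			/* stands in for is_hugepage_only_range() */
	struct toy_vma *vm_next;
};

#define TOY_PMD_SIZE	0x200000UL

static void toy_free_pgtables(struct toy_vma *vma)
{
	while (vma) {
		struct toy_vma *next = vma->vm_next;
		unsigned long addr = vma->vm_start;

		if (vma->huge) {
			printf("hugetlb_free_pgd_range %#lx-%#lx\n",
			       addr, vma->vm_end);
		} else {
			/* Gather nearby ordinary vmas, but never a huge one. */
			while (next && next->vm_start <= vma->vm_end + TOY_PMD_SIZE
			       && !next->huge) {
				vma = next;
				next = vma->vm_next;
			}
			printf("free_pgd_range %#lx-%#lx\n", addr, vma->vm_end);
		}
		vma = next;
	}
}

int main(void)
{
	struct toy_vma c = { 0x300000, 0x10300000, true,  NULL };
	struct toy_vma b = { 0x200000, 0x300000,   false, &c };
	struct toy_vma a = { 0x100000, 0x200000,   false, &b };

	/* a and b are batched into one call; c is huge, so it stays separate. */
	toy_free_pgtables(&a);
	return 0;
}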

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

Showing 8 changed files with 65 additions and 38 deletions

arch/ia64/mm/hugetlbpage.c
@@ -186,13 +186,30 @@
 	return NULL;
 }
 
-/*
- * Do nothing, until we've worked out what to do! To allow build, we
- * must remove reference to clear_page_range since it no longer exists.
- */
-void hugetlb_free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *prev,
-			unsigned long start, unsigned long end)
+void hugetlb_free_pgd_range(struct mmu_gather **tlb,
+			unsigned long addr, unsigned long end,
+			unsigned long floor, unsigned long ceiling)
 {
+	/*
+	 * This is called only when is_hugepage_only_range(addr,),
+	 * and it follows that is_hugepage_only_range(end,) also.
+	 *
+	 * The offset of these addresses from the base of the hugetlb
+	 * region must be scaled down by HPAGE_SIZE/PAGE_SIZE so that
+	 * the standard free_pgd_range will free the right page tables.
+	 *
+	 * If floor and ceiling are also in the hugetlb region, they
+	 * must likewise be scaled down; but if outside, left unchanged.
+	 */
+
+	addr = htlbpage_to_page(addr);
+	end = htlbpage_to_page(end);
+	if (is_hugepage_only_range(tlb->mm, floor, HPAGE_SIZE))
+		floor = htlbpage_to_page(floor);
+	if (is_hugepage_only_range(tlb->mm, ceiling, HPAGE_SIZE))
+		ceiling = htlbpage_to_page(ceiling);
+
+	free_pgd_range(tlb, addr, end, floor, ceiling);
 }
 
 void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, unsigned long end)
arch/ppc64/mm/hugetlbpage.c
@@ -430,16 +430,6 @@
 	flush_tlb_pending();
 }
 
-void hugetlb_free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *prev,
-			unsigned long start, unsigned long end)
-{
-	/* Because the huge pgtables are only 2 level, they can take
-	 * at most around 4M, much less than one hugepage which the
-	 * process is presumably entitled to use. So we don't bother
-	 * freeing up the pagetables on unmap, and wait until
-	 * destroy_context() to clean up the lot. */
-}
-
 int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma)
 {
 	struct mm_struct *mm = current->mm;
include/asm-ia64/page.h
@@ -139,7 +139,7 @@
 # define HUGETLB_PAGE_ORDER	(HPAGE_SHIFT - PAGE_SHIFT)
 # define is_hugepage_only_range(mm, addr, len)		\
	 (REGION_NUMBER(addr) == REGION_HPAGE &&	\
-	  REGION_NUMBER((addr)+(len)) == REGION_HPAGE)
+	  REGION_NUMBER((addr)+(len)-1) == REGION_HPAGE)
 extern unsigned int hpage_shift;
 #endif
 
include/asm-ia64/pgtable.h
@@ -472,8 +472,8 @@
 #define HUGETLB_PGDIR_SIZE	(__IA64_UL(1) << HUGETLB_PGDIR_SHIFT)
 #define HUGETLB_PGDIR_MASK	(~(HUGETLB_PGDIR_SIZE-1))
 struct mmu_gather;
-extern void hugetlb_free_pgtables(struct mmu_gather *tlb,
-	struct vm_area_struct * prev, unsigned long start, unsigned long end);
+void hugetlb_free_pgd_range(struct mmu_gather **tlb, unsigned long addr,
+		unsigned long end, unsigned long floor, unsigned long ceiling);
 #endif
 
 /*
include/asm-ppc64/pgtable.h
@@ -500,9 +500,15 @@
 
 extern void paging_init(void);
 
-struct mmu_gather;
-void hugetlb_free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *prev,
-			unsigned long start, unsigned long end);
+/*
+ * Because the huge pgtables are only 2 level, they can take
+ * at most around 4M, much less than one hugepage which the
+ * process is presumably entitled to use. So we don't bother
+ * freeing up the pagetables on unmap, and wait until
+ * destroy_context() to clean up the lot.
+ */
+#define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) \
+	do { } while (0)
 
 /*
  * This gets called at the end of handling a page fault, when
include/linux/hugetlb.h
@@ -37,7 +37,8 @@
 
 #ifndef ARCH_HAS_HUGEPAGE_ONLY_RANGE
 #define is_hugepage_only_range(mm, addr, len)	0
-#define hugetlb_free_pgtables(tlb, prev, start, end) do { } while (0)
+#define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) \
+						do { } while (0)
 #endif
 
 #ifndef ARCH_HAS_PREPARE_HUGEPAGE_RANGE
@@ -72,7 +73,8 @@
 #define prepare_hugepage_range(addr, len)	(-EINVAL)
 #define pmd_huge(x)	0
 #define is_hugepage_only_range(mm, addr, len)	0
-#define hugetlb_free_pgtables(tlb, prev, start, end) do { } while (0)
+#define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) \
+					do { } while (0)
 #define alloc_huge_page()	({ NULL; })
 #define free_huge_page(p)	({ (void)(p); BUG(); })
 
include/linux/mm.h
@@ -587,7 +587,9 @@
 		struct vm_area_struct *start_vma, unsigned long start_addr,
 		unsigned long end_addr, unsigned long *nr_accounted,
 		struct zap_details *);
-void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *vma,
+void free_pgd_range(struct mmu_gather **tlb, unsigned long addr,
+		unsigned long end, unsigned long floor, unsigned long ceiling);
+void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *start_vma,
 		unsigned long floor, unsigned long ceiling);
 int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
 			struct vm_area_struct *vma);
mm/memory.c
@@ -190,7 +190,7 @@
  *
  * Must be called with pagetable lock held.
  */
-static inline void free_pgd_range(struct mmu_gather *tlb,
+void free_pgd_range(struct mmu_gather **tlb,
 			unsigned long addr, unsigned long end,
 			unsigned long floor, unsigned long ceiling)
 {
@@ -241,37 +241,47 @@
 		return;
 
 	start = addr;
-	pgd = pgd_offset(tlb->mm, addr);
+	pgd = pgd_offset((*tlb)->mm, addr);
 	do {
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(pgd))
 			continue;
-		free_pud_range(tlb, pgd, addr, next, floor, ceiling);
+		free_pud_range(*tlb, pgd, addr, next, floor, ceiling);
 	} while (pgd++, addr = next, addr != end);
 
-	if (!tlb_is_full_mm(tlb))
-		flush_tlb_pgtables(tlb->mm, start, end);
+	if (!tlb_is_full_mm(*tlb))
+		flush_tlb_pgtables((*tlb)->mm, start, end);
 }
 
 void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *vma,
-		unsigned long floor, unsigned long ceiling)
+	unsigned long floor, unsigned long ceiling)
 {
 	while (vma) {
 		struct vm_area_struct *next = vma->vm_next;
 		unsigned long addr = vma->vm_start;
 
-		/* Optimization: gather nearby vmas into a single call down */
-		while (next && next->vm_start <= vma->vm_end + PMD_SIZE) {
-			vma = next;
-			next = vma->vm_next;
-		}
-		free_pgd_range(*tlb, addr, vma->vm_end,
+		if (is_hugepage_only_range(vma->vm_mm, addr, HPAGE_SIZE)) {
+			hugetlb_free_pgd_range(tlb, addr, vma->vm_end,
 				floor, next? next->vm_start: ceiling);
+		} else {
+			/*
+			 * Optimization: gather nearby vmas into one call down
+			 */
+			while (next && next->vm_start <= vma->vm_end + PMD_SIZE
+			  && !is_hugepage_only_range(vma->vm_mm, next->vm_start,
+							HPAGE_SIZE)) {
+				vma = next;
+				next = vma->vm_next;
+			}
+			free_pgd_range(tlb, addr, vma->vm_end,
+				floor, next? next->vm_start: ceiling);
+		}
 		vma = next;
 	}
 }
 
-pte_t fastcall * pte_alloc_map(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
+pte_t fastcall *pte_alloc_map(struct mm_struct *mm, pmd_t *pmd,
+			unsigned long address)
 {
 	if (!pmd_present(*pmd)) {
 		struct page *new;