Commit 2165009bdf63f79716a36ad545df14c3cdf958b7

Authored by Dave Hansen
Committed by Linus Torvalds
1 parent cfc53f65f5

pagemap: pass mm into pagewalkers

For now we need this at least for huge page detection: powerpc needs the
vm_area_struct to determine whether a virtual address refers to a huge
page (its pmd_huge() doesn't work).

It might also come in handy for some of the other users.
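
For illustration only (not part of this commit), a pmd-level callback under
the new interface looks roughly like the following sketch; the my_data and
my_pmd_range names are hypothetical, and the pattern mirrors
smaps_pte_range() in the diff below:

/*
 * Hypothetical sketch: the mm and the caller's private data now travel
 * inside struct mm_walk instead of being passed as a bare void *, so
 * walk->mm is available to code (such as powerpc huge page detection)
 * that needs more than the pmd itself.
 */
struct my_data {
        unsigned long present;
};

static int my_pmd_range(pmd_t *pmd, unsigned long addr, unsigned long end,
                        struct mm_walk *walk)
{
        struct my_data *md = walk->private;     /* was the void *private arg */
        spinlock_t *ptl;
        pte_t *pte;

        pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
        for (; addr != end; pte++, addr += PAGE_SIZE)
                if (pte_present(*pte))
                        md->present++;
        pte_unmap_unlock(pte - 1, ptl);
        return 0;
}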

Signed-off-by: Dave Hansen <dave@linux.vnet.ibm.com>
Acked-by: Matt Mackall <mpm@selenic.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 3 changed files with 56 additions and 47 deletions

fs/proc/task_mmu.c
... ... @@ -315,9 +315,9 @@
315 315 };
316 316  
317 317 static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
318   - void *private)
  318 + struct mm_walk *walk)
319 319 {
320   - struct mem_size_stats *mss = private;
  320 + struct mem_size_stats *mss = walk->private;
321 321 struct vm_area_struct *vma = mss->vma;
322 322 pte_t *pte, ptent;
323 323 spinlock_t *ptl;
... ... @@ -365,19 +365,21 @@
365 365 return 0;
366 366 }
367 367  
368   -static struct mm_walk smaps_walk = { .pmd_entry = smaps_pte_range };
369   -
370 368 static int show_smap(struct seq_file *m, void *v)
371 369 {
372 370 struct vm_area_struct *vma = v;
373 371 struct mem_size_stats mss;
374 372 int ret;
  373 + struct mm_walk smaps_walk = {
  374 + .pmd_entry = smaps_pte_range,
  375 + .mm = vma->vm_mm,
  376 + .private = &mss,
  377 + };
375 378  
376 379 memset(&mss, 0, sizeof mss);
377 380 mss.vma = vma;
378 381 if (vma->vm_mm && !is_vm_hugetlb_page(vma))
379   - walk_page_range(vma->vm_mm, vma->vm_start, vma->vm_end,
380   - &smaps_walk, &mss);
  382 + walk_page_range(vma->vm_start, vma->vm_end, &smaps_walk);
381 383  
382 384 ret = show_map(m, v);
383 385 if (ret)
... ... @@ -426,9 +428,9 @@
426 428 };
427 429  
428 430 static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
429   - unsigned long end, void *private)
  431 + unsigned long end, struct mm_walk *walk)
430 432 {
431   - struct vm_area_struct *vma = private;
  433 + struct vm_area_struct *vma = walk->private;
432 434 pte_t *pte, ptent;
433 435 spinlock_t *ptl;
434 436 struct page *page;
... ... @@ -452,8 +454,6 @@
452 454 return 0;
453 455 }
454 456  
455   -static struct mm_walk clear_refs_walk = { .pmd_entry = clear_refs_pte_range };
456   -
457 457 static ssize_t clear_refs_write(struct file *file, const char __user *buf,
458 458 size_t count, loff_t *ppos)
459 459 {
... ... @@ -476,11 +476,17 @@
476 476 return -ESRCH;
477 477 mm = get_task_mm(task);
478 478 if (mm) {
  479 + static struct mm_walk clear_refs_walk;
  480 + memset(&clear_refs_walk, 0, sizeof(clear_refs_walk));
  481 + clear_refs_walk.pmd_entry = clear_refs_pte_range;
  482 + clear_refs_walk.mm = mm;
479 483 down_read(&mm->mmap_sem);
480   - for (vma = mm->mmap; vma; vma = vma->vm_next)
  484 + for (vma = mm->mmap; vma; vma = vma->vm_next) {
  485 + clear_refs_walk.private = vma;
481 486 if (!is_vm_hugetlb_page(vma))
482   - walk_page_range(mm, vma->vm_start, vma->vm_end,
483   - &clear_refs_walk, vma);
  487 + walk_page_range(vma->vm_start, vma->vm_end,
  488 + &clear_refs_walk);
  489 + }
484 490 flush_tlb_mm(mm);
485 491 up_read(&mm->mmap_sem);
486 492 mmput(mm);
... ... @@ -528,9 +534,9 @@
528 534 }
529 535  
530 536 static int pagemap_pte_hole(unsigned long start, unsigned long end,
531   - void *private)
  537 + struct mm_walk *walk)
532 538 {
533   - struct pagemapread *pm = private;
  539 + struct pagemapread *pm = walk->private;
534 540 unsigned long addr;
535 541 int err = 0;
536 542 for (addr = start; addr < end; addr += PAGE_SIZE) {
... ... @@ -548,9 +554,9 @@
548 554 }
549 555  
550 556 static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
551   - void *private)
  557 + struct mm_walk *walk)
552 558 {
553   - struct pagemapread *pm = private;
  559 + struct pagemapread *pm = walk->private;
554 560 pte_t *pte;
555 561 int err = 0;
556 562  
... ... @@ -675,8 +681,8 @@
675 681 * user buffer is tracked in "pm", and the walk
676 682 * will stop when we hit the end of the buffer.
677 683 */
678   - ret = walk_page_range(mm, start_vaddr, end_vaddr,
679   - &pagemap_walk, &pm);
  684 + ret = walk_page_range(start_vaddr, end_vaddr,
  685 + &pagemap_walk);
680 686 if (ret == PM_END_OF_BUFFER)
681 687 ret = 0;
682 688 /* don't need mmap_sem for these, but this looks cleaner */
include/linux/mm.h
... ... @@ -760,16 +760,17 @@
760 760 * (see walk_page_range for more details)
761 761 */
762 762 struct mm_walk {
763   - int (*pgd_entry)(pgd_t *, unsigned long, unsigned long, void *);
764   - int (*pud_entry)(pud_t *, unsigned long, unsigned long, void *);
765   - int (*pmd_entry)(pmd_t *, unsigned long, unsigned long, void *);
766   - int (*pte_entry)(pte_t *, unsigned long, unsigned long, void *);
767   - int (*pte_hole)(unsigned long, unsigned long, void *);
  763 + int (*pgd_entry)(pgd_t *, unsigned long, unsigned long, struct mm_walk *);
  764 + int (*pud_entry)(pud_t *, unsigned long, unsigned long, struct mm_walk *);
  765 + int (*pmd_entry)(pmd_t *, unsigned long, unsigned long, struct mm_walk *);
  766 + int (*pte_entry)(pte_t *, unsigned long, unsigned long, struct mm_walk *);
  767 + int (*pte_hole)(unsigned long, unsigned long, struct mm_walk *);
  768 + struct mm_struct *mm;
  769 + void *private;
768 770 };
769 771  
770   -int walk_page_range(const struct mm_struct *, unsigned long addr,
771   - unsigned long end, const struct mm_walk *walk,
772   - void *private);
  772 +int walk_page_range(unsigned long addr, unsigned long end,
  773 + struct mm_walk *walk);
773 774 void free_pgd_range(struct mmu_gather **tlb, unsigned long addr,
774 775 unsigned long end, unsigned long floor, unsigned long ceiling);
775 776 void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *start_vma,
mm/pagewalk.c
... ... @@ -3,14 +3,14 @@
3 3 #include <linux/sched.h>
4 4  
5 5 static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
6   - const struct mm_walk *walk, void *private)
  6 + struct mm_walk *walk)
7 7 {
8 8 pte_t *pte;
9 9 int err = 0;
10 10  
11 11 pte = pte_offset_map(pmd, addr);
12 12 for (;;) {
13   - err = walk->pte_entry(pte, addr, addr + PAGE_SIZE, private);
  13 + err = walk->pte_entry(pte, addr, addr + PAGE_SIZE, walk);
14 14 if (err)
15 15 break;
16 16 addr += PAGE_SIZE;
... ... @@ -24,7 +24,7 @@
24 24 }
25 25  
26 26 static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
27   - const struct mm_walk *walk, void *private)
  27 + struct mm_walk *walk)
28 28 {
29 29 pmd_t *pmd;
30 30 unsigned long next;
... ... @@ -35,15 +35,15 @@
35 35 next = pmd_addr_end(addr, end);
36 36 if (pmd_none_or_clear_bad(pmd)) {
37 37 if (walk->pte_hole)
38   - err = walk->pte_hole(addr, next, private);
  38 + err = walk->pte_hole(addr, next, walk);
39 39 if (err)
40 40 break;
41 41 continue;
42 42 }
43 43 if (walk->pmd_entry)
44   - err = walk->pmd_entry(pmd, addr, next, private);
  44 + err = walk->pmd_entry(pmd, addr, next, walk);
45 45 if (!err && walk->pte_entry)
46   - err = walk_pte_range(pmd, addr, next, walk, private);
  46 + err = walk_pte_range(pmd, addr, next, walk);
47 47 if (err)
48 48 break;
49 49 } while (pmd++, addr = next, addr != end);
... ... @@ -52,7 +52,7 @@
52 52 }
53 53  
54 54 static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end,
55   - const struct mm_walk *walk, void *private)
  55 + struct mm_walk *walk)
56 56 {
57 57 pud_t *pud;
58 58 unsigned long next;
... ... @@ -63,15 +63,15 @@
63 63 next = pud_addr_end(addr, end);
64 64 if (pud_none_or_clear_bad(pud)) {
65 65 if (walk->pte_hole)
66   - err = walk->pte_hole(addr, next, private);
  66 + err = walk->pte_hole(addr, next, walk);
67 67 if (err)
68 68 break;
69 69 continue;
70 70 }
71 71 if (walk->pud_entry)
72   - err = walk->pud_entry(pud, addr, next, private);
  72 + err = walk->pud_entry(pud, addr, next, walk);
73 73 if (!err && (walk->pmd_entry || walk->pte_entry))
74   - err = walk_pmd_range(pud, addr, next, walk, private);
  74 + err = walk_pmd_range(pud, addr, next, walk);
75 75 if (err)
76 76 break;
77 77 } while (pud++, addr = next, addr != end);
... ... @@ -85,15 +85,15 @@
85 85 * @addr: starting address
86 86 * @end: ending address
87 87 * @walk: set of callbacks to invoke for each level of the tree
88   - * @private: private data passed to the callback function
89 88 *
90 89 * Recursively walk the page table for the memory area in a VMA,
91 90 * calling supplied callbacks. Callbacks are called in-order (first
92 91 * PGD, first PUD, first PMD, first PTE, second PTE... second PMD,
93 92 * etc.). If lower-level callbacks are omitted, walking depth is reduced.
94 93 *
95   - * Each callback receives an entry pointer, the start and end of the
96   - * associated range, and a caller-supplied private data pointer.
  94 + * Each callback receives an entry pointer and the start and end of the
  95 + * associated range, and a copy of the original mm_walk for access to
  96 + * the ->private or ->mm fields.
97 97 *
98 98 * No locks are taken, but the bottom level iterator will map PTE
99 99 * directories from highmem if necessary.
... ... @@ -101,9 +101,8 @@
101 101 * If any callback returns a non-zero value, the walk is aborted and
102 102 * the return value is propagated back to the caller. Otherwise 0 is returned.
103 103 */
104   -int walk_page_range(const struct mm_struct *mm,
105   - unsigned long addr, unsigned long end,
106   - const struct mm_walk *walk, void *private)
  104 +int walk_page_range(unsigned long addr, unsigned long end,
  105 + struct mm_walk *walk)
107 106 {
108 107 pgd_t *pgd;
109 108 unsigned long next;
... ... @@ -112,21 +111,24 @@
112 111 if (addr >= end)
113 112 return err;
114 113  
115   - pgd = pgd_offset(mm, addr);
  114 + if (!walk->mm)
  115 + return -EINVAL;
  116 +
  117 + pgd = pgd_offset(walk->mm, addr);
116 118 do {
117 119 next = pgd_addr_end(addr, end);
118 120 if (pgd_none_or_clear_bad(pgd)) {
119 121 if (walk->pte_hole)
120   - err = walk->pte_hole(addr, next, private);
  122 + err = walk->pte_hole(addr, next, walk);
121 123 if (err)
122 124 break;
123 125 continue;
124 126 }
125 127 if (walk->pgd_entry)
126   - err = walk->pgd_entry(pgd, addr, next, private);
  128 + err = walk->pgd_entry(pgd, addr, next, walk);
127 129 if (!err &&
128 130 (walk->pud_entry || walk->pmd_entry || walk->pte_entry))
129   - err = walk_pud_range(pgd, addr, next, walk, private);
  131 + err = walk_pud_range(pgd, addr, next, walk);
130 132 if (err)
131 133 break;
132 134 } while (pgd++, addr = next, addr != end);
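
Putting it together (illustration only, using the hypothetical names from the
sketch above): a caller now fills in struct mm_walk, including ->mm, which
walk_page_range() requires (it returns -EINVAL if ->mm is NULL), and then
passes just the address range and the walk:

        struct my_data md = { 0 };
        struct mm_walk my_walk = {
                .pmd_entry = my_pmd_range,
                .mm = vma->vm_mm,       /* now required by walk_page_range() */
                .private = &md,
        };

        if (!is_vm_hugetlb_page(vma))
                walk_page_range(vma->vm_start, vma->vm_end, &my_walk);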