Blame view
mm/pagewalk.c
7.53 KB
e6473092b maps4: introduce ... |
1 2 3 |
#include <linux/mm.h> #include <linux/highmem.h> #include <linux/sched.h> |
d33b9f45b mm: hugetlb: fix ... |
4 |
#include <linux/hugetlb.h> |
e6473092b maps4: introduce ... |
5 6 |
/*
 * Walk the ptes of one pmd over [addr, end), calling walk->pte_entry on
 * each one.  The caller (walk_pmd_range) has already ruled out an empty
 * or unstable pmd and verified that walk->pte_entry is set.
 */
static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
			  struct mm_walk *walk)
{
	pte_t *pte;
	int err = 0;

	pte = pte_offset_map(pmd, addr);
	for (;;) {
		err = walk->pte_entry(pte, addr, addr + PAGE_SIZE, walk);
		if (err)
			break;
		/* advance before testing so the last pte is still mapped below */
		addr += PAGE_SIZE;
		if (addr == end)
			break;
		pte++;
	}

	pte_unmap(pte);
	return err;
}

/*
 * Walk the pmds of one pud over [addr, end).  Holes (and walks without a
 * vma) go to ->pte_hole; present pmds go to ->pmd_entry and, when
 * ->pte_entry is set, are split and descended into pte granularity.
 */
static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
			  struct mm_walk *walk)
{
	pmd_t *pmd;
	unsigned long next;
	int err = 0;

	pmd = pmd_offset(pud, addr);
	do {
again:
		next = pmd_addr_end(addr, end);
		if (pmd_none(*pmd) || !walk->vma) {
			if (walk->pte_hole)
				err = walk->pte_hole(addr, next, walk);
			if (err)
				break;
			continue;
		}
		/*
		 * This implies that each ->pmd_entry() handler
		 * needs to know about pmd_trans_huge() pmds
		 */
		if (walk->pmd_entry)
			err = walk->pmd_entry(pmd, addr, next, walk);
		if (err)
			break;

		/*
		 * Check this here so we only break down trans_huge
		 * pages when we _need_ to
		 */
		if (!walk->pte_entry)
			continue;

		split_huge_pmd(walk->vma, pmd, addr);
		/* the split may race; retry this pmd from scratch if unstable */
		if (pmd_trans_unstable(pmd))
			goto again;
		err = walk_pte_range(pmd, addr, next, walk);
		if (err)
			break;
	} while (pmd++, addr = next, addr != end);

	return err;
}

/*
 * Walk the puds of one pgd over [addr, end), reporting holes via
 * ->pte_hole and descending into pmds only when a pmd- or pte-level
 * callback is actually installed.
 */
static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end,
			  struct mm_walk *walk)
{
	pud_t *pud;
	unsigned long next;
	int err = 0;

	pud = pud_offset(pgd, addr);
	do {
		next = pud_addr_end(addr, end);
		if (pud_none_or_clear_bad(pud)) {
			if (walk->pte_hole)
				err = walk->pte_hole(addr, next, walk);
			if (err)
				break;
			continue;
		}
		if (walk->pmd_entry || walk->pte_entry)
			err = walk_pmd_range(pud, addr, next, walk);
		if (err)
			break;
	} while (pud++, addr = next, addr != end);

	return err;
}
fafaa4264 pagewalk: improve... |
96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 |
/*
 * Top of the page-table walk: iterate the pgds of walk->mm over
 * [addr, end).  Holes go to ->pte_hole; populated entries are descended
 * into only when a lower-level callback is installed.  Note 'continue'
 * still executes the (pgd++, addr = next) step in the loop condition.
 */
static int walk_pgd_range(unsigned long addr, unsigned long end,
			  struct mm_walk *walk)
{
	pgd_t *pgd;
	unsigned long next;
	int err = 0;

	pgd = pgd_offset(walk->mm, addr);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none_or_clear_bad(pgd)) {
			if (walk->pte_hole)
				err = walk->pte_hole(addr, next, walk);
			if (err)
				break;
			continue;
		}
		if (walk->pmd_entry || walk->pte_entry)
			err = walk_pud_range(pgd, addr, next, walk);
		if (err)
			break;
	} while (pgd++, addr = next, addr != end);

	return err;
}
116354d17 pagemap: fix pfn ... |
121 122 123 124 125 126 127 |
#ifdef CONFIG_HUGETLB_PAGE
/*
 * Clamp the end of the current huge page to @end: the walk advances one
 * huge page per iteration, but the last one may extend past the range.
 */
static unsigned long hugetlb_entry_end(struct hstate *h, unsigned long addr,
				       unsigned long end)
{
	unsigned long boundary = (addr & huge_page_mask(h)) + huge_page_size(h);
	return boundary < end ? boundary : end;
}

/*
 * Walk the hugetlb entries of walk->vma over [addr, end), one huge page
 * per iteration, calling ->hugetlb_entry on each present entry.
 *
 * Fix: an absent hugetlb pte (a hole) used to be skipped silently, so
 * callers such as mincore()/pagemap reported stale data for unmapped
 * hugetlb ranges (upstream commit 373c4557d2aa, CVE-2017-16994).
 * Report holes through ->pte_hole like every other level of the walk.
 */
static int walk_hugetlb_range(unsigned long addr, unsigned long end,
			      struct mm_walk *walk)
{
	struct vm_area_struct *vma = walk->vma;
	struct hstate *h = hstate_vma(vma);
	unsigned long next;
	unsigned long hmask = huge_page_mask(h);
	pte_t *pte;
	int err = 0;

	do {
		next = hugetlb_entry_end(h, addr, end);
		pte = huge_pte_offset(walk->mm, addr & hmask);
		if (pte && walk->hugetlb_entry)
			err = walk->hugetlb_entry(pte, hmask, addr, next, walk);
		else if (!pte && walk->pte_hole)
			/* no page table here: report the hole */
			err = walk->pte_hole(addr, next, walk);
		if (err)
			break;
	} while (addr = next, addr != end);

	return err;
}

#else /* CONFIG_HUGETLB_PAGE */
/* !CONFIG_HUGETLB_PAGE: no hugetlb vmas can exist, nothing to walk. */
static int walk_hugetlb_range(unsigned long addr, unsigned long end,
			      struct mm_walk *walk)
{
	return 0;
}

#endif /* CONFIG_HUGETLB_PAGE */
fafaa4264 pagewalk: improve... |
157 158 159 160 161 |
/* * Decide whether we really walk over the current vma on [@start, @end) * or skip it via the returned value. Return 0 if we do walk over the * current vma, and return 1 if we skip the vma. Negative values means * error, where we abort the current walk. |
fafaa4264 pagewalk: improve... |
162 163 164 165 166 |
*/ static int walk_page_test(unsigned long start, unsigned long end, struct mm_walk *walk) { struct vm_area_struct *vma = walk->vma; |
6c6d52804 pagewalk: don't l... |
167 |
|
fafaa4264 pagewalk: improve... |
168 169 170 171 |
if (walk->test_walk) return walk->test_walk(start, end, walk); /* |
48684a65b mm: pagewalk: fix... |
172 173 174 175 176 177 |
* vma(VM_PFNMAP) doesn't have any valid struct pages behind VM_PFNMAP * range, so we don't walk over it as we do for normal vmas. However, * Some callers are interested in handling hole range and they don't * want to just ignore any single address range. Such users certainly * define their ->pte_hole() callbacks, so let's delegate them to handle * vma(VM_PFNMAP). |
fafaa4264 pagewalk: improve... |
178 |
*/ |
48684a65b mm: pagewalk: fix... |
179 180 181 182 183 184 |
if (vma->vm_flags & VM_PFNMAP) { int err = 1; if (walk->pte_hole) err = walk->pte_hole(start, end, walk); return err ? err : 1; } |
fafaa4264 pagewalk: improve... |
185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 |
return 0; } static int __walk_page_range(unsigned long start, unsigned long end, struct mm_walk *walk) { int err = 0; struct vm_area_struct *vma = walk->vma; if (vma && is_vm_hugetlb_page(vma)) { if (walk->hugetlb_entry) err = walk_hugetlb_range(start, end, walk); } else err = walk_pgd_range(start, end, walk); return err; } |
116354d17 pagemap: fix pfn ... |
202 |
|
e6473092b maps4: introduce ... |
203 |
/**
 * walk_page_range - walk page table with caller specific callbacks
 *
 * Recursively walk the page table tree of the process represented by @walk->mm
 * within the virtual address range [@start, @end). During walking, we can do
 * some caller-specific works for each entry, by setting up pmd_entry(),
 * pte_entry(), and/or hugetlb_entry(). If you don't set up for some of these
 * callbacks, the associated entries/pages are just ignored.
 * The return values of these callbacks are commonly defined like below:
 *  - 0  : succeeded to handle the current entry, and if you don't reach the
 *         end address yet, continue to walk.
 *  - >0 : succeeded to handle the current entry, and return to the caller
 *         with caller specific value.
 *  - <0 : failed to handle the current entry, and return to the caller
 *         with error code.
 *
 * Before starting to walk page table, some callers want to check whether
 * they really want to walk over the current vma, typically by checking
 * its vm_flags. walk_page_test() and @walk->test_walk() are used for this
 * purpose.
 *
 * struct mm_walk keeps current values of some common data like vma and pmd,
 * which are useful for the access from callbacks. If you want to pass some
 * caller-specific data to callbacks, @walk->private should be helpful.
 *
 * Locking:
 *   Callers of walk_page_range() and walk_page_vma() should hold
 *   @walk->mm->mmap_sem, because these function traverse vma list and/or
 *   access to vma's data.
 */
int walk_page_range(unsigned long start, unsigned long end,
		    struct mm_walk *walk)
{
	int err = 0;
	unsigned long next;
	struct vm_area_struct *vma;

	if (start >= end)
		return -EINVAL;

	if (!walk->mm)
		return -EINVAL;

	VM_BUG_ON_MM(!rwsem_is_locked(&walk->mm->mmap_sem), walk->mm);

	vma = find_vma(walk->mm, start);
	do {
		if (!vma) { /* after the last vma */
			walk->vma = NULL;
			next = end;
		} else if (start < vma->vm_start) { /* outside vma */
			walk->vma = NULL;
			next = min(end, vma->vm_start);
		} else { /* inside vma */
			walk->vma = vma;
			next = min(end, vma->vm_end);
			vma = vma->vm_next;

			err = walk_page_test(start, next, walk);
			if (err > 0) {
				/*
				 * positive return values are purely for
				 * controlling the pagewalk, so should never
				 * be passed to the callers.
				 */
				err = 0;
				continue;
			}
			if (err < 0)
				break;
		}
		/*
		 * Walk the chunk when we are inside a vma, or when the caller
		 * wants holes (walk->vma == NULL) reported via ->pte_hole.
		 */
		if (walk->vma || walk->pte_hole)
			err = __walk_page_range(start, next, walk);
		if (err)
			break;
	} while (start = next, start < end);
	return err;
}
900fc5f19 pagewalk: add wal... |
280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 |
/*
 * Walk the page tables of exactly one vma, applying the same test/skip
 * policy as walk_page_range().  Positive values from walk_page_test()
 * mean "skip" and are translated to 0 for the caller.
 * Caller must hold walk->mm->mmap_sem.
 */
int walk_page_vma(struct vm_area_struct *vma, struct mm_walk *walk)
{
	int ret;

	if (!walk->mm)
		return -EINVAL;

	VM_BUG_ON(!rwsem_is_locked(&walk->mm->mmap_sem));
	VM_BUG_ON(!vma);

	walk->vma = vma;
	ret = walk_page_test(vma->vm_start, vma->vm_end, walk);
	if (ret < 0)
		return ret;
	if (ret > 0)
		return 0;
	return __walk_page_range(vma->vm_start, vma->vm_end, walk);
}