/* mm/pagewalk.c — generic page table walker */
b24413180 License cleanup: ... |
1 |
// SPDX-License-Identifier: GPL-2.0 |
a520110e4 mm: split out a n... |
2 |
#include <linux/pagewalk.h> |
e6473092b maps4: introduce ... |
3 4 |
#include <linux/highmem.h> #include <linux/sched.h> |
d33b9f45b mm: hugetlb: fix ... |
5 |
#include <linux/hugetlb.h> |
e6473092b maps4: introduce ... |
6 7 |
static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, |
2165009bd pagemap: pass mm ... |
8 |
struct mm_walk *walk) |
e6473092b maps4: introduce ... |
9 10 11 |
{ pte_t *pte; int err = 0; |
7b86ac337 pagewalk: separat... |
12 |
const struct mm_walk_ops *ops = walk->ops; |
e6473092b maps4: introduce ... |
13 14 |
pte = pte_offset_map(pmd, addr); |
556637cda mm: fix possible ... |
15 |
for (;;) { |
7b86ac337 pagewalk: separat... |
16 |
err = ops->pte_entry(pte, addr, addr + PAGE_SIZE, walk); |
e6473092b maps4: introduce ... |
17 18 |
if (err) break; |
556637cda mm: fix possible ... |
19 20 21 22 23 |
addr += PAGE_SIZE; if (addr == end) break; pte++; } |
e6473092b maps4: introduce ... |
24 25 26 27 28 29 |
pte_unmap(pte); return err; } static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end, |
2165009bd pagemap: pass mm ... |
30 |
struct mm_walk *walk) |
e6473092b maps4: introduce ... |
31 32 33 |
{ pmd_t *pmd; unsigned long next; |
7b86ac337 pagewalk: separat... |
34 |
const struct mm_walk_ops *ops = walk->ops; |
e6473092b maps4: introduce ... |
35 36 37 38 |
int err = 0; pmd = pmd_offset(pud, addr); do { |
033193275 pagewalk: only sp... |
39 |
again: |
e6473092b maps4: introduce ... |
40 |
next = pmd_addr_end(addr, end); |
48684a65b mm: pagewalk: fix... |
41 |
if (pmd_none(*pmd) || !walk->vma) { |
7b86ac337 pagewalk: separat... |
42 43 |
if (ops->pte_hole) err = ops->pte_hole(addr, next, walk); |
e6473092b maps4: introduce ... |
44 45 46 47 |
if (err) break; continue; } |
033193275 pagewalk: only sp... |
48 49 50 51 |
/* * This implies that each ->pmd_entry() handler * needs to know about pmd_trans_huge() pmds */ |
7b86ac337 pagewalk: separat... |
52 53 |
if (ops->pmd_entry) err = ops->pmd_entry(pmd, addr, next, walk); |
033193275 pagewalk: only sp... |
54 55 56 57 58 59 60 |
if (err) break; /* * Check this here so we only break down trans_huge * pages when we _need_ to */ |
7b86ac337 pagewalk: separat... |
61 |
if (!ops->pte_entry) |
033193275 pagewalk: only sp... |
62 |
continue; |
78ddc5347 thp: rename split... |
63 |
split_huge_pmd(walk->vma, pmd, addr); |
fafaa4264 pagewalk: improve... |
64 |
if (pmd_trans_unstable(pmd)) |
033193275 pagewalk: only sp... |
65 66 |
goto again; err = walk_pte_range(pmd, addr, next, walk); |
e6473092b maps4: introduce ... |
67 68 69 70 71 72 |
if (err) break; } while (pmd++, addr = next, addr != end); return err; } |
c2febafc6 mm: convert gener... |
73 |
static int walk_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end, |
2165009bd pagemap: pass mm ... |
74 |
struct mm_walk *walk) |
e6473092b maps4: introduce ... |
75 76 77 |
{ pud_t *pud; unsigned long next; |
7b86ac337 pagewalk: separat... |
78 |
const struct mm_walk_ops *ops = walk->ops; |
e6473092b maps4: introduce ... |
79 |
int err = 0; |
c2febafc6 mm: convert gener... |
80 |
pud = pud_offset(p4d, addr); |
e6473092b maps4: introduce ... |
81 |
do { |
a00cc7d9d mm, x86: add supp... |
82 |
again: |
e6473092b maps4: introduce ... |
83 |
next = pud_addr_end(addr, end); |
a00cc7d9d mm, x86: add supp... |
84 |
if (pud_none(*pud) || !walk->vma) { |
7b86ac337 pagewalk: separat... |
85 86 |
if (ops->pte_hole) err = ops->pte_hole(addr, next, walk); |
e6473092b maps4: introduce ... |
87 88 89 90 |
if (err) break; continue; } |
a00cc7d9d mm, x86: add supp... |
91 |
|
7b86ac337 pagewalk: separat... |
92 |
if (ops->pud_entry) { |
a00cc7d9d mm, x86: add supp... |
93 94 95 |
spinlock_t *ptl = pud_trans_huge_lock(pud, walk->vma); if (ptl) { |
7b86ac337 pagewalk: separat... |
96 |
err = ops->pud_entry(pud, addr, next, walk); |
a00cc7d9d mm, x86: add supp... |
97 98 99 100 101 102 103 104 105 106 |
spin_unlock(ptl); if (err) break; continue; } } split_huge_pud(walk->vma, pud, addr); if (pud_none(*pud)) goto again; |
7b86ac337 pagewalk: separat... |
107 |
if (ops->pmd_entry || ops->pte_entry) |
2165009bd pagemap: pass mm ... |
108 |
err = walk_pmd_range(pud, addr, next, walk); |
e6473092b maps4: introduce ... |
109 110 111 112 113 114 |
if (err) break; } while (pud++, addr = next, addr != end); return err; } |
c2febafc6 mm: convert gener... |
115 116 117 118 119 |
static int walk_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end, struct mm_walk *walk) { p4d_t *p4d; unsigned long next; |
7b86ac337 pagewalk: separat... |
120 |
const struct mm_walk_ops *ops = walk->ops; |
c2febafc6 mm: convert gener... |
121 122 123 124 125 126 |
int err = 0; p4d = p4d_offset(pgd, addr); do { next = p4d_addr_end(addr, end); if (p4d_none_or_clear_bad(p4d)) { |
7b86ac337 pagewalk: separat... |
127 128 |
if (ops->pte_hole) err = ops->pte_hole(addr, next, walk); |
c2febafc6 mm: convert gener... |
129 130 131 132 |
if (err) break; continue; } |
7b86ac337 pagewalk: separat... |
133 |
if (ops->pmd_entry || ops->pte_entry) |
c2febafc6 mm: convert gener... |
134 135 136 137 138 139 140 |
err = walk_pud_range(p4d, addr, next, walk); if (err) break; } while (p4d++, addr = next, addr != end); return err; } |
fafaa4264 pagewalk: improve... |
141 142 143 144 145 |
static int walk_pgd_range(unsigned long addr, unsigned long end, struct mm_walk *walk) { pgd_t *pgd; unsigned long next; |
7b86ac337 pagewalk: separat... |
146 |
const struct mm_walk_ops *ops = walk->ops; |
fafaa4264 pagewalk: improve... |
147 148 149 150 151 152 |
int err = 0; pgd = pgd_offset(walk->mm, addr); do { next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(pgd)) { |
7b86ac337 pagewalk: separat... |
153 154 |
if (ops->pte_hole) err = ops->pte_hole(addr, next, walk); |
fafaa4264 pagewalk: improve... |
155 156 157 158 |
if (err) break; continue; } |
7b86ac337 pagewalk: separat... |
159 |
if (ops->pmd_entry || ops->pte_entry) |
c2febafc6 mm: convert gener... |
160 |
err = walk_p4d_range(pgd, addr, next, walk); |
fafaa4264 pagewalk: improve... |
161 162 163 164 165 166 |
if (err) break; } while (pgd++, addr = next, addr != end); return err; } |
#ifdef CONFIG_HUGETLB_PAGE
/*
 * Clamp the end of the current huge-page step: the next huge-page
 * boundary after addr, or @end if that comes first.
 */
static unsigned long hugetlb_entry_end(struct hstate *h, unsigned long addr,
				       unsigned long end)
{
	unsigned long boundary = (addr & huge_page_mask(h)) + huge_page_size(h);
	return boundary < end ? boundary : end;
}

/*
 * Walk a hugetlb vma over [addr, end) one huge page at a time, invoking
 * ops->hugetlb_entry for present entries and ops->pte_hole for holes.
 */
static int walk_hugetlb_range(unsigned long addr, unsigned long end,
			      struct mm_walk *walk)
{
	struct vm_area_struct *vma = walk->vma;
	struct hstate *h = hstate_vma(vma);
	unsigned long next;
	unsigned long hmask = huge_page_mask(h);
	unsigned long sz = huge_page_size(h);
	pte_t *pte;
	const struct mm_walk_ops *ops = walk->ops;
	int err = 0;

	do {
		next = hugetlb_entry_end(h, addr, end);
		pte = huge_pte_offset(walk->mm, addr & hmask, sz);

		if (pte)
			err = ops->hugetlb_entry(pte, hmask, addr, next, walk);
		else if (ops->pte_hole)
			err = ops->pte_hole(addr, next, walk);

		if (err)
			break;
	} while (addr = next, addr != end);

	return err;
}

#else /* CONFIG_HUGETLB_PAGE */
static int walk_hugetlb_range(unsigned long addr, unsigned long end,
			      struct mm_walk *walk)
{
	return 0;
}

#endif /* CONFIG_HUGETLB_PAGE */
fafaa4264 pagewalk: improve... |
209 210 211 212 213 |
/* * Decide whether we really walk over the current vma on [@start, @end) * or skip it via the returned value. Return 0 if we do walk over the * current vma, and return 1 if we skip the vma. Negative values means * error, where we abort the current walk. |
fafaa4264 pagewalk: improve... |
214 215 216 217 218 |
*/ static int walk_page_test(unsigned long start, unsigned long end, struct mm_walk *walk) { struct vm_area_struct *vma = walk->vma; |
7b86ac337 pagewalk: separat... |
219 |
const struct mm_walk_ops *ops = walk->ops; |
6c6d52804 pagewalk: don't l... |
220 |
|
7b86ac337 pagewalk: separat... |
221 222 |
if (ops->test_walk) return ops->test_walk(start, end, walk); |
fafaa4264 pagewalk: improve... |
223 224 |
/* |
48684a65b mm: pagewalk: fix... |
225 226 227 228 229 230 |
* vma(VM_PFNMAP) doesn't have any valid struct pages behind VM_PFNMAP * range, so we don't walk over it as we do for normal vmas. However, * Some callers are interested in handling hole range and they don't * want to just ignore any single address range. Such users certainly * define their ->pte_hole() callbacks, so let's delegate them to handle * vma(VM_PFNMAP). |
fafaa4264 pagewalk: improve... |
231 |
*/ |
48684a65b mm: pagewalk: fix... |
232 233 |
if (vma->vm_flags & VM_PFNMAP) { int err = 1; |
7b86ac337 pagewalk: separat... |
234 235 |
if (ops->pte_hole) err = ops->pte_hole(start, end, walk); |
48684a65b mm: pagewalk: fix... |
236 237 |
return err ? err : 1; } |
fafaa4264 pagewalk: improve... |
238 239 240 241 242 243 244 245 246 247 |
return 0; } static int __walk_page_range(unsigned long start, unsigned long end, struct mm_walk *walk) { int err = 0; struct vm_area_struct *vma = walk->vma; if (vma && is_vm_hugetlb_page(vma)) { |
7b86ac337 pagewalk: separat... |
248 |
if (walk->ops->hugetlb_entry) |
fafaa4264 pagewalk: improve... |
249 250 251 252 253 254 |
err = walk_hugetlb_range(start, end, walk); } else err = walk_pgd_range(start, end, walk); return err; } |
116354d17 pagemap: fix pfn ... |
255 |
|
e6473092b maps4: introduce ... |
256 |
/** |
fafaa4264 pagewalk: improve... |
257 |
* walk_page_range - walk page table with caller specific callbacks |
7b86ac337 pagewalk: separat... |
258 259 260 261 262 |
* @mm: mm_struct representing the target process of page table walk * @start: start address of the virtual address range * @end: end address of the virtual address range * @ops: operation to call during the walk * @private: private data for callbacks' usage |
e6473092b maps4: introduce ... |
263 |
* |
7b86ac337 pagewalk: separat... |
264 |
* Recursively walk the page table tree of the process represented by @mm |
fafaa4264 pagewalk: improve... |
265 266 267 268 269 |
* within the virtual address range [@start, @end). During walking, we can do * some caller-specific works for each entry, by setting up pmd_entry(), * pte_entry(), and/or hugetlb_entry(). If you don't set up for some of these * callbacks, the associated entries/pages are just ignored. * The return values of these callbacks are commonly defined like below: |
a5d09bed7 mm: docs: add bla... |
270 |
* |
fafaa4264 pagewalk: improve... |
271 272 273 274 275 276 |
* - 0 : succeeded to handle the current entry, and if you don't reach the * end address yet, continue to walk. * - >0 : succeeded to handle the current entry, and return to the caller * with caller specific value. * - <0 : failed to handle the current entry, and return to the caller * with error code. |
e6473092b maps4: introduce ... |
277 |
* |
fafaa4264 pagewalk: improve... |
278 279 |
* Before starting to walk page table, some callers want to check whether * they really want to walk over the current vma, typically by checking |
7b86ac337 pagewalk: separat... |
280 |
* its vm_flags. walk_page_test() and @ops->test_walk() are used for this |
fafaa4264 pagewalk: improve... |
281 |
* purpose. |
e6473092b maps4: introduce ... |
282 |
* |
fafaa4264 pagewalk: improve... |
283 284 |
* struct mm_walk keeps current values of some common data like vma and pmd, * which are useful for the access from callbacks. If you want to pass some |
7b86ac337 pagewalk: separat... |
285 |
* caller-specific data to callbacks, @private should be helpful. |
c27fe4c89 pagewalk: add loc... |
286 |
* |
fafaa4264 pagewalk: improve... |
287 |
* Locking: |
7b86ac337 pagewalk: separat... |
288 289 |
* Callers of walk_page_range() and walk_page_vma() should hold @mm->mmap_sem, * because these function traverse vma list and/or access to vma's data. |
e6473092b maps4: introduce ... |
290 |
*/ |
7b86ac337 pagewalk: separat... |
291 292 293 |
int walk_page_range(struct mm_struct *mm, unsigned long start, unsigned long end, const struct mm_walk_ops *ops, void *private) |
e6473092b maps4: introduce ... |
294 |
{ |
e6473092b maps4: introduce ... |
295 |
int err = 0; |
fafaa4264 pagewalk: improve... |
296 297 |
unsigned long next; struct vm_area_struct *vma; |
7b86ac337 pagewalk: separat... |
298 299 300 301 302 |
struct mm_walk walk = { .ops = ops, .mm = mm, .private = private, }; |
e6473092b maps4: introduce ... |
303 |
|
fafaa4264 pagewalk: improve... |
304 305 |
if (start >= end) return -EINVAL; |
e6473092b maps4: introduce ... |
306 |
|
7b86ac337 pagewalk: separat... |
307 |
if (!walk.mm) |
2165009bd pagemap: pass mm ... |
308 |
return -EINVAL; |
b4bc7817b pagewalk: use loc... |
309 |
lockdep_assert_held(&walk.mm->mmap_sem); |
a9ff785e4 mm/pagewalk.c: wa... |
310 |
|
7b86ac337 pagewalk: separat... |
311 |
vma = find_vma(walk.mm, start); |
e6473092b maps4: introduce ... |
312 |
do { |
fafaa4264 pagewalk: improve... |
313 |
if (!vma) { /* after the last vma */ |
7b86ac337 pagewalk: separat... |
314 |
walk.vma = NULL; |
fafaa4264 pagewalk: improve... |
315 316 |
next = end; } else if (start < vma->vm_start) { /* outside vma */ |
7b86ac337 pagewalk: separat... |
317 |
walk.vma = NULL; |
fafaa4264 pagewalk: improve... |
318 319 |
next = min(end, vma->vm_start); } else { /* inside vma */ |
7b86ac337 pagewalk: separat... |
320 |
walk.vma = vma; |
fafaa4264 pagewalk: improve... |
321 322 |
next = min(end, vma->vm_end); vma = vma->vm_next; |
5f0af70a2 mm: remove call t... |
323 |
|
7b86ac337 pagewalk: separat... |
324 |
err = walk_page_test(start, next, &walk); |
f68373953 mm/pagewalk.c: pr... |
325 326 327 328 329 330 331 |
if (err > 0) { /* * positive return values are purely for * controlling the pagewalk, so should never * be passed to the callers. */ err = 0; |
a9ff785e4 mm/pagewalk.c: wa... |
332 |
continue; |
f68373953 mm/pagewalk.c: pr... |
333 |
} |
fafaa4264 pagewalk: improve... |
334 |
if (err < 0) |
e6473092b maps4: introduce ... |
335 |
break; |
e6473092b maps4: introduce ... |
336 |
} |
7b86ac337 pagewalk: separat... |
337 338 |
if (walk.vma || walk.ops->pte_hole) err = __walk_page_range(start, next, &walk); |
e6473092b maps4: introduce ... |
339 340 |
if (err) break; |
fafaa4264 pagewalk: improve... |
341 |
} while (start = next, start < end); |
e6473092b maps4: introduce ... |
342 343 |
return err; } |
900fc5f19 pagewalk: add wal... |
344 |
|
7b86ac337 pagewalk: separat... |
345 346 |
int walk_page_vma(struct vm_area_struct *vma, const struct mm_walk_ops *ops, void *private) |
900fc5f19 pagewalk: add wal... |
347 |
{ |
7b86ac337 pagewalk: separat... |
348 349 350 351 352 353 |
struct mm_walk walk = { .ops = ops, .mm = vma->vm_mm, .vma = vma, .private = private, }; |
900fc5f19 pagewalk: add wal... |
354 |
int err; |
7b86ac337 pagewalk: separat... |
355 |
if (!walk.mm) |
900fc5f19 pagewalk: add wal... |
356 |
return -EINVAL; |
b4bc7817b pagewalk: use loc... |
357 |
lockdep_assert_held(&walk.mm->mmap_sem); |
7b86ac337 pagewalk: separat... |
358 359 |
err = walk_page_test(vma->vm_start, vma->vm_end, &walk); |
900fc5f19 pagewalk: add wal... |
360 361 362 363 |
if (err > 0) return 0; if (err < 0) return err; |
7b86ac337 pagewalk: separat... |
364 |
return __walk_page_range(vma->vm_start, vma->vm_end, &walk); |
900fc5f19 pagewalk: add wal... |
365 |
} |