mm/pagewalk.c
// SPDX-License-Identifier: GPL-2.0
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/sched.h>
#include <linux/hugetlb.h>
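/*
 * For reference, a sketch of the callback surface of struct mm_walk that
 * the walkers below drive, as declared in <linux/mm.h> for kernels of
 * roughly this vintage (the header is authoritative; this comment is
 * illustrative only):
 *
 *	struct mm_walk {
 *		int (*pud_entry)(pud_t *pud, unsigned long addr,
 *				 unsigned long next, struct mm_walk *walk);
 *		int (*pmd_entry)(pmd_t *pmd, unsigned long addr,
 *				 unsigned long next, struct mm_walk *walk);
 *		int (*pte_entry)(pte_t *pte, unsigned long addr,
 *				 unsigned long next, struct mm_walk *walk);
 *		int (*pte_hole)(unsigned long addr, unsigned long next,
 *				struct mm_walk *walk);
 *		int (*hugetlb_entry)(pte_t *pte, unsigned long hmask,
 *				     unsigned long addr, unsigned long next,
 *				     struct mm_walk *walk);
 *		int (*test_walk)(unsigned long addr, unsigned long next,
 *				 struct mm_walk *walk);
 *		struct mm_struct *mm;
 *		struct vm_area_struct *vma;
 *		void *private;
 *	};
 */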
static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
			  struct mm_walk *walk)
{
	pte_t *pte;
	int err = 0;

	pte = pte_offset_map(pmd, addr);
	for (;;) {
		err = walk->pte_entry(pte, addr, addr + PAGE_SIZE, walk);
		if (err)
			break;
		addr += PAGE_SIZE;
		if (addr == end)
			break;
		pte++;
	}

	pte_unmap(pte);
	return err;
}
static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
			  struct mm_walk *walk)
{
	pmd_t *pmd;
	unsigned long next;
	int err = 0;

	pmd = pmd_offset(pud, addr);
	do {
again:
		next = pmd_addr_end(addr, end);
		if (pmd_none(*pmd) || !walk->vma) {
			if (walk->pte_hole)
				err = walk->pte_hole(addr, next, walk);
			if (err)
				break;
			continue;
		}
		/*
		 * This implies that each ->pmd_entry() handler
		 * needs to know about pmd_trans_huge() pmds
		 */
		if (walk->pmd_entry)
			err = walk->pmd_entry(pmd, addr, next, walk);
		if (err)
			break;

		/*
		 * Check this here so we only break down trans_huge
		 * pages when we _need_ to
		 */
		if (!walk->pte_entry)
			continue;

		split_huge_pmd(walk->vma, pmd, addr);
		if (pmd_trans_unstable(pmd))
			goto again;
		err = walk_pte_range(pmd, addr, next, walk);
		if (err)
			break;
	} while (pmd++, addr = next, addr != end);

	return err;
}
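/*
 * Because ->pmd_entry() runs before any splitting, a handler installed
 * without a pte_entry() must cope with transparent huge pmds itself.
 * A minimal sketch of that pattern (my_pmd_entry is a hypothetical name,
 * not part of this file):
 *
 *	static int my_pmd_entry(pmd_t *pmd, unsigned long addr,
 *				unsigned long next, struct mm_walk *walk)
 *	{
 *		spinlock_t *ptl = pmd_trans_huge_lock(pmd, walk->vma);
 *
 *		if (ptl) {
 *			// pmd maps a huge page; handle it as one unit
 *			spin_unlock(ptl);
 *			return 0;
 *		}
 *		// otherwise a normal (or none/unstable) pmd
 *		return 0;
 *	}
 */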
static int walk_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end,
			  struct mm_walk *walk)
{
	pud_t *pud;
	unsigned long next;
	int err = 0;

	pud = pud_offset(p4d, addr);
	do {
again:
		next = pud_addr_end(addr, end);
		if (pud_none(*pud) || !walk->vma) {
			if (walk->pte_hole)
				err = walk->pte_hole(addr, next, walk);
			if (err)
				break;
			continue;
		}

		if (walk->pud_entry) {
			spinlock_t *ptl = pud_trans_huge_lock(pud, walk->vma);

			if (ptl) {
				err = walk->pud_entry(pud, addr, next, walk);
				spin_unlock(ptl);
				if (err)
					break;
				continue;
			}
		}

		split_huge_pud(walk->vma, pud, addr);
		if (pud_none(*pud))
			goto again;

		if (walk->pmd_entry || walk->pte_entry)
			err = walk_pmd_range(pud, addr, next, walk);
		if (err)
			break;
	} while (pud++, addr = next, addr != end);

	return err;
}
static int walk_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end,
			  struct mm_walk *walk)
{
	p4d_t *p4d;
	unsigned long next;
	int err = 0;

	p4d = p4d_offset(pgd, addr);
	do {
		next = p4d_addr_end(addr, end);
		if (p4d_none_or_clear_bad(p4d)) {
			if (walk->pte_hole)
				err = walk->pte_hole(addr, next, walk);
			if (err)
				break;
			continue;
		}
		if (walk->pmd_entry || walk->pte_entry)
			err = walk_pud_range(p4d, addr, next, walk);
		if (err)
			break;
	} while (p4d++, addr = next, addr != end);

	return err;
}
static int walk_pgd_range(unsigned long addr, unsigned long end,
			  struct mm_walk *walk)
{
	pgd_t *pgd;
	unsigned long next;
	int err = 0;

	pgd = pgd_offset(walk->mm, addr);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none_or_clear_bad(pgd)) {
			if (walk->pte_hole)
				err = walk->pte_hole(addr, next, walk);
			if (err)
				break;
			continue;
		}
		if (walk->pmd_entry || walk->pte_entry)
			err = walk_p4d_range(pgd, addr, next, walk);
		if (err)
			break;
	} while (pgd++, addr = next, addr != end);

	return err;
}
#ifdef CONFIG_HUGETLB_PAGE
static unsigned long hugetlb_entry_end(struct hstate *h, unsigned long addr,
				       unsigned long end)
{
	unsigned long boundary = (addr & huge_page_mask(h)) + huge_page_size(h);
	return boundary < end ? boundary : end;
}

static int walk_hugetlb_range(unsigned long addr, unsigned long end,
			      struct mm_walk *walk)
{
	struct vm_area_struct *vma = walk->vma;
	struct hstate *h = hstate_vma(vma);
	unsigned long next;
	unsigned long hmask = huge_page_mask(h);
	unsigned long sz = huge_page_size(h);
	pte_t *pte;
	int err = 0;

	do {
		next = hugetlb_entry_end(h, addr, end);
		pte = huge_pte_offset(walk->mm, addr & hmask, sz);

		if (pte)
			err = walk->hugetlb_entry(pte, hmask, addr, next, walk);
		else if (walk->pte_hole)
			err = walk->pte_hole(addr, next, walk);

		if (err)
			break;
	} while (addr = next, addr != end);

	return err;
}

#else /* CONFIG_HUGETLB_PAGE */
static int walk_hugetlb_range(unsigned long addr, unsigned long end,
			      struct mm_walk *walk)
{
	return 0;
}

#endif /* CONFIG_HUGETLB_PAGE */
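/*
 * A hugetlb_entry() callback is invoked once per huge pte, with @hmask
 * selecting the bits that address the huge page. A minimal sketch that
 * counts present huge pages via ->private (my_hugetlb_entry is a
 * hypothetical name, not part of this file):
 *
 *	static int my_hugetlb_entry(pte_t *pte, unsigned long hmask,
 *				    unsigned long addr, unsigned long next,
 *				    struct mm_walk *walk)
 *	{
 *		unsigned long *count = walk->private;
 *
 *		// one call covers the whole huge page at (addr & hmask)
 *		if (pte_present(*pte))
 *			(*count)++;
 *		return 0;
 *	}
 */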
/*
 * Decide whether we really walk over the current vma on [@start, @end)
 * or skip it via the returned value. Return 0 if we do walk over the
 * current vma, and return 1 if we skip the vma. Negative values mean
 * error, where we abort the current walk.
 */
static int walk_page_test(unsigned long start, unsigned long end,
			struct mm_walk *walk)
{
	struct vm_area_struct *vma = walk->vma;

	if (walk->test_walk)
		return walk->test_walk(start, end, walk);

	/*
	 * vma(VM_PFNMAP) doesn't have any valid struct pages behind VM_PFNMAP
	 * range, so we don't walk over it as we do for normal vmas. However,
	 * some callers are interested in handling hole ranges and they don't
	 * want to just ignore any single address range. Such users certainly
	 * define their ->pte_hole() callbacks, so let's delegate them to handle
	 * vma(VM_PFNMAP).
	 */
	if (vma->vm_flags & VM_PFNMAP) {
		int err = 1;

		if (walk->pte_hole)
			err = walk->pte_hole(start, end, walk);
		return err ? err : 1;
	}
	return 0;
}

static int __walk_page_range(unsigned long start, unsigned long end,
			struct mm_walk *walk)
{
	int err = 0;
	struct vm_area_struct *vma = walk->vma;

	if (vma && is_vm_hugetlb_page(vma)) {
		if (walk->hugetlb_entry)
			err = walk_hugetlb_range(start, end, walk);
	} else
		err = walk_pgd_range(start, end, walk);

	return err;
}
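/*
 * A test_walk() callback follows the same convention as walk_page_test():
 * 0 walks the vma, 1 skips it, negative aborts. A minimal sketch that
 * skips VM_PFNMAP vmas outright (my_test_walk is a hypothetical name,
 * not part of this file):
 *
 *	static int my_test_walk(unsigned long start, unsigned long end,
 *				struct mm_walk *walk)
 *	{
 *		if (walk->vma->vm_flags & VM_PFNMAP)
 *			return 1;	// skip this vma, keep walking
 *		return 0;		// walk this vma
 *	}
 */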
/**
 * walk_page_range - walk page table with caller specific callbacks
 * @start: start address of the virtual address range
 * @end: end address of the virtual address range
 * @walk: mm_walk structure defining the callbacks and the target address space
 *
 * Recursively walk the page table tree of the process represented by @walk->mm
 * within the virtual address range [@start, @end). During walking, we can do
 * some caller-specific work for each entry by setting up pmd_entry(),
 * pte_entry(), and/or hugetlb_entry(). If you don't set up some of these
 * callbacks, the associated entries/pages are just ignored.
 * The return values of these callbacks are commonly defined as below:
 *
 *  - 0  : succeeded to handle the current entry, and if you don't reach the
 *         end address yet, continue to walk.
 *  - >0 : succeeded to handle the current entry, and return to the caller
 *         with a caller-specific value.
 *  - <0 : failed to handle the current entry, and return to the caller
 *         with the error code.
 *
 * Before starting to walk the page table, some callers want to check whether
 * they really want to walk over the current vma, typically by checking
 * its vm_flags. walk_page_test() and @walk->test_walk() are used for this
 * purpose.
 *
 * struct mm_walk keeps current values of some common data like vma and pmd,
 * which are useful for access from the callbacks. If you want to pass some
 * caller-specific data to the callbacks, @walk->private should be helpful.
 *
 * Locking:
 *   Callers of walk_page_range() and walk_page_vma() should hold
 *   @walk->mm->mmap_sem, because these functions traverse the vma list
 *   and/or access the vma's data.
 */
int walk_page_range(unsigned long start, unsigned long end,
		    struct mm_walk *walk)
{
	int err = 0;
	unsigned long next;
	struct vm_area_struct *vma;

	if (start >= end)
		return -EINVAL;

	if (!walk->mm)
		return -EINVAL;

	VM_BUG_ON_MM(!rwsem_is_locked(&walk->mm->mmap_sem), walk->mm);

	vma = find_vma(walk->mm, start);
	do {
		if (!vma) { /* after the last vma */
			walk->vma = NULL;
			next = end;
		} else if (start < vma->vm_start) { /* outside vma */
			walk->vma = NULL;
			next = min(end, vma->vm_start);
		} else { /* inside vma */
			walk->vma = vma;
			next = min(end, vma->vm_end);
			vma = vma->vm_next;

			err = walk_page_test(start, next, walk);
			if (err > 0) {
				/*
				 * positive return values are purely for
				 * controlling the pagewalk, so should never
				 * be passed to the callers.
				 */
				err = 0;
				continue;
			}
			if (err < 0)
				break;
		}
		if (walk->vma || walk->pte_hole)
			err = __walk_page_range(start, next, walk);
		if (err)
			break;
	} while (start = next, start < end);
	return err;
}
int walk_page_vma(struct vm_area_struct *vma, struct mm_walk *walk)
{
	int err;

	if (!walk->mm)
		return -EINVAL;

	VM_BUG_ON(!rwsem_is_locked(&walk->mm->mmap_sem));
	VM_BUG_ON(!vma);

	walk->vma = vma;
	err = walk_page_test(vma->vm_start, vma->vm_end, walk);
	if (err > 0)
		return 0;
	if (err < 0)
		return err;
	return __walk_page_range(vma->vm_start, vma->vm_end, walk);
}
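/*
 * Putting it together: a minimal usage sketch (not part of this file;
 * count_pte and count_present_pages are hypothetical names). It counts
 * present ptes in a range by setting only .pte_entry, passes the counter
 * through .private, and takes mmap_sem as the locking rules above require:
 *
 *	static int count_pte(pte_t *pte, unsigned long addr,
 *			     unsigned long next, struct mm_walk *walk)
 *	{
 *		unsigned long *count = walk->private;
 *
 *		if (pte_present(*pte))
 *			(*count)++;
 *		return 0;
 *	}
 *
 *	static unsigned long count_present_pages(struct mm_struct *mm,
 *						 unsigned long start,
 *						 unsigned long end)
 *	{
 *		unsigned long count = 0;
 *		struct mm_walk walk = {
 *			.pte_entry	= count_pte,
 *			.mm		= mm,
 *			.private	= &count,
 *		};
 *
 *		down_read(&mm->mmap_sem);
 *		walk_page_range(start, end, &walk);
 *		up_read(&mm->mmap_sem);
 *		return count;
 *	}
 */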