mm/pagewalk.c

  // SPDX-License-Identifier: GPL-2.0
  #include <linux/pagewalk.h>
  #include <linux/highmem.h>
  #include <linux/sched.h>
  #include <linux/hugetlb.h>

  /*
   * We want to know the real level where an entry is located, ignoring any
   * folding of levels which may be happening. For example, if p4d is folded then
   * a missing entry found at level 1 (p4d) is actually at level 0 (pgd).
   */
  static int real_depth(int depth)
  {
  	if (depth == 3 && PTRS_PER_PMD == 1)
  		depth = 2;
  	if (depth == 2 && PTRS_PER_PUD == 1)
  		depth = 1;
  	if (depth == 1 && PTRS_PER_P4D == 1)
  		depth = 0;
  	return depth;
  }
  static int walk_pte_range_inner(pte_t *pte, unsigned long addr,
  				unsigned long end, struct mm_walk *walk)
  {
  	const struct mm_walk_ops *ops = walk->ops;
  	int err = 0;

  	for (;;) {
  		err = ops->pte_entry(pte, addr, addr + PAGE_SIZE, walk);
  		if (err)
  			break;
  		if (addr >= end - PAGE_SIZE)
  			break;
  		addr += PAGE_SIZE;
  		pte++;
  	}
  	return err;
  }
  
  static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
  			  struct mm_walk *walk)
  {
  	pte_t *pte;
  	int err = 0;
  	spinlock_t *ptl;
  
  	if (walk->no_vma) {
  		pte = pte_offset_map(pmd, addr);
  		err = walk_pte_range_inner(pte, addr, end, walk);
  		pte_unmap(pte);
  	} else {
  		pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
  		err = walk_pte_range_inner(pte, addr, end, walk);
  		pte_unmap_unlock(pte, ptl);
  	}

  	return err;
  }
  
  static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
  			  struct mm_walk *walk)
  {
  	pmd_t *pmd;
  	unsigned long next;
  	const struct mm_walk_ops *ops = walk->ops;
  	int err = 0;
  	int depth = real_depth(3);
  
  	pmd = pmd_offset(pud, addr);
  	do {
  again:
  		next = pmd_addr_end(addr, end);
  		if (pmd_none(*pmd) || (!walk->vma && !walk->no_vma)) {
  			if (ops->pte_hole)
  				err = ops->pte_hole(addr, next, depth, walk);
  			if (err)
  				break;
  			continue;
  		}
  
  		walk->action = ACTION_SUBTREE;
  		/*
  		 * This implies that each ->pmd_entry() handler
  		 * needs to know about pmd_trans_huge() pmds
  		 */
  		if (ops->pmd_entry)
  			err = ops->pmd_entry(pmd, addr, next, walk);
  		if (err)
  			break;
  		if (walk->action == ACTION_AGAIN)
  			goto again;
  		/*
  		 * Check this here so we only break down trans_huge
  		 * pages when we _need_ to
  		 */
  		if ((!walk->vma && (pmd_leaf(*pmd) || !pmd_present(*pmd))) ||
  		    walk->action == ACTION_CONTINUE ||
  		    !(ops->pte_entry))
  			continue;
  		if (walk->vma) {
  			split_huge_pmd(walk->vma, pmd, addr);
  			if (pmd_trans_unstable(pmd))
  				goto again;
  		}

  		err = walk_pte_range(pmd, addr, next, walk);
  		if (err)
  			break;
  	} while (pmd++, addr = next, addr != end);
  
  	return err;
  }
  static int walk_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end,
  			  struct mm_walk *walk)
  {
  	pud_t *pud;
  	unsigned long next;
  	const struct mm_walk_ops *ops = walk->ops;
  	int err = 0;
  	int depth = real_depth(2);

  	pud = pud_offset(p4d, addr);
  	do {
  again:
  		next = pud_addr_end(addr, end);
  		if (pud_none(*pud) || (!walk->vma && !walk->no_vma)) {
  			if (ops->pte_hole)
  				err = ops->pte_hole(addr, next, depth, walk);
  			if (err)
  				break;
  			continue;
  		}

  		walk->action = ACTION_SUBTREE;

  		if (ops->pud_entry)
  			err = ops->pud_entry(pud, addr, next, walk);
  		if (err)
  			break;
  
  		if (walk->action == ACTION_AGAIN)
  			goto again;
  		if ((!walk->vma && (pud_leaf(*pud) || !pud_present(*pud))) ||
  		    walk->action == ACTION_CONTINUE ||
  		    !(ops->pmd_entry || ops->pte_entry))
  			continue;

  		if (walk->vma)
  			split_huge_pud(walk->vma, pud, addr);
  		if (pud_none(*pud))
  			goto again;
  		err = walk_pmd_range(pud, addr, next, walk);
  		if (err)
  			break;
  	} while (pud++, addr = next, addr != end);
  
  	return err;
  }
  static int walk_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end,
  			  struct mm_walk *walk)
  {
  	p4d_t *p4d;
  	unsigned long next;
  	const struct mm_walk_ops *ops = walk->ops;
  	int err = 0;
  	int depth = real_depth(1);
  
  	p4d = p4d_offset(pgd, addr);
  	do {
  		next = p4d_addr_end(addr, end);
  		if (p4d_none_or_clear_bad(p4d)) {
  			if (ops->pte_hole)
  				err = ops->pte_hole(addr, next, depth, walk);
  			if (err)
  				break;
  			continue;
  		}
  		if (ops->p4d_entry) {
  			err = ops->p4d_entry(p4d, addr, next, walk);
  			if (err)
  				break;
  		}
  		if (ops->pud_entry || ops->pmd_entry || ops->pte_entry)
  			err = walk_pud_range(p4d, addr, next, walk);
  		if (err)
  			break;
  	} while (p4d++, addr = next, addr != end);
  
  	return err;
  }
  static int walk_pgd_range(unsigned long addr, unsigned long end,
  			  struct mm_walk *walk)
  {
  	pgd_t *pgd;
  	unsigned long next;
  	const struct mm_walk_ops *ops = walk->ops;
  	int err = 0;
  	if (walk->pgd)
  		pgd = walk->pgd + pgd_index(addr);
  	else
  		pgd = pgd_offset(walk->mm, addr);
  	do {
  		next = pgd_addr_end(addr, end);
  		if (pgd_none_or_clear_bad(pgd)) {
  			if (ops->pte_hole)
  				err = ops->pte_hole(addr, next, 0, walk);
  			if (err)
  				break;
  			continue;
  		}
  		if (ops->pgd_entry) {
  			err = ops->pgd_entry(pgd, addr, next, walk);
  			if (err)
  				break;
  		}
  		if (ops->p4d_entry || ops->pud_entry || ops->pmd_entry ||
  		    ops->pte_entry)
  			err = walk_p4d_range(pgd, addr, next, walk);
  		if (err)
  			break;
  	} while (pgd++, addr = next, addr != end);
  
  	return err;
  }
  #ifdef CONFIG_HUGETLB_PAGE
  static unsigned long hugetlb_entry_end(struct hstate *h, unsigned long addr,
  				       unsigned long end)
  {
  	unsigned long boundary = (addr & huge_page_mask(h)) + huge_page_size(h);
  	return boundary < end ? boundary : end;
  }
  static int walk_hugetlb_range(unsigned long addr, unsigned long end,
  			      struct mm_walk *walk)
  {
  	struct vm_area_struct *vma = walk->vma;
  	struct hstate *h = hstate_vma(vma);
  	unsigned long next;
  	unsigned long hmask = huge_page_mask(h);
  	unsigned long sz = huge_page_size(h);
  	pte_t *pte;
  	const struct mm_walk_ops *ops = walk->ops;
  	int err = 0;
  
  	do {
  		next = hugetlb_entry_end(h, addr, end);
  		pte = huge_pte_offset(walk->mm, addr & hmask, sz);
  
  		if (pte)
  			err = ops->hugetlb_entry(pte, hmask, addr, next, walk);
  		else if (ops->pte_hole)
  			err = ops->pte_hole(addr, next, -1, walk);

  		if (err)
  			break;
  	} while (addr = next, addr != end);
  	return err;
  }

  #else /* CONFIG_HUGETLB_PAGE */
  static int walk_hugetlb_range(unsigned long addr, unsigned long end,
  			      struct mm_walk *walk)
  {
  	return 0;
  }
  
  #endif /* CONFIG_HUGETLB_PAGE */

  /*
   * Decide whether we really walk over the current vma on [@start, @end)
   * or skip it via the returned value. Return 0 if we do walk over the
   * current vma, and return 1 if we skip the vma. A negative return value
   * means an error, where we abort the current walk.
   */
  static int walk_page_test(unsigned long start, unsigned long end,
  			struct mm_walk *walk)
  {
  	struct vm_area_struct *vma = walk->vma;
  	const struct mm_walk_ops *ops = walk->ops;

  	if (ops->test_walk)
  		return ops->test_walk(start, end, walk);
  
  	/*
  	 * vma(VM_PFNMAP) doesn't have any valid struct pages behind VM_PFNMAP
  	 * range, so we don't walk over it as we do for normal vmas. However,
  	 * some callers are interested in handling holes in the range, and they don't
  	 * want to just ignore any single address range. Such users certainly
  	 * define their ->pte_hole() callbacks, so let's delegate them to handle
  	 * vma(VM_PFNMAP).
  	 */
  	if (vma->vm_flags & VM_PFNMAP) {
  		int err = 1;
  		if (ops->pte_hole)
  			err = ops->pte_hole(start, end, -1, walk);
  		return err ? err : 1;
  	}
  	return 0;
  }
  
  static int __walk_page_range(unsigned long start, unsigned long end,
  			struct mm_walk *walk)
  {
  	int err = 0;
  	struct vm_area_struct *vma = walk->vma;
  	const struct mm_walk_ops *ops = walk->ops;
  
  	if (vma && ops->pre_vma) {
  		err = ops->pre_vma(start, end, walk);
  		if (err)
  			return err;
  	}
  
  	if (vma && is_vm_hugetlb_page(vma)) {
  		if (ops->hugetlb_entry)
  			err = walk_hugetlb_range(start, end, walk);
  	} else
  		err = walk_pgd_range(start, end, walk);
  	if (vma && ops->post_vma)
  		ops->post_vma(walk);
  	return err;
  }

  /**
   * walk_page_range - walk page table with caller-specific callbacks
   * @mm:		mm_struct representing the target process of page table walk
   * @start:	start address of the virtual address range
   * @end:	end address of the virtual address range
   * @ops:	operation to call during the walk
   * @private:	private data for callbacks' usage
   *
   * Recursively walk the page table tree of the process represented by @mm
   * within the virtual address range [@start, @end). During walking, we can do
   * some caller-specific work for each entry, by setting up pmd_entry(),
   * pte_entry(), and/or hugetlb_entry(). If you don't set up some of these
   * callbacks, the associated entries/pages are just ignored.
   * The return values of these callbacks are commonly defined as follows:
   *
   *  - 0  : succeeded in handling the current entry; if the end address has
   *         not been reached yet, continue the walk.
   *  - >0 : succeeded in handling the current entry; return to the caller
   *         with a caller-specific value.
   *  - <0 : failed to handle the current entry; return to the caller
   *         with an error code.
   *
   * Before starting to walk the page table, some callers want to check whether
   * they really want to walk over the current vma, typically by checking
   * its vm_flags. walk_page_test() and @ops->test_walk() are used for this
   * purpose.
   *
   * If operations need to be staged before and committed after a vma is walked,
   * there are two callbacks, pre_vma() and post_vma(). Note that post_vma(),
   * since it is intended to handle commit-type operations, can't return any
   * errors.
   *
   * struct mm_walk keeps current values of some common data like vma and pmd,
   * which are useful for callbacks to access. If you want to pass some
   * caller-specific data to callbacks, @private should be helpful.
   *
   * Locking:
   *   Callers of walk_page_range() and walk_page_vma() should hold @mm->mmap_lock,
   *   because these functions traverse the vma list and/or access the vma's data.
   */
  int walk_page_range(struct mm_struct *mm, unsigned long start,
  		unsigned long end, const struct mm_walk_ops *ops,
  		void *private)
  {
  	int err = 0;
  	unsigned long next;
  	struct vm_area_struct *vma;
  	struct mm_walk walk = {
  		.ops		= ops,
  		.mm		= mm,
  		.private	= private,
  	};

  	if (start >= end)
  		return -EINVAL;

  	if (!walk.mm)
  		return -EINVAL;
  	mmap_assert_locked(walk.mm);

  	vma = find_vma(walk.mm, start);
  	do {
  		if (!vma) { /* after the last vma */
  			walk.vma = NULL;
  			next = end;
  		} else if (start < vma->vm_start) { /* outside vma */
  			walk.vma = NULL;
  			next = min(end, vma->vm_start);
  		} else { /* inside vma */
  			walk.vma = vma;
  			next = min(end, vma->vm_end);
  			vma = vma->vm_next;

  			err = walk_page_test(start, next, &walk);
  			if (err > 0) {
  				/*
  				 * positive return values are purely for
  				 * controlling the pagewalk, so should never
  				 * be passed to the callers.
  				 */
  				err = 0;
  				continue;
  			}
  			if (err < 0)
  				break;
  		}
  		if (walk.vma || walk.ops->pte_hole)
  			err = __walk_page_range(start, next, &walk);
  		if (err)
  			break;
  	} while (start = next, start < end);
  	return err;
  }
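
  /*
   * Illustrative sketch, not part of the original file: a hypothetical caller
   * that counts present PTEs in a user address range with walk_page_range().
   * count_pte_entry(), count_ops and count_present_ptes() are invented names;
   * only struct mm_walk_ops, walk_page_range() and the mmap read-lock API are
   * existing kernel interfaces.
   */
  #if 0	/* example only, not compiled */
  static int count_pte_entry(pte_t *pte, unsigned long addr,
  			   unsigned long next, struct mm_walk *walk)
  {
  	unsigned long *nr_present = walk->private;

  	if (pte_present(*pte))
  		(*nr_present)++;
  	return 0;	/* keep walking */
  }

  static const struct mm_walk_ops count_ops = {
  	.pte_entry	= count_pte_entry,
  };

  static unsigned long count_present_ptes(struct mm_struct *mm,
  					unsigned long start, unsigned long end)
  {
  	unsigned long nr_present = 0;

  	mmap_read_lock(mm);	/* walk_page_range() asserts mmap_lock is held */
  	walk_page_range(mm, start, end, &count_ops, &nr_present);
  	mmap_read_unlock(mm);
  	return nr_present;
  }
  #endif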

  /*
   * Similar to walk_page_range() but can walk any page tables even if they are
   * not backed by VMAs. Because 'unusual' entries may be walked, this function
   * will also not lock the PTEs for the pte_entry() callback. This is useful for
   * walking the kernel page tables or page tables for firmware.
   */
  int walk_page_range_novma(struct mm_struct *mm, unsigned long start,
  			  unsigned long end, const struct mm_walk_ops *ops,
  			  pgd_t *pgd,
  			  void *private)
  {
  	struct mm_walk walk = {
  		.ops		= ops,
  		.mm		= mm,
  		.pgd		= pgd,
  		.private	= private,
  		.no_vma		= true
  	};
  
  	if (start >= end || !walk.mm)
  		return -EINVAL;
  	mmap_assert_locked(walk.mm);
  
  	return __walk_page_range(start, end, &walk);
  }
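
  /*
   * Illustrative sketch, not part of the original file: walking a kernel
   * virtual address range with walk_page_range_novma(), roughly as a page
   * table dumper might. dump_pte(), dump_ops and dump_kernel_range() are
   * invented names; passing a NULL pgd makes the walk start from the mm's own
   * pgd, as walk_pgd_range() above shows. Note that huge leaf entries are
   * skipped unless pud_entry()/pmd_entry() callbacks are also provided.
   */
  #if 0	/* example only, not compiled */
  static int dump_pte(pte_t *pte, unsigned long addr, unsigned long next,
  		    struct mm_walk *walk)
  {
  	if (pte_present(*pte))
  		pr_info("0x%lx: pfn 0x%lx\n", addr, pte_pfn(*pte));
  	return 0;
  }

  static const struct mm_walk_ops dump_ops = {
  	.pte_entry	= dump_pte,
  };

  static void dump_kernel_range(unsigned long start, unsigned long end)
  {
  	mmap_read_lock(&init_mm);	/* the walk asserts mmap_lock is held */
  	walk_page_range_novma(&init_mm, start, end, &dump_ops, NULL, NULL);
  	mmap_read_unlock(&init_mm);
  }
  #endif
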
  int walk_page_vma(struct vm_area_struct *vma, const struct mm_walk_ops *ops,
  		void *private)
  {
  	struct mm_walk walk = {
  		.ops		= ops,
  		.mm		= vma->vm_mm,
  		.vma		= vma,
  		.private	= private,
  	};
  	int err;
  	if (!walk.mm)
  		return -EINVAL;
  	mmap_assert_locked(walk.mm);
  
  	err = walk_page_test(vma->vm_start, vma->vm_end, &walk);
  	if (err > 0)
  		return 0;
  	if (err < 0)
  		return err;
  	return __walk_page_range(vma->vm_start, vma->vm_end, &walk);
  }
  
  /**
   * walk_page_mapping - walk all memory areas mapped into a struct address_space.
   * @mapping: Pointer to the struct address_space
   * @first_index: First page offset in the address_space
   * @nr: Number of incremental page offsets to cover
   * @ops:	operation to call during the walk
   * @private:	private data for callbacks' usage
   *
   * This function walks all memory areas mapped into a struct address_space.
   * The walk is limited to only the given page-size index range, but if
   * the index boundaries cross a huge page-table entry, that entry will be
   * included.
   *
   * Also see walk_page_range() for additional information.
   *
   * Locking:
   *   This function can't require that the struct mm_struct::mmap_lock is held,
   *   since @mapping may be mapped by multiple processes. Instead,
   *   @mapping->i_mmap_rwsem must be held. This might have implications in the
   *   callbacks, and it's up to the caller to ensure that the
   *   struct mm_struct::mmap_lock is not needed.
   *
   *   Also this means that a caller can't rely on the struct
   *   vm_area_struct::vm_flags to be constant across a call,
   *   except for immutable flags. Callers requiring this shouldn't use
   *   this function.
   *
   * Return: 0 on success, negative error code on failure, positive number on
   * caller-defined premature termination.
   */
  int walk_page_mapping(struct address_space *mapping, pgoff_t first_index,
  		      pgoff_t nr, const struct mm_walk_ops *ops,
  		      void *private)
  {
  	struct mm_walk walk = {
  		.ops		= ops,
  		.private	= private,
  	};
  	struct vm_area_struct *vma;
  	pgoff_t vba, vea, cba, cea;
  	unsigned long start_addr, end_addr;
  	int err = 0;
  
  	lockdep_assert_held(&mapping->i_mmap_rwsem);
  	vma_interval_tree_foreach(vma, &mapping->i_mmap, first_index,
  				  first_index + nr - 1) {
  		/* Clip to the vma */
  		vba = vma->vm_pgoff;
  		vea = vba + vma_pages(vma);
  		cba = first_index;
  		cba = max(cba, vba);
  		cea = first_index + nr;
  		cea = min(cea, vea);
  
  		start_addr = ((cba - vba) << PAGE_SHIFT) + vma->vm_start;
  		end_addr = ((cea - vba) << PAGE_SHIFT) + vma->vm_start;
  		if (start_addr >= end_addr)
  			continue;
  
  		walk.vma = vma;
  		walk.mm = vma->vm_mm;
  
  		err = walk_page_test(vma->vm_start, vma->vm_end, &walk);
  		if (err > 0) {
  			err = 0;
  			break;
  		} else if (err < 0)
  			break;
  
  		err = __walk_page_range(start_addr, end_addr, &walk);
  		if (err)
  			break;
  	}
  
  	return err;
  }
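
  /*
   * Illustrative sketch, not part of the original file: counting how many PTEs
   * currently map a range of a file across all sharers of @mapping, using
   * walk_page_mapping(). count_mapped_pte(), count_mapped_ops and
   * count_file_mappings() are invented names; i_mmap_lock_read() takes the
   * i_mmap_rwsem that walk_page_mapping() asserts above.
   */
  #if 0	/* example only, not compiled */
  static int count_mapped_pte(pte_t *pte, unsigned long addr,
  			    unsigned long next, struct mm_walk *walk)
  {
  	unsigned long *nr_mapped = walk->private;

  	if (pte_present(*pte))
  		(*nr_mapped)++;
  	return 0;
  }

  static const struct mm_walk_ops count_mapped_ops = {
  	.pte_entry	= count_mapped_pte,
  };

  static unsigned long count_file_mappings(struct address_space *mapping,
  					 pgoff_t first_index, pgoff_t nr)
  {
  	unsigned long nr_mapped = 0;

  	i_mmap_lock_read(mapping);
  	walk_page_mapping(mapping, first_index, nr, &count_mapped_ops,
  			  &nr_mapped);
  	i_mmap_unlock_read(mapping);
  	return nr_mapped;
  }
  #endif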