Commit 1998cc048901109a29924380b8e91bc049b32951
Committed by
Linus Torvalds
1 parent
a394cb8ee6
Exists in
master
and in
20 other branches
mm: make madvise(MADV_WILLNEED) support swap file prefetch
Make madvise(MADV_WILLNEED) support swap file prefetch. If memory has been swapped out, this syscall can do swapin prefetch. It has no impact if the memory hasn't been swapped out. [akpm@linux-foundation.org: fix CONFIG_SWAP=n build] [sasha.levin@oracle.com: fix BUG on madvise early failure] Signed-off-by: Shaohua Li <shli@fusionio.com> Cc: Hugh Dickins <hughd@google.com> Cc: Rik van Riel <riel@redhat.com> Signed-off-by: Sasha Levin <sasha.levin@oracle.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Showing 1 changed file with 101 additions and 4 deletions Side-by-side Diff
mm/madvise.c
... | ... | @@ -16,6 +16,9 @@ |
16 | 16 | #include <linux/ksm.h> |
17 | 17 | #include <linux/fs.h> |
18 | 18 | #include <linux/file.h> |
19 | +#include <linux/blkdev.h> | |
20 | +#include <linux/swap.h> | |
21 | +#include <linux/swapops.h> | |
19 | 22 | |
20 | 23 | /* |
21 | 24 | * Any behaviour which results in changes to the vma->vm_flags needs to |
... | ... | @@ -131,6 +134,84 @@ |
131 | 134 | return error; |
132 | 135 | } |
133 | 136 | |
137 | +#ifdef CONFIG_SWAP | |
138 | +static int swapin_walk_pmd_entry(pmd_t *pmd, unsigned long start, | |
139 | + unsigned long end, struct mm_walk *walk) | |
140 | +{ | |
141 | + pte_t *orig_pte; | |
142 | + struct vm_area_struct *vma = walk->private; | |
143 | + unsigned long index; | |
144 | + | |
145 | + if (pmd_none_or_trans_huge_or_clear_bad(pmd)) | |
146 | + return 0; | |
147 | + | |
148 | + for (index = start; index != end; index += PAGE_SIZE) { | |
149 | + pte_t pte; | |
150 | + swp_entry_t entry; | |
151 | + struct page *page; | |
152 | + spinlock_t *ptl; | |
153 | + | |
154 | + orig_pte = pte_offset_map_lock(vma->vm_mm, pmd, start, &ptl); | |
155 | + pte = *(orig_pte + ((index - start) / PAGE_SIZE)); | |
156 | + pte_unmap_unlock(orig_pte, ptl); | |
157 | + | |
158 | + if (pte_present(pte) || pte_none(pte) || pte_file(pte)) | |
159 | + continue; | |
160 | + entry = pte_to_swp_entry(pte); | |
161 | + if (unlikely(non_swap_entry(entry))) | |
162 | + continue; | |
163 | + | |
164 | + page = read_swap_cache_async(entry, GFP_HIGHUSER_MOVABLE, | |
165 | + vma, index); | |
166 | + if (page) | |
167 | + page_cache_release(page); | |
168 | + } | |
169 | + | |
170 | + return 0; | |
171 | +} | |
172 | + | |
173 | +static void force_swapin_readahead(struct vm_area_struct *vma, | |
174 | + unsigned long start, unsigned long end) | |
175 | +{ | |
176 | + struct mm_walk walk = { | |
177 | + .mm = vma->vm_mm, | |
178 | + .pmd_entry = swapin_walk_pmd_entry, | |
179 | + .private = vma, | |
180 | + }; | |
181 | + | |
182 | + walk_page_range(start, end, &walk); | |
183 | + | |
184 | + lru_add_drain(); /* Push any new pages onto the LRU now */ | |
185 | +} | |
186 | + | |
187 | +static void force_shm_swapin_readahead(struct vm_area_struct *vma, | |
188 | + unsigned long start, unsigned long end, | |
189 | + struct address_space *mapping) | |
190 | +{ | |
191 | + pgoff_t index; | |
192 | + struct page *page; | |
193 | + swp_entry_t swap; | |
194 | + | |
195 | + for (; start < end; start += PAGE_SIZE) { | |
196 | + index = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; | |
197 | + | |
198 | + page = find_get_page(mapping, index); | |
199 | + if (!radix_tree_exceptional_entry(page)) { | |
200 | + if (page) | |
201 | + page_cache_release(page); | |
202 | + continue; | |
203 | + } | |
204 | + swap = radix_to_swp_entry(page); | |
205 | + page = read_swap_cache_async(swap, GFP_HIGHUSER_MOVABLE, | |
206 | + NULL, 0); | |
207 | + if (page) | |
208 | + page_cache_release(page); | |
209 | + } | |
210 | + | |
211 | + lru_add_drain(); /* Push any new pages onto the LRU now */ | |
212 | +} | |
213 | +#endif /* CONFIG_SWAP */ | |
214 | + | |
134 | 215 | /* |
135 | 216 | * Schedule all required I/O operations. Do not wait for completion. |
136 | 217 | */ |
... | ... | @@ -140,6 +221,18 @@ |
140 | 221 | { |
141 | 222 | struct file *file = vma->vm_file; |
142 | 223 | |
224 | +#ifdef CONFIG_SWAP | |
225 | + if (!file || mapping_cap_swap_backed(file->f_mapping)) { | |
226 | + *prev = vma; | |
227 | + if (!file) | |
228 | + force_swapin_readahead(vma, start, end); | |
229 | + else | |
230 | + force_shm_swapin_readahead(vma, start, end, | |
231 | + file->f_mapping); | |
232 | + return 0; | |
233 | + } | |
234 | +#endif | |
235 | + | |
143 | 236 | if (!file) |
144 | 237 | return -EBADF; |
145 | 238 | |
... | ... | @@ -371,6 +464,7 @@ |
371 | 464 | int error = -EINVAL; |
372 | 465 | int write; |
373 | 466 | size_t len; |
467 | + struct blk_plug plug; | |
374 | 468 | |
375 | 469 | #ifdef CONFIG_MEMORY_FAILURE |
376 | 470 | if (behavior == MADV_HWPOISON || behavior == MADV_SOFT_OFFLINE) |
377 | 471 | |
378 | 472 | |
... | ... | @@ -410,18 +504,19 @@ |
410 | 504 | if (vma && start > vma->vm_start) |
411 | 505 | prev = vma; |
412 | 506 | |
507 | + blk_start_plug(&plug); | |
413 | 508 | for (;;) { |
414 | 509 | /* Still start < end. */ |
415 | 510 | error = -ENOMEM; |
416 | 511 | if (!vma) |
417 | - goto out; | |
512 | + goto out_plug; | |
418 | 513 | |
419 | 514 | /* Here start < (end|vma->vm_end). */ |
420 | 515 | if (start < vma->vm_start) { |
421 | 516 | unmapped_error = -ENOMEM; |
422 | 517 | start = vma->vm_start; |
423 | 518 | if (start >= end) |
424 | - goto out; | |
519 | + goto out_plug; | |
425 | 520 | } |
426 | 521 | |
427 | 522 | /* Here vma->vm_start <= start < (end|vma->vm_end) */ |
428 | 523 | |
429 | 524 | |
... | ... | @@ -432,18 +527,20 @@ |
432 | 527 | /* Here vma->vm_start <= start < tmp <= (end|vma->vm_end). */ |
433 | 528 | error = madvise_vma(vma, &prev, start, tmp, behavior); |
434 | 529 | if (error) |
435 | - goto out; | |
530 | + goto out_plug; | |
436 | 531 | start = tmp; |
437 | 532 | if (prev && start < prev->vm_end) |
438 | 533 | start = prev->vm_end; |
439 | 534 | error = unmapped_error; |
440 | 535 | if (start >= end) |
441 | - goto out; | |
536 | + goto out_plug; | |
442 | 537 | if (prev) |
443 | 538 | vma = prev->vm_next; |
444 | 539 | else /* madvise_remove dropped mmap_sem */ |
445 | 540 | vma = find_vma(current->mm, start); |
446 | 541 | } |
542 | +out_plug: | |
543 | + blk_finish_plug(&plug); | |
447 | 544 | out: |
448 | 545 | if (write) |
449 | 546 | up_write(¤t->mm->mmap_sem); |