Blame view: mm/filemap.c (98.3 KB)
// SPDX-License-Identifier: GPL-2.0-only
/*
 *	linux/mm/filemap.c
 *
 * Copyright (C) 1994-1999  Linus Torvalds
 */

/*
 * This file handles the generic file mmap semantics used by
 * most "normal" filesystems (but you don't /have/ to use this:
 * the NFS filesystem used to do this differently, for example)
 */
#include <linux/export.h>
#include <linux/compiler.h>
#include <linux/dax.h>
#include <linux/fs.h>
#include <linux/sched/signal.h>
#include <linux/uaccess.h>
#include <linux/capability.h>
#include <linux/kernel_stat.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/mman.h>
#include <linux/pagemap.h>
#include <linux/file.h>
#include <linux/uio.h>
#include <linux/error-injection.h>
#include <linux/hash.h>
#include <linux/writeback.h>
#include <linux/backing-dev.h>
#include <linux/pagevec.h>
#include <linux/blkdev.h>
#include <linux/security.h>
#include <linux/cpuset.h>
#include <linux/hugetlb.h>
#include <linux/memcontrol.h>
#include <linux/cleancache.h>
#include <linux/shmem_fs.h>
#include <linux/rmap.h>
#include <linux/delayacct.h>
#include <linux/psi.h>
#include <linux/ramfs.h>
#include <linux/page_idle.h>
#include "internal.h"

#define CREATE_TRACE_POINTS
#include <trace/events/filemap.h>

/*
 * FIXME: remove all knowledge of the buffer layer from the core VM
 */
#include <linux/buffer_head.h> /* for try_to_free_buffers */

#include <asm/mman.h>

/*
 * Shared mappings implemented 30.11.1994. It's not fully working yet,
 * though.
 *
 * Shared mappings now work. 15.8.1995  Bruno.
 *
 * finished 'unifying' the page and buffer cache and SMP-threaded the
 * page-cache, 21.05.1999, Ingo Molnar <mingo@redhat.com>
 *
 * SMP-threaded pagemap-LRU 1999, Andrea Arcangeli <andrea@suse.de>
 */

/*
 * Lock ordering:
 *
 *  ->i_mmap_rwsem		(truncate_pagecache)
 *    ->private_lock		(__free_pte->__set_page_dirty_buffers)
 *      ->swap_lock		(exclusive_swap_page, others)
 *        ->i_pages lock
 *
 *  ->i_mutex
 *    ->i_mmap_rwsem		(truncate->unmap_mapping_range)
 *
 *  ->mmap_lock
 *    ->i_mmap_rwsem
 *      ->page_table_lock or pte_lock	(various, mainly in memory.c)
 *        ->i_pages lock	(arch-dependent flush_dcache_mmap_lock)
 *
 *  ->mmap_lock
 *    ->lock_page		(access_process_vm)
 *
 *  ->i_mutex			(generic_perform_write)
 *    ->mmap_lock		(fault_in_pages_readable->do_page_fault)
 *
 *  bdi->wb.list_lock
 *    sb_lock			(fs/fs-writeback.c)
 *    ->i_pages lock		(__sync_single_inode)
 *
 *  ->i_mmap_rwsem
 *    ->anon_vma.lock		(vma_adjust)
 *
 *  ->anon_vma.lock
 *    ->page_table_lock or pte_lock	(anon_vma_prepare and various)
 *
 *  ->page_table_lock or pte_lock
 *    ->swap_lock		(try_to_unmap_one)
 *    ->private_lock		(try_to_unmap_one)
 *    ->i_pages lock		(try_to_unmap_one)
 *    ->pgdat->lru_lock		(follow_page->mark_page_accessed)
 *    ->pgdat->lru_lock		(check_pte_range->isolate_lru_page)
 *    ->private_lock		(page_remove_rmap->set_page_dirty)
 *    ->i_pages lock		(page_remove_rmap->set_page_dirty)
 *    bdi.wb->list_lock		(page_remove_rmap->set_page_dirty)
 *    ->inode->i_lock		(page_remove_rmap->set_page_dirty)
 *    ->memcg->move_lock	(page_remove_rmap->lock_page_memcg)
 *    bdi.wb->list_lock		(zap_pte_range->set_page_dirty)
 *    ->inode->i_lock		(zap_pte_range->set_page_dirty)
 *    ->private_lock		(zap_pte_range->__set_page_dirty_buffers)
 *
 *  ->i_mmap_rwsem
 *    ->tasklist_lock		(memory_failure, collect_procs_ao)
 */
static void page_cache_delete(struct address_space *mapping,
				   struct page *page, void *shadow)
{
	XA_STATE(xas, &mapping->i_pages, page->index);
	unsigned int nr = 1;

	mapping_set_update(&xas, mapping);

	/* hugetlb pages are represented by a single entry in the xarray */
	if (!PageHuge(page)) {
		xas_set_order(&xas, page->index, compound_order(page));
		nr = compound_nr(page);
	}

	VM_BUG_ON_PAGE(!PageLocked(page), page);
	VM_BUG_ON_PAGE(PageTail(page), page);
	VM_BUG_ON_PAGE(nr != 1 && shadow, page);

	xas_store(&xas, shadow);
	xas_init_marks(&xas);

	page->mapping = NULL;
	/* Leave page->index set: truncation lookup relies upon it */

	if (shadow) {
		mapping->nrexceptional += nr;
		/*
		 * Make sure the nrexceptional update is committed before
		 * the nrpages update so that final truncate racing
		 * with reclaim does not see both counters 0 at the
		 * same time and miss a shadow entry.
		 */
		smp_wmb();
	}
	mapping->nrpages -= nr;
}
static void unaccount_page_cache_page(struct address_space *mapping,
				      struct page *page)
{
	int nr;

	/*
	 * if we're uptodate, flush out into the cleancache, otherwise
	 * invalidate any existing cleancache entries.  We can't leave
	 * stale data around in the cleancache once our page is gone
	 */
	if (PageUptodate(page) && PageMappedToDisk(page))
		cleancache_put_page(page);
	else
		cleancache_invalidate_page(mapping, page);

	VM_BUG_ON_PAGE(PageTail(page), page);
	VM_BUG_ON_PAGE(page_mapped(page), page);
	if (!IS_ENABLED(CONFIG_DEBUG_VM) && unlikely(page_mapped(page))) {
		int mapcount;

		pr_alert("BUG: Bad page cache in process %s  pfn:%05lx\n",
			 current->comm, page_to_pfn(page));
		dump_page(page, "still mapped when deleted");
		dump_stack();
		add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);

		mapcount = page_mapcount(page);
		if (mapping_exiting(mapping) &&
		    page_count(page) >= mapcount + 2) {
			/*
			 * All vmas have already been torn down, so it's
			 * a good bet that actually the page is unmapped,
			 * and we'd prefer not to leak it: if we're wrong,
			 * some other bad page check should catch it later.
			 */
			page_mapcount_reset(page);
			page_ref_sub(page, mapcount);
		}
	}

	/* hugetlb pages do not participate in page cache accounting. */
	if (PageHuge(page))
		return;

	nr = thp_nr_pages(page);

	__mod_lruvec_page_state(page, NR_FILE_PAGES, -nr);
	if (PageSwapBacked(page)) {
		__mod_lruvec_page_state(page, NR_SHMEM, -nr);
		if (PageTransHuge(page))
			__dec_node_page_state(page, NR_SHMEM_THPS);
	} else if (PageTransHuge(page)) {
		__dec_node_page_state(page, NR_FILE_THPS);
		filemap_nr_thps_dec(mapping);
	}

	/*
	 * At this point page must be either written or cleaned by
	 * truncate.  Dirty page here signals a bug and loss of
	 * unwritten data.
	 *
	 * This fixes dirty accounting after removing the page entirely
	 * but leaves PageDirty set: it has no effect for truncated
	 * page and anyway will be cleared before returning page into
	 * buddy allocator.
	 */
	if (WARN_ON_ONCE(PageDirty(page)))
		account_page_cleaned(page, mapping, inode_to_wb(mapping->host));
}

/*
 * Delete a page from the page cache and free it. Caller has to make
 * sure the page is locked and that nobody else uses it - or that usage
 * is safe.  The caller must hold the i_pages lock.
 */
void __delete_from_page_cache(struct page *page, void *shadow)
{
	struct address_space *mapping = page->mapping;

	trace_mm_filemap_delete_from_page_cache(page);

	unaccount_page_cache_page(mapping, page);
	page_cache_delete(mapping, page, shadow);
}
static void page_cache_free_page(struct address_space *mapping,
				struct page *page)
{
	void (*freepage)(struct page *);

	freepage = mapping->a_ops->freepage;
	if (freepage)
		freepage(page);

	if (PageTransHuge(page) && !PageHuge(page)) {
		page_ref_sub(page, thp_nr_pages(page));
		VM_BUG_ON_PAGE(page_count(page) <= 0, page);
	} else {
		put_page(page);
	}
}
/**
 * delete_from_page_cache - delete page from page cache
 * @page: the page which the kernel is trying to remove from page cache
 *
 * This must be called only on pages that have been verified to be in the page
 * cache and locked.  It will never put the page into the free list, the caller
 * has a reference on the page.
 */
void delete_from_page_cache(struct page *page)
{
	struct address_space *mapping = page_mapping(page);
	unsigned long flags;

	BUG_ON(!PageLocked(page));
	xa_lock_irqsave(&mapping->i_pages, flags);
	__delete_from_page_cache(page, NULL);
	xa_unlock_irqrestore(&mapping->i_pages, flags);

	page_cache_free_page(mapping, page);
}
EXPORT_SYMBOL(delete_from_page_cache);
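
/*
 * Illustrative sketch (not part of mm/filemap.c): the kernel-doc above says
 * the caller must hold the page lock and its own reference. A hypothetical
 * helper following that contract could look like this; the helper name and
 * the mapping check are assumptions for illustration only.
 */
static void example_drop_page(struct page *page)
{
	lock_page(page);
	if (page->mapping)		/* not already truncated away */
		delete_from_page_cache(page);
	unlock_page(page);
	put_page(page);			/* drop the caller's own reference */
}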
/*
 * page_cache_delete_batch - delete several pages from page cache
 * @mapping: the mapping to which pages belong
 * @pvec: pagevec with pages to delete
 *
 * The function walks over mapping->i_pages and removes pages passed in @pvec
 * from the mapping. The function expects @pvec to be sorted by page index
 * and is optimised for it to be dense.
 * It tolerates holes in @pvec (mapping entries at those indices are not
 * modified). The function expects only THP head pages to be present in the
 * @pvec.
 *
 * The function expects the i_pages lock to be held.
 */
static void page_cache_delete_batch(struct address_space *mapping,
			     struct pagevec *pvec)
{
	XA_STATE(xas, &mapping->i_pages, pvec->pages[0]->index);
	int total_pages = 0;
	int i = 0;
	struct page *page;

	mapping_set_update(&xas, mapping);
	xas_for_each(&xas, page, ULONG_MAX) {
		if (i >= pagevec_count(pvec))
			break;

		/* A swap/dax/shadow entry got inserted? Skip it. */
		if (xa_is_value(page))
			continue;
		/*
		 * A page got inserted in our range? Skip it. We have our
		 * pages locked so they are protected from being removed.
		 * If we see a page whose index is higher than ours, it
		 * means our page has been removed, which shouldn't be
		 * possible because we're holding the PageLock.
		 */
		if (page != pvec->pages[i]) {
			VM_BUG_ON_PAGE(page->index > pvec->pages[i]->index,
					page);
			continue;
		}

		WARN_ON_ONCE(!PageLocked(page));

		if (page->index == xas.xa_index)
			page->mapping = NULL;
		/* Leave page->index set: truncation lookup relies on it */

		/*
		 * Move to the next page in the vector if this is a regular
		 * page or the index is of the last sub-page of this compound
		 * page.
		 */
		if (page->index + compound_nr(page) - 1 == xas.xa_index)
			i++;
		xas_store(&xas, NULL);
		total_pages++;
	}
	mapping->nrpages -= total_pages;
}

void delete_from_page_cache_batch(struct address_space *mapping,
				  struct pagevec *pvec)
{
	int i;
	unsigned long flags;

	if (!pagevec_count(pvec))
		return;

	xa_lock_irqsave(&mapping->i_pages, flags);
	for (i = 0; i < pagevec_count(pvec); i++) {
		trace_mm_filemap_delete_from_page_cache(pvec->pages[i]);

		unaccount_page_cache_page(mapping, pvec->pages[i]);
	}
	page_cache_delete_batch(mapping, pvec);
	xa_unlock_irqrestore(&mapping->i_pages, flags);

	for (i = 0; i < pagevec_count(pvec); i++)
		page_cache_free_page(mapping, pvec->pages[i]);
}
int filemap_check_errors(struct address_space *mapping)
{
	int ret = 0;
	/* Check for outstanding write errors */
	if (test_bit(AS_ENOSPC, &mapping->flags) &&
	    test_and_clear_bit(AS_ENOSPC, &mapping->flags))
		ret = -ENOSPC;
	if (test_bit(AS_EIO, &mapping->flags) &&
	    test_and_clear_bit(AS_EIO, &mapping->flags))
		ret = -EIO;
	return ret;
}
EXPORT_SYMBOL(filemap_check_errors);

static int filemap_check_and_keep_errors(struct address_space *mapping)
{
	/* Check for outstanding write errors */
	if (test_bit(AS_EIO, &mapping->flags))
		return -EIO;
	if (test_bit(AS_ENOSPC, &mapping->flags))
		return -ENOSPC;
	return 0;
}
/**
 * __filemap_fdatawrite_range - start writeback on mapping dirty pages in range
 * @mapping:	address space structure to write
 * @start:	offset in bytes where the range starts
 * @end:	offset in bytes where the range ends (inclusive)
 * @sync_mode:	enable synchronous operation
 *
 * Start writeback against all of a mapping's dirty pages that lie
 * within the byte offsets <start, end> inclusive.
 *
 * If sync_mode is WB_SYNC_ALL then this is a "data integrity" operation, as
 * opposed to a regular memory cleansing writeback.  The difference between
 * these two operations is that if a dirty page/buffer is encountered, it must
 * be waited upon, and not just skipped over.
 *
 * Return: %0 on success, negative error code otherwise.
 */
int __filemap_fdatawrite_range(struct address_space *mapping, loff_t start,
				loff_t end, int sync_mode)
{
	int ret;
	struct writeback_control wbc = {
		.sync_mode = sync_mode,
		.nr_to_write = LONG_MAX,
		.range_start = start,
		.range_end = end,
	};

	if (!mapping_can_writeback(mapping) ||
	    !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
		return 0;

	wbc_attach_fdatawrite_inode(&wbc, mapping->host);
	ret = do_writepages(mapping, &wbc);
	wbc_detach_inode(&wbc);
	return ret;
}

static inline int __filemap_fdatawrite(struct address_space *mapping,
	int sync_mode)
{
	return __filemap_fdatawrite_range(mapping, 0, LLONG_MAX, sync_mode);
}

int filemap_fdatawrite(struct address_space *mapping)
{
	return __filemap_fdatawrite(mapping, WB_SYNC_ALL);
}
EXPORT_SYMBOL(filemap_fdatawrite);

int filemap_fdatawrite_range(struct address_space *mapping, loff_t start,
				loff_t end)
{
	return __filemap_fdatawrite_range(mapping, start, end, WB_SYNC_ALL);
}
EXPORT_SYMBOL(filemap_fdatawrite_range);

/**
 * filemap_flush - mostly a non-blocking flush
 * @mapping:	target address_space
 *
 * This is a mostly non-blocking flush.  Not suitable for data-integrity
 * purposes - I/O may not be started against all dirty pages.
 *
 * Return: %0 on success, negative error code otherwise.
 */
int filemap_flush(struct address_space *mapping)
{
	return __filemap_fdatawrite(mapping, WB_SYNC_NONE);
}
EXPORT_SYMBOL(filemap_flush);
/**
 * filemap_range_has_page - check if a page exists in range.
 * @mapping:	address space within which to check
 * @start_byte:	offset in bytes where the range starts
 * @end_byte:	offset in bytes where the range ends (inclusive)
 *
 * Find at least one page in the range supplied, usually used to check if
 * direct writing in this range will trigger a writeback.
 *
 * Return: %true if at least one page exists in the specified range,
 * %false otherwise.
 */
bool filemap_range_has_page(struct address_space *mapping,
			   loff_t start_byte, loff_t end_byte)
{
	struct page *page;
	XA_STATE(xas, &mapping->i_pages, start_byte >> PAGE_SHIFT);
	pgoff_t max = end_byte >> PAGE_SHIFT;

	if (end_byte < start_byte)
		return false;

	rcu_read_lock();
	for (;;) {
		page = xas_find(&xas, max);
		if (xas_retry(&xas, page))
			continue;
		/* Shadow entries don't count */
		if (xa_is_value(page))
			continue;
		/*
		 * We don't need to try to pin this page; we're about to
		 * release the RCU lock anyway.  It is enough to know that
		 * there was a page here recently.
		 */
		break;
	}
	rcu_read_unlock();

	return page != NULL;
}
EXPORT_SYMBOL(filemap_range_has_page);
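
/*
 * Illustrative sketch (not part of mm/filemap.c): a non-blocking direct-write
 * path might use filemap_range_has_page() to bail out rather than wait for
 * writeback and invalidation. The helper name and the -EAGAIN policy are
 * assumptions for illustration, loosely modelled on IOCB_NOWAIT handling.
 */
static int example_dio_check(struct kiocb *iocb, loff_t pos, size_t len)
{
	struct address_space *mapping = iocb->ki_filp->f_mapping;

	if ((iocb->ki_flags & IOCB_NOWAIT) &&
	    filemap_range_has_page(mapping, pos, pos + len - 1))
		return -EAGAIN;	/* cached pages present; would have to flush */
	return 0;
}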
static void __filemap_fdatawait_range(struct address_space *mapping,
				     loff_t start_byte, loff_t end_byte)
{
	pgoff_t index = start_byte >> PAGE_SHIFT;
	pgoff_t end = end_byte >> PAGE_SHIFT;
	struct pagevec pvec;
	int nr_pages;

	if (end_byte < start_byte)
		return;

	pagevec_init(&pvec);
	while (index <= end) {
		unsigned i;

		nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index,
				end, PAGECACHE_TAG_WRITEBACK);
		if (!nr_pages)
			break;

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];

			wait_on_page_writeback(page);
			ClearPageError(page);
		}
		pagevec_release(&pvec);
		cond_resched();
	}
}

/**
 * filemap_fdatawait_range - wait for writeback to complete
 * @mapping:		address space structure to wait for
 * @start_byte:		offset in bytes where the range starts
 * @end_byte:		offset in bytes where the range ends (inclusive)
 *
 * Walk the list of under-writeback pages of the given address space
 * in the given range and wait for all of them.  Check error status of
 * the address space and return it.
 *
 * Since the error status of the address space is cleared by this function,
 * callers are responsible for checking the return value and handling and/or
 * reporting the error.
 *
 * Return: error status of the address space.
 */
int filemap_fdatawait_range(struct address_space *mapping, loff_t start_byte,
			    loff_t end_byte)
{
	__filemap_fdatawait_range(mapping, start_byte, end_byte);
	return filemap_check_errors(mapping);
}
EXPORT_SYMBOL(filemap_fdatawait_range);

/**
 * filemap_fdatawait_range_keep_errors - wait for writeback to complete
 * @mapping:		address space structure to wait for
 * @start_byte:		offset in bytes where the range starts
 * @end_byte:		offset in bytes where the range ends (inclusive)
 *
 * Walk the list of under-writeback pages of the given address space in the
 * given range and wait for all of them.  Unlike filemap_fdatawait_range(),
 * this function does not clear error status of the address space.
 *
 * Use this function if callers don't handle errors themselves.  Expected
 * call sites are system-wide / filesystem-wide data flushers: e.g. sync(2),
 * fsfreeze(8)
 */
int filemap_fdatawait_range_keep_errors(struct address_space *mapping,
		loff_t start_byte, loff_t end_byte)
{
	__filemap_fdatawait_range(mapping, start_byte, end_byte);
	return filemap_check_and_keep_errors(mapping);
}
EXPORT_SYMBOL(filemap_fdatawait_range_keep_errors);

/**
 * file_fdatawait_range - wait for writeback to complete
 * @file:		file pointing to address space structure to wait for
 * @start_byte:		offset in bytes where the range starts
 * @end_byte:		offset in bytes where the range ends (inclusive)
 *
 * Walk the list of under-writeback pages of the address space that file
 * refers to, in the given range and wait for all of them.  Check error
 * status of the address space vs. the file->f_wb_err cursor and return it.
 *
 * Since the error status of the file is advanced by this function,
 * callers are responsible for checking the return value and handling and/or
 * reporting the error.
 *
 * Return: error status of the address space vs. the file->f_wb_err cursor.
 */
int file_fdatawait_range(struct file *file, loff_t start_byte, loff_t end_byte)
{
	struct address_space *mapping = file->f_mapping;

	__filemap_fdatawait_range(mapping, start_byte, end_byte);
	return file_check_and_advance_wb_err(file);
}
EXPORT_SYMBOL(file_fdatawait_range);

/**
 * filemap_fdatawait_keep_errors - wait for writeback without clearing errors
 * @mapping: address space structure to wait for
 *
 * Walk the list of under-writeback pages of the given address space
 * and wait for all of them.  Unlike filemap_fdatawait(), this function
 * does not clear error status of the address space.
 *
 * Use this function if callers don't handle errors themselves.  Expected
 * call sites are system-wide / filesystem-wide data flushers: e.g. sync(2),
 * fsfreeze(8)
 *
 * Return: error status of the address space.
 */
int filemap_fdatawait_keep_errors(struct address_space *mapping)
{
	__filemap_fdatawait_range(mapping, 0, LLONG_MAX);
	return filemap_check_and_keep_errors(mapping);
}
EXPORT_SYMBOL(filemap_fdatawait_keep_errors);

/* Returns true if writeback might be needed or already in progress. */
static bool mapping_needs_writeback(struct address_space *mapping)
{
	if (dax_mapping(mapping))
		return mapping->nrexceptional;

	return mapping->nrpages;
}

/**
 * filemap_write_and_wait_range - write out & wait on a file range
 * @mapping:	the address_space for the pages
 * @lstart:	offset in bytes where the range starts
 * @lend:	offset in bytes where the range ends (inclusive)
 *
 * Write out and wait upon file offsets lstart->lend, inclusive.
 *
 * Note that @lend is inclusive (describes the last byte to be written) so
 * that this function can be used to write to the very end-of-file (end = -1).
 *
 * Return: error status of the address space.
 */
int filemap_write_and_wait_range(struct address_space *mapping,
				 loff_t lstart, loff_t lend)
{
	int err = 0;

	if (mapping_needs_writeback(mapping)) {
		err = __filemap_fdatawrite_range(mapping, lstart, lend,
						 WB_SYNC_ALL);
		/*
		 * Even if the above returned error, the pages may be
		 * written partially (e.g. -ENOSPC), so we wait for it.
		 * But the -EIO is special case, it may indicate the worst
		 * thing (e.g. bug) happened, so we avoid waiting for it.
		 */
		if (err != -EIO) {
			int err2 = filemap_fdatawait_range(mapping,
						lstart, lend);
			if (!err)
				err = err2;
		} else {
			/* Clear any previously stored errors */
			filemap_check_errors(mapping);
		}
	} else {
		err = filemap_check_errors(mapping);
	}
	return err;
}
EXPORT_SYMBOL(filemap_write_and_wait_range);

void __filemap_set_wb_err(struct address_space *mapping, int err)
{
	errseq_t eseq = errseq_set(&mapping->wb_err, err);

	trace_filemap_set_wb_err(mapping, eseq);
}
EXPORT_SYMBOL(__filemap_set_wb_err);

/**
 * file_check_and_advance_wb_err - report wb error (if any) that was previously
 * 				   and advance wb_err to current one
 * @file: struct file on which the error is being reported
 *
 * When userland calls fsync (or something like nfsd does the equivalent), we
 * want to report any writeback errors that occurred since the last fsync (or
 * since the file was opened if there haven't been any).
 *
 * Grab the wb_err from the mapping. If it matches what we have in the file,
 * then just quickly return 0. The file is all caught up.
 *
 * If it doesn't match, then take the mapping value, set the "seen" flag in
 * it and try to swap it into place. If it works, or another task beat us
 * to it with the new value, then update the f_wb_err and return the error
 * portion. The error at this point must be reported via proper channels
 * (a'la fsync, or NFS COMMIT operation, etc.).
 *
 * While we handle mapping->wb_err with atomic operations, the f_wb_err
 * value is protected by the f_lock since we must ensure that it reflects
 * the latest value swapped in for this file descriptor.
 *
 * Return: %0 on success, negative error code otherwise.
 */
int file_check_and_advance_wb_err(struct file *file)
{
	int err = 0;
	errseq_t old = READ_ONCE(file->f_wb_err);
	struct address_space *mapping = file->f_mapping;

	/* Locklessly handle the common case where nothing has changed */
	if (errseq_check(&mapping->wb_err, old)) {
		/* Something changed, must use slow path */
		spin_lock(&file->f_lock);
		old = file->f_wb_err;
		err = errseq_check_and_advance(&mapping->wb_err,
						&file->f_wb_err);
		trace_file_check_and_advance_wb_err(file, old);
		spin_unlock(&file->f_lock);
	}

	/*
	 * We're mostly using this function as a drop in replacement for
	 * filemap_check_errors. Clear AS_EIO/AS_ENOSPC to emulate the effect
	 * that the legacy code would have had on these flags.
	 */
	clear_bit(AS_EIO, &mapping->flags);
	clear_bit(AS_ENOSPC, &mapping->flags);

	return err;
}
EXPORT_SYMBOL(file_check_and_advance_wb_err);

/**
 * file_write_and_wait_range - write out & wait on a file range
 * @file:	file pointing to address_space with pages
 * @lstart:	offset in bytes where the range starts
 * @lend:	offset in bytes where the range ends (inclusive)
 *
 * Write out and wait upon file offsets lstart->lend, inclusive.
 *
 * Note that @lend is inclusive (describes the last byte to be written) so
 * that this function can be used to write to the very end-of-file (end = -1).
 *
 * After writing out and waiting on the data, we check and advance the
 * f_wb_err cursor to the latest value, and return any errors detected there.
 *
 * Return: %0 on success, negative error code otherwise.
 */
int file_write_and_wait_range(struct file *file, loff_t lstart, loff_t lend)
{
	int err = 0, err2;
	struct address_space *mapping = file->f_mapping;

	if (mapping_needs_writeback(mapping)) {
		err = __filemap_fdatawrite_range(mapping, lstart, lend,
						 WB_SYNC_ALL);
		/* See comment of filemap_write_and_wait() */
		if (err != -EIO)
			__filemap_fdatawait_range(mapping, lstart, lend);
	}
	err2 = file_check_and_advance_wb_err(file);
	if (!err)
		err = err2;
	return err;
}
EXPORT_SYMBOL(file_write_and_wait_range);
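
/*
 * Illustrative sketch (not part of mm/filemap.c): a filesystem's ->fsync()
 * typically starts with file_write_and_wait_range(), which both flushes the
 * range and samples the errseq-based wb_err cursor above. The helper name and
 * the "flush private metadata" step are assumptions for illustration.
 */
static int example_fsync(struct file *file, loff_t start, loff_t end,
			 int datasync)
{
	int err;

	err = file_write_and_wait_range(file, start, end);
	if (err)
		return err;	/* unreported writeback error, per f_wb_err */

	/* a real filesystem would flush its own metadata/journal here */
	return 0;
}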
/**
 * replace_page_cache_page - replace a pagecache page with a new one
 * @old:	page to be replaced
 * @new:	page to replace with
 * @gfp_mask:	allocation mode
 *
 * This function replaces a page in the pagecache with a new one.  On
 * success it acquires the pagecache reference for the new page and
 * drops it for the old page.  Both the old and new pages must be
 * locked.  This function does not add the new page to the LRU, the
 * caller must do that.
 *
 * The remove + add is atomic.  This function cannot fail.
 *
 * Return: %0
 */
int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
{
	struct address_space *mapping = old->mapping;
	void (*freepage)(struct page *) = mapping->a_ops->freepage;
	pgoff_t offset = old->index;
	XA_STATE(xas, &mapping->i_pages, offset);
	unsigned long flags;

	VM_BUG_ON_PAGE(!PageLocked(old), old);
	VM_BUG_ON_PAGE(!PageLocked(new), new);
	VM_BUG_ON_PAGE(new->mapping, new);

	get_page(new);
	new->mapping = mapping;
	new->index = offset;

	mem_cgroup_migrate(old, new);

	xas_lock_irqsave(&xas, flags);
	xas_store(&xas, new);

	old->mapping = NULL;
	/* hugetlb pages do not participate in page cache accounting. */
	if (!PageHuge(old))
		__dec_lruvec_page_state(old, NR_FILE_PAGES);
	if (!PageHuge(new))
		__inc_lruvec_page_state(new, NR_FILE_PAGES);
	if (PageSwapBacked(old))
		__dec_lruvec_page_state(old, NR_SHMEM);
	if (PageSwapBacked(new))
		__inc_lruvec_page_state(new, NR_SHMEM);
	xas_unlock_irqrestore(&xas, flags);
	if (freepage)
		freepage(old);
	put_page(old);

	return 0;
}
EXPORT_SYMBOL_GPL(replace_page_cache_page);
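
/*
 * Illustrative sketch (not part of mm/filemap.c): per the kernel-doc above,
 * the caller locks both pages and must put the new page on the LRU itself.
 * The helper name and surrounding checks are assumptions for illustration.
 */
static int example_swap_cached_page(struct page *oldpage, struct page *newpage)
{
	if (WARN_ON(!PageLocked(oldpage) || !PageLocked(newpage)))
		return -EINVAL;

	replace_page_cache_page(oldpage, newpage, GFP_KERNEL);	/* cannot fail */
	lru_cache_add(newpage);		/* caller is responsible for the LRU */
	return 0;
}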
noinline int __add_to_page_cache_locked(struct page *page,
					struct address_space *mapping,
					pgoff_t offset, gfp_t gfp,
					void **shadowp)
{
	XA_STATE(xas, &mapping->i_pages, offset);
	int huge = PageHuge(page);
	int error;

	VM_BUG_ON_PAGE(!PageLocked(page), page);
	VM_BUG_ON_PAGE(PageSwapBacked(page), page);
	mapping_set_update(&xas, mapping);

	get_page(page);
	page->mapping = mapping;
	page->index = offset;

	if (!huge) {
		error = mem_cgroup_charge(page, current->mm, gfp);
		if (error)
			goto error;
	}
	gfp &= GFP_RECLAIM_MASK;

	do {
		unsigned int order = xa_get_order(xas.xa, xas.xa_index);
		void *entry, *old = NULL;

		if (order > thp_order(page))
			xas_split_alloc(&xas, xa_load(xas.xa, xas.xa_index),
					order, gfp);
		xas_lock_irq(&xas);
		xas_for_each_conflict(&xas, entry) {
			old = entry;
			if (!xa_is_value(entry)) {
				xas_set_err(&xas, -EEXIST);
				goto unlock;
			}
		}

		if (old) {
			if (shadowp)
				*shadowp = old;
			/* entry may have been split before we acquired lock */
			order = xa_get_order(xas.xa, xas.xa_index);
			if (order > thp_order(page)) {
				xas_split(&xas, old, order);
				xas_reset(&xas);
			}
		}

		xas_store(&xas, page);
		if (xas_error(&xas))
			goto unlock;

		if (old)
			mapping->nrexceptional--;
		mapping->nrpages++;

		/* hugetlb pages do not participate in page cache accounting */
		if (!huge)
			__inc_lruvec_page_state(page, NR_FILE_PAGES);
unlock:
		xas_unlock_irq(&xas);
	} while (xas_nomem(&xas, gfp));

	if (xas_error(&xas)) {
		error = xas_error(&xas);
		goto error;
	}

	trace_mm_filemap_add_to_page_cache(page);
	return 0;
error:
	page->mapping = NULL;
	/* Leave page->index set: truncation relies upon it */
	put_page(page);
	return error;
}
ALLOW_ERROR_INJECTION(__add_to_page_cache_locked, ERRNO);
/**
 * add_to_page_cache_locked - add a locked page to the pagecache
 * @page:	page to add
 * @mapping:	the page's address_space
 * @offset:	page index
 * @gfp_mask:	page allocation mode
 *
 * This function is used to add a page to the pagecache. It must be locked.
 * This function does not add the page to the LRU.  The caller must do that.
 *
 * Return: %0 on success, negative error code otherwise.
 */
int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
		pgoff_t offset, gfp_t gfp_mask)
{
	return __add_to_page_cache_locked(page, mapping, offset,
					  gfp_mask, NULL);
}
EXPORT_SYMBOL(add_to_page_cache_locked);

int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
				pgoff_t offset, gfp_t gfp_mask)
{
	void *shadow = NULL;
	int ret;

	__SetPageLocked(page);
	ret = __add_to_page_cache_locked(page, mapping, offset,
					 gfp_mask, &shadow);
	if (unlikely(ret))
		__ClearPageLocked(page);
	else {
		/*
		 * The page might have been evicted from cache only
		 * recently, in which case it should be activated like
		 * any other repeatedly accessed page.
		 * The exception is pages getting rewritten; evicting other
		 * data from the working set, only to cache data that will
		 * get overwritten with something else, is a waste of memory.
		 */
		WARN_ON_ONCE(PageActive(page));
		if (!(gfp_mask & __GFP_WRITE) && shadow)
			workingset_refault(page, shadow);
		lru_cache_add(page);
	}
	return ret;
}
EXPORT_SYMBOL_GPL(add_to_page_cache_lru);

#ifdef CONFIG_NUMA
struct page *__page_cache_alloc(gfp_t gfp)
{
	int n;
	struct page *page;

	if (cpuset_do_page_mem_spread()) {
		unsigned int cpuset_mems_cookie;

		do {
			cpuset_mems_cookie = read_mems_allowed_begin();
			n = cpuset_mem_spread_node();
			page = __alloc_pages_node(n, gfp, 0);
		} while (!page && read_mems_allowed_retry(cpuset_mems_cookie));

		return page;
	}
	return alloc_pages(gfp, 0);
}
EXPORT_SYMBOL(__page_cache_alloc);
#endif
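
/*
 * Illustrative sketch (not part of mm/filemap.c): the usual buffered read-miss
 * pattern allocates a page, inserts it locked with add_to_page_cache_lru(),
 * and only then starts I/O. The helper name is an assumption for illustration,
 * error handling is reduced to the minimum, and passing a NULL file to
 * ->readpage() is only valid for filesystems that tolerate it.
 */
static struct page *example_read_cache_page(struct address_space *mapping,
					    pgoff_t index)
{
	struct page *page;
	int err;

	page = __page_cache_alloc(mapping_gfp_mask(mapping));
	if (!page)
		return ERR_PTR(-ENOMEM);

	err = add_to_page_cache_lru(page, mapping, index,
			mapping_gfp_constraint(mapping, GFP_KERNEL));
	if (err) {
		/* -EEXIST means another task inserted the page first */
		put_page(page);
		return ERR_PTR(err);
	}

	/* page is locked; ->readpage unlocks it when the read completes */
	err = mapping->a_ops->readpage(NULL, page);
	if (err) {
		put_page(page);
		return ERR_PTR(err);
	}
	return page;	/* caller waits for PG_locked/PG_uptodate as needed */
}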

/*
 * In order to wait for pages to become available there must be
 * waitqueues associated with pages. By using a hash table of
 * waitqueues where the bucket discipline is to maintain all
 * waiters on the same queue and wake all when any of the pages
 * become available, and for the woken contexts to check to be
 * sure the appropriate page became available, this saves space
 * at a cost of "thundering herd" phenomena during rare hash
 * collisions.
 */
#define PAGE_WAIT_TABLE_BITS 8
#define PAGE_WAIT_TABLE_SIZE (1 << PAGE_WAIT_TABLE_BITS)
static wait_queue_head_t page_wait_table[PAGE_WAIT_TABLE_SIZE] __cacheline_aligned;

static wait_queue_head_t *page_waitqueue(struct page *page)
{
	return &page_wait_table[hash_ptr(page, PAGE_WAIT_TABLE_BITS)];
}

void __init pagecache_init(void)
{
	int i;

	for (i = 0; i < PAGE_WAIT_TABLE_SIZE; i++)
		init_waitqueue_head(&page_wait_table[i]);

	page_writeback_init();
}

/*
 * The page wait code treats the "wait->flags" somewhat unusually, because
 * we have multiple different kinds of waits, not just the usual "exclusive"
 * one.
 *
 * We have:
 *
 *  (a) no special bits set:
 *
 *	We're just waiting for the bit to be released, and when a waker
 *	calls the wakeup function, we set WQ_FLAG_WOKEN and wake it up,
 *	and remove it from the wait queue.
 *
 *	Simple and straightforward.
 *
 *  (b) WQ_FLAG_EXCLUSIVE:
 *
 *	The waiter is waiting to get the lock, and only one waiter should
 *	be woken up to avoid any thundering herd behavior. We'll set the
 *	WQ_FLAG_WOKEN bit, wake it up, and remove it from the wait queue.
 *
 *	This is the traditional exclusive wait.
 *
 *  (c) WQ_FLAG_EXCLUSIVE | WQ_FLAG_CUSTOM:
 *
 *	The waiter is waiting to get the bit, and additionally wants the
 *	lock to be transferred to it for fair lock behavior. If the lock
 *	cannot be taken, we stop walking the wait queue without waking
 *	the waiter.
 *
 *	This is the "fair lock handoff" case, and in addition to setting
 *	WQ_FLAG_WOKEN, we set WQ_FLAG_DONE to let the waiter easily see
 *	that it now has the lock.
 */
static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *arg)
{
	unsigned int flags;
	struct wait_page_key *key = arg;
	struct wait_page_queue *wait_page
		= container_of(wait, struct wait_page_queue, wait);

	if (!wake_page_match(wait_page, key))
		return 0;

	/*
	 * If it's a lock handoff wait, we get the bit for it, and
	 * stop walking (and do not wake it up) if we can't.
	 */
	flags = wait->flags;
	if (flags & WQ_FLAG_EXCLUSIVE) {
		if (test_bit(key->bit_nr, &key->page->flags))
			return -1;
		if (flags & WQ_FLAG_CUSTOM) {
			if (test_and_set_bit(key->bit_nr, &key->page->flags))
				return -1;
			flags |= WQ_FLAG_DONE;
		}
	}

	/*
	 * We are holding the wait-queue lock, but the waiter that
	 * is waiting for this will be checking the flags without
	 * any locking.
	 *
	 * So update the flags atomically, and wake up the waiter
	 * afterwards to avoid any races. This store-release pairs
	 * with the load-acquire in wait_on_page_bit_common().
	 */
	smp_store_release(&wait->flags, flags | WQ_FLAG_WOKEN);
	wake_up_state(wait->private, mode);

	/*
	 * Ok, we have successfully done what we're waiting for,
	 * and we can unconditionally remove the wait entry.
	 *
	 * Note that this pairs with the "finish_wait()" in the
	 * waiter, and has to be the absolute last thing we do.
	 * After this list_del_init(&wait->entry) the wait entry
	 * might be de-allocated and the process might even have
	 * exited.
	 */
	list_del_init_careful(&wait->entry);
	return (flags & WQ_FLAG_EXCLUSIVE) != 0;
}
static void wake_up_page_bit(struct page *page, int bit_nr)
{
	wait_queue_head_t *q = page_waitqueue(page);
	struct wait_page_key key;
	unsigned long flags;
	wait_queue_entry_t bookmark;

	key.page = page;
	key.bit_nr = bit_nr;
	key.page_match = 0;

	bookmark.flags = 0;
	bookmark.private = NULL;
	bookmark.func = NULL;
	INIT_LIST_HEAD(&bookmark.entry);

	spin_lock_irqsave(&q->lock, flags);
	__wake_up_locked_key_bookmark(q, TASK_NORMAL, &key, &bookmark);

	while (bookmark.flags & WQ_FLAG_BOOKMARK) {
		/*
		 * Take a breather from holding the lock,
		 * allow pages that finish wake up asynchronously
		 * to acquire the lock and remove themselves
		 * from wait queue
		 */
		spin_unlock_irqrestore(&q->lock, flags);
		cpu_relax();
		spin_lock_irqsave(&q->lock, flags);
		__wake_up_locked_key_bookmark(q, TASK_NORMAL, &key, &bookmark);
	}

	/*
	 * It is possible for other pages to have collided on the waitqueue
	 * hash, so in that case check for a page match. That prevents a long-
	 * term waiter
	 *
	 * It is still possible to miss a case here, when we woke page waiters
	 * and removed them from the waitqueue, but there are still other
	 * page waiters.
	 */
	if (!waitqueue_active(q) || !key.page_match) {
		ClearPageWaiters(page);
		/*
		 * It's possible to miss clearing Waiters here, when we woke
		 * our page waiters, but the hashed waitqueue has waiters for
		 * other pages on it.
		 *
		 * That's okay, it's a rare case. The next waker will clear it.
		 */
	}
	spin_unlock_irqrestore(&q->lock, flags);
}

static void wake_up_page(struct page *page, int bit)
{
	if (!PageWaiters(page))
		return;
	wake_up_page_bit(page, bit);
}

/*
 * A choice of three behaviors for wait_on_page_bit_common():
 */
enum behavior {
	EXCLUSIVE,	/* Hold ref to page and take the bit when woken, like
			 * __lock_page() waiting on then setting PG_locked.
			 */
	SHARED,		/* Hold ref to page and check the bit when woken, like
			 * wait_on_page_writeback() waiting on PG_writeback.
			 */
	DROP,		/* Drop ref to page before wait, no check when woken,
			 * like put_and_wait_on_page_locked() on PG_locked.
			 */
};
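
/*
 * Illustrative sketch (not part of mm/filemap.c): from a caller's point of
 * view the behaviors above surface through the usual page-flag waiters. The
 * helper below is hypothetical and only strings those public calls together;
 * it is not how the behaviors are selected internally.
 */
static void example_wait_then_lock(struct page *page)
{
	/* SHARED-style wait: keep our reference, wait for PG_writeback. */
	wait_on_page_writeback(page);

	/* EXCLUSIVE-style wait: take PG_locked, one waiter woken at a time. */
	if (!trylock_page(page))
		lock_page(page);

	/* ... operate on the locked page ... */
	unlock_page(page);
}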

/*
 * Attempt to check (or get) the page bit, and mark us done
 * if successful.
 */
static inline bool trylock_page_bit_common(struct page *page, int bit_nr,
					struct wait_queue_entry *wait)
{
	if (wait->flags & WQ_FLAG_EXCLUSIVE) {
		if (test_and_set_bit(bit_nr, &page->flags))
			return false;
	} else if (test_bit(bit_nr, &page->flags))
		return false;

	wait->flags |= WQ_FLAG_WOKEN | WQ_FLAG_DONE;
	return true;
}

/* How many times do we accept lock stealing from under a waiter? */
int sysctl_page_lock_unfairness = 5;
dec0fd4a0 ANDROID: attribut... |
1150 |
static inline __sched int wait_on_page_bit_common(wait_queue_head_t *q, |
9a1ea439b mm: put_and_wait_... |
1151 |
struct page *page, int bit_nr, int state, enum behavior behavior) |
629060270 mm: add PageWaite... |
1152 |
{ |
5ef64cc89 mm: allow a contr... |
1153 |
int unfairness = sysctl_page_lock_unfairness; |
629060270 mm: add PageWaite... |
1154 |
struct wait_page_queue wait_page; |
ac6424b98 sched/wait: Renam... |
1155 |
wait_queue_entry_t *wait = &wait_page.wait; |
b1d29ba82 delayacct: track ... |
1156 |
bool thrashing = false; |
9a1ea439b mm: put_and_wait_... |
1157 |
bool delayacct = false; |
eb414681d psi: pressure sta... |
1158 |
unsigned long pflags; |
629060270 mm: add PageWaite... |
1159 |
|
eb414681d psi: pressure sta... |
1160 |
if (bit_nr == PG_locked && |
b1d29ba82 delayacct: track ... |
1161 |
!PageUptodate(page) && PageWorkingset(page)) { |
9a1ea439b mm: put_and_wait_... |
1162 |
if (!PageSwapBacked(page)) { |
eb414681d psi: pressure sta... |
1163 |
delayacct_thrashing_start(); |
9a1ea439b mm: put_and_wait_... |
1164 1165 |
delayacct = true; } |
eb414681d psi: pressure sta... |
1166 |
psi_memstall_enter(&pflags); |
b1d29ba82 delayacct: track ... |
1167 1168 |
thrashing = true; } |
629060270 mm: add PageWaite... |
1169 1170 1171 1172 |
init_wait(wait); wait->func = wake_page_function; wait_page.page = page; wait_page.bit_nr = bit_nr; |
5ef64cc89 mm: allow a contr... |
1173 1174 1175 1176 1177 1178 1179 |
repeat: wait->flags = 0; if (behavior == EXCLUSIVE) { wait->flags = WQ_FLAG_EXCLUSIVE; if (--unfairness < 0) wait->flags |= WQ_FLAG_CUSTOM; } |
2a9127fcf mm: rewrite wait_... |
1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 |
/* * Do one last check whether we can get the * page bit synchronously. * * Do the SetPageWaiters() marking before that * to let any waker we _just_ missed know they * need to wake us up (otherwise they'll never * even go to the slow case that looks at the * page queue), and add ourselves to the wait * queue if we need to sleep. * * This part needs to be done under the queue * lock to avoid races. */ spin_lock_irq(&q->lock); SetPageWaiters(page); if (!trylock_page_bit_common(page, bit_nr, wait)) __add_wait_queue_entry_tail(q, wait); spin_unlock_irq(&q->lock); |
629060270 mm: add PageWaite... |
1199 |
|
2a9127fcf mm: rewrite wait_... |
1200 1201 |
/* * From now on, all the logic will be based on |
5ef64cc89 mm: allow a contr... |
1202 1203 1204 |
* the WQ_FLAG_WOKEN and WQ_FLAG_DONE flags, to * see whether the page bit testing has already * been done by the wake function.
2a9127fcf mm: rewrite wait_... |
1205 1206 1207 1208 1209 |
* * We can drop our reference to the page. */ if (behavior == DROP) put_page(page); |
629060270 mm: add PageWaite... |
1210 |
|
5ef64cc89 mm: allow a contr... |
1211 1212 1213 1214 1215 1216 |
/* * Note that until the "finish_wait()", or until * we see the WQ_FLAG_WOKEN flag, we need to * be very careful with the 'wait->flags', because * we may race with a waker that sets them. */ |
2a9127fcf mm: rewrite wait_... |
1217 |
for (;;) { |
5ef64cc89 mm: allow a contr... |
1218 |
unsigned int flags; |
629060270 mm: add PageWaite... |
1219 |
set_current_state(state); |
5ef64cc89 mm: allow a contr... |
1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 |
/* Loop until we've been woken or interrupted */ flags = smp_load_acquire(&wait->flags); if (!(flags & WQ_FLAG_WOKEN)) { if (signal_pending_state(state, current)) break; io_schedule(); continue; } /* If we were non-exclusive, we're done */ if (behavior != EXCLUSIVE) |
a8b169afb Avoid page waitqu... |
1232 |
break; |
9a1ea439b mm: put_and_wait_... |
1233 |
|
5ef64cc89 mm: allow a contr... |
1234 1235 |
/* If the waker got the lock for us, we're done */ if (flags & WQ_FLAG_DONE) |
9a1ea439b mm: put_and_wait_... |
1236 |
break; |
2a9127fcf mm: rewrite wait_... |
1237 |
|
5ef64cc89 mm: allow a contr... |
1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 |
/* * Otherwise, if we're getting the lock, we need to * try to get it ourselves. * * And if that fails, we'll have to retry this all. */ if (unlikely(test_and_set_bit(bit_nr, &page->flags))) goto repeat; wait->flags |= WQ_FLAG_DONE; break; |
629060270 mm: add PageWaite... |
1249 |
} |
5ef64cc89 mm: allow a contr... |
1250 1251 1252 1253 1254 1255 |
/* * If a signal happened, this 'finish_wait()' may remove the last * waiter from the wait-queues, but the PageWaiters bit will remain * set. That's ok. The next wakeup will take care of it, and trying * to do it here would be difficult and prone to races. */ |
629060270 mm: add PageWaite... |
1256 |
finish_wait(q, wait); |
eb414681d psi: pressure sta... |
1257 |
if (thrashing) { |
9a1ea439b mm: put_and_wait_... |
1258 |
if (delayacct) |
eb414681d psi: pressure sta... |
1259 1260 1261 |
delayacct_thrashing_end(); psi_memstall_leave(&pflags); } |
b1d29ba82 delayacct: track ... |
1262 |
|
629060270 mm: add PageWaite... |
1263 |
/* |
5ef64cc89 mm: allow a contr... |
1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 |
* NOTE! The wait->flags weren't stable until we've done the * 'finish_wait()', and we could have exited the loop above due * to a signal, and had a wakeup event happen after the signal * test but before the 'finish_wait()'. * * So only after the finish_wait() can we reliably determine * if we got woken up or not, so we can now figure out the final * return value based on that state without races. * * Also note that WQ_FLAG_WOKEN is sufficient for a non-exclusive * waiter, but an exclusive one requires WQ_FLAG_DONE. |
629060270 mm: add PageWaite... |
1275 |
*/ |
5ef64cc89 mm: allow a contr... |
1276 1277 |
if (behavior == EXCLUSIVE) return wait->flags & WQ_FLAG_DONE ? 0 : -EINTR; |
629060270 mm: add PageWaite... |
1278 |
|
2a9127fcf mm: rewrite wait_... |
1279 |
return wait->flags & WQ_FLAG_WOKEN ? 0 : -EINTR; |
629060270 mm: add PageWaite... |
1280 |
} |
dec0fd4a0 ANDROID: attribut... |
1281 |
__sched void wait_on_page_bit(struct page *page, int bit_nr) |
629060270 mm: add PageWaite... |
1282 1283 |
{ wait_queue_head_t *q = page_waitqueue(page); |
9a1ea439b mm: put_and_wait_... |
1284 |
wait_on_page_bit_common(q, page, bit_nr, TASK_UNINTERRUPTIBLE, SHARED); |
629060270 mm: add PageWaite... |
1285 1286 |
} EXPORT_SYMBOL(wait_on_page_bit); |
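Usage note: most callers reach wait_on_page_bit() through small wrappers rather than calling it directly. A minimal sketch (not part of filemap.c; the example_* name is hypothetical) of waiting for PG_writeback to clear, in the spirit of wait_on_page_writeback() in <linux/pagemap.h>:

#include <linux/pagemap.h>
#include <linux/page-flags.h>

/* Sleep until any writeback in flight on @page has completed. */
static void example_wait_for_writeback(struct page *page)
{
        if (PageWriteback(page))
                wait_on_page_bit(page, PG_writeback);
}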
dec0fd4a0 ANDROID: attribut... |
1287 |
__sched int wait_on_page_bit_killable(struct page *page, int bit_nr) |
629060270 mm: add PageWaite... |
1288 1289 |
{ wait_queue_head_t *q = page_waitqueue(page); |
9a1ea439b mm: put_and_wait_... |
1290 |
return wait_on_page_bit_common(q, page, bit_nr, TASK_KILLABLE, SHARED); |
cbbce8220 SCHED: add some "... |
1291 |
} |
4343d0087 afs: Get rid of t... |
1292 |
EXPORT_SYMBOL(wait_on_page_bit_killable); |
cbbce8220 SCHED: add some "... |
1293 |
|
dd3e6d503 mm: add support f... |
1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 |
static int __wait_on_page_locked_async(struct page *page, struct wait_page_queue *wait, bool set) { struct wait_queue_head *q = page_waitqueue(page); int ret = 0; wait->page = page; wait->bit_nr = PG_locked; spin_lock_irq(&q->lock); __add_wait_queue_entry_tail(q, &wait->wait); SetPageWaiters(page); if (set) ret = !trylock_page(page); else ret = PageLocked(page); /* * If we were successful now, we know we're still on the * waitqueue as we're still under the lock. This means it's * safe to remove and return success, we know the callback * isn't going to trigger. */ if (!ret) __remove_wait_queue(q, &wait->wait); else ret = -EIOCBQUEUED; spin_unlock_irq(&q->lock); return ret; }
1a0a7853b mm: support async... |
1323 1324 1325 1326 1327 1328 1329 |
static int wait_on_page_locked_async(struct page *page, struct wait_page_queue *wait) { if (!PageLocked(page)) return 0; return __wait_on_page_locked_async(compound_head(page), wait, false); } |
1da177e4c Linux-2.6.12-rc2 |
1330 |
/** |
9a1ea439b mm: put_and_wait_... |
1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 |
* put_and_wait_on_page_locked - Drop a reference and wait for it to be unlocked * @page: The page to wait for. * * The caller should hold a reference on @page. They expect the page to * become unlocked relatively soon, but do not wish to hold up migration * (for example) by holding the reference while waiting for the page to * come unlocked. After this function returns, the caller should not * dereference @page. */ void put_and_wait_on_page_locked(struct page *page) { wait_queue_head_t *q; page = compound_head(page); q = page_waitqueue(page); wait_on_page_bit_common(q, page, PG_locked, TASK_UNINTERRUPTIBLE, DROP); } /** |
385e1ca5f CacheFiles: Permi... |
1350 |
* add_page_wait_queue - Add an arbitrary waiter to a page's wait queue |
697f619fc filemap: fix kern... |
1351 1352 |
* @page: Page defining the wait queue of interest * @waiter: Waiter to add to the queue |
385e1ca5f CacheFiles: Permi... |
1353 1354 1355 |
* * Add an arbitrary @waiter to the wait queue for the nominated @page. */ |
ac6424b98 sched/wait: Renam... |
1356 |
void add_page_wait_queue(struct page *page, wait_queue_entry_t *waiter) |
385e1ca5f CacheFiles: Permi... |
1357 1358 1359 1360 1361 |
{ wait_queue_head_t *q = page_waitqueue(page); unsigned long flags; spin_lock_irqsave(&q->lock, flags); |
9c3a815f4 page waitqueue: a... |
1362 |
__add_wait_queue_entry_tail(q, waiter); |
629060270 mm: add PageWaite... |
1363 |
SetPageWaiters(page); |
385e1ca5f CacheFiles: Permi... |
1364 1365 1366 |
spin_unlock_irqrestore(&q->lock, flags); } EXPORT_SYMBOL_GPL(add_page_wait_queue); |
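Usage note: add_page_wait_queue() exists so a caller (historically CacheFiles) can hang its own wait_queue_entry, with a custom wake function, off a page's hashed wait queue. A rough sketch under that assumption (not part of filemap.c; the example_* names are hypothetical):

#include <linux/kernel.h>
#include <linux/pagemap.h>
#include <linux/wait.h>

struct example_monitor {
        struct wait_page_queue wpq;     /* page, bit_nr and the wait entry */
};

static int example_page_waiter(wait_queue_entry_t *wait, unsigned mode,
                               int sync, void *key)
{
        struct example_monitor *mon =
                container_of(wait, struct example_monitor, wpq.wait);
        struct wait_page_key *k = key;

        /* Ignore wakeups that are not for our page/bit. */
        if (!wake_page_match(&mon->wpq, k))
                return 0;

        /* Dequeue ourselves; defer any real work to process context. */
        list_del(&wait->entry);
        return 1;
}

static void example_monitor_page(struct example_monitor *mon, struct page *page)
{
        mon->wpq.page = page;
        mon->wpq.bit_nr = PG_locked;
        init_waitqueue_func_entry(&mon->wpq.wait, example_page_waiter);
        add_page_wait_queue(page, &mon->wpq.wait);
}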
b91e1302a mm: optimize Page... |
1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 |
#ifndef clear_bit_unlock_is_negative_byte /* * PG_waiters is the high bit in the same byte as PG_locked. * * On x86 (and on many other architectures), we can clear PG_locked and * test the sign bit at the same time. But if the architecture does * not support that special operation, we just do this all by hand * instead. * * The read of PG_waiters has to be after (or concurrently with) PG_locked
ffceeb62f mm/filemap: fix a... |
1378 |
* being cleared, but a memory barrier should be unnecessary since it is |
b91e1302a mm: optimize Page... |
1379 1380 1381 1382 1383 1384 |
* in the same byte as PG_locked. */ static inline bool clear_bit_unlock_is_negative_byte(long nr, volatile void *mem) { clear_bit_unlock(nr, mem); /* smp_mb__after_atomic(); */ |
98473f9f3 mm/filemap: fix p... |
1385 |
return test_bit(PG_waiters, mem); |
b91e1302a mm: optimize Page... |
1386 1387 1388 |
} #endif |
385e1ca5f CacheFiles: Permi... |
1389 |
/** |
485bb99b4 [PATCH] kernel-do... |
1390 |
* unlock_page - unlock a locked page |
1da177e4c Linux-2.6.12-rc2 |
1391 1392 |
* @page: the page * |
0e9aa6755 mm: fix some brok... |
1393 |
* Unlocks the page and wakes up sleepers in wait_on_page_locked(). |
1da177e4c Linux-2.6.12-rc2 |
1394 |
* Also wakes sleepers in wait_on_page_writeback() because the wakeup |
da3dae54e Documentation: Do... |
1395 |
* mechanism between PageLocked pages and PageWriteback pages is shared. |
1da177e4c Linux-2.6.12-rc2 |
1396 1397 |
* But that's OK - sleepers in wait_on_page_writeback() just go back to sleep. * |
b91e1302a mm: optimize Page... |
1398 1399 1400 1401 1402 |
* Note that this depends on PG_waiters being the sign bit in the byte * that contains PG_locked - thus the BUILD_BUG_ON(). That allows us to * clear the PG_locked bit and test PG_waiters at the same time fairly * portably (architectures that do LL/SC can test any bit, while x86 can * test the sign bit). |
1da177e4c Linux-2.6.12-rc2 |
1403 |
*/ |
920c7a5d0 mm: remove fastca... |
1404 |
void unlock_page(struct page *page) |
1da177e4c Linux-2.6.12-rc2 |
1405 |
{ |
b91e1302a mm: optimize Page... |
1406 |
BUILD_BUG_ON(PG_waiters != 7); |
48c935ad8 page-flags: defin... |
1407 |
page = compound_head(page); |
309381fea mm: dump page whe... |
1408 |
VM_BUG_ON_PAGE(!PageLocked(page), page); |
b91e1302a mm: optimize Page... |
1409 1410 |
if (clear_bit_unlock_is_negative_byte(PG_locked, &page->flags)) wake_up_page_bit(page, PG_locked); |
1da177e4c Linux-2.6.12-rc2 |
1411 1412 |
} EXPORT_SYMBOL(unlock_page); |
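Usage note: callers that lock a page they found in the cache have to re-check page->mapping once the lock is held, because the page may have been truncated while they slept; find_lock_entry() below does exactly this. A minimal sketch of the pattern (not part of filemap.c; the example_* name is hypothetical):

#include <linux/pagemap.h>

static struct page *example_find_and_lock(struct address_space *mapping,
                                          pgoff_t index)
{
        struct page *page = find_get_page(mapping, index);

        if (!page)
                return NULL;
        lock_page(page);
        /* Re-check: the page may have been truncated while we slept. */
        if (page->mapping != mapping) {
                unlock_page(page);
                put_page(page);
                return NULL;
        }
        return page;    /* caller must unlock_page() and put_page() */
}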
485bb99b4 [PATCH] kernel-do... |
1413 1414 1415 |
/** * end_page_writeback - end writeback against a page * @page: the page |
1da177e4c Linux-2.6.12-rc2 |
1416 1417 1418 |
*/ void end_page_writeback(struct page *page) { |
888cf2db4 mm: avoid unneces... |
1419 1420 1421 1422 1423 1424 1425 1426 1427 |
/* * TestClearPageReclaim could be used here but it is an atomic * operation and overkill in this particular case. Failing to * shuffle a page marked for immediate reclaim is too mild to * justify taking an atomic operation penalty at the end of * every page writeback. */ if (PageReclaim(page)) { ClearPageReclaim(page);
ac6aadb24 mm: rotate_reclai... |
1428 |
rotate_reclaimable_page(page); |
888cf2db4 mm: avoid unneces... |
1429 |
} |
ac6aadb24 mm: rotate_reclai... |
1430 |
|
073861ed7 mm: fix VM_BUG_ON... |
1431 1432 1433 1434 1435 1436 1437 |
/* * Writeback does not hold a page reference of its own, relying * on truncation to wait for the clearing of PG_writeback. * But here we must make sure that the page is not freed and * reused before the wake_up_page(). */ get_page(page); |
ac6aadb24 mm: rotate_reclai... |
1438 1439 |
if (!test_clear_page_writeback(page)) BUG(); |
4e857c58e arch: Mass conver... |
1440 |
smp_mb__after_atomic(); |
1da177e4c Linux-2.6.12-rc2 |
1441 |
wake_up_page(page, PG_writeback); |
073861ed7 mm: fix VM_BUG_ON... |
1442 |
put_page(page); |
1da177e4c Linux-2.6.12-rc2 |
1443 1444 |
} EXPORT_SYMBOL(end_page_writeback); |
57d998456 fs/mpage.c: facto... |
1445 1446 1447 1448 |
/* * After completing I/O on a page, call this routine to update the page * flags appropriately */ |
c11f0c0b5 block/mm: make bd... |
1449 |
void page_endio(struct page *page, bool is_write, int err) |
57d998456 fs/mpage.c: facto... |
1450 |
{ |
c11f0c0b5 block/mm: make bd... |
1451 |
if (!is_write) { |
57d998456 fs/mpage.c: facto... |
1452 1453 1454 1455 1456 1457 1458 |
if (!err) { SetPageUptodate(page); } else { ClearPageUptodate(page); SetPageError(page); } unlock_page(page); |
abf545484 mm/block: convert... |
1459 |
} else { |
57d998456 fs/mpage.c: facto... |
1460 |
if (err) { |
dd8416c47 mm: do not access... |
1461 |
struct address_space *mapping; |
57d998456 fs/mpage.c: facto... |
1462 |
SetPageError(page); |
dd8416c47 mm: do not access... |
1463 1464 1465 |
mapping = page_mapping(page); if (mapping) mapping_set_error(mapping, err); |
57d998456 fs/mpage.c: facto... |
1466 1467 1468 1469 1470 |
} end_page_writeback(page); } } EXPORT_SYMBOL_GPL(page_endio); |
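Usage note: page_endio() is aimed at block-layer completion paths. A sketch of a bio end_io handler in the style of mpage_end_io() (not part of filemap.c; the example_* name is hypothetical):

#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/pagemap.h>

static void example_end_io(struct bio *bio)
{
        struct bio_vec *bv;
        struct bvec_iter_all iter_all;

        /* Mark every page in the bio uptodate/clean, or record the error. */
        bio_for_each_segment_all(bv, bio, iter_all)
                page_endio(bv->bv_page, op_is_write(bio_op(bio)),
                           blk_status_to_errno(bio->bi_status));

        bio_put(bio);
}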
485bb99b4 [PATCH] kernel-do... |
1471 1472 |
/** * __lock_page - get a lock on the page, assuming we need to sleep to get it |
870667553 mm: fix filemap.c... |
1473 |
* @__page: the page to lock |
1da177e4c Linux-2.6.12-rc2 |
1474 |
*/ |
dec0fd4a0 ANDROID: attribut... |
1475 |
__sched void __lock_page(struct page *__page) |
1da177e4c Linux-2.6.12-rc2 |
1476 |
{ |
629060270 mm: add PageWaite... |
1477 1478 |
struct page *page = compound_head(__page); wait_queue_head_t *q = page_waitqueue(page); |
9a1ea439b mm: put_and_wait_... |
1479 1480 |
wait_on_page_bit_common(q, page, PG_locked, TASK_UNINTERRUPTIBLE, EXCLUSIVE); |
1da177e4c Linux-2.6.12-rc2 |
1481 1482 |
} EXPORT_SYMBOL(__lock_page); |
dec0fd4a0 ANDROID: attribut... |
1483 |
__sched int __lock_page_killable(struct page *__page) |
2687a3569 Add lock_page_kil... |
1484 |
{ |
629060270 mm: add PageWaite... |
1485 1486 |
struct page *page = compound_head(__page); wait_queue_head_t *q = page_waitqueue(page); |
9a1ea439b mm: put_and_wait_... |
1487 1488 |
return wait_on_page_bit_common(q, page, PG_locked, TASK_KILLABLE, EXCLUSIVE); |
2687a3569 Add lock_page_kil... |
1489 |
} |
18bc0bbd1 Staging: pohmelfs... |
1490 |
EXPORT_SYMBOL_GPL(__lock_page_killable); |
2687a3569 Add lock_page_kil... |
1491 |
|
dec0fd4a0 ANDROID: attribut... |
1492 |
__sched int __lock_page_async(struct page *page, struct wait_page_queue *wait) |
dd3e6d503 mm: add support f... |
1493 1494 1495 |
{ return __wait_on_page_locked_async(page, wait, true); } |
9a95f3cf7 mm: describe mmap... |
1496 1497 |
/* * Return values: |
c1e8d7c6a mmap locking API:... |
1498 |
* 1 - page is locked; mmap_lock is still held. |
9a95f3cf7 mm: describe mmap... |
1499 |
* 0 - page is not locked. |
3e4e28c5a mmap locking API:... |
1500 |
* mmap_lock has been released (mmap_read_unlock(), unless flags had both |
9a95f3cf7 mm: describe mmap... |
1501 |
* FAULT_FLAG_ALLOW_RETRY and FAULT_FLAG_RETRY_NOWAIT set, in |
c1e8d7c6a mmap locking API:... |
1502 |
* which case mmap_lock is still held. |
9a95f3cf7 mm: describe mmap... |
1503 1504 |
* * If neither ALLOW_RETRY nor KILLABLE are set, will always return 1 |
c1e8d7c6a mmap locking API:... |
1505 |
* with the page locked and the mmap_lock unperturbed. |
9a95f3cf7 mm: describe mmap... |
1506 |
*/ |
dec0fd4a0 ANDROID: attribut... |
1507 |
__sched int __lock_page_or_retry(struct page *page, struct mm_struct *mm, |
d065bd810 mm: retry page fa... |
1508 1509 |
unsigned int flags) { |
4064b9827 mm: allow VM_FAUL... |
1510 |
if (fault_flag_allow_retry_first(flags)) { |
37b23e052 x86,mm: make page... |
1511 |
/* |
c1e8d7c6a mmap locking API:... |
1512 |
* CAUTION! In this case, mmap_lock is not released |
37b23e052 x86,mm: make page... |
1513 1514 1515 1516 |
* even though we return 0. */ if (flags & FAULT_FLAG_RETRY_NOWAIT) return 0;
d8ed45c5d mmap locking API:... |
1517 |
mmap_read_unlock(mm); |
37b23e052 x86,mm: make page... |
1518 1519 1520 |
if (flags & FAULT_FLAG_KILLABLE) wait_on_page_locked_killable(page); else |
318b275fb mm: allow GUP to ... |
1521 |
wait_on_page_locked(page); |
d065bd810 mm: retry page fa... |
1522 |
return 0; |
37b23e052 x86,mm: make page... |
1523 1524 1525 1526 1527 1528 |
} else { if (flags & FAULT_FLAG_KILLABLE) { int ret; ret = __lock_page_killable(page); if (ret) { |
d8ed45c5d mmap locking API:... |
1529 |
mmap_read_unlock(mm); |
37b23e052 x86,mm: make page... |
1530 1531 1532 1533 1534 |
return 0; } } else __lock_page(page); return 1; |
d065bd810 mm: retry page fa... |
1535 1536 |
} } |
485bb99b4 [PATCH] kernel-do... |
1537 |
/** |
0d3f92966 page cache: Conve... |
1538 1539 1540 1541 |
* page_cache_next_miss() - Find the next gap in the page cache. * @mapping: Mapping. * @index: Index. * @max_scan: Maximum range to search. |
e7b563bb2 mm: filemap: move... |
1542 |
* |
0d3f92966 page cache: Conve... |
1543 1544 |
* Search the range [index, min(index + max_scan - 1, ULONG_MAX)] for the * gap with the lowest index. |
e7b563bb2 mm: filemap: move... |
1545 |
* |
0d3f92966 page cache: Conve... |
1546 1547 1548 1549 1550 |
* This function may be called under the rcu_read_lock. However, this will * not atomically search a snapshot of the cache at a single point in time. * For example, if a gap is created at index 5, then subsequently a gap is * created at index 10, page_cache_next_miss covering both indices may * return 10 if called under the rcu_read_lock. |
e7b563bb2 mm: filemap: move... |
1551 |
* |
0d3f92966 page cache: Conve... |
1552 1553 1554 |
* Return: The index of the gap if found, otherwise an index outside the * range specified (in which case 'return - index >= max_scan' will be true). * In the rare case of index wrap-around, 0 will be returned. |
e7b563bb2 mm: filemap: move... |
1555 |
*/ |
0d3f92966 page cache: Conve... |
1556 |
pgoff_t page_cache_next_miss(struct address_space *mapping, |
e7b563bb2 mm: filemap: move... |
1557 1558 |
pgoff_t index, unsigned long max_scan) { |
0d3f92966 page cache: Conve... |
1559 |
XA_STATE(xas, &mapping->i_pages, index); |
e7b563bb2 mm: filemap: move... |
1560 |
|
0d3f92966 page cache: Conve... |
1561 1562 1563 |
while (max_scan--) { void *entry = xas_next(&xas); if (!entry || xa_is_value(entry)) |
e7b563bb2 mm: filemap: move... |
1564 |
break; |
0d3f92966 page cache: Conve... |
1565 |
if (xas.xa_index == 0) |
e7b563bb2 mm: filemap: move... |
1566 1567 |
break; } |
0d3f92966 page cache: Conve... |
1568 |
return xas.xa_index; |
e7b563bb2 mm: filemap: move... |
1569 |
} |
0d3f92966 page cache: Conve... |
1570 |
EXPORT_SYMBOL(page_cache_next_miss); |
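Usage note: the gap-search helpers are the sort of primitive ondemand readahead uses to ask "is this whole range already cached?". A minimal sketch built on the documented return convention above (not part of filemap.c; the example_* name is hypothetical):

#include <linux/pagemap.h>

static bool example_range_is_cached(struct address_space *mapping,
                                    pgoff_t index, unsigned long count)
{
        pgoff_t gap = page_cache_next_miss(mapping, index, count);

        /* No gap within @count indices means every page is present. */
        return gap - index >= count;
}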
e7b563bb2 mm: filemap: move... |
1571 1572 |
/** |
2346a5605 mm/filemap.c: fix... |
1573 |
* page_cache_prev_miss() - Find the previous gap in the page cache. |
0d3f92966 page cache: Conve... |
1574 1575 1576 |
* @mapping: Mapping. * @index: Index. * @max_scan: Maximum range to search. |
e7b563bb2 mm: filemap: move... |
1577 |
* |
0d3f92966 page cache: Conve... |
1578 1579 |
* Search the range [max(index - max_scan + 1, 0), index] for the * gap with the highest index. |
e7b563bb2 mm: filemap: move... |
1580 |
* |
0d3f92966 page cache: Conve... |
1581 1582 1583 1584 1585 |
* This function may be called under the rcu_read_lock. However, this will * not atomically search a snapshot of the cache at a single point in time. * For example, if a gap is created at index 10, then subsequently a gap is * created at index 5, page_cache_prev_miss() covering both indices may * return 5 if called under the rcu_read_lock. |
e7b563bb2 mm: filemap: move... |
1586 |
* |
0d3f92966 page cache: Conve... |
1587 1588 1589 |
* Return: The index of the gap if found, otherwise an index outside the * range specified (in which case 'index - return >= max_scan' will be true). * In the rare case of wrap-around, ULONG_MAX will be returned. |
e7b563bb2 mm: filemap: move... |
1590 |
*/ |
0d3f92966 page cache: Conve... |
1591 |
pgoff_t page_cache_prev_miss(struct address_space *mapping, |
e7b563bb2 mm: filemap: move... |
1592 1593 |
pgoff_t index, unsigned long max_scan) { |
0d3f92966 page cache: Conve... |
1594 |
XA_STATE(xas, &mapping->i_pages, index); |
e7b563bb2 mm: filemap: move... |
1595 |
|
0d3f92966 page cache: Conve... |
1596 1597 1598 |
while (max_scan--) { void *entry = xas_prev(&xas); if (!entry || xa_is_value(entry)) |
e7b563bb2 mm: filemap: move... |
1599 |
break; |
0d3f92966 page cache: Conve... |
1600 |
if (xas.xa_index == ULONG_MAX) |
e7b563bb2 mm: filemap: move... |
1601 1602 |
break; } |
0d3f92966 page cache: Conve... |
1603 |
return xas.xa_index; |
e7b563bb2 mm: filemap: move... |
1604 |
} |
0d3f92966 page cache: Conve... |
1605 |
EXPORT_SYMBOL(page_cache_prev_miss); |
e7b563bb2 mm: filemap: move... |
1606 1607 |
/** |
0cd6144aa mm + fs: prepare ... |
1608 |
* find_get_entry - find and get a page cache entry |
485bb99b4 [PATCH] kernel-do... |
1609 |
* @mapping: the address_space to search |
a6de4b487 mm: convert find_... |
1610 |
* @index: The page cache index. |
0cd6144aa mm + fs: prepare ... |
1611 1612 |
* * Looks up the page cache slot at @mapping & @index. If there is a
a6de4b487 mm: convert find_... |
1613 |
* page cache page, the head page is returned with an increased refcount. |
485bb99b4 [PATCH] kernel-do... |
1614 |
* |
139b6a6fb mm: filemap: upda... |
1615 1616 |
* If the slot holds a shadow entry of a previously evicted page, or a * swap entry from shmem/tmpfs, it is returned. |
0cd6144aa mm + fs: prepare ... |
1617 |
* |
a6de4b487 mm: convert find_... |
1618 |
* Return: The head page or shadow entry, %NULL if nothing is found. |
1da177e4c Linux-2.6.12-rc2 |
1619 |
*/ |
a6de4b487 mm: convert find_... |
1620 |
struct page *find_get_entry(struct address_space *mapping, pgoff_t index) |
1da177e4c Linux-2.6.12-rc2 |
1621 |
{ |
a6de4b487 mm: convert find_... |
1622 |
XA_STATE(xas, &mapping->i_pages, index); |
4101196b1 mm: page cache: s... |
1623 |
struct page *page; |
1da177e4c Linux-2.6.12-rc2 |
1624 |
|
a60637c85 mm: lockless page... |
1625 1626 |
rcu_read_lock(); repeat: |
4c7472c0d page cache: Conve... |
1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 |
xas_reset(&xas); page = xas_load(&xas); if (xas_retry(&xas, page)) goto repeat; /* * A shadow entry of a recently evicted page, or a swap entry from * shmem/tmpfs. Return it without attempting to raise page count. */ if (!page || xa_is_value(page)) goto out; |
83929372f filemap: prepare ... |
1637 |
|
4101196b1 mm: page cache: s... |
1638 |
if (!page_cache_get_speculative(page)) |
4c7472c0d page cache: Conve... |
1639 |
goto repeat; |
83929372f filemap: prepare ... |
1640 |
|
4c7472c0d page cache: Conve... |
1641 |
/* |
4101196b1 mm: page cache: s... |
1642 |
* Has the page moved or been split? |
4c7472c0d page cache: Conve... |
1643 1644 1645 1646 |
* This is part of the lockless pagecache protocol. See * include/linux/pagemap.h for details. */ if (unlikely(page != xas_reload(&xas))) { |
4101196b1 mm: page cache: s... |
1647 |
put_page(page); |
4c7472c0d page cache: Conve... |
1648 |
goto repeat; |
a60637c85 mm: lockless page... |
1649 |
} |
27d20fddc radix-tree: fix R... |
1650 |
out: |
a60637c85 mm: lockless page... |
1651 |
rcu_read_unlock(); |
1da177e4c Linux-2.6.12-rc2 |
1652 1653 |
return page; } |
1da177e4c Linux-2.6.12-rc2 |
1654 |
|
485bb99b4 [PATCH] kernel-do... |
1655 |
/** |
63ec1973d mm/shmem: return ... |
1656 1657 1658 |
* find_lock_entry - Locate and lock a page cache entry. * @mapping: The address_space to search. * @index: The page cache index. |
0cd6144aa mm + fs: prepare ... |
1659 |
* |
63ec1973d mm/shmem: return ... |
1660 1661 |
* Looks up the page at @mapping & @index. If there is a page in the * cache, the head page is returned locked and with an increased refcount. |
0cd6144aa mm + fs: prepare ... |
1662 |
* |
139b6a6fb mm: filemap: upda... |
1663 1664 |
* If the slot holds a shadow entry of a previously evicted page, or a * swap entry from shmem/tmpfs, it is returned. |
0cd6144aa mm + fs: prepare ... |
1665 |
* |
63ec1973d mm/shmem: return ... |
1666 1667 |
* Context: May sleep. * Return: The head page or shadow entry, %NULL if nothing is found. |
0cd6144aa mm + fs: prepare ... |
1668 |
*/ |
63ec1973d mm/shmem: return ... |
1669 |
struct page *find_lock_entry(struct address_space *mapping, pgoff_t index) |
1da177e4c Linux-2.6.12-rc2 |
1670 1671 |
{ struct page *page; |
1da177e4c Linux-2.6.12-rc2 |
1672 |
repeat: |
63ec1973d mm/shmem: return ... |
1673 |
page = find_get_entry(mapping, index); |
4c7472c0d page cache: Conve... |
1674 |
if (page && !xa_is_value(page)) { |
a60637c85 mm: lockless page... |
1675 1676 |
lock_page(page); /* Has the page been truncated? */ |
63ec1973d mm/shmem: return ... |
1677 |
if (unlikely(page->mapping != mapping)) { |
a60637c85 mm: lockless page... |
1678 |
unlock_page(page); |
09cbfeaf1 mm, fs: get rid o... |
1679 |
put_page(page); |
a60637c85 mm: lockless page... |
1680 |
goto repeat; |
1da177e4c Linux-2.6.12-rc2 |
1681 |
} |
63ec1973d mm/shmem: return ... |
1682 |
VM_BUG_ON_PAGE(!thp_contains(page, index), page); |
1da177e4c Linux-2.6.12-rc2 |
1683 |
} |
1da177e4c Linux-2.6.12-rc2 |
1684 1685 |
return page; } |
0cd6144aa mm + fs: prepare ... |
1686 1687 |
/** |
2294b32e0 mm/filemap.c: rew... |
1688 1689 1690 1691 1692 |
* pagecache_get_page - Find and get a reference to a page. * @mapping: The address_space to search. * @index: The page index. * @fgp_flags: %FGP flags modify how the page is returned. * @gfp_mask: Memory allocation flags to use if %FGP_CREAT is specified. |
1da177e4c Linux-2.6.12-rc2 |
1693 |
* |
2294b32e0 mm/filemap.c: rew... |
1694 |
* Looks up the page cache entry at @mapping & @index. |
0cd6144aa mm + fs: prepare ... |
1695 |
* |
2294b32e0 mm/filemap.c: rew... |
1696 |
* @fgp_flags can be zero or more of these flags: |
0e056eb55 kernel-api.rst: f... |
1697 |
* |
2294b32e0 mm/filemap.c: rew... |
1698 1699 |
* * %FGP_ACCESSED - The page will be marked accessed. * * %FGP_LOCK - The page is returned locked. |
a8cf7f272 mm: add find_lock... |
1700 1701 |
* * %FGP_HEAD - If the page is present and a THP, return the head page * rather than the exact page specified by the index. |
2294b32e0 mm/filemap.c: rew... |
1702 1703 1704 1705 1706 1707 |
* * %FGP_CREAT - If no page is present then a new page is allocated using * @gfp_mask and added to the page cache and the VM's LRU list. * The page is returned locked and with an increased refcount. * * %FGP_FOR_MMAP - The caller wants to do its own locking dance if the * page is already in cache. If the page was allocated, unlock it before * returning so the caller can do the same dance. |
605cad834 mm: filemap: add ... |
1708 1709 1710 |
* * %FGP_WRITE - The page will be written * * %FGP_NOFS - __GFP_FS will get cleared in gfp mask * * %FGP_NOWAIT - Don't get blocked by page lock |
1da177e4c Linux-2.6.12-rc2 |
1711 |
* |
2294b32e0 mm/filemap.c: rew... |
1712 1713 |
* If %FGP_LOCK or %FGP_CREAT are specified then the function may sleep even * if the %GFP flags specified for %FGP_CREAT are atomic. |
1da177e4c Linux-2.6.12-rc2 |
1714 |
* |
2457aec63 mm: non-atomicall... |
1715 |
* If there is a page cache page, it is returned with an increased refcount. |
a862f68a8 docs/core-api/mm:... |
1716 |
* |
2294b32e0 mm/filemap.c: rew... |
1717 |
* Return: The found page or %NULL otherwise. |
1da177e4c Linux-2.6.12-rc2 |
1718 |
*/ |
2294b32e0 mm/filemap.c: rew... |
1719 1720 |
struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index, int fgp_flags, gfp_t gfp_mask) |
1da177e4c Linux-2.6.12-rc2 |
1721 |
{ |
eb2be1893 mm: buffered writ... |
1722 |
struct page *page; |
2457aec63 mm: non-atomicall... |
1723 |
|
1da177e4c Linux-2.6.12-rc2 |
1724 |
repeat: |
2294b32e0 mm/filemap.c: rew... |
1725 |
page = find_get_entry(mapping, index); |
3159f943a xarray: Replace e... |
1726 |
if (xa_is_value(page)) |
2457aec63 mm: non-atomicall... |
1727 1728 1729 1730 1731 1732 1733 |
page = NULL; if (!page) goto no_page; if (fgp_flags & FGP_LOCK) { if (fgp_flags & FGP_NOWAIT) { if (!trylock_page(page)) { |
09cbfeaf1 mm, fs: get rid o... |
1734 |
put_page(page); |
2457aec63 mm: non-atomicall... |
1735 1736 1737 1738 1739 1740 1741 |
return NULL; } } else { lock_page(page); } /* Has the page been truncated? */ |
a8cf7f272 mm: add find_lock... |
1742 |
if (unlikely(page->mapping != mapping)) { |
2457aec63 mm: non-atomicall... |
1743 |
unlock_page(page); |
09cbfeaf1 mm, fs: get rid o... |
1744 |
put_page(page); |
2457aec63 mm: non-atomicall... |
1745 1746 |
goto repeat; } |
a8cf7f272 mm: add find_lock... |
1747 |
VM_BUG_ON_PAGE(!thp_contains(page, index), page); |
2457aec63 mm: non-atomicall... |
1748 |
} |
c16eb000c mm/filemap.c: rem... |
1749 |
if (fgp_flags & FGP_ACCESSED) |
2457aec63 mm: non-atomicall... |
1750 |
mark_page_accessed(page); |
b9306a796 mm: filemap: clea... |
1751 1752 1753 1754 1755 |
else if (fgp_flags & FGP_WRITE) { /* Clear idle flag for buffer write */ if (page_is_idle(page)) clear_page_idle(page); } |
a8cf7f272 mm: add find_lock... |
1756 1757 |
if (!(fgp_flags & FGP_HEAD)) page = find_subpage(page, index); |
2457aec63 mm: non-atomicall... |
1758 1759 1760 1761 |
no_page: if (!page && (fgp_flags & FGP_CREAT)) { int err; |
f56753ac2 bdi: replace BDI_... |
1762 |
if ((fgp_flags & FGP_WRITE) && mapping_can_writeback(mapping)) |
45f87de57 mm: get rid of ra... |
1763 1764 1765 |
gfp_mask |= __GFP_WRITE; if (fgp_flags & FGP_NOFS) gfp_mask &= ~__GFP_FS; |
2457aec63 mm: non-atomicall... |
1766 |
|
45f87de57 mm: get rid of ra... |
1767 |
page = __page_cache_alloc(gfp_mask); |
eb2be1893 mm: buffered writ... |
1768 1769 |
if (!page) return NULL; |
2457aec63 mm: non-atomicall... |
1770 |
|
a75d4c333 filemap: kill pag... |
1771 |
if (WARN_ON_ONCE(!(fgp_flags & (FGP_LOCK | FGP_FOR_MMAP)))) |
2457aec63 mm: non-atomicall... |
1772 |
fgp_flags |= FGP_LOCK; |
eb39d618f mm: replace init_... |
1773 |
/* Init accessed so avoid atomic mark_page_accessed later */ |
2457aec63 mm: non-atomicall... |
1774 |
if (fgp_flags & FGP_ACCESSED) |
eb39d618f mm: replace init_... |
1775 |
__SetPageReferenced(page); |
2457aec63 mm: non-atomicall... |
1776 |
|
2294b32e0 mm/filemap.c: rew... |
1777 |
err = add_to_page_cache_lru(page, mapping, index, gfp_mask); |
eb2be1893 mm: buffered writ... |
1778 |
if (unlikely(err)) { |
09cbfeaf1 mm, fs: get rid o... |
1779 |
put_page(page); |
eb2be1893 mm: buffered writ... |
1780 1781 1782 |
page = NULL; if (err == -EEXIST) goto repeat; |
1da177e4c Linux-2.6.12-rc2 |
1783 |
} |
a75d4c333 filemap: kill pag... |
1784 1785 1786 1787 1788 1789 1790 |
/* * add_to_page_cache_lru locks the page, and for mmap we expect * an unlocked page. */ if (page && (fgp_flags & FGP_FOR_MMAP)) unlock_page(page); |
1da177e4c Linux-2.6.12-rc2 |
1791 |
} |
2457aec63 mm: non-atomicall... |
1792 |
|
1da177e4c Linux-2.6.12-rc2 |
1793 1794 |
return page; } |
2457aec63 mm: non-atomicall... |
1795 |
EXPORT_SYMBOL(pagecache_get_page); |
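Usage note: most callers use the wrappers in <linux/pagemap.h> (find_get_page(), find_lock_page(), find_or_create_page(), ...) rather than open-coding the FGP flags. A minimal sketch equivalent to what find_or_create_page() is believed to do (not part of filemap.c; the example_* name is hypothetical):

#include <linux/gfp.h>
#include <linux/pagemap.h>

static struct page *example_get_or_create(struct address_space *mapping,
                                          pgoff_t index)
{
        /*
         * On success the page comes back locked, marked accessed and with
         * an elevated refcount; the caller unlock_page()s and put_page()s it.
         */
        return pagecache_get_page(mapping, index,
                                  FGP_LOCK | FGP_ACCESSED | FGP_CREAT,
                                  mapping_gfp_mask(mapping));
}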
1da177e4c Linux-2.6.12-rc2 |
1796 1797 |
/** |
0cd6144aa mm + fs: prepare ... |
1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 |
* find_get_entries - gang pagecache lookup * @mapping: The address_space to search * @start: The starting page cache index * @nr_entries: The maximum number of entries * @entries: Where the resulting entries are placed * @indices: The cache indices corresponding to the entries in @entries * * find_get_entries() will search for and return a group of up to * @nr_entries entries in the mapping. The entries are placed at * @entries. find_get_entries() takes a reference against any actual * pages it returns. * * The search returns a group of mapping-contiguous page cache entries * with ascending indexes. There may be holes in the indices due to * not-present pages. * |
139b6a6fb mm: filemap: upda... |
1814 1815 |
* Any shadow entries of evicted pages, or swap entries from * shmem/tmpfs, are included in the returned array. |
0cd6144aa mm + fs: prepare ... |
1816 |
* |
71725ed10 mm: huge tmpfs: t... |
1817 1818 1819 1820 1821 |
* If it finds a Transparent Huge Page, head or tail, find_get_entries() * stops at that page: the caller is likely to have a better way to handle * the compound page as a whole, and then skip its extent, than repeatedly * calling find_get_entries() to return all its tails. * |
a862f68a8 docs/core-api/mm:... |
1822 |
* Return: the number of pages and shadow entries which were found. |
0cd6144aa mm + fs: prepare ... |
1823 1824 1825 1826 1827 |
*/ unsigned find_get_entries(struct address_space *mapping, pgoff_t start, unsigned int nr_entries, struct page **entries, pgoff_t *indices) { |
f280bf092 page cache: Conve... |
1828 1829 |
XA_STATE(xas, &mapping->i_pages, start); struct page *page; |
0cd6144aa mm + fs: prepare ... |
1830 |
unsigned int ret = 0; |
0cd6144aa mm + fs: prepare ... |
1831 1832 1833 1834 1835 |
if (!nr_entries) return 0; rcu_read_lock(); |
f280bf092 page cache: Conve... |
1836 |
xas_for_each(&xas, page, ULONG_MAX) { |
f280bf092 page cache: Conve... |
1837 |
if (xas_retry(&xas, page)) |
0cd6144aa mm + fs: prepare ... |
1838 |
continue; |
f280bf092 page cache: Conve... |
1839 1840 1841 1842 1843 1844 |
/* * A shadow entry of a recently evicted page, a swap * entry from shmem/tmpfs or a DAX entry. Return it * without attempting to raise page count. */ if (xa_is_value(page)) |
0cd6144aa mm + fs: prepare ... |
1845 |
goto export; |
83929372f filemap: prepare ... |
1846 |
|
4101196b1 mm: page cache: s... |
1847 |
if (!page_cache_get_speculative(page)) |
f280bf092 page cache: Conve... |
1848 |
goto retry; |
83929372f filemap: prepare ... |
1849 |
|
4101196b1 mm: page cache: s... |
1850 |
/* Has the page moved or been split? */ |
f280bf092 page cache: Conve... |
1851 1852 |
if (unlikely(page != xas_reload(&xas))) goto put_page; |
71725ed10 mm: huge tmpfs: t... |
1853 1854 1855 1856 1857 1858 1859 1860 |
/* * Terminate early on finding a THP, to allow the caller to * handle it all at once; but continue if this is hugetlbfs. */ if (PageTransHuge(page) && !PageHuge(page)) { page = find_subpage(page, xas.xa_index); nr_entries = ret + 1; } |
0cd6144aa mm + fs: prepare ... |
1861 |
export: |
f280bf092 page cache: Conve... |
1862 |
indices[ret] = xas.xa_index; |
0cd6144aa mm + fs: prepare ... |
1863 1864 1865 |
entries[ret] = page; if (++ret == nr_entries) break; |
f280bf092 page cache: Conve... |
1866 1867 |
continue; put_page: |
4101196b1 mm: page cache: s... |
1868 |
put_page(page); |
f280bf092 page cache: Conve... |
1869 1870 |
retry: xas_reset(&xas); |
0cd6144aa mm + fs: prepare ... |
1871 1872 1873 1874 1875 1876 |
} rcu_read_unlock(); return ret; } /** |
b947cee4b mm: implement fin... |
1877 |
* find_get_pages_range - gang pagecache lookup |
1da177e4c Linux-2.6.12-rc2 |
1878 1879 |
* @mapping: The address_space to search * @start: The starting page index |
b947cee4b mm: implement fin... |
1880 |
* @end: The final page index (inclusive) |
1da177e4c Linux-2.6.12-rc2 |
1881 1882 1883 |
* @nr_pages: The maximum number of pages * @pages: Where the resulting pages are placed * |
b947cee4b mm: implement fin... |
1884 1885 1886 1887 |
* find_get_pages_range() will search for and return a group of up to @nr_pages * pages in the mapping starting at index @start and up to index @end * (inclusive). The pages are placed at @pages. find_get_pages_range() takes * a reference against the returned pages. |
1da177e4c Linux-2.6.12-rc2 |
1888 1889 1890 |
* * The search returns a group of mapping-contiguous pages with ascending * indexes. There may be holes in the indices due to not-present pages. |
d72dc8a25 mm: make pagevec_... |
1891 |
* We also update @start to index the next page for the traversal. |
1da177e4c Linux-2.6.12-rc2 |
1892 |
* |
a862f68a8 docs/core-api/mm:... |
1893 1894 |
* Return: the number of pages which were found. If this number is * smaller than @nr_pages, the end of the specified range has been
b947cee4b mm: implement fin... |
1895 |
* reached. |
1da177e4c Linux-2.6.12-rc2 |
1896 |
*/ |
b947cee4b mm: implement fin... |
1897 1898 1899 |
unsigned find_get_pages_range(struct address_space *mapping, pgoff_t *start, pgoff_t end, unsigned int nr_pages, struct page **pages) |
1da177e4c Linux-2.6.12-rc2 |
1900 |
{ |
fd1b3cee2 page cache: Conve... |
1901 1902 |
XA_STATE(xas, &mapping->i_pages, *start); struct page *page; |
0fc9d1040 radix-tree: use i... |
1903 1904 1905 1906 |
unsigned ret = 0; if (unlikely(!nr_pages)) return 0; |
a60637c85 mm: lockless page... |
1907 1908 |
rcu_read_lock(); |
fd1b3cee2 page cache: Conve... |
1909 |
xas_for_each(&xas, page, end) { |
fd1b3cee2 page cache: Conve... |
1910 |
if (xas_retry(&xas, page)) |
a60637c85 mm: lockless page... |
1911 |
continue; |
fd1b3cee2 page cache: Conve... |
1912 1913 |
/* Skip over shadow, swap and DAX entries */ if (xa_is_value(page)) |
8079b1c85 mm: clarify the r... |
1914 |
continue; |
a60637c85 mm: lockless page... |
1915 |
|
4101196b1 mm: page cache: s... |
1916 |
if (!page_cache_get_speculative(page)) |
fd1b3cee2 page cache: Conve... |
1917 |
goto retry; |
83929372f filemap: prepare ... |
1918 |
|
4101196b1 mm: page cache: s... |
1919 |
/* Has the page moved or been split? */ |
fd1b3cee2 page cache: Conve... |
1920 1921 |
if (unlikely(page != xas_reload(&xas))) goto put_page; |
1da177e4c Linux-2.6.12-rc2 |
1922 |
|
4101196b1 mm: page cache: s... |
1923 |
pages[ret] = find_subpage(page, xas.xa_index); |
b947cee4b mm: implement fin... |
1924 |
if (++ret == nr_pages) { |
5d3ee42f8 mm/shmem: make fi... |
1925 |
*start = xas.xa_index + 1; |
b947cee4b mm: implement fin... |
1926 1927 |
goto out; } |
fd1b3cee2 page cache: Conve... |
1928 1929 |
continue; put_page: |
4101196b1 mm: page cache: s... |
1930 |
put_page(page); |
fd1b3cee2 page cache: Conve... |
1931 1932 |
retry: xas_reset(&xas); |
a60637c85 mm: lockless page... |
1933 |
} |
5b280c0cc mm: don't return ... |
1934 |
|
b947cee4b mm: implement fin... |
1935 1936 1937 |
/* * We come here when there is no page beyond @end. We take care to not * overflow the index @start as it confuses some of the callers. This |
fd1b3cee2 page cache: Conve... |
1938 |
* breaks the iteration when there is a page at index -1 but that is |
b947cee4b mm: implement fin... |
1939 1940 1941 1942 1943 1944 1945 |
* already broken anyway. */ if (end == (pgoff_t)-1) *start = (pgoff_t)-1; else *start = end + 1; out: |
a60637c85 mm: lockless page... |
1946 |
rcu_read_unlock(); |
d72dc8a25 mm: make pagevec_... |
1947 |
|
1da177e4c Linux-2.6.12-rc2 |
1948 1949 |
return ret; } |
ebf43500e [PATCH] Add find_... |
1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 |
/** * find_get_pages_contig - gang contiguous pagecache lookup * @mapping: The address_space to search * @index: The starting page index * @nr_pages: The maximum number of pages * @pages: Where the resulting pages are placed * * find_get_pages_contig() works exactly like find_get_pages(), except * that the returned number of pages are guaranteed to be contiguous. * |
a862f68a8 docs/core-api/mm:... |
1960 |
* Return: the number of pages which were found. |
ebf43500e [PATCH] Add find_... |
1961 1962 1963 1964 |
*/ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index, unsigned int nr_pages, struct page **pages) { |
3ece58a27 page cache: Conve... |
1965 1966 |
XA_STATE(xas, &mapping->i_pages, index); struct page *page; |
0fc9d1040 radix-tree: use i... |
1967 1968 1969 1970 |
unsigned int ret = 0; if (unlikely(!nr_pages)) return 0; |
a60637c85 mm: lockless page... |
1971 1972 |
rcu_read_lock(); |
3ece58a27 page cache: Conve... |
1973 |
for (page = xas_load(&xas); page; page = xas_next(&xas)) { |
3ece58a27 page cache: Conve... |
1974 1975 1976 1977 1978 1979 1980 |
if (xas_retry(&xas, page)) continue; /* * If the entry has been swapped out, we can stop looking. * No current caller is looking for DAX entries. */ if (xa_is_value(page)) |
8079b1c85 mm: clarify the r... |
1981 |
break; |
ebf43500e [PATCH] Add find_... |
1982 |
|
4101196b1 mm: page cache: s... |
1983 |
if (!page_cache_get_speculative(page)) |
3ece58a27 page cache: Conve... |
1984 |
goto retry; |
83929372f filemap: prepare ... |
1985 |
|
4101196b1 mm: page cache: s... |
1986 |
/* Has the page moved or been split? */ |
3ece58a27 page cache: Conve... |
1987 1988 |
if (unlikely(page != xas_reload(&xas))) goto put_page; |
a60637c85 mm: lockless page... |
1989 |
|
4101196b1 mm: page cache: s... |
1990 |
pages[ret] = find_subpage(page, xas.xa_index); |
0fc9d1040 radix-tree: use i... |
1991 1992 |
if (++ret == nr_pages) break; |
3ece58a27 page cache: Conve... |
1993 1994 |
continue; put_page: |
4101196b1 mm: page cache: s... |
1995 |
put_page(page); |
3ece58a27 page cache: Conve... |
1996 1997 |
retry: xas_reset(&xas); |
ebf43500e [PATCH] Add find_... |
1998 |
} |
a60637c85 mm: lockless page... |
1999 2000 |
rcu_read_unlock(); return ret; |
ebf43500e [PATCH] Add find_... |
2001 |
} |
ef71c15c4 AFS: export a cou... |
2002 |
EXPORT_SYMBOL(find_get_pages_contig); |
ebf43500e [PATCH] Add find_... |
2003 |
|
485bb99b4 [PATCH] kernel-do... |
2004 |
/** |
72b045aec mm: implement fin... |
2005 |
* find_get_pages_range_tag - find and return pages in given range matching @tag |
485bb99b4 [PATCH] kernel-do... |
2006 2007 |
* @mapping: the address_space to search * @index: the starting page index |
72b045aec mm: implement fin... |
2008 |
* @end: The final page index (inclusive) |
485bb99b4 [PATCH] kernel-do... |
2009 2010 2011 2012 |
* @tag: the tag index * @nr_pages: the maximum number of pages * @pages: where the resulting pages are placed * |
1da177e4c Linux-2.6.12-rc2 |
2013 |
* Like find_get_pages, except we only return pages which are tagged with |
485bb99b4 [PATCH] kernel-do... |
2014 |
* @tag. We update @index to index the next page for the traversal. |
a862f68a8 docs/core-api/mm:... |
2015 2016 |
* * Return: the number of pages which were found. |
1da177e4c Linux-2.6.12-rc2 |
2017 |
*/ |
72b045aec mm: implement fin... |
2018 |
unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index, |
a6906972f page cache; Conve... |
2019 |
pgoff_t end, xa_mark_t tag, unsigned int nr_pages, |
72b045aec mm: implement fin... |
2020 |
struct page **pages) |
1da177e4c Linux-2.6.12-rc2 |
2021 |
{ |
a6906972f page cache; Conve... |
2022 2023 |
XA_STATE(xas, &mapping->i_pages, *index); struct page *page; |
0fc9d1040 radix-tree: use i... |
2024 2025 2026 2027 |
unsigned ret = 0; if (unlikely(!nr_pages)) return 0; |
a60637c85 mm: lockless page... |
2028 2029 |
rcu_read_lock(); |
a6906972f page cache; Conve... |
2030 |
xas_for_each_marked(&xas, page, end, tag) { |
a6906972f page cache; Conve... |
2031 |
if (xas_retry(&xas, page)) |
a60637c85 mm: lockless page... |
2032 |
continue; |
a6906972f page cache; Conve... |
2033 2034 2035 2036 2037 2038 |
/* * Shadow entries should never be tagged, but this iteration * is lockless so there is a window for page reclaim to evict * a page we saw tagged. Skip over it. */ if (xa_is_value(page)) |
139b6a6fb mm: filemap: upda... |
2039 |
continue; |
a60637c85 mm: lockless page... |
2040 |
|
4101196b1 mm: page cache: s... |
2041 |
if (!page_cache_get_speculative(page)) |
a6906972f page cache; Conve... |
2042 |
goto retry; |
a60637c85 mm: lockless page... |
2043 |
|
4101196b1 mm: page cache: s... |
2044 |
/* Has the page moved or been split? */ |
a6906972f page cache; Conve... |
2045 2046 |
if (unlikely(page != xas_reload(&xas))) goto put_page; |
a60637c85 mm: lockless page... |
2047 |
|
4101196b1 mm: page cache: s... |
2048 |
pages[ret] = find_subpage(page, xas.xa_index); |
72b045aec mm: implement fin... |
2049 |
if (++ret == nr_pages) { |
5d3ee42f8 mm/shmem: make fi... |
2050 |
*index = xas.xa_index + 1; |
72b045aec mm: implement fin... |
2051 2052 |
goto out; } |
a6906972f page cache; Conve... |
2053 2054 |
continue; put_page: |
4101196b1 mm: page cache: s... |
2055 |
put_page(page); |
a6906972f page cache; Conve... |
2056 2057 |
retry: xas_reset(&xas); |
a60637c85 mm: lockless page... |
2058 |
} |
5b280c0cc mm: don't return ... |
2059 |
|
72b045aec mm: implement fin... |
2060 |
/* |
a6906972f page cache; Conve... |
2061 |
* We come here when we got to @end. We take care to not overflow the |
72b045aec mm: implement fin... |
2062 |
* index @index as it confuses some of the callers. This breaks the |
a6906972f page cache; Conve... |
2063 2064 |
* iteration when there is a page at index -1 but that is already * broken anyway. |
72b045aec mm: implement fin... |
2065 2066 2067 2068 2069 2070 |
*/ if (end == (pgoff_t)-1) *index = (pgoff_t)-1; else *index = end + 1; out: |
a60637c85 mm: lockless page... |
2071 |
rcu_read_unlock(); |
1da177e4c Linux-2.6.12-rc2 |
2072 |
|
1da177e4c Linux-2.6.12-rc2 |
2073 2074 |
return ret; } |
72b045aec mm: implement fin... |
2075 |
EXPORT_SYMBOL(find_get_pages_range_tag); |
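Usage note: writeback code usually reaches this via the pagevec_lookup_range_tag() wrapper, walking dirty pages in batches the way write_cache_pages() does. A minimal sketch of that loop (not part of filemap.c; the example_* name is hypothetical):

#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/sched.h>

static void example_walk_dirty_pages(struct address_space *mapping,
                                     pgoff_t index, pgoff_t end)
{
        struct pagevec pvec;
        unsigned int i, nr;

        pagevec_init(&pvec);
        while (index <= end) {
                /* Advances @index past the pages it returns. */
                nr = pagevec_lookup_range_tag(&pvec, mapping, &index, end,
                                              PAGECACHE_TAG_DIRTY);
                if (!nr)
                        break;
                for (i = 0; i < nr; i++) {
                        struct page *page = pvec.pages[i];

                        lock_page(page);
                        /* ... recheck ->mapping/dirty and write the page ... */
                        unlock_page(page);
                }
                pagevec_release(&pvec);
                cond_resched();
        }
}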
1da177e4c Linux-2.6.12-rc2 |
2076 |
|
76d42bd96 [PATCH] readahead... |
2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 |
/* * CD/DVDs are error prone. When a medium error occurs, the driver may fail * a _large_ part of the i/o request. Imagine the worst scenario: * * ---R__________________________________________B__________ * ^ reading here ^ bad block(assume 4k) * * read(R) => miss => readahead(R...B) => media error => frustrating retries * => failing the whole request => read(R) => read(R+1) => * readahead(R+1...B+1) => bang => read(R+2) => read(R+3) => * readahead(R+3...B+2) => bang => read(R+3) => read(R+4) => * readahead(R+4...B+3) => bang => read(R+4) => read(R+5) => ...... * * It is going insane. Fix it by quickly scaling down the readahead size. */ |
0f8e2db4e mm/filemap.c: rem... |
2092 |
static void shrink_readahead_size_eio(struct file_ra_state *ra) |
76d42bd96 [PATCH] readahead... |
2093 |
{ |
76d42bd96 [PATCH] readahead... |
2094 |
ra->ra_pages /= 4; |
76d42bd96 [PATCH] readahead... |
2095 |
} |
485bb99b4 [PATCH] kernel-do... |
2096 |
/** |
47c27bc46 fs: pass iocb to ... |
2097 2098 |
* generic_file_buffered_read - generic file read routine * @iocb: the iocb to read |
6e58e79db introduce copy_pa... |
2099 2100 |
* @iter: data destination * @written: already copied |
485bb99b4 [PATCH] kernel-do... |
2101 |
* |
1da177e4c Linux-2.6.12-rc2 |
2102 |
* This is a generic file read routine, and uses the |
485bb99b4 [PATCH] kernel-do... |
2103 |
* mapping->a_ops->readpage() function for the actual low-level stuff. |
1da177e4c Linux-2.6.12-rc2 |
2104 2105 2106 |
* * This is really ugly. But the goto's actually try to clarify some * of the logic when it comes to error handling etc. |
a862f68a8 docs/core-api/mm:... |
2107 2108 2109 2110 |
* * Return: * * total number of bytes copied, including those that were already @written * * negative error code if nothing was copied
1da177e4c Linux-2.6.12-rc2 |
2111 |
*/ |
d85dc2e11 fs: export generi... |
2112 |
ssize_t generic_file_buffered_read(struct kiocb *iocb, |
6e58e79db introduce copy_pa... |
2113 |
struct iov_iter *iter, ssize_t written) |
1da177e4c Linux-2.6.12-rc2 |
2114 |
{ |
47c27bc46 fs: pass iocb to ... |
2115 |
struct file *filp = iocb->ki_filp; |
36e789144 kill do_generic_m... |
2116 |
struct address_space *mapping = filp->f_mapping; |
1da177e4c Linux-2.6.12-rc2 |
2117 |
struct inode *inode = mapping->host; |
36e789144 kill do_generic_m... |
2118 |
struct file_ra_state *ra = &filp->f_ra; |
47c27bc46 fs: pass iocb to ... |
2119 |
loff_t *ppos = &iocb->ki_pos; |
57f6b96c0 filemap: convert ... |
2120 2121 2122 2123 |
pgoff_t index; pgoff_t last_index; pgoff_t prev_index; unsigned long offset; /* offset into pagecache page */ |
ec0f16372 readahead: improv... |
2124 |
unsigned int prev_offset; |
6e58e79db introduce copy_pa... |
2125 |
int error = 0; |
1da177e4c Linux-2.6.12-rc2 |
2126 |
|
c2a9737f4 vfs,mm: fix a dea... |
2127 |
if (unlikely(*ppos >= inode->i_sb->s_maxbytes)) |
d05c5f7ba vfs,mm: fix retur... |
2128 |
return 0; |
c2a9737f4 vfs,mm: fix a dea... |
2129 |
iov_iter_truncate(iter, inode->i_sb->s_maxbytes); |
09cbfeaf1 mm, fs: get rid o... |
2130 2131 2132 2133 2134 |
index = *ppos >> PAGE_SHIFT; prev_index = ra->prev_pos >> PAGE_SHIFT; prev_offset = ra->prev_pos & (PAGE_SIZE-1); last_index = (*ppos + iter->count + PAGE_SIZE-1) >> PAGE_SHIFT; offset = *ppos & ~PAGE_MASK; |
1da177e4c Linux-2.6.12-rc2 |
2135 |
|
13bd69142 mm: mark async io... |
2136 2137 2138 2139 2140 2141 2142 |
/* * If we've already successfully copied some data, then we * can no longer safely return -EIOCBQUEUED. Hence mark * an async read NOWAIT at that point. */ if (written && (iocb->ki_flags & IOCB_WAITQ)) iocb->ki_flags |= IOCB_NOWAIT; |
1da177e4c Linux-2.6.12-rc2 |
2143 2144 |
for (;;) { struct page *page; |
57f6b96c0 filemap: convert ... |
2145 |
pgoff_t end_index; |
a32ea1e1f Fix read/truncate... |
2146 |
loff_t isize; |
1da177e4c Linux-2.6.12-rc2 |
2147 |
unsigned long nr, ret; |
1da177e4c Linux-2.6.12-rc2 |
2148 |
cond_resched(); |
1da177e4c Linux-2.6.12-rc2 |
2149 |
find_page: |
5abf186a3 mm, fs: check for... |
2150 2151 2152 2153 |
if (fatal_signal_pending(current)) { error = -EINTR; goto out; } |
1da177e4c Linux-2.6.12-rc2 |
2154 |
page = find_get_page(mapping, index); |
3ea89ee86 readahead: conver... |
2155 |
if (!page) { |
cdc8fcb49 Merge tag 'for-5.... |
2156 |
if (iocb->ki_flags & IOCB_NOIO) |
3239d8348 fs: support IOCB_... |
2157 |
goto would_block; |
cf914a7d6 readahead: split ... |
2158 |
page_cache_sync_readahead(mapping, |
7ff81078d readahead: remove... |
2159 |
ra, filp, |
3ea89ee86 readahead: conver... |
2160 2161 2162 2163 2164 2165 |
index, last_index - index); page = find_get_page(mapping, index); if (unlikely(page == NULL)) goto no_cached_page; } if (PageReadahead(page)) { |
41da51bce fs: Add IOCB_NOIO... |
2166 2167 2168 2169 |
if (iocb->ki_flags & IOCB_NOIO) { put_page(page); goto out; } |
cf914a7d6 readahead: split ... |
2170 |
page_cache_async_readahead(mapping, |
7ff81078d readahead: remove... |
2171 |
ra, filp, page, |
3ea89ee86 readahead: conver... |
2172 |
index, last_index - index); |
1da177e4c Linux-2.6.12-rc2 |
2173 |
} |
8ab22b9ab vfs: pagecache us... |
2174 |
if (!PageUptodate(page)) { |
ebded0278 mm: filemap: avoi... |
2175 2176 2177 2178 2179 |
/* * See comment in do_read_cache_page on why * wait_on_page_locked is used to avoid unnecessarily * serialisations and why it's safe. */ |
1a0a7853b mm: support async... |
2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 |
if (iocb->ki_flags & IOCB_WAITQ) { if (written) { put_page(page); goto out; } error = wait_on_page_locked_async(page, iocb->ki_waitq); } else { if (iocb->ki_flags & IOCB_NOWAIT) { put_page(page); goto would_block; } error = wait_on_page_locked_killable(page); } |
c4b209a42 do_generic_file_r... |
2194 2195 |
if (unlikely(error)) goto readpage_error; |
ebded0278 mm: filemap: avoi... |
2196 2197 |
if (PageUptodate(page)) goto page_ok; |
09cbfeaf1 mm, fs: get rid o... |
2198 |
if (inode->i_blkbits == PAGE_SHIFT || |
8ab22b9ab vfs: pagecache us... |
2199 2200 |
!mapping->a_ops->is_partially_uptodate) goto page_not_up_to_date; |
6d6d36bc6 mm/filemap: don't... |
2201 |
/* pipes can't handle partially uptodate pages */ |
00e237074 iov_iter: Use acc... |
2202 |
if (unlikely(iov_iter_is_pipe(iter))) |
6d6d36bc6 mm/filemap: don't... |
2203 |
goto page_not_up_to_date; |
529ae9aaa mm: rename page t... |
2204 |
if (!trylock_page(page)) |
8ab22b9ab vfs: pagecache us... |
2205 |
goto page_not_up_to_date; |
8d056cb96 mm/vfs: revalidat... |
2206 2207 2208 |
/* Did it get truncated before we got the lock? */ if (!page->mapping) goto page_not_up_to_date_locked; |
8ab22b9ab vfs: pagecache us... |
2209 |
if (!mapping->a_ops->is_partially_uptodate(page, |
6e58e79db introduce copy_pa... |
2210 |
offset, iter->count)) |
8ab22b9ab vfs: pagecache us... |
2211 2212 2213 |
goto page_not_up_to_date_locked; unlock_page(page); } |
1da177e4c Linux-2.6.12-rc2 |
2214 |
page_ok: |
a32ea1e1f Fix read/truncate... |
2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 |
/* * i_size must be checked after we know the page is Uptodate. * * Checking i_size after the PageUptodate check allows us to calculate * the correct value for "nr", which means the zero-filled * part of the page is not copied back to userspace (unless * another truncate extends the file - this is desired though). */ isize = i_size_read(inode);
09cbfeaf1 mm, fs: get rid o... |
2225 |
end_index = (isize - 1) >> PAGE_SHIFT; |
a32ea1e1f Fix read/truncate... |
2226 |
if (unlikely(!isize || index > end_index)) { |
09cbfeaf1 mm, fs: get rid o... |
2227 |
put_page(page); |
a32ea1e1f Fix read/truncate... |
2228 2229 2230 2231 |
goto out; } /* nr is the maximum number of bytes to copy from this page */ |
09cbfeaf1 mm, fs: get rid o... |
2232 |
nr = PAGE_SIZE; |
a32ea1e1f Fix read/truncate... |
2233 |
if (index == end_index) { |
09cbfeaf1 mm, fs: get rid o... |
2234 |
nr = ((isize - 1) & ~PAGE_MASK) + 1; |
a32ea1e1f Fix read/truncate... |
2235 |
if (nr <= offset) { |
09cbfeaf1 mm, fs: get rid o... |
2236 |
put_page(page); |
a32ea1e1f Fix read/truncate... |
2237 2238 2239 2240 |
goto out; } } nr = nr - offset; |
1da177e4c Linux-2.6.12-rc2 |
2241 2242 2243 2244 2245 2246 2247 2248 2249 |
		/* If users can be writing to this page using arbitrary
		 * virtual addresses, take care about potential aliasing
		 * before reading the page on the kernel side.
		 */
		if (mapping_writably_mapped(mapping))
			flush_dcache_page(page);

		/*
		 * When a sequential read accesses a page several times,
		 * only mark it as accessed the first time.
		 */
		if (prev_index != index || offset != prev_offset)
			mark_page_accessed(page);
		prev_index = index;

		/*
		 * Ok, we have the page, and it's up-to-date, so
		 * now we can copy it to user space...
		 */

		ret = copy_page_to_iter(page, offset, nr, iter);
		offset += ret;
		index += offset >> PAGE_SHIFT;
		offset &= ~PAGE_MASK;
		prev_offset = offset;

		put_page(page);
		written += ret;
		if (!iov_iter_count(iter))
			goto out;
		if (ret < nr) {
			error = -EFAULT;
			goto out;
		}
		continue;

page_not_up_to_date:
		/* Get exclusive access to the page ... */
		if (iocb->ki_flags & IOCB_WAITQ) {
			if (written) {
				put_page(page);
				goto out;
			}
			error = lock_page_async(page, iocb->ki_waitq);
		} else {
			error = lock_page_killable(page);
		}
		if (unlikely(error))
			goto readpage_error;

page_not_up_to_date_locked:
		/* Did it get truncated before we got the lock? */
		if (!page->mapping) {
			unlock_page(page);
			put_page(page);
			continue;
		}

		/* Did somebody else fill it already? */
		if (PageUptodate(page)) {
			unlock_page(page);
			goto page_ok;
		}

readpage:
		if (iocb->ki_flags & (IOCB_NOIO | IOCB_NOWAIT)) {
			unlock_page(page);
			put_page(page);
			goto would_block;
		}
		/*
		 * A previous I/O error may have been due to temporary
		 * failures, eg. multipath errors.
		 * PG_error will be set again if readpage fails.
		 */
		ClearPageError(page);
		/* Start the actual read. The read will unlock the page. */
		error = mapping->a_ops->readpage(filp, page);

		if (unlikely(error)) {
			if (error == AOP_TRUNCATED_PAGE) {
				put_page(page);
				error = 0;
				goto find_page;
			}
			goto readpage_error;
		}

		if (!PageUptodate(page)) {
			if (iocb->ki_flags & IOCB_WAITQ) {
				if (written) {
					put_page(page);
					goto out;
				}
				error = lock_page_async(page, iocb->ki_waitq);
			} else {
				error = lock_page_killable(page);
			}

			if (unlikely(error))
				goto readpage_error;
			if (!PageUptodate(page)) {
				if (page->mapping == NULL) {
					/*
					 * invalidate_mapping_pages got it
					 */
					unlock_page(page);
					put_page(page);
					goto find_page;
				}
				unlock_page(page);
				shrink_readahead_size_eio(ra);
				error = -EIO;
				goto readpage_error;
			}
			unlock_page(page);
		}

		goto page_ok;

readpage_error:
		/* UHHUH! A synchronous read error occurred. Report it */
		put_page(page);
		goto out;

no_cached_page:
		/*
		 * Ok, it wasn't cached, so we need to create a new
		 * page..
		 */
		page = page_cache_alloc(mapping);
		if (!page) {
			error = -ENOMEM;
			goto out;
		}
		error = add_to_page_cache_lru(page, mapping, index,
				mapping_gfp_constraint(mapping, GFP_KERNEL));
		if (error) {
			put_page(page);
			if (error == -EEXIST) {
				error = 0;
				goto find_page;
			}
			goto out;
		}
		goto readpage;
	}

would_block:
	error = -EAGAIN;
out:
	ra->prev_pos = prev_index;
	ra->prev_pos <<= PAGE_SHIFT;
	ra->prev_pos |= prev_offset;

	*ppos = ((loff_t)index << PAGE_SHIFT) + offset;
	file_accessed(filp);
	return written ? written : error;
}
EXPORT_SYMBOL_GPL(generic_file_buffered_read);

/**
 * generic_file_read_iter - generic filesystem read routine
 * @iocb: kernel I/O control block
 * @iter: destination for the data read
 *
 * This is the "read_iter()" routine for all filesystems
 * that can use the page cache directly.
 *
 * The IOCB_NOWAIT flag in iocb->ki_flags indicates that -EAGAIN shall
 * be returned when no data can be read without waiting for I/O requests
 * to complete; it doesn't prevent readahead.
 *
 * The IOCB_NOIO flag in iocb->ki_flags indicates that no new I/O
 * requests shall be made for the read or for readahead.  When no data
 * can be read, -EAGAIN shall be returned.  When readahead would be
 * triggered, a partial, possibly empty read shall be returned.
 *
 * Return:
 * * number of bytes copied, even for partial reads
 * * negative error code (or 0 if IOCB_NOIO) if nothing was read
 */
ssize_t
generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
	size_t count = iov_iter_count(iter);
	ssize_t retval = 0;

	if (!count)
		goto out; /* skip atime */

	if (iocb->ki_flags & IOCB_DIRECT) {
		struct file *file = iocb->ki_filp;
		struct address_space *mapping = file->f_mapping;
		struct inode *inode = mapping->host;
		loff_t size;

		size = i_size_read(inode);
		if (iocb->ki_flags & IOCB_NOWAIT) {
			if (filemap_range_has_page(mapping, iocb->ki_pos,
						   iocb->ki_pos + count - 1))
				return -EAGAIN;
		} else {
			retval = filemap_write_and_wait_range(mapping,
						iocb->ki_pos,
						iocb->ki_pos + count - 1);
			if (retval < 0)
				goto out;
		}

		file_accessed(file);

		retval = mapping->a_ops->direct_IO(iocb, iter);
		if (retval >= 0) {
			iocb->ki_pos += retval;
			count -= retval;
		}
		iov_iter_revert(iter, count - iov_iter_count(iter));

		/*
		 * Btrfs can have a short DIO read if we encounter
		 * compressed extents, so if there was an error, or if
		 * we've already read everything we wanted to, or if
		 * there was a short read because we hit EOF, go ahead
		 * and return.  Otherwise fallthrough to buffered io for
		 * the rest of the read.  Buffered reads will not work for
		 * DAX files, so don't bother trying.
		 */
		if (retval < 0 || !count || iocb->ki_pos >= size ||
		    IS_DAX(inode))
			goto out;
	}

	retval = generic_file_buffered_read(iocb, iter, retval);
out:
	return retval;
}
EXPORT_SYMBOL(generic_file_read_iter);
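
/*
 * Sketch: a filesystem that keeps its file data in the page cache can
 * usually wire the generic helpers straight into its file_operations.
 * Illustration only; "myfs_file_operations" is a hypothetical name and a
 * real filesystem may need additional methods.
 *
 *	const struct file_operations myfs_file_operations = {
 *		.llseek		= generic_file_llseek,
 *		.read_iter	= generic_file_read_iter,
 *		.write_iter	= generic_file_write_iter,
 *		.mmap		= generic_file_mmap,
 *		.fsync		= generic_file_fsync,
 *		.splice_read	= generic_file_splice_read,
 *	};
 */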

#ifdef CONFIG_MMU
#define MMAP_LOTSAMISS  (100)
/*
 * lock_page_maybe_drop_mmap - lock the page, possibly dropping the mmap_lock
 * @vmf - the vm_fault for this fault.
 * @page - the page to lock.
 * @fpin - the pointer to the file we may pin (or is already pinned).
 *
 * This works similar to lock_page_or_retry in that it can drop the mmap_lock.
 * It differs in that it actually returns the page locked if it returns 1 and 0
 * if it couldn't lock the page.  If we did have to drop the mmap_lock then fpin
 * will point to the pinned file and needs to be fput()'ed at a later point.
 */
static int lock_page_maybe_drop_mmap(struct vm_fault *vmf, struct page *page,
				     struct file **fpin)
{
	if (trylock_page(page))
		return 1;

	/*
	 * NOTE! This will make us return with VM_FAULT_RETRY, but with
	 * the mmap_lock still held. That's how FAULT_FLAG_RETRY_NOWAIT
	 * is supposed to work. We have way too many special cases..
	 */
	if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT)
		return 0;

	*fpin = maybe_unlock_mmap_for_io(vmf, *fpin);
	if (vmf->flags & FAULT_FLAG_KILLABLE) {
		if (__lock_page_killable(page)) {
			/*
			 * We didn't have the right flags to drop the mmap_lock,
			 * but all fault_handlers only check for fatal signals
			 * if we return VM_FAULT_RETRY, so we need to drop the
			 * mmap_lock here and return 0 if we don't have a fpin.
			 */
			if (*fpin == NULL)
				mmap_read_unlock(vmf->vma->vm_mm);
			return 0;
		}
	} else
		__lock_page(page);
	return 1;
}

/*
 * Synchronous readahead happens when we don't even find a page in the page
 * cache at all.  We don't want to perform IO under the mmap sem, so if we have
 * to drop the mmap sem we return the file that was pinned in order for us to do
 * that.  If we didn't pin a file then we return NULL.  The file that is
 * returned needs to be fput()'ed when we're done with it.
 */
static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
{
	struct file *file = vmf->vma->vm_file;
	struct file_ra_state *ra = &file->f_ra;
	struct address_space *mapping = file->f_mapping;
	DEFINE_READAHEAD(ractl, file, mapping, vmf->pgoff);
	struct file *fpin = NULL;
	unsigned int mmap_miss;

	/* If we don't want any read-ahead, don't bother */
	if (vmf->vma->vm_flags & VM_RAND_READ)
		return fpin;
	if (!ra->ra_pages)
		return fpin;

	if (vmf->vma->vm_flags & VM_SEQ_READ) {
		fpin = maybe_unlock_mmap_for_io(vmf, fpin);
		page_cache_sync_ra(&ractl, ra, ra->ra_pages);
		return fpin;
	}

	/* Avoid banging the cache line if not needed */
	mmap_miss = READ_ONCE(ra->mmap_miss);
	if (mmap_miss < MMAP_LOTSAMISS * 10)
		WRITE_ONCE(ra->mmap_miss, ++mmap_miss);

	/*
	 * Do we miss much more than hit in this file? If so,
	 * stop bothering with read-ahead. It will only hurt.
	 */
	if (mmap_miss > MMAP_LOTSAMISS)
		return fpin;

	/*
	 * mmap read-around
	 */
	fpin = maybe_unlock_mmap_for_io(vmf, fpin);
	ra->start = max_t(long, 0, vmf->pgoff - ra->ra_pages / 2);
	ra->size = ra->ra_pages;
	ra->async_size = ra->ra_pages / 4;
	ractl._index = ra->start;
	do_page_cache_ra(&ractl, ra->size, ra->async_size);
	return fpin;
}

/*
 * Asynchronous readahead happens when we find the page and PG_readahead,
 * so we want to possibly extend the readahead further.  We return the file that
 * was pinned if we have to drop the mmap_lock in order to do IO.
 */
static struct file *do_async_mmap_readahead(struct vm_fault *vmf,
					    struct page *page)
{
	struct file *file = vmf->vma->vm_file;
	struct file_ra_state *ra = &file->f_ra;
	struct address_space *mapping = file->f_mapping;
	struct file *fpin = NULL;
	unsigned int mmap_miss;
	pgoff_t offset = vmf->pgoff;

	/* If we don't want any read-ahead, don't bother */
	if (vmf->vma->vm_flags & VM_RAND_READ || !ra->ra_pages)
		return fpin;
	mmap_miss = READ_ONCE(ra->mmap_miss);
	if (mmap_miss)
		WRITE_ONCE(ra->mmap_miss, --mmap_miss);
	if (PageReadahead(page)) {
		fpin = maybe_unlock_mmap_for_io(vmf, fpin);
		page_cache_async_readahead(mapping, ra, file,
					   page, offset, ra->ra_pages);
	}
	return fpin;
}

/**
 * filemap_fault - read in file data for page fault handling
 * @vmf: struct vm_fault containing details of the fault
 *
 * filemap_fault() is invoked via the vma operations vector for a
 * mapped memory region to read in file data during a page fault.
 *
 * The goto's are kind of ugly, but this streamlines the normal case of having
 * it in the page cache, and handles the special cases reasonably without
 * having a lot of duplicated code.
 *
 * vma->vm_mm->mmap_lock must be held on entry.
 *
 * If our return value has VM_FAULT_RETRY set, it's because the mmap_lock
 * may be dropped before doing I/O or by lock_page_maybe_drop_mmap().
 *
 * If our return value does not have VM_FAULT_RETRY set, the mmap_lock
 * has not been released.
 *
 * We never return with VM_FAULT_RETRY and a bit from VM_FAULT_ERROR set.
 *
 * Return: bitwise-OR of %VM_FAULT_ codes.
 */
vm_fault_t filemap_fault(struct vm_fault *vmf)
{
	int error;
	struct file *file = vmf->vma->vm_file;
	struct file *fpin = NULL;
	struct address_space *mapping = file->f_mapping;
	struct file_ra_state *ra = &file->f_ra;
	struct inode *inode = mapping->host;
	pgoff_t offset = vmf->pgoff;
	pgoff_t max_off;
	struct page *page;
	vm_fault_t ret = 0;

	max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
	if (unlikely(offset >= max_off))
		return VM_FAULT_SIGBUS;

	/*
	 * Do we have something in the page cache already?
	 */
	page = find_get_page(mapping, offset);
	if (likely(page) && !(vmf->flags & FAULT_FLAG_TRIED)) {
		/*
		 * We found the page, so try async readahead before
		 * waiting for the lock.
		 */
		fpin = do_async_mmap_readahead(vmf, page);
	} else if (!page) {
		/* No page in the page cache at all */
		count_vm_event(PGMAJFAULT);
		count_memcg_event_mm(vmf->vma->vm_mm, PGMAJFAULT);
		ret = VM_FAULT_MAJOR;
		fpin = do_sync_mmap_readahead(vmf);
retry_find:
		page = pagecache_get_page(mapping, offset,
					  FGP_CREAT|FGP_FOR_MMAP,
					  vmf->gfp_mask);
		if (!page) {
			if (fpin)
				goto out_retry;
			return VM_FAULT_OOM;
		}
	}

	if (!lock_page_maybe_drop_mmap(vmf, page, &fpin))
		goto out_retry;

	/* Did it get truncated? */
	if (unlikely(compound_head(page)->mapping != mapping)) {
		unlock_page(page);
		put_page(page);
		goto retry_find;
	}
	VM_BUG_ON_PAGE(page_to_pgoff(page) != offset, page);

	/*
	 * We have a locked page in the page cache, now we need to check
	 * that it's up-to-date. If not, it is going to be due to an error.
	 */
	if (unlikely(!PageUptodate(page)))
		goto page_not_uptodate;

	/*
	 * We've made it this far and we had to drop our mmap_lock, now is the
	 * time to return to the upper layer and have it re-find the vma and
	 * redo the fault.
	 */
	if (fpin) {
		unlock_page(page);
		goto out_retry;
	}

	/*
	 * Found the page and have a reference on it.
	 * We must recheck i_size under page lock.
	 */
	max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
	if (unlikely(offset >= max_off)) {
		unlock_page(page);
		put_page(page);
		return VM_FAULT_SIGBUS;
	}

	vmf->page = page;
	return ret | VM_FAULT_LOCKED;

page_not_uptodate:
	/*
	 * Umm, take care of errors if the page isn't up-to-date.
	 * Try to re-read it _once_. We do this synchronously,
	 * because there really aren't any performance issues here
	 * and we need to check for errors.
	 */
	ClearPageError(page);
	fpin = maybe_unlock_mmap_for_io(vmf, fpin);
	error = mapping->a_ops->readpage(file, page);
	if (!error) {
		wait_on_page_locked(page);
		if (!PageUptodate(page))
			error = -EIO;
	}
	if (fpin)
		goto out_retry;
	put_page(page);

	if (!error || error == AOP_TRUNCATED_PAGE)
		goto retry_find;

	shrink_readahead_size_eio(ra);
	return VM_FAULT_SIGBUS;

out_retry:
	/*
	 * We dropped the mmap_lock, we need to return to the fault handler to
	 * re-find the vma and come back and find our hopefully still populated
	 * page.
	 */
	if (page)
		put_page(page);
	if (fpin)
		fput(fpin);
	return ret | VM_FAULT_RETRY;
}
EXPORT_SYMBOL(filemap_fault);
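
/*
 * Sketch: filemap_fault() is normally reused either directly (via
 * generic_file_vm_ops below) or wrapped so that the filesystem can take
 * its own lock across the fault.  Hypothetical illustration only; the
 * "myfs_*" names and the myfs_i(inode)->mmap_sem lock are placeholders.
 *
 *	static vm_fault_t myfs_filemap_fault(struct vm_fault *vmf)
 *	{
 *		struct inode *inode = file_inode(vmf->vma->vm_file);
 *		vm_fault_t ret;
 *
 *		down_read(&myfs_i(inode)->mmap_sem);
 *		ret = filemap_fault(vmf);
 *		up_read(&myfs_i(inode)->mmap_sem);
 *		return ret;
 *	}
 *
 *	static const struct vm_operations_struct myfs_file_vm_ops = {
 *		.fault		= myfs_filemap_fault,
 *		.map_pages	= filemap_map_pages,
 *		.page_mkwrite	= filemap_page_mkwrite,
 *	};
 */
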
void filemap_map_pages(struct vm_fault *vmf,
		pgoff_t start_pgoff, pgoff_t end_pgoff)
{
	struct file *file = vmf->vma->vm_file;
	struct address_space *mapping = file->f_mapping;
	pgoff_t last_pgoff = start_pgoff;
	unsigned long max_idx;
	XA_STATE(xas, &mapping->i_pages, start_pgoff);
	struct page *head, *page;
	unsigned int mmap_miss = READ_ONCE(file->f_ra.mmap_miss);

	rcu_read_lock();
	xas_for_each(&xas, head, end_pgoff) {
		if (xas_retry(&xas, head))
			continue;
		if (xa_is_value(head))
			goto next;

		/*
		 * Check for a locked page first, as a speculative
		 * reference may adversely influence page migration.
		 */
		if (PageLocked(head))
			goto next;
		if (!page_cache_get_speculative(head))
			goto next;

		/* Has the page moved or been split? */
		if (unlikely(head != xas_reload(&xas)))
			goto skip;
		page = find_subpage(head, xas.xa_index);

		if (!PageUptodate(head) ||
				PageReadahead(page) ||
				PageHWPoison(page))
			goto skip;
		if (!trylock_page(head))
			goto skip;

		if (head->mapping != mapping || !PageUptodate(head))
			goto unlock;

		max_idx = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE);
		if (xas.xa_index >= max_idx)
			goto unlock;

		if (mmap_miss > 0)
			mmap_miss--;

		vmf->address += (xas.xa_index - last_pgoff) << PAGE_SHIFT;
		if (vmf->pte)
			vmf->pte += xas.xa_index - last_pgoff;
		last_pgoff = xas.xa_index;
		if (alloc_set_pte(vmf, page))
			goto unlock;
		unlock_page(head);
		goto next;
unlock:
		unlock_page(head);
skip:
		put_page(head);
next:
		/* Huge page is mapped? No need to proceed. */
		if (pmd_trans_huge(*vmf->pmd))
			break;
	}
	rcu_read_unlock();
	WRITE_ONCE(file->f_ra.mmap_miss, mmap_miss);
}
EXPORT_SYMBOL(filemap_map_pages);

vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf)
{
	struct page *page = vmf->page;
	struct inode *inode = file_inode(vmf->vma->vm_file);
	vm_fault_t ret = VM_FAULT_LOCKED;

	sb_start_pagefault(inode->i_sb);
	file_update_time(vmf->vma->vm_file);
	lock_page(page);
	if (page->mapping != inode->i_mapping) {
		unlock_page(page);
		ret = VM_FAULT_NOPAGE;
		goto out;
	}
	/*
	 * We mark the page dirty already here so that when freeze is in
	 * progress, we are guaranteed that writeback during freezing will
	 * see the dirty page and writeprotect it again.
	 */
	set_page_dirty(page);
	wait_for_stable_page(page);
out:
	sb_end_pagefault(inode->i_sb);
	return ret;
}

const struct vm_operations_struct generic_file_vm_ops = {
	.fault		= filemap_fault,
	.map_pages	= filemap_map_pages,
	.page_mkwrite	= filemap_page_mkwrite,
};

/* This is used for a general mmap of a disk file */

int generic_file_mmap(struct file * file, struct vm_area_struct * vma)
{
	struct address_space *mapping = file->f_mapping;

	if (!mapping->a_ops->readpage)
		return -ENOEXEC;
	file_accessed(file);
	vma->vm_ops = &generic_file_vm_ops;
	return 0;
}

/*
 * This is for filesystems which do not implement ->writepage.
 */
int generic_file_readonly_mmap(struct file *file, struct vm_area_struct *vma)
{
	if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE))
		return -EINVAL;
	return generic_file_mmap(file, vma);
}
#else
vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf)
{
	return VM_FAULT_SIGBUS;
}
int generic_file_mmap(struct file * file, struct vm_area_struct * vma)
{
	return -ENOSYS;
}
int generic_file_readonly_mmap(struct file * file, struct vm_area_struct * vma)
{
	return -ENOSYS;
}
#endif /* CONFIG_MMU */

EXPORT_SYMBOL(filemap_page_mkwrite);
EXPORT_SYMBOL(generic_file_mmap);
EXPORT_SYMBOL(generic_file_readonly_mmap);

static struct page *wait_on_page_read(struct page *page)
{
	if (!IS_ERR(page)) {
		wait_on_page_locked(page);
		if (!PageUptodate(page)) {
			put_page(page);
			page = ERR_PTR(-EIO);
		}
	}
	return page;
}

static struct page *do_read_cache_page(struct address_space *mapping,
				pgoff_t index,
				int (*filler)(void *, struct page *),
				void *data,
				gfp_t gfp)
{
	struct page *page;
	int err;
repeat:
	page = find_get_page(mapping, index);
	if (!page) {
		page = __page_cache_alloc(gfp);
		if (!page)
			return ERR_PTR(-ENOMEM);
		err = add_to_page_cache_lru(page, mapping, index, gfp);
		if (unlikely(err)) {
			put_page(page);
			if (err == -EEXIST)
				goto repeat;
			/* Presumably ENOMEM for xarray node */
			return ERR_PTR(err);
		}

filler:
		if (filler)
			err = filler(data, page);
		else
			err = mapping->a_ops->readpage(data, page);

		if (err < 0) {
			put_page(page);
			return ERR_PTR(err);
		}

		page = wait_on_page_read(page);
		if (IS_ERR(page))
			return page;
		goto out;
	}
	if (PageUptodate(page))
		goto out;

	/*
	 * Page is not up to date and may be locked due to one of the following
	 * case a: Page is being filled and the page lock is held
	 * case b: Read/write error clearing the page uptodate status
	 * case c: Truncation in progress (page locked)
	 * case d: Reclaim in progress
	 *
	 * Case a, the page will be up to date when the page is unlocked.
	 * There is no need to serialise on the page lock here as the page
	 * is pinned so the lock gives no additional protection. Even if the
	 * page is truncated, the data is still valid if PageUptodate as
	 * it's a race vs truncate race.
	 * Case b, the page will not be up to date
	 * Case c, the page may be truncated but in itself, the data may still
	 * be valid after IO completes as it's a read vs truncate race. The
	 * operation must restart if the page is not uptodate on unlock but
	 * otherwise serialising on page lock to stabilise the mapping gives
	 * no additional guarantees to the caller as the page lock is
	 * released before return.
	 * Case d, similar to truncation. If reclaim holds the page lock, it
	 * will be a race with remove_mapping that determines if the mapping
	 * is valid on unlock but otherwise the data is valid and there is
	 * no need to serialise with page lock.
	 *
	 * As the page lock gives no additional guarantee, we optimistically
	 * wait on the page to be unlocked and check if it's up to date and
	 * use the page if it is. Otherwise, the page lock is required to
	 * distinguish between the different cases. The motivation is that we
	 * avoid spurious serialisations and wakeups when multiple processes
	 * wait on the same page for IO to complete.
	 */
	wait_on_page_locked(page);
	if (PageUptodate(page))
		goto out;

	/* Distinguish between all the cases under the safety of the lock */
	lock_page(page);

	/* Case c or d, restart the operation */
	if (!page->mapping) {
		unlock_page(page);
		put_page(page);
		goto repeat;
	}

	/* Someone else locked and filled the page in a very small window */
	if (PageUptodate(page)) {
		unlock_page(page);
		goto out;
	}

	/*
	 * A previous I/O error may have been due to temporary
	 * failures.
	 * Clear page error before actual read, PG_error will be
	 * set again if read page fails.
	 */
	ClearPageError(page);
	goto filler;

out:
	mark_page_accessed(page);
	return page;
}

/**
 * read_cache_page - read into page cache, fill it if needed
 * @mapping: the page's address_space
 * @index: the page index
 * @filler: function to perform the read
 * @data: first arg to filler(data, page) function, often left as NULL
 *
 * Read into the page cache.  If a page already exists, and PageUptodate() is
 * not set, try to fill the page and wait for it to become unlocked.
 *
 * If the page does not get brought uptodate, return -EIO.
 *
 * Return: up to date page on success, ERR_PTR() on failure.
 */
struct page *read_cache_page(struct address_space *mapping,
				pgoff_t index,
				int (*filler)(void *, struct page *),
				void *data)
{
	return do_read_cache_page(mapping, index, filler, data,
			mapping_gfp_mask(mapping));
}
EXPORT_SYMBOL(read_cache_page);
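
/*
 * Sketch of typical usage: most callers go through read_mapping_page(),
 * which calls read_cache_page() with a NULL filler so that the mapping's
 * ->readpage() is used, then check for an error page and drop their
 * reference when done.  "mapping" and "index" are assumed to be supplied
 * by the caller.
 *
 *	struct page *page;
 *
 *	page = read_mapping_page(mapping, index, NULL);
 *	if (IS_ERR(page))
 *		return PTR_ERR(page);
 *	... examine the contents, e.g. via kmap(page)/kunmap(page) ...
 *	put_page(page);
 */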

/**
 * read_cache_page_gfp - read into page cache, using specified page allocation flags.
 * @mapping: the page's address_space
 * @index: the page index
 * @gfp: the page allocator flags to use if allocating
 *
 * This is the same as "read_mapping_page(mapping, index, NULL)", but with
 * any new page allocations done using the specified allocation flags.
 *
 * If the page does not get brought uptodate, return -EIO.
 *
 * Return: up to date page on success, ERR_PTR() on failure.
 */
struct page *read_cache_page_gfp(struct address_space *mapping,
				pgoff_t index,
				gfp_t gfp)
{
	return do_read_cache_page(mapping, index, NULL, NULL, gfp);
}
EXPORT_SYMBOL(read_cache_page_gfp);

int pagecache_write_begin(struct file *file, struct address_space *mapping,
				loff_t pos, unsigned len, unsigned flags,
				struct page **pagep, void **fsdata)
{
	const struct address_space_operations *aops = mapping->a_ops;

	return aops->write_begin(file, mapping, pos, len, flags,
							pagep, fsdata);
}
EXPORT_SYMBOL(pagecache_write_begin);

int pagecache_write_end(struct file *file, struct address_space *mapping,
				loff_t pos, unsigned len, unsigned copied,
				struct page *page, void *fsdata)
{
	const struct address_space_operations *aops = mapping->a_ops;

	return aops->write_end(file, mapping, pos, len, copied, page, fsdata);
}
EXPORT_SYMBOL(pagecache_write_end);
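
/*
 * Sketch of how these two wrappers pair up for an in-kernel buffered write:
 * begin, copy into the returned page, then end.  Hypothetical fragment;
 * "mapping", "pos", "src" and "len" come from the caller, len is assumed
 * not to cross a page boundary, and error handling is abbreviated.
 *
 *	struct page *page;
 *	void *fsdata;
 *	int err;
 *
 *	err = pagecache_write_begin(NULL, mapping, pos, len, 0, &page, &fsdata);
 *	if (err)
 *		return err;
 *
 *	memcpy(page_address(page) + offset_in_page(pos), src, len);
 *
 *	err = pagecache_write_end(NULL, mapping, pos, len, len, page, fsdata);
 *	return err < 0 ? err : 0;
 */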

/*
 * Warn about a page cache invalidation failure during a direct I/O write.
 */
void dio_warn_stale_pagecache(struct file *filp)
{
	static DEFINE_RATELIMIT_STATE(_rs, 86400 * HZ, DEFAULT_RATELIMIT_BURST);
	char pathname[128];
	struct inode *inode = file_inode(filp);
	char *path;

	errseq_set(&inode->i_mapping->wb_err, -EIO);
	if (__ratelimit(&_rs)) {
		path = file_path(filp, pathname, sizeof(pathname));
		if (IS_ERR(path))
			path = "(unknown)";
		pr_crit("Page cache invalidation failure on direct I/O.  Possible data corruption due to collision with buffered I/O!\n");
		pr_crit("File: %s PID: %d Comm: %.20s\n", path, current->pid,
			current->comm);
	}
}

ssize_t
generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct address_space *mapping = file->f_mapping;
	struct inode *inode = mapping->host;
	loff_t pos = iocb->ki_pos;
	ssize_t written;
	size_t write_len;
	pgoff_t end;

	write_len = iov_iter_count(from);
	end = (pos + write_len - 1) >> PAGE_SHIFT;

	if (iocb->ki_flags & IOCB_NOWAIT) {
		/* If there are pages to writeback, return */
		if (filemap_range_has_page(inode->i_mapping, pos,
					   pos + write_len - 1))
			return -EAGAIN;
	} else {
		written = filemap_write_and_wait_range(mapping, pos,
							pos + write_len - 1);
		if (written)
			goto out;
	}

	/*
	 * After a write we want buffered reads to be sure to go to disk to get
	 * the new data.  We invalidate clean cached page from the region we're
	 * about to write.  We do this *before* the write so that we can return
	 * without clobbering -EIOCBQUEUED from ->direct_IO().
	 */
	written = invalidate_inode_pages2_range(mapping,
					pos >> PAGE_SHIFT, end);
	/*
	 * If a page can not be invalidated, return 0 to fall back
	 * to buffered write.
	 */
	if (written) {
		if (written == -EBUSY)
			return 0;
		goto out;
	}

	written = mapping->a_ops->direct_IO(iocb, from);

	/*
	 * Finally, try again to invalidate clean pages which might have been
	 * cached by non-direct readahead, or faulted in by get_user_pages()
	 * if the source of the write was an mmap'ed region of the file
	 * we're writing.  Either one is a pretty crazy thing to do,
	 * so we don't support it 100%.  If this invalidation
	 * fails, tough, the write still worked...
	 *
	 * Most of the time we do not need this since dio_complete() will do
	 * the invalidation for us. However there are some file systems that
	 * do not end up with dio_complete() being called, so let's not break
	 * them by removing it completely.
	 *
	 * Noticeable example is a blkdev_direct_IO().
	 *
	 * Skip invalidation for async writes or if mapping has no pages.
	 */
	if (written > 0 && mapping->nrpages &&
	    invalidate_inode_pages2_range(mapping, pos >> PAGE_SHIFT, end))
		dio_warn_stale_pagecache(file);

	if (written > 0) {
		pos += written;
		write_len -= written;
		if (pos > i_size_read(inode) && !S_ISBLK(inode->i_mode)) {
			i_size_write(inode, pos);
			mark_inode_dirty(inode);
		}
		iocb->ki_pos = pos;
	}
	iov_iter_revert(from, write_len - iov_iter_count(from));
out:
	return written;
}
EXPORT_SYMBOL(generic_file_direct_write);

/*
 * Find or create a page at the given pagecache position. Return the locked
 * page. This function is specifically for buffered writes.
 */
struct page *grab_cache_page_write_begin(struct address_space *mapping,
					pgoff_t index, unsigned flags)
{
	struct page *page;
	int fgp_flags = FGP_LOCK|FGP_WRITE|FGP_CREAT;

	if (flags & AOP_FLAG_NOFS)
		fgp_flags |= FGP_NOFS;

	page = pagecache_get_page(mapping, index, fgp_flags,
			mapping_gfp_mask(mapping));
	if (page)
		wait_for_stable_page(page);

	return page;
}
EXPORT_SYMBOL(grab_cache_page_write_begin);
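
/*
 * Sketch: a minimal ->write_begin built on grab_cache_page_write_begin(),
 * in the spirit of the libfs simple_write_begin() helper.  The
 * "myfs_write_begin" name is a hypothetical placeholder; a real
 * implementation must also bring the page uptodate (by reading or zeroing
 * it) when only part of the page will be written.
 *
 *	static int myfs_write_begin(struct file *file,
 *			struct address_space *mapping, loff_t pos,
 *			unsigned len, unsigned flags,
 *			struct page **pagep, void **fsdata)
 *	{
 *		struct page *page;
 *
 *		page = grab_cache_page_write_begin(mapping,
 *					pos >> PAGE_SHIFT, flags);
 *		if (!page)
 *			return -ENOMEM;
 *		*pagep = page;
 *		return 0;
 *	}
 */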

ssize_t generic_perform_write(struct file *file,
				struct iov_iter *i, loff_t pos)
{
	struct address_space *mapping = file->f_mapping;
	const struct address_space_operations *a_ops = mapping->a_ops;
	long status = 0;
	ssize_t written = 0;
	unsigned int flags = 0;

	do {
		struct page *page;
		unsigned long offset;	/* Offset into pagecache page */
		unsigned long bytes;	/* Bytes to write to page */
		size_t copied;		/* Bytes copied from user */
		void *fsdata;

		offset = (pos & (PAGE_SIZE - 1));
		bytes = min_t(unsigned long, PAGE_SIZE - offset,
						iov_iter_count(i));

again:
		/*
		 * Bring in the user page that we will copy from _first_.
		 * Otherwise there's a nasty deadlock on copying from the
		 * same page as we're writing to, without it being marked
		 * up-to-date.
		 *
		 * Not only is this an optimisation, but it is also required
		 * to check that the address is actually valid, when atomic
		 * usercopies are used, below.
		 */
		if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
			status = -EFAULT;
			break;
		}

		if (fatal_signal_pending(current)) {
			status = -EINTR;
			break;
		}

		status = a_ops->write_begin(file, mapping, pos, bytes, flags,
						&page, &fsdata);
		if (unlikely(status < 0))
			break;

		if (mapping_writably_mapped(mapping))
			flush_dcache_page(page);

		copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
		flush_dcache_page(page);

		status = a_ops->write_end(file, mapping, pos, bytes, copied,
						page, fsdata);
		if (unlikely(status < 0))
			break;
		copied = status;

		cond_resched();

		iov_iter_advance(i, copied);
		if (unlikely(copied == 0)) {
			/*
			 * If we were unable to copy any data at all, we must
			 * fall back to a single segment length write.
			 *
			 * If we didn't fallback here, we could livelock
			 * because not all segments in the iov can be copied at
			 * once without a pagefault.
			 */
			bytes = min_t(unsigned long, PAGE_SIZE - offset,
						iov_iter_single_seg_count(i));
			goto again;
		}
		pos += copied;
		written += copied;

		balance_dirty_pages_ratelimited(mapping);
	} while (iov_iter_count(i));

	return written ? written : status;
}
EXPORT_SYMBOL(generic_perform_write);

/**
 * __generic_file_write_iter - write data to a file
 * @iocb: IO state structure (file, offset, etc.)
 * @from: iov_iter with data to write
 *
 * This function does all the work needed for actually writing data to a
 * file. It does all basic checks, removes SUID from the file, updates
 * modification times and calls proper subroutines depending on whether we
 * do direct IO or a standard buffered write.
 *
 * It expects i_mutex to be grabbed unless we work on a block device or similar
 * object which does not need locking at all.
 *
 * This function does *not* take care of syncing data in case of O_SYNC write.
 * A caller has to handle it. This is mainly due to the fact that we want to
 * avoid syncing under i_mutex.
 *
 * Return:
 * * number of bytes written, even for truncated writes
 * * negative error code if no data has been written at all
 */
ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct address_space * mapping = file->f_mapping;
	struct inode *inode = mapping->host;
	ssize_t written = 0;
	ssize_t err;
	ssize_t status;

	/* We can write back this queue in page reclaim */
	current->backing_dev_info = inode_to_bdi(inode);
	err = file_remove_privs(file);
	if (err)
		goto out;

	err = file_update_time(file);
	if (err)
		goto out;

	if (iocb->ki_flags & IOCB_DIRECT) {
		loff_t pos, endbyte;

		written = generic_file_direct_write(iocb, from);
		/*
		 * If the write stopped short of completing, fall back to
		 * buffered writes.  Some filesystems do this for writes to
		 * holes, for example.  For DAX files, a buffered write will
		 * not succeed (even if it did, DAX does not handle dirty
		 * page-cache pages correctly).
		 */
		if (written < 0 || !iov_iter_count(from) || IS_DAX(inode))
			goto out;

		status = generic_perform_write(file, from, pos = iocb->ki_pos);
		/*
		 * If generic_perform_write() returned a synchronous error
		 * then we want to return the number of bytes which were
		 * direct-written, or the error code if that was zero.  Note
		 * that this differs from normal direct-io semantics, which
		 * will return -EFOO even if some bytes were written.
		 */
		if (unlikely(status < 0)) {
			err = status;
			goto out;
		}
		/*
		 * We need to ensure that the page cache pages are written to
		 * disk and invalidated to preserve the expected O_DIRECT
		 * semantics.
		 */
		endbyte = pos + status - 1;
		err = filemap_write_and_wait_range(mapping, pos, endbyte);
		if (err == 0) {
			iocb->ki_pos = endbyte + 1;
			written += status;
			invalidate_mapping_pages(mapping,
						 pos >> PAGE_SHIFT,
						 endbyte >> PAGE_SHIFT);
		} else {
			/*
			 * We don't know how much we wrote, so just return
			 * the number of bytes which were direct-written
			 */
		}
	} else {
		written = generic_perform_write(file, from, iocb->ki_pos);
		if (likely(written > 0))
			iocb->ki_pos += written;
	}
out:
	current->backing_dev_info = NULL;
	return written ? written : err;
}
EXPORT_SYMBOL(__generic_file_write_iter);
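
/*
 * Sketch: generic_file_write_iter() just below is the stock wrapper around
 * this function.  A filesystem that serialises writes with its own lock
 * rather than bare i_mutex can follow the same sequence; the "myfs_*"
 * names and the per-inode io_rwsem are hypothetical placeholders.
 *
 *	static ssize_t myfs_file_write_iter(struct kiocb *iocb,
 *					    struct iov_iter *from)
 *	{
 *		struct inode *inode = file_inode(iocb->ki_filp);
 *		ssize_t ret;
 *
 *		down_write(&myfs_i(inode)->io_rwsem);
 *		ret = generic_write_checks(iocb, from);
 *		if (ret > 0)
 *			ret = __generic_file_write_iter(iocb, from);
 *		up_write(&myfs_i(inode)->io_rwsem);
 *
 *		if (ret > 0)
 *			ret = generic_write_sync(iocb, ret);
 *		return ret;
 *	}
 */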

/**
 * generic_file_write_iter - write data to a file
 * @iocb: IO state structure
 * @from: iov_iter with data to write
 *
 * This is a wrapper around __generic_file_write_iter() to be used by most
 * filesystems. It takes care of syncing the file in case of O_SYNC file
 * and acquires i_mutex as needed.
 * Return:
 * * negative error code if no data has been written at all or
 *   vfs_fsync_range() failed for a synchronous write
 * * number of bytes written, even for truncated writes
 */
ssize_t generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;
	ssize_t ret;

	inode_lock(inode);
	ret = generic_write_checks(iocb, from);
	if (ret > 0)
		ret = __generic_file_write_iter(iocb, from);
	inode_unlock(inode);

	if (ret > 0)
		ret = generic_write_sync(iocb, ret);
	return ret;
}
EXPORT_SYMBOL(generic_file_write_iter);

/**
 * try_to_release_page() - release old fs-specific metadata on a page
 *
 * @page: the page which the kernel is trying to free
 * @gfp_mask: memory allocation flags (and I/O mode)
 *
 * The address_space is to try to release any data against the page
 * (presumably at page->private).
 *
 * This may also be called if PG_fscache is set on a page, indicating that the
 * page is known to the local caching routines.
 *
 * The @gfp_mask argument specifies whether I/O may be performed to release
 * this page (__GFP_IO), and whether the call may block (__GFP_RECLAIM & __GFP_FS).
 *
 * Return: %1 if the release was successful, otherwise return zero.
 */
int try_to_release_page(struct page *page, gfp_t gfp_mask)
{
	struct address_space * const mapping = page->mapping;

	BUG_ON(!PageLocked(page));
	if (PageWriteback(page))
		return 0;

	if (mapping && mapping->a_ops->releasepage)
		return mapping->a_ops->releasepage(page, gfp_mask);
	return try_to_free_buffers(page);
}

EXPORT_SYMBOL(try_to_release_page);