fs/dax.c
/*
 * fs/dax.c - Direct Access filesystem code
 * Copyright (c) 2013-2014 Intel Corporation
 * Author: Matthew Wilcox <matthew.r.wilcox@intel.com>
 * Author: Ross Zwisler <ross.zwisler@linux.intel.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 */

#include <linux/atomic.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/dax.h>
#include <linux/fs.h>
#include <linux/genhd.h>
#include <linux/highmem.h>
#include <linux/memcontrol.h>
#include <linux/mm.h>
#include <linux/mutex.h>
#include <linux/pagevec.h>
#include <linux/sched.h>
#include <linux/sched/signal.h>
#include <linux/uio.h>
#include <linux/vmstat.h>
#include <linux/pfn_t.h>
#include <linux/sizes.h>
#include <linux/mmu_notifier.h>
#include <linux/iomap.h>
#include "internal.h"

#define CREATE_TRACE_POINTS
#include <trace/events/fs_dax.h>
/* We choose 4096 entries - same as per-zone page wait tables */
#define DAX_WAIT_TABLE_BITS 12
#define DAX_WAIT_TABLE_ENTRIES (1 << DAX_WAIT_TABLE_BITS)

/* The 'colour' (ie low bits) within a PMD of a page offset.  */
#define PG_PMD_COLOUR	((PMD_SIZE >> PAGE_SHIFT) - 1)
#define PG_PMD_NR	(PMD_SIZE >> PAGE_SHIFT)

static wait_queue_head_t wait_table[DAX_WAIT_TABLE_ENTRIES];

static int __init init_dax_wait_table(void)
{
	int i;

	for (i = 0; i < DAX_WAIT_TABLE_ENTRIES; i++)
		init_waitqueue_head(wait_table + i);
	return 0;
}
fs_initcall(init_dax_wait_table);
/*
 * We use lowest available bit in exceptional entry for locking, one bit for
 * the entry size (PMD) and two more to tell us if the entry is a zero page or
 * an empty entry that is just used for locking.  In total four special bits.
 *
 * If the PMD bit isn't set the entry has size PAGE_SIZE, and if the ZERO_PAGE
 * and EMPTY bits aren't set the entry is a normal DAX entry with a filesystem
 * block allocation.
 */
#define RADIX_DAX_SHIFT		(RADIX_TREE_EXCEPTIONAL_SHIFT + 4)
#define RADIX_DAX_ENTRY_LOCK	(1 << RADIX_TREE_EXCEPTIONAL_SHIFT)
#define RADIX_DAX_PMD		(1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 1))
#define RADIX_DAX_ZERO_PAGE	(1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 2))
#define RADIX_DAX_EMPTY		(1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 3))

static unsigned long dax_radix_pfn(void *entry)
{
	return (unsigned long)entry >> RADIX_DAX_SHIFT;
}

static void *dax_radix_locked_entry(unsigned long pfn, unsigned long flags)
{
	return (void *)(RADIX_TREE_EXCEPTIONAL_ENTRY | flags |
			(pfn << RADIX_DAX_SHIFT) | RADIX_DAX_ENTRY_LOCK);
}

static unsigned int dax_radix_order(void *entry)
{
	if ((unsigned long)entry & RADIX_DAX_PMD)
		return PMD_SHIFT - PAGE_SHIFT;
	return 0;
}

static int dax_is_pmd_entry(void *entry)
{
	return (unsigned long)entry & RADIX_DAX_PMD;
}

static int dax_is_pte_entry(void *entry)
{
	return !((unsigned long)entry & RADIX_DAX_PMD);
}

static int dax_is_zero_entry(void *entry)
{
	return (unsigned long)entry & RADIX_DAX_ZERO_PAGE;
}

static int dax_is_empty_entry(void *entry)
{
	return (unsigned long)entry & RADIX_DAX_EMPTY;
}
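/*
 * Example: with RADIX_TREE_EXCEPTIONAL_SHIFT == 2 (its value on kernels of
 * this vintage), RADIX_DAX_SHIFT is 6, so a locked PMD entry for pfn 0x1000
 * is encoded as (0x1000 << 6) | RADIX_DAX_PMD | RADIX_TREE_EXCEPTIONAL_ENTRY
 * | RADIX_DAX_ENTRY_LOCK, and dax_radix_pfn() recovers the pfn by shifting
 * those special bits back out.
 */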
104 |
/* |
ac401cc78 dax: New fault lo... |
105 106 107 108 |
* DAX radix tree locking */ struct exceptional_entry_key { struct address_space *mapping; |
63e95b5c4 dax: coordinate l... |
109 |
pgoff_t entry_start; |
ac401cc78 dax: New fault lo... |
110 111 112 |
}; struct wait_exceptional_entry_queue { |
ac6424b98 sched/wait: Renam... |
113 |
wait_queue_entry_t wait; |
ac401cc78 dax: New fault lo... |
114 115 |
struct exceptional_entry_key key; }; |
63e95b5c4 dax: coordinate l... |
116 117 118 119 120 121 122 123 124 125 |
static wait_queue_head_t *dax_entry_waitqueue(struct address_space *mapping, pgoff_t index, void *entry, struct exceptional_entry_key *key) { unsigned long hash; /* * If 'entry' is a PMD, align the 'index' that we use for the wait * queue to the start of that PMD. This ensures that all offsets in * the range covered by the PMD map to the same bit lock. */ |
642261ac9 dax: add struct i... |
126 |
if (dax_is_pmd_entry(entry)) |
917f34526 dax: use PG_PMD_C... |
127 |
index &= ~PG_PMD_COLOUR; |
63e95b5c4 dax: coordinate l... |
128 129 130 131 132 133 134 |
key->mapping = mapping; key->entry_start = index; hash = hash_long((unsigned long)mapping ^ index, DAX_WAIT_TABLE_BITS); return wait_table + hash; } |
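/*
 * Example: with 4k pages and 2MiB PMDs, PG_PMD_COLOUR is 0x1ff, so for a PMD
 * entry a fault at index 0x2a7 waits on the queue hashed from index 0x200,
 * the same queue used by every other offset within that PMD.
 */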
static int wake_exceptional_entry_func(wait_queue_entry_t *wait, unsigned int mode,
				       int sync, void *keyp)
{
	struct exceptional_entry_key *key = keyp;
	struct wait_exceptional_entry_queue *ewait =
		container_of(wait, struct wait_exceptional_entry_queue, wait);

	if (key->mapping != ewait->key.mapping ||
	    key->entry_start != ewait->key.entry_start)
		return 0;
	return autoremove_wake_function(wait, mode, sync, NULL);
}

/*
 * @entry may no longer be the entry at the index in the mapping.
 * The important information it's conveying is whether the entry at
 * this index used to be a PMD entry.
 */
static void dax_wake_mapping_entry_waiter(struct address_space *mapping,
		pgoff_t index, void *entry, bool wake_all)
{
	struct exceptional_entry_key key;
	wait_queue_head_t *wq;

	wq = dax_entry_waitqueue(mapping, index, entry, &key);

	/*
	 * Checking for locked entry and prepare_to_wait_exclusive() happens
	 * under the i_pages lock, ditto for entry handling in our callers.
	 * So at this point all tasks that could have seen our entry locked
	 * must be in the waitqueue and the following check will see them.
	 */
	if (waitqueue_active(wq))
		__wake_up(wq, TASK_NORMAL, wake_all ? 0 : 1, &key);
}

/*
 * Check whether the given slot is locked.  Must be called with the i_pages
 * lock held.
 */
static inline int slot_locked(struct address_space *mapping, void **slot)
{
	unsigned long entry = (unsigned long)
		radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock);
	return entry & RADIX_DAX_ENTRY_LOCK;
}

/*
 * Mark the given slot as locked.  Must be called with the i_pages lock held.
 */
static inline void *lock_slot(struct address_space *mapping, void **slot)
{
	unsigned long entry = (unsigned long)
		radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock);

	entry |= RADIX_DAX_ENTRY_LOCK;
	radix_tree_replace_slot(&mapping->i_pages, slot, (void *)entry);
	return (void *)entry;
}

/*
 * Mark the given slot as unlocked.  Must be called with the i_pages lock held.
 */
static inline void *unlock_slot(struct address_space *mapping, void **slot)
{
	unsigned long entry = (unsigned long)
		radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock);

	entry &= ~(unsigned long)RADIX_DAX_ENTRY_LOCK;
	radix_tree_replace_slot(&mapping->i_pages, slot, (void *)entry);
	return (void *)entry;
}
static void put_unlocked_mapping_entry(struct address_space *mapping,
				       pgoff_t index, void *entry);

/*
 * Lookup entry in radix tree, wait for it to become unlocked if it is
 * exceptional entry and return it. The caller must call
 * put_unlocked_mapping_entry() when he decided not to lock the entry or
 * put_locked_mapping_entry() when he locked the entry and now wants to
 * unlock it.
 *
 * Must be called with the i_pages lock held.
 */
static void *get_unlocked_mapping_entry(struct address_space *mapping,
		pgoff_t index, void ***slotp)
{
	void *entry, **slot;
	struct wait_exceptional_entry_queue ewait;
	wait_queue_head_t *wq;

	init_wait(&ewait.wait);
	ewait.wait.func = wake_exceptional_entry_func;

	for (;;) {
		entry = __radix_tree_lookup(&mapping->i_pages, index, NULL,
					    &slot);
		if (!entry ||
		    WARN_ON_ONCE(!radix_tree_exceptional_entry(entry)) ||
		    !slot_locked(mapping, slot)) {
			if (slotp)
				*slotp = slot;
			return entry;
		}

		wq = dax_entry_waitqueue(mapping, index, entry, &ewait.key);
		prepare_to_wait_exclusive(wq, &ewait.wait,
					  TASK_UNINTERRUPTIBLE);
		xa_unlock_irq(&mapping->i_pages);
		schedule();
		finish_wait(wq, &ewait.wait);
		xa_lock_irq(&mapping->i_pages);
	}
}
/*
 * The only thing keeping the address space around is the i_pages lock
 * (it's cycled in clear_inode() after removing the entries from i_pages).
 * After we call xa_unlock_irq(), we cannot touch mapping->i_pages.
 */
static void wait_entry_unlocked(struct address_space *mapping, pgoff_t index,
		void ***slotp, void *entry)
{
	struct wait_exceptional_entry_queue ewait;
	wait_queue_head_t *wq;

	init_wait(&ewait.wait);
	ewait.wait.func = wake_exceptional_entry_func;

	wq = dax_entry_waitqueue(mapping, index, entry, &ewait.key);
	/*
	 * Unlike get_unlocked_mapping_entry() there is no guarantee that this
	 * path ever successfully retrieves an unlocked entry before an
	 * inode dies. Perform a non-exclusive wait in case this path
	 * never successfully performs its own wake up.
	 */
	prepare_to_wait(wq, &ewait.wait, TASK_UNINTERRUPTIBLE);
	xa_unlock_irq(&mapping->i_pages);
	schedule();
	finish_wait(wq, &ewait.wait);
}

static void unlock_mapping_entry(struct address_space *mapping, pgoff_t index)
{
	void *entry, **slot;

	xa_lock_irq(&mapping->i_pages);
	entry = __radix_tree_lookup(&mapping->i_pages, index, NULL, &slot);
	if (WARN_ON_ONCE(!entry || !radix_tree_exceptional_entry(entry) ||
			 !slot_locked(mapping, slot))) {
		xa_unlock_irq(&mapping->i_pages);
		return;
	}
	unlock_slot(mapping, slot);
	xa_unlock_irq(&mapping->i_pages);
	dax_wake_mapping_entry_waiter(mapping, index, entry, false);
}
static void put_locked_mapping_entry(struct address_space *mapping,
		pgoff_t index)
{
	unlock_mapping_entry(mapping, index);
}

/*
 * Called when we are done with radix tree entry we looked up via
 * get_unlocked_mapping_entry() and which we didn't lock in the end.
 */
static void put_unlocked_mapping_entry(struct address_space *mapping,
				       pgoff_t index, void *entry)
{
	if (!entry)
		return;

	/* We have to wake up next waiter for the radix tree entry lock */
	dax_wake_mapping_entry_waiter(mapping, index, entry, false);
}
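/*
 * Locking protocol summary: under the i_pages lock, a caller obtains an
 * unlocked entry via get_unlocked_mapping_entry().  It then either locks the
 * entry with lock_slot() and eventually drops it with
 * put_locked_mapping_entry(), or leaves it unlocked and calls
 * put_unlocked_mapping_entry() so that the next waiter is woken.
 */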
static unsigned long dax_entry_size(void *entry)
{
	if (dax_is_zero_entry(entry))
		return 0;
	else if (dax_is_empty_entry(entry))
		return 0;
	else if (dax_is_pmd_entry(entry))
		return PMD_SIZE;
	else
		return PAGE_SIZE;
}

static unsigned long dax_radix_end_pfn(void *entry)
{
	return dax_radix_pfn(entry) + dax_entry_size(entry) / PAGE_SIZE;
}

/*
 * Iterate through all mapped pfns represented by an entry, i.e. skip
 * 'empty' and 'zero' entries.
 */
#define for_each_mapped_pfn(entry, pfn) \
	for (pfn = dax_radix_pfn(entry); \
			pfn < dax_radix_end_pfn(entry); pfn++)

/*
 * TODO: for reflink+dax we need a way to associate a single page with
 * multiple address_space instances at different linear_page_index()
 * offsets.
 */
static void dax_associate_entry(void *entry, struct address_space *mapping,
		struct vm_area_struct *vma, unsigned long address)
{
	unsigned long size = dax_entry_size(entry), pfn, index;
	int i = 0;

	if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
		return;

	index = linear_page_index(vma, address & ~(size - 1));
	for_each_mapped_pfn(entry, pfn) {
		struct page *page = pfn_to_page(pfn);

		WARN_ON_ONCE(page->mapping);
		page->mapping = mapping;
		page->index = index + i++;
	}
}

static void dax_disassociate_entry(void *entry, struct address_space *mapping,
		bool trunc)
{
	unsigned long pfn;

	if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
		return;

	for_each_mapped_pfn(entry, pfn) {
		struct page *page = pfn_to_page(pfn);

		WARN_ON_ONCE(trunc && page_ref_count(page) > 1);
		WARN_ON_ONCE(page->mapping && page->mapping != mapping);
		page->mapping = NULL;
		page->index = 0;
	}
}

static struct page *dax_busy_page(void *entry)
{
	unsigned long pfn;

	for_each_mapped_pfn(entry, pfn) {
		struct page *page = pfn_to_page(pfn);

		if (page_ref_count(page) > 1)
			return page;
	}
	return NULL;
}
bool dax_lock_mapping_entry(struct page *page)
{
	pgoff_t index;
	struct inode *inode;
	bool did_lock = false;
	void *entry = NULL, **slot;
	struct address_space *mapping;

	rcu_read_lock();
	for (;;) {
		mapping = READ_ONCE(page->mapping);

		if (!mapping || !dax_mapping(mapping))
			break;

		/*
		 * In the device-dax case there's no need to lock, a
		 * struct dev_pagemap pin is sufficient to keep the
		 * inode alive, and we assume we have dev_pagemap pin
		 * otherwise we would not have a valid pfn_to_page()
		 * translation.
		 */
		inode = mapping->host;
		if (S_ISCHR(inode->i_mode)) {
			did_lock = true;
			break;
		}

		xa_lock_irq(&mapping->i_pages);
		if (mapping != page->mapping) {
			xa_unlock_irq(&mapping->i_pages);
			continue;
		}
		index = page->index;

		entry = __radix_tree_lookup(&mapping->i_pages, index,
					    NULL, &slot);
		if (!entry) {
			xa_unlock_irq(&mapping->i_pages);
			break;
		} else if (slot_locked(mapping, slot)) {
			rcu_read_unlock();
			wait_entry_unlocked(mapping, index, &slot, entry);
			rcu_read_lock();
			continue;
		}
		lock_slot(mapping, slot);
		did_lock = true;
		xa_unlock_irq(&mapping->i_pages);
		break;
	}
	rcu_read_unlock();

	return did_lock;
}

void dax_unlock_mapping_entry(struct page *page)
{
	struct address_space *mapping = page->mapping;
	struct inode *inode = mapping->host;

	if (S_ISCHR(inode->i_mode))
		return;

	unlock_mapping_entry(mapping, page->index);
}
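/*
 * Background: dax_lock_mapping_entry() lets the memory-failure path pin the
 * page's mapping/index association before it walks reverse mappings.  The
 * RCU read lock above keeps the struct address_space memory valid while
 * page->mapping is re-checked (inodes are freed via RCU), which is why the
 * wait path must drop and retake it around wait_entry_unlocked().
 */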
/*
 * Find radix tree entry at given index. If it points to an exceptional entry,
 * return it with the radix tree entry locked. If the radix tree doesn't
 * contain given index, create an empty exceptional entry for the index and
 * return with it locked.
 *
 * When requesting an entry with size RADIX_DAX_PMD, grab_mapping_entry() will
 * either return that locked entry or will return an error.  This error will
 * happen if there are any 4k entries within the 2MiB range that we are
 * requesting.
 *
 * We always favor 4k entries over 2MiB entries. There isn't a flow where we
 * evict 4k entries in order to 'upgrade' them to a 2MiB entry.  A 2MiB
 * insertion will fail if it finds any 4k entries already in the tree, and a
 * 4k insertion will cause an existing 2MiB entry to be unmapped and
 * downgraded to 4k entries.  This happens for both 2MiB huge zero pages as
 * well as 2MiB empty entries.
 *
 * The exception to this downgrade path is for 2MiB DAX PMD entries that have
 * real storage backing them.  We will leave these real 2MiB DAX entries in
 * the tree, and PTE writes will simply dirty the entire 2MiB DAX entry.
 *
 * Note: Unlike filemap_fault() we don't honor FAULT_FLAG_RETRY flags. For
 * persistent memory the benefit is doubtful. We can add that later if we can
 * show it helps.
 */
static void *grab_mapping_entry(struct address_space *mapping, pgoff_t index,
		unsigned long size_flag)
{
	bool pmd_downgrade = false; /* splitting 2MiB entry into 4k entries? */
	void *entry, **slot;

restart:
	xa_lock_irq(&mapping->i_pages);
	entry = get_unlocked_mapping_entry(mapping, index, &slot);

	if (WARN_ON_ONCE(entry && !radix_tree_exceptional_entry(entry))) {
		entry = ERR_PTR(-EIO);
		goto out_unlock;
	}

	if (entry) {
		if (size_flag & RADIX_DAX_PMD) {
			if (dax_is_pte_entry(entry)) {
				put_unlocked_mapping_entry(mapping, index,
						entry);
				entry = ERR_PTR(-EEXIST);
				goto out_unlock;
			}
		} else { /* trying to grab a PTE entry */
			if (dax_is_pmd_entry(entry) &&
			    (dax_is_zero_entry(entry) ||
			     dax_is_empty_entry(entry))) {
				pmd_downgrade = true;
			}
		}
	}

	/* No entry for given index? Make sure radix tree is big enough. */
	if (!entry || pmd_downgrade) {
		int err;

		if (pmd_downgrade) {
			/*
			 * Make sure 'entry' remains valid while we drop
			 * the i_pages lock.
			 */
			entry = lock_slot(mapping, slot);
		}

		xa_unlock_irq(&mapping->i_pages);
		/*
		 * Besides huge zero pages the only other thing that gets
		 * downgraded are empty entries which don't need to be
		 * unmapped.
		 */
		if (pmd_downgrade && dax_is_zero_entry(entry))
			unmap_mapping_pages(mapping, index & ~PG_PMD_COLOUR,
							PG_PMD_NR, false);

		err = radix_tree_preload(
				mapping_gfp_mask(mapping) & ~__GFP_HIGHMEM);
		if (err) {
			if (pmd_downgrade)
				put_locked_mapping_entry(mapping, index);
			return ERR_PTR(err);
		}
		xa_lock_irq(&mapping->i_pages);

		if (!entry) {
			/*
			 * We needed to drop the i_pages lock while calling
			 * radix_tree_preload() and we didn't have an entry to
			 * lock.  See if another thread inserted an entry at
			 * our index during this time.
			 */
			entry = __radix_tree_lookup(&mapping->i_pages, index,
					NULL, &slot);
			if (entry) {
				radix_tree_preload_end();
				xa_unlock_irq(&mapping->i_pages);
				goto restart;
			}
		}

		if (pmd_downgrade) {
			dax_disassociate_entry(entry, mapping, false);
			radix_tree_delete(&mapping->i_pages, index);
			mapping->nrexceptional--;
			dax_wake_mapping_entry_waiter(mapping, index, entry,
					true);
		}

		entry = dax_radix_locked_entry(0, size_flag | RADIX_DAX_EMPTY);

		err = __radix_tree_insert(&mapping->i_pages, index,
				dax_radix_order(entry), entry);
		radix_tree_preload_end();
		if (err) {
			xa_unlock_irq(&mapping->i_pages);
			/*
			 * Our insertion of a DAX entry failed, most likely
			 * because we were inserting a PMD entry and it
			 * collided with a PTE sized entry at a different
			 * index in the PMD range.  We haven't inserted
			 * anything into the radix tree and have no waiters to
			 * wake.
			 */
			return ERR_PTR(err);
		}
		/* Good, we have inserted empty locked entry into the tree. */
		mapping->nrexceptional++;
		xa_unlock_irq(&mapping->i_pages);
		return entry;
	}
	entry = lock_slot(mapping, slot);
 out_unlock:
	xa_unlock_irq(&mapping->i_pages);
	return entry;
}
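/*
 * Illustrative caller pattern (cf. the fault handlers later in this file):
 *
 *	entry = grab_mapping_entry(mapping, vmf->pgoff, 0);
 *	if (IS_ERR(entry))
 *		return dax_fault_return(PTR_ERR(entry));
 *	...install the mapping...
 *	put_locked_mapping_entry(mapping, vmf->pgoff);
 */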
/**
 * dax_layout_busy_page - find first pinned page in @mapping
 * @mapping: address space to scan for a page with ref count > 1
 *
 * DAX requires ZONE_DEVICE mapped pages. These pages are never
 * 'onlined' to the page allocator so they are considered idle when
 * page->count == 1. A filesystem uses this interface to determine if
 * any page in the mapping is busy, i.e. for DMA, or other
 * get_user_pages() usages.
 *
 * It is expected that the filesystem is holding locks to block the
 * establishment of new mappings in this address_space. I.e. it expects
 * to be able to run unmap_mapping_range() and subsequently not race
 * mapping_mapped() becoming true.
 */
struct page *dax_layout_busy_page(struct address_space *mapping)
{
	pgoff_t indices[PAGEVEC_SIZE];
	struct page *page = NULL;
	struct pagevec pvec;
	pgoff_t index, end;
	unsigned i;

	/*
	 * In the 'limited' case get_user_pages() for dax is disabled.
	 */
	if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
		return NULL;

	if (!dax_mapping(mapping) || !mapping_mapped(mapping))
		return NULL;

	pagevec_init(&pvec);
	index = 0;
	end = -1;

	/*
	 * If we race get_user_pages_fast() here either we'll see the
	 * elevated page count in the pagevec_lookup and wait, or
	 * get_user_pages_fast() will see that the page it took a reference
	 * against is no longer mapped in the page tables and bail to the
	 * get_user_pages() slow path.  The slow path is protected by
	 * pte_lock() and pmd_lock(). New references are not taken without
	 * holding those locks, and unmap_mapping_range() will not zero the
	 * pte or pmd without holding the respective lock, so we are
	 * guaranteed to either see new references or prevent new
	 * references from being established.
	 */
	unmap_mapping_range(mapping, 0, 0, 1);

	while (index < end && pagevec_lookup_entries(&pvec, mapping, index,
				min(end - index, (pgoff_t)PAGEVEC_SIZE),
				indices)) {
		pgoff_t nr_pages = 1;

		for (i = 0; i < pagevec_count(&pvec); i++) {
			struct page *pvec_ent = pvec.pages[i];
			void *entry;

			index = indices[i];
			if (index >= end)
				break;

			if (WARN_ON_ONCE(
			     !radix_tree_exceptional_entry(pvec_ent)))
				continue;

			xa_lock_irq(&mapping->i_pages);
			entry = get_unlocked_mapping_entry(mapping, index, NULL);
			if (entry) {
				page = dax_busy_page(entry);
				/*
				 * Account for multi-order entries at
				 * the end of the pagevec.
				 */
				if (i + 1 >= pagevec_count(&pvec))
					nr_pages = 1UL << dax_radix_order(entry);
			}
			put_unlocked_mapping_entry(mapping, index, entry);
			xa_unlock_irq(&mapping->i_pages);
			if (page)
				break;
		}

		/*
		 * We don't expect normal struct page entries to exist in our
		 * tree, but we keep these pagevec calls so that this code is
		 * consistent with the common pattern for handling pagevecs
		 * throughout the kernel.
		 */
		pagevec_remove_exceptionals(&pvec);
		pagevec_release(&pvec);
		index += nr_pages;

		if (page)
			break;
	}
	return page;
}
EXPORT_SYMBOL_GPL(dax_layout_busy_page);
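/*
 * Illustrative (hypothetical) caller: a filesystem breaking layouts before
 * truncate might loop
 *
 *	while ((page = dax_layout_busy_page(mapping)))
 *		wait until page_ref_count(page) drops to 1;
 *
 * with its inode locks held so no new mappings can be established.
 */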
static int __dax_invalidate_mapping_entry(struct address_space *mapping,
					  pgoff_t index, bool trunc)
{
	int ret = 0;
	void *entry;
	struct radix_tree_root *pages = &mapping->i_pages;

	xa_lock_irq(pages);
	entry = get_unlocked_mapping_entry(mapping, index, NULL);
	if (!entry || WARN_ON_ONCE(!radix_tree_exceptional_entry(entry)))
		goto out;
	if (!trunc &&
	    (radix_tree_tag_get(pages, index, PAGECACHE_TAG_DIRTY) ||
	     radix_tree_tag_get(pages, index, PAGECACHE_TAG_TOWRITE)))
		goto out;
	dax_disassociate_entry(entry, mapping, trunc);
	radix_tree_delete(pages, index);
	mapping->nrexceptional--;
	ret = 1;
out:
	put_unlocked_mapping_entry(mapping, index, entry);
	xa_unlock_irq(pages);
	return ret;
}
/*
 * Delete exceptional DAX entry at @index from @mapping. Wait for radix tree
 * entry to get unlocked before deleting it.
 */
int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index)
{
	int ret = __dax_invalidate_mapping_entry(mapping, index, true);

	/*
	 * This gets called from truncate / punch_hole path. As such, the caller
	 * must hold locks protecting against concurrent modifications of the
	 * radix tree (usually fs-private i_mmap_sem for writing). Since the
	 * caller has seen exceptional entry for this index, we better find it
	 * at that index as well...
	 */
	WARN_ON_ONCE(!ret);
	return ret;
}

/*
 * Invalidate exceptional DAX entry if it is clean.
 */
int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
				      pgoff_t index)
{
	return __dax_invalidate_mapping_entry(mapping, index, false);
}
static int copy_user_dax(struct block_device *bdev, struct dax_device *dax_dev,
		sector_t sector, size_t size, struct page *to,
		unsigned long vaddr)
{
	void *vto, *kaddr;
	pgoff_t pgoff;
	long rc;
	int id;

	rc = bdev_dax_pgoff(bdev, sector, size, &pgoff);
	if (rc)
		return rc;

	id = dax_read_lock();
	rc = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size), &kaddr, NULL);
	if (rc < 0) {
		dax_read_unlock(id);
		return rc;
	}
	vto = kmap_atomic(to);
	copy_user_page(vto, (void __force *)kaddr, vaddr, to);
	kunmap_atomic(vto);
	dax_read_unlock(id);
	return 0;
}
/*
 * By this point grab_mapping_entry() has ensured that we have a locked entry
 * of the appropriate size so we don't have to worry about downgrading PMDs to
 * PTEs.  If we happen to be trying to insert a PTE and there is a PMD
 * already in the tree, we will skip the insertion and just dirty the PMD as
 * appropriate.
 */
static void *dax_insert_mapping_entry(struct address_space *mapping,
				      struct vm_fault *vmf,
				      void *entry, pfn_t pfn_t,
				      unsigned long flags, bool dirty)
{
	struct radix_tree_root *pages = &mapping->i_pages;
	unsigned long pfn = pfn_t_to_pfn(pfn_t);
	pgoff_t index = vmf->pgoff;
	void *new_entry;

	if (dirty)
		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);

	if (dax_is_zero_entry(entry) && !(flags & RADIX_DAX_ZERO_PAGE)) {
		/* we are replacing a zero page with block mapping */
		if (dax_is_pmd_entry(entry))
			unmap_mapping_pages(mapping, index & ~PG_PMD_COLOUR,
							PG_PMD_NR, false);
		else /* pte entry */
			unmap_mapping_pages(mapping, vmf->pgoff, 1, false);
	}

	xa_lock_irq(pages);
	new_entry = dax_radix_locked_entry(pfn, flags);
	if (dax_entry_size(entry) != dax_entry_size(new_entry)) {
		dax_disassociate_entry(entry, mapping, false);
		dax_associate_entry(new_entry, mapping, vmf->vma, vmf->address);
	}

	if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) {
		/*
		 * Only swap our new entry into the radix tree if the current
		 * entry is a zero page or an empty entry.  If a normal PTE or
		 * PMD entry is already in the tree, we leave it alone.  This
		 * means that if we are trying to insert a PTE and the
		 * existing entry is a PMD, we will just leave the PMD in the
		 * tree and dirty it if necessary.
		 */
		struct radix_tree_node *node;
		void **slot;
		void *ret;

		ret = __radix_tree_lookup(pages, index, &node, &slot);
		WARN_ON_ONCE(ret != entry);
		__radix_tree_replace(pages, node, slot,
				     new_entry, NULL);
		entry = new_entry;
	}

	if (dirty)
		radix_tree_tag_set(pages, index, PAGECACHE_TAG_DIRTY);

	xa_unlock_irq(pages);
	return entry;
}
static inline unsigned long
pgoff_address(pgoff_t pgoff, struct vm_area_struct *vma)
{
	unsigned long address;

	address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
	VM_BUG_ON_VMA(address < vma->vm_start || address >= vma->vm_end, vma);
	return address;
}

/* Walk all mappings of a given index of a file and writeprotect them */
static void dax_mapping_entry_mkclean(struct address_space *mapping,
				      pgoff_t index, unsigned long pfn)
{
	struct vm_area_struct *vma;
	pte_t pte, *ptep = NULL;
	pmd_t *pmdp = NULL;
	spinlock_t *ptl;

	i_mmap_lock_read(mapping);
	vma_interval_tree_foreach(vma, &mapping->i_mmap, index, index) {
		unsigned long address, start, end;

		cond_resched();

		if (!(vma->vm_flags & VM_SHARED))
			continue;

		address = pgoff_address(index, vma);

		/*
		 * Note because we provide start/end to follow_pte_pmd it will
		 * call mmu_notifier_invalidate_range_start() on our behalf
		 * before taking any lock.
		 */
		if (follow_pte_pmd(vma->vm_mm, address, &start, &end,
				   &ptep, &pmdp, &ptl))
			continue;

		/*
		 * No need to call mmu_notifier_invalidate_range() as we are
		 * downgrading page table protection not changing it to point
		 * to a new page.
		 *
		 * See Documentation/vm/mmu_notifier.rst
		 */
		if (pmdp) {
#ifdef CONFIG_FS_DAX_PMD
			pmd_t pmd;

			if (pfn != pmd_pfn(*pmdp))
				goto unlock_pmd;
			if (!pmd_dirty(*pmdp) && !pmd_write(*pmdp))
				goto unlock_pmd;

			flush_cache_page(vma, address, pfn);
			pmd = pmdp_huge_clear_flush(vma, address, pmdp);
			pmd = pmd_wrprotect(pmd);
			pmd = pmd_mkclean(pmd);
			set_pmd_at(vma->vm_mm, address, pmdp, pmd);
unlock_pmd:
#endif
			spin_unlock(ptl);
		} else {
			if (pfn != pte_pfn(*ptep))
				goto unlock_pte;
			if (!pte_dirty(*ptep) && !pte_write(*ptep))
				goto unlock_pte;

			flush_cache_page(vma, address, pfn);
			pte = ptep_clear_flush(vma, address, ptep);
			pte = pte_wrprotect(pte);
			pte = pte_mkclean(pte);
			set_pte_at(vma->vm_mm, address, ptep, pte);
unlock_pte:
			pte_unmap_unlock(ptep, ptl);
		}

		mmu_notifier_invalidate_range_end(vma->vm_mm, start, end);
	}
	i_mmap_unlock_read(mapping);
}
static int dax_writeback_one(struct dax_device *dax_dev,
		struct address_space *mapping, pgoff_t index, void *entry)
{
	struct radix_tree_root *pages = &mapping->i_pages;
	void *entry2, **slot;
	unsigned long pfn;
	long ret = 0;
	size_t size;

	/*
	 * A page got tagged dirty in DAX mapping? Something is seriously
	 * wrong.
	 */
	if (WARN_ON(!radix_tree_exceptional_entry(entry)))
		return -EIO;

	xa_lock_irq(pages);
	entry2 = get_unlocked_mapping_entry(mapping, index, &slot);
	/* Entry got punched out / reallocated? */
	if (!entry2 || WARN_ON_ONCE(!radix_tree_exceptional_entry(entry2)))
		goto put_unlocked;
	/*
	 * Entry got reallocated elsewhere? No need to writeback. We have to
	 * compare pfns as we must not bail out due to difference in lockbit
	 * or entry type.
	 */
	if (dax_radix_pfn(entry2) != dax_radix_pfn(entry))
		goto put_unlocked;
	if (WARN_ON_ONCE(dax_is_empty_entry(entry) ||
				dax_is_zero_entry(entry))) {
		ret = -EIO;
		goto put_unlocked;
	}

	/* Another fsync thread may have already written back this entry */
	if (!radix_tree_tag_get(pages, index, PAGECACHE_TAG_TOWRITE))
		goto put_unlocked;
	/* Lock the entry to serialize with page faults */
	entry = lock_slot(mapping, slot);
	/*
	 * We can clear the tag now but we have to be careful so that concurrent
	 * dax_writeback_one() calls for the same index cannot finish before we
	 * actually flush the caches. This is achieved as the calls will look
	 * at the entry only under the i_pages lock and once they do that
	 * they will see the entry locked and wait for it to unlock.
	 */
	radix_tree_tag_clear(pages, index, PAGECACHE_TAG_TOWRITE);
	xa_unlock_irq(pages);

	/*
	 * Even if dax_writeback_mapping_range() was given a wbc->range_start
	 * in the middle of a PMD, the 'index' we are given will be aligned to
	 * the start index of the PMD, as will the pfn we pull from 'entry'.
	 * This allows us to flush for PMD_SIZE and not have to worry about
	 * partial PMD writebacks.
	 */
	pfn = dax_radix_pfn(entry);
	size = PAGE_SIZE << dax_radix_order(entry);

	dax_mapping_entry_mkclean(mapping, index, pfn);
	dax_flush(dax_dev, page_address(pfn_to_page(pfn)), size);
	/*
	 * After we have flushed the cache, we can clear the dirty tag. There
	 * cannot be new dirty data in the pfn after the flush has completed as
	 * the pfn mappings are writeprotected and fault waits for mapping
	 * entry lock.
	 */
	xa_lock_irq(pages);
	radix_tree_tag_clear(pages, index, PAGECACHE_TAG_DIRTY);
	xa_unlock_irq(pages);
	trace_dax_writeback_one(mapping->host, index, size >> PAGE_SHIFT);
	put_locked_mapping_entry(mapping, index);
	return ret;

 put_unlocked:
	put_unlocked_mapping_entry(mapping, index, entry2);
	xa_unlock_irq(pages);
	return ret;
}

/*
 * Flush the mapping to the persistent domain within the byte range of [start,
 * end]. This is required by data integrity operations to ensure file data is
 * on persistent storage prior to completion of the operation.
 */
int dax_writeback_mapping_range(struct address_space *mapping,
		struct block_device *bdev, struct writeback_control *wbc)
{
	struct inode *inode = mapping->host;
	pgoff_t start_index, end_index;
	pgoff_t indices[PAGEVEC_SIZE];
	struct dax_device *dax_dev;
	struct pagevec pvec;
	bool done = false;
	int i, ret = 0;

	if (WARN_ON_ONCE(inode->i_blkbits != PAGE_SHIFT))
		return -EIO;

	if (!mapping->nrexceptional || wbc->sync_mode != WB_SYNC_ALL)
		return 0;

	dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
	if (!dax_dev)
		return -EIO;

	start_index = wbc->range_start >> PAGE_SHIFT;
	end_index = wbc->range_end >> PAGE_SHIFT;

	trace_dax_writeback_range(inode, start_index, end_index);

	tag_pages_for_writeback(mapping, start_index, end_index);

	pagevec_init(&pvec);
	while (!done) {
		pvec.nr = find_get_entries_tag(mapping, start_index,
				PAGECACHE_TAG_TOWRITE, PAGEVEC_SIZE,
				pvec.pages, indices);

		if (pvec.nr == 0)
			break;

		for (i = 0; i < pvec.nr; i++) {
			if (indices[i] > end_index) {
				done = true;
				break;
			}

			ret = dax_writeback_one(dax_dev, mapping, indices[i],
					pvec.pages[i]);
			if (ret < 0) {
				mapping_set_error(mapping, ret);
				goto out;
			}
		}
		start_index = indices[pvec.nr - 1] + 1;
	}
out:
	put_dax(dax_dev);
	trace_dax_writeback_range_done(inode, start_index, end_index);
	return (ret < 0 ? ret : 0);
}
EXPORT_SYMBOL_GPL(dax_writeback_mapping_range);
static sector_t dax_iomap_sector(struct iomap *iomap, loff_t pos)
{
	return (iomap->addr + (pos & PAGE_MASK) - iomap->offset) >> 9;
}
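/*
 * Example: with iomap->addr == 0x10000 (byte address of the extent on the
 * backing device) and iomap->offset == 0x8000 (the file offset it maps),
 * pos 0x9000 resolves to device byte 0x11000, i.e. 512-byte sector 0x88.
 */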
static int dax_iomap_pfn(struct iomap *iomap, loff_t pos, size_t size,
			 pfn_t *pfnp)
{
	const sector_t sector = dax_iomap_sector(iomap, pos);
	pgoff_t pgoff;
	int id, rc;
	long length;

	rc = bdev_dax_pgoff(iomap->bdev, sector, size, &pgoff);
	if (rc)
		return rc;
	id = dax_read_lock();
	length = dax_direct_access(iomap->dax_dev, pgoff, PHYS_PFN(size),
				   NULL, pfnp);
	if (length < 0) {
		rc = length;
		goto out;
	}
	rc = -EINVAL;
	if (PFN_PHYS(length) < size)
		goto out;
	if (pfn_t_to_pfn(*pfnp) & (PHYS_PFN(size)-1))
		goto out;
	/* For larger pages we need devmap */
	if (length > 1 && !pfn_t_devmap(*pfnp))
		goto out;
	rc = 0;
out:
	dax_read_unlock(id);
	return rc;
}
/*
 * The user has performed a load from a hole in the file.  Allocating a new
 * page in the file would cause excessive storage usage for workloads with
 * sparse files.  Instead we insert a read-only mapping of the 4k zero page.
 * If this page is ever written to we will re-fault and change the mapping to
 * point to real DAX storage instead.
 */
static vm_fault_t dax_load_hole(struct address_space *mapping, void *entry,
			 struct vm_fault *vmf)
{
	struct inode *inode = mapping->host;
	unsigned long vaddr = vmf->address;
	pfn_t pfn = pfn_to_pfn_t(my_zero_pfn(vaddr));
	vm_fault_t ret;

	dax_insert_mapping_entry(mapping, vmf, entry, pfn, RADIX_DAX_ZERO_PAGE,
			false);
	ret = vmf_insert_mixed(vmf->vma, vaddr, pfn);
	trace_dax_load_hole(inode, vmf, ret);
	return ret;
}
static bool dax_range_is_aligned(struct block_device *bdev,
				 unsigned int offset, unsigned int length)
{
	unsigned short sector_size = bdev_logical_block_size(bdev);

	if (!IS_ALIGNED(offset, sector_size))
		return false;
	if (!IS_ALIGNED(length, sector_size))
		return false;

	return true;
}

int __dax_zero_page_range(struct block_device *bdev,
		struct dax_device *dax_dev, sector_t sector,
		unsigned int offset, unsigned int size)
{
	if (dax_range_is_aligned(bdev, offset, size)) {
		sector_t start_sector = sector + (offset >> 9);

		return blkdev_issue_zeroout(bdev, start_sector,
				size >> 9, GFP_NOFS, 0);
	} else {
		pgoff_t pgoff;
		long rc, id;
		void *kaddr;

		rc = bdev_dax_pgoff(bdev, sector, PAGE_SIZE, &pgoff);
		if (rc)
			return rc;

		id = dax_read_lock();
		rc = dax_direct_access(dax_dev, pgoff, 1, &kaddr, NULL);
		if (rc < 0) {
			dax_read_unlock(id);
			return rc;
		}
		memset(kaddr + offset, 0, size);
		dax_flush(dax_dev, kaddr + offset, size);
		dax_read_unlock(id);
	}
	return 0;
}
EXPORT_SYMBOL_GPL(__dax_zero_page_range);
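/*
 * Design note: zeroing takes one of two paths above.  A range aligned to the
 * device's logical block size can be zeroed by the block layer
 * (blkdev_issue_zeroout() may offload this to the device); a sub-sector
 * range cannot be expressed as a block request, so it is zeroed by hand
 * through dax_direct_access() with memset() plus dax_flush().
 */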
a254e5681 dax: provide an i... |
1132 |
static loff_t |
11c59c92f dax: correct dax ... |
1133 |
dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data, |
a254e5681 dax: provide an i... |
1134 1135 |
struct iomap *iomap) { |
cccbce671 filesystem-dax: c... |
1136 1137 |
struct block_device *bdev = iomap->bdev; struct dax_device *dax_dev = iomap->dax_dev; |
a254e5681 dax: provide an i... |
1138 1139 1140 |
struct iov_iter *iter = data; loff_t end = pos + length, done = 0; ssize_t ret = 0; |
a77d47864 dax: Report bytes... |
1141 |
size_t xfer; |
cccbce671 filesystem-dax: c... |
1142 |
int id; |
a254e5681 dax: provide an i... |
1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 |
if (iov_iter_rw(iter) == READ) { end = min(end, i_size_read(inode)); if (pos >= end) return 0; if (iomap->type == IOMAP_HOLE || iomap->type == IOMAP_UNWRITTEN) return iov_iter_zero(min(length, end - pos), iter); } if (WARN_ON_ONCE(iomap->type != IOMAP_MAPPED)) return -EIO; |
e3fce68cd dax: Avoid page i... |
1155 1156 1157 1158 1159 |
/* * Write can allocate block for an area which has a hole page mapped * into page tables. We have to tear down these mappings so that data * written by write(2) is visible in mmap. */ |
cd656375f mm: fix data corr... |
1160 |
if (iomap->flags & IOMAP_F_NEW) { |
e3fce68cd dax: Avoid page i... |
1161 1162 1163 1164 |
invalidate_inode_pages2_range(inode->i_mapping, pos >> PAGE_SHIFT, (end - 1) >> PAGE_SHIFT); } |
cccbce671 filesystem-dax: c... |
1165 |
id = dax_read_lock(); |
a254e5681 dax: provide an i... |
1166 1167 |
while (pos < end) { unsigned offset = pos & (PAGE_SIZE - 1); |
cccbce671 filesystem-dax: c... |
1168 1169 |
const size_t size = ALIGN(length + offset, PAGE_SIZE); const sector_t sector = dax_iomap_sector(iomap, pos); |
a254e5681 dax: provide an i... |
1170 |
ssize_t map_len; |
cccbce671 filesystem-dax: c... |
1171 1172 |
pgoff_t pgoff; void *kaddr; |
a254e5681 dax: provide an i... |
1173 |
|
d1908f525 fs: break out of ... |
1174 1175 1176 1177 |
if (fatal_signal_pending(current)) { ret = -EINTR; break; } |
cccbce671 filesystem-dax: c... |
1178 1179 1180 1181 1182 |
ret = bdev_dax_pgoff(bdev, sector, size, &pgoff); if (ret) break; map_len = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size), |
86ed913b0 filesystem-dax: D... |
1183 |
&kaddr, NULL); |
a254e5681 dax: provide an i... |
1184 1185 1186 1187 |
if (map_len < 0) { ret = map_len; break; } |
cccbce671 filesystem-dax: c... |
1188 1189 |
map_len = PFN_PHYS(map_len); kaddr += offset; |
a254e5681 dax: provide an i... |
1190 1191 1192 |
map_len -= offset; if (map_len > end - pos) map_len = end - pos; |
a2e050f5a dax: explain how ... |
1193 1194 1195 1196 1197 |
/* * The userspace address for the memory copy has already been * validated via access_ok() in either vfs_read() or * vfs_write(), depending on which operation we are doing. */ |
a254e5681 dax: provide an i... |
1198 |
if (iov_iter_rw(iter) == WRITE) |
a77d47864 dax: Report bytes... |
1199 |
xfer = dax_copy_from_iter(dax_dev, pgoff, kaddr, |
fec53774f filesystem-dax: c... |
1200 |
map_len, iter); |
a254e5681 dax: provide an i... |
1201 |
else |
a77d47864 dax: Report bytes... |
1202 |
xfer = dax_copy_to_iter(dax_dev, pgoff, kaddr, |
b3a9a0c36 dax: Introduce a ... |
1203 |
map_len, iter); |
a254e5681 dax: provide an i... |
1204 |
|
a77d47864 dax: Report bytes... |
1205 1206 1207 1208 1209 1210 1211 1212 |
pos += xfer; length -= xfer; done += xfer; if (xfer == 0) ret = -EFAULT; if (xfer < map_len) break; |
a254e5681 dax: provide an i... |
1213 |
} |
cccbce671 filesystem-dax: c... |
1214 |
dax_read_unlock(id); |
a254e5681 dax: provide an i... |
1215 1216 1217 1218 1219 |
return done ? done : ret; } /** |
/**
 * dax_iomap_rw - Perform I/O to a DAX file
 * @iocb:	The control block for this I/O
 * @iter:	The addresses to do I/O from or to
 * @ops:	iomap ops passed from the file system
 *
 * This function performs read and write operations to directly mapped
 * persistent memory. The caller needs to take care of read/write exclusion
 * and of evicting any page cache pages in the region under I/O.
 */
ssize_t
dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
		const struct iomap_ops *ops)
{
	struct address_space *mapping = iocb->ki_filp->f_mapping;
	struct inode *inode = mapping->host;
	loff_t pos = iocb->ki_pos, ret = 0, done = 0;
	unsigned flags = 0;

	if (iov_iter_rw(iter) == WRITE) {
		lockdep_assert_held_exclusive(&inode->i_rwsem);
		flags |= IOMAP_WRITE;
	} else {
		lockdep_assert_held(&inode->i_rwsem);
	}

	while (iov_iter_count(iter)) {
		ret = iomap_apply(inode, pos, iov_iter_count(iter), flags, ops,
				iter, dax_iomap_actor);
		if (ret <= 0)
			break;
		pos += ret;
		done += ret;
	}

	iocb->ki_pos += done;
	return done ? done : ret;
}
EXPORT_SYMBOL_GPL(dax_iomap_rw);
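
/*
 * Illustrative caller (a sketch, not part of fs/dax.c): a filesystem's
 * ->read_iter() typically provides the read/write exclusion required
 * above by holding i_rwsem shared around dax_iomap_rw(), in the style
 * of ext4_dax_read_iter(). "example_iomap_ops" is a placeholder for
 * the filesystem's own iomap ops; everything named "example_*" here
 * is hypothetical.
 */
extern const struct iomap_ops example_iomap_ops;	/* placeholder */

static ssize_t example_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	ssize_t ret;

	if (!iov_iter_count(to))
		return 0;	/* skip atime update for zero-length reads */

	inode_lock_shared(inode);
	ret = dax_iomap_rw(iocb, to, &example_iomap_ops);
	inode_unlock_shared(inode);

	file_accessed(iocb->ki_filp);
	return ret;
}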
a7d73fe6c dax: provide an i... |
1257 |
|
ab77dab46 fs/dax.c: use new... |
1258 |
static vm_fault_t dax_fault_return(int error) |
9f141d6ef dax: Call ->iomap... |
1259 1260 1261 1262 1263 1264 1265 |
{ if (error == 0) return VM_FAULT_NOPAGE; if (error == -ENOMEM) return VM_FAULT_OOM; return VM_FAULT_SIGBUS; } |

/*
 * MAP_SYNC on a dax mapping guarantees dirty metadata is
 * flushed on write-faults (non-cow), but not read-faults.
 */
static bool dax_fault_is_synchronous(unsigned long flags,
		struct vm_area_struct *vma, struct iomap *iomap)
{
	return (flags & IOMAP_WRITE) && (vma->vm_flags & VM_SYNC)
		&& (iomap->flags & IOMAP_F_DIRTY);
}
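
/*
 * Userspace counterpart (a sketch, not part of fs/dax.c and excluded
 * from the kernel build): an application opts in to the semantics
 * above with mmap(2). MAP_SHARED_VALIDATE makes the kernel reject
 * MAP_SYNC when the mapping cannot honour it, instead of silently
 * dropping the flag. The file path is a placeholder.
 */
#if 0
#define _GNU_SOURCE
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>

#ifndef MAP_SYNC
#define MAP_SYNC		0x080000	/* from <linux/mman.h> */
#endif
#ifndef MAP_SHARED_VALIDATE
#define MAP_SHARED_VALIDATE	0x03		/* from <linux/mman.h> */
#endif

static void *map_sync_example(size_t len)
{
	int fd = open("/mnt/pmem/example", O_RDWR);
	void *p;

	if (fd < 0)
		return NULL;
	p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		 MAP_SHARED_VALIDATE | MAP_SYNC, fd, 0);
	close(fd);
	if (p == MAP_FAILED)
		return NULL;	/* e.g. EOPNOTSUPP: not a DAX mapping */
	/*
	 * After a successful write fault, the metadata backing this
	 * range is durable; flushing CPU caches is then enough to
	 * persist the data itself.
	 */
	return p;
}
#endif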

static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
			       int *iomap_errp, const struct iomap_ops *ops)
{
	struct vm_area_struct *vma = vmf->vma;
	struct address_space *mapping = vma->vm_file->f_mapping;
	struct inode *inode = mapping->host;
	unsigned long vaddr = vmf->address;
	loff_t pos = (loff_t)vmf->pgoff << PAGE_SHIFT;
	struct iomap iomap = { 0 };
	unsigned flags = IOMAP_FAULT;
	int error, major = 0;
	bool write = vmf->flags & FAULT_FLAG_WRITE;
	bool sync;
	vm_fault_t ret = 0;
	void *entry;
	pfn_t pfn;

	trace_dax_pte_fault(inode, vmf, ret);
	/*
	 * Check whether the offset isn't beyond the end of the file now. The
	 * caller is supposed to hold locks serializing us with truncate /
	 * punch hole, so this is a reliable test.
	 */
	if (pos >= i_size_read(inode)) {
		ret = VM_FAULT_SIGBUS;
		goto out;
	}

	if (write && !vmf->cow_page)
		flags |= IOMAP_WRITE;

	entry = grab_mapping_entry(mapping, vmf->pgoff, 0);
	if (IS_ERR(entry)) {
		ret = dax_fault_return(PTR_ERR(entry));
		goto out;
	}

	/*
	 * It is possible, particularly with mixed reads & writes to private
	 * mappings, that we have raced with a PMD fault that overlaps with
	 * the PTE we need to set up. If so just return and the fault will be
	 * retried.
	 */
	if (pmd_trans_huge(*vmf->pmd) || pmd_devmap(*vmf->pmd)) {
		ret = VM_FAULT_NOPAGE;
		goto unlock_entry;
	}

	/*
	 * Note that we don't bother to use iomap_apply here: DAX requires
	 * the filesystem block size to be equal to the page size, which means
	 * that we never have to deal with more than a single extent here.
	 */
	error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags, &iomap);
	if (iomap_errp)
		*iomap_errp = error;
	if (error) {
		ret = dax_fault_return(error);
		goto unlock_entry;
	}
	if (WARN_ON_ONCE(iomap.offset + iomap.length < pos + PAGE_SIZE)) {
		error = -EIO;	/* fs corruption? */
		goto error_finish_iomap;
	}

	if (vmf->cow_page) {
		sector_t sector = dax_iomap_sector(&iomap, pos);

		switch (iomap.type) {
		case IOMAP_HOLE:
		case IOMAP_UNWRITTEN:
			clear_user_highpage(vmf->cow_page, vaddr);
			break;
		case IOMAP_MAPPED:
			error = copy_user_dax(iomap.bdev, iomap.dax_dev,
					sector, PAGE_SIZE, vmf->cow_page, vaddr);
			break;
		default:
			WARN_ON_ONCE(1);
			error = -EIO;
			break;
		}

		if (error)
			goto error_finish_iomap;

		__SetPageUptodate(vmf->cow_page);
		ret = finish_fault(vmf);
		if (!ret)
			ret = VM_FAULT_DONE_COW;
		goto finish_iomap;
	}

	sync = dax_fault_is_synchronous(flags, vma, &iomap);

	switch (iomap.type) {
	case IOMAP_MAPPED:
		if (iomap.flags & IOMAP_F_NEW) {
			count_vm_event(PGMAJFAULT);
			count_memcg_event_mm(vma->vm_mm, PGMAJFAULT);
			major = VM_FAULT_MAJOR;
		}
		error = dax_iomap_pfn(&iomap, pos, PAGE_SIZE, &pfn);
		if (error < 0)
			goto error_finish_iomap;

		entry = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
						 0, write && !sync);

		/*
		 * If we are doing a synchronous page fault and the inode
		 * needs fsync, we can insert the PTE into the page tables
		 * only after that happens. Skip the insertion for now and
		 * return the pfn so that the caller can insert it after
		 * fsync is done.
		 */
		if (sync) {
			if (WARN_ON_ONCE(!pfnp)) {
				error = -EIO;
				goto error_finish_iomap;
			}
			*pfnp = pfn;
			ret = VM_FAULT_NEEDDSYNC | major;
			goto finish_iomap;
		}
		trace_dax_insert_mapping(inode, vmf, entry);
		if (write)
			ret = vmf_insert_mixed_mkwrite(vma, vaddr, pfn);
		else
			ret = vmf_insert_mixed(vma, vaddr, pfn);

		goto finish_iomap;
	case IOMAP_UNWRITTEN:
	case IOMAP_HOLE:
		if (!write) {
			ret = dax_load_hole(mapping, entry, vmf);
			goto finish_iomap;
		}
		/*FALLTHRU*/
	default:
		WARN_ON_ONCE(1);
		error = -EIO;
		break;
	}

error_finish_iomap:
	ret = dax_fault_return(error);
finish_iomap:
	if (ops->iomap_end) {
		int copied = PAGE_SIZE;

		if (ret & VM_FAULT_ERROR)
			copied = 0;
		/*
		 * The fault is done by now and there's no way back (another
		 * thread may already be happily using the PTE we have
		 * installed). Just ignore the error from ->iomap_end since
		 * we cannot do much with it.
		 */
		ops->iomap_end(inode, pos, PAGE_SIZE, copied, flags, &iomap);
	}
unlock_entry:
	put_locked_mapping_entry(mapping, vmf->pgoff);
out:
	trace_dax_pte_fault_done(inode, vmf, ret);
	return ret | major;
}

#ifdef CONFIG_FS_DAX_PMD
static vm_fault_t dax_pmd_load_hole(struct vm_fault *vmf, struct iomap *iomap,
		void *entry)
{
	struct address_space *mapping = vmf->vma->vm_file->f_mapping;
	unsigned long pmd_addr = vmf->address & PMD_MASK;
	struct inode *inode = mapping->host;
	struct page *zero_page;
	void *ret = NULL;
	spinlock_t *ptl;
	pmd_t pmd_entry;
	pfn_t pfn;

	zero_page = mm_get_huge_zero_page(vmf->vma->vm_mm);

	if (unlikely(!zero_page))
		goto fallback;

	pfn = page_to_pfn_t(zero_page);
	ret = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
			RADIX_DAX_PMD | RADIX_DAX_ZERO_PAGE, false);

	ptl = pmd_lock(vmf->vma->vm_mm, vmf->pmd);
	if (!pmd_none(*(vmf->pmd))) {
		spin_unlock(ptl);
		goto fallback;
	}

	pmd_entry = mk_pmd(zero_page, vmf->vma->vm_page_prot);
	pmd_entry = pmd_mkhuge(pmd_entry);
	set_pmd_at(vmf->vma->vm_mm, pmd_addr, vmf->pmd, pmd_entry);
	spin_unlock(ptl);
	trace_dax_pmd_load_hole(inode, vmf, zero_page, ret);
	return VM_FAULT_NOPAGE;

fallback:
	trace_dax_pmd_load_hole_fallback(inode, vmf, zero_page, ret);
	return VM_FAULT_FALLBACK;
}

static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
			       const struct iomap_ops *ops)
{
	struct vm_area_struct *vma = vmf->vma;
	struct address_space *mapping = vma->vm_file->f_mapping;
	unsigned long pmd_addr = vmf->address & PMD_MASK;
	bool write = vmf->flags & FAULT_FLAG_WRITE;
	bool sync;
	unsigned int iomap_flags = (write ? IOMAP_WRITE : 0) | IOMAP_FAULT;
	struct inode *inode = mapping->host;
	vm_fault_t result = VM_FAULT_FALLBACK;
	struct iomap iomap = { 0 };
	pgoff_t max_pgoff, pgoff;
	void *entry;
	loff_t pos;
	int error;
	pfn_t pfn;

	/*
	 * Check whether the offset isn't beyond the end of the file now. The
	 * caller is supposed to hold locks serializing us with truncate /
	 * punch hole, so this is a reliable test.
	 */
	pgoff = linear_page_index(vma, pmd_addr);
	max_pgoff = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);

	trace_dax_pmd_fault(inode, vmf, max_pgoff, 0);

	/*
	 * Make sure that the faulting address's PMD offset (colour) matches
	 * the PMD offset from the start of the file. This is necessary so
	 * that a PMD range in the page table overlaps exactly with a PMD
	 * range in the radix tree. (The check is restated as a standalone
	 * predicate in the illustrative helper after this #ifdef block.)
	 */
	if ((vmf->pgoff & PG_PMD_COLOUR) !=
	    ((vmf->address >> PAGE_SHIFT) & PG_PMD_COLOUR))
		goto fallback;

	/* Fall back to PTEs if we're going to COW */
	if (write && !(vma->vm_flags & VM_SHARED))
		goto fallback;

	/* If the PMD would extend outside the VMA */
	if (pmd_addr < vma->vm_start)
		goto fallback;
	if ((pmd_addr + PMD_SIZE) > vma->vm_end)
		goto fallback;

	if (pgoff >= max_pgoff) {
		result = VM_FAULT_SIGBUS;
		goto out;
	}

	/* If the PMD would extend beyond the file size */
	if ((pgoff | PG_PMD_COLOUR) >= max_pgoff)
		goto fallback;

	/*
	 * grab_mapping_entry() will make sure we get a 2MiB empty entry, a
	 * 2MiB zero page entry or a DAX PMD. If it can't (because a 4k page
	 * is already in the tree, for instance), it will return -EEXIST and
	 * we just fall back to 4k entries.
	 */
	entry = grab_mapping_entry(mapping, pgoff, RADIX_DAX_PMD);
	if (IS_ERR(entry))
		goto fallback;

	/*
	 * It is possible, particularly with mixed reads & writes to private
	 * mappings, that we have raced with a PTE fault that overlaps with
	 * the PMD we need to set up. If so just return and the fault will be
	 * retried.
	 */
	if (!pmd_none(*vmf->pmd) && !pmd_trans_huge(*vmf->pmd) &&
			!pmd_devmap(*vmf->pmd)) {
		result = 0;
		goto unlock_entry;
	}

	/*
	 * Note that we don't use iomap_apply here. We aren't doing I/O, only
	 * setting up a mapping, so really we're using iomap_begin() as a way
	 * to look up our filesystem block.
	 */
	pos = (loff_t)pgoff << PAGE_SHIFT;
	error = ops->iomap_begin(inode, pos, PMD_SIZE, iomap_flags, &iomap);
	if (error)
		goto unlock_entry;

	if (iomap.offset + iomap.length < pos + PMD_SIZE)
		goto finish_iomap;

	sync = dax_fault_is_synchronous(iomap_flags, vma, &iomap);

	switch (iomap.type) {
	case IOMAP_MAPPED:
		error = dax_iomap_pfn(&iomap, pos, PMD_SIZE, &pfn);
		if (error < 0)
			goto finish_iomap;

		entry = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
						RADIX_DAX_PMD, write && !sync);

		/*
		 * If we are doing a synchronous page fault and the inode
		 * needs fsync, we can insert the PMD into the page tables
		 * only after that happens. Skip the insertion for now and
		 * return the pfn so that the caller can insert it after
		 * fsync is done.
		 */
		if (sync) {
			if (WARN_ON_ONCE(!pfnp))
				goto finish_iomap;
			*pfnp = pfn;
			result = VM_FAULT_NEEDDSYNC;
			goto finish_iomap;
		}

		trace_dax_pmd_insert_mapping(inode, vmf, PMD_SIZE, pfn, entry);
		result = vmf_insert_pfn_pmd(vma, vmf->address, vmf->pmd, pfn,
					    write);
		break;
	case IOMAP_UNWRITTEN:
	case IOMAP_HOLE:
		if (WARN_ON_ONCE(write))
			break;
		result = dax_pmd_load_hole(vmf, &iomap, entry);
		break;
	default:
		WARN_ON_ONCE(1);
		break;
	}

finish_iomap:
	if (ops->iomap_end) {
		int copied = PMD_SIZE;

		if (result == VM_FAULT_FALLBACK)
			copied = 0;
		/*
		 * The fault is done by now and there's no way back (another
		 * thread may already be happily using the PMD we have
		 * installed). Just ignore the error from ->iomap_end since
		 * we cannot do much with it.
		 */
		ops->iomap_end(inode, pos, PMD_SIZE, copied, iomap_flags,
				&iomap);
	}
unlock_entry:
	put_locked_mapping_entry(mapping, pgoff);
fallback:
	if (result == VM_FAULT_FALLBACK) {
		split_huge_pmd(vma, vmf->pmd, vmf->address);
		count_vm_event(THP_FAULT_FALLBACK);
	}
out:
	trace_dax_pmd_fault_done(inode, vmf, max_pgoff, result);
	return result;
}
#else
static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
			       const struct iomap_ops *ops)
{
	return VM_FAULT_FALLBACK;
}
#endif /* CONFIG_FS_DAX_PMD */
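
/*
 * Illustrative helper (a sketch, not part of fs/dax.c): the PMD
 * "colour" check in dax_iomap_pmd_fault() above, written out as a
 * standalone predicate. With 4KiB pages and 2MiB PMDs, PG_PMD_COLOUR
 * is 511, so a PMD mapping is only possible when the file page offset
 * and the faulting virtual address are congruent modulo PMD_SIZE.
 */
static inline bool example_dax_pmd_colour_matches(pgoff_t pgoff,
		unsigned long address)
{
	return (pgoff & PG_PMD_COLOUR) ==
		((address >> PAGE_SHIFT) & PG_PMD_COLOUR);
}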

/**
 * dax_iomap_fault - handle a page fault on a DAX file
 * @vmf:	The description of the fault
 * @pe_size:	Size of the page to fault in
 * @pfnp:	PFN to insert for synchronous faults if fsync is required
 * @iomap_errp:	Storage for detailed error code in case of error
 * @ops:	Iomap ops passed from the file system
 *
 * When a page fault occurs, filesystems may call this helper in
 * their fault handler for DAX files. dax_iomap_fault() assumes the caller
 * has done all the necessary locking for the page fault to proceed
 * successfully.
 */
vm_fault_t dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size,
		    pfn_t *pfnp, int *iomap_errp, const struct iomap_ops *ops)
{
	switch (pe_size) {
	case PE_SIZE_PTE:
		return dax_iomap_pte_fault(vmf, pfnp, iomap_errp, ops);
	case PE_SIZE_PMD:
		return dax_iomap_pmd_fault(vmf, pfnp, ops);
	default:
		return VM_FAULT_FALLBACK;
	}
}
EXPORT_SYMBOL_GPL(dax_iomap_fault);
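
/*
 * Illustrative caller (a sketch, not part of fs/dax.c): a filesystem's
 * ->fault and ->huge_fault handlers are typically thin wrappers that
 * take a lock serializing against truncate and then call
 * dax_iomap_fault(), in the style of ext4 and XFS. "example_dax_sem"
 * is a placeholder for the filesystem's own mmap/truncate lock, and
 * "example_iomap_ops" is the placeholder declared in the sketch above.
 */
extern struct rw_semaphore example_dax_sem;	/* placeholder */

static vm_fault_t example_dax_huge_fault(struct vm_fault *vmf,
		enum page_entry_size pe_size)
{
	vm_fault_t ret;

	down_read(&example_dax_sem);
	ret = dax_iomap_fault(vmf, pe_size, NULL, NULL, &example_iomap_ops);
	up_read(&example_dax_sem);

	return ret;
}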

/**
 * dax_insert_pfn_mkwrite - insert PTE or PMD entry into page tables
 * @vmf:	The description of the fault
 * @pe_size:	Size of entry to be inserted
 * @pfn:	PFN to insert
 *
 * This function inserts a writeable PTE or PMD entry into the page tables
 * for an mmaped DAX file. It also takes care of marking the corresponding
 * radix tree entry as dirty.
 */
static vm_fault_t dax_insert_pfn_mkwrite(struct vm_fault *vmf,
				  enum page_entry_size pe_size,
				  pfn_t pfn)
{
	struct address_space *mapping = vmf->vma->vm_file->f_mapping;
	void *entry, **slot;
	pgoff_t index = vmf->pgoff;
	vm_fault_t ret;

	xa_lock_irq(&mapping->i_pages);
	entry = get_unlocked_mapping_entry(mapping, index, &slot);
	/* Did we race with someone splitting the entry or so? */
	if (!entry ||
	    (pe_size == PE_SIZE_PTE && !dax_is_pte_entry(entry)) ||
	    (pe_size == PE_SIZE_PMD && !dax_is_pmd_entry(entry))) {
		put_unlocked_mapping_entry(mapping, index, entry);
		xa_unlock_irq(&mapping->i_pages);
		trace_dax_insert_pfn_mkwrite_no_entry(mapping->host, vmf,
						      VM_FAULT_NOPAGE);
		return VM_FAULT_NOPAGE;
	}
	radix_tree_tag_set(&mapping->i_pages, index, PAGECACHE_TAG_DIRTY);
	entry = lock_slot(mapping, slot);
	xa_unlock_irq(&mapping->i_pages);
	switch (pe_size) {
	case PE_SIZE_PTE:
		ret = vmf_insert_mixed_mkwrite(vmf->vma, vmf->address, pfn);
		break;
#ifdef CONFIG_FS_DAX_PMD
	case PE_SIZE_PMD:
		ret = vmf_insert_pfn_pmd(vmf->vma, vmf->address, vmf->pmd,
			pfn, true);
		break;
#endif
	default:
		ret = VM_FAULT_FALLBACK;
	}
	put_locked_mapping_entry(mapping, index);
	trace_dax_insert_pfn_mkwrite(mapping->host, vmf, ret);
	return ret;
}

/**
 * dax_finish_sync_fault - finish synchronous page fault
 * @vmf:	The description of the fault
 * @pe_size:	Size of entry to be inserted
 * @pfn:	PFN to insert
 *
 * This function ensures that the file range touched by the page fault is
 * stored persistently on the media and handles inserting the appropriate
 * page table entry.
 */
vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf,
		enum page_entry_size pe_size, pfn_t pfn)
{
	int err;
	loff_t start = ((loff_t)vmf->pgoff) << PAGE_SHIFT;
	size_t len = 0;

	if (pe_size == PE_SIZE_PTE)
		len = PAGE_SIZE;
	else if (pe_size == PE_SIZE_PMD)
		len = PMD_SIZE;
	else
		WARN_ON_ONCE(1);
	err = vfs_fsync_range(vmf->vma->vm_file, start, start + len - 1, 1);
	if (err)
		return VM_FAULT_SIGBUS;
	return dax_insert_pfn_mkwrite(vmf, pe_size, pfn);
}
EXPORT_SYMBOL_GPL(dax_finish_sync_fault);
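
/*
 * Illustrative caller (a sketch, not part of fs/dax.c): a write-fault
 * path that supports MAP_SYNC passes a pfn_t to dax_iomap_fault() and,
 * when VM_FAULT_NEEDDSYNC comes back, lets dax_finish_sync_fault()
 * fsync the range before the PTE or PMD is installed. This follows the
 * pattern ext4 and XFS use; "example_dax_sem" and "example_iomap_ops"
 * are the placeholders declared in the sketches above.
 */
static vm_fault_t example_dax_sync_write_fault(struct vm_fault *vmf,
		enum page_entry_size pe_size)
{
	pfn_t pfn;
	vm_fault_t ret;

	down_read(&example_dax_sem);
	ret = dax_iomap_fault(vmf, pe_size, &pfn, NULL, &example_iomap_ops);
	up_read(&example_dax_sem);

	/* The filesystem has allocated blocks but not yet persisted them. */
	if (ret & VM_FAULT_NEEDDSYNC)
		ret = dax_finish_sync_fault(vmf, pe_size, pfn);

	return ret;
}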