Blame view
fs/dax.c
45.9 KB
2025cf9e1 treewide: Replace... |
1 |
// SPDX-License-Identifier: GPL-2.0-only |
d475c6346 dax,ext2: replace... |
2 3 4 5 6 |
/* * fs/dax.c - Direct Access filesystem code * Copyright (c) 2013-2014 Intel Corporation * Author: Matthew Wilcox <matthew.r.wilcox@intel.com> * Author: Ross Zwisler <ross.zwisler@linux.intel.com> |
d475c6346 dax,ext2: replace... |
7 8 9 10 11 |
*/ #include <linux/atomic.h> #include <linux/blkdev.h> #include <linux/buffer_head.h> |
d77e92e27 dax: update PMD f... |
12 |
#include <linux/dax.h> |
d475c6346 dax,ext2: replace... |
13 14 |
#include <linux/fs.h> #include <linux/genhd.h> |
f7ca90b16 dax,ext2: replace... |
15 16 17 |
#include <linux/highmem.h> #include <linux/memcontrol.h> #include <linux/mm.h> |
d475c6346 dax,ext2: replace... |
18 |
#include <linux/mutex.h> |
9973c98ec dax: add support ... |
19 |
#include <linux/pagevec.h> |
289c6aeda dax,ext2: replace... |
20 |
#include <linux/sched.h> |
f361bf4a6 sched/headers: Pr... |
21 |
#include <linux/sched/signal.h> |
d475c6346 dax,ext2: replace... |
22 |
#include <linux/uio.h> |
f7ca90b16 dax,ext2: replace... |
23 |
#include <linux/vmstat.h> |
34c0fd540 mm, dax, pmem: in... |
24 |
#include <linux/pfn_t.h> |
0e749e542 dax: increase gra... |
25 |
#include <linux/sizes.h> |
4b4bb46d0 dax: clear dirty ... |
26 |
#include <linux/mmu_notifier.h> |
a254e5681 dax: provide an i... |
27 |
#include <linux/iomap.h> |
11cf9d863 fs/dax: Deposit p... |
28 |
#include <asm/pgalloc.h> |
d475c6346 dax,ext2: replace... |
29 |
|
282a8e039 dax: add tracepoi... |
30 31 |
#define CREATE_TRACE_POINTS #include <trace/events/fs_dax.h> |
cfc93c6c6 dax: Convert dax_... |
32 33 34 35 36 37 38 39 40 41 |
static inline unsigned int pe_order(enum page_entry_size pe_size) { if (pe_size == PE_SIZE_PTE) return PAGE_SHIFT - PAGE_SHIFT; if (pe_size == PE_SIZE_PMD) return PMD_SHIFT - PAGE_SHIFT; if (pe_size == PE_SIZE_PUD) return PUD_SHIFT - PAGE_SHIFT; return ~0; } |
/* We choose 4096 entries - same as per-zone page wait tables */
#define DAX_WAIT_TABLE_BITS 12
#define DAX_WAIT_TABLE_ENTRIES (1 << DAX_WAIT_TABLE_BITS)

/* The 'colour' (ie low bits) within a PMD of a page offset.  */
#define PG_PMD_COLOUR	((PMD_SIZE >> PAGE_SHIFT) - 1)
#define PG_PMD_NR	(PMD_SIZE >> PAGE_SHIFT)

/* The order of a PMD entry */
#define PMD_ORDER	(PMD_SHIFT - PAGE_SHIFT)

/* Hashed wait queues: tasks sleep here while a DAX entry they need is locked. */
static wait_queue_head_t wait_table[DAX_WAIT_TABLE_ENTRIES];

/* Initialise every bucket of the DAX entry wait table at boot (fs_initcall). */
static int __init init_dax_wait_table(void)
{
	int i;

	for (i = 0; i < DAX_WAIT_TABLE_ENTRIES; i++)
		init_waitqueue_head(wait_table + i);
	return 0;
}
fs_initcall(init_dax_wait_table);
/*
 * DAX pagecache entries use XArray value entries so they can't be mistaken
 * for pages.  We use one bit for locking, one bit for the entry size (PMD)
 * and two more to tell us if the entry is a zero page or an empty entry that
 * is just used for locking.  In total four special bits.
 *
 * If the PMD bit isn't set the entry has size PAGE_SIZE, and if the ZERO_PAGE
 * and EMPTY bits aren't set the entry is a normal DAX entry with a filesystem
 * block allocation.
 */
#define DAX_SHIFT	(4)		/* pfn is stored above these flag bits */
#define DAX_LOCKED	(1UL << 0)	/* entry is locked by a fault/flush path */
#define DAX_PMD		(1UL << 1)	/* entry covers PMD_SIZE, not PAGE_SIZE */
#define DAX_ZERO_PAGE	(1UL << 2)	/* entry is a zero page, no block backing */
#define DAX_EMPTY	(1UL << 3)	/* placeholder entry, used only for locking */
|
a77d19f46 dax: Rename some ... |
78 |
static unsigned long dax_to_pfn(void *entry) |
527b19d08 dax: move all DAX... |
79 |
{ |
3159f943a xarray: Replace e... |
80 |
return xa_to_value(entry) >> DAX_SHIFT; |
527b19d08 dax: move all DAX... |
81 |
} |
9f32d2213 dax: Convert dax_... |
82 83 84 85 |
static void *dax_make_entry(pfn_t pfn, unsigned long flags) { return xa_mk_value(flags | (pfn_t_to_pfn(pfn) << DAX_SHIFT)); } |
cfc93c6c6 dax: Convert dax_... |
86 87 88 89 |
static bool dax_is_locked(void *entry) { return xa_to_value(entry) & DAX_LOCKED; } |
a77d19f46 dax: Rename some ... |
90 |
static unsigned int dax_entry_order(void *entry) |
527b19d08 dax: move all DAX... |
91 |
{ |
3159f943a xarray: Replace e... |
92 |
if (xa_to_value(entry) & DAX_PMD) |
cfc93c6c6 dax: Convert dax_... |
93 |
return PMD_ORDER; |
527b19d08 dax: move all DAX... |
94 95 |
return 0; } |
fda490d39 dax: Fix dax_unlo... |
96 |
static unsigned long dax_is_pmd_entry(void *entry) |
d1a5f2b4d block: use DAX fo... |
97 |
{ |
3159f943a xarray: Replace e... |
98 |
return xa_to_value(entry) & DAX_PMD; |
d1a5f2b4d block: use DAX fo... |
99 |
} |
fda490d39 dax: Fix dax_unlo... |
100 |
static bool dax_is_pte_entry(void *entry) |
d475c6346 dax,ext2: replace... |
101 |
{ |
3159f943a xarray: Replace e... |
102 |
return !(xa_to_value(entry) & DAX_PMD); |
d475c6346 dax,ext2: replace... |
103 |
} |
642261ac9 dax: add struct i... |
104 |
static int dax_is_zero_entry(void *entry) |
d475c6346 dax,ext2: replace... |
105 |
{ |
3159f943a xarray: Replace e... |
106 |
return xa_to_value(entry) & DAX_ZERO_PAGE; |
d475c6346 dax,ext2: replace... |
107 |
} |
642261ac9 dax: add struct i... |
108 |
static int dax_is_empty_entry(void *entry) |
b2e0d1625 dax: fix lifetime... |
109 |
{ |
3159f943a xarray: Replace e... |
110 |
return xa_to_value(entry) & DAX_EMPTY; |
b2e0d1625 dax: fix lifetime... |
111 |
} |
/*
 * true if the entry that was found is of a smaller order than the entry
 * we were looking for
 */
static bool dax_is_conflict(void *entry)
{
	return entry == XA_RETRY_ENTRY;
}

/*
 * DAX page cache entry locking
 */
struct exceptional_entry_key {
	struct xarray *xa;	/* the array the entry lives in */
	pgoff_t entry_start;	/* entry index; PMD-aligned for PMD entries */
};

struct wait_exceptional_entry_queue {
	wait_queue_entry_t wait;
	struct exceptional_entry_key key;	/* filters wakeups to one entry */
};
b15cd8006 dax: Convert page... |
133 134 |
static wait_queue_head_t *dax_entry_waitqueue(struct xa_state *xas, void *entry, struct exceptional_entry_key *key) |
63e95b5c4 dax: coordinate l... |
135 136 |
{ unsigned long hash; |
b15cd8006 dax: Convert page... |
137 |
unsigned long index = xas->xa_index; |
63e95b5c4 dax: coordinate l... |
138 139 140 141 142 143 |
/* * If 'entry' is a PMD, align the 'index' that we use for the wait * queue to the start of that PMD. This ensures that all offsets in * the range covered by the PMD map to the same bit lock. */ |
642261ac9 dax: add struct i... |
144 |
if (dax_is_pmd_entry(entry)) |
917f34526 dax: use PG_PMD_C... |
145 |
index &= ~PG_PMD_COLOUR; |
b15cd8006 dax: Convert page... |
146 |
key->xa = xas->xa; |
63e95b5c4 dax: coordinate l... |
147 |
key->entry_start = index; |
b15cd8006 dax: Convert page... |
148 |
hash = hash_long((unsigned long)xas->xa ^ index, DAX_WAIT_TABLE_BITS); |
63e95b5c4 dax: coordinate l... |
149 150 |
return wait_table + hash; } |
/*
 * Wait-queue callback: wake a sleeper only if the wakeup is for the same
 * (xarray, entry_start) pair it is waiting on; all entries share hashed
 * buckets, so unrelated wakeups must be filtered out here.
 */
static int wake_exceptional_entry_func(wait_queue_entry_t *wait,
		unsigned int mode, int sync, void *keyp)
{
	struct exceptional_entry_key *key = keyp;
	struct wait_exceptional_entry_queue *ewait =
		container_of(wait, struct wait_exceptional_entry_queue, wait);

	if (key->xa != ewait->key.xa ||
	    key->entry_start != ewait->key.entry_start)
		return 0;
	return autoremove_wake_function(wait, mode, sync, NULL);
}

/*
 * @entry may no longer be the entry at the index in the mapping.
 * The important information it's conveying is whether the entry at
 * this index used to be a PMD entry.
 */
static void dax_wake_entry(struct xa_state *xas, void *entry, bool wake_all)
{
	struct exceptional_entry_key key;
	wait_queue_head_t *wq;

	wq = dax_entry_waitqueue(xas, entry, &key);

	/*
	 * Checking for locked entry and prepare_to_wait_exclusive() happens
	 * under the i_pages lock, ditto for entry handling in our callers.
	 * So at this point all tasks that could have seen our entry locked
	 * must be in the waitqueue and the following check will see them.
	 */
	if (waitqueue_active(wq))
		__wake_up(wq, TASK_NORMAL, wake_all ? 0 : 1, &key);
}
/*
 * Look up entry in page cache, wait for it to become unlocked if it
 * is a DAX entry and return it.  The caller must subsequently call
 * put_unlocked_entry() if it did not lock the entry or dax_unlock_entry()
 * if it did.  The entry returned may have a larger order than @order.
 * If @order is larger than the order of the entry found in i_pages, this
 * function returns a dax_is_conflict entry.
 *
 * Must be called with the i_pages lock held.
 */
static void *get_unlocked_entry(struct xa_state *xas, unsigned int order)
{
	void *entry;
	struct wait_exceptional_entry_queue ewait;
	wait_queue_head_t *wq;

	init_wait(&ewait.wait);
	ewait.wait.func = wake_exceptional_entry_func;

	for (;;) {
		entry = xas_find_conflict(xas);
		/* No entry, or a non-value (real page) entry: hand it back as-is. */
		if (!entry || WARN_ON_ONCE(!xa_is_value(entry)))
			return entry;
		/* Smaller entry than requested: signal conflict (dax_is_conflict()). */
		if (dax_entry_order(entry) < order)
			return XA_RETRY_ENTRY;
		if (!dax_is_locked(entry))
			return entry;

		/*
		 * Entry is locked: drop the i_pages lock, sleep until woken by
		 * dax_wake_entry(), then retake the lock and retry the lookup.
		 */
		wq = dax_entry_waitqueue(xas, entry, &ewait.key);
		prepare_to_wait_exclusive(wq, &ewait.wait,
					  TASK_UNINTERRUPTIBLE);
		xas_unlock_irq(xas);
		xas_reset(xas);
		schedule();
		finish_wait(wq, &ewait.wait);
		xas_lock_irq(xas);
	}
}
/*
 * The only thing keeping the address space around is the i_pages lock
 * (it's cycled in clear_inode() after removing the entries from i_pages)
 * After we call xas_unlock_irq(), we cannot touch xas->xa.
 *
 * Sleeps exactly once and returns with the xa_lock dropped; the caller
 * must restart its lookup from scratch.
 */
static void wait_entry_unlocked(struct xa_state *xas, void *entry)
{
	struct wait_exceptional_entry_queue ewait;
	wait_queue_head_t *wq;

	init_wait(&ewait.wait);
	ewait.wait.func = wake_exceptional_entry_func;

	wq = dax_entry_waitqueue(xas, entry, &ewait.key);
	/*
	 * Unlike get_unlocked_entry() there is no guarantee that this
	 * path ever successfully retrieves an unlocked entry before an
	 * inode dies. Perform a non-exclusive wait in case this path
	 * never successfully performs its own wake up.
	 */
	prepare_to_wait(wq, &ewait.wait, TASK_UNINTERRUPTIBLE);
	xas_unlock_irq(xas);
	schedule();
	finish_wait(wq, &ewait.wait);
}
/*
 * Drop a reference obtained via get_unlocked_entry() without having locked
 * the entry ourselves.
 */
static void put_unlocked_entry(struct xa_state *xas, void *entry)
{
	/* If we were the only waiter woken, wake the next one */
	if (entry && !dax_is_conflict(entry))
		dax_wake_entry(xas, entry, false);
}

/*
 * We used the xa_state to get the entry, but then we locked the entry and
 * dropped the xa_lock, so we know the xa_state is stale and must be reset
 * before use.
 */
static void dax_unlock_entry(struct xa_state *xas, void *entry)
{
	void *old;

	BUG_ON(dax_is_locked(entry));	/* caller passes the *unlocked* value */
	xas_reset(xas);
	xas_lock_irq(xas);
	old = xas_store(xas, entry);	/* overwrite the locked entry in place */
	xas_unlock_irq(xas);
	BUG_ON(!dax_is_locked(old));	/* ...which must have been locked */
	dax_wake_entry(xas, entry, false);
}

/*
 * Store the entry with its DAX_LOCKED bit set.
 *
 * Return: The entry stored at this location before it was locked.
 */
static void *dax_lock_entry(struct xa_state *xas, void *entry)
{
	unsigned long v = xa_to_value(entry);
	return xas_store(xas, xa_mk_value(v | DAX_LOCKED));
}
d2c997c0f fs, dax: use page... |
277 278 279 280 281 282 283 284 285 286 287 |
static unsigned long dax_entry_size(void *entry) { if (dax_is_zero_entry(entry)) return 0; else if (dax_is_empty_entry(entry)) return 0; else if (dax_is_pmd_entry(entry)) return PMD_SIZE; else return PAGE_SIZE; } |
a77d19f46 dax: Rename some ... |
288 |
static unsigned long dax_end_pfn(void *entry) |
d2c997c0f fs, dax: use page... |
289 |
{ |
a77d19f46 dax: Rename some ... |
290 |
return dax_to_pfn(entry) + dax_entry_size(entry) / PAGE_SIZE; |
d2c997c0f fs, dax: use page... |
291 292 293 294 295 296 297 |
} /* * Iterate through all mapped pfns represented by an entry, i.e. skip * 'empty' and 'zero' entries. */ #define for_each_mapped_pfn(entry, pfn) \ |
a77d19f46 dax: Rename some ... |
298 299 |
for (pfn = dax_to_pfn(entry); \ pfn < dax_end_pfn(entry); pfn++) |
d2c997c0f fs, dax: use page... |
300 |
|
/*
 * TODO: for reflink+dax we need a way to associate a single page with
 * multiple address_space instances at different linear_page_index()
 * offsets.
 */
static void dax_associate_entry(void *entry, struct address_space *mapping,
		struct vm_area_struct *vma, unsigned long address)
{
	unsigned long size = dax_entry_size(entry), pfn, index;
	int i = 0;

	/* In the 'limited' case pages are never associated with a mapping. */
	if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
		return;

	/* Index of the first page: align the address down to the entry size. */
	index = linear_page_index(vma, address & ~(size - 1));
	for_each_mapped_pfn(entry, pfn) {
		struct page *page = pfn_to_page(pfn);

		WARN_ON_ONCE(page->mapping);	/* page must not be owned yet */
		page->mapping = mapping;
		page->index = index + i++;
	}
}

/*
 * Undo dax_associate_entry(): clear page->mapping/index for every mapped pfn.
 * @trunc: true when called from a truncate path, where a still-elevated page
 *	   refcount indicates someone (e.g. DMA) still holds the page.
 */
static void dax_disassociate_entry(void *entry, struct address_space *mapping,
		bool trunc)
{
	unsigned long pfn;

	if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
		return;

	for_each_mapped_pfn(entry, pfn) {
		struct page *page = pfn_to_page(pfn);

		WARN_ON_ONCE(trunc && page_ref_count(page) > 1);
		WARN_ON_ONCE(page->mapping && page->mapping != mapping);
		page->mapping = NULL;
		page->index = 0;
	}
}
5fac7408d mm, fs, dax: hand... |
341 342 343 344 345 346 347 348 349 350 351 352 |
static struct page *dax_busy_page(void *entry) { unsigned long pfn; for_each_mapped_pfn(entry, pfn) { struct page *page = pfn_to_page(pfn); if (page_ref_count(page) > 1) return page; } return NULL; } |
/*
 * dax_lock_mapping_entry - Lock the DAX entry corresponding to a page
 * @page: The page whose entry we want to lock
 *
 * Context: Process context.
 * Return: A cookie to pass to dax_unlock_page() or 0 if the entry could
 * not be locked.
 */
dax_entry_t dax_lock_page(struct page *page)
{
	XA_STATE(xas, NULL, 0);
	void *entry;

	/* Ensure page->mapping isn't freed while we look at it */
	rcu_read_lock();
	for (;;) {
		struct address_space *mapping = READ_ONCE(page->mapping);

		entry = NULL;	/* returned as 0: could not lock */
		if (!mapping || !dax_mapping(mapping))
			break;

		/*
		 * In the device-dax case there's no need to lock, a
		 * struct dev_pagemap pin is sufficient to keep the
		 * inode alive, and we assume we have dev_pagemap pin
		 * otherwise we would not have a valid pfn_to_page()
		 * translation.
		 */
		entry = (void *)~0UL;	/* sentinel cookie for device-dax */
		if (S_ISCHR(mapping->host->i_mode))
			break;

		xas.xa = &mapping->i_pages;
		xas_lock_irq(&xas);
		/* mapping may have changed under us before we got the lock */
		if (mapping != page->mapping) {
			xas_unlock_irq(&xas);
			continue;
		}
		xas_set(&xas, page->index);
		entry = xas_load(&xas);
		if (dax_is_locked(entry)) {
			/* wait_entry_unlocked() drops the xa_lock; restart */
			rcu_read_unlock();
			wait_entry_unlocked(&xas, entry);
			rcu_read_lock();
			continue;
		}
		dax_lock_entry(&xas, entry);
		xas_unlock_irq(&xas);
		break;
	}
	rcu_read_unlock();
	return (dax_entry_t)entry;
}
/*
 * dax_unlock_page - Unlock the DAX entry locked by dax_lock_page()
 * @page: The page whose entry was locked
 * @cookie: Value returned by dax_lock_page()
 */
void dax_unlock_page(struct page *page, dax_entry_t cookie)
{
	struct address_space *mapping = page->mapping;
	XA_STATE(xas, &mapping->i_pages, page->index);

	/* device-dax: dax_lock_page() took no lock, nothing to release */
	if (S_ISCHR(mapping->host->i_mode))
		return;

	dax_unlock_entry(&xas, (void *)cookie);
}
/*
 * Find page cache entry at given index. If it is a DAX entry, return it
 * with the entry locked. If the page cache doesn't contain an entry at
 * that index, add a locked empty entry.
 *
 * When requesting an entry with size DAX_PMD, grab_mapping_entry() will
 * either return that locked entry or will return VM_FAULT_FALLBACK.
 * This will happen if there are any PTE entries within the PMD range
 * that we are requesting.
 *
 * We always favor PTE entries over PMD entries. There isn't a flow where we
 * evict PTE entries in order to 'upgrade' them to a PMD entry.  A PMD
 * insertion will fail if it finds any PTE entries already in the tree, and a
 * PTE insertion will cause an existing PMD entry to be unmapped and
 * downgraded to PTE entries.  This happens for both PMD zero pages as
 * well as PMD empty entries.
 *
 * The exception to this downgrade path is for PMD entries that have
 * real storage backing them.  We will leave these real PMD entries in
 * the tree, and PTE writes will simply dirty the entire PMD entry.
 *
 * Note: Unlike filemap_fault() we don't honor FAULT_FLAG_RETRY flags. For
 * persistent memory the benefit is doubtful. We can add that later if we can
 * show it helps.
 *
 * On error, this function does not return an ERR_PTR.  Instead it returns
 * a VM_FAULT code, encoded as an xarray internal entry.  The ERR_PTR values
 * overlap with xarray value entries.
 */
static void *grab_mapping_entry(struct xa_state *xas,
		struct address_space *mapping, unsigned int order)
{
	unsigned long index = xas->xa_index;
	bool pmd_downgrade = false; /* splitting PMD entry into PTE entries? */
	void *entry;

retry:
	xas_lock_irq(xas);
	entry = get_unlocked_entry(xas, order);

	if (entry) {
		/* Found an entry of smaller order than requested: PMD fault
		 * must fall back to PTEs. */
		if (dax_is_conflict(entry))
			goto fallback;
		if (!xa_is_value(entry)) {
			xas_set_err(xas, EIO);
			goto out_unlock;
		}

		/* PTE request over a zero/empty PMD entry: split the PMD. */
		if (order == 0) {
			if (dax_is_pmd_entry(entry) &&
			    (dax_is_zero_entry(entry) ||
			     dax_is_empty_entry(entry))) {
				pmd_downgrade = true;
			}
		}
	}

	if (pmd_downgrade) {
		/*
		 * Make sure 'entry' remains valid while we drop
		 * the i_pages lock.
		 */
		dax_lock_entry(xas, entry);

		/*
		 * Besides huge zero pages the only other thing that gets
		 * downgraded are empty entries which don't need to be
		 * unmapped.
		 */
		if (dax_is_zero_entry(entry)) {
			xas_unlock_irq(xas);
			unmap_mapping_pages(mapping,
					xas->xa_index & ~PG_PMD_COLOUR,
					PG_PMD_NR, false);
			xas_reset(xas);
			xas_lock_irq(xas);
		}

		dax_disassociate_entry(entry, mapping, false);
		xas_store(xas, NULL);	/* undo the PMD join */
		dax_wake_entry(xas, entry, true);
		mapping->nrexceptional--;
		entry = NULL;
		xas_set(xas, index);
	}

	if (entry) {
		dax_lock_entry(xas, entry);
	} else {
		/* No usable entry: insert a locked empty placeholder. */
		unsigned long flags = DAX_EMPTY;

		if (order > 0)
			flags |= DAX_PMD;
		entry = dax_make_entry(pfn_to_pfn_t(0), flags);
		dax_lock_entry(xas, entry);
		if (xas_error(xas))
			goto out_unlock;
		mapping->nrexceptional++;
	}

out_unlock:
	xas_unlock_irq(xas);
	/* xas_nomem() allocates outside the lock; retry the whole dance. */
	if (xas_nomem(xas, mapping_gfp_mask(mapping) & ~__GFP_HIGHMEM))
		goto retry;
	if (xas->xa_node == XA_ERROR(-ENOMEM))
		return xa_mk_internal(VM_FAULT_OOM);
	if (xas_error(xas))
		return xa_mk_internal(VM_FAULT_SIGBUS);
	return entry;
fallback:
	xas_unlock_irq(xas);
	return xa_mk_internal(VM_FAULT_FALLBACK);
}
/**
 * dax_layout_busy_page - find first pinned page in @mapping
 * @mapping: address space to scan for a page with ref count > 1
 *
 * DAX requires ZONE_DEVICE mapped pages. These pages are never
 * 'onlined' to the page allocator so they are considered idle when
 * page->count == 1. A filesystem uses this interface to determine if
 * any page in the mapping is busy, i.e. for DMA, or other
 * get_user_pages() usages.
 *
 * It is expected that the filesystem is holding locks to block the
 * establishment of new mappings in this address_space. I.e. it expects
 * to be able to run unmap_mapping_range() and subsequently not race
 * mapping_mapped() becoming true.
 */
struct page *dax_layout_busy_page(struct address_space *mapping)
{
	XA_STATE(xas, &mapping->i_pages, 0);
	void *entry;
	unsigned int scanned = 0;
	struct page *page = NULL;

	/*
	 * In the 'limited' case get_user_pages() for dax is disabled.
	 */
	if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
		return NULL;

	if (!dax_mapping(mapping) || !mapping_mapped(mapping))
		return NULL;

	/*
	 * If we race get_user_pages_fast() here either we'll see the
	 * elevated page count in the iteration and wait, or
	 * get_user_pages_fast() will see that the page it took a reference
	 * against is no longer mapped in the page tables and bail to the
	 * get_user_pages() slow path.  The slow path is protected by
	 * pte_lock() and pmd_lock(). New references are not taken without
	 * holding those locks, and unmap_mapping_range() will not zero the
	 * pte or pmd without holding the respective lock, so we are
	 * guaranteed to either see new references or prevent new
	 * references from being established.
	 */
	unmap_mapping_range(mapping, 0, 0, 0);

	xas_lock_irq(&xas);
	xas_for_each(&xas, entry, ULONG_MAX) {
		if (WARN_ON_ONCE(!xa_is_value(entry)))
			continue;
		if (unlikely(dax_is_locked(entry)))
			entry = get_unlocked_entry(&xas, 0);
		if (entry)
			page = dax_busy_page(entry);
		put_unlocked_entry(&xas, entry);
		if (page)
			break;
		/* Periodically drop the lock to let others in / reschedule. */
		if (++scanned % XA_CHECK_SCHED)
			continue;

		xas_pause(&xas);
		xas_unlock_irq(&xas);
		cond_resched();
		xas_lock_irq(&xas);
	}
	xas_unlock_irq(&xas);
	return page;
}
EXPORT_SYMBOL_GPL(dax_layout_busy_page);
/*
 * Remove the DAX entry at @index from @mapping, waiting for it to unlock
 * first.  When @trunc is false a dirty or towrite-tagged entry is left in
 * place (it still needs writeback); when true it is removed unconditionally.
 *
 * Return: 1 if an entry was removed, 0 otherwise.
 */
static int __dax_invalidate_entry(struct address_space *mapping,
					  pgoff_t index, bool trunc)
{
	XA_STATE(xas, &mapping->i_pages, index);
	int ret = 0;
	void *entry;

	xas_lock_irq(&xas);
	entry = get_unlocked_entry(&xas, 0);
	if (!entry || WARN_ON_ONCE(!xa_is_value(entry)))
		goto out;
	if (!trunc &&
	    (xas_get_mark(&xas, PAGECACHE_TAG_DIRTY) ||
	     xas_get_mark(&xas, PAGECACHE_TAG_TOWRITE)))
		goto out;
	dax_disassociate_entry(entry, mapping, trunc);
	xas_store(&xas, NULL);
	mapping->nrexceptional--;
	ret = 1;
out:
	put_unlocked_entry(&xas, entry);
	xas_unlock_irq(&xas);
	return ret;
}
/*
 * Delete DAX entry at @index from @mapping.  Wait for it
 * to be unlocked before deleting it.
 */
int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index)
{
	int deleted = __dax_invalidate_entry(mapping, index, true);

	/*
	 * This gets called from truncate / punch_hole path. As such, the
	 * caller must hold locks protecting against concurrent modifications
	 * of the page cache (usually fs-private i_mmap_sem for writing).
	 * Since the caller has seen a DAX entry for this index, we better
	 * find it at that index as well...
	 */
	WARN_ON_ONCE(!deleted);
	return deleted;
}

/*
 * Invalidate DAX entry if it is clean.
 */
int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
				      pgoff_t index)
{
	return __dax_invalidate_entry(mapping, index, false);
}
/*
 * Copy @size bytes from the DAX device at @sector into the user page @to.
 * @vaddr is the user virtual address the page will be mapped at (passed to
 * copy_user_page() for cache coherency on architectures that need it).
 *
 * Return: 0 on success, negative errno from pgoff translation or
 * dax_direct_access() on failure.
 */
static int copy_user_dax(struct block_device *bdev, struct dax_device *dax_dev,
		sector_t sector, size_t size, struct page *to,
		unsigned long vaddr)
{
	void *vto, *kaddr;
	pgoff_t pgoff;
	long rc;
	int id;

	rc = bdev_dax_pgoff(bdev, sector, size, &pgoff);
	if (rc)
		return rc;

	id = dax_read_lock();
	rc = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size), &kaddr, NULL);
	if (rc < 0) {
		dax_read_unlock(id);
		return rc;
	}
	vto = kmap_atomic(to);
	copy_user_page(vto, (void __force *)kaddr, vaddr, to);
	kunmap_atomic(vto);
	dax_read_unlock(id);
	return 0;
}
642261ac9 dax: add struct i... |
670 671 672 673 674 675 676 |
/*
 * By this point grab_mapping_entry() has ensured that we have a locked entry
 * of the appropriate size so we don't have to worry about downgrading PMDs to
 * PTEs.  If we happen to be trying to insert a PTE and there is a PMD
 * already in the tree, we will skip the insertion and just dirty the PMD as
 * appropriate.
 *
 * Returns the entry now present in the page cache: either the freshly made
 * entry for @pfn, or the pre-existing PTE/PMD entry that was left in place.
 */
static void *dax_insert_entry(struct xa_state *xas,
		struct address_space *mapping, struct vm_fault *vmf,
		void *entry, pfn_t pfn, unsigned long flags, bool dirty)
{
	void *new_entry = dax_make_entry(pfn, flags);

	/* A dirtying fault must also dirty the inode for fsync/writeback. */
	if (dirty)
		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);

	if (dax_is_zero_entry(entry) && !(flags & DAX_ZERO_PAGE)) {
		unsigned long index = xas->xa_index;
		/* we are replacing a zero page with block mapping */
		if (dax_is_pmd_entry(entry))
			unmap_mapping_pages(mapping, index & ~PG_PMD_COLOUR,
					PG_PMD_NR, false);
		else /* pte entry */
			unmap_mapping_pages(mapping, index, 1, false);
	}

	xas_reset(xas);
	xas_lock_irq(xas);
	if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) {
		void *old;

		dax_disassociate_entry(entry, mapping, false);
		dax_associate_entry(new_entry, mapping, vmf->vma, vmf->address);
		/*
		 * Only swap our new entry into the page cache if the current
		 * entry is a zero page or an empty entry.  If a normal PTE or
		 * PMD entry is already in the cache, we leave it alone.  This
		 * means that if we are trying to insert a PTE and the
		 * existing entry is a PMD, we will just leave the PMD in the
		 * tree and dirty it if necessary.
		 */
		old = dax_lock_entry(xas, new_entry);
		/* The old entry must still be the locked form of @entry. */
		WARN_ON_ONCE(old != xa_mk_value(xa_to_value(entry) |
					DAX_LOCKED));
		entry = new_entry;
	} else {
		xas_load(xas);	/* Walk the xa_state */
	}

	if (dirty)
		xas_set_mark(xas, PAGECACHE_TAG_DIRTY);

	xas_unlock_irq(xas);
	return entry;
}
a77d19f46 dax: Rename some ... |
723 724 |
/* Translate a file page offset to the user virtual address within @vma. */
static inline unsigned long pgoff_address(pgoff_t pgoff,
		struct vm_area_struct *vma)
{
	unsigned long addr = vma->vm_start +
			((pgoff - vma->vm_pgoff) << PAGE_SHIFT);

	/* The caller must only ask about offsets the VMA actually maps. */
	VM_BUG_ON_VMA(addr < vma->vm_start || addr >= vma->vm_end, vma);
	return addr;
}

/* Walk all mappings of a given index of a file and writeprotect them */
a77d19f46 dax: Rename some ... |
734 735 |
static void dax_entry_mkclean(struct address_space *mapping, pgoff_t index,
		unsigned long pfn)
{
	struct vm_area_struct *vma;
	pte_t pte, *ptep = NULL;
	pmd_t *pmdp = NULL;
	spinlock_t *ptl;

	/* Iterate every VMA that maps @index of this file. */
	i_mmap_lock_read(mapping);
	vma_interval_tree_foreach(vma, &mapping->i_mmap, index, index) {
		struct mmu_notifier_range range;
		unsigned long address;

		cond_resched();

		/* Only shared mappings can carry dirty DAX data. */
		if (!(vma->vm_flags & VM_SHARED))
			continue;

		address = pgoff_address(index, vma);

		/*
		 * Note because we provide range to follow_pte_pmd it will
		 * call mmu_notifier_invalidate_range_start() on our behalf
		 * before taking any lock.
		 */
		if (follow_pte_pmd(vma->vm_mm, address, &range,
				   &ptep, &pmdp, &ptl))
			continue;

		/*
		 * No need to call mmu_notifier_invalidate_range() as we are
		 * downgrading page table protection not changing it to point
		 * to a new page.
		 *
		 * See Documentation/vm/mmu_notifier.rst
		 */
		if (pmdp) {
#ifdef CONFIG_FS_DAX_PMD
			pmd_t pmd;

			/* Entry may have been remapped elsewhere meanwhile. */
			if (pfn != pmd_pfn(*pmdp))
				goto unlock_pmd;
			/* Already clean and write-protected: nothing to do. */
			if (!pmd_dirty(*pmdp) && !pmd_write(*pmdp))
				goto unlock_pmd;

			flush_cache_page(vma, address, pfn);
			pmd = pmdp_invalidate(vma, address, pmdp);
			pmd = pmd_wrprotect(pmd);
			pmd = pmd_mkclean(pmd);
			set_pmd_at(vma->vm_mm, address, pmdp, pmd);
unlock_pmd:
#endif
			spin_unlock(ptl);
		} else {
			if (pfn != pte_pfn(*ptep))
				goto unlock_pte;
			if (!pte_dirty(*ptep) && !pte_write(*ptep))
				goto unlock_pte;

			/* Clean + write-protect so a new write re-faults. */
			flush_cache_page(vma, address, pfn);
			pte = ptep_clear_flush(vma, address, ptep);
			pte = pte_wrprotect(pte);
			pte = pte_mkclean(pte);
			set_pte_at(vma->vm_mm, address, ptep, pte);
unlock_pte:
			pte_unmap_unlock(ptep, ptl);
		}

		mmu_notifier_invalidate_range_end(&range);
	}
	i_mmap_unlock_read(mapping);
}
9fc747f68 dax: Convert dax ... |
806 807 |
/*
 * dax_writeback_one - flush one dirty DAX page-cache entry
 *
 * Write-protects all userspace mappings of the entry, flushes the CPU
 * caches for its backing pfns and clears the dirty tag.  Called with the
 * xa_state locked (irq-disabled); returns with it locked again.
 * Returns 0 or a negative errno on corruption (non-value / zero / empty
 * entry found under the TOWRITE tag).
 */
static int dax_writeback_one(struct xa_state *xas, struct dax_device *dax_dev,
		struct address_space *mapping, void *entry)
{
	unsigned long pfn, index, count;
	long ret = 0;

	/*
	 * A page got tagged dirty in DAX mapping? Something is seriously
	 * wrong.
	 */
	if (WARN_ON(!xa_is_value(entry)))
		return -EIO;

	if (unlikely(dax_is_locked(entry))) {
		void *old_entry = entry;

		entry = get_unlocked_entry(xas, 0);

		/* Entry got punched out / reallocated? */
		if (!entry || WARN_ON_ONCE(!xa_is_value(entry)))
			goto put_unlocked;
		/*
		 * Entry got reallocated elsewhere? No need to writeback.
		 * We have to compare pfns as we must not bail out due to
		 * difference in lockbit or entry type.
		 */
		if (dax_to_pfn(old_entry) != dax_to_pfn(entry))
			goto put_unlocked;
		if (WARN_ON_ONCE(dax_is_empty_entry(entry) ||
					dax_is_zero_entry(entry))) {
			ret = -EIO;
			goto put_unlocked;
		}

		/* Another fsync thread may have already done this entry */
		if (!xas_get_mark(xas, PAGECACHE_TAG_TOWRITE))
			goto put_unlocked;
	}

	/* Lock the entry to serialize with page faults */
	dax_lock_entry(xas, entry);

	/*
	 * We can clear the tag now but we have to be careful so that concurrent
	 * dax_writeback_one() calls for the same index cannot finish before we
	 * actually flush the caches. This is achieved as the calls will look
	 * at the entry only under the i_pages lock and once they do that
	 * they will see the entry locked and wait for it to unlock.
	 */
	xas_clear_mark(xas, PAGECACHE_TAG_TOWRITE);
	xas_unlock_irq(xas);

	/*
	 * If dax_writeback_mapping_range() was given a wbc->range_start
	 * in the middle of a PMD, the 'index' we use needs to be
	 * aligned to the start of the PMD.
	 * This allows us to flush for PMD_SIZE and not have to worry about
	 * partial PMD writebacks.
	 */
	pfn = dax_to_pfn(entry);
	count = 1UL << dax_entry_order(entry);
	index = xas->xa_index & ~(count - 1);

	dax_entry_mkclean(mapping, index, pfn);
	dax_flush(dax_dev, page_address(pfn_to_page(pfn)), count * PAGE_SIZE);
	/*
	 * After we have flushed the cache, we can clear the dirty tag. There
	 * cannot be new dirty data in the pfn after the flush has completed as
	 * the pfn mappings are writeprotected and fault waits for mapping
	 * entry lock.
	 */
	xas_reset(xas);
	xas_lock_irq(xas);
	xas_store(xas, entry);
	xas_clear_mark(xas, PAGECACHE_TAG_DIRTY);
	dax_wake_entry(xas, entry, false);

	trace_dax_writeback_one(mapping->host, index, count);
	return ret;

 put_unlocked:
	put_unlocked_entry(xas, entry);
	return ret;
}

/*
 * Flush the mapping to the persistent domain within the byte range of [start,
 * end]. This is required by data integrity operations to ensure file data is
 * on persistent storage prior to completion of the operation.
 */
7f6d5b529 dax: move writeba... |
891 892 |
int dax_writeback_mapping_range(struct address_space *mapping,
		struct block_device *bdev, struct writeback_control *wbc)
{
	XA_STATE(xas, &mapping->i_pages, wbc->range_start >> PAGE_SHIFT);
	struct inode *inode = mapping->host;
	pgoff_t end_index = wbc->range_end >> PAGE_SHIFT;
	struct dax_device *dax_dev;
	void *entry;
	int ret = 0;
	unsigned int scanned = 0;

	/* DAX requires the fs block size to equal the page size. */
	if (WARN_ON_ONCE(inode->i_blkbits != PAGE_SHIFT))
		return -EIO;

	/* Nothing to flush, or caller did not ask for data integrity. */
	if (!mapping->nrexceptional || wbc->sync_mode != WB_SYNC_ALL)
		return 0;

	dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
	if (!dax_dev)
		return -EIO;

	trace_dax_writeback_range(inode, xas.xa_index, end_index);

	/* Move DIRTY tags to TOWRITE so new dirtiers don't extend the scan. */
	tag_pages_for_writeback(mapping, xas.xa_index, end_index);

	xas_lock_irq(&xas);
	xas_for_each_marked(&xas, entry, end_index, PAGECACHE_TAG_TOWRITE) {
		ret = dax_writeback_one(&xas, dax_dev, mapping, entry);
		if (ret < 0) {
			mapping_set_error(mapping, ret);
			break;
		}
		if (++scanned % XA_CHECK_SCHED)
			continue;

		/* Periodically drop the lock to allow rescheduling. */
		xas_pause(&xas);
		xas_unlock_irq(&xas);
		cond_resched();
		xas_lock_irq(&xas);
	}
	xas_unlock_irq(&xas);
	put_dax(dax_dev);
	trace_dax_writeback_range_done(inode, xas.xa_index, end_index);
	return ret;
}
EXPORT_SYMBOL_GPL(dax_writeback_mapping_range);
31a6f1a6e dax: Simplify arg... |
934 |
/* Map a file position within @iomap to the 512-byte device sector. */
static sector_t dax_iomap_sector(struct iomap *iomap, loff_t pos)
{
	loff_t off_in_extent = (pos & PAGE_MASK) - iomap->offset;

	return (iomap->addr + off_in_extent) >> 9;
}
5e161e406 dax: Factor out g... |
938 939 |
/*
 * dax_iomap_pfn - look up the pfn backing a file position
 * @iomap:	extent mapping returned by ->iomap_begin
 * @pos:	file position being faulted
 * @size:	mapping size (PAGE_SIZE or PMD_SIZE in the fault paths)
 * @pfnp:	output pfn on success
 *
 * Returns 0 on success, a negative errno from the lookup, or -EINVAL when
 * the returned mapping is too short, misaligned for @size, or (for
 * multi-page mappings) lacks devmap pages.
 */
static int dax_iomap_pfn(struct iomap *iomap, loff_t pos, size_t size,
			 pfn_t *pfnp)
{
	const sector_t sector = dax_iomap_sector(iomap, pos);
	pgoff_t pgoff;
	int id, rc;
	long length;

	rc = bdev_dax_pgoff(iomap->bdev, sector, size, &pgoff);
	if (rc)
		return rc;
	id = dax_read_lock();
	length = dax_direct_access(iomap->dax_dev, pgoff, PHYS_PFN(size),
				   NULL, pfnp);
	if (length < 0) {
		rc = length;
		goto out;
	}
	rc = -EINVAL;
	/* The device must map at least @size contiguous bytes... */
	if (PFN_PHYS(length) < size)
		goto out;
	/* ...starting at a pfn aligned to the mapping size. */
	if (pfn_t_to_pfn(*pfnp) & (PHYS_PFN(size)-1))
		goto out;
	/* For larger pages we need devmap */
	if (length > 1 && !pfn_t_devmap(*pfnp))
		goto out;
	rc = 0;
out:
	dax_read_unlock(id);
	return rc;
}
0e3b210ce dax: use pfn_mkwr... |
969 |
|
e30331ff0 dax: relocate som... |
970 |
/*
 * The user has performed a load from a hole in the file.  Allocating a new
 * page in the file would cause excessive storage usage for workloads with
 * sparse files.  Instead we insert a read-only mapping of the 4k zero page.
 * If this page is ever written to we will re-fault and change the mapping to
 * point to real DAX storage instead.
 */
static vm_fault_t dax_load_hole(struct xa_state *xas,
		struct address_space *mapping, void **entry,
		struct vm_fault *vmf)
{
	struct inode *inode = mapping->host;
	unsigned long vaddr = vmf->address;
	pfn_t pfn = pfn_to_pfn_t(my_zero_pfn(vaddr));
	vm_fault_t ret;

	/* Record a zero-page entry in the cache, then map it read-only. */
	*entry = dax_insert_entry(xas, mapping, vmf, *entry, pfn,
			DAX_ZERO_PAGE, false);
	ret = vmf_insert_mixed(vmf->vma, vaddr, pfn);
	trace_dax_load_hole(inode, vmf, ret);
	return ret;
}
4b0228fa1 dax: for truncate... |
992 993 994 995 996 997 998 999 1000 1001 1002 1003 |
/* Is [offset, offset+length) aligned to the device's logical block size? */
static bool dax_range_is_aligned(struct block_device *bdev,
				 unsigned int offset, unsigned int length)
{
	unsigned short sector_size = bdev_logical_block_size(bdev);

	return IS_ALIGNED(offset, sector_size) &&
	       IS_ALIGNED(length, sector_size);
}
cccbce671 filesystem-dax: c... |
1004 1005 1006 |
/*
 * __dax_zero_page_range - zero a sub-page byte range on a DAX device
 * @bdev:	block device backing the range
 * @dax_dev:	DAX device for @bdev
 * @sector:	sector containing the start of the range
 * @offset:	byte offset within that sector's page
 * @size:	number of bytes to zero
 *
 * Sector-aligned ranges are handed to the block layer; everything else is
 * zeroed through a direct mapping and flushed to persistence.
 * Returns 0 on success or a negative errno.
 */
int __dax_zero_page_range(struct block_device *bdev,
		struct dax_device *dax_dev, sector_t sector,
		unsigned int offset, unsigned int size)
{
	if (dax_range_is_aligned(bdev, offset, size)) {
		sector_t start_sector = sector + (offset >> 9);

		return blkdev_issue_zeroout(bdev, start_sector,
				size >> 9, GFP_NOFS, 0);
	} else {
		pgoff_t pgoff;
		long rc, id;
		void *kaddr;

		rc = bdev_dax_pgoff(bdev, sector, PAGE_SIZE, &pgoff);
		if (rc)
			return rc;

		id = dax_read_lock();
		rc = dax_direct_access(dax_dev, pgoff, 1, &kaddr, NULL);
		if (rc < 0) {
			dax_read_unlock(id);
			return rc;
		}
		memset(kaddr + offset, 0, size);
		/* Make the zeroes durable on persistent memory. */
		dax_flush(dax_dev, kaddr + offset, size);
		dax_read_unlock(id);
	}
	return 0;
}
EXPORT_SYMBOL_GPL(__dax_zero_page_range);
a254e5681 dax: provide an i... |
1035 |
/*
 * iomap actor for DAX read(2)/write(2): copy up to @length bytes between
 * @data (a struct iov_iter) and the directly-mapped extent @iomap, starting
 * at file position @pos.  Returns bytes transferred, or a negative errno if
 * nothing was transferred.
 */
static loff_t
dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
		struct iomap *iomap)
{
	struct block_device *bdev = iomap->bdev;
	struct dax_device *dax_dev = iomap->dax_dev;
	struct iov_iter *iter = data;
	loff_t end = pos + length, done = 0;
	ssize_t ret = 0;
	size_t xfer;
	int id;

	if (iov_iter_rw(iter) == READ) {
		/* Reads never go past EOF; holes read back as zeroes. */
		end = min(end, i_size_read(inode));
		if (pos >= end)
			return 0;

		if (iomap->type == IOMAP_HOLE || iomap->type == IOMAP_UNWRITTEN)
			return iov_iter_zero(min(length, end - pos), iter);
	}

	if (WARN_ON_ONCE(iomap->type != IOMAP_MAPPED))
		return -EIO;

	/*
	 * Write can allocate block for an area which has a hole page mapped
	 * into page tables. We have to tear down these mappings so that data
	 * written by write(2) is visible in mmap.
	 */
	if (iomap->flags & IOMAP_F_NEW) {
		invalidate_inode_pages2_range(inode->i_mapping,
					      pos >> PAGE_SHIFT,
					      (end - 1) >> PAGE_SHIFT);
	}

	id = dax_read_lock();
	while (pos < end) {
		unsigned offset = pos & (PAGE_SIZE - 1);
		const size_t size = ALIGN(length + offset, PAGE_SIZE);
		const sector_t sector = dax_iomap_sector(iomap, pos);
		ssize_t map_len;
		pgoff_t pgoff;
		void *kaddr;

		if (fatal_signal_pending(current)) {
			ret = -EINTR;
			break;
		}

		ret = bdev_dax_pgoff(bdev, sector, size, &pgoff);
		if (ret)
			break;

		map_len = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size),
				&kaddr, NULL);
		if (map_len < 0) {
			ret = map_len;
			break;
		}

		/* Clamp this chunk to the requested range. */
		map_len = PFN_PHYS(map_len);
		kaddr += offset;
		map_len -= offset;
		if (map_len > end - pos)
			map_len = end - pos;

		/*
		 * The userspace address for the memory copy has already been
		 * validated via access_ok() in either vfs_read() or
		 * vfs_write(), depending on which operation we are doing.
		 */
		if (iov_iter_rw(iter) == WRITE)
			xfer = dax_copy_from_iter(dax_dev, pgoff, kaddr,
					map_len, iter);
		else
			xfer = dax_copy_to_iter(dax_dev, pgoff, kaddr,
					map_len, iter);

		pos += xfer;
		length -= xfer;
		done += xfer;

		/* A short copy means a bad user address; stop here. */
		if (xfer == 0)
			ret = -EFAULT;
		if (xfer < map_len)
			break;
	}
	dax_read_unlock(id);

	return done ? done : ret;
}

/**
11c59c92f dax: correct dax ... |
1123 |
 * dax_iomap_rw - Perform I/O to a DAX file
 * @iocb:	The control block for this I/O
 * @iter:	The addresses to do I/O from or to
 * @ops:	iomap ops passed from the file system
 *
 * This function performs read and write operations to directly mapped
 * persistent memory.  The callers needs to take care of read/write exclusion
 * and evicting any page cache pages in the region under I/O.
 */
ssize_t
dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
		const struct iomap_ops *ops)
{
	struct address_space *mapping = iocb->ki_filp->f_mapping;
	struct inode *inode = mapping->host;
	loff_t pos = iocb->ki_pos, ret = 0, done = 0;
	unsigned flags = 0;

	/* Writers must hold i_rwsem exclusive; readers at least shared. */
	if (iov_iter_rw(iter) == WRITE) {
		lockdep_assert_held_write(&inode->i_rwsem);
		flags |= IOMAP_WRITE;
	} else {
		lockdep_assert_held(&inode->i_rwsem);
	}

	if (iocb->ki_flags & IOCB_NOWAIT)
		flags |= IOMAP_NOWAIT;

	/* Iterate extents until the iter is drained or an error occurs. */
	while (iov_iter_count(iter)) {
		ret = iomap_apply(inode, pos, iov_iter_count(iter), flags, ops,
				iter, dax_iomap_actor);
		if (ret <= 0)
			break;
		pos += ret;
		done += ret;
	}

	iocb->ki_pos += done;
	return done ? done : ret;
}
EXPORT_SYMBOL_GPL(dax_iomap_rw);
a7d73fe6c dax: provide an i... |
1162 |
|
ab77dab46 fs/dax.c: use new... |
1163 |
static vm_fault_t dax_fault_return(int error) |
9f141d6ef dax: Call ->iomap... |
1164 1165 1166 |
{ if (error == 0) return VM_FAULT_NOPAGE; |
c9aed74e6 fs/dax: Convert t... |
1167 |
return vmf_error(error); |
9f141d6ef dax: Call ->iomap... |
1168 |
} |
aaa422c4c fs, dax: unify IO... |
1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 |
/* * MAP_SYNC on a dax mapping guarantees dirty metadata is * flushed on write-faults (non-cow), but not read-faults. */ static bool dax_fault_is_synchronous(unsigned long flags, struct vm_area_struct *vma, struct iomap *iomap) { return (flags & IOMAP_WRITE) && (vma->vm_flags & VM_SYNC) && (iomap->flags & IOMAP_F_DIRTY); } |
ab77dab46 fs/dax.c: use new... |
1179 |
/*
 * dax_iomap_pte_fault - handle a PTE-sized page fault on a DAX file
 * @vmf:	fault description
 * @pfnp:	output pfn for synchronous faults (caller inserts after fsync)
 * @iomap_errp:	optional output for the raw ->iomap_begin error
 * @ops:	filesystem iomap operations
 *
 * Maps exactly one page: looks up the extent via ->iomap_begin, then either
 * copies into a CoW page, inserts the backing pfn, or maps the zero page for
 * read faults over holes.  Returns a VM_FAULT_* code.
 */
static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
			       int *iomap_errp, const struct iomap_ops *ops)
{
	struct vm_area_struct *vma = vmf->vma;
	struct address_space *mapping = vma->vm_file->f_mapping;
	XA_STATE(xas, &mapping->i_pages, vmf->pgoff);
	struct inode *inode = mapping->host;
	unsigned long vaddr = vmf->address;
	loff_t pos = (loff_t)vmf->pgoff << PAGE_SHIFT;
	struct iomap iomap = { 0 };
	unsigned flags = IOMAP_FAULT;
	int error, major = 0;
	bool write = vmf->flags & FAULT_FLAG_WRITE;
	bool sync;
	vm_fault_t ret = 0;
	void *entry;
	pfn_t pfn;

	trace_dax_pte_fault(inode, vmf, ret);
	/*
	 * Check whether offset isn't beyond end of file now. Caller is supposed
	 * to hold locks serializing us with truncate / punch hole so this is
	 * a reliable test.
	 */
	if (pos >= i_size_read(inode)) {
		ret = VM_FAULT_SIGBUS;
		goto out;
	}

	/* CoW faults write to a private copy, not to the DAX extent. */
	if (write && !vmf->cow_page)
		flags |= IOMAP_WRITE;

	entry = grab_mapping_entry(&xas, mapping, 0);
	if (xa_is_internal(entry)) {
		ret = xa_to_internal(entry);
		goto out;
	}

	/*
	 * It is possible, particularly with mixed reads & writes to private
	 * mappings, that we have raced with a PMD fault that overlaps with
	 * the PTE we need to set up.  If so just return and the fault will be
	 * retried.
	 */
	if (pmd_trans_huge(*vmf->pmd) || pmd_devmap(*vmf->pmd)) {
		ret = VM_FAULT_NOPAGE;
		goto unlock_entry;
	}

	/*
	 * Note that we don't bother to use iomap_apply here: DAX required
	 * the file system block size to be equal the page size, which means
	 * that we never have to deal with more than a single extent here.
	 */
	error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags, &iomap);
	if (iomap_errp)
		*iomap_errp = error;
	if (error) {
		ret = dax_fault_return(error);
		goto unlock_entry;
	}
	if (WARN_ON_ONCE(iomap.offset + iomap.length < pos + PAGE_SIZE)) {
		error = -EIO;	/* fs corruption? */
		goto error_finish_iomap;
	}

	if (vmf->cow_page) {
		sector_t sector = dax_iomap_sector(&iomap, pos);

		switch (iomap.type) {
		case IOMAP_HOLE:
		case IOMAP_UNWRITTEN:
			/* Hole: the private copy starts out zeroed. */
			clear_user_highpage(vmf->cow_page, vaddr);
			break;
		case IOMAP_MAPPED:
			error = copy_user_dax(iomap.bdev, iomap.dax_dev,
					sector, PAGE_SIZE, vmf->cow_page, vaddr);
			break;
		default:
			WARN_ON_ONCE(1);
			error = -EIO;
			break;
		}

		if (error)
			goto error_finish_iomap;

		__SetPageUptodate(vmf->cow_page);
		ret = finish_fault(vmf);
		if (!ret)
			ret = VM_FAULT_DONE_COW;
		goto finish_iomap;
	}

	sync = dax_fault_is_synchronous(flags, vma, &iomap);

	switch (iomap.type) {
	case IOMAP_MAPPED:
		/* A freshly allocated block counts as a major fault. */
		if (iomap.flags & IOMAP_F_NEW) {
			count_vm_event(PGMAJFAULT);
			count_memcg_event_mm(vma->vm_mm, PGMAJFAULT);
			major = VM_FAULT_MAJOR;
		}
		error = dax_iomap_pfn(&iomap, pos, PAGE_SIZE, &pfn);
		if (error < 0)
			goto error_finish_iomap;

		entry = dax_insert_entry(&xas, mapping, vmf, entry, pfn,
						 0, write && !sync);

		/*
		 * If we are doing synchronous page fault and inode needs fsync,
		 * we can insert PTE into page tables only after that happens.
		 * Skip insertion for now and return the pfn so that caller can
		 * insert it after fsync is done.
		 */
		if (sync) {
			if (WARN_ON_ONCE(!pfnp)) {
				error = -EIO;
				goto error_finish_iomap;
			}
			*pfnp = pfn;
			ret = VM_FAULT_NEEDDSYNC | major;
			goto finish_iomap;
		}
		trace_dax_insert_mapping(inode, vmf, entry);
		if (write)
			ret = vmf_insert_mixed_mkwrite(vma, vaddr, pfn);
		else
			ret = vmf_insert_mixed(vma, vaddr, pfn);

		goto finish_iomap;
	case IOMAP_UNWRITTEN:
	case IOMAP_HOLE:
		/* Read fault over a hole: map the shared zero page. */
		if (!write) {
			ret = dax_load_hole(&xas, mapping, &entry, vmf);
			goto finish_iomap;
		}
		/*FALLTHRU*/
	default:
		WARN_ON_ONCE(1);
		error = -EIO;
		break;
	}

 error_finish_iomap:
	ret = dax_fault_return(error);
 finish_iomap:
	if (ops->iomap_end) {
		int copied = PAGE_SIZE;

		if (ret & VM_FAULT_ERROR)
			copied = 0;
		/*
		 * The fault is done by now and there's no way back (other
		 * thread may be already happily using PTE we have installed).
		 * Just ignore error from ->iomap_end since we cannot do much
		 * with it.
		 */
		ops->iomap_end(inode, pos, PAGE_SIZE, copied, flags, &iomap);
	}
 unlock_entry:
	dax_unlock_entry(&xas, entry);
 out:
	trace_dax_pte_fault_done(inode, vmf, ret);
	return ret | major;
}
642261ac9 dax: add struct i... |
1338 1339 |
#ifdef CONFIG_FS_DAX_PMD |
b15cd8006 dax: Convert page... |
1340 1341 |
static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf, struct iomap *iomap, void **entry) |
642261ac9 dax: add struct i... |
1342 |
{ |
f42003917 mm, dax: change p... |
1343 1344 |
struct address_space *mapping = vmf->vma->vm_file->f_mapping; unsigned long pmd_addr = vmf->address & PMD_MASK; |
11cf9d863 fs/dax: Deposit p... |
1345 |
struct vm_area_struct *vma = vmf->vma; |
653b2ea33 dax: add tracepoi... |
1346 |
struct inode *inode = mapping->host; |
11cf9d863 fs/dax: Deposit p... |
1347 |
pgtable_t pgtable = NULL; |
642261ac9 dax: add struct i... |
1348 1349 1350 |
struct page *zero_page; spinlock_t *ptl; pmd_t pmd_entry; |
3fe0791c2 dax: store pfns i... |
1351 |
pfn_t pfn; |
642261ac9 dax: add struct i... |
1352 |
|
f42003917 mm, dax: change p... |
1353 |
zero_page = mm_get_huge_zero_page(vmf->vma->vm_mm); |
642261ac9 dax: add struct i... |
1354 1355 |
if (unlikely(!zero_page)) |
653b2ea33 dax: add tracepoi... |
1356 |
goto fallback; |
642261ac9 dax: add struct i... |
1357 |
|
3fe0791c2 dax: store pfns i... |
1358 |
pfn = page_to_pfn_t(zero_page); |
b15cd8006 dax: Convert page... |
1359 |
*entry = dax_insert_entry(xas, mapping, vmf, *entry, pfn, |
3159f943a xarray: Replace e... |
1360 |
DAX_PMD | DAX_ZERO_PAGE, false); |
642261ac9 dax: add struct i... |
1361 |
|
11cf9d863 fs/dax: Deposit p... |
1362 1363 1364 1365 1366 |
if (arch_needs_pgtable_deposit()) { pgtable = pte_alloc_one(vma->vm_mm); if (!pgtable) return VM_FAULT_OOM; } |
f42003917 mm, dax: change p... |
1367 1368 |
ptl = pmd_lock(vmf->vma->vm_mm, vmf->pmd); if (!pmd_none(*(vmf->pmd))) { |
642261ac9 dax: add struct i... |
1369 |
spin_unlock(ptl); |
653b2ea33 dax: add tracepoi... |
1370 |
goto fallback; |
642261ac9 dax: add struct i... |
1371 |
} |
11cf9d863 fs/dax: Deposit p... |
1372 1373 1374 1375 |
if (pgtable) { pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable); mm_inc_nr_ptes(vma->vm_mm); } |
f42003917 mm, dax: change p... |
1376 |
pmd_entry = mk_pmd(zero_page, vmf->vma->vm_page_prot); |
642261ac9 dax: add struct i... |
1377 |
pmd_entry = pmd_mkhuge(pmd_entry); |
f42003917 mm, dax: change p... |
1378 |
set_pmd_at(vmf->vma->vm_mm, pmd_addr, vmf->pmd, pmd_entry); |
642261ac9 dax: add struct i... |
1379 |
spin_unlock(ptl); |
b15cd8006 dax: Convert page... |
1380 |
trace_dax_pmd_load_hole(inode, vmf, zero_page, *entry); |
642261ac9 dax: add struct i... |
1381 |
return VM_FAULT_NOPAGE; |
653b2ea33 dax: add tracepoi... |
1382 1383 |
fallback: |
11cf9d863 fs/dax: Deposit p... |
1384 1385 |
if (pgtable) pte_free(vma->vm_mm, pgtable); |
b15cd8006 dax: Convert page... |
1386 |
trace_dax_pmd_load_hole_fallback(inode, vmf, zero_page, *entry); |
653b2ea33 dax: add tracepoi... |
1387 |
return VM_FAULT_FALLBACK; |
642261ac9 dax: add struct i... |
1388 |
} |
ab77dab46 fs/dax.c: use new... |
1389 |
/*
 * Handle a DAX page fault at PMD granularity.
 *
 * Returns VM_FAULT_FALLBACK whenever a huge mapping cannot be used
 * (misaligned fault, COW of a private mapping, PMD crossing the VMA or
 * EOF, conflicting page cache entry, ...), in which case the caller
 * retries the fault with PTEs.  For synchronous faults the pfn is
 * handed back through @pfnp and VM_FAULT_NEEDDSYNC is returned so the
 * filesystem can fsync before the PMD is installed.
 */
static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
			       const struct iomap_ops *ops)
{
	struct vm_area_struct *vma = vmf->vma;
	struct address_space *mapping = vma->vm_file->f_mapping;
	XA_STATE_ORDER(xas, &mapping->i_pages, vmf->pgoff, PMD_ORDER);
	unsigned long pmd_addr = vmf->address & PMD_MASK;
	bool write = vmf->flags & FAULT_FLAG_WRITE;
	bool sync;
	unsigned int iomap_flags = (write ? IOMAP_WRITE : 0) | IOMAP_FAULT;
	struct inode *inode = mapping->host;
	vm_fault_t result = VM_FAULT_FALLBACK;
	struct iomap iomap = { 0 };
	pgoff_t max_pgoff;
	void *entry;
	loff_t pos;
	int error;
	pfn_t pfn;

	/*
	 * Check whether offset isn't beyond end of file now. Caller is
	 * supposed to hold locks serializing us with truncate / punch hole so
	 * this is a reliable test.
	 */
	max_pgoff = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);

	trace_dax_pmd_fault(inode, vmf, max_pgoff, 0);

	/*
	 * Make sure that the faulting address's PMD offset (color) matches
	 * the PMD offset from the start of the file.  This is necessary so
	 * that a PMD range in the page table overlaps exactly with a PMD
	 * range in the page cache.
	 */
	if ((vmf->pgoff & PG_PMD_COLOUR) !=
	    ((vmf->address >> PAGE_SHIFT) & PG_PMD_COLOUR))
		goto fallback;

	/* Fall back to PTEs if we're going to COW */
	if (write && !(vma->vm_flags & VM_SHARED))
		goto fallback;

	/* If the PMD would extend outside the VMA */
	if (pmd_addr < vma->vm_start)
		goto fallback;
	if ((pmd_addr + PMD_SIZE) > vma->vm_end)
		goto fallback;

	if (xas.xa_index >= max_pgoff) {
		result = VM_FAULT_SIGBUS;
		goto out;
	}

	/* If the PMD would extend beyond the file size */
	if ((xas.xa_index | PG_PMD_COLOUR) >= max_pgoff)
		goto fallback;

	/*
	 * grab_mapping_entry() will make sure we get an empty PMD entry,
	 * a zero PMD entry or a DAX PMD.  If it can't (because a PTE
	 * entry is already in the array, for instance), it will return
	 * VM_FAULT_FALLBACK.
	 */
	entry = grab_mapping_entry(&xas, mapping, PMD_ORDER);
	if (xa_is_internal(entry)) {
		result = xa_to_internal(entry);
		goto fallback;
	}

	/*
	 * It is possible, particularly with mixed reads & writes to private
	 * mappings, that we have raced with a PTE fault that overlaps with
	 * the PMD we need to set up.  If so just return and the fault will be
	 * retried.
	 */
	if (!pmd_none(*vmf->pmd) && !pmd_trans_huge(*vmf->pmd) &&
			!pmd_devmap(*vmf->pmd)) {
		result = 0;
		goto unlock_entry;
	}

	/*
	 * Note that we don't use iomap_apply here.  We aren't doing I/O, only
	 * setting up a mapping, so really we're using iomap_begin() as a way
	 * to look up our filesystem block.
	 */
	pos = (loff_t)xas.xa_index << PAGE_SHIFT;
	error = ops->iomap_begin(inode, pos, PMD_SIZE, iomap_flags, &iomap);
	if (error)
		goto unlock_entry;

	/* A short extent cannot back a full PMD - fall back. */
	if (iomap.offset + iomap.length < pos + PMD_SIZE)
		goto finish_iomap;

	sync = dax_fault_is_synchronous(iomap_flags, vma, &iomap);

	switch (iomap.type) {
	case IOMAP_MAPPED:
		error = dax_iomap_pfn(&iomap, pos, PMD_SIZE, &pfn);
		if (error < 0)
			goto finish_iomap;
		entry = dax_insert_entry(&xas, mapping, vmf, entry, pfn,
						DAX_PMD, write && !sync);

		/*
		 * If we are doing synchronous page fault and inode needs fsync,
		 * we can insert PMD into page tables only after that happens.
		 * Skip insertion for now and return the pfn so that caller can
		 * insert it after fsync is done.
		 */
		if (sync) {
			if (WARN_ON_ONCE(!pfnp))
				goto finish_iomap;
			*pfnp = pfn;
			result = VM_FAULT_NEEDDSYNC;
			goto finish_iomap;
		}

		trace_dax_pmd_insert_mapping(inode, vmf, PMD_SIZE, pfn, entry);
		result = vmf_insert_pfn_pmd(vmf, pfn, write);
		break;
	case IOMAP_UNWRITTEN:
	case IOMAP_HOLE:
		/* Writes must have an allocated block by now. */
		if (WARN_ON_ONCE(write))
			break;
		result = dax_pmd_load_hole(&xas, vmf, &iomap, &entry);
		break;
	default:
		WARN_ON_ONCE(1);
		break;
	}

 finish_iomap:
	if (ops->iomap_end) {
		int copied = PMD_SIZE;

		if (result == VM_FAULT_FALLBACK)
			copied = 0;
		/*
		 * The fault is done by now and there's no way back (other
		 * thread may be already happily using PMD we have installed).
		 * Just ignore error from ->iomap_end since we cannot do much
		 * with it.
		 */
		ops->iomap_end(inode, pos, PMD_SIZE, copied, iomap_flags,
				&iomap);
	}
 unlock_entry:
	dax_unlock_entry(&xas, entry);
 fallback:
	if (result == VM_FAULT_FALLBACK) {
		split_huge_pmd(vma, vmf->pmd, vmf->address);
		count_vm_event(THP_FAULT_FALLBACK);
	}
out:
	trace_dax_pmd_fault_done(inode, vmf, max_pgoff, result);
	return result;
}
a2d581675 mm,fs,dax: change... |
1543 |
#else |
ab77dab46 fs/dax.c: use new... |
1544 |
/* Without CONFIG_FS_DAX_PMD every PMD fault degrades to PTE handling. */
static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
			       const struct iomap_ops *ops)
{
	return VM_FAULT_FALLBACK;
}
642261ac9 dax: add struct i... |
1549 |
#endif /* CONFIG_FS_DAX_PMD */ |
a2d581675 mm,fs,dax: change... |
1550 1551 1552 1553 |
/** * dax_iomap_fault - handle a page fault on a DAX file * @vmf: The description of the fault |
cec04e8c8 dax: Fix comment ... |
1554 |
* @pe_size: Size of the page to fault in |
9a0dd4225 dax: Allow dax_io... |
1555 |
* @pfnp: PFN to insert for synchronous faults if fsync is required |
c0b246259 dax: pass detaile... |
1556 |
* @iomap_errp: Storage for detailed error code in case of error |
cec04e8c8 dax: Fix comment ... |
1557 |
* @ops: Iomap ops passed from the file system |
a2d581675 mm,fs,dax: change... |
1558 1559 1560 1561 1562 1563 |
* * When a page fault occurs, filesystems may call this helper in * their fault handler for DAX files. dax_iomap_fault() assumes the caller * has done all the necessary locking for page fault to proceed * successfully. */ |
ab77dab46 fs/dax.c: use new... |
1564 |
vm_fault_t dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size, |
c0b246259 dax: pass detaile... |
1565 |
pfn_t *pfnp, int *iomap_errp, const struct iomap_ops *ops) |
a2d581675 mm,fs,dax: change... |
1566 |
{ |
c791ace1e mm: replace FAULT... |
1567 1568 |
switch (pe_size) { case PE_SIZE_PTE: |
c0b246259 dax: pass detaile... |
1569 |
return dax_iomap_pte_fault(vmf, pfnp, iomap_errp, ops); |
c791ace1e mm: replace FAULT... |
1570 |
case PE_SIZE_PMD: |
9a0dd4225 dax: Allow dax_io... |
1571 |
return dax_iomap_pmd_fault(vmf, pfnp, ops); |
a2d581675 mm,fs,dax: change... |
1572 1573 1574 1575 1576 |
default: return VM_FAULT_FALLBACK; } } EXPORT_SYMBOL_GPL(dax_iomap_fault); |
71eab6dfd dax: Implement da... |
1577 |
|
a77d19f46 dax: Rename some ... |
1578 |
/* |
71eab6dfd dax: Implement da... |
1579 1580 |
* dax_insert_pfn_mkwrite - insert PTE or PMD entry into page tables * @vmf: The description of the fault |
71eab6dfd dax: Implement da... |
1581 |
* @pfn: PFN to insert |
cfc93c6c6 dax: Convert dax_... |
1582 |
* @order: Order of entry to insert. |
71eab6dfd dax: Implement da... |
1583 |
* |
a77d19f46 dax: Rename some ... |
1584 1585 |
* This function inserts a writeable PTE or PMD entry into the page tables * for an mmaped DAX file. It also marks the page cache entry as dirty. |
71eab6dfd dax: Implement da... |
1586 |
*/ |
cfc93c6c6 dax: Convert dax_... |
1587 1588 |
static vm_fault_t dax_insert_pfn_mkwrite(struct vm_fault *vmf, pfn_t pfn, unsigned int order) |
71eab6dfd dax: Implement da... |
1589 1590 |
{ struct address_space *mapping = vmf->vma->vm_file->f_mapping; |
cfc93c6c6 dax: Convert dax_... |
1591 1592 |
XA_STATE_ORDER(xas, &mapping->i_pages, vmf->pgoff, order); void *entry; |
ab77dab46 fs/dax.c: use new... |
1593 |
vm_fault_t ret; |
71eab6dfd dax: Implement da... |
1594 |
|
cfc93c6c6 dax: Convert dax_... |
1595 |
xas_lock_irq(&xas); |
23c84eb78 dax: Fix missed w... |
1596 |
entry = get_unlocked_entry(&xas, order); |
71eab6dfd dax: Implement da... |
1597 |
/* Did we race with someone splitting entry or so? */ |
23c84eb78 dax: Fix missed w... |
1598 1599 |
if (!entry || dax_is_conflict(entry) || (order == 0 && !dax_is_pte_entry(entry))) { |
cfc93c6c6 dax: Convert dax_... |
1600 1601 |
put_unlocked_entry(&xas, entry); xas_unlock_irq(&xas); |
71eab6dfd dax: Implement da... |
1602 1603 1604 1605 |
trace_dax_insert_pfn_mkwrite_no_entry(mapping->host, vmf, VM_FAULT_NOPAGE); return VM_FAULT_NOPAGE; } |
cfc93c6c6 dax: Convert dax_... |
1606 1607 1608 1609 |
xas_set_mark(&xas, PAGECACHE_TAG_DIRTY); dax_lock_entry(&xas, entry); xas_unlock_irq(&xas); if (order == 0) |
ab77dab46 fs/dax.c: use new... |
1610 |
ret = vmf_insert_mixed_mkwrite(vmf->vma, vmf->address, pfn); |
71eab6dfd dax: Implement da... |
1611 |
#ifdef CONFIG_FS_DAX_PMD |
cfc93c6c6 dax: Convert dax_... |
1612 |
else if (order == PMD_ORDER) |
fce86ff58 mm/huge_memory: f... |
1613 |
ret = vmf_insert_pfn_pmd(vmf, pfn, FAULT_FLAG_WRITE); |
71eab6dfd dax: Implement da... |
1614 |
#endif |
cfc93c6c6 dax: Convert dax_... |
1615 |
else |
ab77dab46 fs/dax.c: use new... |
1616 |
ret = VM_FAULT_FALLBACK; |
cfc93c6c6 dax: Convert dax_... |
1617 |
dax_unlock_entry(&xas, entry); |
ab77dab46 fs/dax.c: use new... |
1618 1619 |
trace_dax_insert_pfn_mkwrite(mapping->host, vmf, ret); return ret; |
71eab6dfd dax: Implement da... |
1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 |
} /** * dax_finish_sync_fault - finish synchronous page fault * @vmf: The description of the fault * @pe_size: Size of entry to be inserted * @pfn: PFN to insert * * This function ensures that the file range touched by the page fault is * stored persistently on the media and handles inserting of appropriate page * table entry. */ |
ab77dab46 fs/dax.c: use new... |
1632 1633 |
vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf,
		enum page_entry_size pe_size, pfn_t pfn)
{
	int err;
	loff_t start = ((loff_t)vmf->pgoff) << PAGE_SHIFT;
	unsigned int order = pe_order(pe_size);
	size_t len = PAGE_SIZE << order;

	/*
	 * Persist the faulted range first; only then is it safe to make
	 * the mapping writeable.
	 */
	err = vfs_fsync_range(vmf->vma->vm_file, start, start + len - 1, 1);
	if (err)
		return VM_FAULT_SIGBUS;
	return dax_insert_pfn_mkwrite(vmf, pfn, order);
}
EXPORT_SYMBOL_GPL(dax_finish_sync_fault);