fs/dax.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * fs/dax.c - Direct Access filesystem code
 * Copyright (c) 2013-2014 Intel Corporation
 * Author: Matthew Wilcox <matthew.r.wilcox@intel.com>
 * Author: Ross Zwisler <ross.zwisler@linux.intel.com>
 */
#include <linux/atomic.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/dax.h>
#include <linux/fs.h>
#include <linux/genhd.h>
#include <linux/highmem.h>
#include <linux/memcontrol.h>
#include <linux/mm.h>
#include <linux/mutex.h>
#include <linux/pagevec.h>
#include <linux/sched.h>
#include <linux/sched/signal.h>
#include <linux/uio.h>
#include <linux/vmstat.h>
#include <linux/pfn_t.h>
#include <linux/sizes.h>
#include <linux/mmu_notifier.h>
#include <linux/iomap.h>
#include <asm/pgalloc.h>

#define CREATE_TRACE_POINTS
#include <trace/events/fs_dax.h>

static inline unsigned int pe_order(enum page_entry_size pe_size)
{
	if (pe_size == PE_SIZE_PTE)
		return PAGE_SHIFT - PAGE_SHIFT;
	if (pe_size == PE_SIZE_PMD)
		return PMD_SHIFT - PAGE_SHIFT;
	if (pe_size == PE_SIZE_PUD)
		return PUD_SHIFT - PAGE_SHIFT;
	return ~0;
}
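
/*
 * For example, with 4K pages on x86-64 (PAGE_SHIFT 12, PMD_SHIFT 21,
 * PUD_SHIFT 30) this yields order 0 for PTE, 9 for PMD and 18 for PUD
 * sized faults; any other size maps to ~0.
 */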

/* We choose 4096 entries - same as per-zone page wait tables */
#define DAX_WAIT_TABLE_BITS 12
#define DAX_WAIT_TABLE_ENTRIES (1 << DAX_WAIT_TABLE_BITS)

/* The 'colour' (ie low bits) within a PMD of a page offset.  */
#define PG_PMD_COLOUR	((PMD_SIZE >> PAGE_SHIFT) - 1)
#define PG_PMD_NR	(PMD_SIZE >> PAGE_SHIFT)

/* The order of a PMD entry */
#define PMD_ORDER	(PMD_SHIFT - PAGE_SHIFT)

static wait_queue_head_t wait_table[DAX_WAIT_TABLE_ENTRIES];

static int __init init_dax_wait_table(void)
{
	int i;

	for (i = 0; i < DAX_WAIT_TABLE_ENTRIES; i++)
		init_waitqueue_head(wait_table + i);
	return 0;
}
fs_initcall(init_dax_wait_table);

/*
 * DAX pagecache entries use XArray value entries so they can't be mistaken
 * for pages.  We use one bit for locking, one bit for the entry size (PMD)
 * and two more to tell us if the entry is a zero page or an empty entry that
 * is just used for locking.  In total four special bits.
 *
 * If the PMD bit isn't set the entry has size PAGE_SIZE, and if the ZERO_PAGE
 * and EMPTY bits aren't set the entry is a normal DAX entry with a filesystem
 * block allocation.
 */
#define DAX_SHIFT	(4)
#define DAX_LOCKED	(1UL << 0)
#define DAX_PMD		(1UL << 1)
#define DAX_ZERO_PAGE	(1UL << 2)
#define DAX_EMPTY	(1UL << 3)
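
/*
 * As an illustration (with an assumed pfn, not taken from the original
 * source): a locked PMD entry for pfn 0x1000 is stored as the xarray value
 * (0x1000 << DAX_SHIFT) | DAX_PMD | DAX_LOCKED == 0x10003.
 */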

static unsigned long dax_to_pfn(void *entry)
{
	return xa_to_value(entry) >> DAX_SHIFT;
}

static void *dax_make_entry(pfn_t pfn, unsigned long flags)
{
	return xa_mk_value(flags | (pfn_t_to_pfn(pfn) << DAX_SHIFT));
}

static bool dax_is_locked(void *entry)
{
	return xa_to_value(entry) & DAX_LOCKED;
}

static unsigned int dax_entry_order(void *entry)
{
	if (xa_to_value(entry) & DAX_PMD)
		return PMD_ORDER;
	return 0;
}

static unsigned long dax_is_pmd_entry(void *entry)
{
	return xa_to_value(entry) & DAX_PMD;
}

static bool dax_is_pte_entry(void *entry)
{
	return !(xa_to_value(entry) & DAX_PMD);
}

static int dax_is_zero_entry(void *entry)
{
	return xa_to_value(entry) & DAX_ZERO_PAGE;
}

static int dax_is_empty_entry(void *entry)
{
	return xa_to_value(entry) & DAX_EMPTY;
}
112 |
/* |
23c84eb78 dax: Fix missed w... |
113 114 115 116 117 118 119 120 121 |
* true if the entry that was found is of a smaller order than the entry * we were looking for */ static bool dax_is_conflict(void *entry) { return entry == XA_RETRY_ENTRY; } /* |
a77d19f46 dax: Rename some ... |
122 |
* DAX page cache entry locking |
ac401cc78 dax: New fault lo... |
123 124 |
*/ struct exceptional_entry_key { |
ec4907ff6 dax: Hash on XArr... |
125 |
struct xarray *xa; |
63e95b5c4 dax: coordinate l... |
126 |
pgoff_t entry_start; |
ac401cc78 dax: New fault lo... |
127 128 129 |
}; struct wait_exceptional_entry_queue { |
ac6424b98 sched/wait: Renam... |
130 |
wait_queue_entry_t wait; |
ac401cc78 dax: New fault lo... |
131 132 |
struct exceptional_entry_key key; }; |

static wait_queue_head_t *dax_entry_waitqueue(struct xa_state *xas,
		void *entry, struct exceptional_entry_key *key)
{
	unsigned long hash;
	unsigned long index = xas->xa_index;

	/*
	 * If 'entry' is a PMD, align the 'index' that we use for the wait
	 * queue to the start of that PMD.  This ensures that all offsets in
	 * the range covered by the PMD map to the same bit lock.
	 */
	if (dax_is_pmd_entry(entry))
		index &= ~PG_PMD_COLOUR;
	key->xa = xas->xa;
	key->entry_start = index;
	hash = hash_long((unsigned long)xas->xa ^ index, DAX_WAIT_TABLE_BITS);
	return wait_table + hash;
}
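
/*
 * Note that different mappings (or different PMD-aligned indices) may hash
 * to the same wait_table bucket; wake_exceptional_entry_func() below filters
 * wakeups by comparing the full {xarray, entry_start} key.
 */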

static int wake_exceptional_entry_func(wait_queue_entry_t *wait,
		unsigned int mode, int sync, void *keyp)
{
	struct exceptional_entry_key *key = keyp;
	struct wait_exceptional_entry_queue *ewait =
		container_of(wait, struct wait_exceptional_entry_queue, wait);

	if (key->xa != ewait->key.xa ||
	    key->entry_start != ewait->key.entry_start)
		return 0;
	return autoremove_wake_function(wait, mode, sync, NULL);
}

/*
 * @entry may no longer be the entry at the index in the mapping.
 * The important information it's conveying is whether the entry at
 * this index used to be a PMD entry.
 */
static void dax_wake_entry(struct xa_state *xas, void *entry, bool wake_all)
{
	struct exceptional_entry_key key;
	wait_queue_head_t *wq;

	wq = dax_entry_waitqueue(xas, entry, &key);

	/*
	 * Checking for locked entry and prepare_to_wait_exclusive() happens
	 * under the i_pages lock, ditto for entry handling in our callers.
	 * So at this point all tasks that could have seen our entry locked
	 * must be in the waitqueue and the following check will see them.
	 */
	if (waitqueue_active(wq))
		__wake_up(wq, TASK_NORMAL, wake_all ? 0 : 1, &key);
}

/*
 * Look up entry in page cache, wait for it to become unlocked if it
 * is a DAX entry and return it.  The caller must subsequently call
 * put_unlocked_entry() if it did not lock the entry or dax_unlock_entry()
 * if it did.  The entry returned may have a larger order than @order.
 * If @order is larger than the order of the entry found in i_pages, this
 * function returns a dax_is_conflict entry.
 *
 * Must be called with the i_pages lock held.
 */
static void *get_unlocked_entry(struct xa_state *xas, unsigned int order)
{
	void *entry;
	struct wait_exceptional_entry_queue ewait;
	wait_queue_head_t *wq;

	init_wait(&ewait.wait);
	ewait.wait.func = wake_exceptional_entry_func;

	for (;;) {
		entry = xas_find_conflict(xas);
		if (!entry || WARN_ON_ONCE(!xa_is_value(entry)))
			return entry;
		if (dax_entry_order(entry) < order)
			return XA_RETRY_ENTRY;
		if (!dax_is_locked(entry))
			return entry;

		wq = dax_entry_waitqueue(xas, entry, &ewait.key);
		prepare_to_wait_exclusive(wq, &ewait.wait,
					  TASK_UNINTERRUPTIBLE);
		xas_unlock_irq(xas);
		xas_reset(xas);
		schedule();
		finish_wait(wq, &ewait.wait);
		xas_lock_irq(xas);
	}
}
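
/*
 * Typical usage, as in __dax_invalidate_entry() below: take the i_pages
 * lock, call get_unlocked_entry(), operate on the entry, then pair it with
 * either put_unlocked_entry() or dax_lock_entry()/dax_unlock_entry().
 */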

/*
 * The only thing keeping the address space around is the i_pages lock
 * (it's cycled in clear_inode() after removing the entries from i_pages)
 * After we call xas_unlock_irq(), we cannot touch xas->xa.
 */
static void wait_entry_unlocked(struct xa_state *xas, void *entry)
{
	struct wait_exceptional_entry_queue ewait;
	wait_queue_head_t *wq;

	init_wait(&ewait.wait);
	ewait.wait.func = wake_exceptional_entry_func;

	wq = dax_entry_waitqueue(xas, entry, &ewait.key);
	/*
	 * Unlike get_unlocked_entry() there is no guarantee that this
	 * path ever successfully retrieves an unlocked entry before an
	 * inode dies. Perform a non-exclusive wait in case this path
	 * never successfully performs its own wake up.
	 */
	prepare_to_wait(wq, &ewait.wait, TASK_UNINTERRUPTIBLE);
	xas_unlock_irq(xas);
	schedule();
	finish_wait(wq, &ewait.wait);
}

static void put_unlocked_entry(struct xa_state *xas, void *entry)
{
	/* If we were the only waiter woken, wake the next one */
	if (entry && !dax_is_conflict(entry))
		dax_wake_entry(xas, entry, false);
}

/*
 * We used the xa_state to get the entry, but then we locked the entry and
 * dropped the xa_lock, so we know the xa_state is stale and must be reset
 * before use.
 */
static void dax_unlock_entry(struct xa_state *xas, void *entry)
{
	void *old;

	BUG_ON(dax_is_locked(entry));
	xas_reset(xas);
	xas_lock_irq(xas);
	old = xas_store(xas, entry);
	xas_unlock_irq(xas);
	BUG_ON(!dax_is_locked(old));
	dax_wake_entry(xas, entry, false);
}

/*
 * Return: The entry stored at this location before it was locked.
 */
static void *dax_lock_entry(struct xa_state *xas, void *entry)
{
	unsigned long v = xa_to_value(entry);

	return xas_store(xas, xa_mk_value(v | DAX_LOCKED));
}

static unsigned long dax_entry_size(void *entry)
{
	if (dax_is_zero_entry(entry))
		return 0;
	else if (dax_is_empty_entry(entry))
		return 0;
	else if (dax_is_pmd_entry(entry))
		return PMD_SIZE;
	else
		return PAGE_SIZE;
}

static unsigned long dax_end_pfn(void *entry)
{
	return dax_to_pfn(entry) + dax_entry_size(entry) / PAGE_SIZE;
}

/*
 * Iterate through all mapped pfns represented by an entry, i.e. skip
 * 'empty' and 'zero' entries.
 */
#define for_each_mapped_pfn(entry, pfn) \
	for (pfn = dax_to_pfn(entry); \
			pfn < dax_end_pfn(entry); pfn++)
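
/*
 * For a PMD entry this walks PMD_SIZE / PAGE_SIZE pfns (512 with 2MB PMDs
 * and 4K pages); zero page and empty entries have size 0 and are skipped
 * entirely.
 */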

/*
 * TODO: for reflink+dax we need a way to associate a single page with
 * multiple address_space instances at different linear_page_index()
 * offsets.
 */
static void dax_associate_entry(void *entry, struct address_space *mapping,
		struct vm_area_struct *vma, unsigned long address)
{
	unsigned long size = dax_entry_size(entry), pfn, index;
	int i = 0;

	if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
		return;

	index = linear_page_index(vma, address & ~(size - 1));
	for_each_mapped_pfn(entry, pfn) {
		struct page *page = pfn_to_page(pfn);

		WARN_ON_ONCE(page->mapping);
		page->mapping = mapping;
		page->index = index + i++;
	}
}

static void dax_disassociate_entry(void *entry, struct address_space *mapping,
		bool trunc)
{
	unsigned long pfn;

	if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
		return;

	for_each_mapped_pfn(entry, pfn) {
		struct page *page = pfn_to_page(pfn);

		WARN_ON_ONCE(trunc && page_ref_count(page) > 1);
		WARN_ON_ONCE(page->mapping && page->mapping != mapping);
		page->mapping = NULL;
		page->index = 0;
	}
}

static struct page *dax_busy_page(void *entry)
{
	unsigned long pfn;

	for_each_mapped_pfn(entry, pfn) {
		struct page *page = pfn_to_page(pfn);

		if (page_ref_count(page) > 1)
			return page;
	}
	return NULL;
}

/*
 * dax_lock_mapping_entry - Lock the DAX entry corresponding to a page
 * @page: The page whose entry we want to lock
 *
 * Context: Process context.
 * Return: A cookie to pass to dax_unlock_page() or 0 if the entry could
 * not be locked.
 */
dax_entry_t dax_lock_page(struct page *page)
{
	XA_STATE(xas, NULL, 0);
	void *entry;

	/* Ensure page->mapping isn't freed while we look at it */
	rcu_read_lock();
	for (;;) {
		struct address_space *mapping = READ_ONCE(page->mapping);

		entry = NULL;
		if (!mapping || !dax_mapping(mapping))
			break;

		/*
		 * In the device-dax case there's no need to lock, a
		 * struct dev_pagemap pin is sufficient to keep the
		 * inode alive, and we assume we have dev_pagemap pin
		 * otherwise we would not have a valid pfn_to_page()
		 * translation.
		 */
		entry = (void *)~0UL;
		if (S_ISCHR(mapping->host->i_mode))
			break;

		xas.xa = &mapping->i_pages;
		xas_lock_irq(&xas);
		if (mapping != page->mapping) {
			xas_unlock_irq(&xas);
			continue;
		}
		xas_set(&xas, page->index);
		entry = xas_load(&xas);
		if (dax_is_locked(entry)) {
			rcu_read_unlock();
			wait_entry_unlocked(&xas, entry);
			rcu_read_lock();
			continue;
		}
		dax_lock_entry(&xas, entry);
		xas_unlock_irq(&xas);
		break;
	}
	rcu_read_unlock();
	return (dax_entry_t)entry;
}

void dax_unlock_page(struct page *page, dax_entry_t cookie)
{
	struct address_space *mapping = page->mapping;
	XA_STATE(xas, &mapping->i_pages, page->index);

	if (S_ISCHR(mapping->host->i_mode))
		return;

	dax_unlock_entry(&xas, (void *)cookie);
}
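
/*
 * The cookie returned by dax_lock_page() is simply the entry value that was
 * in the xarray before it was locked: 0 means the entry could not be locked,
 * ~0UL is used for the device-dax (S_ISCHR) case where no locking is needed,
 * and anything else is handed back to dax_unlock_entry() above.
 */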

/*
 * Find page cache entry at given index. If it is a DAX entry, return it
 * with the entry locked. If the page cache doesn't contain an entry at
 * that index, add a locked empty entry.
 *
 * When requesting an entry with size DAX_PMD, grab_mapping_entry() will
 * either return that locked entry or will return VM_FAULT_FALLBACK.
 * This will happen if there are any PTE entries within the PMD range
 * that we are requesting.
 *
 * We always favor PTE entries over PMD entries. There isn't a flow where we
 * evict PTE entries in order to 'upgrade' them to a PMD entry.  A PMD
 * insertion will fail if it finds any PTE entries already in the tree, and a
 * PTE insertion will cause an existing PMD entry to be unmapped and
 * downgraded to PTE entries.  This happens for both PMD zero pages as
 * well as PMD empty entries.
 *
 * The exception to this downgrade path is for PMD entries that have
 * real storage backing them.  We will leave these real PMD entries in
 * the tree, and PTE writes will simply dirty the entire PMD entry.
 *
 * Note: Unlike filemap_fault() we don't honor FAULT_FLAG_RETRY flags. For
 * persistent memory the benefit is doubtful. We can add that later if we can
 * show it helps.
 *
 * On error, this function does not return an ERR_PTR.  Instead it returns
 * a VM_FAULT code, encoded as an xarray internal entry.  The ERR_PTR values
 * overlap with xarray value entries.
 */
static void *grab_mapping_entry(struct xa_state *xas,
		struct address_space *mapping, unsigned int order)
{
	unsigned long index = xas->xa_index;
	bool pmd_downgrade = false; /* splitting PMD entry into PTE entries? */
	void *entry;

retry:
	xas_lock_irq(xas);
	entry = get_unlocked_entry(xas, order);

	if (entry) {
		if (dax_is_conflict(entry))
			goto fallback;
		if (!xa_is_value(entry)) {
			xas_set_err(xas, -EIO);
			goto out_unlock;
		}

		if (order == 0) {
			if (dax_is_pmd_entry(entry) &&
			    (dax_is_zero_entry(entry) ||
			     dax_is_empty_entry(entry))) {
				pmd_downgrade = true;
			}
		}
	}

	if (pmd_downgrade) {
		/*
		 * Make sure 'entry' remains valid while we drop
		 * the i_pages lock.
		 */
		dax_lock_entry(xas, entry);

		/*
		 * Besides huge zero pages the only other thing that gets
		 * downgraded are empty entries which don't need to be
		 * unmapped.
		 */
		if (dax_is_zero_entry(entry)) {
			xas_unlock_irq(xas);
			unmap_mapping_pages(mapping,
					xas->xa_index & ~PG_PMD_COLOUR,
					PG_PMD_NR, false);
			xas_reset(xas);
			xas_lock_irq(xas);
		}

		dax_disassociate_entry(entry, mapping, false);
		xas_store(xas, NULL);	/* undo the PMD join */
		dax_wake_entry(xas, entry, true);
		mapping->nrexceptional--;
		entry = NULL;
		xas_set(xas, index);
	}

	if (entry) {
		dax_lock_entry(xas, entry);
	} else {
		unsigned long flags = DAX_EMPTY;

		if (order > 0)
			flags |= DAX_PMD;
		entry = dax_make_entry(pfn_to_pfn_t(0), flags);
		dax_lock_entry(xas, entry);
		if (xas_error(xas))
			goto out_unlock;
		mapping->nrexceptional++;
	}

out_unlock:
	xas_unlock_irq(xas);
	if (xas_nomem(xas, mapping_gfp_mask(mapping) & ~__GFP_HIGHMEM))
		goto retry;
	if (xas->xa_node == XA_ERROR(-ENOMEM))
		return xa_mk_internal(VM_FAULT_OOM);
	if (xas_error(xas))
		return xa_mk_internal(VM_FAULT_SIGBUS);
	return entry;
fallback:
	xas_unlock_irq(xas);
	return xa_mk_internal(VM_FAULT_FALLBACK);
}
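
/*
 * Callers must check the return value with xa_is_internal() and, if set,
 * convert it back to a vm_fault_t via xa_to_internal(), as
 * dax_iomap_pte_fault() does below.
 */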

/**
 * dax_layout_busy_page_range - find first pinned page in @mapping
 * @mapping: address space to scan for a page with ref count > 1
 * @start: Starting offset. Page containing 'start' is included.
 * @end: End offset. Page containing 'end' is included. If 'end' is LLONG_MAX,
 *       pages from 'start' till the end of file are included.
 *
 * DAX requires ZONE_DEVICE mapped pages. These pages are never
 * 'onlined' to the page allocator so they are considered idle when
 * page->count == 1. A filesystem uses this interface to determine if
 * any page in the mapping is busy, i.e. for DMA, or other
 * get_user_pages() usages.
 *
 * It is expected that the filesystem is holding locks to block the
 * establishment of new mappings in this address_space. I.e. it expects
 * to be able to run unmap_mapping_range() and subsequently not race
 * mapping_mapped() becoming true.
 */
struct page *dax_layout_busy_page_range(struct address_space *mapping,
					loff_t start, loff_t end)
{
	void *entry;
	unsigned int scanned = 0;
	struct page *page = NULL;
	pgoff_t start_idx = start >> PAGE_SHIFT;
	pgoff_t end_idx;
	XA_STATE(xas, &mapping->i_pages, start_idx);

	/*
	 * In the 'limited' case get_user_pages() for dax is disabled.
	 */
	if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
		return NULL;

	if (!dax_mapping(mapping) || !mapping_mapped(mapping))
		return NULL;

	/* If end == LLONG_MAX, all pages from start to till end of file */
	if (end == LLONG_MAX)
		end_idx = ULONG_MAX;
	else
		end_idx = end >> PAGE_SHIFT;
	/*
	 * If we race get_user_pages_fast() here either we'll see the
	 * elevated page count in the iteration and wait, or
	 * get_user_pages_fast() will see that the page it took a reference
	 * against is no longer mapped in the page tables and bail to the
	 * get_user_pages() slow path.  The slow path is protected by
	 * pte_lock() and pmd_lock(). New references are not taken without
	 * holding those locks, and unmap_mapping_pages() will not zero the
	 * pte or pmd without holding the respective lock, so we are
	 * guaranteed to either see new references or prevent new
	 * references from being established.
	 */
	unmap_mapping_pages(mapping, start_idx, end_idx - start_idx + 1, 0);

	xas_lock_irq(&xas);
	xas_for_each(&xas, entry, end_idx) {
		if (WARN_ON_ONCE(!xa_is_value(entry)))
			continue;
		if (unlikely(dax_is_locked(entry)))
			entry = get_unlocked_entry(&xas, 0);
		if (entry)
			page = dax_busy_page(entry);
		put_unlocked_entry(&xas, entry);
		if (page)
			break;
		if (++scanned % XA_CHECK_SCHED)
			continue;

		xas_pause(&xas);
		xas_unlock_irq(&xas);
		cond_resched();
		xas_lock_irq(&xas);
	}
	xas_unlock_irq(&xas);
	return page;
}
EXPORT_SYMBOL_GPL(dax_layout_busy_page_range);

struct page *dax_layout_busy_page(struct address_space *mapping)
{
	return dax_layout_busy_page_range(mapping, 0, LLONG_MAX);
}
EXPORT_SYMBOL_GPL(dax_layout_busy_page);

static int __dax_invalidate_entry(struct address_space *mapping,
					  pgoff_t index, bool trunc)
{
	XA_STATE(xas, &mapping->i_pages, index);
	int ret = 0;
	void *entry;

	xas_lock_irq(&xas);
	entry = get_unlocked_entry(&xas, 0);
	if (!entry || WARN_ON_ONCE(!xa_is_value(entry)))
		goto out;
	if (!trunc &&
	    (xas_get_mark(&xas, PAGECACHE_TAG_DIRTY) ||
	     xas_get_mark(&xas, PAGECACHE_TAG_TOWRITE)))
		goto out;
	dax_disassociate_entry(entry, mapping, trunc);
	xas_store(&xas, NULL);
	mapping->nrexceptional--;
	ret = 1;
out:
	put_unlocked_entry(&xas, entry);
	xas_unlock_irq(&xas);
	return ret;
}

/*
 * Delete DAX entry at @index from @mapping.  Wait for it
 * to be unlocked before deleting it.
 */
int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index)
{
	int ret = __dax_invalidate_entry(mapping, index, true);

	/*
	 * This gets called from truncate / punch_hole path. As such, the caller
	 * must hold locks protecting against concurrent modifications of the
	 * page cache (usually fs-private i_mmap_sem for writing). Since the
	 * caller has seen a DAX entry for this index, we better find it
	 * at that index as well...
	 */
	WARN_ON_ONCE(!ret);
	return ret;
}

/*
 * Invalidate DAX entry if it is clean.
 */
int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
				      pgoff_t index)
{
	return __dax_invalidate_entry(mapping, index, false);
}

static int copy_cow_page_dax(struct block_device *bdev, struct dax_device *dax_dev,
			     sector_t sector, struct page *to, unsigned long vaddr)
{
	void *vto, *kaddr;
	pgoff_t pgoff;
	long rc;
	int id;

	rc = bdev_dax_pgoff(bdev, sector, PAGE_SIZE, &pgoff);
	if (rc)
		return rc;

	id = dax_read_lock();
	rc = dax_direct_access(dax_dev, pgoff, PHYS_PFN(PAGE_SIZE), &kaddr, NULL);
	if (rc < 0) {
		dax_read_unlock(id);
		return rc;
	}
	vto = kmap_atomic(to);
	copy_user_page(vto, (void __force *)kaddr, vaddr, to);
	kunmap_atomic(vto);
	dax_read_unlock(id);
	return 0;
}

/*
 * By this point grab_mapping_entry() has ensured that we have a locked entry
 * of the appropriate size so we don't have to worry about downgrading PMDs to
 * PTEs.  If we happen to be trying to insert a PTE and there is a PMD
 * already in the tree, we will skip the insertion and just dirty the PMD as
 * appropriate.
 */
static void *dax_insert_entry(struct xa_state *xas,
		struct address_space *mapping, struct vm_fault *vmf,
		void *entry, pfn_t pfn, unsigned long flags, bool dirty)
{
	void *new_entry = dax_make_entry(pfn, flags);

	if (dirty)
		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);

	if (dax_is_zero_entry(entry) && !(flags & DAX_ZERO_PAGE)) {
		unsigned long index = xas->xa_index;
		/* we are replacing a zero page with block mapping */
		if (dax_is_pmd_entry(entry))
			unmap_mapping_pages(mapping, index & ~PG_PMD_COLOUR,
					PG_PMD_NR, false);
		else /* pte entry */
			unmap_mapping_pages(mapping, index, 1, false);
	}

	xas_reset(xas);
	xas_lock_irq(xas);
	if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) {
		void *old;

		dax_disassociate_entry(entry, mapping, false);
		dax_associate_entry(new_entry, mapping, vmf->vma, vmf->address);
		/*
		 * Only swap our new entry into the page cache if the current
		 * entry is a zero page or an empty entry.  If a normal PTE or
		 * PMD entry is already in the cache, we leave it alone.  This
		 * means that if we are trying to insert a PTE and the
		 * existing entry is a PMD, we will just leave the PMD in the
		 * tree and dirty it if necessary.
		 */
		old = dax_lock_entry(xas, new_entry);
		WARN_ON_ONCE(old != xa_mk_value(xa_to_value(entry) |
					DAX_LOCKED));
		entry = new_entry;
	} else {
		xas_load(xas);	/* Walk the xa_state */
	}

	if (dirty)
		xas_set_mark(xas, PAGECACHE_TAG_DIRTY);

	xas_unlock_irq(xas);
	return entry;
}

static inline unsigned long
pgoff_address(pgoff_t pgoff, struct vm_area_struct *vma)
{
	unsigned long address;

	address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
	VM_BUG_ON_VMA(address < vma->vm_start || address >= vma->vm_end, vma);
	return address;
}

/* Walk all mappings of a given index of a file and writeprotect them */
static void dax_entry_mkclean(struct address_space *mapping, pgoff_t index,
		unsigned long pfn)
{
	struct vm_area_struct *vma;
	pte_t pte, *ptep = NULL;
	pmd_t *pmdp = NULL;
	spinlock_t *ptl;

	i_mmap_lock_read(mapping);
	vma_interval_tree_foreach(vma, &mapping->i_mmap, index, index) {
		struct mmu_notifier_range range;
		unsigned long address;

		cond_resched();

		if (!(vma->vm_flags & VM_SHARED))
			continue;

		address = pgoff_address(index, vma);

		/*
		 * Note because we provide range to follow_pte_pmd it will
		 * call mmu_notifier_invalidate_range_start() on our behalf
		 * before taking any lock.
		 */
		if (follow_pte_pmd(vma->vm_mm, address, &range,
				   &ptep, &pmdp, &ptl))
			continue;

		/*
		 * No need to call mmu_notifier_invalidate_range() as we are
		 * downgrading page table protection not changing it to point
		 * to a new page.
		 *
		 * See Documentation/vm/mmu_notifier.rst
		 */
		if (pmdp) {
#ifdef CONFIG_FS_DAX_PMD
			pmd_t pmd;

			if (pfn != pmd_pfn(*pmdp))
				goto unlock_pmd;
			if (!pmd_dirty(*pmdp) && !pmd_write(*pmdp))
				goto unlock_pmd;

			flush_cache_page(vma, address, pfn);
			pmd = pmdp_invalidate(vma, address, pmdp);
			pmd = pmd_wrprotect(pmd);
			pmd = pmd_mkclean(pmd);
			set_pmd_at(vma->vm_mm, address, pmdp, pmd);
unlock_pmd:
#endif
			spin_unlock(ptl);
		} else {
			if (pfn != pte_pfn(*ptep))
				goto unlock_pte;
			if (!pte_dirty(*ptep) && !pte_write(*ptep))
				goto unlock_pte;

			flush_cache_page(vma, address, pfn);
			pte = ptep_clear_flush(vma, address, ptep);
			pte = pte_wrprotect(pte);
			pte = pte_mkclean(pte);
			set_pte_at(vma->vm_mm, address, ptep, pte);
unlock_pte:
			pte_unmap_unlock(ptep, ptl);
		}

		mmu_notifier_invalidate_range_end(&range);
	}
	i_mmap_unlock_read(mapping);
}

static int dax_writeback_one(struct xa_state *xas, struct dax_device *dax_dev,
		struct address_space *mapping, void *entry)
{
	unsigned long pfn, index, count;
	long ret = 0;

	/*
	 * A page got tagged dirty in DAX mapping? Something is seriously
	 * wrong.
	 */
	if (WARN_ON(!xa_is_value(entry)))
		return -EIO;

	if (unlikely(dax_is_locked(entry))) {
		void *old_entry = entry;

		entry = get_unlocked_entry(xas, 0);

		/* Entry got punched out / reallocated? */
		if (!entry || WARN_ON_ONCE(!xa_is_value(entry)))
			goto put_unlocked;
		/*
		 * Entry got reallocated elsewhere? No need to writeback.
		 * We have to compare pfns as we must not bail out due to
		 * difference in lockbit or entry type.
		 */
		if (dax_to_pfn(old_entry) != dax_to_pfn(entry))
			goto put_unlocked;
		if (WARN_ON_ONCE(dax_is_empty_entry(entry) ||
					dax_is_zero_entry(entry))) {
			ret = -EIO;
			goto put_unlocked;
		}

		/* Another fsync thread may have already done this entry */
		if (!xas_get_mark(xas, PAGECACHE_TAG_TOWRITE))
			goto put_unlocked;
	}

	/* Lock the entry to serialize with page faults */
	dax_lock_entry(xas, entry);

	/*
	 * We can clear the tag now but we have to be careful so that concurrent
	 * dax_writeback_one() calls for the same index cannot finish before we
	 * actually flush the caches. This is achieved as the calls will look
	 * at the entry only under the i_pages lock and once they do that
	 * they will see the entry locked and wait for it to unlock.
	 */
	xas_clear_mark(xas, PAGECACHE_TAG_TOWRITE);
	xas_unlock_irq(xas);

	/*
	 * If dax_writeback_mapping_range() was given a wbc->range_start
	 * in the middle of a PMD, the 'index' we use needs to be
	 * aligned to the start of the PMD.
	 * This allows us to flush for PMD_SIZE and not have to worry about
	 * partial PMD writebacks.
	 */
	pfn = dax_to_pfn(entry);
	count = 1UL << dax_entry_order(entry);
	index = xas->xa_index & ~(count - 1);

	dax_entry_mkclean(mapping, index, pfn);
	dax_flush(dax_dev, page_address(pfn_to_page(pfn)), count * PAGE_SIZE);
	/*
	 * After we have flushed the cache, we can clear the dirty tag. There
	 * cannot be new dirty data in the pfn after the flush has completed as
	 * the pfn mappings are writeprotected and fault waits for mapping
	 * entry lock.
	 */
	xas_reset(xas);
	xas_lock_irq(xas);
	xas_store(xas, entry);
	xas_clear_mark(xas, PAGECACHE_TAG_DIRTY);
	dax_wake_entry(xas, entry, false);

	trace_dax_writeback_one(mapping->host, index, count);
	return ret;

 put_unlocked:
	put_unlocked_entry(xas, entry);
	return ret;
}

/*
 * Flush the mapping to the persistent domain within the byte range of [start,
 * end]. This is required by data integrity operations to ensure file data is
 * on persistent storage prior to completion of the operation.
 */
int dax_writeback_mapping_range(struct address_space *mapping,
		struct dax_device *dax_dev, struct writeback_control *wbc)
{
	XA_STATE(xas, &mapping->i_pages, wbc->range_start >> PAGE_SHIFT);
	struct inode *inode = mapping->host;
	pgoff_t end_index = wbc->range_end >> PAGE_SHIFT;
	void *entry;
	int ret = 0;
	unsigned int scanned = 0;

	if (WARN_ON_ONCE(inode->i_blkbits != PAGE_SHIFT))
		return -EIO;

	if (!mapping->nrexceptional || wbc->sync_mode != WB_SYNC_ALL)
		return 0;

	trace_dax_writeback_range(inode, xas.xa_index, end_index);

	tag_pages_for_writeback(mapping, xas.xa_index, end_index);

	xas_lock_irq(&xas);
	xas_for_each_marked(&xas, entry, end_index, PAGECACHE_TAG_TOWRITE) {
		ret = dax_writeback_one(&xas, dax_dev, mapping, entry);
		if (ret < 0) {
			mapping_set_error(mapping, ret);
			break;
		}
		if (++scanned % XA_CHECK_SCHED)
			continue;

		xas_pause(&xas);
		xas_unlock_irq(&xas);
		cond_resched();
		xas_lock_irq(&xas);
	}
	xas_unlock_irq(&xas);
	trace_dax_writeback_range_done(inode, xas.xa_index, end_index);
	return ret;
}
EXPORT_SYMBOL_GPL(dax_writeback_mapping_range);

static sector_t dax_iomap_sector(struct iomap *iomap, loff_t pos)
{
	return (iomap->addr + (pos & PAGE_MASK) - iomap->offset) >> 9;
}
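
/*
 * iomap->addr and pos are in bytes; the shift by 9 converts the resulting
 * byte offset into a 512-byte sector number for bdev_dax_pgoff().
 */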

static int dax_iomap_pfn(struct iomap *iomap, loff_t pos, size_t size,
			 pfn_t *pfnp)
{
	const sector_t sector = dax_iomap_sector(iomap, pos);
	pgoff_t pgoff;
	int id, rc;
	long length;

	rc = bdev_dax_pgoff(iomap->bdev, sector, size, &pgoff);
	if (rc)
		return rc;
	id = dax_read_lock();
	length = dax_direct_access(iomap->dax_dev, pgoff, PHYS_PFN(size),
				   NULL, pfnp);
	if (length < 0) {
		rc = length;
		goto out;
	}
	rc = -EINVAL;
	if (PFN_PHYS(length) < size)
		goto out;
	if (pfn_t_to_pfn(*pfnp) & (PHYS_PFN(size)-1))
		goto out;
	/* For larger pages we need devmap */
	if (length > 1 && !pfn_t_devmap(*pfnp))
		goto out;
	rc = 0;
out:
	dax_read_unlock(id);
	return rc;
}

/*
 * The user has performed a load from a hole in the file.  Allocating a new
 * page in the file would cause excessive storage usage for workloads with
 * sparse files.  Instead we insert a read-only mapping of the 4k zero page.
 * If this page is ever written to we will re-fault and change the mapping to
 * point to real DAX storage instead.
 */
static vm_fault_t dax_load_hole(struct xa_state *xas,
		struct address_space *mapping, void **entry,
		struct vm_fault *vmf)
{
	struct inode *inode = mapping->host;
	unsigned long vaddr = vmf->address;
	pfn_t pfn = pfn_to_pfn_t(my_zero_pfn(vaddr));
	vm_fault_t ret;

	*entry = dax_insert_entry(xas, mapping, vmf, *entry, pfn,
			DAX_ZERO_PAGE, false);

	ret = vmf_insert_mixed(vmf->vma, vaddr, pfn);
	trace_dax_load_hole(inode, vmf, ret);
	return ret;
}

s64 dax_iomap_zero(loff_t pos, u64 length, struct iomap *iomap)
{
	sector_t sector = iomap_sector(iomap, pos & PAGE_MASK);
	pgoff_t pgoff;
	long rc, id;
	void *kaddr;
	bool page_aligned = false;
	unsigned offset = offset_in_page(pos);
	unsigned size = min_t(u64, PAGE_SIZE - offset, length);

	if (IS_ALIGNED(sector << SECTOR_SHIFT, PAGE_SIZE) &&
	    (size == PAGE_SIZE))
		page_aligned = true;

	rc = bdev_dax_pgoff(iomap->bdev, sector, PAGE_SIZE, &pgoff);
	if (rc)
		return rc;

	id = dax_read_lock();

	if (page_aligned)
		rc = dax_zero_page_range(iomap->dax_dev, pgoff, 1);
	else
		rc = dax_direct_access(iomap->dax_dev, pgoff, 1, &kaddr, NULL);
	if (rc < 0) {
		dax_read_unlock(id);
		return rc;
	}

	if (!page_aligned) {
		memset(kaddr + offset, 0, size);
		dax_flush(iomap->dax_dev, kaddr + offset, size);
	}
	dax_read_unlock(id);
	return size;
}

static loff_t
dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
		struct iomap *iomap, struct iomap *srcmap)
{
	struct block_device *bdev = iomap->bdev;
	struct dax_device *dax_dev = iomap->dax_dev;
	struct iov_iter *iter = data;
	loff_t end = pos + length, done = 0;
	ssize_t ret = 0;
	size_t xfer;
	int id;

	if (iov_iter_rw(iter) == READ) {
		end = min(end, i_size_read(inode));
		if (pos >= end)
			return 0;

		if (iomap->type == IOMAP_HOLE || iomap->type == IOMAP_UNWRITTEN)
			return iov_iter_zero(min(length, end - pos), iter);
	}

	if (WARN_ON_ONCE(iomap->type != IOMAP_MAPPED))
		return -EIO;

	/*
	 * Write can allocate block for an area which has a hole page mapped
	 * into page tables. We have to tear down these mappings so that data
	 * written by write(2) is visible in mmap.
	 */
	if (iomap->flags & IOMAP_F_NEW) {
		invalidate_inode_pages2_range(inode->i_mapping,
					      pos >> PAGE_SHIFT,
					      (end - 1) >> PAGE_SHIFT);
	}

	id = dax_read_lock();
	while (pos < end) {
		unsigned offset = pos & (PAGE_SIZE - 1);
		const size_t size = ALIGN(length + offset, PAGE_SIZE);
		const sector_t sector = dax_iomap_sector(iomap, pos);
		ssize_t map_len;
		pgoff_t pgoff;
		void *kaddr;

		if (fatal_signal_pending(current)) {
			ret = -EINTR;
			break;
		}

		ret = bdev_dax_pgoff(bdev, sector, size, &pgoff);
		if (ret)
			break;

		map_len = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size),
				&kaddr, NULL);
		if (map_len < 0) {
			ret = map_len;
			break;
		}

		map_len = PFN_PHYS(map_len);
		kaddr += offset;
		map_len -= offset;
		if (map_len > end - pos)
			map_len = end - pos;

		/*
		 * The userspace address for the memory copy has already been
		 * validated via access_ok() in either vfs_read() or
		 * vfs_write(), depending on which operation we are doing.
		 */
		if (iov_iter_rw(iter) == WRITE)
			xfer = dax_copy_from_iter(dax_dev, pgoff, kaddr,
					map_len, iter);
		else
			xfer = dax_copy_to_iter(dax_dev, pgoff, kaddr,
					map_len, iter);

		pos += xfer;
		length -= xfer;
		done += xfer;

		if (xfer == 0)
			ret = -EFAULT;
		if (xfer < map_len)
			break;
	}
	dax_read_unlock(id);

	return done ? done : ret;
}

/**
 * dax_iomap_rw - Perform I/O to a DAX file
 * @iocb:	The control block for this I/O
 * @iter:	The addresses to do I/O from or to
 * @ops:	iomap ops passed from the file system
 *
 * This function performs read and write operations to directly mapped
 * persistent memory.  The callers needs to take care of read/write exclusion
 * and evicting any page cache pages in the region under I/O.
 */
ssize_t
dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
		const struct iomap_ops *ops)
{
	struct address_space *mapping = iocb->ki_filp->f_mapping;
	struct inode *inode = mapping->host;
	loff_t pos = iocb->ki_pos, ret = 0, done = 0;
	unsigned flags = 0;

	if (iov_iter_rw(iter) == WRITE) {
		lockdep_assert_held_write(&inode->i_rwsem);
		flags |= IOMAP_WRITE;
	} else {
		lockdep_assert_held(&inode->i_rwsem);
	}

	if (iocb->ki_flags & IOCB_NOWAIT)
		flags |= IOMAP_NOWAIT;

	while (iov_iter_count(iter)) {
		ret = iomap_apply(inode, pos, iov_iter_count(iter), flags, ops,
				iter, dax_iomap_actor);
		if (ret <= 0)
			break;
		pos += ret;
		done += ret;
	}

	iocb->ki_pos += done;
	return done ? done : ret;
}
EXPORT_SYMBOL_GPL(dax_iomap_rw);
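
/*
 * Filesystems typically call dax_iomap_rw() from their ->read_iter and
 * ->write_iter methods while holding i_rwsem, which is what the lockdep
 * assertions above expect.
 */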

static vm_fault_t dax_fault_return(int error)
{
	if (error == 0)
		return VM_FAULT_NOPAGE;
	return vmf_error(error);
}

/*
 * MAP_SYNC on a dax mapping guarantees dirty metadata is
 * flushed on write-faults (non-cow), but not read-faults.
 */
static bool dax_fault_is_synchronous(unsigned long flags,
		struct vm_area_struct *vma, struct iomap *iomap)
{
	return (flags & IOMAP_WRITE) && (vma->vm_flags & VM_SYNC)
		&& (iomap->flags & IOMAP_F_DIRTY);
}
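
/*
 * VM_SYNC is set when userspace maps the file with
 * mmap(..., MAP_SHARED_VALIDATE | MAP_SYNC), and IOMAP_F_DIRTY is reported
 * by the filesystem when the mapping still has uncommitted metadata, so a
 * synchronous fault must flush it before returning.
 */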
ab77dab46 fs/dax.c: use new... |
1183 |
static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp, |
c0b246259 dax: pass detaile... |
1184 |
int *iomap_errp, const struct iomap_ops *ops) |
a7d73fe6c dax: provide an i... |
1185 |
{ |
a0987ad5c dax: Create local... |
1186 1187 |
    struct vm_area_struct *vma = vmf->vma;
    struct address_space *mapping = vma->vm_file->f_mapping;
b15cd8006 dax: Convert page... |
1188 |
XA_STATE(xas, &mapping->i_pages, vmf->pgoff); |
a7d73fe6c dax: provide an i... |
1189 |
struct inode *inode = mapping->host; |
1a29d85eb mm: use vmf->addr... |
1190 |
unsigned long vaddr = vmf->address; |
a7d73fe6c dax: provide an i... |
1191 |
loff_t pos = (loff_t)vmf->pgoff << PAGE_SHIFT; |
c039b9979 iomap: use a srcm... |
1192 1193 |
    struct iomap iomap = { .type = IOMAP_HOLE };
    struct iomap srcmap = { .type = IOMAP_HOLE };
9484ab1bf dax: Introduce IO... |
1194 |
unsigned flags = IOMAP_FAULT; |
a7d73fe6c dax: provide an i... |
1195 |
int error, major = 0; |
d2c43ef13 dax: Create local... |
1196 |
bool write = vmf->flags & FAULT_FLAG_WRITE; |
caa51d26f dax, iomap: Add s... |
1197 |
bool sync; |
ab77dab46 fs/dax.c: use new... |
1198 |
vm_fault_t ret = 0; |
a7d73fe6c dax: provide an i... |
1199 |
void *entry; |
1b5a1cb21 dax: Inline dax_i... |
1200 |
pfn_t pfn; |
a7d73fe6c dax: provide an i... |
1201 |
|
ab77dab46 fs/dax.c: use new... |
1202 |
trace_dax_pte_fault(inode, vmf, ret); |
a7d73fe6c dax: provide an i... |
1203 1204 1205 1206 1207 |
    /*
     * Check whether offset isn't beyond end of file now. Caller is supposed
     * to hold locks serializing us with truncate / punch hole so this is
     * a reliable test.
     */
a9c42b33e dax: add tracepoi... |
1208 |
if (pos >= i_size_read(inode)) { |
ab77dab46 fs/dax.c: use new... |
1209 |
ret = VM_FAULT_SIGBUS; |
a9c42b33e dax: add tracepoi... |
1210 1211 |
        goto out;
    }
a7d73fe6c dax: provide an i... |
1212 |
|
d2c43ef13 dax: Create local... |
1213 |
if (write && !vmf->cow_page) |
a7d73fe6c dax: provide an i... |
1214 |
flags |= IOMAP_WRITE; |
b15cd8006 dax: Convert page... |
1215 1216 1217 |
    entry = grab_mapping_entry(&xas, mapping, 0);
    if (xa_is_internal(entry)) {
        ret = xa_to_internal(entry);
13e451fdc dax: fix data cor... |
1218 1219 |
        goto out;
    }
a7d73fe6c dax: provide an i... |
1220 |
/* |
e2093926a dax: fix race bet... |
1221 1222 1223 1224 1225 1226 |
     * It is possible, particularly with mixed reads & writes to private
     * mappings, that we have raced with a PMD fault that overlaps with
     * the PTE we need to set up. If so just return and the fault will be
     * retried.
     */
    if (pmd_trans_huge(*vmf->pmd) || pmd_devmap(*vmf->pmd)) {
ab77dab46 fs/dax.c: use new... |
1227 |
ret = VM_FAULT_NOPAGE; |
e2093926a dax: fix race bet... |
1228 1229 1230 1231 |
goto unlock_entry; } /* |
a7d73fe6c dax: provide an i... |
1232 1233 1234 1235 |
     * Note that we don't bother to use iomap_apply here: DAX requires
     * the file system block size to be equal to the page size, which means
     * that we never have to deal with more than a single extent here.
     */
c039b9979 iomap: use a srcm... |
1236 |
error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags, &iomap, &srcmap); |
c0b246259 dax: pass detaile... |
1237 1238 |
    if (iomap_errp)
        *iomap_errp = error;
a9c42b33e dax: add tracepoi... |
1239 |
if (error) { |
ab77dab46 fs/dax.c: use new... |
1240 |
ret = dax_fault_return(error); |
13e451fdc dax: fix data cor... |
1241 |
goto unlock_entry; |
a9c42b33e dax: add tracepoi... |
1242 |
} |
a7d73fe6c dax: provide an i... |
1243 |
if (WARN_ON_ONCE(iomap.offset + iomap.length < pos + PAGE_SIZE)) { |
13e451fdc dax: fix data cor... |
1244 1245 |
        error = -EIO;   /* fs corruption? */
        goto error_finish_iomap;
a7d73fe6c dax: provide an i... |
1246 |
} |
a7d73fe6c dax: provide an i... |
1247 |
if (vmf->cow_page) { |
31a6f1a6e dax: Simplify arg... |
1248 |
sector_t sector = dax_iomap_sector(&iomap, pos); |
a7d73fe6c dax: provide an i... |
1249 1250 1251 1252 1253 1254 |
        switch (iomap.type) {
        case IOMAP_HOLE:
        case IOMAP_UNWRITTEN:
            clear_user_highpage(vmf->cow_page, vaddr);
            break;
        case IOMAP_MAPPED:
c7fe193f1 fs/dax: Remove un... |
1255 1256 |
error = copy_cow_page_dax(iomap.bdev, iomap.dax_dev, sector, vmf->cow_page, vaddr); |
a7d73fe6c dax: provide an i... |
1257 1258 1259 1260 1261 1262 1263 1264 |
            break;
        default:
            WARN_ON_ONCE(1);
            error = -EIO;
            break;
        }

        if (error)
13e451fdc dax: fix data cor... |
1265 |
goto error_finish_iomap; |
b1aa812b2 mm: move handling... |
1266 1267 |
__SetPageUptodate(vmf->cow_page); |
ab77dab46 fs/dax.c: use new... |
1268 1269 1270 |
        ret = finish_fault(vmf);
        if (!ret)
            ret = VM_FAULT_DONE_COW;
13e451fdc dax: fix data cor... |
1271 |
goto finish_iomap; |
a7d73fe6c dax: provide an i... |
1272 |
} |
aaa422c4c fs, dax: unify IO... |
1273 |
sync = dax_fault_is_synchronous(flags, vma, &iomap); |
caa51d26f dax, iomap: Add s... |
1274 |
|
a7d73fe6c dax: provide an i... |
1275 1276 1277 1278 |
    switch (iomap.type) {
    case IOMAP_MAPPED:
        if (iomap.flags & IOMAP_F_NEW) {
            count_vm_event(PGMAJFAULT);
a0987ad5c dax: Create local... |
1279 |
count_memcg_event_mm(vma->vm_mm, PGMAJFAULT); |
a7d73fe6c dax: provide an i... |
1280 1281 |
            major = VM_FAULT_MAJOR;
        }
1b5a1cb21 dax: Inline dax_i... |
1282 1283 1284 |
        error = dax_iomap_pfn(&iomap, pos, PAGE_SIZE, &pfn);
        if (error < 0)
            goto error_finish_iomap;
b15cd8006 dax: Convert page... |
1285 |
entry = dax_insert_entry(&xas, mapping, vmf, entry, pfn, |
caa51d26f dax, iomap: Add s... |
1286 |
0, write && !sync); |
1b5a1cb21 dax: Inline dax_i... |
1287 |
|
caa51d26f dax, iomap: Add s... |
1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 |
        /*
         * If we are doing synchronous page fault and inode needs fsync,
         * we can insert PTE into page tables only after that happens.
         * Skip insertion for now and return the pfn so that caller can
         * insert it after fsync is done.
         */
        if (sync) {
            if (WARN_ON_ONCE(!pfnp)) {
                error = -EIO;
                goto error_finish_iomap;
            }
            *pfnp = pfn;
ab77dab46 fs/dax.c: use new... |
1300 |
ret = VM_FAULT_NEEDDSYNC | major; |
caa51d26f dax, iomap: Add s... |
1301 1302 |
            goto finish_iomap;
        }
1b5a1cb21 dax: Inline dax_i... |
1303 1304 |
        trace_dax_insert_mapping(inode, vmf, entry);
        if (write)
ab77dab46 fs/dax.c: use new... |
1305 |
ret = vmf_insert_mixed_mkwrite(vma, vaddr, pfn); |
1b5a1cb21 dax: Inline dax_i... |
1306 |
else |
ab77dab46 fs/dax.c: use new... |
1307 |
ret = vmf_insert_mixed(vma, vaddr, pfn); |
1b5a1cb21 dax: Inline dax_i... |
1308 |
|
ab77dab46 fs/dax.c: use new... |
1309 |
goto finish_iomap; |
a7d73fe6c dax: provide an i... |
1310 1311 |
    case IOMAP_UNWRITTEN:
    case IOMAP_HOLE:
d2c43ef13 dax: Create local... |
1312 |
if (!write) { |
b15cd8006 dax: Convert page... |
1313 |
ret = dax_load_hole(&xas, mapping, &entry, vmf); |
13e451fdc dax: fix data cor... |
1314 |
goto finish_iomap; |
1550290b0 dax: dax_iomap_fa... |
1315 |
} |
df561f668 treewide: Use fal... |
1316 |
fallthrough; |
a7d73fe6c dax: provide an i... |
1317 1318 1319 1320 1321 |
    default:
        WARN_ON_ONCE(1);
        error = -EIO;
        break;
    }
13e451fdc dax: fix data cor... |
1322 |
error_finish_iomap: |
ab77dab46 fs/dax.c: use new... |
1323 |
ret = dax_fault_return(error); |
9f141d6ef dax: Call ->iomap... |
1324 1325 1326 |
finish_iomap:
    if (ops->iomap_end) {
        int copied = PAGE_SIZE;
ab77dab46 fs/dax.c: use new... |
1327 |
if (ret & VM_FAULT_ERROR) |
9f141d6ef dax: Call ->iomap... |
1328 1329 1330 1331 1332 1333 1334 1335 |
            copied = 0;
        /*
         * The fault is done by now and there's no way back (other
         * thread may be already happily using PTE we have installed).
         * Just ignore error from ->iomap_end since we cannot do much
         * with it.
         */
        ops->iomap_end(inode, pos, PAGE_SIZE, copied, flags, &iomap);
1550290b0 dax: dax_iomap_fa... |
1336 |
} |
13e451fdc dax: fix data cor... |
1337 |
unlock_entry: |
b15cd8006 dax: Convert page... |
1338 |
dax_unlock_entry(&xas, entry); |
13e451fdc dax: fix data cor... |
1339 |
out: |
ab77dab46 fs/dax.c: use new... |
1340 1341 |
    trace_dax_pte_fault_done(inode, vmf, ret);
    return ret | major;
a7d73fe6c dax: provide an i... |
1342 |
} |
642261ac9 dax: add struct i... |
1343 1344 |
#ifdef CONFIG_FS_DAX_PMD |
b15cd8006 dax: Convert page... |
1345 1346 |
static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf, struct iomap *iomap, void **entry) |
642261ac9 dax: add struct i... |
1347 |
{ |
f42003917 mm, dax: change p... |
1348 1349 |
    struct address_space *mapping = vmf->vma->vm_file->f_mapping;
    unsigned long pmd_addr = vmf->address & PMD_MASK;
11cf9d863 fs/dax: Deposit p... |
1350 |
struct vm_area_struct *vma = vmf->vma; |
653b2ea33 dax: add tracepoi... |
1351 |
struct inode *inode = mapping->host; |
11cf9d863 fs/dax: Deposit p... |
1352 |
pgtable_t pgtable = NULL; |
642261ac9 dax: add struct i... |
1353 1354 1355 |
    struct page *zero_page;
    spinlock_t *ptl;
    pmd_t pmd_entry;
3fe0791c2 dax: store pfns i... |
1356 |
pfn_t pfn; |
642261ac9 dax: add struct i... |
1357 |
|
f42003917 mm, dax: change p... |
1358 |
zero_page = mm_get_huge_zero_page(vmf->vma->vm_mm); |
642261ac9 dax: add struct i... |
1359 1360 |
if (unlikely(!zero_page)) |
653b2ea33 dax: add tracepoi... |
1361 |
goto fallback; |
642261ac9 dax: add struct i... |
1362 |
|
3fe0791c2 dax: store pfns i... |
1363 |
pfn = page_to_pfn_t(zero_page); |
b15cd8006 dax: Convert page... |
1364 |
*entry = dax_insert_entry(xas, mapping, vmf, *entry, pfn, |
3159f943a xarray: Replace e... |
1365 |
DAX_PMD | DAX_ZERO_PAGE, false); |
642261ac9 dax: add struct i... |
1366 |
|
11cf9d863 fs/dax: Deposit p... |
1367 1368 1369 1370 1371 |
    if (arch_needs_pgtable_deposit()) {
        pgtable = pte_alloc_one(vma->vm_mm);
        if (!pgtable)
            return VM_FAULT_OOM;
    }
f42003917 mm, dax: change p... |
1372 1373 |
    ptl = pmd_lock(vmf->vma->vm_mm, vmf->pmd);
    if (!pmd_none(*(vmf->pmd))) {
642261ac9 dax: add struct i... |
1374 |
spin_unlock(ptl); |
653b2ea33 dax: add tracepoi... |
1375 |
goto fallback; |
642261ac9 dax: add struct i... |
1376 |
} |
11cf9d863 fs/dax: Deposit p... |
1377 1378 1379 1380 |
    if (pgtable) {
        pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable);
        mm_inc_nr_ptes(vma->vm_mm);
    }
f42003917 mm, dax: change p... |
1381 |
pmd_entry = mk_pmd(zero_page, vmf->vma->vm_page_prot); |
642261ac9 dax: add struct i... |
1382 |
pmd_entry = pmd_mkhuge(pmd_entry); |
f42003917 mm, dax: change p... |
1383 |
set_pmd_at(vmf->vma->vm_mm, pmd_addr, vmf->pmd, pmd_entry); |
642261ac9 dax: add struct i... |
1384 |
spin_unlock(ptl); |
b15cd8006 dax: Convert page... |
1385 |
trace_dax_pmd_load_hole(inode, vmf, zero_page, *entry); |
642261ac9 dax: add struct i... |
1386 |
return VM_FAULT_NOPAGE; |
653b2ea33 dax: add tracepoi... |
1387 1388 |
fallback: |
11cf9d863 fs/dax: Deposit p... |
1389 1390 |
    if (pgtable)
        pte_free(vma->vm_mm, pgtable);
b15cd8006 dax: Convert page... |
1391 |
trace_dax_pmd_load_hole_fallback(inode, vmf, zero_page, *entry); |
653b2ea33 dax: add tracepoi... |
1392 |
return VM_FAULT_FALLBACK; |
642261ac9 dax: add struct i... |
1393 |
} |
ab77dab46 fs/dax.c: use new... |
1394 |
static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp, |
a2d581675 mm,fs,dax: change... |
1395 |
const struct iomap_ops *ops) |
642261ac9 dax: add struct i... |
1396 |
{ |
f42003917 mm, dax: change p... |
1397 |
struct vm_area_struct *vma = vmf->vma; |
642261ac9 dax: add struct i... |
1398 |
struct address_space *mapping = vma->vm_file->f_mapping; |
b15cd8006 dax: Convert page... |
1399 |
XA_STATE_ORDER(xas, &mapping->i_pages, vmf->pgoff, PMD_ORDER); |
d8a849e1b mm, dax: make pmd... |
1400 1401 |
    unsigned long pmd_addr = vmf->address & PMD_MASK;
    bool write = vmf->flags & FAULT_FLAG_WRITE;
caa51d26f dax, iomap: Add s... |
1402 |
bool sync; |
9484ab1bf dax: Introduce IO... |
1403 |
unsigned int iomap_flags = (write ? IOMAP_WRITE : 0) | IOMAP_FAULT; |
642261ac9 dax: add struct i... |
1404 |
struct inode *inode = mapping->host; |
ab77dab46 fs/dax.c: use new... |
1405 |
vm_fault_t result = VM_FAULT_FALLBACK; |
c039b9979 iomap: use a srcm... |
1406 1407 |
    struct iomap iomap = { .type = IOMAP_HOLE };
    struct iomap srcmap = { .type = IOMAP_HOLE };
b15cd8006 dax: Convert page... |
1408 |
pgoff_t max_pgoff; |
642261ac9 dax: add struct i... |
1409 1410 1411 |
    void *entry;
    loff_t pos;
    int error;
302a5e312 dax: Inline dax_p... |
1412 |
pfn_t pfn; |
642261ac9 dax: add struct i... |
1413 |
|
282a8e039 dax: add tracepoi... |
1414 1415 1416 1417 1418 |
    /*
     * Check whether offset isn't beyond end of file now. Caller is
     * supposed to hold locks serializing us with truncate / punch hole so
     * this is a reliable test.
     */
957ac8c42 dax: fix PMD faul... |
1419 |
max_pgoff = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); |
282a8e039 dax: add tracepoi... |
1420 |
|
f42003917 mm, dax: change p... |
1421 |
trace_dax_pmd_fault(inode, vmf, max_pgoff, 0); |
282a8e039 dax: add tracepoi... |
1422 |
|
fffa281b4 dax: fix deadlock... |
1423 1424 1425 1426 |
    /*
     * Make sure that the faulting address's PMD offset (colour) matches
     * the PMD offset from the start of the file. This is necessary so
     * that a PMD range in the page table overlaps exactly with a PMD
a77d19f46 dax: Rename some ... |
1427 |
* range in the page cache. |
fffa281b4 dax: fix deadlock... |
1428 1429 1430 1431 |
     */
    if ((vmf->pgoff & PG_PMD_COLOUR) !=
        ((vmf->address >> PAGE_SHIFT) & PG_PMD_COLOUR))
        goto fallback;
642261ac9 dax: add struct i... |
1432 1433 1434 1435 1436 1437 1438 1439 1440 |
    /* Fall back to PTEs if we're going to COW */
    if (write && !(vma->vm_flags & VM_SHARED))
        goto fallback;

    /* If the PMD would extend outside the VMA */
    if (pmd_addr < vma->vm_start)
        goto fallback;
    if ((pmd_addr + PMD_SIZE) > vma->vm_end)
        goto fallback;
b15cd8006 dax: Convert page... |
1441 |
if (xas.xa_index >= max_pgoff) { |
282a8e039 dax: add tracepoi... |
1442 1443 1444 |
result = VM_FAULT_SIGBUS; goto out; } |
642261ac9 dax: add struct i... |
1445 1446 |
/* If the PMD would extend beyond the file size */ |
b15cd8006 dax: Convert page... |
1447 |
if ((xas.xa_index | PG_PMD_COLOUR) >= max_pgoff) |
642261ac9 dax: add struct i... |
1448 1449 1450 |
        goto fallback;

    /*
b15cd8006 dax: Convert page... |
1451 1452 1453 1454 |
     * grab_mapping_entry() will make sure we get an empty PMD entry,
     * a zero PMD entry or a DAX PMD. If it can't (because a PTE
     * entry is already in the array, for instance), it will return
     * VM_FAULT_FALLBACK.
876f29460 dax: fix PMD data... |
1455 |
*/ |
23c84eb78 dax: Fix missed w... |
1456 |
entry = grab_mapping_entry(&xas, mapping, PMD_ORDER); |
b15cd8006 dax: Convert page... |
1457 1458 |
    if (xa_is_internal(entry)) {
        result = xa_to_internal(entry);
876f29460 dax: fix PMD data... |
1459 |
goto fallback; |
b15cd8006 dax: Convert page... |
1460 |
} |
876f29460 dax: fix PMD data... |
1461 1462 |
/* |
e2093926a dax: fix race bet... |
1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 |
     * It is possible, particularly with mixed reads & writes to private
     * mappings, that we have raced with a PTE fault that overlaps with
     * the PMD we need to set up. If so just return and the fault will be
     * retried.
     */
    if (!pmd_none(*vmf->pmd) && !pmd_trans_huge(*vmf->pmd) &&
            !pmd_devmap(*vmf->pmd)) {
        result = 0;
        goto unlock_entry;
    }

    /*
642261ac9 dax: add struct i... |
1475 1476 1477 1478 |
     * Note that we don't use iomap_apply here. We aren't doing I/O, only
     * setting up a mapping, so really we're using iomap_begin() as a way
     * to look up our filesystem block.
     */
b15cd8006 dax: Convert page... |
1479 |
pos = (loff_t)xas.xa_index << PAGE_SHIFT; |
c039b9979 iomap: use a srcm... |
1480 1481 |
error = ops->iomap_begin(inode, pos, PMD_SIZE, iomap_flags, &iomap, &srcmap); |
642261ac9 dax: add struct i... |
1482 |
if (error) |
876f29460 dax: fix PMD data... |
1483 |
goto unlock_entry; |
9f141d6ef dax: Call ->iomap... |
1484 |
|
642261ac9 dax: add struct i... |
1485 1486 |
    if (iomap.offset + iomap.length < pos + PMD_SIZE)
        goto finish_iomap;
aaa422c4c fs, dax: unify IO... |
1487 |
sync = dax_fault_is_synchronous(iomap_flags, vma, &iomap); |
caa51d26f dax, iomap: Add s... |
1488 |
|
642261ac9 dax: add struct i... |
1489 1490 |
    switch (iomap.type) {
    case IOMAP_MAPPED:
302a5e312 dax: Inline dax_p... |
1491 1492 1493 |
        error = dax_iomap_pfn(&iomap, pos, PMD_SIZE, &pfn);
        if (error < 0)
            goto finish_iomap;
b15cd8006 dax: Convert page... |
1494 |
entry = dax_insert_entry(&xas, mapping, vmf, entry, pfn, |
3159f943a xarray: Replace e... |
1495 |
DAX_PMD, write && !sync); |
302a5e312 dax: Inline dax_p... |
1496 |
|
caa51d26f dax, iomap: Add s... |
1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 |
        /*
         * If we are doing synchronous page fault and inode needs fsync,
         * we can insert PMD into page tables only after that happens.
         * Skip insertion for now and return the pfn so that caller can
         * insert it after fsync is done.
         */
        if (sync) {
            if (WARN_ON_ONCE(!pfnp))
                goto finish_iomap;
            *pfnp = pfn;
            result = VM_FAULT_NEEDDSYNC;
            goto finish_iomap;
        }
302a5e312 dax: Inline dax_p... |
1510 |
trace_dax_pmd_insert_mapping(inode, vmf, PMD_SIZE, pfn, entry); |
fce86ff58 mm/huge_memory: f... |
1511 |
result = vmf_insert_pfn_pmd(vmf, pfn, write); |
642261ac9 dax: add struct i... |
1512 1513 1514 1515 |
        break;
    case IOMAP_UNWRITTEN:
    case IOMAP_HOLE:
        if (WARN_ON_ONCE(write))
876f29460 dax: fix PMD data... |
1516 |
break; |
b15cd8006 dax: Convert page... |
1517 |
result = dax_pmd_load_hole(&xas, vmf, &iomap, &entry); |
642261ac9 dax: add struct i... |
1518 1519 1520 1521 1522 1523 1524 1525 |
        break;
    default:
        WARN_ON_ONCE(1);
        break;
    }

finish_iomap:
    if (ops->iomap_end) {
9f141d6ef dax: Call ->iomap... |
1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 |
        int copied = PMD_SIZE;

        if (result == VM_FAULT_FALLBACK)
            copied = 0;
        /*
         * The fault is done by now and there's no way back (other
         * thread may be already happily using PMD we have installed).
         * Just ignore error from ->iomap_end since we cannot do much
         * with it.
         */
        ops->iomap_end(inode, pos, PMD_SIZE, copied, iomap_flags, &iomap);
642261ac9 dax: add struct i... |
1538 |
} |
876f29460 dax: fix PMD data... |
1539 |
unlock_entry: |
b15cd8006 dax: Convert page... |
1540 |
dax_unlock_entry(&xas, entry); |
642261ac9 dax: add struct i... |
1541 1542 |
fallback:
    if (result == VM_FAULT_FALLBACK) {
d8a849e1b mm, dax: make pmd... |
1543 |
split_huge_pmd(vma, vmf->pmd, vmf->address); |
642261ac9 dax: add struct i... |
1544 1545 |
        count_vm_event(THP_FAULT_FALLBACK);
    }
282a8e039 dax: add tracepoi... |
1546 |
out: |
f42003917 mm, dax: change p... |
1547 |
trace_dax_pmd_fault_done(inode, vmf, max_pgoff, result); |
642261ac9 dax: add struct i... |
1548 1549 |
    return result;
}
a2d581675 mm,fs,dax: change... |
1550 |
#else |
ab77dab46 fs/dax.c: use new... |
1551 |
static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp, |
01cddfe99 mm,fs,dax: mark d... |
1552 |
const struct iomap_ops *ops) |
a2d581675 mm,fs,dax: change... |
1553 1554 1555 |
{
    return VM_FAULT_FALLBACK;
}
642261ac9 dax: add struct i... |
1556 |
#endif /* CONFIG_FS_DAX_PMD */ |
a2d581675 mm,fs,dax: change... |
1557 1558 1559 1560 |

/**
 * dax_iomap_fault - handle a page fault on a DAX file
 * @vmf: The description of the fault
cec04e8c8 dax: Fix comment ... |
1561 |
* @pe_size: Size of the page to fault in |
9a0dd4225 dax: Allow dax_io... |
1562 |
* @pfnp: PFN to insert for synchronous faults if fsync is required |
c0b246259 dax: pass detaile... |
1563 |
* @iomap_errp: Storage for detailed error code in case of error |
cec04e8c8 dax: Fix comment ... |
1564 |
* @ops: Iomap ops passed from the file system |
a2d581675 mm,fs,dax: change... |
1565 1566 1567 1568 1569 1570 |
 *
 * When a page fault occurs, filesystems may call this helper in
 * their fault handler for DAX files. dax_iomap_fault() assumes the caller
 * has done all the necessary locking for the page fault to proceed
 * successfully.
 */
ab77dab46 fs/dax.c: use new... |
1571 |
vm_fault_t dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size, |
c0b246259 dax: pass detaile... |
1572 |
pfn_t *pfnp, int *iomap_errp, const struct iomap_ops *ops) |
a2d581675 mm,fs,dax: change... |
1573 |
{ |
c791ace1e mm: replace FAULT... |
1574 1575 |
    switch (pe_size) {
    case PE_SIZE_PTE:
c0b246259 dax: pass detaile... |
1576 |
return dax_iomap_pte_fault(vmf, pfnp, iomap_errp, ops); |
c791ace1e mm: replace FAULT... |
1577 |
case PE_SIZE_PMD: |
9a0dd4225 dax: Allow dax_io... |
1578 |
return dax_iomap_pmd_fault(vmf, pfnp, ops); |
a2d581675 mm,fs,dax: change... |
1579 1580 1581 1582 1583 |
    default:
        return VM_FAULT_FALLBACK;
    }
}
EXPORT_SYMBOL_GPL(dax_iomap_fault);
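As a wiring sketch (placeholder names, loosely modelled on in-tree DAX filesystems), this helper is usually hooked up through both ->fault and ->huge_fault; the body of my_dax_huge_fault() is sketched after dax_finish_sync_fault() below.

/* Hypothetical glue; my_dax_huge_fault() is sketched further down. */
static vm_fault_t my_dax_fault(struct vm_fault *vmf)
{
    /* ->fault, ->page_mkwrite and ->pfn_mkwrite only deal with PTEs. */
    return my_dax_huge_fault(vmf, PE_SIZE_PTE);
}

static const struct vm_operations_struct my_dax_vm_ops = {
    .fault          = my_dax_fault,
    .huge_fault     = my_dax_huge_fault,
    .page_mkwrite   = my_dax_fault,
    .pfn_mkwrite    = my_dax_fault,
};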
71eab6dfd dax: Implement da... |
1584 |
|
a77d19f46 dax: Rename some ... |
1585 |
/* |
71eab6dfd dax: Implement da... |
1586 1587 |
 * dax_insert_pfn_mkwrite - insert PTE or PMD entry into page tables
 * @vmf: The description of the fault
71eab6dfd dax: Implement da... |
1588 |
* @pfn: PFN to insert |
cfc93c6c6 dax: Convert dax_... |
1589 |
* @order: Order of entry to insert. |
71eab6dfd dax: Implement da... |
1590 |
* |
a77d19f46 dax: Rename some ... |
1591 1592 |
 * This function inserts a writeable PTE or PMD entry into the page tables
 * for an mmapped DAX file. It also marks the page cache entry as dirty.
71eab6dfd dax: Implement da... |
1593 |
*/ |
cfc93c6c6 dax: Convert dax_... |
1594 1595 |
static vm_fault_t dax_insert_pfn_mkwrite(struct vm_fault *vmf, pfn_t pfn, unsigned int order) |
71eab6dfd dax: Implement da... |
1596 1597 |
{
    struct address_space *mapping = vmf->vma->vm_file->f_mapping;
cfc93c6c6 dax: Convert dax_... |
1598 1599 |
    XA_STATE_ORDER(xas, &mapping->i_pages, vmf->pgoff, order);
    void *entry;
ab77dab46 fs/dax.c: use new... |
1600 |
vm_fault_t ret; |
71eab6dfd dax: Implement da... |
1601 |
|
cfc93c6c6 dax: Convert dax_... |
1602 |
xas_lock_irq(&xas); |
23c84eb78 dax: Fix missed w... |
1603 |
entry = get_unlocked_entry(&xas, order); |
71eab6dfd dax: Implement da... |
1604 |
/* Did we race with someone splitting entry or so? */ |
23c84eb78 dax: Fix missed w... |
1605 1606 |
if (!entry || dax_is_conflict(entry) || (order == 0 && !dax_is_pte_entry(entry))) { |
cfc93c6c6 dax: Convert dax_... |
1607 1608 |
        put_unlocked_entry(&xas, entry);
        xas_unlock_irq(&xas);
71eab6dfd dax: Implement da... |
1609 1610 1611 1612 |
        trace_dax_insert_pfn_mkwrite_no_entry(mapping->host, vmf,
                                              VM_FAULT_NOPAGE);
        return VM_FAULT_NOPAGE;
    }
cfc93c6c6 dax: Convert dax_... |
1613 1614 1615 1616 |
    xas_set_mark(&xas, PAGECACHE_TAG_DIRTY);
    dax_lock_entry(&xas, entry);
    xas_unlock_irq(&xas);
    if (order == 0)
ab77dab46 fs/dax.c: use new... |
1617 |
ret = vmf_insert_mixed_mkwrite(vmf->vma, vmf->address, pfn); |
71eab6dfd dax: Implement da... |
1618 |
#ifdef CONFIG_FS_DAX_PMD |
cfc93c6c6 dax: Convert dax_... |
1619 |
else if (order == PMD_ORDER) |
fce86ff58 mm/huge_memory: f... |
1620 |
ret = vmf_insert_pfn_pmd(vmf, pfn, FAULT_FLAG_WRITE); |
71eab6dfd dax: Implement da... |
1621 |
#endif |
cfc93c6c6 dax: Convert dax_... |
1622 |
else |
ab77dab46 fs/dax.c: use new... |
1623 |
ret = VM_FAULT_FALLBACK; |
cfc93c6c6 dax: Convert dax_... |
1624 |
dax_unlock_entry(&xas, entry); |
ab77dab46 fs/dax.c: use new... |
1625 1626 |
    trace_dax_insert_pfn_mkwrite(mapping->host, vmf, ret);
    return ret;
71eab6dfd dax: Implement da... |
1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 |
}

/**
 * dax_finish_sync_fault - finish synchronous page fault
 * @vmf: The description of the fault
 * @pe_size: Size of entry to be inserted
 * @pfn: PFN to insert
 *
 * This function ensures that the file range touched by the page fault is
 * stored persistently on the media and handles inserting the appropriate
 * page table entry.
 */
ab77dab46 fs/dax.c: use new... |
1639 1640 |
vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf, enum page_entry_size pe_size, pfn_t pfn) |
71eab6dfd dax: Implement da... |
1641 1642 1643 |
{
    int err;
    loff_t start = ((loff_t)vmf->pgoff) << PAGE_SHIFT;
cfc93c6c6 dax: Convert dax_... |
1644 1645 |
    unsigned int order = pe_order(pe_size);
    size_t len = PAGE_SIZE << order;
71eab6dfd dax: Implement da... |
1646 |
|
71eab6dfd dax: Implement da... |
1647 1648 1649 |
    err = vfs_fsync_range(vmf->vma->vm_file, start, start + len - 1, 1);
    if (err)
        return VM_FAULT_SIGBUS;
cfc93c6c6 dax: Convert dax_... |
1650 |
return dax_insert_pfn_mkwrite(vmf, pfn, order); |
71eab6dfd dax: Implement da... |
1651 1652 |
}
EXPORT_SYMBOL_GPL(dax_finish_sync_fault);
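A sketch of the corresponding fault handler, hedged in the same way (my_iomap_ops and the filesystem's fault-serialising lock are placeholders): a write fault takes freeze protection, calls dax_iomap_fault(), and only installs the mapping via dax_finish_sync_fault() when VM_FAULT_NEEDDSYNC is returned.

/* Hypothetical handler, modelled on how in-tree filesystems use these
 * helpers; the filesystem must also hold its own lock that serialises
 * faults against truncate / hole punch around the dax_iomap_fault() call. */
static vm_fault_t my_dax_huge_fault(struct vm_fault *vmf,
                                    enum page_entry_size pe_size)
{
    struct inode *inode = file_inode(vmf->vma->vm_file);
    bool write = vmf->flags & FAULT_FLAG_WRITE;
    vm_fault_t ret;
    pfn_t pfn;

    if (write) {
        sb_start_pagefault(inode->i_sb);
        file_update_time(vmf->vma->vm_file);
    }

    ret = dax_iomap_fault(vmf, pe_size, &pfn, NULL, &my_iomap_ops);
    if (ret & VM_FAULT_NEEDDSYNC)
        /* MAP_SYNC write fault: flush metadata, then install the entry. */
        ret = dax_finish_sync_fault(vmf, pe_size, pfn);

    if (write)
        sb_end_pagefault(inode->i_sb);
    return ret;
}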