Blame view
fs/sync.c
10.4 KB
b24413180 License cleanup: ... |
1 |
// SPDX-License-Identifier: GPL-2.0 |
f79e2abb9 [PATCH] sys_sync_... |
2 3 4 5 6 7 8 |
/* * High-level sync()-related operations */ #include <linux/kernel.h> #include <linux/file.h> #include <linux/fs.h> |
5a0e3ad6a include cleanup: ... |
9 |
#include <linux/slab.h> |
630d9c472 fs: reduce the us... |
10 |
#include <linux/export.h> |
b7ed78f56 introduce sys_syn... |
11 |
#include <linux/namei.h> |
914e26379 [PATCH] severing ... |
12 |
#include <linux/sched.h> |
f79e2abb9 [PATCH] sys_sync_... |
13 14 15 16 |
#include <linux/writeback.h> #include <linux/syscalls.h> #include <linux/linkage.h> #include <linux/pagemap.h> |
cf9a2ae8d [PATCH] BLOCK: Mo... |
17 |
#include <linux/quotaops.h> |
5129a469a Catch filesystems... |
18 |
#include <linux/backing-dev.h> |
5a3e5cb8e vfs: Fix sys_sync... |
19 |
#include "internal.h" |
f79e2abb9 [PATCH] sys_sync_... |
20 21 22 |
/* Every flag bit that sync_file_range() accepts; other bits yield -EINVAL. */
#define VALID_FLAGS (SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE | \
		     SYNC_FILE_RANGE_WAIT_AFTER)
c15c54f5f vfs: Move syncing... |
23 |
/* |
d8a8559cd writeback: get ri... |
24 25 26 27 28 |
* Do the filesystem syncing work. For simple filesystems * writeback_inodes_sb(sb) just dirties buffers with inodes so we have to * submit IO for these buffers via __sync_blockdev(). This also speeds up the * wait == 1 case since in that case write_inode() functions do * sync_dirty_buffer() and thus effectively write one block at a time. |
c15c54f5f vfs: Move syncing... |
29 |
*/ |
0dc83bd30 Revert "writeback... |
30 |
static int __sync_filesystem(struct super_block *sb, int wait) |
c15c54f5f vfs: Move syncing... |
31 |
{ |
5fb324ad2 quota: move code ... |
32 |
if (wait) |
0dc83bd30 Revert "writeback... |
33 |
sync_inodes_sb(sb); |
5fb324ad2 quota: move code ... |
34 |
else |
0e175a183 writeback: Add a ... |
35 |
writeback_inodes_sb(sb, WB_REASON_SYNC); |
5fb324ad2 quota: move code ... |
36 |
|
c15c54f5f vfs: Move syncing... |
37 38 39 40 41 42 43 44 45 46 |
if (sb->s_op->sync_fs) sb->s_op->sync_fs(sb, wait); return __sync_blockdev(sb->s_bdev, wait); } /* * Write out and wait upon all dirty data associated with this * superblock. Filesystem data as well as the underlying block * device. Takes the superblock lock. */ |
60b0680fa vfs: Rename fsync... |
47 |
int sync_filesystem(struct super_block *sb) |
c15c54f5f vfs: Move syncing... |
48 49 |
{ int ret; |
5af7926ff enforce ->sync_fs... |
50 51 52 53 54 55 56 57 58 |
/* * We need to be protected against the filesystem going from * r/o to r/w or vice versa. */ WARN_ON(!rwsem_is_locked(&sb->s_umount)); /* * No point in syncing out anything if the filesystem is read-only. */ |
bc98a42c1 VFS: Convert sb->... |
59 |
if (sb_rdonly(sb)) |
5af7926ff enforce ->sync_fs... |
60 |
return 0; |
0dc83bd30 Revert "writeback... |
61 |
ret = __sync_filesystem(sb, 0); |
c15c54f5f vfs: Move syncing... |
62 63 |
if (ret < 0) return ret; |
0dc83bd30 Revert "writeback... |
64 |
return __sync_filesystem(sb, 1); |
c15c54f5f vfs: Move syncing... |
65 |
} |
10096fb10 Export sync_files... |
66 |
EXPORT_SYMBOL(sync_filesystem); |
c15c54f5f vfs: Move syncing... |
67 |
|
b3de65310 vfs: Reorder oper... |
68 |
/*
 * iterate_supers() callback: run sync_inodes_sb() on every superblock that is
 * not read-only.  @arg is not used.
 */
static void sync_inodes_one_sb(struct super_block *sb, void *arg)
{
	if (sb_rdonly(sb))
		return;

	sync_inodes_sb(sb);
}
b3de65310 vfs: Reorder oper... |
73 |
|
b3de65310 vfs: Reorder oper... |
74 75 |
static void sync_fs_one_sb(struct super_block *sb, void *arg) { |
bc98a42c1 VFS: Convert sb->... |
76 |
if (!sb_rdonly(sb) && sb->s_op->sync_fs) |
b3de65310 vfs: Reorder oper... |
77 78 |
sb->s_op->sync_fs(sb, *(int *)arg); } |
d0e91b13e vfs: Remove unnec... |
79 |
static void fdatawrite_one_bdev(struct block_device *bdev, void *arg) |
b3de65310 vfs: Reorder oper... |
80 |
{ |
d0e91b13e vfs: Remove unnec... |
81 |
filemap_fdatawrite(bdev->bd_inode->i_mapping); |
a8c7176b6 vfs: Make sys_syn... |
82 |
} |
d0e91b13e vfs: Remove unnec... |
83 |
static void fdatawait_one_bdev(struct block_device *bdev, void *arg) |
a8c7176b6 vfs: Make sys_syn... |
84 |
{ |
aa750fd71 mm/filemap.c: mak... |
85 86 87 88 89 90 |
/* * We keep the error status of individual mapping so that * applications can catch the writeback error using fsync(2). * See filemap_fdatawait_keep_errors() for details. */ filemap_fdatawait_keep_errors(bdev->bd_inode->i_mapping); |
c15c54f5f vfs: Move syncing... |
91 |
} |
3beab0b42 sys_sync(): fix 1... |
92 |
/* |
4ea425b63 vfs: Avoid unnece... |
93 94 95 96 97 98 99 100 |
* Sync everything. We start by waking flusher threads so that most of * writeback runs on all devices in parallel. Then we sync all inodes reliably * which effectively also waits for all flusher threads to finish doing * writeback. At this point all data is on disk so metadata should be stable * and we tell filesystems to sync their metadata via ->sync_fs() calls. * Finally, we writeout all block devices because some filesystems (e.g. ext2) * just write metadata (such as inodes or bitmaps) to block device page cache * and do not sync it on their own in ->sync_fs(). |
3beab0b42 sys_sync(): fix 1... |
101 |
*/ |
70f68ee81 fs: add ksys_sync... |
102 |
void ksys_sync(void) |
cf9a2ae8d [PATCH] BLOCK: Mo... |
103 |
{ |
b3de65310 vfs: Reorder oper... |
104 |
int nowait = 0, wait = 1; |
9ba4b2dfa fs: kill 'nr_page... |
105 |
wakeup_flusher_threads(WB_REASON_SYNC); |
0dc83bd30 Revert "writeback... |
106 |
iterate_supers(sync_inodes_one_sb, NULL); |
4ea425b63 vfs: Avoid unnece... |
107 |
iterate_supers(sync_fs_one_sb, &nowait); |
b3de65310 vfs: Reorder oper... |
108 |
iterate_supers(sync_fs_one_sb, &wait); |
d0e91b13e vfs: Remove unnec... |
109 110 |
iterate_bdevs(fdatawrite_one_bdev, NULL); iterate_bdevs(fdatawait_one_bdev, NULL); |
cf9a2ae8d [PATCH] BLOCK: Mo... |
111 112 |
if (unlikely(laptop_mode)) laptop_sync_completion(); |
70f68ee81 fs: add ksys_sync... |
113 114 115 116 117 |
} SYSCALL_DEFINE0(sync) { ksys_sync(); |
cf9a2ae8d [PATCH] BLOCK: Mo... |
118 119 |
return 0; } |
a2a9537ac Get rid of pdflus... |
120 121 |
static void do_sync_work(struct work_struct *work) { |
b3de65310 vfs: Reorder oper... |
122 |
int nowait = 0; |
5cee5815d vfs: Make sys_syn... |
123 124 125 126 |
/* * Sync twice to reduce the possibility we skipped some inodes / pages * because they were temporarily locked */ |
b3de65310 vfs: Reorder oper... |
127 128 |
iterate_supers(sync_inodes_one_sb, &nowait); iterate_supers(sync_fs_one_sb, &nowait); |
d0e91b13e vfs: Remove unnec... |
129 |
iterate_bdevs(fdatawrite_one_bdev, NULL); |
b3de65310 vfs: Reorder oper... |
130 131 |
iterate_supers(sync_inodes_one_sb, &nowait); iterate_supers(sync_fs_one_sb, &nowait); |
d0e91b13e vfs: Remove unnec... |
132 |
iterate_bdevs(fdatawrite_one_bdev, NULL); |
5cee5815d vfs: Make sys_syn... |
133 134 |
printk("Emergency Sync complete "); |
a2a9537ac Get rid of pdflus... |
135 136 |
kfree(work); } |
cf9a2ae8d [PATCH] BLOCK: Mo... |
137 138 |
void emergency_sync(void) { |
a2a9537ac Get rid of pdflus... |
139 140 141 142 143 144 145 |
struct work_struct *work; work = kmalloc(sizeof(*work), GFP_ATOMIC); if (work) { INIT_WORK(work, do_sync_work); schedule_work(work); } |
cf9a2ae8d [PATCH] BLOCK: Mo... |
146 |
} |
b7ed78f56 introduce sys_syn... |
147 148 149 150 151 |
/*
 * sync a single super
 *
 * Looks up the superblock behind @fd and runs sync_filesystem() on it with
 * s_umount held for reading.  Returns -EBADF when @fd does not refer to an
 * open file, otherwise the result of sync_filesystem().
 */
SYSCALL_DEFINE1(syncfs, int, fd)
{
	struct fd f = fdget(fd);
	struct super_block *sb;
	int err;

	if (!f.file)
		return -EBADF;

	sb = f.file->f_path.dentry->d_sb;

	down_read(&sb->s_umount);
	err = sync_filesystem(sb);
	up_read(&sb->s_umount);

	fdput(f);

	return err;
}
4c728ef58 add a vfs_fsync h... |
166 |
/** |
148f948ba vfs: Introduce ne... |
167 |
* vfs_fsync_range - helper to sync a range of data & metadata to disk |
4c728ef58 add a vfs_fsync h... |
168 |
* @file: file to sync |
148f948ba vfs: Introduce ne... |
169 170 171 |
* @start: offset in bytes of the beginning of data range to sync * @end: offset in bytes of the end of data range (inclusive) * @datasync: perform only datasync |
4c728ef58 add a vfs_fsync h... |
172 |
* |
148f948ba vfs: Introduce ne... |
173 174 175 |
* Write back data in range @start..@end and metadata for @file to disk. If * @datasync is set only metadata needed to access modified file data is * written. |
4c728ef58 add a vfs_fsync h... |
176 |
*/ |
8018ab057 sanitize vfs_fsyn... |
177 |
int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync) |
cf9a2ae8d [PATCH] BLOCK: Mo... |
178 |
{ |
0ae45f63d vfs: add support ... |
179 |
struct inode *inode = file->f_mapping->host; |
72c2d5319 file->f_op is nev... |
180 |
if (!file->f_op->fsync) |
02c24a821 fs: push i_mutex ... |
181 |
return -EINVAL; |
0d07e5573 fs: don't clear I... |
182 |
if (!datasync && (inode->i_state & I_DIRTY_TIME)) |
0ae45f63d vfs: add support ... |
183 |
mark_inode_dirty_sync(inode); |
0f41074a6 fs: remove call_f... |
184 |
return file->f_op->fsync(file, start, end, datasync); |
cf9a2ae8d [PATCH] BLOCK: Mo... |
185 |
} |
148f948ba vfs: Introduce ne... |
186 187 188 189 190 |
EXPORT_SYMBOL(vfs_fsync_range); /** * vfs_fsync - perform a fsync or fdatasync on a file * @file: file to sync |
148f948ba vfs: Introduce ne... |
191 192 193 194 |
* @datasync: only perform a fdatasync operation * * Write back data and metadata for @file to disk. If @datasync is * set only metadata needed to access modified file data is written. |
148f948ba vfs: Introduce ne... |
195 |
*/ |
8018ab057 sanitize vfs_fsyn... |
196 |
int vfs_fsync(struct file *file, int datasync) |
148f948ba vfs: Introduce ne... |
197 |
{ |
8018ab057 sanitize vfs_fsyn... |
198 |
return vfs_fsync_range(file, 0, LLONG_MAX, datasync); |
148f948ba vfs: Introduce ne... |
199 |
} |
4c728ef58 add a vfs_fsync h... |
200 |
EXPORT_SYMBOL(vfs_fsync); |
cf9a2ae8d [PATCH] BLOCK: Mo... |
201 |
|
4c728ef58 add a vfs_fsync h... |
202 |
static int do_fsync(unsigned int fd, int datasync) |
cf9a2ae8d [PATCH] BLOCK: Mo... |
203 |
{ |
2903ff019 switch simple cas... |
204 |
struct fd f = fdget(fd); |
cf9a2ae8d [PATCH] BLOCK: Mo... |
205 |
int ret = -EBADF; |
2903ff019 switch simple cas... |
206 207 208 |
if (f.file) { ret = vfs_fsync(f.file, datasync); fdput(f); |
cf9a2ae8d [PATCH] BLOCK: Mo... |
209 210 211 |
} return ret; } |
a5f8fa9e9 [CVE-2009-0029] S... |
212 |
/* fsync(2): do_fsync() with datasync == 0. */
SYSCALL_DEFINE1(fsync, unsigned int, fd)
{
	const int datasync = 0;

	return do_fsync(fd, datasync);
}
a5f8fa9e9 [CVE-2009-0029] S... |
216 |
/* fdatasync(2): do_fsync() with datasync == 1. */
SYSCALL_DEFINE1(fdatasync, unsigned int, fd)
{
	const int datasync = 1;

	return do_fsync(fd, datasync);
}
22f96b380 fs: add sync_file... |
220 221 |
/*
 * Validate @flags and the byte range, then perform the requested
 * wait-before / write / wait-after steps on @file.
 *
 * Returns -EINVAL for flag bits outside VALID_FLAGS or for a negative or
 * wrapping range, and -ESPIPE if @file is not a regular file, block device,
 * directory or symlink.  Otherwise returns the result of the last step that
 * ran; a step that fails (< 0) stops the sequence.
 */
int sync_file_range(struct file *file, loff_t offset, loff_t nbytes,
		    unsigned int flags)
{
	struct address_space *mapping;
	loff_t endbyte;			/* inclusive */
	umode_t mode;
	int ret;

	if (flags & ~VALID_FLAGS)
		return -EINVAL;

	endbyte = offset + nbytes;

	if ((s64)offset < 0 || (s64)endbyte < 0 || endbyte < offset)
		return -EINVAL;

	if (sizeof(pgoff_t) == 4) {
		/*
		 * The range starts outside a 32 bit machine's
		 * pagecache addressing capabilities.  Let it "succeed"
		 */
		if (offset >= (0x100000000ULL << PAGE_SHIFT))
			return 0;

		/* The range ends beyond that limit: treat it as out to EOF. */
		if (endbyte >= (0x100000000ULL << PAGE_SHIFT))
			nbytes = 0;
	}

	/* nbytes == 0 means "to EOF"; otherwise make endbyte inclusive. */
	endbyte = nbytes ? endbyte - 1 : LLONG_MAX;

	mode = file_inode(file)->i_mode;
	if (!(S_ISREG(mode) || S_ISBLK(mode) || S_ISDIR(mode) ||
	      S_ISLNK(mode)))
		return -ESPIPE;

	mapping = file->f_mapping;
	ret = 0;

	if (flags & SYNC_FILE_RANGE_WAIT_BEFORE) {
		ret = file_fdatawait_range(file, offset, endbyte);
		if (ret < 0)
			return ret;
	}

	if (flags & SYNC_FILE_RANGE_WRITE) {
		/* All three flag bits together request WB_SYNC_ALL writeout. */
		const int all_bits = (flags & SYNC_FILE_RANGE_WRITE_AND_WAIT) ==
				     SYNC_FILE_RANGE_WRITE_AND_WAIT;
		int sync_mode = all_bits ? WB_SYNC_ALL : WB_SYNC_NONE;

		ret = __filemap_fdatawrite_range(mapping, offset, endbyte,
						 sync_mode);
		if (ret < 0)
			return ret;
	}

	if (flags & SYNC_FILE_RANGE_WAIT_AFTER)
		ret = file_fdatawait_range(file, offset, endbyte);

	return ret;
}
22f96b380 fs: add sync_file... |
294 |
/*
 * ksys_sync_file_range() permits finely controlled syncing over a segment of
 * a file in the range offset .. (offset+nbytes-1) inclusive.  If nbytes is
 * zero then ksys_sync_file_range() will operate from offset out to EOF.
 *
 * The flag bits are:
 *
 * SYNC_FILE_RANGE_WAIT_BEFORE: wait upon writeout of all pages in the range
 * before performing the write.
 *
 * SYNC_FILE_RANGE_WRITE: initiate writeout of all those dirty pages in the
 * range which are not presently under writeback. Note that this may block for
 * significant periods due to exhaustion of disk request structures.
 *
 * SYNC_FILE_RANGE_WAIT_AFTER: wait upon writeout of all pages in the range
 * after performing the write.
 *
 * Useful combinations of the flag bits are:
 *
 * SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE: ensures that all pages
 * in the range which were dirty on entry to ksys_sync_file_range() are placed
 * under writeout.  This is a start-write-for-data-integrity operation.
 *
 * SYNC_FILE_RANGE_WRITE: start writeout of all dirty pages in the range which
 * are not presently under writeout.  This is an asynchronous flush-to-disk
 * operation.  Not suitable for data integrity operations.
 *
 * SYNC_FILE_RANGE_WAIT_BEFORE (or SYNC_FILE_RANGE_WAIT_AFTER): wait for
 * completion of writeout of all pages in the range.  This will be used after an
 * earlier SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE operation to wait
 * for that operation to complete and to return the result.
 *
 * SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE|SYNC_FILE_RANGE_WAIT_AFTER
 * (a.k.a. SYNC_FILE_RANGE_WRITE_AND_WAIT):
 * a traditional sync() operation.  This is a write-for-data-integrity operation
 * which will ensure that all pages in the range which were dirty on entry to
 * ksys_sync_file_range() are written to disk.  It should be noted that disk
 * caches are not flushed by this call, so there are no guarantees here that the
 * data will be available on disk after a crash.
 *
 *
 * SYNC_FILE_RANGE_WAIT_BEFORE and SYNC_FILE_RANGE_WAIT_AFTER will detect any
 * I/O errors or ENOSPC conditions and will return those to the caller, after
 * clearing the EIO and ENOSPC flags in the address_space.
 *
 * It should be noted that none of these operations write out the file's
 * metadata.  So unless the application is strictly performing overwrites of
 * already-instantiated disk blocks, there are no guarantees here that the data
 * will be available after a crash.
 *
 * Returns -EBADF if @fd is not an open file, otherwise the result of
 * sync_file_range() on that file.
 */
int ksys_sync_file_range(int fd, loff_t offset, loff_t nbytes,
			 unsigned int flags)
{
	struct fd f = fdget(fd);
	int ret = -EBADF;

	if (f.file)
		ret = sync_file_range(f.file, offset, nbytes, flags);

	fdput(f);

	return ret;
}
806cbae12 fs: add ksys_sync... |
358 359 360 361 362 |
/* sync_file_range(2): forwards its arguments to ksys_sync_file_range(). */
SYSCALL_DEFINE4(sync_file_range, int, fd, loff_t, offset, loff_t, nbytes,
				unsigned int, flags)
{
	return ksys_sync_file_range(fd, offset, nbytes, flags);
}
edd5cd4a9 Introduce fixed s... |
363 364 |
/* It would be nice if people remember that not all the world's an i386 when they introduce new system calls */ |
4a0fd5bf0 teach SYSCALL_DEF... |
365 366 |
SYSCALL_DEFINE4(sync_file_range2, int, fd, unsigned int, flags, loff_t, offset, loff_t, nbytes) |
edd5cd4a9 Introduce fixed s... |
367 |
{ |
806cbae12 fs: add ksys_sync... |
368 |
return ksys_sync_file_range(fd, offset, nbytes, flags); |
edd5cd4a9 Introduce fixed s... |
369 |
} |