Blame view
fs/sync.c
9.71 KB
f79e2abb9 [PATCH] sys_sync_... |
1 2 3 4 5 6 7 |
/* * High-level sync()-related operations */ #include <linux/kernel.h> #include <linux/file.h> #include <linux/fs.h> |
5a0e3ad6a include cleanup: ... |
8 |
#include <linux/slab.h> |
630d9c472 fs: reduce the us... |
9 |
#include <linux/export.h> |
b7ed78f56 introduce sys_syn... |
10 |
#include <linux/namei.h> |
914e26379 [PATCH] severing ... |
11 |
#include <linux/sched.h> |
f79e2abb9 [PATCH] sys_sync_... |
12 13 14 15 |
#include <linux/writeback.h> #include <linux/syscalls.h> #include <linux/linkage.h> #include <linux/pagemap.h> |
cf9a2ae8d [PATCH] BLOCK: Mo... |
16 |
#include <linux/quotaops.h> |
5129a469a Catch filesystems... |
17 |
#include <linux/backing-dev.h> |
5a3e5cb8e vfs: Fix sys_sync... |
18 |
#include "internal.h" |
f79e2abb9 [PATCH] sys_sync_... |
19 20 21 |
/*
 * Mask of the flag bits sync_file_range() accepts; the syscall rejects any
 * bit outside this mask with -EINVAL.
 */
#define VALID_FLAGS	(SYNC_FILE_RANGE_WAIT_BEFORE | \
			 SYNC_FILE_RANGE_WRITE | \
			 SYNC_FILE_RANGE_WAIT_AFTER)
c15c54f5f vfs: Move syncing... |
22 |
/* |
d8a8559cd writeback: get ri... |
23 24 25 26 27 |
* Do the filesystem syncing work. For simple filesystems * writeback_inodes_sb(sb) just dirties buffers with inodes so we have to * submit IO for these buffers via __sync_blockdev(). This also speeds up the * wait == 1 case since in that case write_inode() functions do * sync_dirty_buffer() and thus effectively write one block at a time. |
c15c54f5f vfs: Move syncing... |
28 |
*/ |
0dc83bd30 Revert "writeback... |
29 |
static int __sync_filesystem(struct super_block *sb, int wait) |
c15c54f5f vfs: Move syncing... |
30 |
{ |
5fb324ad2 quota: move code ... |
31 |
if (wait) |
0dc83bd30 Revert "writeback... |
32 |
sync_inodes_sb(sb); |
5fb324ad2 quota: move code ... |
33 |
else |
0e175a183 writeback: Add a ... |
34 |
writeback_inodes_sb(sb, WB_REASON_SYNC); |
5fb324ad2 quota: move code ... |
35 |
|
c15c54f5f vfs: Move syncing... |
36 37 38 39 40 41 42 43 44 45 |
if (sb->s_op->sync_fs) sb->s_op->sync_fs(sb, wait); return __sync_blockdev(sb->s_bdev, wait); } /* * Write out and wait upon all dirty data associated with this * superblock. Filesystem data as well as the underlying block * device. Takes the superblock lock. */ |
60b0680fa vfs: Rename fsync... |
46 |
int sync_filesystem(struct super_block *sb) |
c15c54f5f vfs: Move syncing... |
47 48 |
{ int ret; |
5af7926ff enforce ->sync_fs... |
49 50 51 52 53 54 55 56 57 58 59 |
/* * We need to be protected against the filesystem going from * r/o to r/w or vice versa. */ WARN_ON(!rwsem_is_locked(&sb->s_umount)); /* * No point in syncing out anything if the filesystem is read-only. */ if (sb->s_flags & MS_RDONLY) return 0; |
0dc83bd30 Revert "writeback... |
60 |
ret = __sync_filesystem(sb, 0); |
c15c54f5f vfs: Move syncing... |
61 62 |
if (ret < 0) return ret; |
0dc83bd30 Revert "writeback... |
63 |
return __sync_filesystem(sb, 1); |
c15c54f5f vfs: Move syncing... |
64 |
} |
10096fb10 Export sync_files... |
65 |
EXPORT_SYMBOL(sync_filesystem); |
c15c54f5f vfs: Move syncing... |
66 |
|
b3de65310 vfs: Reorder oper... |
67 |
static void sync_inodes_one_sb(struct super_block *sb, void *arg) |
01a05b337 new helper: itera... |
68 |
{ |
95f28604a fs: assign sb->s_... |
69 |
if (!(sb->s_flags & MS_RDONLY)) |
0dc83bd30 Revert "writeback... |
70 |
sync_inodes_sb(sb); |
01a05b337 new helper: itera... |
71 |
} |
b3de65310 vfs: Reorder oper... |
72 |
|
b3de65310 vfs: Reorder oper... |
73 74 75 76 77 |
/*
 * iterate_supers() callback: call the filesystem's ->sync_fs() method, if it
 * has one, on every superblock that is not mounted read-only.  @arg points to
 * an int that is forwarded as the "wait" argument of ->sync_fs().
 */
static void sync_fs_one_sb(struct super_block *sb, void *arg)
{
	if (sb->s_flags & MS_RDONLY)
		return;
	if (!sb->s_op->sync_fs)
		return;

	sb->s_op->sync_fs(sb, *(int *)arg);
}
d0e91b13e vfs: Remove unnec... |
78 |
static void fdatawrite_one_bdev(struct block_device *bdev, void *arg) |
b3de65310 vfs: Reorder oper... |
79 |
{ |
d0e91b13e vfs: Remove unnec... |
80 |
filemap_fdatawrite(bdev->bd_inode->i_mapping); |
a8c7176b6 vfs: Make sys_syn... |
81 |
} |
d0e91b13e vfs: Remove unnec... |
82 |
static void fdatawait_one_bdev(struct block_device *bdev, void *arg) |
a8c7176b6 vfs: Make sys_syn... |
83 |
{ |
d0e91b13e vfs: Remove unnec... |
84 |
filemap_fdatawait(bdev->bd_inode->i_mapping); |
c15c54f5f vfs: Move syncing... |
85 |
} |
3beab0b42 sys_sync(): fix 1... |
86 |
/* |
4ea425b63 vfs: Avoid unnece... |
87 88 89 90 91 92 93 94 |
* Sync everything. We start by waking flusher threads so that most of * writeback runs on all devices in parallel. Then we sync all inodes reliably * which effectively also waits for all flusher threads to finish doing * writeback. At this point all data is on disk so metadata should be stable * and we tell filesystems to sync their metadata via ->sync_fs() calls. * Finally, we writeout all block devices because some filesystems (e.g. ext2) * just write metadata (such as inodes or bitmaps) to block device page cache * and do not sync it on their own in ->sync_fs(). |
3beab0b42 sys_sync(): fix 1... |
95 |
*/ |
5cee5815d vfs: Make sys_syn... |
96 |
SYSCALL_DEFINE0(sync) |
cf9a2ae8d [PATCH] BLOCK: Mo... |
97 |
{ |
b3de65310 vfs: Reorder oper... |
98 |
int nowait = 0, wait = 1; |
0e175a183 writeback: Add a ... |
99 |
wakeup_flusher_threads(0, WB_REASON_SYNC); |
0dc83bd30 Revert "writeback... |
100 |
iterate_supers(sync_inodes_one_sb, NULL); |
4ea425b63 vfs: Avoid unnece... |
101 |
iterate_supers(sync_fs_one_sb, &nowait); |
b3de65310 vfs: Reorder oper... |
102 |
iterate_supers(sync_fs_one_sb, &wait); |
d0e91b13e vfs: Remove unnec... |
103 104 |
iterate_bdevs(fdatawrite_one_bdev, NULL); iterate_bdevs(fdatawait_one_bdev, NULL); |
cf9a2ae8d [PATCH] BLOCK: Mo... |
105 106 |
if (unlikely(laptop_mode)) laptop_sync_completion(); |
cf9a2ae8d [PATCH] BLOCK: Mo... |
107 108 |
return 0; } |
a2a9537ac Get rid of pdflus... |
109 110 |
static void do_sync_work(struct work_struct *work) { |
b3de65310 vfs: Reorder oper... |
111 |
int nowait = 0; |
5cee5815d vfs: Make sys_syn... |
112 113 114 115 |
/* * Sync twice to reduce the possibility we skipped some inodes / pages * because they were temporarily locked */ |
b3de65310 vfs: Reorder oper... |
116 117 |
iterate_supers(sync_inodes_one_sb, &nowait); iterate_supers(sync_fs_one_sb, &nowait); |
d0e91b13e vfs: Remove unnec... |
118 |
iterate_bdevs(fdatawrite_one_bdev, NULL); |
b3de65310 vfs: Reorder oper... |
119 120 |
iterate_supers(sync_inodes_one_sb, &nowait); iterate_supers(sync_fs_one_sb, &nowait); |
d0e91b13e vfs: Remove unnec... |
121 |
iterate_bdevs(fdatawrite_one_bdev, NULL); |
5cee5815d vfs: Make sys_syn... |
122 123 |
printk("Emergency Sync complete "); |
a2a9537ac Get rid of pdflus... |
124 125 |
kfree(work); } |
cf9a2ae8d [PATCH] BLOCK: Mo... |
126 127 |
void emergency_sync(void) { |
a2a9537ac Get rid of pdflus... |
128 129 130 131 132 133 134 |
struct work_struct *work; work = kmalloc(sizeof(*work), GFP_ATOMIC); if (work) { INIT_WORK(work, do_sync_work); schedule_work(work); } |
cf9a2ae8d [PATCH] BLOCK: Mo... |
135 |
} |
b7ed78f56 introduce sys_syn... |
136 137 138 139 140 |
/*
 * Sync the single superblock that the file referred to by @fd lives on.
 *
 * Returns -EBADF if @fd is not an open file descriptor, otherwise the result
 * of sync_filesystem().
 */
SYSCALL_DEFINE1(syncfs, int, fd)
{
	struct fd f = fdget(fd);
	int err = -EBADF;

	if (f.file) {
		struct super_block *sb = f.file->f_path.dentry->d_sb;

		/* sync_filesystem() expects s_umount to be held. */
		down_read(&sb->s_umount);
		err = sync_filesystem(sb);
		up_read(&sb->s_umount);

		fdput(f);
	}

	return err;
}
4c728ef58 add a vfs_fsync h... |
155 |
/** |
148f948ba vfs: Introduce ne... |
156 |
* vfs_fsync_range - helper to sync a range of data & metadata to disk |
4c728ef58 add a vfs_fsync h... |
157 |
* @file: file to sync |
148f948ba vfs: Introduce ne... |
158 159 160 |
* @start: offset in bytes of the beginning of data range to sync * @end: offset in bytes of the end of data range (inclusive) * @datasync: perform only datasync |
4c728ef58 add a vfs_fsync h... |
161 |
* |
148f948ba vfs: Introduce ne... |
162 163 164 |
* Write back data in range @start..@end and metadata for @file to disk. If * @datasync is set only metadata needed to access modified file data is * written. |
4c728ef58 add a vfs_fsync h... |
165 |
*/ |
8018ab057 sanitize vfs_fsyn... |
166 |
int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync) |
cf9a2ae8d [PATCH] BLOCK: Mo... |
167 |
{ |
0ae45f63d vfs: add support ... |
168 |
struct inode *inode = file->f_mapping->host; |
72c2d5319 file->f_op is nev... |
169 |
if (!file->f_op->fsync) |
02c24a821 fs: push i_mutex ... |
170 |
return -EINVAL; |
0ae45f63d vfs: add support ... |
171 172 173 174 175 176 |
if (!datasync && (inode->i_state & I_DIRTY_TIME)) { spin_lock(&inode->i_lock); inode->i_state &= ~I_DIRTY_TIME; spin_unlock(&inode->i_lock); mark_inode_dirty_sync(inode); } |
02c24a821 fs: push i_mutex ... |
177 |
return file->f_op->fsync(file, start, end, datasync); |
cf9a2ae8d [PATCH] BLOCK: Mo... |
178 |
} |
148f948ba vfs: Introduce ne... |
179 180 181 182 183 |
EXPORT_SYMBOL(vfs_fsync_range); /** * vfs_fsync - perform a fsync or fdatasync on a file * @file: file to sync |
148f948ba vfs: Introduce ne... |
184 185 186 187 |
* @datasync: only perform a fdatasync operation * * Write back data and metadata for @file to disk. If @datasync is * set only metadata needed to access modified file data is written. |
148f948ba vfs: Introduce ne... |
188 |
*/ |
8018ab057 sanitize vfs_fsyn... |
189 |
int vfs_fsync(struct file *file, int datasync) |
148f948ba vfs: Introduce ne... |
190 |
{ |
8018ab057 sanitize vfs_fsyn... |
191 |
return vfs_fsync_range(file, 0, LLONG_MAX, datasync); |
148f948ba vfs: Introduce ne... |
192 |
} |
4c728ef58 add a vfs_fsync h... |
193 |
EXPORT_SYMBOL(vfs_fsync); |
cf9a2ae8d [PATCH] BLOCK: Mo... |
194 |
|
4c728ef58 add a vfs_fsync h... |
195 |
static int do_fsync(unsigned int fd, int datasync) |
cf9a2ae8d [PATCH] BLOCK: Mo... |
196 |
{ |
2903ff019 switch simple cas... |
197 |
struct fd f = fdget(fd); |
cf9a2ae8d [PATCH] BLOCK: Mo... |
198 |
int ret = -EBADF; |
2903ff019 switch simple cas... |
199 200 201 |
if (f.file) { ret = vfs_fsync(f.file, datasync); fdput(f); |
cf9a2ae8d [PATCH] BLOCK: Mo... |
202 203 204 |
} return ret; } |
a5f8fa9e9 [CVE-2009-0029] S... |
205 |
/* fsync(2): do_fsync() with datasync == 0. */
SYSCALL_DEFINE1(fsync, unsigned int, fd)
{
	const int datasync = 0;

	return do_fsync(fd, datasync);
}
a5f8fa9e9 [CVE-2009-0029] S... |
209 |
SYSCALL_DEFINE1(fdatasync, unsigned int, fd) |
cf9a2ae8d [PATCH] BLOCK: Mo... |
210 |
{ |
4c728ef58 add a vfs_fsync h... |
211 |
return do_fsync(fd, 1); |
cf9a2ae8d [PATCH] BLOCK: Mo... |
212 213 214 |
} /* |
f79e2abb9 [PATCH] sys_sync_... |
215 216 217 218 219 220 221 222 223 224 |
* sys_sync_file_range() permits finely controlled syncing over a segment of * a file in the range offset .. (offset+nbytes-1) inclusive. If nbytes is * zero then sys_sync_file_range() will operate from offset out to EOF. * * The flag bits are: * * SYNC_FILE_RANGE_WAIT_BEFORE: wait upon writeout of all pages in the range * before performing the write. * * SYNC_FILE_RANGE_WRITE: initiate writeout of all those dirty pages in the |
cce770815 SYNC_FILE_RANGE_W... |
225 226 |
* range which are not presently under writeback. Note that this may block for * significant periods due to exhaustion of disk request structures. |
f79e2abb9 [PATCH] sys_sync_... |
227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 |
* * SYNC_FILE_RANGE_WAIT_AFTER: wait upon writeout of all pages in the range * after performing the write. * * Useful combinations of the flag bits are: * * SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE: ensures that all pages * in the range which were dirty on entry to sys_sync_file_range() are placed * under writeout. This is a start-write-for-data-integrity operation. * * SYNC_FILE_RANGE_WRITE: start writeout of all dirty pages in the range which * are not presently under writeout. This is an asynchronous flush-to-disk * operation. Not suitable for data integrity operations. * * SYNC_FILE_RANGE_WAIT_BEFORE (or SYNC_FILE_RANGE_WAIT_AFTER): wait for * completion of writeout of all pages in the range. This will be used after an * earlier SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE operation to wait * for that operation to complete and to return the result. * * SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE|SYNC_FILE_RANGE_WAIT_AFTER: * a traditional sync() operation. This is a write-for-data-integrity operation * which will ensure that all pages in the range which were dirty on entry to * sys_sync_file_range() are committed to disk. * * * SYNC_FILE_RANGE_WAIT_BEFORE and SYNC_FILE_RANGE_WAIT_AFTER will detect any * I/O errors or ENOSPC conditions and will return those to the caller, after * clearing the EIO and ENOSPC flags in the address_space. * * It should be noted that none of these operations write out the file's * metadata. So unless the application is strictly performing overwrites of * already-instantiated disk blocks, there are no guarantees here that the data * will be available after a crash. */ |
4a0fd5bf0 teach SYSCALL_DEF... |
261 262 |
SYSCALL_DEFINE4(sync_file_range, int, fd, loff_t, offset, loff_t, nbytes, unsigned int, flags) |
f79e2abb9 [PATCH] sys_sync_... |
263 264 |
{ int ret; |
2903ff019 switch simple cas... |
265 |
struct fd f; |
7a0ad10c3 fold do_sync_file... |
266 |
struct address_space *mapping; |
f79e2abb9 [PATCH] sys_sync_... |
267 |
loff_t endbyte; /* inclusive */ |
f79e2abb9 [PATCH] sys_sync_... |
268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 |
umode_t i_mode; ret = -EINVAL; if (flags & ~VALID_FLAGS) goto out; endbyte = offset + nbytes; if ((s64)offset < 0) goto out; if ((s64)endbyte < 0) goto out; if (endbyte < offset) goto out; if (sizeof(pgoff_t) == 4) { if (offset >= (0x100000000ULL << PAGE_CACHE_SHIFT)) { /* * The range starts outside a 32 bit machine's * pagecache addressing capabilities. Let it "succeed" */ ret = 0; goto out; } if (endbyte >= (0x100000000ULL << PAGE_CACHE_SHIFT)) { /* * Out to EOF */ nbytes = 0; } } if (nbytes == 0) |
111ebb6e6 [PATCH] writeback... |
301 |
endbyte = LLONG_MAX; |
f79e2abb9 [PATCH] sys_sync_... |
302 303 304 305 |
else endbyte--; /* inclusive */ ret = -EBADF; |
2903ff019 switch simple cas... |
306 307 |
f = fdget(fd); if (!f.file) |
f79e2abb9 [PATCH] sys_sync_... |
308 |
goto out; |
496ad9aa8 new helper: file_... |
309 |
i_mode = file_inode(f.file)->i_mode; |
f79e2abb9 [PATCH] sys_sync_... |
310 311 312 313 |
ret = -ESPIPE; if (!S_ISREG(i_mode) && !S_ISBLK(i_mode) && !S_ISDIR(i_mode) && !S_ISLNK(i_mode)) goto out_put; |
2903ff019 switch simple cas... |
314 |
mapping = f.file->f_mapping; |
7a0ad10c3 fold do_sync_file... |
315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 |
if (!mapping) { ret = -EINVAL; goto out_put; } ret = 0; if (flags & SYNC_FILE_RANGE_WAIT_BEFORE) { ret = filemap_fdatawait_range(mapping, offset, endbyte); if (ret < 0) goto out_put; } if (flags & SYNC_FILE_RANGE_WRITE) { ret = filemap_fdatawrite_range(mapping, offset, endbyte); if (ret < 0) goto out_put; } if (flags & SYNC_FILE_RANGE_WAIT_AFTER) ret = filemap_fdatawait_range(mapping, offset, endbyte); |
f79e2abb9 [PATCH] sys_sync_... |
335 |
out_put: |
2903ff019 switch simple cas... |
336 |
fdput(f); |
f79e2abb9 [PATCH] sys_sync_... |
337 338 339 |
out: return ret; } |
edd5cd4a9 Introduce fixed s... |
340 341 |
/* It would be nice if people remember that not all the world's an i386 when they introduce new system calls */ |
4a0fd5bf0 teach SYSCALL_DEF... |
342 343 |
SYSCALL_DEFINE4(sync_file_range2, int, fd, unsigned int, flags, loff_t, offset, loff_t, nbytes) |
edd5cd4a9 Introduce fixed s... |
344 345 346 |
{ return sys_sync_file_range(fd, offset, nbytes, flags); } |