Commit 5cee5815d1564bbbd505fea86f4550f1efdb5cd0
Committed by
Al Viro
1 parent
429479f031
Exists in
master
and in
4 other branches
vfs: Make sys_sync() use fsync_super() (version 4)
Having two places (fsync_super() and do_sync()) that perform the data integrity sync of a filesystem is unnecessarily fragile. Alter __fsync_super() to accommodate the needs of both callers and use it in both. After this patch, __fsync_super() is the only place where we gather all the calls needed to properly send all of a filesystem's data to disk. A nice bonus is that we get complete livelock avoidance, and sync_supers() is now used only for periodic writeback of superblocks. sync_blockdevs(), introduced a couple of patches ago, is now gone. [build fixes folded] Signed-off-by: Jan Kara <jack@suse.cz> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Showing 7 changed files with 51 additions and 135 deletions Side-by-side Diff
fs/block_dev.c
... | ... | @@ -176,17 +176,22 @@ |
176 | 176 | iov, offset, nr_segs, blkdev_get_blocks, NULL); |
177 | 177 | } |
178 | 178 | |
179 | +int __sync_blockdev(struct block_device *bdev, int wait) | |
180 | +{ | |
181 | + if (!bdev) | |
182 | + return 0; | |
183 | + if (!wait) | |
184 | + return filemap_flush(bdev->bd_inode->i_mapping); | |
185 | + return filemap_write_and_wait(bdev->bd_inode->i_mapping); | |
186 | +} | |
187 | + | |
179 | 188 | /* |
180 | 189 | * Write out and wait upon all the dirty data associated with a block |
181 | 190 | * device via its mapping. Does not take the superblock lock. |
182 | 191 | */ |
183 | 192 | int sync_blockdev(struct block_device *bdev) |
184 | 193 | { |
185 | - int ret = 0; | |
186 | - | |
187 | - if (bdev) | |
188 | - ret = filemap_write_and_wait(bdev->bd_inode->i_mapping); | |
189 | - return ret; | |
194 | + return __sync_blockdev(bdev, 1); | |
190 | 195 | } |
191 | 196 | EXPORT_SYMBOL(sync_blockdev); |
192 | 197 |
fs/fs-writeback.c
... | ... | @@ -679,55 +679,6 @@ |
679 | 679 | } |
680 | 680 | |
681 | 681 | /** |
682 | - * sync_inodes - writes all inodes to disk | |
683 | - * @wait: wait for completion | |
684 | - * | |
685 | - * sync_inodes() goes through each super block's dirty inode list, writes the | |
686 | - * inodes out, waits on the writeout and puts the inodes back on the normal | |
687 | - * list. | |
688 | - * | |
689 | - * This is for sys_sync(). fsync_dev() uses the same algorithm. The subtle | |
690 | - * part of the sync functions is that the blockdev "superblock" is processed | |
691 | - * last. This is because the write_inode() function of a typical fs will | |
692 | - * perform no I/O, but will mark buffers in the blockdev mapping as dirty. | |
693 | - * What we want to do is to perform all that dirtying first, and then write | |
694 | - * back all those inode blocks via the blockdev mapping in one sweep. So the | |
695 | - * additional (somewhat redundant) sync_blockdev() calls here are to make | |
696 | - * sure that really happens. Because if we call sync_inodes_sb(wait=1) with | |
697 | - * outstanding dirty inodes, the writeback goes block-at-a-time within the | |
698 | - * filesystem's write_inode(). This is extremely slow. | |
699 | - */ | |
700 | -static void __sync_inodes(int wait) | |
701 | -{ | |
702 | - struct super_block *sb; | |
703 | - | |
704 | - spin_lock(&sb_lock); | |
705 | -restart: | |
706 | - list_for_each_entry(sb, &super_blocks, s_list) { | |
707 | - sb->s_count++; | |
708 | - spin_unlock(&sb_lock); | |
709 | - down_read(&sb->s_umount); | |
710 | - if (sb->s_root) { | |
711 | - sync_inodes_sb(sb, wait); | |
712 | - sync_blockdev(sb->s_bdev); | |
713 | - } | |
714 | - up_read(&sb->s_umount); | |
715 | - spin_lock(&sb_lock); | |
716 | - if (__put_super_and_need_restart(sb)) | |
717 | - goto restart; | |
718 | - } | |
719 | - spin_unlock(&sb_lock); | |
720 | -} | |
721 | - | |
722 | -void sync_inodes(int wait) | |
723 | -{ | |
724 | - __sync_inodes(0); | |
725 | - | |
726 | - if (wait) | |
727 | - __sync_inodes(1); | |
728 | -} | |
729 | - | |
730 | -/** | |
731 | 682 | * write_inode_now - write an inode to disk |
732 | 683 | * @inode: inode to write to disk |
733 | 684 | * @sync: whether the write should be synchronous or not |
fs/internal.h
... | ... | @@ -25,6 +25,8 @@ |
25 | 25 | return sb == blockdev_superblock; |
26 | 26 | } |
27 | 27 | |
28 | +extern int __sync_blockdev(struct block_device *bdev, int wait); | |
29 | + | |
28 | 30 | #else |
29 | 31 | static inline void bdev_cache_init(void) |
30 | 32 | { |
... | ... | @@ -34,6 +36,11 @@ |
34 | 36 | { |
35 | 37 | return 0; |
36 | 38 | } |
39 | + | |
40 | +static inline int __sync_blockdev(struct block_device *bdev, int wait) | |
41 | +{ | |
42 | + return 0; | |
43 | +} | |
37 | 44 | #endif |
38 | 45 | |
39 | 46 | /* |
... | ... | @@ -71,13 +78,4 @@ |
71 | 78 | * file_table.c |
72 | 79 | */ |
73 | 80 | extern void mark_files_ro(struct super_block *); |
74 | - | |
75 | -/* | |
76 | - * super.c | |
77 | - */ | |
78 | -#ifdef CONFIG_BLOCK | |
79 | -extern void sync_blockdevs(void); | |
80 | -#else | |
81 | -static inline void sync_blockdevs(void) { } | |
82 | -#endif |
fs/super.c
... | ... | @@ -284,23 +284,23 @@ |
284 | 284 | EXPORT_SYMBOL(unlock_super); |
285 | 285 | |
286 | 286 | /* |
287 | - * Write out and wait upon all dirty data associated with this | |
288 | - * superblock. Filesystem data as well as the underlying block | |
289 | - * device. Takes the superblock lock. Requires a second blkdev | |
290 | - * flush by the caller to complete the operation. | |
287 | + * Do the filesystem syncing work. For simple filesystems sync_inodes_sb(sb, 0) | |
288 | + * just dirties buffers with inodes so we have to submit IO for these buffers | |
289 | + * via __sync_blockdev(). This also speeds up the wait == 1 case since in that | |
290 | + * case write_inode() functions do sync_dirty_buffer() and thus effectively | |
291 | + * write one block at a time. | |
291 | 292 | */ |
292 | -static int __fsync_super(struct super_block *sb) | |
293 | +static int __fsync_super(struct super_block *sb, int wait) | |
293 | 294 | { |
294 | - sync_inodes_sb(sb, 0); | |
295 | 295 | vfs_dq_sync(sb); |
296 | - sync_inodes_sb(sb, 1); | |
296 | + sync_inodes_sb(sb, wait); | |
297 | 297 | lock_super(sb); |
298 | 298 | if (sb->s_dirt && sb->s_op->write_super) |
299 | 299 | sb->s_op->write_super(sb); |
300 | 300 | unlock_super(sb); |
301 | 301 | if (sb->s_op->sync_fs) |
302 | - sb->s_op->sync_fs(sb, 1); | |
303 | - return sync_blockdev(sb->s_bdev); | |
302 | + sb->s_op->sync_fs(sb, wait); | |
303 | + return __sync_blockdev(sb->s_bdev, wait); | |
304 | 304 | } |
305 | 305 | |
306 | 306 | /* |
... | ... | @@ -310,7 +310,12 @@ |
310 | 310 | */ |
311 | 311 | int fsync_super(struct super_block *sb) |
312 | 312 | { |
313 | - return __fsync_super(sb); | |
313 | + int ret; | |
314 | + | |
315 | + ret = __fsync_super(sb, 0); | |
316 | + if (ret < 0) | |
317 | + return ret; | |
318 | + return __fsync_super(sb, 1); | |
314 | 319 | } |
315 | 320 | EXPORT_SYMBOL_GPL(fsync_super); |
316 | 321 | |
317 | 322 | |
318 | 323 | |
319 | 324 | |
... | ... | @@ -469,20 +474,18 @@ |
469 | 474 | } |
470 | 475 | |
471 | 476 | /* |
472 | - * Call the ->sync_fs super_op against all filesystems which are r/w and | |
473 | - * which implement it. | |
477 | + * Sync all the data for all the filesystems (called by sys_sync() and | |
478 | + * emergency sync) | |
474 | 479 | * |
475 | 480 | * This operation is careful to avoid the livelock which could easily happen |
476 | - * if two or more filesystems are being continuously dirtied. s_need_sync_fs | |
481 | + * if two or more filesystems are being continuously dirtied. s_need_sync | |
477 | 482 | * is used only here. We set it against all filesystems and then clear it as |
478 | 483 | * we sync them. So redirtied filesystems are skipped. |
479 | 484 | * |
480 | 485 | * But if process A is currently running sync_filesystems and then process B |
481 | - * calls sync_filesystems as well, process B will set all the s_need_sync_fs | |
486 | + * calls sync_filesystems as well, process B will set all the s_need_sync | |
482 | 487 | * flags again, which will cause process A to resync everything. Fix that with |
483 | 488 | * a local mutex. |
484 | - * | |
485 | - * (Fabian) Avoid sync_fs with clean fs & wait mode 0 | |
486 | 489 | */ |
487 | 490 | void sync_filesystems(int wait) |
488 | 491 | { |
489 | 492 | |
490 | 493 | |
491 | 494 | |
492 | 495 | |
... | ... | @@ -492,25 +495,23 @@ |
492 | 495 | mutex_lock(&mutex); /* Could be down_interruptible */ |
493 | 496 | spin_lock(&sb_lock); |
494 | 497 | list_for_each_entry(sb, &super_blocks, s_list) { |
495 | - if (!sb->s_op->sync_fs) | |
496 | - continue; | |
497 | 498 | if (sb->s_flags & MS_RDONLY) |
498 | 499 | continue; |
499 | - sb->s_need_sync_fs = 1; | |
500 | + sb->s_need_sync = 1; | |
500 | 501 | } |
501 | 502 | |
502 | 503 | restart: |
503 | 504 | list_for_each_entry(sb, &super_blocks, s_list) { |
504 | - if (!sb->s_need_sync_fs) | |
505 | + if (!sb->s_need_sync) | |
505 | 506 | continue; |
506 | - sb->s_need_sync_fs = 0; | |
507 | + sb->s_need_sync = 0; | |
507 | 508 | if (sb->s_flags & MS_RDONLY) |
508 | 509 | continue; /* hm. Was remounted r/o meanwhile */ |
509 | 510 | sb->s_count++; |
510 | 511 | spin_unlock(&sb_lock); |
511 | 512 | down_read(&sb->s_umount); |
512 | 513 | if (sb->s_root) |
513 | - sb->s_op->sync_fs(sb, wait); | |
514 | + __fsync_super(sb, wait); | |
514 | 515 | up_read(&sb->s_umount); |
515 | 516 | /* restart only when sb is no longer on the list */ |
516 | 517 | spin_lock(&sb_lock); |
... | ... | @@ -520,33 +521,6 @@ |
520 | 521 | spin_unlock(&sb_lock); |
521 | 522 | mutex_unlock(&mutex); |
522 | 523 | } |
523 | - | |
524 | -#ifdef CONFIG_BLOCK | |
525 | -/* | |
526 | - * Sync all block devices underlying some superblock | |
527 | - */ | |
528 | -void sync_blockdevs(void) | |
529 | -{ | |
530 | - struct super_block *sb; | |
531 | - | |
532 | - spin_lock(&sb_lock); | |
533 | -restart: | |
534 | - list_for_each_entry(sb, &super_blocks, s_list) { | |
535 | - if (!sb->s_bdev) | |
536 | - continue; | |
537 | - sb->s_count++; | |
538 | - spin_unlock(&sb_lock); | |
539 | - down_read(&sb->s_umount); | |
540 | - if (sb->s_root) | |
541 | - sync_blockdev(sb->s_bdev); | |
542 | - up_read(&sb->s_umount); | |
543 | - spin_lock(&sb_lock); | |
544 | - if (__put_super_and_need_restart(sb)) | |
545 | - goto restart; | |
546 | - } | |
547 | - spin_unlock(&sb_lock); | |
548 | -} | |
549 | -#endif | |
550 | 524 | |
551 | 525 | /** |
552 | 526 | * get_super - get the superblock of a device |
fs/sync.c
... | ... | @@ -18,35 +18,24 @@ |
18 | 18 | #define VALID_FLAGS (SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE| \ |
19 | 19 | SYNC_FILE_RANGE_WAIT_AFTER) |
20 | 20 | |
21 | -/* | |
22 | - * sync everything. Start out by waking pdflush, because that writes back | |
23 | - * all queues in parallel. | |
24 | - */ | |
25 | -static void do_sync(unsigned long wait) | |
21 | +SYSCALL_DEFINE0(sync) | |
26 | 22 | { |
27 | - wakeup_pdflush(0); | |
28 | - sync_inodes(0); /* All mappings, inodes and their blockdevs */ | |
29 | - vfs_dq_sync(NULL); | |
30 | - sync_inodes(wait); /* Mappings, inodes and blockdevs, again. */ | |
31 | - sync_supers(); /* Write the superblocks */ | |
32 | - sync_filesystems(0); /* Start syncing the filesystems */ | |
33 | - sync_filesystems(wait); /* Waitingly sync the filesystems */ | |
34 | - sync_blockdevs(); | |
35 | - if (!wait) | |
36 | - printk("Emergency Sync complete\n"); | |
23 | + sync_filesystems(0); | |
24 | + sync_filesystems(1); | |
37 | 25 | if (unlikely(laptop_mode)) |
38 | 26 | laptop_sync_completion(); |
39 | -} | |
40 | - | |
41 | -SYSCALL_DEFINE0(sync) | |
42 | -{ | |
43 | - do_sync(1); | |
44 | 27 | return 0; |
45 | 28 | } |
46 | 29 | |
47 | 30 | static void do_sync_work(struct work_struct *work) |
48 | 31 | { |
49 | - do_sync(0); | |
32 | + /* | |
33 | + * Sync twice to reduce the possibility we skipped some inodes / pages | |
34 | + * because they were temporarily locked | |
35 | + */ | |
36 | + sync_filesystems(0); | |
37 | + sync_filesystems(0); | |
38 | + printk("Emergency Sync complete\n"); | |
50 | 39 | kfree(work); |
51 | 40 | } |
52 | 41 |
include/linux/fs.h
include/linux/writeback.h
... | ... | @@ -79,7 +79,6 @@ |
79 | 79 | void writeback_inodes(struct writeback_control *wbc); |
80 | 80 | int inode_wait(void *); |
81 | 81 | void sync_inodes_sb(struct super_block *, int wait); |
82 | -void sync_inodes(int wait); | |
83 | 82 | |
84 | 83 | /* writeback.h requires fs.h; it, too, is not included from here. */ |
85 | 84 | static inline void wait_on_inode(struct inode *inode) |