Commit 0dc83bd30b0bf5410c0933cfbbf8853248eff0a9

Authored by Jan Kara
1 parent 1362f4ea20

Revert "writeback: do not sync data dirtied after sync start"

This reverts commit c4a391b53a72d2df4ee97f96f78c1d5971b47489. Dave
Chinner <david@fromorbit.com> has reported the commit may cause some
inodes to be left out from sync(2). This is because we can call
redirty_tail() for some inode (which sets i_dirtied_when to current time)
after sync(2) has started or similarly requeue_inode() can set
i_dirtied_when to current time if writeback had to skip some pages. The
real problem is in the functions clobbering i_dirtied_when but fixing
that isn't trivial so revert is a safer choice for now.

CC: stable@vger.kernel.org # >= 3.13
Signed-off-by: Jan Kara <jack@suse.cz>

Showing 5 changed files with 22 additions and 36 deletions Side-by-side Diff

... ... @@ -40,18 +40,13 @@
40 40 struct wb_writeback_work {
41 41 long nr_pages;
42 42 struct super_block *sb;
43   - /*
44   - * Write only inodes dirtied before this time. Don't forget to set
45   - * older_than_this_is_set when you set this.
46   - */
47   - unsigned long older_than_this;
  43 + unsigned long *older_than_this;
48 44 enum writeback_sync_modes sync_mode;
49 45 unsigned int tagged_writepages:1;
50 46 unsigned int for_kupdate:1;
51 47 unsigned int range_cyclic:1;
52 48 unsigned int for_background:1;
53 49 unsigned int for_sync:1; /* sync(2) WB_SYNC_ALL writeback */
54   - unsigned int older_than_this_is_set:1;
55 50 enum wb_reason reason; /* why was writeback initiated? */
56 51  
57 52 struct list_head list; /* pending work list */
58 53  
... ... @@ -252,10 +247,10 @@
252 247 int do_sb_sort = 0;
253 248 int moved = 0;
254 249  
255   - WARN_ON_ONCE(!work->older_than_this_is_set);
256 250 while (!list_empty(delaying_queue)) {
257 251 inode = wb_inode(delaying_queue->prev);
258   - if (inode_dirtied_after(inode, work->older_than_this))
  252 + if (work->older_than_this &&
  253 + inode_dirtied_after(inode, *work->older_than_this))
259 254 break;
260 255 list_move(&inode->i_wb_list, &tmp);
261 256 moved++;
... ... @@ -742,8 +737,6 @@
742 737 .sync_mode = WB_SYNC_NONE,
743 738 .range_cyclic = 1,
744 739 .reason = reason,
745   - .older_than_this = jiffies,
746   - .older_than_this_is_set = 1,
747 740 };
748 741  
749 742 spin_lock(&wb->list_lock);
750 743  
... ... @@ -802,13 +795,12 @@
802 795 {
803 796 unsigned long wb_start = jiffies;
804 797 long nr_pages = work->nr_pages;
  798 + unsigned long oldest_jif;
805 799 struct inode *inode;
806 800 long progress;
807 801  
808   - if (!work->older_than_this_is_set) {
809   - work->older_than_this = jiffies;
810   - work->older_than_this_is_set = 1;
811   - }
  802 + oldest_jif = jiffies;
  803 + work->older_than_this = &oldest_jif;
812 804  
813 805 spin_lock(&wb->list_lock);
814 806 for (;;) {
815 807  
... ... @@ -842,10 +834,10 @@
842 834 * safe.
843 835 */
844 836 if (work->for_kupdate) {
845   - work->older_than_this = jiffies -
  837 + oldest_jif = jiffies -
846 838 msecs_to_jiffies(dirty_expire_interval * 10);
847 839 } else if (work->for_background)
848   - work->older_than_this = jiffies;
  840 + oldest_jif = jiffies;
849 841  
850 842 trace_writeback_start(wb->bdi, work);
851 843 if (list_empty(&wb->b_io))
852 844  
853 845  
854 846  
... ... @@ -1357,21 +1349,18 @@
1357 1349  
1358 1350 /**
1359 1351 * sync_inodes_sb - sync sb inode pages
1360   - * @sb: the superblock
1361   - * @older_than_this: timestamp
  1352 + * @sb: the superblock
1362 1353 *
1363 1354 * This function writes and waits on any dirty inode belonging to this
1364   - * superblock that has been dirtied before given timestamp.
  1355 + * super_block.
1365 1356 */
1366   -void sync_inodes_sb(struct super_block *sb, unsigned long older_than_this)
  1357 +void sync_inodes_sb(struct super_block *sb)
1367 1358 {
1368 1359 DECLARE_COMPLETION_ONSTACK(done);
1369 1360 struct wb_writeback_work work = {
1370 1361 .sb = sb,
1371 1362 .sync_mode = WB_SYNC_ALL,
1372 1363 .nr_pages = LONG_MAX,
1373   - .older_than_this = older_than_this,
1374   - .older_than_this_is_set = 1,
1375 1364 .range_cyclic = 0,
1376 1365 .done = &done,
1377 1366 .reason = WB_REASON_SYNC,
... ... @@ -27,11 +27,10 @@
27 27 * wait == 1 case since in that case write_inode() functions do
28 28 * sync_dirty_buffer() and thus effectively write one block at a time.
29 29 */
30   -static int __sync_filesystem(struct super_block *sb, int wait,
31   - unsigned long start)
  30 +static int __sync_filesystem(struct super_block *sb, int wait)
32 31 {
33 32 if (wait)
34   - sync_inodes_sb(sb, start);
  33 + sync_inodes_sb(sb);
35 34 else
36 35 writeback_inodes_sb(sb, WB_REASON_SYNC);
37 36  
... ... @@ -48,7 +47,6 @@
48 47 int sync_filesystem(struct super_block *sb)
49 48 {
50 49 int ret;
51   - unsigned long start = jiffies;
52 50  
53 51 /*
54 52 * We need to be protected against the filesystem going from
55 53  
56 54  
... ... @@ -62,17 +60,17 @@
62 60 if (sb->s_flags & MS_RDONLY)
63 61 return 0;
64 62  
65   - ret = __sync_filesystem(sb, 0, start);
  63 + ret = __sync_filesystem(sb, 0);
66 64 if (ret < 0)
67 65 return ret;
68   - return __sync_filesystem(sb, 1, start);
  66 + return __sync_filesystem(sb, 1);
69 67 }
70 68 EXPORT_SYMBOL_GPL(sync_filesystem);
71 69  
72 70 static void sync_inodes_one_sb(struct super_block *sb, void *arg)
73 71 {
74 72 if (!(sb->s_flags & MS_RDONLY))
75   - sync_inodes_sb(sb, *((unsigned long *)arg));
  73 + sync_inodes_sb(sb);
76 74 }
77 75  
78 76 static void sync_fs_one_sb(struct super_block *sb, void *arg)
79 77  
... ... @@ -104,10 +102,9 @@
104 102 SYSCALL_DEFINE0(sync)
105 103 {
106 104 int nowait = 0, wait = 1;
107   - unsigned long start = jiffies;
108 105  
109 106 wakeup_flusher_threads(0, WB_REASON_SYNC);
110   - iterate_supers(sync_inodes_one_sb, &start);
  107 + iterate_supers(sync_inodes_one_sb, NULL);
111 108 iterate_supers(sync_fs_one_sb, &nowait);
112 109 iterate_supers(sync_fs_one_sb, &wait);
113 110 iterate_bdevs(fdatawrite_one_bdev, NULL);
... ... @@ -913,7 +913,7 @@
913 913 struct super_block *sb = mp->m_super;
914 914  
915 915 if (down_read_trylock(&sb->s_umount)) {
916   - sync_inodes_sb(sb, jiffies);
  916 + sync_inodes_sb(sb);
917 917 up_read(&sb->s_umount);
918 918 }
919 919 }
include/linux/writeback.h
... ... @@ -97,7 +97,7 @@
97 97 int try_to_writeback_inodes_sb(struct super_block *, enum wb_reason reason);
98 98 int try_to_writeback_inodes_sb_nr(struct super_block *, unsigned long nr,
99 99 enum wb_reason reason);
100   -void sync_inodes_sb(struct super_block *sb, unsigned long older_than_this);
  100 +void sync_inodes_sb(struct super_block *);
101 101 void wakeup_flusher_threads(long nr_pages, enum wb_reason reason);
102 102 void inode_wait_for_writeback(struct inode *inode);
103 103  
include/trace/events/writeback.h
... ... @@ -287,11 +287,11 @@
287 287 __field(int, reason)
288 288 ),
289 289 TP_fast_assign(
290   - unsigned long older_than_this = work->older_than_this;
  290 + unsigned long *older_than_this = work->older_than_this;
291 291 strncpy(__entry->name, dev_name(wb->bdi->dev), 32);
292   - __entry->older = older_than_this;
  292 + __entry->older = older_than_this ? *older_than_this : 0;
293 293 __entry->age = older_than_this ?
294   - (jiffies - older_than_this) * 1000 / HZ : -1;
  294 + (jiffies - *older_than_this) * 1000 / HZ : -1;
295 295 __entry->moved = moved;
296 296 __entry->reason = work->reason;
297 297 ),