Commit 20bec8ab1458c24bed0d5492ee15d87807fc415a
Exists in master and in 39 other branches
Merge branch 'ext3-latency-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
* 'ext3-latency-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  ext3: Add replace-on-rename heuristics for data=writeback mode
  ext3: Add replace-on-truncate heuristics for data=writeback mode
  ext3: Use WRITE_SYNC for commits which are caused by fsync()
  block_write_full_page: Use synchronous writes for WB_SYNC_ALL writebacks
Showing 8 changed files Side-by-side Diff
fs/buffer.c
... | ... | @@ -1595,6 +1595,7 @@ |
1595 | 1595 | struct buffer_head *bh, *head; |
1596 | 1596 | const unsigned blocksize = 1 << inode->i_blkbits; |
1597 | 1597 | int nr_underway = 0; |
1598 | + int write_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE); | |
1598 | 1599 | |
1599 | 1600 | BUG_ON(!PageLocked(page)); |
1600 | 1601 | |
... | ... | @@ -1686,7 +1687,7 @@ |
1686 | 1687 | do { |
1687 | 1688 | struct buffer_head *next = bh->b_this_page; |
1688 | 1689 | if (buffer_async_write(bh)) { |
1689 | - submit_bh(WRITE, bh); | |
1690 | + submit_bh(write_op, bh); | |
1690 | 1691 | nr_underway++; |
1691 | 1692 | } |
1692 | 1693 | bh = next; |
... | ... | @@ -1740,7 +1741,7 @@ |
1740 | 1741 | struct buffer_head *next = bh->b_this_page; |
1741 | 1742 | if (buffer_async_write(bh)) { |
1742 | 1743 | clear_buffer_dirty(bh); |
1743 | - submit_bh(WRITE, bh); | |
1744 | + submit_bh(write_op, bh); | |
1744 | 1745 | nr_underway++; |
1745 | 1746 | } |
1746 | 1747 | bh = next; |
fs/ext3/file.c
... | ... | @@ -33,6 +33,10 @@ |
33 | 33 | */ |
34 | 34 | static int ext3_release_file (struct inode * inode, struct file * filp) |
35 | 35 | { |
36 | + if (EXT3_I(inode)->i_state & EXT3_STATE_FLUSH_ON_CLOSE) { | |
37 | + filemap_flush(inode->i_mapping); | |
38 | + EXT3_I(inode)->i_state &= ~EXT3_STATE_FLUSH_ON_CLOSE; | |
39 | + } | |
36 | 40 | /* if we are the last writer on the inode, drop the block reservation */ |
37 | 41 | if ((filp->f_mode & FMODE_WRITE) && |
38 | 42 | (atomic_read(&inode->i_writecount) == 1)) |
fs/ext3/inode.c
... | ... | @@ -2363,6 +2363,9 @@ |
2363 | 2363 | if (!ext3_can_truncate(inode)) |
2364 | 2364 | return; |
2365 | 2365 | |
2366 | + if (inode->i_size == 0 && ext3_should_writeback_data(inode)) | |
2367 | + ei->i_state |= EXT3_STATE_FLUSH_ON_CLOSE; | |
2368 | + | |
2366 | 2369 | /* |
2367 | 2370 | * We have to lock the EOF page here, because lock_page() nests |
2368 | 2371 | * outside journal_start(). |
fs/ext3/namei.c
... | ... | @@ -2274,7 +2274,7 @@ |
2274 | 2274 | struct inode * old_inode, * new_inode; |
2275 | 2275 | struct buffer_head * old_bh, * new_bh, * dir_bh; |
2276 | 2276 | struct ext3_dir_entry_2 * old_de, * new_de; |
2277 | - int retval; | |
2277 | + int retval, flush_file = 0; | |
2278 | 2278 | |
2279 | 2279 | old_bh = new_bh = dir_bh = NULL; |
2280 | 2280 | |
... | ... | @@ -2410,6 +2410,8 @@ |
2410 | 2410 | ext3_mark_inode_dirty(handle, new_inode); |
2411 | 2411 | if (!new_inode->i_nlink) |
2412 | 2412 | ext3_orphan_add(handle, new_inode); |
2413 | + if (ext3_should_writeback_data(new_inode)) | |
2414 | + flush_file = 1; | |
2413 | 2415 | } |
2414 | 2416 | retval = 0; |
2415 | 2417 | |
... | ... | @@ -2418,6 +2420,8 @@ |
2418 | 2420 | brelse (old_bh); |
2419 | 2421 | brelse (new_bh); |
2420 | 2422 | ext3_journal_stop(handle); |
2423 | + if (retval == 0 && flush_file) | |
2424 | + filemap_flush(old_inode->i_mapping); | |
2421 | 2425 | return retval; |
2422 | 2426 | } |
2423 | 2427 |
fs/jbd/commit.c
... | ... | @@ -20,6 +20,7 @@ |
20 | 20 | #include <linux/slab.h> |
21 | 21 | #include <linux/mm.h> |
22 | 22 | #include <linux/pagemap.h> |
23 | +#include <linux/bio.h> | |
23 | 24 | |
24 | 25 | /* |
25 | 26 | * Default IO end handler for temporary BJ_IO buffer_heads. |
26 | 27 | |
... | ... | @@ -171,14 +172,15 @@ |
171 | 172 | return (ret == -EIO); |
172 | 173 | } |
173 | 174 | |
174 | -static void journal_do_submit_data(struct buffer_head **wbuf, int bufs) | |
175 | +static void journal_do_submit_data(struct buffer_head **wbuf, int bufs, | |
176 | + int write_op) | |
175 | 177 | { |
176 | 178 | int i; |
177 | 179 | |
178 | 180 | for (i = 0; i < bufs; i++) { |
179 | 181 | wbuf[i]->b_end_io = end_buffer_write_sync; |
180 | 182 | /* We use-up our safety reference in submit_bh() */ |
181 | - submit_bh(WRITE, wbuf[i]); | |
183 | + submit_bh(write_op, wbuf[i]); | |
182 | 184 | } |
183 | 185 | } |
184 | 186 | |
... | ... | @@ -186,7 +188,8 @@ |
186 | 188 | * Submit all the data buffers to disk |
187 | 189 | */ |
188 | 190 | static int journal_submit_data_buffers(journal_t *journal, |
189 | - transaction_t *commit_transaction) | |
191 | + transaction_t *commit_transaction, | |
192 | + int write_op) | |
190 | 193 | { |
191 | 194 | struct journal_head *jh; |
192 | 195 | struct buffer_head *bh; |
... | ... | @@ -225,7 +228,7 @@ |
225 | 228 | BUFFER_TRACE(bh, "needs blocking lock"); |
226 | 229 | spin_unlock(&journal->j_list_lock); |
227 | 230 | /* Write out all data to prevent deadlocks */ |
228 | - journal_do_submit_data(wbuf, bufs); | |
231 | + journal_do_submit_data(wbuf, bufs, write_op); | |
229 | 232 | bufs = 0; |
230 | 233 | lock_buffer(bh); |
231 | 234 | spin_lock(&journal->j_list_lock); |
... | ... | @@ -256,7 +259,7 @@ |
256 | 259 | jbd_unlock_bh_state(bh); |
257 | 260 | if (bufs == journal->j_wbufsize) { |
258 | 261 | spin_unlock(&journal->j_list_lock); |
259 | - journal_do_submit_data(wbuf, bufs); | |
262 | + journal_do_submit_data(wbuf, bufs, write_op); | |
260 | 263 | bufs = 0; |
261 | 264 | goto write_out_data; |
262 | 265 | } |
... | ... | @@ -286,7 +289,7 @@ |
286 | 289 | } |
287 | 290 | } |
288 | 291 | spin_unlock(&journal->j_list_lock); |
289 | - journal_do_submit_data(wbuf, bufs); | |
292 | + journal_do_submit_data(wbuf, bufs, write_op); | |
290 | 293 | |
291 | 294 | return err; |
292 | 295 | } |
... | ... | @@ -315,6 +318,7 @@ |
315 | 318 | int first_tag = 0; |
316 | 319 | int tag_flag; |
317 | 320 | int i; |
321 | + int write_op = WRITE; | |
318 | 322 | |
319 | 323 | /* |
320 | 324 | * First job: lock down the current transaction and wait for |
... | ... | @@ -347,6 +351,8 @@ |
347 | 351 | spin_lock(&journal->j_state_lock); |
348 | 352 | commit_transaction->t_state = T_LOCKED; |
349 | 353 | |
354 | + if (commit_transaction->t_synchronous_commit) | |
355 | + write_op = WRITE_SYNC; | |
350 | 356 | spin_lock(&commit_transaction->t_handle_lock); |
351 | 357 | while (commit_transaction->t_updates) { |
352 | 358 | DEFINE_WAIT(wait); |
... | ... | @@ -431,7 +437,8 @@ |
431 | 437 | * Now start flushing things to disk, in the order they appear |
432 | 438 | * on the transaction lists. Data blocks go first. |
433 | 439 | */ |
434 | - err = journal_submit_data_buffers(journal, commit_transaction); | |
440 | + err = journal_submit_data_buffers(journal, commit_transaction, | |
441 | + write_op); | |
435 | 442 | |
436 | 443 | /* |
437 | 444 | * Wait for all previously submitted IO to complete. |
... | ... | @@ -660,7 +667,7 @@ |
660 | 667 | clear_buffer_dirty(bh); |
661 | 668 | set_buffer_uptodate(bh); |
662 | 669 | bh->b_end_io = journal_end_buffer_io_sync; |
663 | - submit_bh(WRITE, bh); | |
670 | + submit_bh(write_op, bh); | |
664 | 671 | } |
665 | 672 | cond_resched(); |
666 | 673 |
fs/jbd/transaction.c
include/linux/ext3_fs.h
... | ... | @@ -208,6 +208,7 @@ |
208 | 208 | #define EXT3_STATE_JDATA 0x00000001 /* journaled data exists */ |
209 | 209 | #define EXT3_STATE_NEW 0x00000002 /* inode is newly created */ |
210 | 210 | #define EXT3_STATE_XATTR 0x00000004 /* has in-inode xattrs */ |
211 | +#define EXT3_STATE_FLUSH_ON_CLOSE 0x00000008 | |
211 | 212 | |
212 | 213 | /* Used to pass group descriptor data when online resize is done */ |
213 | 214 | struct ext3_new_group_input { |
include/linux/jbd.h