Commit 17335dcc471199717839b2fa3492ca36f70f1168
Committed by
Theodore Ts'o
1 parent
28a535f9a0
Exists in
smarc-l5.0.0_1.0.0-ga
and in
5 other branches
ext4: serialize dio nonlocked reads with defrag workers
Inode block defrag and ext4_change_inode_journal_flag() may affect the result of nonlocked DIO reads, so proper synchronization is required.
- Add missing inode_dio_wait() calls where appropriate.
- Check the inode state while holding an extra i_dio_count reference.
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Showing 4 changed files with 44 additions and 0 deletions. Side-by-side diff:
fs/ext4/ext4.h
... | ... | @@ -1358,6 +1358,8 @@ |
1358 | 1358 | EXT4_STATE_DIO_UNWRITTEN, /* need convert on dio done*/ |
1359 | 1359 | EXT4_STATE_NEWENTRY, /* File just added to dir */ |
1360 | 1360 | EXT4_STATE_DELALLOC_RESERVED, /* blks already reserved for delalloc */ |
1361 | + EXT4_STATE_DIOREAD_LOCK, /* Disable support for dio read | |
1362 | + nolocking */ | |
1361 | 1363 | }; |
1362 | 1364 | |
1363 | 1365 | #define EXT4_INODE_BIT_FNS(name, field, offset) \ |
... | ... | @@ -2467,6 +2469,21 @@ |
2467 | 2469 | static inline void set_bitmap_uptodate(struct buffer_head *bh) |
2468 | 2470 | { |
2469 | 2471 | set_bit(BH_BITMAP_UPTODATE, &(bh)->b_state); |
2472 | +} | |
2473 | + | |
2474 | +/* | |
2475 | + * Disable DIO read nolock optimization, so new dioreaders will be forced | |
2476 | + * to grab i_mutex | |
2477 | + */ | |
2478 | +static inline void ext4_inode_block_unlocked_dio(struct inode *inode) | |
2479 | +{ | |
2480 | + ext4_set_inode_state(inode, EXT4_STATE_DIOREAD_LOCK); | |
2481 | + smp_mb(); | |
2482 | +} | |
2483 | +static inline void ext4_inode_resume_unlocked_dio(struct inode *inode) | |
2484 | +{ | |
2485 | + smp_mb(); | |
2486 | + ext4_clear_inode_state(inode, EXT4_STATE_DIOREAD_LOCK); | |
2470 | 2487 | } |
2471 | 2488 | |
2472 | 2489 | #define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) |
fs/ext4/indirect.c
... | ... | @@ -810,11 +810,25 @@ |
810 | 810 | if (unlikely(!list_empty(&ei->i_completed_io_list))) |
811 | 811 | ext4_flush_completed_IO(inode); |
812 | 812 | |
813 | + /* | |
814 | + * Nolock dioread optimization may be dynamically disabled | |
815 | + * via ext4_inode_block_unlocked_dio(). Check inode's state | |
816 | + * while holding extra i_dio_count ref. | |
817 | + */ | |
818 | + atomic_inc(&inode->i_dio_count); | |
819 | + smp_mb(); | |
820 | + if (unlikely(ext4_test_inode_state(inode, | |
821 | + EXT4_STATE_DIOREAD_LOCK))) { | |
822 | + inode_dio_done(inode); | |
823 | + goto locked; | |
824 | + } | |
813 | 825 | ret = __blockdev_direct_IO(rw, iocb, inode, |
814 | 826 | inode->i_sb->s_bdev, iov, |
815 | 827 | offset, nr_segs, |
816 | 828 | ext4_get_block, NULL, NULL, 0); |
829 | + inode_dio_done(inode); | |
817 | 830 | } else { |
831 | +locked: | |
818 | 832 | ret = blockdev_direct_IO(rw, iocb, inode, iov, |
819 | 833 | offset, nr_segs, ext4_get_block); |
820 | 834 |
fs/ext4/inode.c
... | ... | @@ -4720,6 +4720,10 @@ |
4720 | 4720 | return err; |
4721 | 4721 | } |
4722 | 4722 | |
4723 | + /* Wait for all existing dio workers */ | |
4724 | + ext4_inode_block_unlocked_dio(inode); | |
4725 | + inode_dio_wait(inode); | |
4726 | + | |
4723 | 4727 | jbd2_journal_lock_updates(journal); |
4724 | 4728 | |
4725 | 4729 | /* |
... | ... | @@ -4739,6 +4743,7 @@ |
4739 | 4743 | ext4_set_aops(inode); |
4740 | 4744 | |
4741 | 4745 | jbd2_journal_unlock_updates(journal); |
4746 | + ext4_inode_resume_unlocked_dio(inode); | |
4742 | 4747 | |
4743 | 4748 | /* Finally we can mark the inode as dirty. */ |
4744 | 4749 |
fs/ext4/move_extent.c
... | ... | @@ -1323,6 +1323,12 @@ |
1323 | 1323 | /* Protect orig and donor inodes against a truncate */ |
1324 | 1324 | mext_inode_double_lock(orig_inode, donor_inode); |
1325 | 1325 | |
1326 | + /* Wait for all existing dio workers */ | |
1327 | + ext4_inode_block_unlocked_dio(orig_inode); | |
1328 | + ext4_inode_block_unlocked_dio(donor_inode); | |
1329 | + inode_dio_wait(orig_inode); | |
1330 | + inode_dio_wait(donor_inode); | |
1331 | + | |
1326 | 1332 | /* Protect extent tree against block allocations via delalloc */ |
1327 | 1333 | double_down_write_data_sem(orig_inode, donor_inode); |
1328 | 1334 | /* Check the filesystem environment whether move_extent can be done */ |
... | ... | @@ -1521,6 +1527,8 @@ |
1521 | 1527 | kfree(holecheck_path); |
1522 | 1528 | } |
1523 | 1529 | double_up_write_data_sem(orig_inode, donor_inode); |
1530 | + ext4_inode_resume_unlocked_dio(orig_inode); | |
1531 | + ext4_inode_resume_unlocked_dio(donor_inode); | |
1524 | 1532 | mext_inode_double_unlock(orig_inode, donor_inode); |
1525 | 1533 | |
1526 | 1534 | return ret; |