Commit 17335dcc471199717839b2fa3492ca36f70f1168

Authored by Dmitry Monakhov
Committed by Theodore Ts'o
1 parent 28a535f9a0

ext4: serialize dio nonlocked reads with defrag workers

Inode block defragmentation and ext4_change_inode_journal_flag() may
affect the result of nonlocked DIO reads, so proper synchronization
is required.

- Add missing inode_dio_wait() calls where appropriate
- Check the inode state while holding an extra i_dio_count reference.

Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>

Showing 4 changed files with 44 additions and 0 deletions Side-by-side Diff

... ... @@ -1358,6 +1358,8 @@
1358 1358 EXT4_STATE_DIO_UNWRITTEN, /* need convert on dio done*/
1359 1359 EXT4_STATE_NEWENTRY, /* File just added to dir */
1360 1360 EXT4_STATE_DELALLOC_RESERVED, /* blks already reserved for delalloc */
  1361 + EXT4_STATE_DIOREAD_LOCK, /* Disable support for dio read
  1362 + nolocking */
1361 1363 };
1362 1364  
1363 1365 #define EXT4_INODE_BIT_FNS(name, field, offset) \
... ... @@ -2467,6 +2469,21 @@
2467 2469 static inline void set_bitmap_uptodate(struct buffer_head *bh)
2468 2470 {
2469 2471 set_bit(BH_BITMAP_UPTODATE, &(bh)->b_state);
  2472 +}
  2473 +
  2474 +/*
  2475 + * Disable DIO read nolock optimization, so new dioreaders will be forced
  2476 + * to grab i_mutex
  2477 + */
  2478 +static inline void ext4_inode_block_unlocked_dio(struct inode *inode)
  2479 +{
  2480 + ext4_set_inode_state(inode, EXT4_STATE_DIOREAD_LOCK);
  2481 + smp_mb();
  2482 +}
  2483 +static inline void ext4_inode_resume_unlocked_dio(struct inode *inode)
  2484 +{
  2485 + smp_mb();
  2486 + ext4_clear_inode_state(inode, EXT4_STATE_DIOREAD_LOCK);
2470 2487 }
2471 2488  
2472 2489 #define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
... ... @@ -810,11 +810,25 @@
810 810 if (unlikely(!list_empty(&ei->i_completed_io_list)))
811 811 ext4_flush_completed_IO(inode);
812 812  
  813 + /*
  814 + * Nolock dioread optimization may be dynamically disabled
  815 + * via ext4_inode_block_unlocked_dio(). Check inode's state
  816 + * while holding extra i_dio_count ref.
  817 + */
  818 + atomic_inc(&inode->i_dio_count);
  819 + smp_mb();
  820 + if (unlikely(ext4_test_inode_state(inode,
  821 + EXT4_STATE_DIOREAD_LOCK))) {
  822 + inode_dio_done(inode);
  823 + goto locked;
  824 + }
813 825 ret = __blockdev_direct_IO(rw, iocb, inode,
814 826 inode->i_sb->s_bdev, iov,
815 827 offset, nr_segs,
816 828 ext4_get_block, NULL, NULL, 0);
  829 + inode_dio_done(inode);
817 830 } else {
  831 +locked:
818 832 ret = blockdev_direct_IO(rw, iocb, inode, iov,
819 833 offset, nr_segs, ext4_get_block);
820 834  
... ... @@ -4720,6 +4720,10 @@
4720 4720 return err;
4721 4721 }
4722 4722  
  4723 + /* Wait for all existing dio workers */
  4724 + ext4_inode_block_unlocked_dio(inode);
  4725 + inode_dio_wait(inode);
  4726 +
4723 4727 jbd2_journal_lock_updates(journal);
4724 4728  
4725 4729 /*
... ... @@ -4739,6 +4743,7 @@
4739 4743 ext4_set_aops(inode);
4740 4744  
4741 4745 jbd2_journal_unlock_updates(journal);
  4746 + ext4_inode_resume_unlocked_dio(inode);
4742 4747  
4743 4748 /* Finally we can mark the inode as dirty. */
4744 4749  
fs/ext4/move_extent.c
... ... @@ -1323,6 +1323,12 @@
1323 1323 /* Protect orig and donor inodes against a truncate */
1324 1324 mext_inode_double_lock(orig_inode, donor_inode);
1325 1325  
  1326 + /* Wait for all existing dio workers */
  1327 + ext4_inode_block_unlocked_dio(orig_inode);
  1328 + ext4_inode_block_unlocked_dio(donor_inode);
  1329 + inode_dio_wait(orig_inode);
  1330 + inode_dio_wait(donor_inode);
  1331 +
1326 1332 /* Protect extent tree against block allocations via delalloc */
1327 1333 double_down_write_data_sem(orig_inode, donor_inode);
1328 1334 /* Check the filesystem environment whether move_extent can be done */
... ... @@ -1521,6 +1527,8 @@
1521 1527 kfree(holecheck_path);
1522 1528 }
1523 1529 double_up_write_data_sem(orig_inode, donor_inode);
  1530 + ext4_inode_resume_unlocked_dio(orig_inode);
  1531 + ext4_inode_resume_unlocked_dio(donor_inode);
1524 1532 mext_inode_double_unlock(orig_inode, donor_inode);
1525 1533  
1526 1534 return ret;