Commit a4bfb4cf11fd2211b788af59dc8a8b4394bca227

Authored by Joel Becker
1 parent 1739da4054

ocfs2: When zero extending, do it by page.

ocfs2_zero_extend() does its zeroing block by block, but it calls a
function named ocfs2_write_zero_page().  Let's have
ocfs2_write_zero_page() handle the page level.  From
ocfs2_zero_extend()'s perspective, it is now page-at-a-time.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Cc: stable@kernel.org

Showing 2 changed files with 84 additions and 64 deletions Side-by-side Diff

... ... @@ -459,36 +459,6 @@
459 459 return ret;
460 460 }
461 461  
462   -handle_t *ocfs2_start_walk_page_trans(struct inode *inode, /* Deleted by this commit. Started a transaction and, for inodes that order data, filed the inode with it. Returns the handle, or an ERR_PTR() value on failure. */
463   - struct page *page, /* never referenced in the body */
464   - unsigned from, /* never referenced in the body */
465   - unsigned to) /* never referenced in the body */
466   -{
467   - struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
468   - handle_t *handle;
469   - int ret = 0;
470   -
471   - handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
472   - if (IS_ERR(handle)) {
473   - ret = -ENOMEM; /* NOTE(review): PTR_ERR(handle) is discarded here; every start failure is reported as -ENOMEM */
474   - mlog_errno(ret);
475   - goto out;
476   - }
477   -
478   - if (ocfs2_should_order_data(inode)) {
479   - ret = ocfs2_jbd2_file_inode(handle, inode);
480   - if (ret < 0)
481   - mlog_errno(ret); /* logged only; the unwinding happens at out: below */
482   - }
483   -out:
484   - if (ret) {
485   - if (!IS_ERR(handle)) /* a transaction was actually started, so close it before reporting the error */
486   - ocfs2_commit_trans(osb, handle);
487   - handle = ERR_PTR(ret); /* the error code travels back encoded in the returned pointer */
488   - }
489   - return handle;
490   -}
491   -
492 462 static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block)
493 463 {
494 464 sector_t status;
... ... @@ -724,28 +724,55 @@
724 724 return status;
725 725 }
726 726  
  727 +/*
  728 + * While a write will already be ordering the data, a truncate will not.
  729 + * Thus, we need to explicitly order the zeroed pages.
  730 + */
  731 +static handle_t *ocfs2_zero_start_ordered_transaction(struct inode *inode) /* Returns NULL when the inode does not order data, a started handle when it does, or an ERR_PTR() value on failure. */
  732 +{
  733 + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
  734 + handle_t *handle = NULL; /* stays NULL on the "no ordering needed" path */
  735 + int ret = 0;
  736 +
  737 + if (!ocfs2_should_order_data(inode)) /* nothing to order: leave with ret == 0 and handle == NULL, which is not an error */
  738 + goto out;
  739 +
  740 + handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
  741 + if (IS_ERR(handle)) {
  742 + ret = -ENOMEM; /* NOTE(review): PTR_ERR(handle) is discarded here; confirm that reporting every start failure as -ENOMEM is intended */
  743 + mlog_errno(ret);
  744 + goto out;
  745 + }
  746 +
  747 + ret = ocfs2_jbd2_file_inode(handle, inode);
  748 + if (ret < 0)
  749 + mlog_errno(ret); /* logged only; the unwinding happens at out: below */
  750 +
  751 +out:
  752 + if (ret) {
  753 + if (!IS_ERR(handle)) /* a transaction was actually started, so close it before reporting the error */
  754 + ocfs2_commit_trans(osb, handle);
  755 + handle = ERR_PTR(ret); /* the error code travels back encoded in the returned pointer */
  756 + }
  757 + return handle;
  758 +}
  759 +
727 760 /* Some parts of this taken from generic_cont_expand, which turned out
728 761 * to be too fragile to do exactly what we need without us having to
729 762 * worry about recursive locking in ->write_begin() and ->write_end(). */
730   -static int ocfs2_write_zero_page(struct inode *inode,
731   - u64 size)
  763 +static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
  764 + u64 abs_to)
732 765 {
733 766 struct address_space *mapping = inode->i_mapping;
734 767 struct page *page;
735   - unsigned long index;
736   - unsigned int offset;
  768 + unsigned long index = abs_from >> PAGE_CACHE_SHIFT;
737 769 handle_t *handle = NULL;
738 770 int ret;
  771 + unsigned zero_from, zero_to, block_start, block_end;
739 772  
740   - offset = (size & (PAGE_CACHE_SIZE-1)); /* Within page */
741   - /* ugh. in prepare/commit_write, if from==to==start of block, we
742   - ** skip the prepare. make sure we never send an offset for the start
743   - ** of a block
744   - */
745   - if ((offset & (inode->i_sb->s_blocksize - 1)) == 0) {
746   - offset++;
747   - }
748   - index = size >> PAGE_CACHE_SHIFT;
  773 + BUG_ON(abs_from >= abs_to);
  774 + BUG_ON(abs_to > (((u64)index + 1) << PAGE_CACHE_SHIFT));
  775 + BUG_ON(abs_from & (inode->i_blkbits - 1));
749 776  
750 777 page = grab_cache_page(mapping, index);
751 778 if (!page) {
752 779  
753 780  
754 781  
755 782  
756 783  
... ... @@ -754,31 +781,51 @@
754 781 goto out;
755 782 }
756 783  
757   - ret = ocfs2_prepare_write_nolock(inode, page, offset, offset);
758   - if (ret < 0) {
759   - mlog_errno(ret);
760   - goto out_unlock;
761   - }
  784 + /* Get the offsets within the page that we want to zero */
  785 + zero_from = abs_from & (PAGE_CACHE_SIZE - 1);
  786 + zero_to = abs_to & (PAGE_CACHE_SIZE - 1);
  787 + if (!zero_to)
  788 + zero_to = PAGE_CACHE_SIZE;
762 789  
763   - if (ocfs2_should_order_data(inode)) {
764   - handle = ocfs2_start_walk_page_trans(inode, page, offset,
765   - offset);
766   - if (IS_ERR(handle)) {
767   - ret = PTR_ERR(handle);
768   - handle = NULL;
  790 + /* We know that zero_from is block aligned */
  791 + for (block_start = zero_from; block_start < zero_to;
  792 + block_start = block_end) {
  793 + block_end = block_start + (1 << inode->i_blkbits);
  794 +
  795 + /*
  796 + * block_start is block-aligned. Bump it by one to
  797 + * force ocfs2_{prepare,commit}_write() to zero the
  798 + * whole block.
  799 + */
  800 + ret = ocfs2_prepare_write_nolock(inode, page,
  801 + block_start + 1,
  802 + block_start + 1);
  803 + if (ret < 0) {
  804 + mlog_errno(ret);
769 805 goto out_unlock;
770 806 }
771   - }
772 807  
773   - /* must not update i_size! */
774   - ret = block_commit_write(page, offset, offset);
775   - if (ret < 0)
776   - mlog_errno(ret);
777   - else
778   - ret = 0;
  808 + if (!handle) {
  809 + handle = ocfs2_zero_start_ordered_transaction(inode);
  810 + if (IS_ERR(handle)) {
  811 + ret = PTR_ERR(handle);
  812 + handle = NULL;
  813 + break;
  814 + }
  815 + }
779 816  
  817 + /* must not update i_size! */
  818 + ret = block_commit_write(page, block_start + 1,
  819 + block_start + 1);
  820 + if (ret < 0)
  821 + mlog_errno(ret);
  822 + else
  823 + ret = 0;
  824 + }
  825 +
780 826 if (handle)
781 827 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
  828 +
782 829 out_unlock:
783 830 unlock_page(page);
784 831 page_cache_release(page);
785 832  
786 833  
... ... @@ -790,18 +837,21 @@
790 837 u64 zero_to_size)
791 838 {
792 839 int ret = 0;
793   - u64 start_off;
  840 + u64 start_off, next_off;
794 841 struct super_block *sb = inode->i_sb;
795 842  
796 843 start_off = ocfs2_align_bytes_to_blocks(sb, i_size_read(inode));
797 844 while (start_off < zero_to_size) {
798   - ret = ocfs2_write_zero_page(inode, start_off);
  845 + next_off = (start_off & PAGE_CACHE_MASK) + PAGE_CACHE_SIZE;
  846 + if (next_off > zero_to_size)
  847 + next_off = zero_to_size;
  848 + ret = ocfs2_write_zero_page(inode, start_off, next_off);
799 849 if (ret < 0) {
800 850 mlog_errno(ret);
801 851 goto out;
802 852 }
803 853  
804   - start_off += sb->s_blocksize;
  854 + start_off = next_off;
805 855  
806 856 /*
807 857 * Very large extends have the potential to lock up