Commit a4bfb4cf11fd2211b788af59dc8a8b4394bca227
1 parent: 1739da4054
Exists in master and in 7 other branches
ocfs2: When zero extending, do it by page.
ocfs2_zero_extend() does its zeroing block by block, but it calls a function named ocfs2_write_zero_page(). Let's have ocfs2_write_zero_page() handle the page level. From ocfs2_zero_extend()'s perspective, it is now page-at-a-time.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Cc: stable@kernel.org
Showing 2 changed files with 84 additions and 64 deletions Side-by-side Diff
fs/ocfs2/aops.c
... | ... | @@ -459,36 +459,6 @@ |
459 | 459 | return ret; |
460 | 460 | } |
461 | 461 | |
462 | -handle_t *ocfs2_start_walk_page_trans(struct inode *inode, | |
463 | - struct page *page, | |
464 | - unsigned from, | |
465 | - unsigned to) | |
466 | -{ | |
467 | - struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | |
468 | - handle_t *handle; | |
469 | - int ret = 0; | |
470 | - | |
471 | - handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); | |
472 | - if (IS_ERR(handle)) { | |
473 | - ret = -ENOMEM; | |
474 | - mlog_errno(ret); | |
475 | - goto out; | |
476 | - } | |
477 | - | |
478 | - if (ocfs2_should_order_data(inode)) { | |
479 | - ret = ocfs2_jbd2_file_inode(handle, inode); | |
480 | - if (ret < 0) | |
481 | - mlog_errno(ret); | |
482 | - } | |
483 | -out: | |
484 | - if (ret) { | |
485 | - if (!IS_ERR(handle)) | |
486 | - ocfs2_commit_trans(osb, handle); | |
487 | - handle = ERR_PTR(ret); | |
488 | - } | |
489 | - return handle; | |
490 | -} | |
491 | - | |
492 | 462 | static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block) |
493 | 463 | { |
494 | 464 | sector_t status; |
fs/ocfs2/file.c
... | ... | @@ -724,28 +724,55 @@ |
724 | 724 | return status; |
725 | 725 | } |
726 | 726 | |
727 | +/* | |
728 | + * While a write will already be ordering the data, a truncate will not. | |
729 | + * Thus, we need to explicitly order the zeroed pages. | |
730 | + */ | |
731 | +static handle_t *ocfs2_zero_start_ordered_transaction(struct inode *inode) | |
732 | +{ | |
733 | + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | |
734 | + handle_t *handle = NULL; | |
735 | + int ret = 0; | |
736 | + | |
737 | + if (!ocfs2_should_order_data(inode)) | |
738 | + goto out; | |
739 | + | |
740 | + handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); | |
741 | + if (IS_ERR(handle)) { | |
742 | + ret = -ENOMEM; | |
743 | + mlog_errno(ret); | |
744 | + goto out; | |
745 | + } | |
746 | + | |
747 | + ret = ocfs2_jbd2_file_inode(handle, inode); | |
748 | + if (ret < 0) | |
749 | + mlog_errno(ret); | |
750 | + | |
751 | +out: | |
752 | + if (ret) { | |
753 | + if (!IS_ERR(handle)) | |
754 | + ocfs2_commit_trans(osb, handle); | |
755 | + handle = ERR_PTR(ret); | |
756 | + } | |
757 | + return handle; | |
758 | +} | |
759 | + | |
727 | 760 | /* Some parts of this taken from generic_cont_expand, which turned out |
728 | 761 | * to be too fragile to do exactly what we need without us having to |
729 | 762 | * worry about recursive locking in ->write_begin() and ->write_end(). */ |
730 | -static int ocfs2_write_zero_page(struct inode *inode, | |
731 | - u64 size) | |
763 | +static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from, | |
764 | + u64 abs_to) | |
732 | 765 | { |
733 | 766 | struct address_space *mapping = inode->i_mapping; |
734 | 767 | struct page *page; |
735 | - unsigned long index; | |
736 | - unsigned int offset; | |
768 | + unsigned long index = abs_from >> PAGE_CACHE_SHIFT; | |
737 | 769 | handle_t *handle = NULL; |
738 | 770 | int ret; |
771 | + unsigned zero_from, zero_to, block_start, block_end; | |
739 | 772 | |
740 | - offset = (size & (PAGE_CACHE_SIZE-1)); /* Within page */ | |
741 | - /* ugh. in prepare/commit_write, if from==to==start of block, we | |
742 | - ** skip the prepare. make sure we never send an offset for the start | |
743 | - ** of a block | |
744 | - */ | |
745 | - if ((offset & (inode->i_sb->s_blocksize - 1)) == 0) { | |
746 | - offset++; | |
747 | - } | |
748 | - index = size >> PAGE_CACHE_SHIFT; | |
773 | + BUG_ON(abs_from >= abs_to); | |
774 | + BUG_ON(abs_to > (((u64)index + 1) << PAGE_CACHE_SHIFT)); | |
775 | + BUG_ON(abs_from & (inode->i_blkbits - 1)); | |
749 | 776 | |
750 | 777 | page = grab_cache_page(mapping, index); |
751 | 778 | if (!page) { |
752 | 779 | |
753 | 780 | |
754 | 781 | |
755 | 782 | |
756 | 783 | |
... | ... | @@ -754,31 +781,51 @@ |
754 | 781 | goto out; |
755 | 782 | } |
756 | 783 | |
757 | - ret = ocfs2_prepare_write_nolock(inode, page, offset, offset); | |
758 | - if (ret < 0) { | |
759 | - mlog_errno(ret); | |
760 | - goto out_unlock; | |
761 | - } | |
784 | + /* Get the offsets within the page that we want to zero */ | |
785 | + zero_from = abs_from & (PAGE_CACHE_SIZE - 1); | |
786 | + zero_to = abs_to & (PAGE_CACHE_SIZE - 1); | |
787 | + if (!zero_to) | |
788 | + zero_to = PAGE_CACHE_SIZE; | |
762 | 789 | |
763 | - if (ocfs2_should_order_data(inode)) { | |
764 | - handle = ocfs2_start_walk_page_trans(inode, page, offset, | |
765 | - offset); | |
766 | - if (IS_ERR(handle)) { | |
767 | - ret = PTR_ERR(handle); | |
768 | - handle = NULL; | |
790 | + /* We know that zero_from is block aligned */ | |
791 | + for (block_start = zero_from; block_start < zero_to; | |
792 | + block_start = block_end) { | |
793 | + block_end = block_start + (1 << inode->i_blkbits); | |
794 | + | |
795 | + /* | |
796 | + * block_start is block-aligned. Bump it by one to | |
797 | + * force ocfs2_{prepare,commit}_write() to zero the | |
798 | + * whole block. | |
799 | + */ | |
800 | + ret = ocfs2_prepare_write_nolock(inode, page, | |
801 | + block_start + 1, | |
802 | + block_start + 1); | |
803 | + if (ret < 0) { | |
804 | + mlog_errno(ret); | |
769 | 805 | goto out_unlock; |
770 | 806 | } |
771 | - } | |
772 | 807 | |
773 | - /* must not update i_size! */ | |
774 | - ret = block_commit_write(page, offset, offset); | |
775 | - if (ret < 0) | |
776 | - mlog_errno(ret); | |
777 | - else | |
778 | - ret = 0; | |
808 | + if (!handle) { | |
809 | + handle = ocfs2_zero_start_ordered_transaction(inode); | |
810 | + if (IS_ERR(handle)) { | |
811 | + ret = PTR_ERR(handle); | |
812 | + handle = NULL; | |
813 | + break; | |
814 | + } | |
815 | + } | |
779 | 816 | |
817 | + /* must not update i_size! */ | |
818 | + ret = block_commit_write(page, block_start + 1, | |
819 | + block_start + 1); | |
820 | + if (ret < 0) | |
821 | + mlog_errno(ret); | |
822 | + else | |
823 | + ret = 0; | |
824 | + } | |
825 | + | |
780 | 826 | if (handle) |
781 | 827 | ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); |
828 | + | |
782 | 829 | out_unlock: |
783 | 830 | unlock_page(page); |
784 | 831 | page_cache_release(page); |
785 | 832 | |
786 | 833 | |
... | ... | @@ -790,18 +837,21 @@ |
790 | 837 | u64 zero_to_size) |
791 | 838 | { |
792 | 839 | int ret = 0; |
793 | - u64 start_off; | |
840 | + u64 start_off, next_off; | |
794 | 841 | struct super_block *sb = inode->i_sb; |
795 | 842 | |
796 | 843 | start_off = ocfs2_align_bytes_to_blocks(sb, i_size_read(inode)); |
797 | 844 | while (start_off < zero_to_size) { |
798 | - ret = ocfs2_write_zero_page(inode, start_off); | |
845 | + next_off = (start_off & PAGE_CACHE_MASK) + PAGE_CACHE_SIZE; | |
846 | + if (next_off > zero_to_size) | |
847 | + next_off = zero_to_size; | |
848 | + ret = ocfs2_write_zero_page(inode, start_off, next_off); | |
799 | 849 | if (ret < 0) { |
800 | 850 | mlog_errno(ret); |
801 | 851 | goto out; |
802 | 852 | } |
803 | 853 | |
804 | - start_off += sb->s_blocksize; | |
854 | + start_off = next_off; | |
805 | 855 | |
806 | 856 | /* |
807 | 857 | * Very large extends have the potential to lock up |