ocfs2: When zero extending, do it by page.

ocfs2_zero_extend() does its zeroing block by block, but it calls a function named ocfs2_write_zero_page(). Let's have ocfs2_write_zero_page() handle the page level. From ocfs2_zero_extend()'s perspective, it is now page-at-a-time.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Cc: stable@kernel.org

ocfs2: When zero extending, do it by page.
ocfs2_zero_extend() does its zeroing block by block, but it calls a function named ocfs2_write_zero_page(). Let's have ocfs2_write_zero_page() handle the page level. From ocfs2_zero_extend()'s perspective, it is now page-at-a-time.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Cc: stable@kernel.org
Joel Becker
1 parent 1739da4054
Showing 2 changed files with 84 additions and 64 deletions Side-by-side Diff
fs/ocfs2/aops.c
fs/ocfs2/file.c
@@ -459,36 +459,6 @@
 	return ret;
 }
  
-handle_t *ocfs2_start_walk_page_trans(struct inode *inode,
-							 struct page *page,
-							 unsigned from,
-							 unsigned to)
-{
-	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
-	handle_t *handle;
-	int ret = 0;
-
-	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
-	if (IS_ERR(handle)) {
-		ret = -ENOMEM;
-		mlog_errno(ret);
-		goto out;
-	}
-
-	if (ocfs2_should_order_data(inode)) {
-		ret = ocfs2_jbd2_file_inode(handle, inode);
-		if (ret < 0)
-			mlog_errno(ret);
-	}
-out:
-	if (ret) {
-		if (!IS_ERR(handle))
-			ocfs2_commit_trans(osb, handle);
-		handle = ERR_PTR(ret);
-	}
-	return handle;
-}
-
 static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block)
 {
 	sector_t status;
@@ -724,28 +724,55 @@
 	return status;
 }
  
+/*
+ * A write orders its own data but a truncate does not, so zeroed pages must be
+ * ordered here.  Returns a handle, NULL if not ordering, or ERR_PTR() on error.
+ */
+static handle_t *ocfs2_zero_start_ordered_transaction(struct inode *inode)
+{
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	handle_t *handle = NULL;
+	int ret = 0;
+
+	if (!ocfs2_should_order_data(inode))
+		goto out;	/* nothing to order: NULL handle, not an error */
+
+	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
+	if (IS_ERR(handle)) {
+		ret = PTR_ERR(handle);	/* keep the errno encoded in the handle */
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_jbd2_file_inode(handle, inode);
+	if (ret < 0)
+		mlog_errno(ret);
+
+out:
+	if (ret) {
+		if (!IS_ERR(handle))
+			ocfs2_commit_trans(osb, handle); /* close the handle we opened */
+		handle = ERR_PTR(ret);
+	}
+	return handle;
+}
+
 /* Some parts of this taken from generic_cont_expand, which turned out
  * to be too fragile to do exactly what we need without us having to
  * worry about recursive locking in ->write_begin() and ->write_end(). */
-static int ocfs2_write_zero_page(struct inode *inode,
-				 u64 size)
+static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
+				 u64 abs_to)
 {
 	struct address_space *mapping = inode->i_mapping;
 	struct page *page;
-	unsigned long index;
-	unsigned int offset;
+	unsigned long index = abs_from >> PAGE_CACHE_SHIFT; /* page holding abs_from */
 	handle_t *handle = NULL;
 	int ret;
+	unsigned zero_from, zero_to, block_start, block_end;
  
-	offset = (size & (PAGE_CACHE_SIZE-1)); /* Within page */
-	/* ugh.  in prepare/commit_write, if from==to==start of block, we
-	** skip the prepare.  make sure we never send an offset for the start
-	** of a block
-	*/
-	if ((offset & (inode->i_sb->s_blocksize - 1)) == 0) {
-		offset++;
-	}
-	index = size >> PAGE_CACHE_SHIFT;
+	BUG_ON(abs_from >= abs_to);	/* byte range must be non-empty */
+	BUG_ON(abs_to > (((u64)index + 1) << PAGE_CACHE_SHIFT)); /* and end within this page */
+	BUG_ON(abs_from & ((1 << inode->i_blkbits) - 1)); /* and start block-aligned; i_blkbits is a shift count */
  
 	page = grab_cache_page(mapping, index);
 	if (!page) {
  
  
  
  
  
@@ -754,31 +781,51 @@
 		goto out;
 	}
  
-	ret = ocfs2_prepare_write_nolock(inode, page, offset, offset);
-	if (ret < 0) {
-		mlog_errno(ret);
-		goto out_unlock;
-	}
+	/* Get the offsets within the page that we want to zero */
+	zero_from = abs_from & (PAGE_CACHE_SIZE - 1);
+	zero_to = abs_to & (PAGE_CACHE_SIZE - 1);
+	if (!zero_to)
+		zero_to = PAGE_CACHE_SIZE;	/* abs_to is page-aligned: zero to the page's end */
  
-	if (ocfs2_should_order_data(inode)) {
-		handle = ocfs2_start_walk_page_trans(inode, page, offset,
-						     offset);
-		if (IS_ERR(handle)) {
-			ret = PTR_ERR(handle);
-			handle = NULL;
+	/* We know that zero_from is block aligned */
+	for (block_start = zero_from; block_start < zero_to;
+	     block_start = block_end) {
+		block_end = block_start + (1 << inode->i_blkbits);
+
+		/*
+		 * block_start is block-aligned.  Bump it by one to
+		 * force ocfs2_{prepare,commit}_write() to zero the
+		 * whole block.
+		 */
+		ret = ocfs2_prepare_write_nolock(inode, page,
+						 block_start + 1,
+						 block_start + 1);
+		if (ret < 0) {
+			mlog_errno(ret);
-			goto out_unlock;
+			break;	/* fall out to commit a handle opened by an earlier block */
 		}
-	}
  
-	/* must not update i_size! */
-	ret = block_commit_write(page, offset, offset);
-	if (ret < 0)
-		mlog_errno(ret);
-	else
-		ret = 0;
+		if (!handle) {
+			handle = ocfs2_zero_start_ordered_transaction(inode);
+			if (IS_ERR(handle)) {
+				ret = PTR_ERR(handle);
+				handle = NULL;
+				goto out_unlock;	/* no handle exists, nothing to commit */
+			}
+		}
  
+		/* must not update i_size! */
+		ret = block_commit_write(page, block_start + 1,
+					 block_start + 1);
+		if (ret < 0)
+			mlog_errno(ret);	/* NOTE(review): logged only; the loop keeps going */
+		else
+			ret = 0;
+	}
+
 	if (handle)
 		ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
+
 out_unlock:
 	unlock_page(page);
 	page_cache_release(page);
  
  
@@ -790,18 +837,21 @@
 			     u64 zero_to_size)
 {
 	int ret = 0;
-	u64 start_off;
+	u64 start_off, next_off;
 	struct super_block *sb = inode->i_sb;
  
 	start_off = ocfs2_align_bytes_to_blocks(sb, i_size_read(inode));
 	while (start_off < zero_to_size) {
-		ret = ocfs2_write_zero_page(inode, start_off);
+		next_off = (start_off & PAGE_CACHE_MASK) + PAGE_CACHE_SIZE;
+		if (next_off > zero_to_size)
+			next_off = zero_to_size;
+		ret = ocfs2_write_zero_page(inode, start_off, next_off);
 		if (ret < 0) {
 			mlog_errno(ret);
 			goto out;
 		}
  
-		start_off += sb->s_blocksize;
+		start_off = next_off;
  
 		/*
 		 * Very large extends have the potential to lock up
...	...	@@ -459,36 +459,6 @@
459	459	return ret;
460	460	}
461	461
462		-handle_t *ocfs2_start_walk_page_trans(struct inode *inode,
463		- struct page *page,
464		- unsigned from,
465		- unsigned to)
466		-{
467		- struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
468		- handle_t *handle;
469		- int ret = 0;
470		-
471		- handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
472		- if (IS_ERR(handle)) {
473		- ret = -ENOMEM;
474		- mlog_errno(ret);
475		- goto out;
476		- }
477		-
478		- if (ocfs2_should_order_data(inode)) {
479		- ret = ocfs2_jbd2_file_inode(handle, inode);
480		- if (ret < 0)
481		- mlog_errno(ret);
482		- }
483		-out:
484		- if (ret) {
485		- if (!IS_ERR(handle))
486		- ocfs2_commit_trans(osb, handle);
487		- handle = ERR_PTR(ret);
488		- }
489		- return handle;
490		-}
491		-
492	462	static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block)
493	463	{
494	464	sector_t status;
...	...	@@ -724,28 +724,55 @@
724	724	return status;
725	725	}
726	726
	727	+/*
	728	+ * While a write will already be ordering the data, a truncate will not.
	729	+ * Thus, we need to explicitly order the zeroed pages.
	730	+ */
	731	+static handle_t *ocfs2_zero_start_ordered_transaction(struct inode *inode)
	732	+{
	733	+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	734	+ handle_t *handle = NULL;
	735	+ int ret = 0;
	736	+
	737	+ if (!ocfs2_should_order_data(inode))
	738	+ goto out;
	739	+
	740	+ handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
	741	+ if (IS_ERR(handle)) {
	742	+ ret = -ENOMEM;
	743	+ mlog_errno(ret);
	744	+ goto out;
	745	+ }
	746	+
	747	+ ret = ocfs2_jbd2_file_inode(handle, inode);
	748	+ if (ret < 0)
	749	+ mlog_errno(ret);
	750	+
	751	+out:
	752	+ if (ret) {
	753	+ if (!IS_ERR(handle))
	754	+ ocfs2_commit_trans(osb, handle);
	755	+ handle = ERR_PTR(ret);
	756	+ }
	757	+ return handle;
	758	+}
	759	+
727	760	/* Some parts of this taken from generic_cont_expand, which turned out
728	761	* to be too fragile to do exactly what we need without us having to
729	762	* worry about recursive locking in ->write_begin() and ->write_end(). */
730		-static int ocfs2_write_zero_page(struct inode *inode,
731		- u64 size)
	763	+static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
	764	+ u64 abs_to)
732	765	{
733	766	struct address_space *mapping = inode->i_mapping;
734	767	struct page *page;
735		- unsigned long index;
736		- unsigned int offset;
	768	+ unsigned long index = abs_from >> PAGE_CACHE_SHIFT;
737	769	handle_t *handle = NULL;
738	770	int ret;
	771	+ unsigned zero_from, zero_to, block_start, block_end;
739	772
740		- offset = (size & (PAGE_CACHE_SIZE-1)); /* Within page */
741		- /* ugh. in prepare/commit_write, if from==to==start of block, we
742		- ** skip the prepare. make sure we never send an offset for the start
743		- ** of a block
744		- */
745		- if ((offset & (inode->i_sb->s_blocksize - 1)) == 0) {
746		- offset++;
747		- }
748		- index = size >> PAGE_CACHE_SHIFT;
	773	+ BUG_ON(abs_from >= abs_to);
	774	+ BUG_ON(abs_to > (((u64)index + 1) << PAGE_CACHE_SHIFT));
	775	+ BUG_ON(abs_from & (inode->i_blkbits - 1));
749	776
750	777	page = grab_cache_page(mapping, index);
751	778	if (!page) {
752	779
753	780
754	781
755	782
756	783
...	...	@@ -754,31 +781,51 @@
754	781	goto out;
755	782	}
756	783
757		- ret = ocfs2_prepare_write_nolock(inode, page, offset, offset);
758		- if (ret < 0) {
759		- mlog_errno(ret);
760		- goto out_unlock;
761		- }
	784	+ /* Get the offsets within the page that we want to zero */
	785	+ zero_from = abs_from & (PAGE_CACHE_SIZE - 1);
	786	+ zero_to = abs_to & (PAGE_CACHE_SIZE - 1);
	787	+ if (!zero_to)
	788	+ zero_to = PAGE_CACHE_SIZE;
762	789
763		- if (ocfs2_should_order_data(inode)) {
764		- handle = ocfs2_start_walk_page_trans(inode, page, offset,
765		- offset);
766		- if (IS_ERR(handle)) {
767		- ret = PTR_ERR(handle);
768		- handle = NULL;
	790	+ /* We know that zero_from is block aligned */
	791	+ for (block_start = zero_from; block_start < zero_to;
	792	+ block_start = block_end) {
	793	+ block_end = block_start + (1 << inode->i_blkbits);
	794	+
	795	+ /*
	796	+ * block_start is block-aligned. Bump it by one to
	797	+ * force ocfs2_{prepare,commit}_write() to zero the
	798	+ * whole block.
	799	+ */
	800	+ ret = ocfs2_prepare_write_nolock(inode, page,
	801	+ block_start + 1,
	802	+ block_start + 1);
	803	+ if (ret < 0) {
	804	+ mlog_errno(ret);
769	805	goto out_unlock;
770	806	}
771		- }
772	807
773		- /* must not update i_size! */
774		- ret = block_commit_write(page, offset, offset);
775		- if (ret < 0)
776		- mlog_errno(ret);
777		- else
778		- ret = 0;
	808	+ if (!handle) {
	809	+ handle = ocfs2_zero_start_ordered_transaction(inode);
	810	+ if (IS_ERR(handle)) {
	811	+ ret = PTR_ERR(handle);
	812	+ handle = NULL;
	813	+ break;
	814	+ }
	815	+ }
779	816
	817	+ /* must not update i_size! */
	818	+ ret = block_commit_write(page, block_start + 1,
	819	+ block_start + 1);
	820	+ if (ret < 0)
	821	+ mlog_errno(ret);
	822	+ else
	823	+ ret = 0;
	824	+ }
	825	+
780	826	if (handle)
781	827	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
	828	+
782	829	out_unlock:
783	830	unlock_page(page);
784	831	page_cache_release(page);
785	832
786	833
...	...	@@ -790,18 +837,21 @@
790	837	u64 zero_to_size)
791	838	{
792	839	int ret = 0;
793		- u64 start_off;
	840	+ u64 start_off, next_off;
794	841	struct super_block *sb = inode->i_sb;
795	842
796	843	start_off = ocfs2_align_bytes_to_blocks(sb, i_size_read(inode));
797	844	while (start_off < zero_to_size) {
798		- ret = ocfs2_write_zero_page(inode, start_off);
	845	+ next_off = (start_off & PAGE_CACHE_MASK) + PAGE_CACHE_SIZE;
	846	+ if (next_off > zero_to_size)
	847	+ next_off = zero_to_size;
	848	+ ret = ocfs2_write_zero_page(inode, start_off, next_off);
799	849	if (ret < 0) {
800	850	mlog_errno(ret);
801	851	goto out;
802	852	}
803	853
804		- start_off += sb->s_blocksize;
	854	+ start_off = next_off;
805	855
806	856	/*
807	857	* Very large extends have the potential to lock up