ocfs2: fix write() performance regression

On file systems which don't support sparse files, ocfs2_map_page_blocks() was reading blocks on appending writes. This caused write performance to suffer dramatically. Fix this by detecting an appending write on a nonsparse fs and skipping the read. Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>

ocfs2: fix write() performance regression
On file systems which don't support sparse files, ocfs2_map_page_blocks() was reading blocks on appending writes. This caused write performance to suffer dramatically. Fix this by detecting an appending write on a nonsparse fs and skipping the read. Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
Mark Fasheh
1 parent 9ea2d32f40
Showing 1 changed file with 22 additions and 0 deletions Side-by-side Diff
fs/ocfs2/aops.c
@@ -729,6 +729,27 @@
 }
  
 /*
+ * Nonsparse file systems fully allocate before we get to the write
+ * code. This prevents ocfs2_write() from tagging the write as an
+ * allocating one, which means ocfs2_map_page_blocks() might try to
+ * read-in the blocks at the tail of our file. Avoid reading them by
+ * testing i_size against each block offset.
+ */
+static int ocfs2_should_read_blk(struct inode *inode, struct page *page,
+				 unsigned int block_start)
+{
+	u64 offset = page_offset(page) + block_start;
+
+	if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
+		return 1;
+
+	if (i_size_read(inode) > offset)
+		return 1;
+
+	return 0;
+}
+
+/*
  * Some of this taken from block_prepare_write(). We already have our
  * mapping by now though, and the entire write will be allocating or
  * it won't, so not much need to use BH_New.
@@ -781,6 +802,7 @@
 				set_buffer_uptodate(bh);
 		} else if (!buffer_uptodate(bh) && !buffer_delay(bh) &&
 			   !buffer_new(bh) &&
+			   ocfs2_should_read_blk(inode, page, block_start) &&
 			   (block_start < from || block_end > to)) {
 			ll_rw_block(READ, 1, &bh);
 			*wait_bh++=bh;
...	...	@@ -729,6 +729,27 @@
729	729	}
730	730
731	731	/*
	732	+ * Nonsparse file systems fully allocate before we get to the write
	733	+ * code. This prevents ocfs2_write() from tagging the write as an
	734	+ * allocating one, which means ocfs2_map_page_blocks() might try to
	735	+ * read-in the blocks at the tail of our file. Avoid reading them by
	736	+ * testing i_size against each block offset.
	737	+ */
	738	+static int ocfs2_should_read_blk(struct inode *inode, struct page *page,
	739	+ unsigned int block_start)
	740	+{
	741	+ u64 offset = page_offset(page) + block_start;
	742	+
	743	+ if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
	744	+ return 1;
	745	+
	746	+ if (i_size_read(inode) > offset)
	747	+ return 1;
	748	+
	749	+ return 0;
	750	+}
	751	+
	752	+/*
732	753	* Some of this taken from block_prepare_write(). We already have our
733	754	* mapping by now though, and the entire write will be allocating or
734	755	* it won't, so not much need to use BH_New.
...	...	@@ -781,6 +802,7 @@
781	802	set_buffer_uptodate(bh);
782	803	} else if (!buffer_uptodate(bh) && !buffer_delay(bh) &&
783	804	!buffer_new(bh) &&
	805	+ ocfs2_should_read_blk(inode, page, block_start) &&
784	806	(block_start < from || block_end > to)) {
785	807	ll_rw_block(READ, 1, &bh);
786	808	*wait_bh++=bh;