Commit 7307de80510a70e5e5aa98de1e80ccbb7d90a3a8
1 parent: 607d44aa3f
ocfs2: shared writeable mmap
Implement cluster consistent shared writeable mappings using the ->page_mkwrite() callback.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
Showing 4 changed files with 200 additions and 39 deletions (side-by-side diff)
fs/ocfs2/aops.c
... | ... | @@ -1034,7 +1034,8 @@ |
1034 | 1034 | */ |
1035 | 1035 | static int ocfs2_grab_pages_for_write(struct address_space *mapping, |
1036 | 1036 | struct ocfs2_write_ctxt *wc, |
1037 | - u32 cpos, loff_t user_pos, int new) | |
1037 | + u32 cpos, loff_t user_pos, int new, | |
1038 | + struct page *mmap_page) | |
1038 | 1039 | { |
1039 | 1040 | int ret = 0, i; |
1040 | 1041 | unsigned long start, target_index, index; |
... | ... | @@ -1058,11 +1059,36 @@ |
1058 | 1059 | for(i = 0; i < wc->w_num_pages; i++) { |
1059 | 1060 | index = start + i; |
1060 | 1061 | |
1061 | - wc->w_pages[i] = find_or_create_page(mapping, index, GFP_NOFS); | |
1062 | - if (!wc->w_pages[i]) { | |
1063 | - ret = -ENOMEM; | |
1064 | - mlog_errno(ret); | |
1065 | - goto out; | |
1062 | + if (index == target_index && mmap_page) { | |
1063 | + /* | |
1064 | + * ocfs2_pagemkwrite() is a little different | |
1065 | + * and wants us to directly use the page | |
1066 | + * passed in. | |
1067 | + */ | |
1068 | + lock_page(mmap_page); | |
1069 | + | |
1070 | + if (mmap_page->mapping != mapping) { | |
1071 | + unlock_page(mmap_page); | |
1072 | + /* | |
1073 | + * Sanity check - the locking in | |
1074 | + * ocfs2_pagemkwrite() should ensure | |
1075 | + * that this code doesn't trigger. | |
1076 | + */ | |
1077 | + ret = -EINVAL; | |
1078 | + mlog_errno(ret); | |
1079 | + goto out; | |
1080 | + } | |
1081 | + | |
1082 | + page_cache_get(mmap_page); | |
1083 | + wc->w_pages[i] = mmap_page; | |
1084 | + } else { | |
1085 | + wc->w_pages[i] = find_or_create_page(mapping, index, | |
1086 | + GFP_NOFS); | |
1087 | + if (!wc->w_pages[i]) { | |
1088 | + ret = -ENOMEM; | |
1089 | + mlog_errno(ret); | |
1090 | + goto out; | |
1091 | + } | |
1066 | 1092 | } |
1067 | 1093 | |
1068 | 1094 | if (index == target_index) |
... | ... | @@ -1213,10 +1239,10 @@ |
1213 | 1239 | } |
1214 | 1240 | } |
1215 | 1241 | |
1216 | -static int ocfs2_write_begin_nolock(struct address_space *mapping, | |
1217 | - loff_t pos, unsigned len, unsigned flags, | |
1218 | - struct page **pagep, void **fsdata, | |
1219 | - struct buffer_head *di_bh) | |
1242 | +int ocfs2_write_begin_nolock(struct address_space *mapping, | |
1243 | + loff_t pos, unsigned len, unsigned flags, | |
1244 | + struct page **pagep, void **fsdata, | |
1245 | + struct buffer_head *di_bh, struct page *mmap_page) | |
1220 | 1246 | { |
1221 | 1247 | int ret, i, credits = OCFS2_INODE_UPDATE_CREDITS; |
1222 | 1248 | unsigned int num_clusters = 0, clusters_to_alloc = 0; |
... | ... | @@ -1318,7 +1344,7 @@ |
1318 | 1344 | * extent. |
1319 | 1345 | */ |
1320 | 1346 | ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos, |
1321 | - clusters_to_alloc); | |
1347 | + clusters_to_alloc, mmap_page); | |
1322 | 1348 | if (ret) { |
1323 | 1349 | mlog_errno(ret); |
1324 | 1350 | goto out_commit; |
... | ... | @@ -1386,7 +1412,7 @@ |
1386 | 1412 | } |
1387 | 1413 | |
1388 | 1414 | ret = ocfs2_write_begin_nolock(mapping, pos, len, flags, pagep, |
1389 | - fsdata, di_bh); | |
1415 | + fsdata, di_bh, NULL); | |
1390 | 1416 | if (ret) { |
1391 | 1417 | mlog_errno(ret); |
1392 | 1418 | goto out_fail_data; |
... | ... | @@ -1407,9 +1433,9 @@ |
1407 | 1433 | return ret; |
1408 | 1434 | } |
1409 | 1435 | |
1410 | -static int ocfs2_write_end_nolock(struct address_space *mapping, | |
1411 | - loff_t pos, unsigned len, unsigned copied, | |
1412 | - struct page *page, void *fsdata) | |
1436 | +int ocfs2_write_end_nolock(struct address_space *mapping, | |
1437 | + loff_t pos, unsigned len, unsigned copied, | |
1438 | + struct page *page, void *fsdata) | |
1413 | 1439 | { |
1414 | 1440 | int i; |
1415 | 1441 | unsigned from, to, start = pos & (PAGE_CACHE_SIZE - 1); |
fs/ocfs2/aops.h
... | ... | @@ -50,6 +50,15 @@ |
50 | 50 | loff_t pos, unsigned len, unsigned copied, |
51 | 51 | struct page *page, void *fsdata); |
52 | 52 | |
53 | +int ocfs2_write_end_nolock(struct address_space *mapping, | |
54 | + loff_t pos, unsigned len, unsigned copied, | |
55 | + struct page *page, void *fsdata); | |
56 | + | |
57 | +int ocfs2_write_begin_nolock(struct address_space *mapping, | |
58 | + loff_t pos, unsigned len, unsigned flags, | |
59 | + struct page **pagep, void **fsdata, | |
60 | + struct buffer_head *di_bh, struct page *mmap_page); | |
61 | + | |
53 | 62 | /* all ocfs2_dio_end_io()'s fault */ |
54 | 63 | #define ocfs2_iocb_is_rw_locked(iocb) \ |
55 | 64 | test_bit(0, (unsigned long *)&iocb->private) |
fs/ocfs2/file.c
... | ... | @@ -1001,6 +1001,13 @@ |
1001 | 1001 | goto bail_unlock; |
1002 | 1002 | } |
1003 | 1003 | |
1004 | + /* | |
1005 | + * This will intentionally not wind up calling vmtruncate(), | |
1006 | + * since all the work for a size change has been done above. | |
1007 | + * Otherwise, we could get into problems with truncate as | |
1008 | + * ip_alloc_sem is used there to protect against i_size | |
1009 | + * changes. | |
1010 | + */ | |
1004 | 1011 | status = inode_setattr(inode, attr); |
1005 | 1012 | if (status < 0) { |
1006 | 1013 | mlog_errno(status); |
fs/ocfs2/mmap.c
... | ... | @@ -37,11 +37,29 @@ |
37 | 37 | |
38 | 38 | #include "ocfs2.h" |
39 | 39 | |
40 | +#include "aops.h" | |
40 | 41 | #include "dlmglue.h" |
41 | 42 | #include "file.h" |
42 | 43 | #include "inode.h" |
43 | 44 | #include "mmap.h" |
44 | 45 | |
46 | +static inline int ocfs2_vm_op_block_sigs(sigset_t *blocked, sigset_t *oldset) | |
47 | +{ | |
48 | + /* The best way to deal with signals in the vm path is | |
49 | + * to block them upfront, rather than allowing the | |
50 | + * locking paths to return -ERESTARTSYS. */ | |
51 | + sigfillset(blocked); | |
52 | + | |
53 | + /* We should technically never get a bad return value | |
54 | + * from sigprocmask */ | |
55 | + return sigprocmask(SIG_BLOCK, blocked, oldset); | |
56 | +} | |
57 | + | |
58 | +static inline int ocfs2_vm_op_unblock_sigs(sigset_t *oldset) | |
59 | +{ | |
60 | + return sigprocmask(SIG_SETMASK, oldset, NULL); | |
61 | +} | |
62 | + | |
45 | 63 | static struct page *ocfs2_nopage(struct vm_area_struct * area, |
46 | 64 | unsigned long address, |
47 | 65 | int *type) |
... | ... | @@ -53,14 +71,7 @@ |
53 | 71 | mlog_entry("(area=%p, address=%lu, type=%p)\n", area, address, |
54 | 72 | type); |
55 | 73 | |
56 | - /* The best way to deal with signals in this path is | |
57 | - * to block them upfront, rather than allowing the | |
58 | - * locking paths to return -ERESTARTSYS. */ | |
59 | - sigfillset(&blocked); | |
60 | - | |
61 | - /* We should technically never get a bad ret return | |
62 | - * from sigprocmask */ | |
63 | - ret = sigprocmask(SIG_BLOCK, &blocked, &oldset); | |
74 | + ret = ocfs2_vm_op_block_sigs(&blocked, &oldset); | |
64 | 75 | if (ret < 0) { |
65 | 76 | mlog_errno(ret); |
66 | 77 | goto out; |
... | ... | @@ -68,7 +79,7 @@ |
68 | 79 | |
69 | 80 | page = filemap_nopage(area, address, type); |
70 | 81 | |
71 | - ret = sigprocmask(SIG_SETMASK, &oldset, NULL); | |
82 | + ret = ocfs2_vm_op_unblock_sigs(&oldset); | |
72 | 83 | if (ret < 0) |
73 | 84 | mlog_errno(ret); |
74 | 85 | out: |
75 | 86 | |
76 | 87 | |
77 | 88 | |
78 | 89 | |
79 | 90 | |
80 | 91 | |
... | ... | @@ -76,27 +87,135 @@ |
76 | 87 | return page; |
77 | 88 | } |
78 | 89 | |
79 | -static struct vm_operations_struct ocfs2_file_vm_ops = { | |
80 | - .nopage = ocfs2_nopage, | |
81 | -}; | |
90 | +static int __ocfs2_page_mkwrite(struct inode *inode, struct buffer_head *di_bh, | |
91 | + struct page *page) | |
92 | +{ | |
93 | + int ret; | |
94 | + struct address_space *mapping = inode->i_mapping; | |
95 | + loff_t pos = page->index << PAGE_CACHE_SHIFT; | |
96 | + unsigned int len = PAGE_CACHE_SIZE; | |
97 | + pgoff_t last_index; | |
98 | + struct page *locked_page = NULL; | |
99 | + void *fsdata; | |
100 | + loff_t size = i_size_read(inode); | |
82 | 101 | |
83 | -int ocfs2_mmap(struct file *file, struct vm_area_struct *vma) | |
102 | + /* | |
103 | + * Another node might have truncated while we were waiting on | |
104 | + * cluster locks. | |
105 | + */ | |
106 | + last_index = size >> PAGE_CACHE_SHIFT; | |
107 | + if (page->index > last_index) { | |
108 | + ret = -EINVAL; | |
109 | + goto out; | |
110 | + } | |
111 | + | |
112 | + /* | |
113 | + * The i_size check above doesn't catch the case where nodes | |
114 | + * truncated and then re-extended the file. We'll re-check the | |
115 | + * page mapping after taking the page lock inside of | |
116 | + * ocfs2_write_begin_nolock(). | |
117 | + */ | |
118 | + if (!PageUptodate(page) || page->mapping != inode->i_mapping) { | |
119 | + ret = -EINVAL; | |
120 | + goto out; | |
121 | + } | |
122 | + | |
123 | + /* | |
124 | + * Call ocfs2_write_begin() and ocfs2_write_end() to take | |
125 | + * advantage of the allocation code there. We pass a write | |
126 | + * length of the whole page (chopped to i_size) to make sure | |
127 | + * the whole thing is allocated. | |
128 | + * | |
129 | + * Since we know the page is up to date, we don't have to | |
130 | + * worry about ocfs2_write_begin() skipping some buffer reads | |
131 | + * because the "write" would invalidate their data. | |
132 | + */ | |
133 | + if (page->index == last_index) | |
134 | + len = size & ~PAGE_CACHE_MASK; | |
135 | + | |
136 | + ret = ocfs2_write_begin_nolock(mapping, pos, len, 0, &locked_page, | |
137 | + &fsdata, di_bh, page); | |
138 | + if (ret) { | |
139 | + if (ret != -ENOSPC) | |
140 | + mlog_errno(ret); | |
141 | + goto out; | |
142 | + } | |
143 | + | |
144 | + ret = ocfs2_write_end_nolock(mapping, pos, len, len, locked_page, | |
145 | + fsdata); | |
146 | + if (ret < 0) { | |
147 | + mlog_errno(ret); | |
148 | + goto out; | |
149 | + } | |
150 | + BUG_ON(ret != len); | |
151 | + ret = 0; | |
152 | +out: | |
153 | + return ret; | |
154 | +} | |
155 | + | |
156 | +static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page) | |
84 | 157 | { |
85 | - int ret = 0, lock_level = 0; | |
86 | - struct ocfs2_super *osb = OCFS2_SB(file->f_dentry->d_inode->i_sb); | |
158 | + struct inode *inode = vma->vm_file->f_path.dentry->d_inode; | |
159 | + struct buffer_head *di_bh = NULL; | |
160 | + sigset_t blocked, oldset; | |
161 | + int ret, ret2; | |
87 | 162 | |
163 | + ret = ocfs2_vm_op_block_sigs(&blocked, &oldset); | |
164 | + if (ret < 0) { | |
165 | + mlog_errno(ret); | |
166 | + return ret; | |
167 | + } | |
168 | + | |
88 | 169 | /* |
89 | - * Only support shared writeable mmap for local mounts which | |
90 | - * don't know about holes. | |
170 | + * The cluster locks taken will block a truncate from another | |
171 | + * node. Taking the data lock will also ensure that we don't | |
172 | + * attempt page truncation as part of a downconvert. | |
91 | 173 | */ |
92 | - if ((!ocfs2_mount_local(osb) || ocfs2_sparse_alloc(osb)) && | |
93 | - ((vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_MAYSHARE)) && | |
94 | - ((vma->vm_flags & VM_WRITE) || (vma->vm_flags & VM_MAYWRITE))) { | |
95 | - mlog(0, "disallow shared writable mmaps %lx\n", vma->vm_flags); | |
96 | - /* This is -EINVAL because generic_file_readonly_mmap | |
97 | - * returns it in a similar situation. */ | |
98 | - return -EINVAL; | |
174 | + ret = ocfs2_meta_lock(inode, &di_bh, 1); | |
175 | + if (ret < 0) { | |
176 | + mlog_errno(ret); | |
177 | + goto out; | |
99 | 178 | } |
179 | + | |
180 | + /* | |
181 | + * The alloc sem should be enough to serialize with | |
182 | + * ocfs2_truncate_file() changing i_size as well as any thread | |
183 | + * modifying the inode btree. | |
184 | + */ | |
185 | + down_write(&OCFS2_I(inode)->ip_alloc_sem); | |
186 | + | |
187 | + ret = ocfs2_data_lock(inode, 1); | |
188 | + if (ret < 0) { | |
189 | + mlog_errno(ret); | |
190 | + goto out_meta_unlock; | |
191 | + } | |
192 | + | |
193 | + ret = __ocfs2_page_mkwrite(inode, di_bh, page); | |
194 | + | |
195 | + ocfs2_data_unlock(inode, 1); | |
196 | + | |
197 | +out_meta_unlock: | |
198 | + up_write(&OCFS2_I(inode)->ip_alloc_sem); | |
199 | + | |
200 | + brelse(di_bh); | |
201 | + ocfs2_meta_unlock(inode, 1); | |
202 | + | |
203 | +out: | |
204 | + ret2 = ocfs2_vm_op_unblock_sigs(&oldset); | |
205 | + if (ret2 < 0) | |
206 | + mlog_errno(ret2); | |
207 | + | |
208 | + return ret; | |
209 | +} | |
210 | + | |
211 | +static struct vm_operations_struct ocfs2_file_vm_ops = { | |
212 | + .nopage = ocfs2_nopage, | |
213 | + .page_mkwrite = ocfs2_page_mkwrite, | |
214 | +}; | |
215 | + | |
216 | +int ocfs2_mmap(struct file *file, struct vm_area_struct *vma) | |
217 | +{ | |
218 | + int ret = 0, lock_level = 0; | |
100 | 219 | |
101 | 220 | ret = ocfs2_meta_lock_atime(file->f_dentry->d_inode, |
102 | 221 | file->f_vfsmnt, &lock_level); |