Commit 7bdb0d18bfd381cc5491eb95973ec5604b356c7e
Committed by
Joel Becker
1 parent
75d9bbc738
Exists in
master
and in
7 other branches
ocfs2: Add a mount option "coherency=*" to handle cluster coherency for O_DIRECT writes.
Currently, the default behavior of O_DIRECT writes allows concurrent writing among nodes to the same file, with no cluster coherency guaranteed (no EX lock held). This can leave stale data in the cache for buffered reads on other nodes. The new mount option introduces a choice between two different behaviors for O_DIRECT writes: * coherency=full, the default value, disallows concurrent O_DIRECT writes by taking EX locks. * coherency=buffered allows concurrent O_DIRECT writes without an EX lock among nodes, which gains high performance at the risk of getting stale data on other nodes. Signed-off-by: Tristan Ye <tristan.ye@oracle.com> Signed-off-by: Joel Becker <joel.becker@oracle.com>
Showing 4 changed files with 52 additions and 2 deletions Side-by-side Diff
Documentation/filesystems/ocfs2.txt
... | ... | @@ -87,4 +87,11 @@ |
87 | 87 | reservations - users should rarely need to change this |
88 | 88 | value. If allocation reservations are turned off, this |
89 | 89 | option will have no effect. |
90 | +coherency=full (*) Disallow concurrent O_DIRECT writes, cluster inode | |
91 | + lock will be taken to force other nodes drop cache, | |
92 | + therefore full cluster coherency is guaranteed even | |
93 | + for O_DIRECT writes. | |
94 | +coherency=buffered Allow concurrent O_DIRECT writes without EX lock among | |
95 | + nodes, which gains high performance at risk of getting | |
96 | + stale data on other nodes. |
fs/ocfs2/file.c
... | ... | @@ -2225,6 +2225,8 @@ |
2225 | 2225 | struct file *file = iocb->ki_filp; |
2226 | 2226 | struct inode *inode = file->f_path.dentry->d_inode; |
2227 | 2227 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
2228 | + int full_coherency = !(osb->s_mount_opt & | |
2229 | + OCFS2_MOUNT_COHERENCY_BUFFERED); | |
2228 | 2230 | |
2229 | 2231 | mlog_entry("(0x%p, %u, '%.*s')\n", file, |
2230 | 2232 | (unsigned int)nr_segs, |
2231 | 2233 | |
... | ... | @@ -2248,12 +2250,35 @@ |
2248 | 2250 | have_alloc_sem = 1; |
2249 | 2251 | } |
2250 | 2252 | |
2251 | - /* concurrent O_DIRECT writes are allowed */ | |
2252 | - rw_level = !direct_io; | |
2253 | + /* | |
2254 | + * Concurrent O_DIRECT writes are allowed with | |
2255 | + * mount_option "coherency=buffered". | |
2256 | + */ | |
2257 | + rw_level = (!direct_io || full_coherency); | |
2258 | + | |
2253 | 2259 | ret = ocfs2_rw_lock(inode, rw_level); |
2254 | 2260 | if (ret < 0) { |
2255 | 2261 | mlog_errno(ret); |
2256 | 2262 | goto out_sems; |
2263 | + } | |
2264 | + | |
2265 | + /* | |
2266 | + * O_DIRECT writes with "coherency=full" need to take EX cluster | |
2267 | + * inode_lock to guarantee coherency. | |
2268 | + */ | |
2269 | + if (direct_io && full_coherency) { | |
2270 | + /* | |
2271 | + * We need to take and drop the inode lock to force | |
2272 | + * other nodes to drop their caches. Buffered I/O | |
2273 | + * already does this in write_begin(). | |
2274 | + */ | |
2275 | + ret = ocfs2_inode_lock(inode, NULL, 1); | |
2276 | + if (ret < 0) { | |
2277 | + mlog_errno(ret); | |
2278 | + goto out_sems; | |
2279 | + } | |
2280 | + | |
2281 | + ocfs2_inode_unlock(inode, 1); | |
2257 | 2282 | } |
2258 | 2283 | |
2259 | 2284 | can_do_direct = direct_io; |
fs/ocfs2/ocfs2.h
... | ... | @@ -263,6 +263,9 @@ |
263 | 263 | control lists */ |
264 | 264 | OCFS2_MOUNT_USRQUOTA = 1 << 10, /* We support user quotas */ |
265 | 265 | OCFS2_MOUNT_GRPQUOTA = 1 << 11, /* We support group quotas */ |
266 | + | |
267 | + OCFS2_MOUNT_COHERENCY_BUFFERED = 1 << 12 /* Allow concurrent O_DIRECT | |
268 | + writes */ | |
266 | 269 | }; |
267 | 270 | |
268 | 271 | #define OCFS2_OSB_SOFT_RO 0x0001 |
fs/ocfs2/super.c
... | ... | @@ -177,6 +177,8 @@ |
177 | 177 | Opt_noacl, |
178 | 178 | Opt_usrquota, |
179 | 179 | Opt_grpquota, |
180 | + Opt_coherency_buffered, | |
181 | + Opt_coherency_full, | |
180 | 182 | Opt_resv_level, |
181 | 183 | Opt_dir_resv_level, |
182 | 184 | Opt_err, |
... | ... | @@ -205,6 +207,8 @@ |
205 | 207 | {Opt_noacl, "noacl"}, |
206 | 208 | {Opt_usrquota, "usrquota"}, |
207 | 209 | {Opt_grpquota, "grpquota"}, |
210 | + {Opt_coherency_buffered, "coherency=buffered"}, | |
211 | + {Opt_coherency_full, "coherency=full"}, | |
208 | 212 | {Opt_resv_level, "resv_level=%u"}, |
209 | 213 | {Opt_dir_resv_level, "dir_resv_level=%u"}, |
210 | 214 | {Opt_err, NULL} |
... | ... | @@ -1452,6 +1456,12 @@ |
1452 | 1456 | case Opt_grpquota: |
1453 | 1457 | mopt->mount_opt |= OCFS2_MOUNT_GRPQUOTA; |
1454 | 1458 | break; |
1459 | + case Opt_coherency_buffered: | |
1460 | + mopt->mount_opt |= OCFS2_MOUNT_COHERENCY_BUFFERED; | |
1461 | + break; | |
1462 | + case Opt_coherency_full: | |
1463 | + mopt->mount_opt &= ~OCFS2_MOUNT_COHERENCY_BUFFERED; | |
1464 | + break; | |
1455 | 1465 | case Opt_acl: |
1456 | 1466 | mopt->mount_opt |= OCFS2_MOUNT_POSIX_ACL; |
1457 | 1467 | mopt->mount_opt &= ~OCFS2_MOUNT_NO_POSIX_ACL; |
... | ... | @@ -1549,6 +1559,11 @@ |
1549 | 1559 | seq_printf(s, ",usrquota"); |
1550 | 1560 | if (opts & OCFS2_MOUNT_GRPQUOTA) |
1551 | 1561 | seq_printf(s, ",grpquota"); |
1562 | + | |
1563 | + if (opts & OCFS2_MOUNT_COHERENCY_BUFFERED) | |
1564 | + seq_printf(s, ",coherency=buffered"); | |
1565 | + else | |
1566 | + seq_printf(s, ",coherency=full"); | |
1552 | 1567 | |
1553 | 1568 | if (opts & OCFS2_MOUNT_NOUSERXATTR) |
1554 | 1569 | seq_printf(s, ",nouser_xattr"); |