Commit a27a263bae072a499acc77b632238a6dacccf888

Authored by Christoph Hellwig
Committed by Alex Elder
1 parent c46a131c0c

xfs: make log devices with write back caches work

There's no reason not to support cache flushing on external log devices.
The only thing this really requires is flushing the data device first
both in fsync and log commits.  A side effect is that we also have to
remove the barrier write test during mount, which has been superfluous
since the new FLUSH+FUA code anyway.  Also use the chance to flush the
RT subvolume write cache before the fsync commit, which is required
for correct semantics.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Alex Elder <aelder@sgi.com>

Showing 3 changed files with 41 additions and 95 deletions Side-by-side Diff

fs/xfs/linux-2.6/xfs_file.c
... ... @@ -131,19 +131,34 @@
131 131 {
132 132 struct inode *inode = file->f_mapping->host;
133 133 struct xfs_inode *ip = XFS_I(inode);
  134 + struct xfs_mount *mp = ip->i_mount;
134 135 struct xfs_trans *tp;
135 136 int error = 0;
136 137 int log_flushed = 0;
137 138  
138 139 trace_xfs_file_fsync(ip);
139 140  
140   - if (XFS_FORCED_SHUTDOWN(ip->i_mount))
  141 + if (XFS_FORCED_SHUTDOWN(mp))
141 142 return -XFS_ERROR(EIO);
142 143  
143 144 xfs_iflags_clear(ip, XFS_ITRUNCATED);
144 145  
145 146 xfs_ioend_wait(ip);
146 147  
  148 + if (mp->m_flags & XFS_MOUNT_BARRIER) {
  149 + /*
  150 + * If we have an RT and/or log subvolume we need to make sure
  151 + * to flush the write cache of the device used for file data
  152 + * first. This is to ensure newly written file data make
  153 + * it to disk before logging the new inode size in case of
  154 + * an extending write.
  155 + */
  156 + if (XFS_IS_REALTIME_INODE(ip))
  157 + xfs_blkdev_issue_flush(mp->m_rtdev_targp);
  158 + else if (mp->m_logdev_targp != mp->m_ddev_targp)
  159 + xfs_blkdev_issue_flush(mp->m_ddev_targp);
  160 + }
  161 +
147 162 /*
148 163 * We always need to make sure that the required inode state is safe on
149 164 * disk. The inode might be clean but we still might need to force the
150 165  
... ... @@ -175,9 +190,9 @@
175 190 * updates. The sync transaction will also force the log.
176 191 */
177 192 xfs_iunlock(ip, XFS_ILOCK_SHARED);
178   - tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_FSYNC_TS);
  193 + tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
179 194 error = xfs_trans_reserve(tp, 0,
180   - XFS_FSYNC_TS_LOG_RES(ip->i_mount), 0, 0, 0);
  195 + XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
181 196 if (error) {
182 197 xfs_trans_cancel(tp, 0);
183 198 return -error;
184 199  
... ... @@ -209,28 +224,25 @@
209 224 * force the log.
210 225 */
211 226 if (xfs_ipincount(ip)) {
212   - error = _xfs_log_force_lsn(ip->i_mount,
  227 + error = _xfs_log_force_lsn(mp,
213 228 ip->i_itemp->ili_last_lsn,
214 229 XFS_LOG_SYNC, &log_flushed);
215 230 }
216 231 xfs_iunlock(ip, XFS_ILOCK_SHARED);
217 232 }
218 233  
219   - if (ip->i_mount->m_flags & XFS_MOUNT_BARRIER) {
220   - /*
221   - * If the log write didn't issue an ordered tag we need
222   - * to flush the disk cache for the data device now.
223   - */
224   - if (!log_flushed)
225   - xfs_blkdev_issue_flush(ip->i_mount->m_ddev_targp);
226   -
227   - /*
228   - * If this inode is on the RT dev we need to flush that
229   - * cache as well.
230   - */
231   - if (XFS_IS_REALTIME_INODE(ip))
232   - xfs_blkdev_issue_flush(ip->i_mount->m_rtdev_targp);
233   - }
  234 + /*
  235 + * If we only have a single device, and the log force above was
  236 + * a no-op we might have to flush the data device cache here.
  237 + * This can only happen for fdatasync/O_DSYNC if we were overwriting
  238 + * an already allocated file and thus do not have any metadata to
  239 + * commit.
  240 + */
  241 + if ((mp->m_flags & XFS_MOUNT_BARRIER) &&
  242 + mp->m_logdev_targp == mp->m_ddev_targp &&
  243 + !XFS_IS_REALTIME_INODE(ip) &&
  244 + !log_flushed)
  245 + xfs_blkdev_issue_flush(mp->m_ddev_targp);
234 246  
235 247 return -error;
236 248 }
fs/xfs/linux-2.6/xfs_super.c
... ... @@ -627,68 +627,6 @@
627 627 blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
628 628 }
629 629  
630   -/*
631   - * Try to write out the superblock using barriers.
632   - */
633   -STATIC int
634   -xfs_barrier_test(
635   - xfs_mount_t *mp)
636   -{
637   - xfs_buf_t *sbp = xfs_getsb(mp, 0);
638   - int error;
639   -
640   - XFS_BUF_UNDONE(sbp);
641   - XFS_BUF_UNREAD(sbp);
642   - XFS_BUF_UNDELAYWRITE(sbp);
643   - XFS_BUF_WRITE(sbp);
644   - XFS_BUF_UNASYNC(sbp);
645   - XFS_BUF_ORDERED(sbp);
646   -
647   - xfsbdstrat(mp, sbp);
648   - error = xfs_buf_iowait(sbp);
649   -
650   - /*
651   - * Clear all the flags we set and possible error state in the
652   - * buffer. We only did the write to try out whether barriers
653   - * worked and shouldn't leave any traces in the superblock
654   - * buffer.
655   - */
656   - XFS_BUF_DONE(sbp);
657   - XFS_BUF_ERROR(sbp, 0);
658   - XFS_BUF_UNORDERED(sbp);
659   -
660   - xfs_buf_relse(sbp);
661   - return error;
662   -}
663   -
664   -STATIC void
665   -xfs_mountfs_check_barriers(xfs_mount_t *mp)
666   -{
667   - int error;
668   -
669   - if (mp->m_logdev_targp != mp->m_ddev_targp) {
670   - xfs_notice(mp,
671   - "Disabling barriers, not supported with external log device");
672   - mp->m_flags &= ~XFS_MOUNT_BARRIER;
673   - return;
674   - }
675   -
676   - if (xfs_readonly_buftarg(mp->m_ddev_targp)) {
677   - xfs_notice(mp,
678   - "Disabling barriers, underlying device is readonly");
679   - mp->m_flags &= ~XFS_MOUNT_BARRIER;
680   - return;
681   - }
682   -
683   - error = xfs_barrier_test(mp);
684   - if (error) {
685   - xfs_notice(mp,
686   - "Disabling barriers, trial barrier write failed");
687   - mp->m_flags &= ~XFS_MOUNT_BARRIER;
688   - return;
689   - }
690   -}
691   -
692 630 void
693 631 xfs_blkdev_issue_flush(
694 632 xfs_buftarg_t *buftarg)
... ... @@ -1240,14 +1178,6 @@
1240 1178 switch (token) {
1241 1179 case Opt_barrier:
1242 1180 mp->m_flags |= XFS_MOUNT_BARRIER;
1243   -
1244   - /*
1245   - * Test if barriers are actually working if we can,
1246   - * else delay this check until the filesystem is
1247   - * marked writeable.
1248   - */
1249   - if (!(mp->m_flags & XFS_MOUNT_RDONLY))
1250   - xfs_mountfs_check_barriers(mp);
1251 1181 break;
1252 1182 case Opt_nobarrier:
1253 1183 mp->m_flags &= ~XFS_MOUNT_BARRIER;
... ... @@ -1282,8 +1212,6 @@
1282 1212 /* ro -> rw */
1283 1213 if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & MS_RDONLY)) {
1284 1214 mp->m_flags &= ~XFS_MOUNT_RDONLY;
1285   - if (mp->m_flags & XFS_MOUNT_BARRIER)
1286   - xfs_mountfs_check_barriers(mp);
1287 1215  
1288 1216 /*
1289 1217 * If this is the first remount to writeable state we
... ... @@ -1464,9 +1392,6 @@
1464 1392 error = xfs_setup_devices(mp);
1465 1393 if (error)
1466 1394 goto out_free_sb;
1467   -
1468   - if (mp->m_flags & XFS_MOUNT_BARRIER)
1469   - xfs_mountfs_check_barriers(mp);
1470 1395  
1471 1396 error = xfs_filestream_mount(mp);
1472 1397 if (error)
... ... @@ -1372,8 +1372,17 @@
1372 1372 XFS_BUF_ASYNC(bp);
1373 1373 bp->b_flags |= XBF_LOG_BUFFER;
1374 1374  
1375   - if (log->l_mp->m_flags & XFS_MOUNT_BARRIER)
  1375 + if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) {
  1376 + /*
  1377 + * If we have an external log device, flush the data device
  1378 + * before flushing the log to make sure all meta data
  1379 + * written back from the AIL actually made it to disk
  1380 + * before writing out the new log tail LSN in the log buffer.
  1381 + */
  1382 + if (log->l_mp->m_logdev_targp != log->l_mp->m_ddev_targp)
  1383 + xfs_blkdev_issue_flush(log->l_mp->m_ddev_targp);
1376 1384 XFS_BUF_ORDERED(bp);
  1385 + }
1377 1386  
1378 1387 ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1);
1379 1388 ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize);