Commit 281627df3eb55e1b729b9bb06fff5ff112929646

Authored by Christoph Hellwig
Committed by Ben Myers
1 parent 84803fb782

xfs: log file size updates at I/O completion time

Do not use unlogged metadata updates and the VFS dirty bit for updating
the file size after writeback.  In addition to causing various problems
with updates getting delayed for far too long this also drags in the
unscalable VFS dirty tracking, and is one of the few remaining unlogged
metadata updates.

Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Mark Tinguely <tinguely@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>

Showing 2 changed files with 111 additions and 24 deletions Side-by-side Diff

... ... @@ -26,6 +26,7 @@
26 26 #include "xfs_bmap_btree.h"
27 27 #include "xfs_dinode.h"
28 28 #include "xfs_inode.h"
  29 +#include "xfs_inode_item.h"
29 30 #include "xfs_alloc.h"
30 31 #include "xfs_error.h"
31 32 #include "xfs_rw.h"
32 33  
33 34  
34 35  
35 36  
36 37  
... ... @@ -107,25 +108,65 @@
107 108 XFS_I(ioend->io_inode)->i_d.di_size;
108 109 }
109 110  
  111 +STATIC int
  112 +xfs_setfilesize_trans_alloc(
  113 + struct xfs_ioend *ioend)
  114 +{
  115 + struct xfs_mount *mp = XFS_I(ioend->io_inode)->i_mount;
  116 + struct xfs_trans *tp;
  117 + int error;
  118 +
  119 + tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
  120 +
  121 + error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
  122 + if (error) {
  123 + xfs_trans_cancel(tp, 0);
  124 + return error;
  125 + }
  126 +
  127 + ioend->io_append_trans = tp;
  128 +
  129 + /*
  130 + * We hand off the transaction to the completion thread now, so
  131 + * clear the flag here.
  132 + */
  133 + current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
  134 + return 0;
  135 +}
  136 +
110 137 /*
111 138 * Update on-disk file size now that data has been written to disk.
112 139 */
113   -STATIC void
  140 +STATIC int
114 141 xfs_setfilesize(
115 142 struct xfs_ioend *ioend)
116 143 {
117 144 struct xfs_inode *ip = XFS_I(ioend->io_inode);
  145 + struct xfs_trans *tp = ioend->io_append_trans;
118 146 xfs_fsize_t isize;
119 147  
  148 + /*
  149 + * The transaction was allocated in the I/O submission thread,
  150 + * thus we need to mark ourselves as being in a transaction
  151 + * manually.
  152 + */
  153 + current_set_flags_nested(&tp->t_pflags, PF_FSTRANS);
  154 +
120 155 xfs_ilock(ip, XFS_ILOCK_EXCL);
121 156 isize = xfs_new_eof(ip, ioend->io_offset + ioend->io_size);
122   - if (isize) {
123   - trace_xfs_setfilesize(ip, ioend->io_offset, ioend->io_size);
124   - ip->i_d.di_size = isize;
125   - xfs_mark_inode_dirty(ip);
  157 + if (!isize) {
  158 + xfs_iunlock(ip, XFS_ILOCK_EXCL);
  159 + xfs_trans_cancel(tp, 0);
  160 + return 0;
126 161 }
127 162  
128   - xfs_iunlock(ip, XFS_ILOCK_EXCL);
  163 + trace_xfs_setfilesize(ip, ioend->io_offset, ioend->io_size);
  164 +
  165 + ip->i_d.di_size = isize;
  166 + xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
  167 + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
  168 +
  169 + return xfs_trans_commit(tp, 0);
129 170 }
130 171  
131 172 /*
... ... @@ -143,7 +184,7 @@
143 184  
144 185 if (ioend->io_type == IO_UNWRITTEN)
145 186 queue_work(mp->m_unwritten_workqueue, &ioend->io_work);
146   - else if (xfs_ioend_is_append(ioend))
  187 + else if (ioend->io_append_trans)
147 188 queue_work(mp->m_data_workqueue, &ioend->io_work);
148 189 else
149 190 xfs_destroy_ioend(ioend);
150 191  
151 192  
... ... @@ -173,18 +214,32 @@
173 214 * range to normal written extents after the data I/O has finished.
174 215 */
175 216 if (ioend->io_type == IO_UNWRITTEN) {
  217 + /*
  218 + * For buffered I/O we never preallocate a transaction when
  219 + * doing the unwritten extent conversion, but for direct I/O
  220 + * we do not know if we are converting an unwritten extent
  221 + * or not at the point where we preallocate the transaction.
  222 + */
  223 + if (ioend->io_append_trans) {
  224 + ASSERT(ioend->io_isdirect);
  225 +
  226 + current_set_flags_nested(
  227 + &ioend->io_append_trans->t_pflags, PF_FSTRANS);
  228 + xfs_trans_cancel(ioend->io_append_trans, 0);
  229 + }
  230 +
176 231 error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
177 232 ioend->io_size);
178 233 if (error) {
179 234 ioend->io_error = -error;
180 235 goto done;
181 236 }
  237 + } else if (ioend->io_append_trans) {
  238 + error = xfs_setfilesize(ioend);
  239 + if (error)
  240 + ioend->io_error = -error;
182 241 } else {
183   - /*
184   - * We might have to update the on-disk file size after
185   - * extending writes.
186   - */
187   - xfs_setfilesize(ioend);
  242 + ASSERT(!xfs_ioend_is_append(ioend));
188 243 }
189 244  
190 245 done:
... ... @@ -224,6 +279,7 @@
224 279 */
225 280 atomic_set(&ioend->io_remaining, 1);
226 281 ioend->io_isasync = 0;
  282 + ioend->io_isdirect = 0;
227 283 ioend->io_error = 0;
228 284 ioend->io_list = NULL;
229 285 ioend->io_type = type;
... ... @@ -234,6 +290,7 @@
234 290 ioend->io_size = 0;
235 291 ioend->io_iocb = NULL;
236 292 ioend->io_result = 0;
  293 + ioend->io_append_trans = NULL;
237 294  
238 295 INIT_WORK(&ioend->io_work, xfs_end_io);
239 296 return ioend;
240 297  
... ... @@ -341,18 +398,9 @@
341 398 xfs_ioend_t *ioend,
342 399 struct bio *bio)
343 400 {
344   - struct xfs_inode *ip = XFS_I(ioend->io_inode);
345 401 atomic_inc(&ioend->io_remaining);
346 402 bio->bi_private = ioend;
347 403 bio->bi_end_io = xfs_end_bio;
348   -
349   - /*
350   - * If the I/O is beyond EOF we mark the inode dirty immediately
351   - * but don't update the inode size until I/O completion.
352   - */
353   - if (xfs_new_eof(ip, ioend->io_offset + ioend->io_size))
354   - xfs_mark_inode_dirty(ip);
355   -
356 404 submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE, bio);
357 405 }
358 406  
359 407  
... ... @@ -999,8 +1047,20 @@
999 1047 wbc, end_index);
1000 1048 }
1001 1049  
1002   - if (iohead)
  1050 + if (iohead) {
  1051 + /*
  1052 + * Reserve log space if we might write beyond the on-disk
  1053 + * inode size.
  1054 + */
  1055 + if (ioend->io_type != IO_UNWRITTEN &&
  1056 + xfs_ioend_is_append(ioend)) {
  1057 + err = xfs_setfilesize_trans_alloc(ioend);
  1058 + if (err)
  1059 + goto error;
  1060 + }
  1061 +
1003 1062 xfs_submit_ioend(wbc, iohead);
  1063 + }
1004 1064  
1005 1065 return 0;
1006 1066  
1007 1067  
1008 1068  
1009 1069  
... ... @@ -1280,17 +1340,32 @@
1280 1340 {
1281 1341 struct inode *inode = iocb->ki_filp->f_mapping->host;
1282 1342 struct block_device *bdev = xfs_find_bdev_for_inode(inode);
  1343 + struct xfs_ioend *ioend = NULL;
1283 1344 ssize_t ret;
1284 1345  
1285 1346 if (rw & WRITE) {
1286   - iocb->private = xfs_alloc_ioend(inode, IO_DIRECT);
  1347 + size_t size = iov_length(iov, nr_segs);
1287 1348  
  1349 + /*
  1350 + * We need to preallocate a transaction for a size update
  1351 + * here. In the case that this write both updates the size
  1352 + * and converts at least one unwritten extent we will cancel
  1353 + * the still clean transaction after the I/O has finished.
  1354 + */
  1355 + iocb->private = ioend = xfs_alloc_ioend(inode, IO_DIRECT);
  1356 + if (offset + size > XFS_I(inode)->i_d.di_size) {
  1357 + ret = xfs_setfilesize_trans_alloc(ioend);
  1358 + if (ret)
  1359 + goto out_destroy_ioend;
  1360 + ioend->io_isdirect = 1;
  1361 + }
  1362 +
1288 1363 ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
1289 1364 offset, nr_segs,
1290 1365 xfs_get_blocks_direct,
1291 1366 xfs_end_io_direct_write, NULL, 0);
1292 1367 if (ret != -EIOCBQUEUED && iocb->private)
1293   - xfs_destroy_ioend(iocb->private);
  1368 + goto out_trans_cancel;
1294 1369 } else {
1295 1370 ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
1296 1371 offset, nr_segs,
... ... @@ -1298,6 +1373,16 @@
1298 1373 NULL, NULL, 0);
1299 1374 }
1300 1375  
  1376 + return ret;
  1377 +
  1378 +out_trans_cancel:
  1379 + if (ioend->io_append_trans) {
  1380 + current_set_flags_nested(&ioend->io_append_trans->t_pflags,
  1381 + PF_FSTRANS);
  1382 + xfs_trans_cancel(ioend->io_append_trans, 0);
  1383 + }
  1384 +out_destroy_ioend:
  1385 + xfs_destroy_ioend(ioend);
1301 1386 return ret;
1302 1387 }
1303 1388  
... ... @@ -46,12 +46,14 @@
46 46 int io_error; /* I/O error code */
47 47 atomic_t io_remaining; /* hold count */
48 48 unsigned int io_isasync : 1; /* needs aio_complete */
  49 + unsigned int io_isdirect : 1;/* direct I/O */
49 50 struct inode *io_inode; /* file being written to */
50 51 struct buffer_head *io_buffer_head;/* buffer linked list head */
51 52 struct buffer_head *io_buffer_tail;/* buffer linked list tail */
52 53 size_t io_size; /* size of the extent */
53 54 xfs_off_t io_offset; /* offset in the file */
54 55 struct work_struct io_work; /* xfsdatad work queue */
  56 + struct xfs_trans *io_append_trans;/* xact. for size update */
55 57 struct kiocb *io_iocb;
56 58 int io_result;
57 59 } xfs_ioend_t;