Commit 281627df3eb55e1b729b9bb06fff5ff112929646

Authored by Christoph Hellwig
Committed by Ben Myers
1 parent 84803fb782

xfs: log file size updates at I/O completion time

Do not use unlogged metadata updates and the VFS dirty bit for updating
the file size after writeback.  In addition to causing various problems
with updates getting delayed for far too long, this also drags in the
unscalable VFS dirty tracking, and is one of the few remaining unlogged
metadata updates.

Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Mark Tinguely <tinguely@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>

Showing 2 changed files with 111 additions and 24 deletions
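For orientation, a condensed sketch of the flow this patch introduces, pieced together from the hunks below (locking, error handling, PF_FSTRANS juggling and the direct I/O case are trimmed, so this is not a standalone translation unit): log space for the size update is now reserved at submission time, and the on-disk inode size is logged and committed from the I/O completion workqueue, replacing the old unlogged xfs_mark_inode_dirty() path.

    /* Submission side, xfs_vm_writepage(): reserve log space up front. */
    if (ioend->io_type != IO_UNWRITTEN && xfs_ioend_is_append(ioend)) {
            err = xfs_setfilesize_trans_alloc(ioend);  /* stashes tp in io_append_trans */
            if (err)
                    goto error;
    }
    xfs_submit_ioend(wbc, iohead);

    /* Completion side, xfs_end_io() -> xfs_setfilesize(): log the new size. */
    xfs_ilock(ip, XFS_ILOCK_EXCL);
    isize = xfs_new_eof(ip, ioend->io_offset + ioend->io_size);
    if (!isize) {
            xfs_iunlock(ip, XFS_ILOCK_EXCL);
            xfs_trans_cancel(tp, 0);                   /* size did not grow, nothing to log */
    } else {
            ip->i_d.di_size = isize;
            xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
            xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
            xfs_trans_commit(tp, 0);                   /* logged update, no VFS dirty bit */
    }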

1 /* 1 /*
2 * Copyright (c) 2000-2005 Silicon Graphics, Inc. 2 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3 * All Rights Reserved. 3 * All Rights Reserved.
4 * 4 *
5 * This program is free software; you can redistribute it and/or 5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as 6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 * 8 *
9 * This program is distributed in the hope that it would be useful, 9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details. 12 * GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation, 15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */ 17 */
18 #include "xfs.h" 18 #include "xfs.h"
19 #include "xfs_bit.h" 19 #include "xfs_bit.h"
20 #include "xfs_log.h" 20 #include "xfs_log.h"
21 #include "xfs_inum.h" 21 #include "xfs_inum.h"
22 #include "xfs_sb.h" 22 #include "xfs_sb.h"
23 #include "xfs_ag.h" 23 #include "xfs_ag.h"
24 #include "xfs_trans.h" 24 #include "xfs_trans.h"
25 #include "xfs_mount.h" 25 #include "xfs_mount.h"
26 #include "xfs_bmap_btree.h" 26 #include "xfs_bmap_btree.h"
27 #include "xfs_dinode.h" 27 #include "xfs_dinode.h"
28 #include "xfs_inode.h" 28 #include "xfs_inode.h"
29 #include "xfs_inode_item.h"
29 #include "xfs_alloc.h" 30 #include "xfs_alloc.h"
30 #include "xfs_error.h" 31 #include "xfs_error.h"
31 #include "xfs_rw.h" 32 #include "xfs_rw.h"
32 #include "xfs_iomap.h" 33 #include "xfs_iomap.h"
33 #include "xfs_vnodeops.h" 34 #include "xfs_vnodeops.h"
34 #include "xfs_trace.h" 35 #include "xfs_trace.h"
35 #include "xfs_bmap.h" 36 #include "xfs_bmap.h"
36 #include <linux/gfp.h> 37 #include <linux/gfp.h>
37 #include <linux/mpage.h> 38 #include <linux/mpage.h>
38 #include <linux/pagevec.h> 39 #include <linux/pagevec.h>
39 #include <linux/writeback.h> 40 #include <linux/writeback.h>
40 41
41 void 42 void
42 xfs_count_page_state( 43 xfs_count_page_state(
43 struct page *page, 44 struct page *page,
44 int *delalloc, 45 int *delalloc,
45 int *unwritten) 46 int *unwritten)
46 { 47 {
47 struct buffer_head *bh, *head; 48 struct buffer_head *bh, *head;
48 49
49 *delalloc = *unwritten = 0; 50 *delalloc = *unwritten = 0;
50 51
51 bh = head = page_buffers(page); 52 bh = head = page_buffers(page);
52 do { 53 do {
53 if (buffer_unwritten(bh)) 54 if (buffer_unwritten(bh))
54 (*unwritten) = 1; 55 (*unwritten) = 1;
55 else if (buffer_delay(bh)) 56 else if (buffer_delay(bh))
56 (*delalloc) = 1; 57 (*delalloc) = 1;
57 } while ((bh = bh->b_this_page) != head); 58 } while ((bh = bh->b_this_page) != head);
58 } 59 }
59 60
60 STATIC struct block_device * 61 STATIC struct block_device *
61 xfs_find_bdev_for_inode( 62 xfs_find_bdev_for_inode(
62 struct inode *inode) 63 struct inode *inode)
63 { 64 {
64 struct xfs_inode *ip = XFS_I(inode); 65 struct xfs_inode *ip = XFS_I(inode);
65 struct xfs_mount *mp = ip->i_mount; 66 struct xfs_mount *mp = ip->i_mount;
66 67
67 if (XFS_IS_REALTIME_INODE(ip)) 68 if (XFS_IS_REALTIME_INODE(ip))
68 return mp->m_rtdev_targp->bt_bdev; 69 return mp->m_rtdev_targp->bt_bdev;
69 else 70 else
70 return mp->m_ddev_targp->bt_bdev; 71 return mp->m_ddev_targp->bt_bdev;
71 } 72 }
72 73
73 /* 74 /*
74 * We're now finished for good with this ioend structure. 75 * We're now finished for good with this ioend structure.
75 * Update the page state via the associated buffer_heads, 76 * Update the page state via the associated buffer_heads,
76 * release holds on the inode and bio, and finally free 77 * release holds on the inode and bio, and finally free
77 * up memory. Do not use the ioend after this. 78 * up memory. Do not use the ioend after this.
78 */ 79 */
79 STATIC void 80 STATIC void
80 xfs_destroy_ioend( 81 xfs_destroy_ioend(
81 xfs_ioend_t *ioend) 82 xfs_ioend_t *ioend)
82 { 83 {
83 struct buffer_head *bh, *next; 84 struct buffer_head *bh, *next;
84 85
85 for (bh = ioend->io_buffer_head; bh; bh = next) { 86 for (bh = ioend->io_buffer_head; bh; bh = next) {
86 next = bh->b_private; 87 next = bh->b_private;
87 bh->b_end_io(bh, !ioend->io_error); 88 bh->b_end_io(bh, !ioend->io_error);
88 } 89 }
89 90
90 if (ioend->io_iocb) { 91 if (ioend->io_iocb) {
91 if (ioend->io_isasync) { 92 if (ioend->io_isasync) {
92 aio_complete(ioend->io_iocb, ioend->io_error ? 93 aio_complete(ioend->io_iocb, ioend->io_error ?
93 ioend->io_error : ioend->io_result, 0); 94 ioend->io_error : ioend->io_result, 0);
94 } 95 }
95 inode_dio_done(ioend->io_inode); 96 inode_dio_done(ioend->io_inode);
96 } 97 }
97 98
98 mempool_free(ioend, xfs_ioend_pool); 99 mempool_free(ioend, xfs_ioend_pool);
99 } 100 }
100 101
101 /* 102 /*
102 * Fast and loose check if this write could update the on-disk inode size. 103 * Fast and loose check if this write could update the on-disk inode size.
103 */ 104 */
104 static inline bool xfs_ioend_is_append(struct xfs_ioend *ioend) 105 static inline bool xfs_ioend_is_append(struct xfs_ioend *ioend)
105 { 106 {
106 return ioend->io_offset + ioend->io_size > 107 return ioend->io_offset + ioend->io_size >
107 XFS_I(ioend->io_inode)->i_d.di_size; 108 XFS_I(ioend->io_inode)->i_d.di_size;
108 } 109 }
109 110
111 STATIC int
112 xfs_setfilesize_trans_alloc(
113 struct xfs_ioend *ioend)
114 {
115 struct xfs_mount *mp = XFS_I(ioend->io_inode)->i_mount;
116 struct xfs_trans *tp;
117 int error;
118
119 tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
120
121 error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
122 if (error) {
123 xfs_trans_cancel(tp, 0);
124 return error;
125 }
126
127 ioend->io_append_trans = tp;
128
129 /*
130 * We hand off the transaction to the completion thread now, so
131 * clear the flag here.
132 */
133 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
134 return 0;
135 }
136
110 /* 137 /*
111 * Update on-disk file size now that data has been written to disk. 138 * Update on-disk file size now that data has been written to disk.
112 */ 139 */
113 STATIC void 140 STATIC int
114 xfs_setfilesize( 141 xfs_setfilesize(
115 struct xfs_ioend *ioend) 142 struct xfs_ioend *ioend)
116 { 143 {
117 struct xfs_inode *ip = XFS_I(ioend->io_inode); 144 struct xfs_inode *ip = XFS_I(ioend->io_inode);
145 struct xfs_trans *tp = ioend->io_append_trans;
118 xfs_fsize_t isize; 146 xfs_fsize_t isize;
119 147
148 /*
149 * The transaction was allocated in the I/O submission thread,
150 * thus we need to mark ourselves as being in a transaction
151 * manually.
152 */
153 current_set_flags_nested(&tp->t_pflags, PF_FSTRANS);
154
120 xfs_ilock(ip, XFS_ILOCK_EXCL); 155 xfs_ilock(ip, XFS_ILOCK_EXCL);
121 isize = xfs_new_eof(ip, ioend->io_offset + ioend->io_size); 156 isize = xfs_new_eof(ip, ioend->io_offset + ioend->io_size);
122 if (isize) { 157 if (!isize) {
123 trace_xfs_setfilesize(ip, ioend->io_offset, ioend->io_size); 158 xfs_iunlock(ip, XFS_ILOCK_EXCL);
124 ip->i_d.di_size = isize; 159 xfs_trans_cancel(tp, 0);
125 xfs_mark_inode_dirty(ip); 160 return 0;
126 } 161 }
127 162
128 xfs_iunlock(ip, XFS_ILOCK_EXCL); 163 trace_xfs_setfilesize(ip, ioend->io_offset, ioend->io_size);
164
165 ip->i_d.di_size = isize;
166 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
167 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
168
169 return xfs_trans_commit(tp, 0);
129 } 170 }
130 171
131 /* 172 /*
132 * Schedule IO completion handling on the final put of an ioend. 173 * Schedule IO completion handling on the final put of an ioend.
133 * 174 *
134 * If there is no work to do we might as well call it a day and free the 175 * If there is no work to do we might as well call it a day and free the
135 * ioend right now. 176 * ioend right now.
136 */ 177 */
137 STATIC void 178 STATIC void
138 xfs_finish_ioend( 179 xfs_finish_ioend(
139 struct xfs_ioend *ioend) 180 struct xfs_ioend *ioend)
140 { 181 {
141 if (atomic_dec_and_test(&ioend->io_remaining)) { 182 if (atomic_dec_and_test(&ioend->io_remaining)) {
142 struct xfs_mount *mp = XFS_I(ioend->io_inode)->i_mount; 183 struct xfs_mount *mp = XFS_I(ioend->io_inode)->i_mount;
143 184
144 if (ioend->io_type == IO_UNWRITTEN) 185 if (ioend->io_type == IO_UNWRITTEN)
145 queue_work(mp->m_unwritten_workqueue, &ioend->io_work); 186 queue_work(mp->m_unwritten_workqueue, &ioend->io_work);
146 else if (xfs_ioend_is_append(ioend)) 187 else if (ioend->io_append_trans)
147 queue_work(mp->m_data_workqueue, &ioend->io_work); 188 queue_work(mp->m_data_workqueue, &ioend->io_work);
148 else 189 else
149 xfs_destroy_ioend(ioend); 190 xfs_destroy_ioend(ioend);
150 } 191 }
151 } 192 }
152 193
153 /* 194 /*
154 * IO write completion. 195 * IO write completion.
155 */ 196 */
156 STATIC void 197 STATIC void
157 xfs_end_io( 198 xfs_end_io(
158 struct work_struct *work) 199 struct work_struct *work)
159 { 200 {
160 xfs_ioend_t *ioend = container_of(work, xfs_ioend_t, io_work); 201 xfs_ioend_t *ioend = container_of(work, xfs_ioend_t, io_work);
161 struct xfs_inode *ip = XFS_I(ioend->io_inode); 202 struct xfs_inode *ip = XFS_I(ioend->io_inode);
162 int error = 0; 203 int error = 0;
163 204
164 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { 205 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
165 ioend->io_error = -EIO; 206 ioend->io_error = -EIO;
166 goto done; 207 goto done;
167 } 208 }
168 if (ioend->io_error) 209 if (ioend->io_error)
169 goto done; 210 goto done;
170 211
171 /* 212 /*
172 * For unwritten extents we need to issue transactions to convert a 213 * For unwritten extents we need to issue transactions to convert a
173 * range to normal written extents after the data I/O has finished. 214 * range to normal written extents after the data I/O has finished.
174 */ 215 */
175 if (ioend->io_type == IO_UNWRITTEN) { 216 if (ioend->io_type == IO_UNWRITTEN) {
217 /*
218 * For buffered I/O we never preallocate a transaction when
219 * doing the unwritten extent conversion, but for direct I/O
220 * we do not know if we are converting an unwritten extent
221 * or not at the point where we preallocate the transaction.
222 */
223 if (ioend->io_append_trans) {
224 ASSERT(ioend->io_isdirect);
225
226 current_set_flags_nested(
227 &ioend->io_append_trans->t_pflags, PF_FSTRANS);
228 xfs_trans_cancel(ioend->io_append_trans, 0);
229 }
230
176 error = xfs_iomap_write_unwritten(ip, ioend->io_offset, 231 error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
177 ioend->io_size); 232 ioend->io_size);
178 if (error) { 233 if (error) {
179 ioend->io_error = -error; 234 ioend->io_error = -error;
180 goto done; 235 goto done;
181 } 236 }
237 } else if (ioend->io_append_trans) {
238 error = xfs_setfilesize(ioend);
239 if (error)
240 ioend->io_error = -error;
182 } else { 241 } else {
183 /* 242 ASSERT(!xfs_ioend_is_append(ioend));
184 * We might have to update the on-disk file size after
185 * extending writes.
186 */
187 xfs_setfilesize(ioend);
188 } 243 }
189 244
190 done: 245 done:
191 xfs_destroy_ioend(ioend); 246 xfs_destroy_ioend(ioend);
192 } 247 }
193 248
194 /* 249 /*
195 * Call IO completion handling in caller context on the final put of an ioend. 250 * Call IO completion handling in caller context on the final put of an ioend.
196 */ 251 */
197 STATIC void 252 STATIC void
198 xfs_finish_ioend_sync( 253 xfs_finish_ioend_sync(
199 struct xfs_ioend *ioend) 254 struct xfs_ioend *ioend)
200 { 255 {
201 if (atomic_dec_and_test(&ioend->io_remaining)) 256 if (atomic_dec_and_test(&ioend->io_remaining))
202 xfs_end_io(&ioend->io_work); 257 xfs_end_io(&ioend->io_work);
203 } 258 }
204 259
205 /* 260 /*
206 * Allocate and initialise an IO completion structure. 261 * Allocate and initialise an IO completion structure.
207 * We need to track unwritten extent write completion here initially. 262 * We need to track unwritten extent write completion here initially.
208 * We'll need to extend this for updating the ondisk inode size later 263 * We'll need to extend this for updating the ondisk inode size later
209 * (vs. incore size). 264 * (vs. incore size).
210 */ 265 */
211 STATIC xfs_ioend_t * 266 STATIC xfs_ioend_t *
212 xfs_alloc_ioend( 267 xfs_alloc_ioend(
213 struct inode *inode, 268 struct inode *inode,
214 unsigned int type) 269 unsigned int type)
215 { 270 {
216 xfs_ioend_t *ioend; 271 xfs_ioend_t *ioend;
217 272
218 ioend = mempool_alloc(xfs_ioend_pool, GFP_NOFS); 273 ioend = mempool_alloc(xfs_ioend_pool, GFP_NOFS);
219 274
220 /* 275 /*
221 * Set the count to 1 initially, which will prevent an I/O 276 * Set the count to 1 initially, which will prevent an I/O
222 * completion callback from happening before we have started 277 * completion callback from happening before we have started
223 * all the I/O from calling the completion routine too early. 278 * all the I/O from calling the completion routine too early.
224 */ 279 */
225 atomic_set(&ioend->io_remaining, 1); 280 atomic_set(&ioend->io_remaining, 1);
226 ioend->io_isasync = 0; 281 ioend->io_isasync = 0;
282 ioend->io_isdirect = 0;
227 ioend->io_error = 0; 283 ioend->io_error = 0;
228 ioend->io_list = NULL; 284 ioend->io_list = NULL;
229 ioend->io_type = type; 285 ioend->io_type = type;
230 ioend->io_inode = inode; 286 ioend->io_inode = inode;
231 ioend->io_buffer_head = NULL; 287 ioend->io_buffer_head = NULL;
232 ioend->io_buffer_tail = NULL; 288 ioend->io_buffer_tail = NULL;
233 ioend->io_offset = 0; 289 ioend->io_offset = 0;
234 ioend->io_size = 0; 290 ioend->io_size = 0;
235 ioend->io_iocb = NULL; 291 ioend->io_iocb = NULL;
236 ioend->io_result = 0; 292 ioend->io_result = 0;
293 ioend->io_append_trans = NULL;
237 294
238 INIT_WORK(&ioend->io_work, xfs_end_io); 295 INIT_WORK(&ioend->io_work, xfs_end_io);
239 return ioend; 296 return ioend;
240 } 297 }
241 298
242 STATIC int 299 STATIC int
243 xfs_map_blocks( 300 xfs_map_blocks(
244 struct inode *inode, 301 struct inode *inode,
245 loff_t offset, 302 loff_t offset,
246 struct xfs_bmbt_irec *imap, 303 struct xfs_bmbt_irec *imap,
247 int type, 304 int type,
248 int nonblocking) 305 int nonblocking)
249 { 306 {
250 struct xfs_inode *ip = XFS_I(inode); 307 struct xfs_inode *ip = XFS_I(inode);
251 struct xfs_mount *mp = ip->i_mount; 308 struct xfs_mount *mp = ip->i_mount;
252 ssize_t count = 1 << inode->i_blkbits; 309 ssize_t count = 1 << inode->i_blkbits;
253 xfs_fileoff_t offset_fsb, end_fsb; 310 xfs_fileoff_t offset_fsb, end_fsb;
254 int error = 0; 311 int error = 0;
255 int bmapi_flags = XFS_BMAPI_ENTIRE; 312 int bmapi_flags = XFS_BMAPI_ENTIRE;
256 int nimaps = 1; 313 int nimaps = 1;
257 314
258 if (XFS_FORCED_SHUTDOWN(mp)) 315 if (XFS_FORCED_SHUTDOWN(mp))
259 return -XFS_ERROR(EIO); 316 return -XFS_ERROR(EIO);
260 317
261 if (type == IO_UNWRITTEN) 318 if (type == IO_UNWRITTEN)
262 bmapi_flags |= XFS_BMAPI_IGSTATE; 319 bmapi_flags |= XFS_BMAPI_IGSTATE;
263 320
264 if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) { 321 if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
265 if (nonblocking) 322 if (nonblocking)
266 return -XFS_ERROR(EAGAIN); 323 return -XFS_ERROR(EAGAIN);
267 xfs_ilock(ip, XFS_ILOCK_SHARED); 324 xfs_ilock(ip, XFS_ILOCK_SHARED);
268 } 325 }
269 326
270 ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || 327 ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
271 (ip->i_df.if_flags & XFS_IFEXTENTS)); 328 (ip->i_df.if_flags & XFS_IFEXTENTS));
272 ASSERT(offset <= mp->m_maxioffset); 329 ASSERT(offset <= mp->m_maxioffset);
273 330
274 if (offset + count > mp->m_maxioffset) 331 if (offset + count > mp->m_maxioffset)
275 count = mp->m_maxioffset - offset; 332 count = mp->m_maxioffset - offset;
276 end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count); 333 end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
277 offset_fsb = XFS_B_TO_FSBT(mp, offset); 334 offset_fsb = XFS_B_TO_FSBT(mp, offset);
278 error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, 335 error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
279 imap, &nimaps, bmapi_flags); 336 imap, &nimaps, bmapi_flags);
280 xfs_iunlock(ip, XFS_ILOCK_SHARED); 337 xfs_iunlock(ip, XFS_ILOCK_SHARED);
281 338
282 if (error) 339 if (error)
283 return -XFS_ERROR(error); 340 return -XFS_ERROR(error);
284 341
285 if (type == IO_DELALLOC && 342 if (type == IO_DELALLOC &&
286 (!nimaps || isnullstartblock(imap->br_startblock))) { 343 (!nimaps || isnullstartblock(imap->br_startblock))) {
287 error = xfs_iomap_write_allocate(ip, offset, count, imap); 344 error = xfs_iomap_write_allocate(ip, offset, count, imap);
288 if (!error) 345 if (!error)
289 trace_xfs_map_blocks_alloc(ip, offset, count, type, imap); 346 trace_xfs_map_blocks_alloc(ip, offset, count, type, imap);
290 return -XFS_ERROR(error); 347 return -XFS_ERROR(error);
291 } 348 }
292 349
293 #ifdef DEBUG 350 #ifdef DEBUG
294 if (type == IO_UNWRITTEN) { 351 if (type == IO_UNWRITTEN) {
295 ASSERT(nimaps); 352 ASSERT(nimaps);
296 ASSERT(imap->br_startblock != HOLESTARTBLOCK); 353 ASSERT(imap->br_startblock != HOLESTARTBLOCK);
297 ASSERT(imap->br_startblock != DELAYSTARTBLOCK); 354 ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
298 } 355 }
299 #endif 356 #endif
300 if (nimaps) 357 if (nimaps)
301 trace_xfs_map_blocks_found(ip, offset, count, type, imap); 358 trace_xfs_map_blocks_found(ip, offset, count, type, imap);
302 return 0; 359 return 0;
303 } 360 }
304 361
305 STATIC int 362 STATIC int
306 xfs_imap_valid( 363 xfs_imap_valid(
307 struct inode *inode, 364 struct inode *inode,
308 struct xfs_bmbt_irec *imap, 365 struct xfs_bmbt_irec *imap,
309 xfs_off_t offset) 366 xfs_off_t offset)
310 { 367 {
311 offset >>= inode->i_blkbits; 368 offset >>= inode->i_blkbits;
312 369
313 return offset >= imap->br_startoff && 370 return offset >= imap->br_startoff &&
314 offset < imap->br_startoff + imap->br_blockcount; 371 offset < imap->br_startoff + imap->br_blockcount;
315 } 372 }
316 373
317 /* 374 /*
318 * BIO completion handler for buffered IO. 375 * BIO completion handler for buffered IO.
319 */ 376 */
320 STATIC void 377 STATIC void
321 xfs_end_bio( 378 xfs_end_bio(
322 struct bio *bio, 379 struct bio *bio,
323 int error) 380 int error)
324 { 381 {
325 xfs_ioend_t *ioend = bio->bi_private; 382 xfs_ioend_t *ioend = bio->bi_private;
326 383
327 ASSERT(atomic_read(&bio->bi_cnt) >= 1); 384 ASSERT(atomic_read(&bio->bi_cnt) >= 1);
328 ioend->io_error = test_bit(BIO_UPTODATE, &bio->bi_flags) ? 0 : error; 385 ioend->io_error = test_bit(BIO_UPTODATE, &bio->bi_flags) ? 0 : error;
329 386
330 /* Toss bio and pass work off to an xfsdatad thread */ 387 /* Toss bio and pass work off to an xfsdatad thread */
331 bio->bi_private = NULL; 388 bio->bi_private = NULL;
332 bio->bi_end_io = NULL; 389 bio->bi_end_io = NULL;
333 bio_put(bio); 390 bio_put(bio);
334 391
335 xfs_finish_ioend(ioend); 392 xfs_finish_ioend(ioend);
336 } 393 }
337 394
338 STATIC void 395 STATIC void
339 xfs_submit_ioend_bio( 396 xfs_submit_ioend_bio(
340 struct writeback_control *wbc, 397 struct writeback_control *wbc,
341 xfs_ioend_t *ioend, 398 xfs_ioend_t *ioend,
342 struct bio *bio) 399 struct bio *bio)
343 { 400 {
344 struct xfs_inode *ip = XFS_I(ioend->io_inode);
345 atomic_inc(&ioend->io_remaining); 401 atomic_inc(&ioend->io_remaining);
346 bio->bi_private = ioend; 402 bio->bi_private = ioend;
347 bio->bi_end_io = xfs_end_bio; 403 bio->bi_end_io = xfs_end_bio;
348
349 /*
350 * If the I/O is beyond EOF we mark the inode dirty immediately
351 * but don't update the inode size until I/O completion.
352 */
353 if (xfs_new_eof(ip, ioend->io_offset + ioend->io_size))
354 xfs_mark_inode_dirty(ip);
355
356 submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE, bio); 404 submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE, bio);
357 } 405 }
358 406
359 STATIC struct bio * 407 STATIC struct bio *
360 xfs_alloc_ioend_bio( 408 xfs_alloc_ioend_bio(
361 struct buffer_head *bh) 409 struct buffer_head *bh)
362 { 410 {
363 int nvecs = bio_get_nr_vecs(bh->b_bdev); 411 int nvecs = bio_get_nr_vecs(bh->b_bdev);
364 struct bio *bio = bio_alloc(GFP_NOIO, nvecs); 412 struct bio *bio = bio_alloc(GFP_NOIO, nvecs);
365 413
366 ASSERT(bio->bi_private == NULL); 414 ASSERT(bio->bi_private == NULL);
367 bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9); 415 bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
368 bio->bi_bdev = bh->b_bdev; 416 bio->bi_bdev = bh->b_bdev;
369 return bio; 417 return bio;
370 } 418 }
371 419
372 STATIC void 420 STATIC void
373 xfs_start_buffer_writeback( 421 xfs_start_buffer_writeback(
374 struct buffer_head *bh) 422 struct buffer_head *bh)
375 { 423 {
376 ASSERT(buffer_mapped(bh)); 424 ASSERT(buffer_mapped(bh));
377 ASSERT(buffer_locked(bh)); 425 ASSERT(buffer_locked(bh));
378 ASSERT(!buffer_delay(bh)); 426 ASSERT(!buffer_delay(bh));
379 ASSERT(!buffer_unwritten(bh)); 427 ASSERT(!buffer_unwritten(bh));
380 428
381 mark_buffer_async_write(bh); 429 mark_buffer_async_write(bh);
382 set_buffer_uptodate(bh); 430 set_buffer_uptodate(bh);
383 clear_buffer_dirty(bh); 431 clear_buffer_dirty(bh);
384 } 432 }
385 433
386 STATIC void 434 STATIC void
387 xfs_start_page_writeback( 435 xfs_start_page_writeback(
388 struct page *page, 436 struct page *page,
389 int clear_dirty, 437 int clear_dirty,
390 int buffers) 438 int buffers)
391 { 439 {
392 ASSERT(PageLocked(page)); 440 ASSERT(PageLocked(page));
393 ASSERT(!PageWriteback(page)); 441 ASSERT(!PageWriteback(page));
394 if (clear_dirty) 442 if (clear_dirty)
395 clear_page_dirty_for_io(page); 443 clear_page_dirty_for_io(page);
396 set_page_writeback(page); 444 set_page_writeback(page);
397 unlock_page(page); 445 unlock_page(page);
398 /* If no buffers on the page are to be written, finish it here */ 446 /* If no buffers on the page are to be written, finish it here */
399 if (!buffers) 447 if (!buffers)
400 end_page_writeback(page); 448 end_page_writeback(page);
401 } 449 }
402 450
403 static inline int bio_add_buffer(struct bio *bio, struct buffer_head *bh) 451 static inline int bio_add_buffer(struct bio *bio, struct buffer_head *bh)
404 { 452 {
405 return bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh)); 453 return bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
406 } 454 }
407 455
408 /* 456 /*
409 * Submit all of the bios for all of the ioends we have saved up, covering the 457 * Submit all of the bios for all of the ioends we have saved up, covering the
410 * initial writepage page and also any probed pages. 458 * initial writepage page and also any probed pages.
411 * 459 *
412 * Because we may have multiple ioends spanning a page, we need to start 460 * Because we may have multiple ioends spanning a page, we need to start
413 * writeback on all the buffers before we submit them for I/O. If we mark the 461 * writeback on all the buffers before we submit them for I/O. If we mark the
414 * buffers as we go, then we can end up with a page that only has buffers 462 * buffers as we go, then we can end up with a page that only has buffers
415 * marked async write and I/O completion can occur before we mark the other 463 * marked async write and I/O completion can occur before we mark the other
416 * buffers async write. 464 * buffers async write.
417 * 465 *
418 * The end result of this is that we trip a bug in end_page_writeback() because 466 * The end result of this is that we trip a bug in end_page_writeback() because
419 * we call it twice for the one page as the code in end_buffer_async_write() 467 * we call it twice for the one page as the code in end_buffer_async_write()
420 * assumes that all buffers on the page are started at the same time. 468 * assumes that all buffers on the page are started at the same time.
421 * 469 *
422 * The fix is two passes across the ioend list - one to start writeback on the 470 * The fix is two passes across the ioend list - one to start writeback on the
423 * buffer_heads, and then submit them for I/O on the second pass. 471 * buffer_heads, and then submit them for I/O on the second pass.
424 */ 472 */
425 STATIC void 473 STATIC void
426 xfs_submit_ioend( 474 xfs_submit_ioend(
427 struct writeback_control *wbc, 475 struct writeback_control *wbc,
428 xfs_ioend_t *ioend) 476 xfs_ioend_t *ioend)
429 { 477 {
430 xfs_ioend_t *head = ioend; 478 xfs_ioend_t *head = ioend;
431 xfs_ioend_t *next; 479 xfs_ioend_t *next;
432 struct buffer_head *bh; 480 struct buffer_head *bh;
433 struct bio *bio; 481 struct bio *bio;
434 sector_t lastblock = 0; 482 sector_t lastblock = 0;
435 483
436 /* Pass 1 - start writeback */ 484 /* Pass 1 - start writeback */
437 do { 485 do {
438 next = ioend->io_list; 486 next = ioend->io_list;
439 for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) 487 for (bh = ioend->io_buffer_head; bh; bh = bh->b_private)
440 xfs_start_buffer_writeback(bh); 488 xfs_start_buffer_writeback(bh);
441 } while ((ioend = next) != NULL); 489 } while ((ioend = next) != NULL);
442 490
443 /* Pass 2 - submit I/O */ 491 /* Pass 2 - submit I/O */
444 ioend = head; 492 ioend = head;
445 do { 493 do {
446 next = ioend->io_list; 494 next = ioend->io_list;
447 bio = NULL; 495 bio = NULL;
448 496
449 for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) { 497 for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) {
450 498
451 if (!bio) { 499 if (!bio) {
452 retry: 500 retry:
453 bio = xfs_alloc_ioend_bio(bh); 501 bio = xfs_alloc_ioend_bio(bh);
454 } else if (bh->b_blocknr != lastblock + 1) { 502 } else if (bh->b_blocknr != lastblock + 1) {
455 xfs_submit_ioend_bio(wbc, ioend, bio); 503 xfs_submit_ioend_bio(wbc, ioend, bio);
456 goto retry; 504 goto retry;
457 } 505 }
458 506
459 if (bio_add_buffer(bio, bh) != bh->b_size) { 507 if (bio_add_buffer(bio, bh) != bh->b_size) {
460 xfs_submit_ioend_bio(wbc, ioend, bio); 508 xfs_submit_ioend_bio(wbc, ioend, bio);
461 goto retry; 509 goto retry;
462 } 510 }
463 511
464 lastblock = bh->b_blocknr; 512 lastblock = bh->b_blocknr;
465 } 513 }
466 if (bio) 514 if (bio)
467 xfs_submit_ioend_bio(wbc, ioend, bio); 515 xfs_submit_ioend_bio(wbc, ioend, bio);
468 xfs_finish_ioend(ioend); 516 xfs_finish_ioend(ioend);
469 } while ((ioend = next) != NULL); 517 } while ((ioend = next) != NULL);
470 } 518 }
471 519
472 /* 520 /*
473 * Cancel submission of all buffer_heads so far in this endio. 521 * Cancel submission of all buffer_heads so far in this endio.
474 * Toss the endio too. Only ever called for the initial page 522 * Toss the endio too. Only ever called for the initial page
475 * in a writepage request, so only ever one page. 523 * in a writepage request, so only ever one page.
476 */ 524 */
477 STATIC void 525 STATIC void
478 xfs_cancel_ioend( 526 xfs_cancel_ioend(
479 xfs_ioend_t *ioend) 527 xfs_ioend_t *ioend)
480 { 528 {
481 xfs_ioend_t *next; 529 xfs_ioend_t *next;
482 struct buffer_head *bh, *next_bh; 530 struct buffer_head *bh, *next_bh;
483 531
484 do { 532 do {
485 next = ioend->io_list; 533 next = ioend->io_list;
486 bh = ioend->io_buffer_head; 534 bh = ioend->io_buffer_head;
487 do { 535 do {
488 next_bh = bh->b_private; 536 next_bh = bh->b_private;
489 clear_buffer_async_write(bh); 537 clear_buffer_async_write(bh);
490 unlock_buffer(bh); 538 unlock_buffer(bh);
491 } while ((bh = next_bh) != NULL); 539 } while ((bh = next_bh) != NULL);
492 540
493 mempool_free(ioend, xfs_ioend_pool); 541 mempool_free(ioend, xfs_ioend_pool);
494 } while ((ioend = next) != NULL); 542 } while ((ioend = next) != NULL);
495 } 543 }
496 544
497 /* 545 /*
498 * Test to see if we've been building up a completion structure for 546 * Test to see if we've been building up a completion structure for
499 * earlier buffers -- if so, we try to append to this ioend if we 547 * earlier buffers -- if so, we try to append to this ioend if we
500 * can, otherwise we finish off any current ioend and start another. 548 * can, otherwise we finish off any current ioend and start another.
501 * Return true if we've finished the given ioend. 549 * Return true if we've finished the given ioend.
502 */ 550 */
503 STATIC void 551 STATIC void
504 xfs_add_to_ioend( 552 xfs_add_to_ioend(
505 struct inode *inode, 553 struct inode *inode,
506 struct buffer_head *bh, 554 struct buffer_head *bh,
507 xfs_off_t offset, 555 xfs_off_t offset,
508 unsigned int type, 556 unsigned int type,
509 xfs_ioend_t **result, 557 xfs_ioend_t **result,
510 int need_ioend) 558 int need_ioend)
511 { 559 {
512 xfs_ioend_t *ioend = *result; 560 xfs_ioend_t *ioend = *result;
513 561
514 if (!ioend || need_ioend || type != ioend->io_type) { 562 if (!ioend || need_ioend || type != ioend->io_type) {
515 xfs_ioend_t *previous = *result; 563 xfs_ioend_t *previous = *result;
516 564
517 ioend = xfs_alloc_ioend(inode, type); 565 ioend = xfs_alloc_ioend(inode, type);
518 ioend->io_offset = offset; 566 ioend->io_offset = offset;
519 ioend->io_buffer_head = bh; 567 ioend->io_buffer_head = bh;
520 ioend->io_buffer_tail = bh; 568 ioend->io_buffer_tail = bh;
521 if (previous) 569 if (previous)
522 previous->io_list = ioend; 570 previous->io_list = ioend;
523 *result = ioend; 571 *result = ioend;
524 } else { 572 } else {
525 ioend->io_buffer_tail->b_private = bh; 573 ioend->io_buffer_tail->b_private = bh;
526 ioend->io_buffer_tail = bh; 574 ioend->io_buffer_tail = bh;
527 } 575 }
528 576
529 bh->b_private = NULL; 577 bh->b_private = NULL;
530 ioend->io_size += bh->b_size; 578 ioend->io_size += bh->b_size;
531 } 579 }
532 580
533 STATIC void 581 STATIC void
534 xfs_map_buffer( 582 xfs_map_buffer(
535 struct inode *inode, 583 struct inode *inode,
536 struct buffer_head *bh, 584 struct buffer_head *bh,
537 struct xfs_bmbt_irec *imap, 585 struct xfs_bmbt_irec *imap,
538 xfs_off_t offset) 586 xfs_off_t offset)
539 { 587 {
540 sector_t bn; 588 sector_t bn;
541 struct xfs_mount *m = XFS_I(inode)->i_mount; 589 struct xfs_mount *m = XFS_I(inode)->i_mount;
542 xfs_off_t iomap_offset = XFS_FSB_TO_B(m, imap->br_startoff); 590 xfs_off_t iomap_offset = XFS_FSB_TO_B(m, imap->br_startoff);
543 xfs_daddr_t iomap_bn = xfs_fsb_to_db(XFS_I(inode), imap->br_startblock); 591 xfs_daddr_t iomap_bn = xfs_fsb_to_db(XFS_I(inode), imap->br_startblock);
544 592
545 ASSERT(imap->br_startblock != HOLESTARTBLOCK); 593 ASSERT(imap->br_startblock != HOLESTARTBLOCK);
546 ASSERT(imap->br_startblock != DELAYSTARTBLOCK); 594 ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
547 595
548 bn = (iomap_bn >> (inode->i_blkbits - BBSHIFT)) + 596 bn = (iomap_bn >> (inode->i_blkbits - BBSHIFT)) +
549 ((offset - iomap_offset) >> inode->i_blkbits); 597 ((offset - iomap_offset) >> inode->i_blkbits);
550 598
551 ASSERT(bn || XFS_IS_REALTIME_INODE(XFS_I(inode))); 599 ASSERT(bn || XFS_IS_REALTIME_INODE(XFS_I(inode)));
552 600
553 bh->b_blocknr = bn; 601 bh->b_blocknr = bn;
554 set_buffer_mapped(bh); 602 set_buffer_mapped(bh);
555 } 603 }
556 604
557 STATIC void 605 STATIC void
558 xfs_map_at_offset( 606 xfs_map_at_offset(
559 struct inode *inode, 607 struct inode *inode,
560 struct buffer_head *bh, 608 struct buffer_head *bh,
561 struct xfs_bmbt_irec *imap, 609 struct xfs_bmbt_irec *imap,
562 xfs_off_t offset) 610 xfs_off_t offset)
563 { 611 {
564 ASSERT(imap->br_startblock != HOLESTARTBLOCK); 612 ASSERT(imap->br_startblock != HOLESTARTBLOCK);
565 ASSERT(imap->br_startblock != DELAYSTARTBLOCK); 613 ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
566 614
567 xfs_map_buffer(inode, bh, imap, offset); 615 xfs_map_buffer(inode, bh, imap, offset);
568 set_buffer_mapped(bh); 616 set_buffer_mapped(bh);
569 clear_buffer_delay(bh); 617 clear_buffer_delay(bh);
570 clear_buffer_unwritten(bh); 618 clear_buffer_unwritten(bh);
571 } 619 }
572 620
573 /* 621 /*
574 * Test if a given page is suitable for writing as part of an unwritten 622 * Test if a given page is suitable for writing as part of an unwritten
575 * or delayed allocate extent. 623 * or delayed allocate extent.
576 */ 624 */
577 STATIC int 625 STATIC int
578 xfs_is_delayed_page( 626 xfs_is_delayed_page(
579 struct page *page, 627 struct page *page,
580 unsigned int type) 628 unsigned int type)
581 { 629 {
582 if (PageWriteback(page)) 630 if (PageWriteback(page))
583 return 0; 631 return 0;
584 632
585 if (page->mapping && page_has_buffers(page)) { 633 if (page->mapping && page_has_buffers(page)) {
586 struct buffer_head *bh, *head; 634 struct buffer_head *bh, *head;
587 int acceptable = 0; 635 int acceptable = 0;
588 636
589 bh = head = page_buffers(page); 637 bh = head = page_buffers(page);
590 do { 638 do {
591 if (buffer_unwritten(bh)) 639 if (buffer_unwritten(bh))
592 acceptable = (type == IO_UNWRITTEN); 640 acceptable = (type == IO_UNWRITTEN);
593 else if (buffer_delay(bh)) 641 else if (buffer_delay(bh))
594 acceptable = (type == IO_DELALLOC); 642 acceptable = (type == IO_DELALLOC);
595 else if (buffer_dirty(bh) && buffer_mapped(bh)) 643 else if (buffer_dirty(bh) && buffer_mapped(bh))
596 acceptable = (type == IO_OVERWRITE); 644 acceptable = (type == IO_OVERWRITE);
597 else 645 else
598 break; 646 break;
599 } while ((bh = bh->b_this_page) != head); 647 } while ((bh = bh->b_this_page) != head);
600 648
601 if (acceptable) 649 if (acceptable)
602 return 1; 650 return 1;
603 } 651 }
604 652
605 return 0; 653 return 0;
606 } 654 }
607 655
608 /* 656 /*
609 * Allocate & map buffers for page given the extent map. Write it out. 657 * Allocate & map buffers for page given the extent map. Write it out.
610 * except for the original page of a writepage, this is called on 658 * except for the original page of a writepage, this is called on
611 * delalloc/unwritten pages only, for the original page it is possible 659 * delalloc/unwritten pages only, for the original page it is possible
612 * that the page has no mapping at all. 660 * that the page has no mapping at all.
613 */ 661 */
614 STATIC int 662 STATIC int
615 xfs_convert_page( 663 xfs_convert_page(
616 struct inode *inode, 664 struct inode *inode,
617 struct page *page, 665 struct page *page,
618 loff_t tindex, 666 loff_t tindex,
619 struct xfs_bmbt_irec *imap, 667 struct xfs_bmbt_irec *imap,
620 xfs_ioend_t **ioendp, 668 xfs_ioend_t **ioendp,
621 struct writeback_control *wbc) 669 struct writeback_control *wbc)
622 { 670 {
623 struct buffer_head *bh, *head; 671 struct buffer_head *bh, *head;
624 xfs_off_t end_offset; 672 xfs_off_t end_offset;
625 unsigned long p_offset; 673 unsigned long p_offset;
626 unsigned int type; 674 unsigned int type;
627 int len, page_dirty; 675 int len, page_dirty;
628 int count = 0, done = 0, uptodate = 1; 676 int count = 0, done = 0, uptodate = 1;
629 xfs_off_t offset = page_offset(page); 677 xfs_off_t offset = page_offset(page);
630 678
631 if (page->index != tindex) 679 if (page->index != tindex)
632 goto fail; 680 goto fail;
633 if (!trylock_page(page)) 681 if (!trylock_page(page))
634 goto fail; 682 goto fail;
635 if (PageWriteback(page)) 683 if (PageWriteback(page))
636 goto fail_unlock_page; 684 goto fail_unlock_page;
637 if (page->mapping != inode->i_mapping) 685 if (page->mapping != inode->i_mapping)
638 goto fail_unlock_page; 686 goto fail_unlock_page;
639 if (!xfs_is_delayed_page(page, (*ioendp)->io_type)) 687 if (!xfs_is_delayed_page(page, (*ioendp)->io_type))
640 goto fail_unlock_page; 688 goto fail_unlock_page;
641 689
642 /* 690 /*
643 * page_dirty is initially a count of buffers on the page before 691 * page_dirty is initially a count of buffers on the page before
644 * EOF and is decremented as we move each into a cleanable state. 692 * EOF and is decremented as we move each into a cleanable state.
645 * 693 *
646 * Derivation: 694 * Derivation:
647 * 695 *
648 * End offset is the highest offset that this page should represent. 696 * End offset is the highest offset that this page should represent.
649 * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1)) 697 * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1))
650 * will evaluate non-zero and be less than PAGE_CACHE_SIZE and 698 * will evaluate non-zero and be less than PAGE_CACHE_SIZE and
651 * hence give us the correct page_dirty count. On any other page, 699 * hence give us the correct page_dirty count. On any other page,
652 * it will be zero and in that case we need page_dirty to be the 700 * it will be zero and in that case we need page_dirty to be the
653 * count of buffers on the page. 701 * count of buffers on the page.
654 */ 702 */
655 end_offset = min_t(unsigned long long, 703 end_offset = min_t(unsigned long long,
656 (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT, 704 (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT,
657 i_size_read(inode)); 705 i_size_read(inode));
658 706
659 len = 1 << inode->i_blkbits; 707 len = 1 << inode->i_blkbits;
660 p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1), 708 p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1),
661 PAGE_CACHE_SIZE); 709 PAGE_CACHE_SIZE);
662 p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE; 710 p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE;
663 page_dirty = p_offset / len; 711 page_dirty = p_offset / len;
664 712
665 bh = head = page_buffers(page); 713 bh = head = page_buffers(page);
666 do { 714 do {
667 if (offset >= end_offset) 715 if (offset >= end_offset)
668 break; 716 break;
669 if (!buffer_uptodate(bh)) 717 if (!buffer_uptodate(bh))
670 uptodate = 0; 718 uptodate = 0;
671 if (!(PageUptodate(page) || buffer_uptodate(bh))) { 719 if (!(PageUptodate(page) || buffer_uptodate(bh))) {
672 done = 1; 720 done = 1;
673 continue; 721 continue;
674 } 722 }
675 723
676 if (buffer_unwritten(bh) || buffer_delay(bh) || 724 if (buffer_unwritten(bh) || buffer_delay(bh) ||
677 buffer_mapped(bh)) { 725 buffer_mapped(bh)) {
678 if (buffer_unwritten(bh)) 726 if (buffer_unwritten(bh))
679 type = IO_UNWRITTEN; 727 type = IO_UNWRITTEN;
680 else if (buffer_delay(bh)) 728 else if (buffer_delay(bh))
681 type = IO_DELALLOC; 729 type = IO_DELALLOC;
682 else 730 else
683 type = IO_OVERWRITE; 731 type = IO_OVERWRITE;
684 732
685 if (!xfs_imap_valid(inode, imap, offset)) { 733 if (!xfs_imap_valid(inode, imap, offset)) {
686 done = 1; 734 done = 1;
687 continue; 735 continue;
688 } 736 }
689 737
690 lock_buffer(bh); 738 lock_buffer(bh);
691 if (type != IO_OVERWRITE) 739 if (type != IO_OVERWRITE)
692 xfs_map_at_offset(inode, bh, imap, offset); 740 xfs_map_at_offset(inode, bh, imap, offset);
693 xfs_add_to_ioend(inode, bh, offset, type, 741 xfs_add_to_ioend(inode, bh, offset, type,
694 ioendp, done); 742 ioendp, done);
695 743
696 page_dirty--; 744 page_dirty--;
697 count++; 745 count++;
698 } else { 746 } else {
699 done = 1; 747 done = 1;
700 } 748 }
701 } while (offset += len, (bh = bh->b_this_page) != head); 749 } while (offset += len, (bh = bh->b_this_page) != head);
702 750
703 if (uptodate && bh == head) 751 if (uptodate && bh == head)
704 SetPageUptodate(page); 752 SetPageUptodate(page);
705 753
706 if (count) { 754 if (count) {
707 if (--wbc->nr_to_write <= 0 && 755 if (--wbc->nr_to_write <= 0 &&
708 wbc->sync_mode == WB_SYNC_NONE) 756 wbc->sync_mode == WB_SYNC_NONE)
709 done = 1; 757 done = 1;
710 } 758 }
711 xfs_start_page_writeback(page, !page_dirty, count); 759 xfs_start_page_writeback(page, !page_dirty, count);
712 760
713 return done; 761 return done;
714 fail_unlock_page: 762 fail_unlock_page:
715 unlock_page(page); 763 unlock_page(page);
716 fail: 764 fail:
717 return 1; 765 return 1;
718 } 766 }
719 767
720 /* 768 /*
721 * Convert & write out a cluster of pages in the same extent as defined 769 * Convert & write out a cluster of pages in the same extent as defined
722 * by mp and following the start page. 770 * by mp and following the start page.
723 */ 771 */
724 STATIC void 772 STATIC void
725 xfs_cluster_write( 773 xfs_cluster_write(
726 struct inode *inode, 774 struct inode *inode,
727 pgoff_t tindex, 775 pgoff_t tindex,
728 struct xfs_bmbt_irec *imap, 776 struct xfs_bmbt_irec *imap,
729 xfs_ioend_t **ioendp, 777 xfs_ioend_t **ioendp,
730 struct writeback_control *wbc, 778 struct writeback_control *wbc,
731 pgoff_t tlast) 779 pgoff_t tlast)
732 { 780 {
733 struct pagevec pvec; 781 struct pagevec pvec;
734 int done = 0, i; 782 int done = 0, i;
735 783
736 pagevec_init(&pvec, 0); 784 pagevec_init(&pvec, 0);
737 while (!done && tindex <= tlast) { 785 while (!done && tindex <= tlast) {
738 unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1); 786 unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1);
739 787
740 if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len)) 788 if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len))
741 break; 789 break;
742 790
743 for (i = 0; i < pagevec_count(&pvec); i++) { 791 for (i = 0; i < pagevec_count(&pvec); i++) {
744 done = xfs_convert_page(inode, pvec.pages[i], tindex++, 792 done = xfs_convert_page(inode, pvec.pages[i], tindex++,
745 imap, ioendp, wbc); 793 imap, ioendp, wbc);
746 if (done) 794 if (done)
747 break; 795 break;
748 } 796 }
749 797
750 pagevec_release(&pvec); 798 pagevec_release(&pvec);
751 cond_resched(); 799 cond_resched();
752 } 800 }
753 } 801 }
754 802
755 STATIC void 803 STATIC void
756 xfs_vm_invalidatepage( 804 xfs_vm_invalidatepage(
757 struct page *page, 805 struct page *page,
758 unsigned long offset) 806 unsigned long offset)
759 { 807 {
760 trace_xfs_invalidatepage(page->mapping->host, page, offset); 808 trace_xfs_invalidatepage(page->mapping->host, page, offset);
761 block_invalidatepage(page, offset); 809 block_invalidatepage(page, offset);
762 } 810 }
763 811
764 /* 812 /*
765 * If the page has delalloc buffers on it, we need to punch them out before we 813 * If the page has delalloc buffers on it, we need to punch them out before we
766 * invalidate the page. If we don't, we leave a stale delalloc mapping on the 814 * invalidate the page. If we don't, we leave a stale delalloc mapping on the
767 * inode that can trip a BUG() in xfs_get_blocks() later on if a direct IO read 815 * inode that can trip a BUG() in xfs_get_blocks() later on if a direct IO read
768 * is done on that same region - the delalloc extent is returned when none is 816 * is done on that same region - the delalloc extent is returned when none is
769 * supposed to be there. 817 * supposed to be there.
770 * 818 *
771 * We prevent this by truncating away the delalloc regions on the page before 819 * We prevent this by truncating away the delalloc regions on the page before
772 * invalidating it. Because they are delalloc, we can do this without needing a 820 * invalidating it. Because they are delalloc, we can do this without needing a
773 * transaction. Indeed - if we get ENOSPC errors, we have to be able to do this 821 * transaction. Indeed - if we get ENOSPC errors, we have to be able to do this
774 * truncation without a transaction as there is no space left for block 822 * truncation without a transaction as there is no space left for block
775 * reservation (typically why we see a ENOSPC in writeback). 823 * reservation (typically why we see a ENOSPC in writeback).
776 * 824 *
777 * This is not a performance critical path, so for now just do the punching a 825 * This is not a performance critical path, so for now just do the punching a
778 * buffer head at a time. 826 * buffer head at a time.
779 */ 827 */
780 STATIC void 828 STATIC void
781 xfs_aops_discard_page( 829 xfs_aops_discard_page(
782 struct page *page) 830 struct page *page)
783 { 831 {
784 struct inode *inode = page->mapping->host; 832 struct inode *inode = page->mapping->host;
785 struct xfs_inode *ip = XFS_I(inode); 833 struct xfs_inode *ip = XFS_I(inode);
786 struct buffer_head *bh, *head; 834 struct buffer_head *bh, *head;
787 loff_t offset = page_offset(page); 835 loff_t offset = page_offset(page);
788 836
789 if (!xfs_is_delayed_page(page, IO_DELALLOC)) 837 if (!xfs_is_delayed_page(page, IO_DELALLOC))
790 goto out_invalidate; 838 goto out_invalidate;
791 839
792 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 840 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
793 goto out_invalidate; 841 goto out_invalidate;
794 842
795 xfs_alert(ip->i_mount, 843 xfs_alert(ip->i_mount,
796 "page discard on page %p, inode 0x%llx, offset %llu.", 844 "page discard on page %p, inode 0x%llx, offset %llu.",
797 page, ip->i_ino, offset); 845 page, ip->i_ino, offset);
798 846
799 xfs_ilock(ip, XFS_ILOCK_EXCL); 847 xfs_ilock(ip, XFS_ILOCK_EXCL);
800 bh = head = page_buffers(page); 848 bh = head = page_buffers(page);
801 do { 849 do {
802 int error; 850 int error;
803 xfs_fileoff_t start_fsb; 851 xfs_fileoff_t start_fsb;
804 852
805 if (!buffer_delay(bh)) 853 if (!buffer_delay(bh))
806 goto next_buffer; 854 goto next_buffer;
807 855
808 start_fsb = XFS_B_TO_FSBT(ip->i_mount, offset); 856 start_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
809 error = xfs_bmap_punch_delalloc_range(ip, start_fsb, 1); 857 error = xfs_bmap_punch_delalloc_range(ip, start_fsb, 1);
810 if (error) { 858 if (error) {
811 /* something screwed, just bail */ 859 /* something screwed, just bail */
812 if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { 860 if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
813 xfs_alert(ip->i_mount, 861 xfs_alert(ip->i_mount,
814 "page discard unable to remove delalloc mapping."); 862 "page discard unable to remove delalloc mapping.");
815 } 863 }
816 break; 864 break;
817 } 865 }
818 next_buffer: 866 next_buffer:
819 offset += 1 << inode->i_blkbits; 867 offset += 1 << inode->i_blkbits;
820 868
821 } while ((bh = bh->b_this_page) != head); 869 } while ((bh = bh->b_this_page) != head);
822 870
823 xfs_iunlock(ip, XFS_ILOCK_EXCL); 871 xfs_iunlock(ip, XFS_ILOCK_EXCL);
824 out_invalidate: 872 out_invalidate:
825 xfs_vm_invalidatepage(page, 0); 873 xfs_vm_invalidatepage(page, 0);
826 return; 874 return;
827 } 875 }
828 876
829 /* 877 /*
830 * Write out a dirty page. 878 * Write out a dirty page.
831 * 879 *
832 * For delalloc space on the page we need to allocate space and flush it. 880 * For delalloc space on the page we need to allocate space and flush it.
833 * For unwritten space on the page we need to start the conversion to 881 * For unwritten space on the page we need to start the conversion to
834 * regular allocated space. 882 * regular allocated space.
835 * For any other dirty buffer heads on the page we should flush them. 883 * For any other dirty buffer heads on the page we should flush them.
836 */ 884 */
837 STATIC int 885 STATIC int
838 xfs_vm_writepage( 886 xfs_vm_writepage(
839 struct page *page, 887 struct page *page,
840 struct writeback_control *wbc) 888 struct writeback_control *wbc)
841 { 889 {
842 struct inode *inode = page->mapping->host; 890 struct inode *inode = page->mapping->host;
843 struct buffer_head *bh, *head; 891 struct buffer_head *bh, *head;
844 struct xfs_bmbt_irec imap; 892 struct xfs_bmbt_irec imap;
845 xfs_ioend_t *ioend = NULL, *iohead = NULL; 893 xfs_ioend_t *ioend = NULL, *iohead = NULL;
846 loff_t offset; 894 loff_t offset;
847 unsigned int type; 895 unsigned int type;
848 __uint64_t end_offset; 896 __uint64_t end_offset;
849 pgoff_t end_index, last_index; 897 pgoff_t end_index, last_index;
850 ssize_t len; 898 ssize_t len;
851 int err, imap_valid = 0, uptodate = 1; 899 int err, imap_valid = 0, uptodate = 1;
852 int count = 0; 900 int count = 0;
853 int nonblocking = 0; 901 int nonblocking = 0;
854 902
855 trace_xfs_writepage(inode, page, 0); 903 trace_xfs_writepage(inode, page, 0);
856 904
857 ASSERT(page_has_buffers(page)); 905 ASSERT(page_has_buffers(page));
858 906
859 /* 907 /*
860 * Refuse to write the page out if we are called from reclaim context. 908 * Refuse to write the page out if we are called from reclaim context.
861 * 909 *
862 * This avoids stack overflows when called from deeply used stacks in 910 * This avoids stack overflows when called from deeply used stacks in
863 * random callers for direct reclaim or memcg reclaim. We explicitly 911 * random callers for direct reclaim or memcg reclaim. We explicitly
864 * allow reclaim from kswapd as the stack usage there is relatively low. 912 * allow reclaim from kswapd as the stack usage there is relatively low.
865 * 913 *
866 * This should never happen except in the case of a VM regression so 914 * This should never happen except in the case of a VM regression so
867 * warn about it. 915 * warn about it.
868 */ 916 */
869 if (WARN_ON_ONCE((current->flags & (PF_MEMALLOC|PF_KSWAPD)) == 917 if (WARN_ON_ONCE((current->flags & (PF_MEMALLOC|PF_KSWAPD)) ==
870 PF_MEMALLOC)) 918 PF_MEMALLOC))
871 goto redirty; 919 goto redirty;
872 920
873 /* 921 /*
874 * Given that we do not allow direct reclaim to call us, we should 922 * Given that we do not allow direct reclaim to call us, we should
875 * never be called while in a filesystem transaction. 923 * never be called while in a filesystem transaction.
876 */ 924 */
877 if (WARN_ON(current->flags & PF_FSTRANS)) 925 if (WARN_ON(current->flags & PF_FSTRANS))
878 goto redirty; 926 goto redirty;
879 927
880 /* Is this page beyond the end of the file? */ 928 /* Is this page beyond the end of the file? */
881 offset = i_size_read(inode); 929 offset = i_size_read(inode);
882 end_index = offset >> PAGE_CACHE_SHIFT; 930 end_index = offset >> PAGE_CACHE_SHIFT;
883 last_index = (offset - 1) >> PAGE_CACHE_SHIFT; 931 last_index = (offset - 1) >> PAGE_CACHE_SHIFT;
884 if (page->index >= end_index) { 932 if (page->index >= end_index) {
885 if ((page->index >= end_index + 1) || 933 if ((page->index >= end_index + 1) ||
886 !(i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) { 934 !(i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) {
887 unlock_page(page); 935 unlock_page(page);
888 return 0; 936 return 0;
889 } 937 }
890 } 938 }
891 939
892 end_offset = min_t(unsigned long long, 940 end_offset = min_t(unsigned long long,
893 (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT, 941 (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT,
894 offset); 942 offset);
895 len = 1 << inode->i_blkbits; 943 len = 1 << inode->i_blkbits;
896 944
897 bh = head = page_buffers(page); 945 bh = head = page_buffers(page);
898 offset = page_offset(page); 946 offset = page_offset(page);
899 type = IO_OVERWRITE; 947 type = IO_OVERWRITE;
900 948
901 if (wbc->sync_mode == WB_SYNC_NONE) 949 if (wbc->sync_mode == WB_SYNC_NONE)
902 nonblocking = 1; 950 nonblocking = 1;
903 951
904 do { 952 do {
905 int new_ioend = 0; 953 int new_ioend = 0;
906 954
907 if (offset >= end_offset) 955 if (offset >= end_offset)
908 break; 956 break;
909 if (!buffer_uptodate(bh)) 957 if (!buffer_uptodate(bh))
910 uptodate = 0; 958 uptodate = 0;
911 959
912 /* 960 /*
913 * set_page_dirty dirties all buffers in a page, independent 961 * set_page_dirty dirties all buffers in a page, independent
914 * of their state. The dirty state however is entirely 962 * of their state. The dirty state however is entirely
915 * meaningless for holes (!mapped && uptodate), so skip 963 * meaningless for holes (!mapped && uptodate), so skip
916 * buffers covering holes here. 964 * buffers covering holes here.
917 */ 965 */
918 if (!buffer_mapped(bh) && buffer_uptodate(bh)) { 966 if (!buffer_mapped(bh) && buffer_uptodate(bh)) {
919 imap_valid = 0; 967 imap_valid = 0;
920 continue; 968 continue;
921 } 969 }
922 970
923 if (buffer_unwritten(bh)) { 971 if (buffer_unwritten(bh)) {
924 if (type != IO_UNWRITTEN) { 972 if (type != IO_UNWRITTEN) {
925 type = IO_UNWRITTEN; 973 type = IO_UNWRITTEN;
926 imap_valid = 0; 974 imap_valid = 0;
927 } 975 }
928 } else if (buffer_delay(bh)) { 976 } else if (buffer_delay(bh)) {
929 if (type != IO_DELALLOC) { 977 if (type != IO_DELALLOC) {
930 type = IO_DELALLOC; 978 type = IO_DELALLOC;
931 imap_valid = 0; 979 imap_valid = 0;
932 } 980 }
933 } else if (buffer_uptodate(bh)) { 981 } else if (buffer_uptodate(bh)) {
934 if (type != IO_OVERWRITE) { 982 if (type != IO_OVERWRITE) {
935 type = IO_OVERWRITE; 983 type = IO_OVERWRITE;
936 imap_valid = 0; 984 imap_valid = 0;
937 } 985 }
938 } else { 986 } else {
939 if (PageUptodate(page)) { 987 if (PageUptodate(page)) {
940 ASSERT(buffer_mapped(bh)); 988 ASSERT(buffer_mapped(bh));
941 imap_valid = 0; 989 imap_valid = 0;
942 } 990 }
943 continue; 991 continue;
944 } 992 }
945 993
946 if (imap_valid) 994 if (imap_valid)
947 imap_valid = xfs_imap_valid(inode, &imap, offset); 995 imap_valid = xfs_imap_valid(inode, &imap, offset);
948 if (!imap_valid) { 996 if (!imap_valid) {
949 /* 997 /*
950 * If we didn't have a valid mapping then we need to 998 * If we didn't have a valid mapping then we need to
951 * put the new mapping into a separate ioend structure. 999 * put the new mapping into a separate ioend structure.
952 * This ensures non-contiguous extents always have 1000 * This ensures non-contiguous extents always have
953 * separate ioends, which is particularly important 1001 * separate ioends, which is particularly important
954 * for unwritten extent conversion at I/O completion 1002 * for unwritten extent conversion at I/O completion
955 * time. 1003 * time.
956 */ 1004 */
957 new_ioend = 1; 1005 new_ioend = 1;
958 err = xfs_map_blocks(inode, offset, &imap, type, 1006 err = xfs_map_blocks(inode, offset, &imap, type,
959 nonblocking); 1007 nonblocking);
960 if (err) 1008 if (err)
961 goto error; 1009 goto error;
962 imap_valid = xfs_imap_valid(inode, &imap, offset); 1010 imap_valid = xfs_imap_valid(inode, &imap, offset);
963 } 1011 }
964 if (imap_valid) { 1012 if (imap_valid) {
965 lock_buffer(bh); 1013 lock_buffer(bh);
966 if (type != IO_OVERWRITE) 1014 if (type != IO_OVERWRITE)
967 xfs_map_at_offset(inode, bh, &imap, offset); 1015 xfs_map_at_offset(inode, bh, &imap, offset);
968 xfs_add_to_ioend(inode, bh, offset, type, &ioend, 1016 xfs_add_to_ioend(inode, bh, offset, type, &ioend,
969 new_ioend); 1017 new_ioend);
970 count++; 1018 count++;
971 } 1019 }
972 1020
973 if (!iohead) 1021 if (!iohead)
974 iohead = ioend; 1022 iohead = ioend;
975 1023
976 } while (offset += len, ((bh = bh->b_this_page) != head)); 1024 } while (offset += len, ((bh = bh->b_this_page) != head));
977 1025
978 if (uptodate && bh == head) 1026 if (uptodate && bh == head)
979 SetPageUptodate(page); 1027 SetPageUptodate(page);
980 1028
981 xfs_start_page_writeback(page, 1, count); 1029 xfs_start_page_writeback(page, 1, count);
982 1030
983 if (ioend && imap_valid) { 1031 if (ioend && imap_valid) {
984 xfs_off_t end_index; 1032 xfs_off_t end_index;
985 1033
986 end_index = imap.br_startoff + imap.br_blockcount; 1034 end_index = imap.br_startoff + imap.br_blockcount;
987 1035
988 /* to bytes */ 1036 /* to bytes */
989 end_index <<= inode->i_blkbits; 1037 end_index <<= inode->i_blkbits;
990 1038
991 /* to pages */ 1039 /* to pages */
992 end_index = (end_index - 1) >> PAGE_CACHE_SHIFT; 1040 end_index = (end_index - 1) >> PAGE_CACHE_SHIFT;
993 1041
994 /* check against file size */ 1042 /* check against file size */
995 if (end_index > last_index) 1043 if (end_index > last_index)
996 end_index = last_index; 1044 end_index = last_index;
997 1045
998 xfs_cluster_write(inode, page->index + 1, &imap, &ioend, 1046 xfs_cluster_write(inode, page->index + 1, &imap, &ioend,
999 wbc, end_index); 1047 wbc, end_index);
1000 } 1048 }
1001 1049
1002 if (iohead) 1050 if (iohead) {
1051 /*
1052 * Reserve log space if we might write beyond the on-disk
1053 * inode size.
1054 */
1055 if (ioend->io_type != IO_UNWRITTEN &&
1056 xfs_ioend_is_append(ioend)) {
1057 err = xfs_setfilesize_trans_alloc(ioend);
1058 if (err)
1059 goto error;
1060 }
1061
1003 xfs_submit_ioend(wbc, iohead); 1062 xfs_submit_ioend(wbc, iohead);
1063 }
1004 1064
1005 return 0; 1065 return 0;
1006 1066
1007 error: 1067 error:
1008 if (iohead) 1068 if (iohead)
1009 xfs_cancel_ioend(iohead); 1069 xfs_cancel_ioend(iohead);
1010 1070
1011 if (err == -EAGAIN) 1071 if (err == -EAGAIN)
1012 goto redirty; 1072 goto redirty;
1013 1073
1014 xfs_aops_discard_page(page); 1074 xfs_aops_discard_page(page);
1015 ClearPageUptodate(page); 1075 ClearPageUptodate(page);
1016 unlock_page(page); 1076 unlock_page(page);
1017 return err; 1077 return err;
1018 1078
1019 redirty: 1079 redirty:
1020 redirty_page_for_writepage(wbc, page); 1080 redirty_page_for_writepage(wbc, page);
1021 unlock_page(page); 1081 unlock_page(page);
1022 return 0; 1082 return 0;
1023 } 1083 }
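The clustering setup just above hands xfs_cluster_write() a last page index derived from the extent the block mapper returned: the extent end in blocks is shifted up to bytes, converted to a page index, and capped at the last page the file size covers. A minimal user-space sketch of that arithmetic, assuming 4096-byte filesystem blocks and 4096-byte pages (the names are illustrative, not part of the patch):

#include <stdio.h>
#include <stdint.h>

#define BLKBITS          12	/* assumed 4096-byte filesystem blocks */
#define PAGE_CACHE_SHIFT 12	/* assumed 4096-byte pages */

/*
 * Convert an extent (start block, block count) to the index of the last
 * page it touches, clamped to the last page the file size covers.
 */
static uint64_t cluster_end_index(uint64_t br_startoff, uint64_t br_blockcount,
				  uint64_t last_index)
{
	uint64_t end_index = br_startoff + br_blockcount;	/* in blocks */

	end_index <<= BLKBITS;					/* to bytes */
	end_index = (end_index - 1) >> PAGE_CACHE_SHIFT;	/* to pages */

	if (end_index > last_index)				/* file size cap */
		end_index = last_index;
	return end_index;
}

int main(void)
{
	/* a 16-block extent starting at block 8, file ends on page 20 */
	printf("end_index = %llu\n",
	       (unsigned long long)cluster_end_index(8, 16, 20));
	return 0;
}

With equal block and page sizes the block and page indices coincide, so the extent's last block (23) maps to page 23, which the file-size cap then trims back to page 20.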
1024 1084
1025 STATIC int 1085 STATIC int
1026 xfs_vm_writepages( 1086 xfs_vm_writepages(
1027 struct address_space *mapping, 1087 struct address_space *mapping,
1028 struct writeback_control *wbc) 1088 struct writeback_control *wbc)
1029 { 1089 {
1030 xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED); 1090 xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
1031 return generic_writepages(mapping, wbc); 1091 return generic_writepages(mapping, wbc);
1032 } 1092 }
1033 1093
1034 /* 1094 /*
1035 * Called to move a page into cleanable state - and from there 1095 * Called to move a page into cleanable state - and from there
1036 * to be released. The page should already be clean. We always 1096 * to be released. The page should already be clean. We always
1037 * have buffer heads in this call. 1097 * have buffer heads in this call.
1038 * 1098 *
1039 * Returns 1 if the page is ok to release, 0 otherwise. 1099 * Returns 1 if the page is ok to release, 0 otherwise.
1040 */ 1100 */
1041 STATIC int 1101 STATIC int
1042 xfs_vm_releasepage( 1102 xfs_vm_releasepage(
1043 struct page *page, 1103 struct page *page,
1044 gfp_t gfp_mask) 1104 gfp_t gfp_mask)
1045 { 1105 {
1046 int delalloc, unwritten; 1106 int delalloc, unwritten;
1047 1107
1048 trace_xfs_releasepage(page->mapping->host, page, 0); 1108 trace_xfs_releasepage(page->mapping->host, page, 0);
1049 1109
1050 xfs_count_page_state(page, &delalloc, &unwritten); 1110 xfs_count_page_state(page, &delalloc, &unwritten);
1051 1111
1052 if (WARN_ON(delalloc)) 1112 if (WARN_ON(delalloc))
1053 return 0; 1113 return 0;
1054 if (WARN_ON(unwritten)) 1114 if (WARN_ON(unwritten))
1055 return 0; 1115 return 0;
1056 1116
1057 return try_to_free_buffers(page); 1117 return try_to_free_buffers(page);
1058 } 1118 }
1059 1119
1060 STATIC int 1120 STATIC int
1061 __xfs_get_blocks( 1121 __xfs_get_blocks(
1062 struct inode *inode, 1122 struct inode *inode,
1063 sector_t iblock, 1123 sector_t iblock,
1064 struct buffer_head *bh_result, 1124 struct buffer_head *bh_result,
1065 int create, 1125 int create,
1066 int direct) 1126 int direct)
1067 { 1127 {
1068 struct xfs_inode *ip = XFS_I(inode); 1128 struct xfs_inode *ip = XFS_I(inode);
1069 struct xfs_mount *mp = ip->i_mount; 1129 struct xfs_mount *mp = ip->i_mount;
1070 xfs_fileoff_t offset_fsb, end_fsb; 1130 xfs_fileoff_t offset_fsb, end_fsb;
1071 int error = 0; 1131 int error = 0;
1072 int lockmode = 0; 1132 int lockmode = 0;
1073 struct xfs_bmbt_irec imap; 1133 struct xfs_bmbt_irec imap;
1074 int nimaps = 1; 1134 int nimaps = 1;
1075 xfs_off_t offset; 1135 xfs_off_t offset;
1076 ssize_t size; 1136 ssize_t size;
1077 int new = 0; 1137 int new = 0;
1078 1138
1079 if (XFS_FORCED_SHUTDOWN(mp)) 1139 if (XFS_FORCED_SHUTDOWN(mp))
1080 return -XFS_ERROR(EIO); 1140 return -XFS_ERROR(EIO);
1081 1141
1082 offset = (xfs_off_t)iblock << inode->i_blkbits; 1142 offset = (xfs_off_t)iblock << inode->i_blkbits;
1083 ASSERT(bh_result->b_size >= (1 << inode->i_blkbits)); 1143 ASSERT(bh_result->b_size >= (1 << inode->i_blkbits));
1084 size = bh_result->b_size; 1144 size = bh_result->b_size;
1085 1145
1086 if (!create && direct && offset >= i_size_read(inode)) 1146 if (!create && direct && offset >= i_size_read(inode))
1087 return 0; 1147 return 0;
1088 1148
1089 if (create) { 1149 if (create) {
1090 lockmode = XFS_ILOCK_EXCL; 1150 lockmode = XFS_ILOCK_EXCL;
1091 xfs_ilock(ip, lockmode); 1151 xfs_ilock(ip, lockmode);
1092 } else { 1152 } else {
1093 lockmode = xfs_ilock_map_shared(ip); 1153 lockmode = xfs_ilock_map_shared(ip);
1094 } 1154 }
1095 1155
1096 ASSERT(offset <= mp->m_maxioffset); 1156 ASSERT(offset <= mp->m_maxioffset);
1097 if (offset + size > mp->m_maxioffset) 1157 if (offset + size > mp->m_maxioffset)
1098 size = mp->m_maxioffset - offset; 1158 size = mp->m_maxioffset - offset;
1099 end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size); 1159 end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size);
1100 offset_fsb = XFS_B_TO_FSBT(mp, offset); 1160 offset_fsb = XFS_B_TO_FSBT(mp, offset);
1101 1161
1102 error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, 1162 error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
1103 &imap, &nimaps, XFS_BMAPI_ENTIRE); 1163 &imap, &nimaps, XFS_BMAPI_ENTIRE);
1104 if (error) 1164 if (error)
1105 goto out_unlock; 1165 goto out_unlock;
1106 1166
1107 if (create && 1167 if (create &&
1108 (!nimaps || 1168 (!nimaps ||
1109 (imap.br_startblock == HOLESTARTBLOCK || 1169 (imap.br_startblock == HOLESTARTBLOCK ||
1110 imap.br_startblock == DELAYSTARTBLOCK))) { 1170 imap.br_startblock == DELAYSTARTBLOCK))) {
1111 if (direct) { 1171 if (direct) {
1112 error = xfs_iomap_write_direct(ip, offset, size, 1172 error = xfs_iomap_write_direct(ip, offset, size,
1113 &imap, nimaps); 1173 &imap, nimaps);
1114 } else { 1174 } else {
1115 error = xfs_iomap_write_delay(ip, offset, size, &imap); 1175 error = xfs_iomap_write_delay(ip, offset, size, &imap);
1116 } 1176 }
1117 if (error) 1177 if (error)
1118 goto out_unlock; 1178 goto out_unlock;
1119 1179
1120 trace_xfs_get_blocks_alloc(ip, offset, size, 0, &imap); 1180 trace_xfs_get_blocks_alloc(ip, offset, size, 0, &imap);
1121 } else if (nimaps) { 1181 } else if (nimaps) {
1122 trace_xfs_get_blocks_found(ip, offset, size, 0, &imap); 1182 trace_xfs_get_blocks_found(ip, offset, size, 0, &imap);
1123 } else { 1183 } else {
1124 trace_xfs_get_blocks_notfound(ip, offset, size); 1184 trace_xfs_get_blocks_notfound(ip, offset, size);
1125 goto out_unlock; 1185 goto out_unlock;
1126 } 1186 }
1127 xfs_iunlock(ip, lockmode); 1187 xfs_iunlock(ip, lockmode);
1128 1188
1129 if (imap.br_startblock != HOLESTARTBLOCK && 1189 if (imap.br_startblock != HOLESTARTBLOCK &&
1130 imap.br_startblock != DELAYSTARTBLOCK) { 1190 imap.br_startblock != DELAYSTARTBLOCK) {
1131 /* 1191 /*
1132 * For unwritten extents do not report a disk address on 1192 * For unwritten extents do not report a disk address on
1133 * the read case (treat as if we're reading into a hole). 1193 * the read case (treat as if we're reading into a hole).
1134 */ 1194 */
1135 if (create || !ISUNWRITTEN(&imap)) 1195 if (create || !ISUNWRITTEN(&imap))
1136 xfs_map_buffer(inode, bh_result, &imap, offset); 1196 xfs_map_buffer(inode, bh_result, &imap, offset);
1137 if (create && ISUNWRITTEN(&imap)) { 1197 if (create && ISUNWRITTEN(&imap)) {
1138 if (direct) 1198 if (direct)
1139 bh_result->b_private = inode; 1199 bh_result->b_private = inode;
1140 set_buffer_unwritten(bh_result); 1200 set_buffer_unwritten(bh_result);
1141 } 1201 }
1142 } 1202 }
1143 1203
1144 /* 1204 /*
1145 * If this is a realtime file, data may be on a different device 1205 * If this is a realtime file, data may be on a different device
1146 * to the one pointed to by the buffer_head's b_bdev currently. 1206 * to the one pointed to by the buffer_head's b_bdev currently.
1147 */ 1207 */
1148 bh_result->b_bdev = xfs_find_bdev_for_inode(inode); 1208 bh_result->b_bdev = xfs_find_bdev_for_inode(inode);
1149 1209
1150 /* 1210 /*
1151 * If we previously allocated a block out beyond eof and we are now 1211 * If we previously allocated a block out beyond eof and we are now
1152 * coming back to use it then we will need to flag it as new even if it 1212 * coming back to use it then we will need to flag it as new even if it
1153 * has a disk address. 1213 * has a disk address.
1154 * 1214 *
1155 * With sub-block writes into unwritten extents we also need to mark 1215 * With sub-block writes into unwritten extents we also need to mark
1156 * the buffer as new so that the unwritten parts of the buffer get 1216 * the buffer as new so that the unwritten parts of the buffer get
1157 * correctly zeroed. 1217 * correctly zeroed.
1158 */ 1218 */
1159 if (create && 1219 if (create &&
1160 ((!buffer_mapped(bh_result) && !buffer_uptodate(bh_result)) || 1220 ((!buffer_mapped(bh_result) && !buffer_uptodate(bh_result)) ||
1161 (offset >= i_size_read(inode)) || 1221 (offset >= i_size_read(inode)) ||
1162 (new || ISUNWRITTEN(&imap)))) 1222 (new || ISUNWRITTEN(&imap))))
1163 set_buffer_new(bh_result); 1223 set_buffer_new(bh_result);
1164 1224
1165 if (imap.br_startblock == DELAYSTARTBLOCK) { 1225 if (imap.br_startblock == DELAYSTARTBLOCK) {
1166 BUG_ON(direct); 1226 BUG_ON(direct);
1167 if (create) { 1227 if (create) {
1168 set_buffer_uptodate(bh_result); 1228 set_buffer_uptodate(bh_result);
1169 set_buffer_mapped(bh_result); 1229 set_buffer_mapped(bh_result);
1170 set_buffer_delay(bh_result); 1230 set_buffer_delay(bh_result);
1171 } 1231 }
1172 } 1232 }
1173 1233
1174 /* 1234 /*
1175 * If this is O_DIRECT or the mpage code calling, tell them how large 1235 * If this is O_DIRECT or the mpage code calling, tell them how large
1176 * the mapping is, so that we can avoid repeated get_blocks calls. 1236 * the mapping is, so that we can avoid repeated get_blocks calls.
1177 */ 1237 */
1178 if (direct || size > (1 << inode->i_blkbits)) { 1238 if (direct || size > (1 << inode->i_blkbits)) {
1179 xfs_off_t mapping_size; 1239 xfs_off_t mapping_size;
1180 1240
1181 mapping_size = imap.br_startoff + imap.br_blockcount - iblock; 1241 mapping_size = imap.br_startoff + imap.br_blockcount - iblock;
1182 mapping_size <<= inode->i_blkbits; 1242 mapping_size <<= inode->i_blkbits;
1183 1243
1184 ASSERT(mapping_size > 0); 1244 ASSERT(mapping_size > 0);
1185 if (mapping_size > size) 1245 if (mapping_size > size)
1186 mapping_size = size; 1246 mapping_size = size;
1187 if (mapping_size > LONG_MAX) 1247 if (mapping_size > LONG_MAX)
1188 mapping_size = LONG_MAX; 1248 mapping_size = LONG_MAX;
1189 1249
1190 bh_result->b_size = mapping_size; 1250 bh_result->b_size = mapping_size;
1191 } 1251 }
1192 1252
1193 return 0; 1253 return 0;
1194 1254
1195 out_unlock: 1255 out_unlock:
1196 xfs_iunlock(ip, lockmode); 1256 xfs_iunlock(ip, lockmode);
1197 return -error; 1257 return -error;
1198 } 1258 }
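Reporting the whole mapping back through bh_result->b_size is what lets direct I/O and the mpage path avoid one get_blocks call per block. A runnable sketch of the clamping done above, with hypothetical names and an assumed 4096-byte block size:

#include <stdio.h>
#include <stdint.h>
#include <limits.h>

#define BLKBITS 12	/* assumed 4096-byte filesystem blocks */

/*
 * Given the extent found for iblock and the size the caller asked to map,
 * return how many contiguous bytes can be reported back: the rest of the
 * extent from iblock on, capped at the requested size and at LONG_MAX
 * (b_size is only a long-sized field).
 */
static long mapping_bytes(uint64_t br_startoff, uint64_t br_blockcount,
			  uint64_t iblock, uint64_t requested_size)
{
	uint64_t mapping_size;

	mapping_size = br_startoff + br_blockcount - iblock;	/* blocks left */
	mapping_size <<= BLKBITS;				/* to bytes */

	if (mapping_size > requested_size)
		mapping_size = requested_size;
	if (mapping_size > LONG_MAX)
		mapping_size = LONG_MAX;
	return (long)mapping_size;
}

int main(void)
{
	/* extent covers blocks 100..163; request starts at block 130 for 1 MiB */
	printf("%ld bytes mappable\n", mapping_bytes(100, 64, 130, 1 << 20));
	return 0;
}

Here 34 blocks remain in the extent past block 130, so 139264 bytes are reported even though a megabyte was requested.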
1199 1259
1200 int 1260 int
1201 xfs_get_blocks( 1261 xfs_get_blocks(
1202 struct inode *inode, 1262 struct inode *inode,
1203 sector_t iblock, 1263 sector_t iblock,
1204 struct buffer_head *bh_result, 1264 struct buffer_head *bh_result,
1205 int create) 1265 int create)
1206 { 1266 {
1207 return __xfs_get_blocks(inode, iblock, bh_result, create, 0); 1267 return __xfs_get_blocks(inode, iblock, bh_result, create, 0);
1208 } 1268 }
1209 1269
1210 STATIC int 1270 STATIC int
1211 xfs_get_blocks_direct( 1271 xfs_get_blocks_direct(
1212 struct inode *inode, 1272 struct inode *inode,
1213 sector_t iblock, 1273 sector_t iblock,
1214 struct buffer_head *bh_result, 1274 struct buffer_head *bh_result,
1215 int create) 1275 int create)
1216 { 1276 {
1217 return __xfs_get_blocks(inode, iblock, bh_result, create, 1); 1277 return __xfs_get_blocks(inode, iblock, bh_result, create, 1);
1218 } 1278 }
1219 1279
1220 /* 1280 /*
1221 * Complete a direct I/O write request. 1281 * Complete a direct I/O write request.
1222 * 1282 *
1223 * If the private argument is non-NULL __xfs_get_blocks signals us that we 1283 * If the private argument is non-NULL __xfs_get_blocks signals us that we
1224 * need to issue a transaction to convert the range from unwritten to written 1284 * need to issue a transaction to convert the range from unwritten to written
1225 * extents. In case this is regular synchronous I/O we just call xfs_end_io 1285 * extents. In case this is regular synchronous I/O we just call xfs_end_io
1226 * to do this and we are done. But in case this was a successful AIO 1286 * to do this and we are done. But in case this was a successful AIO
1227 * request, this handler is called from interrupt context, from which we 1287 * request, this handler is called from interrupt context, from which we
1228 * can't start transactions. In that case offload the I/O completion to 1288 * can't start transactions. In that case offload the I/O completion to
1229 * the workqueues we also use for buffered I/O completion. 1289 * the workqueues we also use for buffered I/O completion.
1230 */ 1290 */
1231 STATIC void 1291 STATIC void
1232 xfs_end_io_direct_write( 1292 xfs_end_io_direct_write(
1233 struct kiocb *iocb, 1293 struct kiocb *iocb,
1234 loff_t offset, 1294 loff_t offset,
1235 ssize_t size, 1295 ssize_t size,
1236 void *private, 1296 void *private,
1237 int ret, 1297 int ret,
1238 bool is_async) 1298 bool is_async)
1239 { 1299 {
1240 struct xfs_ioend *ioend = iocb->private; 1300 struct xfs_ioend *ioend = iocb->private;
1241 1301
1242 /* 1302 /*
1243 * While the generic direct I/O code updates the inode size, it does 1303 * While the generic direct I/O code updates the inode size, it does
1244 * so only after the end_io handler is called, which means our 1304 * so only after the end_io handler is called, which means our
1245 * end_io handler thinks the on-disk size is outside the in-core 1305 * end_io handler thinks the on-disk size is outside the in-core
1246 * size. To prevent this just update it a little bit earlier here. 1306 * size. To prevent this just update it a little bit earlier here.
1247 */ 1307 */
1248 if (offset + size > i_size_read(ioend->io_inode)) 1308 if (offset + size > i_size_read(ioend->io_inode))
1249 i_size_write(ioend->io_inode, offset + size); 1309 i_size_write(ioend->io_inode, offset + size);
1250 1310
1251 /* 1311 /*
1252 * blockdev_direct_IO can return an error even after the I/O 1312 * blockdev_direct_IO can return an error even after the I/O
1253 * completion handler was called. Thus we need to protect 1313 * completion handler was called. Thus we need to protect
1254 * against double-freeing. 1314 * against double-freeing.
1255 */ 1315 */
1256 iocb->private = NULL; 1316 iocb->private = NULL;
1257 1317
1258 ioend->io_offset = offset; 1318 ioend->io_offset = offset;
1259 ioend->io_size = size; 1319 ioend->io_size = size;
1260 ioend->io_iocb = iocb; 1320 ioend->io_iocb = iocb;
1261 ioend->io_result = ret; 1321 ioend->io_result = ret;
1262 if (private && size > 0) 1322 if (private && size > 0)
1263 ioend->io_type = IO_UNWRITTEN; 1323 ioend->io_type = IO_UNWRITTEN;
1264 1324
1265 if (is_async) { 1325 if (is_async) {
1266 ioend->io_isasync = 1; 1326 ioend->io_isasync = 1;
1267 xfs_finish_ioend(ioend); 1327 xfs_finish_ioend(ioend);
1268 } else { 1328 } else {
1269 xfs_finish_ioend_sync(ioend); 1329 xfs_finish_ioend_sync(ioend);
1270 } 1330 }
1271 } 1331 }
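The size bump at the top of this handler exists because the generic direct I/O code only updates i_size after the end_io callback returns; growing the in-core size first keeps the completion path from ever seeing an on-disk size ahead of the in-core one. The update itself is a simple extend-only maximum, sketched here with hypothetical names:

#include <stdio.h>
#include <stdint.h>

/*
 * Extend-only size update: a completed write covering [offset, offset + size)
 * may grow the cached file size but never shrink it.
 */
static uint64_t update_isize(uint64_t isize, uint64_t offset, uint64_t size)
{
	if (offset + size > isize)
		isize = offset + size;
	return isize;
}

int main(void)
{
	uint64_t isize = 8192;

	isize = update_isize(isize, 4096, 4096);	/* write inside EOF: unchanged */
	printf("after overwrite: %llu\n", (unsigned long long)isize);

	isize = update_isize(isize, 8192, 16384);	/* appending write: grows */
	printf("after append:    %llu\n", (unsigned long long)isize);
	return 0;
}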
1272 1332
1273 STATIC ssize_t 1333 STATIC ssize_t
1274 xfs_vm_direct_IO( 1334 xfs_vm_direct_IO(
1275 int rw, 1335 int rw,
1276 struct kiocb *iocb, 1336 struct kiocb *iocb,
1277 const struct iovec *iov, 1337 const struct iovec *iov,
1278 loff_t offset, 1338 loff_t offset,
1279 unsigned long nr_segs) 1339 unsigned long nr_segs)
1280 { 1340 {
1281 struct inode *inode = iocb->ki_filp->f_mapping->host; 1341 struct inode *inode = iocb->ki_filp->f_mapping->host;
1282 struct block_device *bdev = xfs_find_bdev_for_inode(inode); 1342 struct block_device *bdev = xfs_find_bdev_for_inode(inode);
1343 struct xfs_ioend *ioend = NULL;
1283 ssize_t ret; 1344 ssize_t ret;
1284 1345
1285 if (rw & WRITE) { 1346 if (rw & WRITE) {
1286 iocb->private = xfs_alloc_ioend(inode, IO_DIRECT); 1347 size_t size = iov_length(iov, nr_segs);
1287 1348
1349 /*
1350 * We need to preallocate a transaction for a size update
1351 * here. In the case that this write both updates the size
1352 * and converts at least one unwritten extent we will cancel
1353 * the still clean transaction after the I/O has finished.
1354 */
1355 iocb->private = ioend = xfs_alloc_ioend(inode, IO_DIRECT);
1356 if (offset + size > XFS_I(inode)->i_d.di_size) {
1357 ret = xfs_setfilesize_trans_alloc(ioend);
1358 if (ret)
1359 goto out_destroy_ioend;
1360 ioend->io_isdirect = 1;
1361 }
1362
1288 ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, 1363 ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
1289 offset, nr_segs, 1364 offset, nr_segs,
1290 xfs_get_blocks_direct, 1365 xfs_get_blocks_direct,
1291 xfs_end_io_direct_write, NULL, 0); 1366 xfs_end_io_direct_write, NULL, 0);
1292 if (ret != -EIOCBQUEUED && iocb->private) 1367 if (ret != -EIOCBQUEUED && iocb->private)
1293 xfs_destroy_ioend(iocb->private); 1368 goto out_trans_cancel;
1294 } else { 1369 } else {
1295 ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, 1370 ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
1296 offset, nr_segs, 1371 offset, nr_segs,
1297 xfs_get_blocks_direct, 1372 xfs_get_blocks_direct,
1298 NULL, NULL, 0); 1373 NULL, NULL, 0);
1299 } 1374 }
1300 1375
1376 return ret;
1377
1378 out_trans_cancel:
1379 if (ioend->io_append_trans) {
1380 current_set_flags_nested(&ioend->io_append_trans->t_pflags,
1381 PF_FSTRANS);
1382 xfs_trans_cancel(ioend->io_append_trans, 0);
1383 }
1384 out_destroy_ioend:
1385 xfs_destroy_ioend(ioend);
1301 return ret; 1386 return ret;
1302 } 1387 }
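Both writeback submission above and this direct I/O path now make the same up-front decision: reserve a size-update transaction only when the write can finish beyond the current on-disk inode size, since a write entirely below it can never move that size. A small sketch of the test, using an iov_length-style total byte count; the helper name is made up for illustration:

#include <stdio.h>
#include <stdbool.h>
#include <stdint.h>

/*
 * A deferred size update is needed only if the write may end past the
 * size currently recorded on disk.
 */
static bool needs_setfilesize(uint64_t offset, uint64_t count, uint64_t di_size)
{
	return offset + count > di_size;
}

int main(void)
{
	uint64_t di_size = 1 << 20;	/* 1 MiB recorded on disk */

	printf("overwrite: %d\n", needs_setfilesize(0, 4096, di_size));
	printf("append:    %d\n", needs_setfilesize(di_size, 4096, di_size));
	return 0;
}

When the test fires, the transaction is reserved before the I/O is issued, while it is still safe to allocate log space; completion then only has to commit it (logging the new size) or cancel it, as the out_trans_cancel path above does on submission failure.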
1303 1388
1304 STATIC void 1389 STATIC void
1305 xfs_vm_write_failed( 1390 xfs_vm_write_failed(
1306 struct address_space *mapping, 1391 struct address_space *mapping,
1307 loff_t to) 1392 loff_t to)
1308 { 1393 {
1309 struct inode *inode = mapping->host; 1394 struct inode *inode = mapping->host;
1310 1395
1311 if (to > inode->i_size) { 1396 if (to > inode->i_size) {
1312 /* 1397 /*
1313 * Punch out the delalloc blocks we have already allocated. 1398 * Punch out the delalloc blocks we have already allocated.
1314 * 1399 *
1315 * Don't bother with xfs_setattr given that nothing can have 1400 * Don't bother with xfs_setattr given that nothing can have
1316 * made it to disk yet as the page is still locked at this 1401 * made it to disk yet as the page is still locked at this
1317 * point. 1402 * point.
1318 */ 1403 */
1319 struct xfs_inode *ip = XFS_I(inode); 1404 struct xfs_inode *ip = XFS_I(inode);
1320 xfs_fileoff_t start_fsb; 1405 xfs_fileoff_t start_fsb;
1321 xfs_fileoff_t end_fsb; 1406 xfs_fileoff_t end_fsb;
1322 int error; 1407 int error;
1323 1408
1324 truncate_pagecache(inode, to, inode->i_size); 1409 truncate_pagecache(inode, to, inode->i_size);
1325 1410
1326 /* 1411 /*
1327 * Check if there are any blocks that are outside of i_size 1412 * Check if there are any blocks that are outside of i_size
1328 * that need to be trimmed back. 1413 * that need to be trimmed back.
1329 */ 1414 */
1330 start_fsb = XFS_B_TO_FSB(ip->i_mount, inode->i_size) + 1; 1415 start_fsb = XFS_B_TO_FSB(ip->i_mount, inode->i_size) + 1;
1331 end_fsb = XFS_B_TO_FSB(ip->i_mount, to); 1416 end_fsb = XFS_B_TO_FSB(ip->i_mount, to);
1332 if (end_fsb <= start_fsb) 1417 if (end_fsb <= start_fsb)
1333 return; 1418 return;
1334 1419
1335 xfs_ilock(ip, XFS_ILOCK_EXCL); 1420 xfs_ilock(ip, XFS_ILOCK_EXCL);
1336 error = xfs_bmap_punch_delalloc_range(ip, start_fsb, 1421 error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
1337 end_fsb - start_fsb); 1422 end_fsb - start_fsb);
1338 if (error) { 1423 if (error) {
1339 /* something screwed, just bail */ 1424 /* something screwed, just bail */
1340 if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { 1425 if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
1341 xfs_alert(ip->i_mount, 1426 xfs_alert(ip->i_mount,
1342 "xfs_vm_write_failed: unable to clean up ino %lld", 1427 "xfs_vm_write_failed: unable to clean up ino %lld",
1343 ip->i_ino); 1428 ip->i_ino);
1344 } 1429 }
1345 } 1430 }
1346 xfs_iunlock(ip, XFS_ILOCK_EXCL); 1431 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1347 } 1432 }
1348 } 1433 }
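The punch range above is computed in filesystem blocks. Assuming XFS_B_TO_FSB rounds a byte count up to whole blocks, a worked example with 4096-byte blocks (names illustrative):

#include <stdio.h>
#include <stdint.h>

#define BLKSHIFT 12
#define BLKSIZE  (1u << BLKSHIFT)	/* assumed 4096-byte filesystem blocks */

/* Round a byte count up to whole filesystem blocks, XFS_B_TO_FSB-style. */
static uint64_t b_to_fsb(uint64_t bytes)
{
	return (bytes + BLKSIZE - 1) >> BLKSHIFT;
}

int main(void)
{
	uint64_t i_size = 10000;	/* file size before the failed write */
	uint64_t to = 20000;		/* end of the range the write covered */

	uint64_t start_fsb = b_to_fsb(i_size) + 1;
	uint64_t end_fsb = b_to_fsb(to);

	if (end_fsb <= start_fsb) {
		printf("nothing to trim\n");
		return 0;
	}
	printf("punch %llu block(s) starting at fsb %llu\n",
	       (unsigned long long)(end_fsb - start_fsb),
	       (unsigned long long)start_fsb);
	return 0;
}

For these numbers start_fsb is 4 and end_fsb is 5, so a single delalloc block past the old EOF is punched out.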
1349 1434
1350 STATIC int 1435 STATIC int
1351 xfs_vm_write_begin( 1436 xfs_vm_write_begin(
1352 struct file *file, 1437 struct file *file,
1353 struct address_space *mapping, 1438 struct address_space *mapping,
1354 loff_t pos, 1439 loff_t pos,
1355 unsigned len, 1440 unsigned len,
1356 unsigned flags, 1441 unsigned flags,
1357 struct page **pagep, 1442 struct page **pagep,
1358 void **fsdata) 1443 void **fsdata)
1359 { 1444 {
1360 int ret; 1445 int ret;
1361 1446
1362 ret = block_write_begin(mapping, pos, len, flags | AOP_FLAG_NOFS, 1447 ret = block_write_begin(mapping, pos, len, flags | AOP_FLAG_NOFS,
1363 pagep, xfs_get_blocks); 1448 pagep, xfs_get_blocks);
1364 if (unlikely(ret)) 1449 if (unlikely(ret))
1365 xfs_vm_write_failed(mapping, pos + len); 1450 xfs_vm_write_failed(mapping, pos + len);
1366 return ret; 1451 return ret;
1367 } 1452 }
1368 1453
1369 STATIC int 1454 STATIC int
1370 xfs_vm_write_end( 1455 xfs_vm_write_end(
1371 struct file *file, 1456 struct file *file,
1372 struct address_space *mapping, 1457 struct address_space *mapping,
1373 loff_t pos, 1458 loff_t pos,
1374 unsigned len, 1459 unsigned len,
1375 unsigned copied, 1460 unsigned copied,
1376 struct page *page, 1461 struct page *page,
1377 void *fsdata) 1462 void *fsdata)
1378 { 1463 {
1379 int ret; 1464 int ret;
1380 1465
1381 ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata); 1466 ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
1382 if (unlikely(ret < len)) 1467 if (unlikely(ret < len))
1383 xfs_vm_write_failed(mapping, pos + len); 1468 xfs_vm_write_failed(mapping, pos + len);
1384 return ret; 1469 return ret;
1385 } 1470 }
1386 1471
1387 STATIC sector_t 1472 STATIC sector_t
1388 xfs_vm_bmap( 1473 xfs_vm_bmap(
1389 struct address_space *mapping, 1474 struct address_space *mapping,
1390 sector_t block) 1475 sector_t block)
1391 { 1476 {
1392 struct inode *inode = (struct inode *)mapping->host; 1477 struct inode *inode = (struct inode *)mapping->host;
1393 struct xfs_inode *ip = XFS_I(inode); 1478 struct xfs_inode *ip = XFS_I(inode);
1394 1479
1395 trace_xfs_vm_bmap(XFS_I(inode)); 1480 trace_xfs_vm_bmap(XFS_I(inode));
1396 xfs_ilock(ip, XFS_IOLOCK_SHARED); 1481 xfs_ilock(ip, XFS_IOLOCK_SHARED);
1397 xfs_flush_pages(ip, (xfs_off_t)0, -1, 0, FI_REMAPF); 1482 xfs_flush_pages(ip, (xfs_off_t)0, -1, 0, FI_REMAPF);
1398 xfs_iunlock(ip, XFS_IOLOCK_SHARED); 1483 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
1399 return generic_block_bmap(mapping, block, xfs_get_blocks); 1484 return generic_block_bmap(mapping, block, xfs_get_blocks);
1400 } 1485 }
1401 1486
1402 STATIC int 1487 STATIC int
1403 xfs_vm_readpage( 1488 xfs_vm_readpage(
1404 struct file *unused, 1489 struct file *unused,
1405 struct page *page) 1490 struct page *page)
1406 { 1491 {
1407 return mpage_readpage(page, xfs_get_blocks); 1492 return mpage_readpage(page, xfs_get_blocks);
1408 } 1493 }
1409 1494
1410 STATIC int 1495 STATIC int
1411 xfs_vm_readpages( 1496 xfs_vm_readpages(
1412 struct file *unused, 1497 struct file *unused,
1413 struct address_space *mapping, 1498 struct address_space *mapping,
1414 struct list_head *pages, 1499 struct list_head *pages,
1415 unsigned nr_pages) 1500 unsigned nr_pages)
1416 { 1501 {
1417 return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks); 1502 return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks);
1418 } 1503 }
1419 1504
1420 const struct address_space_operations xfs_address_space_operations = { 1505 const struct address_space_operations xfs_address_space_operations = {
1421 .readpage = xfs_vm_readpage, 1506 .readpage = xfs_vm_readpage,
1422 .readpages = xfs_vm_readpages, 1507 .readpages = xfs_vm_readpages,
1 /* 1 /*
2 * Copyright (c) 2005-2006 Silicon Graphics, Inc. 2 * Copyright (c) 2005-2006 Silicon Graphics, Inc.
3 * All Rights Reserved. 3 * All Rights Reserved.
4 * 4 *
5 * This program is free software; you can redistribute it and/or 5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as 6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 * 8 *
9 * This program is distributed in the hope that it would be useful, 9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details. 12 * GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation, 15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */ 17 */
18 #ifndef __XFS_AOPS_H__ 18 #ifndef __XFS_AOPS_H__
19 #define __XFS_AOPS_H__ 19 #define __XFS_AOPS_H__
20 20
21 extern mempool_t *xfs_ioend_pool; 21 extern mempool_t *xfs_ioend_pool;
22 22
23 /* 23 /*
24 * Types of I/O for bmap clustering and I/O completion tracking. 24 * Types of I/O for bmap clustering and I/O completion tracking.
25 */ 25 */
26 enum { 26 enum {
27 IO_DIRECT = 0, /* special case for direct I/O ioends */ 27 IO_DIRECT = 0, /* special case for direct I/O ioends */
28 IO_DELALLOC, /* mapping covers delalloc region */ 28 IO_DELALLOC, /* mapping covers delalloc region */
29 IO_UNWRITTEN, /* mapping covers allocated but uninitialized data */ 29 IO_UNWRITTEN, /* mapping covers allocated but uninitialized data */
30 IO_OVERWRITE, /* mapping covers already allocated extent */ 30 IO_OVERWRITE, /* mapping covers already allocated extent */
31 }; 31 };
32 32
33 #define XFS_IO_TYPES \ 33 #define XFS_IO_TYPES \
34 { 0, "" }, \ 34 { 0, "" }, \
35 { IO_DELALLOC, "delalloc" }, \ 35 { IO_DELALLOC, "delalloc" }, \
36 { IO_UNWRITTEN, "unwritten" }, \ 36 { IO_UNWRITTEN, "unwritten" }, \
37 { IO_OVERWRITE, "overwrite" } 37 { IO_OVERWRITE, "overwrite" }
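XFS_IO_TYPES pairs each ioend type with a printable name used for trace point output, with the IO_DIRECT slot deliberately printing as an empty string. A self-contained user-space sketch of the same value-to-string mapping (illustrative only):

#include <stdio.h>

/* Mirror of the ioend I/O types and their printable names. */
enum { IO_DIRECT = 0, IO_DELALLOC, IO_UNWRITTEN, IO_OVERWRITE };

static const char *const io_type_names[] = {
	[IO_DIRECT]    = "",		/* direct I/O ioends print as blank */
	[IO_DELALLOC]  = "delalloc",
	[IO_UNWRITTEN] = "unwritten",
	[IO_OVERWRITE] = "overwrite",
};

int main(void)
{
	printf("type %d -> \"%s\"\n", IO_UNWRITTEN, io_type_names[IO_UNWRITTEN]);
	return 0;
}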
38 38
39 /* 39 /*
40 * xfs_ioend struct manages large extent writes for XFS. 40 * xfs_ioend struct manages large extent writes for XFS.
41 * It can manage several multi-page bios at once. 41 * It can manage several multi-page bios at once.
42 */ 42 */
43 typedef struct xfs_ioend { 43 typedef struct xfs_ioend {
44 struct xfs_ioend *io_list; /* next ioend in chain */ 44 struct xfs_ioend *io_list; /* next ioend in chain */
45 unsigned int io_type; /* delalloc / unwritten */ 45 unsigned int io_type; /* delalloc / unwritten */
46 int io_error; /* I/O error code */ 46 int io_error; /* I/O error code */
47 atomic_t io_remaining; /* hold count */ 47 atomic_t io_remaining; /* hold count */
48 unsigned int io_isasync : 1; /* needs aio_complete */ 48 unsigned int io_isasync : 1; /* needs aio_complete */
49 unsigned int io_isdirect : 1;/* direct I/O */
49 struct inode *io_inode; /* file being written to */ 50 struct inode *io_inode; /* file being written to */
50 struct buffer_head *io_buffer_head;/* buffer linked list head */ 51 struct buffer_head *io_buffer_head;/* buffer linked list head */
51 struct buffer_head *io_buffer_tail;/* buffer linked list tail */ 52 struct buffer_head *io_buffer_tail;/* buffer linked list tail */
52 size_t io_size; /* size of the extent */ 53 size_t io_size; /* size of the extent */
53 xfs_off_t io_offset; /* offset in the file */ 54 xfs_off_t io_offset; /* offset in the file */
54 struct work_struct io_work; /* xfsdatad work queue */ 55 struct work_struct io_work; /* xfsdatad work queue */
56 struct xfs_trans *io_append_trans;/* xact. for size update */
55 struct kiocb *io_iocb; 57 struct kiocb *io_iocb;
56 int io_result; 58 int io_result;
57 } xfs_ioend_t; 59 } xfs_ioend_t;
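Of the two fields added here, io_isdirect marks an ioend built for direct I/O, and io_append_trans carries the reserved size-update transaction from submission to completion: it is allocated while log space can still safely be reserved, stashed on the ioend, and later either committed (logging the new file size) or cancelled. A simplified, purely hypothetical mirror of that hand-off:

#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>

/* Hypothetical stand-ins for struct xfs_ioend and the reserved transaction. */
struct size_update_trans { long long new_size; };

struct ioend {
	long long io_offset;
	long long io_size;
	struct size_update_trans *io_append_trans;	/* reserved at submit time */
};

/* Submission side: reserve the update only if the write may extend the file. */
static int ioend_prepare(struct ioend *io, long long disk_size)
{
	if (io->io_offset + io->io_size > disk_size) {
		io->io_append_trans = malloc(sizeof(*io->io_append_trans));
		if (!io->io_append_trans)
			return -1;
	}
	return 0;
}

/* Completion side: consume the reserved update, or drop it if unused. */
static void ioend_finish(struct ioend *io, bool success)
{
	if (!io->io_append_trans)
		return;
	if (success) {
		io->io_append_trans->new_size = io->io_offset + io->io_size;
		printf("logged new size %lld\n", io->io_append_trans->new_size);
	} else {
		printf("cancelled size update\n");
	}
	free(io->io_append_trans);
	io->io_append_trans = NULL;
}

int main(void)
{
	struct ioend io = { .io_offset = 4096, .io_size = 8192, .io_append_trans = NULL };

	if (ioend_prepare(&io, 4096) == 0)	/* this write extends past the on-disk size */
		ioend_finish(&io, true);
	return 0;
}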
58 60
59 extern const struct address_space_operations xfs_address_space_operations; 61 extern const struct address_space_operations xfs_address_space_operations;
60 extern int xfs_get_blocks(struct inode *, sector_t, struct buffer_head *, int); 62 extern int xfs_get_blocks(struct inode *, sector_t, struct buffer_head *, int);
61 63
62 extern void xfs_count_page_state(struct page *, int *, int *); 64 extern void xfs_count_page_state(struct page *, int *, int *);
63 65
64 #endif /* __XFS_AOPS_H__ */ 66 #endif /* __XFS_AOPS_H__ */
65 67