Commit 281627df3eb55e1b729b9bb06fff5ff112929646
Committed by Ben Myers
1 parent 84803fb782
Exists in smarc-l5.0.0_1.0.0-ga and in 5 other branches
xfs: log file size updates at I/O completion time
Do not use unlogged metadata updates and the VFS dirty bit for updating the file size after writeback. In addition to causing various problems with updates getting delayed for far too long, this also drags in the unscalable VFS dirty tracking, and is one of the few remaining unlogged metadata updates.

Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Mark Tinguely <tinguely@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
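In short, the commit moves the on-disk size update into a logged transaction: the writeback submission path pre-allocates a reserved transaction and attaches it to the ioend, and the I/O completion worker later joins the inode, logs the new size, and commits. A condensed sketch of the two new helpers, with all names and calls taken from the diff below (tracing and some error handling trimmed):

    /* Submission side: reserve log space and park the transaction in the ioend. */
    STATIC int
    xfs_setfilesize_trans_alloc(struct xfs_ioend *ioend)
    {
            struct xfs_mount *mp = XFS_I(ioend->io_inode)->i_mount;
            struct xfs_trans *tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
            int error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);

            if (error) {
                    xfs_trans_cancel(tp, 0);
                    return error;
            }
            ioend->io_append_trans = tp;    /* handed off to the completion thread */
            current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
            return 0;
    }

    /* Completion side: queued via m_data_workqueue when io_append_trans is set. */
    STATIC int
    xfs_setfilesize(struct xfs_ioend *ioend)
    {
            struct xfs_inode *ip = XFS_I(ioend->io_inode);
            struct xfs_trans *tp = ioend->io_append_trans;
            xfs_fsize_t      isize;

            current_set_flags_nested(&tp->t_pflags, PF_FSTRANS);
            xfs_ilock(ip, XFS_ILOCK_EXCL);
            isize = xfs_new_eof(ip, ioend->io_offset + ioend->io_size);
            if (!isize) {                   /* on-disk size already covers this I/O */
                    xfs_iunlock(ip, XFS_ILOCK_EXCL);
                    xfs_trans_cancel(tp, 0);
                    return 0;
            }
            ip->i_d.di_size = isize;
            xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
            xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
            return xfs_trans_commit(tp, 0); /* size update now goes through the log */
    }

Note the PF_FSTRANS handoff: the transaction is allocated in the submission thread but committed from a workqueue, so the flag is cleared after allocation and set again in the completion path, as the in-code comments in the diff explain.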
Showing 2 changed files with 111 additions and 24 deletions
fs/xfs/xfs_aops.c
1 | /* | 1 | /* |
2 | * Copyright (c) 2000-2005 Silicon Graphics, Inc. | 2 | * Copyright (c) 2000-2005 Silicon Graphics, Inc. |
3 | * All Rights Reserved. | 3 | * All Rights Reserved. |
4 | * | 4 | * |
5 | * This program is free software; you can redistribute it and/or | 5 | * This program is free software; you can redistribute it and/or |
6 | * modify it under the terms of the GNU General Public License as | 6 | * modify it under the terms of the GNU General Public License as |
7 | * published by the Free Software Foundation. | 7 | * published by the Free Software Foundation. |
8 | * | 8 | * |
9 | * This program is distributed in the hope that it would be useful, | 9 | * This program is distributed in the hope that it would be useful, |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
12 | * GNU General Public License for more details. | 12 | * GNU General Public License for more details. |
13 | * | 13 | * |
14 | * You should have received a copy of the GNU General Public License | 14 | * You should have received a copy of the GNU General Public License |
15 | * along with this program; if not, write the Free Software Foundation, | 15 | * along with this program; if not, write the Free Software Foundation, |
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | 16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
17 | */ | 17 | */ |
18 | #include "xfs.h" | 18 | #include "xfs.h" |
19 | #include "xfs_bit.h" | 19 | #include "xfs_bit.h" |
20 | #include "xfs_log.h" | 20 | #include "xfs_log.h" |
21 | #include "xfs_inum.h" | 21 | #include "xfs_inum.h" |
22 | #include "xfs_sb.h" | 22 | #include "xfs_sb.h" |
23 | #include "xfs_ag.h" | 23 | #include "xfs_ag.h" |
24 | #include "xfs_trans.h" | 24 | #include "xfs_trans.h" |
25 | #include "xfs_mount.h" | 25 | #include "xfs_mount.h" |
26 | #include "xfs_bmap_btree.h" | 26 | #include "xfs_bmap_btree.h" |
27 | #include "xfs_dinode.h" | 27 | #include "xfs_dinode.h" |
28 | #include "xfs_inode.h" | 28 | #include "xfs_inode.h" |
29 | #include "xfs_inode_item.h" | ||
29 | #include "xfs_alloc.h" | 30 | #include "xfs_alloc.h" |
30 | #include "xfs_error.h" | 31 | #include "xfs_error.h" |
31 | #include "xfs_rw.h" | 32 | #include "xfs_rw.h" |
32 | #include "xfs_iomap.h" | 33 | #include "xfs_iomap.h" |
33 | #include "xfs_vnodeops.h" | 34 | #include "xfs_vnodeops.h" |
34 | #include "xfs_trace.h" | 35 | #include "xfs_trace.h" |
35 | #include "xfs_bmap.h" | 36 | #include "xfs_bmap.h" |
36 | #include <linux/gfp.h> | 37 | #include <linux/gfp.h> |
37 | #include <linux/mpage.h> | 38 | #include <linux/mpage.h> |
38 | #include <linux/pagevec.h> | 39 | #include <linux/pagevec.h> |
39 | #include <linux/writeback.h> | 40 | #include <linux/writeback.h> |
40 | 41 | ||
41 | void | 42 | void |
42 | xfs_count_page_state( | 43 | xfs_count_page_state( |
43 | struct page *page, | 44 | struct page *page, |
44 | int *delalloc, | 45 | int *delalloc, |
45 | int *unwritten) | 46 | int *unwritten) |
46 | { | 47 | { |
47 | struct buffer_head *bh, *head; | 48 | struct buffer_head *bh, *head; |
48 | 49 | ||
49 | *delalloc = *unwritten = 0; | 50 | *delalloc = *unwritten = 0; |
50 | 51 | ||
51 | bh = head = page_buffers(page); | 52 | bh = head = page_buffers(page); |
52 | do { | 53 | do { |
53 | if (buffer_unwritten(bh)) | 54 | if (buffer_unwritten(bh)) |
54 | (*unwritten) = 1; | 55 | (*unwritten) = 1; |
55 | else if (buffer_delay(bh)) | 56 | else if (buffer_delay(bh)) |
56 | (*delalloc) = 1; | 57 | (*delalloc) = 1; |
57 | } while ((bh = bh->b_this_page) != head); | 58 | } while ((bh = bh->b_this_page) != head); |
58 | } | 59 | } |
59 | 60 | ||
60 | STATIC struct block_device * | 61 | STATIC struct block_device * |
61 | xfs_find_bdev_for_inode( | 62 | xfs_find_bdev_for_inode( |
62 | struct inode *inode) | 63 | struct inode *inode) |
63 | { | 64 | { |
64 | struct xfs_inode *ip = XFS_I(inode); | 65 | struct xfs_inode *ip = XFS_I(inode); |
65 | struct xfs_mount *mp = ip->i_mount; | 66 | struct xfs_mount *mp = ip->i_mount; |
66 | 67 | ||
67 | if (XFS_IS_REALTIME_INODE(ip)) | 68 | if (XFS_IS_REALTIME_INODE(ip)) |
68 | return mp->m_rtdev_targp->bt_bdev; | 69 | return mp->m_rtdev_targp->bt_bdev; |
69 | else | 70 | else |
70 | return mp->m_ddev_targp->bt_bdev; | 71 | return mp->m_ddev_targp->bt_bdev; |
71 | } | 72 | } |
72 | 73 | ||
73 | /* | 74 | /* |
74 | * We're now finished for good with this ioend structure. | 75 | * We're now finished for good with this ioend structure. |
75 | * Update the page state via the associated buffer_heads, | 76 | * Update the page state via the associated buffer_heads, |
76 | * release holds on the inode and bio, and finally free | 77 | * release holds on the inode and bio, and finally free |
77 | * up memory. Do not use the ioend after this. | 78 | * up memory. Do not use the ioend after this. |
78 | */ | 79 | */ |
79 | STATIC void | 80 | STATIC void |
80 | xfs_destroy_ioend( | 81 | xfs_destroy_ioend( |
81 | xfs_ioend_t *ioend) | 82 | xfs_ioend_t *ioend) |
82 | { | 83 | { |
83 | struct buffer_head *bh, *next; | 84 | struct buffer_head *bh, *next; |
84 | 85 | ||
85 | for (bh = ioend->io_buffer_head; bh; bh = next) { | 86 | for (bh = ioend->io_buffer_head; bh; bh = next) { |
86 | next = bh->b_private; | 87 | next = bh->b_private; |
87 | bh->b_end_io(bh, !ioend->io_error); | 88 | bh->b_end_io(bh, !ioend->io_error); |
88 | } | 89 | } |
89 | 90 | ||
90 | if (ioend->io_iocb) { | 91 | if (ioend->io_iocb) { |
91 | if (ioend->io_isasync) { | 92 | if (ioend->io_isasync) { |
92 | aio_complete(ioend->io_iocb, ioend->io_error ? | 93 | aio_complete(ioend->io_iocb, ioend->io_error ? |
93 | ioend->io_error : ioend->io_result, 0); | 94 | ioend->io_error : ioend->io_result, 0); |
94 | } | 95 | } |
95 | inode_dio_done(ioend->io_inode); | 96 | inode_dio_done(ioend->io_inode); |
96 | } | 97 | } |
97 | 98 | ||
98 | mempool_free(ioend, xfs_ioend_pool); | 99 | mempool_free(ioend, xfs_ioend_pool); |
99 | } | 100 | } |
100 | 101 | ||
101 | /* | 102 | /* |
102 | * Fast and loose check if this write could update the on-disk inode size. | 103 | * Fast and loose check if this write could update the on-disk inode size. |
103 | */ | 104 | */ |
104 | static inline bool xfs_ioend_is_append(struct xfs_ioend *ioend) | 105 | static inline bool xfs_ioend_is_append(struct xfs_ioend *ioend) |
105 | { | 106 | { |
106 | return ioend->io_offset + ioend->io_size > | 107 | return ioend->io_offset + ioend->io_size > |
107 | XFS_I(ioend->io_inode)->i_d.di_size; | 108 | XFS_I(ioend->io_inode)->i_d.di_size; |
108 | } | 109 | } |
109 | 110 | ||
111 | STATIC int | ||
112 | xfs_setfilesize_trans_alloc( | ||
113 | struct xfs_ioend *ioend) | ||
114 | { | ||
115 | struct xfs_mount *mp = XFS_I(ioend->io_inode)->i_mount; | ||
116 | struct xfs_trans *tp; | ||
117 | int error; | ||
118 | |||
119 | tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS); | ||
120 | |||
121 | error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0); | ||
122 | if (error) { | ||
123 | xfs_trans_cancel(tp, 0); | ||
124 | return error; | ||
125 | } | ||
126 | |||
127 | ioend->io_append_trans = tp; | ||
128 | |||
129 | /* | ||
130 | * We hand off the transaction to the completion thread now, so | ||
131 | * clear the flag here. | ||
132 | */ | ||
133 | current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); | ||
134 | return 0; | ||
135 | } | ||
136 | |||
110 | /* | 137 | /* |
111 | * Update on-disk file size now that data has been written to disk. | 138 | * Update on-disk file size now that data has been written to disk. |
112 | */ | 139 | */ |
113 | STATIC void | 140 | STATIC int |
114 | xfs_setfilesize( | 141 | xfs_setfilesize( |
115 | struct xfs_ioend *ioend) | 142 | struct xfs_ioend *ioend) |
116 | { | 143 | { |
117 | struct xfs_inode *ip = XFS_I(ioend->io_inode); | 144 | struct xfs_inode *ip = XFS_I(ioend->io_inode); |
145 | struct xfs_trans *tp = ioend->io_append_trans; | ||
118 | xfs_fsize_t isize; | 146 | xfs_fsize_t isize; |
119 | 147 | ||
148 | /* | ||
149 | * The transaction was allocated in the I/O submission thread, | ||
150 | * thus we need to mark ourselves as being in a transaction | ||
151 | * manually. | ||
152 | */ | ||
153 | current_set_flags_nested(&tp->t_pflags, PF_FSTRANS); | ||
154 | |||
120 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 155 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
121 | isize = xfs_new_eof(ip, ioend->io_offset + ioend->io_size); | 156 | isize = xfs_new_eof(ip, ioend->io_offset + ioend->io_size); |
122 | if (isize) { | 157 | if (!isize) { |
123 | trace_xfs_setfilesize(ip, ioend->io_offset, ioend->io_size); | 158 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
124 | ip->i_d.di_size = isize; | 159 | xfs_trans_cancel(tp, 0); |
125 | xfs_mark_inode_dirty(ip); | 160 | return 0; |
126 | } | 161 | } |
127 | 162 | ||
128 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 163 | trace_xfs_setfilesize(ip, ioend->io_offset, ioend->io_size); |
164 | |||
165 | ip->i_d.di_size = isize; | ||
166 | xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); | ||
167 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); | ||
168 | |||
169 | return xfs_trans_commit(tp, 0); | ||
129 | } | 170 | } |
130 | 171 | ||
131 | /* | 172 | /* |
132 | * Schedule IO completion handling on the final put of an ioend. | 173 | * Schedule IO completion handling on the final put of an ioend. |
133 | * | 174 | * |
134 | * If there is no work to do we might as well call it a day and free the | 175 | * If there is no work to do we might as well call it a day and free the |
135 | * ioend right now. | 176 | * ioend right now. |
136 | */ | 177 | */ |
137 | STATIC void | 178 | STATIC void |
138 | xfs_finish_ioend( | 179 | xfs_finish_ioend( |
139 | struct xfs_ioend *ioend) | 180 | struct xfs_ioend *ioend) |
140 | { | 181 | { |
141 | if (atomic_dec_and_test(&ioend->io_remaining)) { | 182 | if (atomic_dec_and_test(&ioend->io_remaining)) { |
142 | struct xfs_mount *mp = XFS_I(ioend->io_inode)->i_mount; | 183 | struct xfs_mount *mp = XFS_I(ioend->io_inode)->i_mount; |
143 | 184 | ||
144 | if (ioend->io_type == IO_UNWRITTEN) | 185 | if (ioend->io_type == IO_UNWRITTEN) |
145 | queue_work(mp->m_unwritten_workqueue, &ioend->io_work); | 186 | queue_work(mp->m_unwritten_workqueue, &ioend->io_work); |
146 | else if (xfs_ioend_is_append(ioend)) | 187 | else if (ioend->io_append_trans) |
147 | queue_work(mp->m_data_workqueue, &ioend->io_work); | 188 | queue_work(mp->m_data_workqueue, &ioend->io_work); |
148 | else | 189 | else |
149 | xfs_destroy_ioend(ioend); | 190 | xfs_destroy_ioend(ioend); |
150 | } | 191 | } |
151 | } | 192 | } |
152 | 193 | ||
153 | /* | 194 | /* |
154 | * IO write completion. | 195 | * IO write completion. |
155 | */ | 196 | */ |
156 | STATIC void | 197 | STATIC void |
157 | xfs_end_io( | 198 | xfs_end_io( |
158 | struct work_struct *work) | 199 | struct work_struct *work) |
159 | { | 200 | { |
160 | xfs_ioend_t *ioend = container_of(work, xfs_ioend_t, io_work); | 201 | xfs_ioend_t *ioend = container_of(work, xfs_ioend_t, io_work); |
161 | struct xfs_inode *ip = XFS_I(ioend->io_inode); | 202 | struct xfs_inode *ip = XFS_I(ioend->io_inode); |
162 | int error = 0; | 203 | int error = 0; |
163 | 204 | ||
164 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { | 205 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { |
165 | ioend->io_error = -EIO; | 206 | ioend->io_error = -EIO; |
166 | goto done; | 207 | goto done; |
167 | } | 208 | } |
168 | if (ioend->io_error) | 209 | if (ioend->io_error) |
169 | goto done; | 210 | goto done; |
170 | 211 | ||
171 | /* | 212 | /* |
172 | * For unwritten extents we need to issue transactions to convert a | 213 | * For unwritten extents we need to issue transactions to convert a |
173 | * range to normal written extents after the data I/O has finished. | 214 | * range to normal written extents after the data I/O has finished. |
174 | */ | 215 | */ |
175 | if (ioend->io_type == IO_UNWRITTEN) { | 216 | if (ioend->io_type == IO_UNWRITTEN) { |
217 | /* | ||
218 | * For buffered I/O we never preallocate a transaction when | ||
219 | * doing the unwritten extent conversion, but for direct I/O | ||
220 | * we do not know if we are converting an unwritten extent | ||
221 | * or not at the point where we preallocate the transaction. | ||
222 | */ | ||
223 | if (ioend->io_append_trans) { | ||
224 | ASSERT(ioend->io_isdirect); | ||
225 | |||
226 | current_set_flags_nested( | ||
227 | &ioend->io_append_trans->t_pflags, PF_FSTRANS); | ||
228 | xfs_trans_cancel(ioend->io_append_trans, 0); | ||
229 | } | ||
230 | |||
176 | error = xfs_iomap_write_unwritten(ip, ioend->io_offset, | 231 | error = xfs_iomap_write_unwritten(ip, ioend->io_offset, |
177 | ioend->io_size); | 232 | ioend->io_size); |
178 | if (error) { | 233 | if (error) { |
179 | ioend->io_error = -error; | 234 | ioend->io_error = -error; |
180 | goto done; | 235 | goto done; |
181 | } | 236 | } |
237 | } else if (ioend->io_append_trans) { | ||
238 | error = xfs_setfilesize(ioend); | ||
239 | if (error) | ||
240 | ioend->io_error = -error; | ||
182 | } else { | 241 | } else { |
183 | /* | 242 | ASSERT(!xfs_ioend_is_append(ioend)); |
184 | * We might have to update the on-disk file size after | ||
185 | * extending writes. | ||
186 | */ | ||
187 | xfs_setfilesize(ioend); | ||
188 | } | 243 | } |
189 | 244 | ||
190 | done: | 245 | done: |
191 | xfs_destroy_ioend(ioend); | 246 | xfs_destroy_ioend(ioend); |
192 | } | 247 | } |
193 | 248 | ||
194 | /* | 249 | /* |
195 | * Call IO completion handling in caller context on the final put of an ioend. | 250 | * Call IO completion handling in caller context on the final put of an ioend. |
196 | */ | 251 | */ |
197 | STATIC void | 252 | STATIC void |
198 | xfs_finish_ioend_sync( | 253 | xfs_finish_ioend_sync( |
199 | struct xfs_ioend *ioend) | 254 | struct xfs_ioend *ioend) |
200 | { | 255 | { |
201 | if (atomic_dec_and_test(&ioend->io_remaining)) | 256 | if (atomic_dec_and_test(&ioend->io_remaining)) |
202 | xfs_end_io(&ioend->io_work); | 257 | xfs_end_io(&ioend->io_work); |
203 | } | 258 | } |
204 | 259 | ||
205 | /* | 260 | /* |
206 | * Allocate and initialise an IO completion structure. | 261 | * Allocate and initialise an IO completion structure. |
207 | * We need to track unwritten extent write completion here initially. | 262 | * We need to track unwritten extent write completion here initially. |
208 | * We'll need to extend this for updating the ondisk inode size later | 263 | * We'll need to extend this for updating the ondisk inode size later |
209 | * (vs. incore size). | 264 | * (vs. incore size). |
210 | */ | 265 | */ |
211 | STATIC xfs_ioend_t * | 266 | STATIC xfs_ioend_t * |
212 | xfs_alloc_ioend( | 267 | xfs_alloc_ioend( |
213 | struct inode *inode, | 268 | struct inode *inode, |
214 | unsigned int type) | 269 | unsigned int type) |
215 | { | 270 | { |
216 | xfs_ioend_t *ioend; | 271 | xfs_ioend_t *ioend; |
217 | 272 | ||
218 | ioend = mempool_alloc(xfs_ioend_pool, GFP_NOFS); | 273 | ioend = mempool_alloc(xfs_ioend_pool, GFP_NOFS); |
219 | 274 | ||
220 | /* | 275 | /* |
221 | * Set the count to 1 initially, which will prevent an I/O | 276 | * Set the count to 1 initially, which will prevent an I/O |
222 | * completion callback from happening before we have started | 277 | * completion callback from happening before we have started |
223 | * all the I/O from calling the completion routine too early. | 278 | * all the I/O from calling the completion routine too early. |
224 | */ | 279 | */ |
225 | atomic_set(&ioend->io_remaining, 1); | 280 | atomic_set(&ioend->io_remaining, 1); |
226 | ioend->io_isasync = 0; | 281 | ioend->io_isasync = 0; |
282 | ioend->io_isdirect = 0; | ||
227 | ioend->io_error = 0; | 283 | ioend->io_error = 0; |
228 | ioend->io_list = NULL; | 284 | ioend->io_list = NULL; |
229 | ioend->io_type = type; | 285 | ioend->io_type = type; |
230 | ioend->io_inode = inode; | 286 | ioend->io_inode = inode; |
231 | ioend->io_buffer_head = NULL; | 287 | ioend->io_buffer_head = NULL; |
232 | ioend->io_buffer_tail = NULL; | 288 | ioend->io_buffer_tail = NULL; |
233 | ioend->io_offset = 0; | 289 | ioend->io_offset = 0; |
234 | ioend->io_size = 0; | 290 | ioend->io_size = 0; |
235 | ioend->io_iocb = NULL; | 291 | ioend->io_iocb = NULL; |
236 | ioend->io_result = 0; | 292 | ioend->io_result = 0; |
293 | ioend->io_append_trans = NULL; | ||
237 | 294 | ||
238 | INIT_WORK(&ioend->io_work, xfs_end_io); | 295 | INIT_WORK(&ioend->io_work, xfs_end_io); |
239 | return ioend; | 296 | return ioend; |
240 | } | 297 | } |
241 | 298 | ||
242 | STATIC int | 299 | STATIC int |
243 | xfs_map_blocks( | 300 | xfs_map_blocks( |
244 | struct inode *inode, | 301 | struct inode *inode, |
245 | loff_t offset, | 302 | loff_t offset, |
246 | struct xfs_bmbt_irec *imap, | 303 | struct xfs_bmbt_irec *imap, |
247 | int type, | 304 | int type, |
248 | int nonblocking) | 305 | int nonblocking) |
249 | { | 306 | { |
250 | struct xfs_inode *ip = XFS_I(inode); | 307 | struct xfs_inode *ip = XFS_I(inode); |
251 | struct xfs_mount *mp = ip->i_mount; | 308 | struct xfs_mount *mp = ip->i_mount; |
252 | ssize_t count = 1 << inode->i_blkbits; | 309 | ssize_t count = 1 << inode->i_blkbits; |
253 | xfs_fileoff_t offset_fsb, end_fsb; | 310 | xfs_fileoff_t offset_fsb, end_fsb; |
254 | int error = 0; | 311 | int error = 0; |
255 | int bmapi_flags = XFS_BMAPI_ENTIRE; | 312 | int bmapi_flags = XFS_BMAPI_ENTIRE; |
256 | int nimaps = 1; | 313 | int nimaps = 1; |
257 | 314 | ||
258 | if (XFS_FORCED_SHUTDOWN(mp)) | 315 | if (XFS_FORCED_SHUTDOWN(mp)) |
259 | return -XFS_ERROR(EIO); | 316 | return -XFS_ERROR(EIO); |
260 | 317 | ||
261 | if (type == IO_UNWRITTEN) | 318 | if (type == IO_UNWRITTEN) |
262 | bmapi_flags |= XFS_BMAPI_IGSTATE; | 319 | bmapi_flags |= XFS_BMAPI_IGSTATE; |
263 | 320 | ||
264 | if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) { | 321 | if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) { |
265 | if (nonblocking) | 322 | if (nonblocking) |
266 | return -XFS_ERROR(EAGAIN); | 323 | return -XFS_ERROR(EAGAIN); |
267 | xfs_ilock(ip, XFS_ILOCK_SHARED); | 324 | xfs_ilock(ip, XFS_ILOCK_SHARED); |
268 | } | 325 | } |
269 | 326 | ||
270 | ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || | 327 | ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || |
271 | (ip->i_df.if_flags & XFS_IFEXTENTS)); | 328 | (ip->i_df.if_flags & XFS_IFEXTENTS)); |
272 | ASSERT(offset <= mp->m_maxioffset); | 329 | ASSERT(offset <= mp->m_maxioffset); |
273 | 330 | ||
274 | if (offset + count > mp->m_maxioffset) | 331 | if (offset + count > mp->m_maxioffset) |
275 | count = mp->m_maxioffset - offset; | 332 | count = mp->m_maxioffset - offset; |
276 | end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count); | 333 | end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count); |
277 | offset_fsb = XFS_B_TO_FSBT(mp, offset); | 334 | offset_fsb = XFS_B_TO_FSBT(mp, offset); |
278 | error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, | 335 | error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, |
279 | imap, &nimaps, bmapi_flags); | 336 | imap, &nimaps, bmapi_flags); |
280 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | 337 | xfs_iunlock(ip, XFS_ILOCK_SHARED); |
281 | 338 | ||
282 | if (error) | 339 | if (error) |
283 | return -XFS_ERROR(error); | 340 | return -XFS_ERROR(error); |
284 | 341 | ||
285 | if (type == IO_DELALLOC && | 342 | if (type == IO_DELALLOC && |
286 | (!nimaps || isnullstartblock(imap->br_startblock))) { | 343 | (!nimaps || isnullstartblock(imap->br_startblock))) { |
287 | error = xfs_iomap_write_allocate(ip, offset, count, imap); | 344 | error = xfs_iomap_write_allocate(ip, offset, count, imap); |
288 | if (!error) | 345 | if (!error) |
289 | trace_xfs_map_blocks_alloc(ip, offset, count, type, imap); | 346 | trace_xfs_map_blocks_alloc(ip, offset, count, type, imap); |
290 | return -XFS_ERROR(error); | 347 | return -XFS_ERROR(error); |
291 | } | 348 | } |
292 | 349 | ||
293 | #ifdef DEBUG | 350 | #ifdef DEBUG |
294 | if (type == IO_UNWRITTEN) { | 351 | if (type == IO_UNWRITTEN) { |
295 | ASSERT(nimaps); | 352 | ASSERT(nimaps); |
296 | ASSERT(imap->br_startblock != HOLESTARTBLOCK); | 353 | ASSERT(imap->br_startblock != HOLESTARTBLOCK); |
297 | ASSERT(imap->br_startblock != DELAYSTARTBLOCK); | 354 | ASSERT(imap->br_startblock != DELAYSTARTBLOCK); |
298 | } | 355 | } |
299 | #endif | 356 | #endif |
300 | if (nimaps) | 357 | if (nimaps) |
301 | trace_xfs_map_blocks_found(ip, offset, count, type, imap); | 358 | trace_xfs_map_blocks_found(ip, offset, count, type, imap); |
302 | return 0; | 359 | return 0; |
303 | } | 360 | } |
304 | 361 | ||
305 | STATIC int | 362 | STATIC int |
306 | xfs_imap_valid( | 363 | xfs_imap_valid( |
307 | struct inode *inode, | 364 | struct inode *inode, |
308 | struct xfs_bmbt_irec *imap, | 365 | struct xfs_bmbt_irec *imap, |
309 | xfs_off_t offset) | 366 | xfs_off_t offset) |
310 | { | 367 | { |
311 | offset >>= inode->i_blkbits; | 368 | offset >>= inode->i_blkbits; |
312 | 369 | ||
313 | return offset >= imap->br_startoff && | 370 | return offset >= imap->br_startoff && |
314 | offset < imap->br_startoff + imap->br_blockcount; | 371 | offset < imap->br_startoff + imap->br_blockcount; |
315 | } | 372 | } |
316 | 373 | ||
317 | /* | 374 | /* |
318 | * BIO completion handler for buffered IO. | 375 | * BIO completion handler for buffered IO. |
319 | */ | 376 | */ |
320 | STATIC void | 377 | STATIC void |
321 | xfs_end_bio( | 378 | xfs_end_bio( |
322 | struct bio *bio, | 379 | struct bio *bio, |
323 | int error) | 380 | int error) |
324 | { | 381 | { |
325 | xfs_ioend_t *ioend = bio->bi_private; | 382 | xfs_ioend_t *ioend = bio->bi_private; |
326 | 383 | ||
327 | ASSERT(atomic_read(&bio->bi_cnt) >= 1); | 384 | ASSERT(atomic_read(&bio->bi_cnt) >= 1); |
328 | ioend->io_error = test_bit(BIO_UPTODATE, &bio->bi_flags) ? 0 : error; | 385 | ioend->io_error = test_bit(BIO_UPTODATE, &bio->bi_flags) ? 0 : error; |
329 | 386 | ||
330 | /* Toss bio and pass work off to an xfsdatad thread */ | 387 | /* Toss bio and pass work off to an xfsdatad thread */ |
331 | bio->bi_private = NULL; | 388 | bio->bi_private = NULL; |
332 | bio->bi_end_io = NULL; | 389 | bio->bi_end_io = NULL; |
333 | bio_put(bio); | 390 | bio_put(bio); |
334 | 391 | ||
335 | xfs_finish_ioend(ioend); | 392 | xfs_finish_ioend(ioend); |
336 | } | 393 | } |
337 | 394 | ||
338 | STATIC void | 395 | STATIC void |
339 | xfs_submit_ioend_bio( | 396 | xfs_submit_ioend_bio( |
340 | struct writeback_control *wbc, | 397 | struct writeback_control *wbc, |
341 | xfs_ioend_t *ioend, | 398 | xfs_ioend_t *ioend, |
342 | struct bio *bio) | 399 | struct bio *bio) |
343 | { | 400 | { |
344 | struct xfs_inode *ip = XFS_I(ioend->io_inode); | ||
345 | atomic_inc(&ioend->io_remaining); | 401 | atomic_inc(&ioend->io_remaining); |
346 | bio->bi_private = ioend; | 402 | bio->bi_private = ioend; |
347 | bio->bi_end_io = xfs_end_bio; | 403 | bio->bi_end_io = xfs_end_bio; |
348 | |||
349 | /* | ||
350 | * If the I/O is beyond EOF we mark the inode dirty immediately | ||
351 | * but don't update the inode size until I/O completion. | ||
352 | */ | ||
353 | if (xfs_new_eof(ip, ioend->io_offset + ioend->io_size)) | ||
354 | xfs_mark_inode_dirty(ip); | ||
355 | |||
356 | submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE, bio); | 404 | submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE, bio); |
357 | } | 405 | } |
358 | 406 | ||
359 | STATIC struct bio * | 407 | STATIC struct bio * |
360 | xfs_alloc_ioend_bio( | 408 | xfs_alloc_ioend_bio( |
361 | struct buffer_head *bh) | 409 | struct buffer_head *bh) |
362 | { | 410 | { |
363 | int nvecs = bio_get_nr_vecs(bh->b_bdev); | 411 | int nvecs = bio_get_nr_vecs(bh->b_bdev); |
364 | struct bio *bio = bio_alloc(GFP_NOIO, nvecs); | 412 | struct bio *bio = bio_alloc(GFP_NOIO, nvecs); |
365 | 413 | ||
366 | ASSERT(bio->bi_private == NULL); | 414 | ASSERT(bio->bi_private == NULL); |
367 | bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9); | 415 | bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9); |
368 | bio->bi_bdev = bh->b_bdev; | 416 | bio->bi_bdev = bh->b_bdev; |
369 | return bio; | 417 | return bio; |
370 | } | 418 | } |
371 | 419 | ||
372 | STATIC void | 420 | STATIC void |
373 | xfs_start_buffer_writeback( | 421 | xfs_start_buffer_writeback( |
374 | struct buffer_head *bh) | 422 | struct buffer_head *bh) |
375 | { | 423 | { |
376 | ASSERT(buffer_mapped(bh)); | 424 | ASSERT(buffer_mapped(bh)); |
377 | ASSERT(buffer_locked(bh)); | 425 | ASSERT(buffer_locked(bh)); |
378 | ASSERT(!buffer_delay(bh)); | 426 | ASSERT(!buffer_delay(bh)); |
379 | ASSERT(!buffer_unwritten(bh)); | 427 | ASSERT(!buffer_unwritten(bh)); |
380 | 428 | ||
381 | mark_buffer_async_write(bh); | 429 | mark_buffer_async_write(bh); |
382 | set_buffer_uptodate(bh); | 430 | set_buffer_uptodate(bh); |
383 | clear_buffer_dirty(bh); | 431 | clear_buffer_dirty(bh); |
384 | } | 432 | } |
385 | 433 | ||
386 | STATIC void | 434 | STATIC void |
387 | xfs_start_page_writeback( | 435 | xfs_start_page_writeback( |
388 | struct page *page, | 436 | struct page *page, |
389 | int clear_dirty, | 437 | int clear_dirty, |
390 | int buffers) | 438 | int buffers) |
391 | { | 439 | { |
392 | ASSERT(PageLocked(page)); | 440 | ASSERT(PageLocked(page)); |
393 | ASSERT(!PageWriteback(page)); | 441 | ASSERT(!PageWriteback(page)); |
394 | if (clear_dirty) | 442 | if (clear_dirty) |
395 | clear_page_dirty_for_io(page); | 443 | clear_page_dirty_for_io(page); |
396 | set_page_writeback(page); | 444 | set_page_writeback(page); |
397 | unlock_page(page); | 445 | unlock_page(page); |
398 | /* If no buffers on the page are to be written, finish it here */ | 446 | /* If no buffers on the page are to be written, finish it here */ |
399 | if (!buffers) | 447 | if (!buffers) |
400 | end_page_writeback(page); | 448 | end_page_writeback(page); |
401 | } | 449 | } |
402 | 450 | ||
403 | static inline int bio_add_buffer(struct bio *bio, struct buffer_head *bh) | 451 | static inline int bio_add_buffer(struct bio *bio, struct buffer_head *bh) |
404 | { | 452 | { |
405 | return bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh)); | 453 | return bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh)); |
406 | } | 454 | } |
407 | 455 | ||
408 | /* | 456 | /* |
409 | * Submit all of the bios for all of the ioends we have saved up, covering the | 457 | * Submit all of the bios for all of the ioends we have saved up, covering the |
410 | * initial writepage page and also any probed pages. | 458 | * initial writepage page and also any probed pages. |
411 | * | 459 | * |
412 | * Because we may have multiple ioends spanning a page, we need to start | 460 | * Because we may have multiple ioends spanning a page, we need to start |
413 | * writeback on all the buffers before we submit them for I/O. If we mark the | 461 | * writeback on all the buffers before we submit them for I/O. If we mark the |
414 | * buffers as we got, then we can end up with a page that only has buffers | 462 | * buffers as we got, then we can end up with a page that only has buffers |
415 | * marked async write and I/O complete on can occur before we mark the other | 463 | * marked async write and I/O complete on can occur before we mark the other |
416 | * buffers async write. | 464 | * buffers async write. |
417 | * | 465 | * |
418 | * The end result of this is that we trip a bug in end_page_writeback() because | 466 | * The end result of this is that we trip a bug in end_page_writeback() because |
419 | * we call it twice for the one page as the code in end_buffer_async_write() | 467 | * we call it twice for the one page as the code in end_buffer_async_write() |
420 | * assumes that all buffers on the page are started at the same time. | 468 | * assumes that all buffers on the page are started at the same time. |
421 | * | 469 | * |
422 | * The fix is two passes across the ioend list - one to start writeback on the | 470 | * The fix is two passes across the ioend list - one to start writeback on the |
423 | * buffer_heads, and then submit them for I/O on the second pass. | 471 | * buffer_heads, and then submit them for I/O on the second pass. |
424 | */ | 472 | */ |
425 | STATIC void | 473 | STATIC void |
426 | xfs_submit_ioend( | 474 | xfs_submit_ioend( |
427 | struct writeback_control *wbc, | 475 | struct writeback_control *wbc, |
428 | xfs_ioend_t *ioend) | 476 | xfs_ioend_t *ioend) |
429 | { | 477 | { |
430 | xfs_ioend_t *head = ioend; | 478 | xfs_ioend_t *head = ioend; |
431 | xfs_ioend_t *next; | 479 | xfs_ioend_t *next; |
432 | struct buffer_head *bh; | 480 | struct buffer_head *bh; |
433 | struct bio *bio; | 481 | struct bio *bio; |
434 | sector_t lastblock = 0; | 482 | sector_t lastblock = 0; |
435 | 483 | ||
436 | /* Pass 1 - start writeback */ | 484 | /* Pass 1 - start writeback */ |
437 | do { | 485 | do { |
438 | next = ioend->io_list; | 486 | next = ioend->io_list; |
439 | for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) | 487 | for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) |
440 | xfs_start_buffer_writeback(bh); | 488 | xfs_start_buffer_writeback(bh); |
441 | } while ((ioend = next) != NULL); | 489 | } while ((ioend = next) != NULL); |
442 | 490 | ||
443 | /* Pass 2 - submit I/O */ | 491 | /* Pass 2 - submit I/O */ |
444 | ioend = head; | 492 | ioend = head; |
445 | do { | 493 | do { |
446 | next = ioend->io_list; | 494 | next = ioend->io_list; |
447 | bio = NULL; | 495 | bio = NULL; |
448 | 496 | ||
449 | for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) { | 497 | for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) { |
450 | 498 | ||
451 | if (!bio) { | 499 | if (!bio) { |
452 | retry: | 500 | retry: |
453 | bio = xfs_alloc_ioend_bio(bh); | 501 | bio = xfs_alloc_ioend_bio(bh); |
454 | } else if (bh->b_blocknr != lastblock + 1) { | 502 | } else if (bh->b_blocknr != lastblock + 1) { |
455 | xfs_submit_ioend_bio(wbc, ioend, bio); | 503 | xfs_submit_ioend_bio(wbc, ioend, bio); |
456 | goto retry; | 504 | goto retry; |
457 | } | 505 | } |
458 | 506 | ||
459 | if (bio_add_buffer(bio, bh) != bh->b_size) { | 507 | if (bio_add_buffer(bio, bh) != bh->b_size) { |
460 | xfs_submit_ioend_bio(wbc, ioend, bio); | 508 | xfs_submit_ioend_bio(wbc, ioend, bio); |
461 | goto retry; | 509 | goto retry; |
462 | } | 510 | } |
463 | 511 | ||
464 | lastblock = bh->b_blocknr; | 512 | lastblock = bh->b_blocknr; |
465 | } | 513 | } |
466 | if (bio) | 514 | if (bio) |
467 | xfs_submit_ioend_bio(wbc, ioend, bio); | 515 | xfs_submit_ioend_bio(wbc, ioend, bio); |
468 | xfs_finish_ioend(ioend); | 516 | xfs_finish_ioend(ioend); |
469 | } while ((ioend = next) != NULL); | 517 | } while ((ioend = next) != NULL); |
470 | } | 518 | } |
471 | 519 | ||
472 | /* | 520 | /* |
473 | * Cancel submission of all buffer_heads so far in this endio. | 521 | * Cancel submission of all buffer_heads so far in this endio. |
474 | * Toss the endio too. Only ever called for the initial page | 522 | * Toss the endio too. Only ever called for the initial page |
475 | * in a writepage request, so only ever one page. | 523 | * in a writepage request, so only ever one page. |
476 | */ | 524 | */ |
477 | STATIC void | 525 | STATIC void |
478 | xfs_cancel_ioend( | 526 | xfs_cancel_ioend( |
479 | xfs_ioend_t *ioend) | 527 | xfs_ioend_t *ioend) |
480 | { | 528 | { |
481 | xfs_ioend_t *next; | 529 | xfs_ioend_t *next; |
482 | struct buffer_head *bh, *next_bh; | 530 | struct buffer_head *bh, *next_bh; |
483 | 531 | ||
484 | do { | 532 | do { |
485 | next = ioend->io_list; | 533 | next = ioend->io_list; |
486 | bh = ioend->io_buffer_head; | 534 | bh = ioend->io_buffer_head; |
487 | do { | 535 | do { |
488 | next_bh = bh->b_private; | 536 | next_bh = bh->b_private; |
489 | clear_buffer_async_write(bh); | 537 | clear_buffer_async_write(bh); |
490 | unlock_buffer(bh); | 538 | unlock_buffer(bh); |
491 | } while ((bh = next_bh) != NULL); | 539 | } while ((bh = next_bh) != NULL); |
492 | 540 | ||
493 | mempool_free(ioend, xfs_ioend_pool); | 541 | mempool_free(ioend, xfs_ioend_pool); |
494 | } while ((ioend = next) != NULL); | 542 | } while ((ioend = next) != NULL); |
495 | } | 543 | } |
496 | 544 | ||
497 | /* | 545 | /* |
498 | * Test to see if we've been building up a completion structure for | 546 | * Test to see if we've been building up a completion structure for |
499 | * earlier buffers -- if so, we try to append to this ioend if we | 547 | * earlier buffers -- if so, we try to append to this ioend if we |
500 | * can, otherwise we finish off any current ioend and start another. | 548 | * can, otherwise we finish off any current ioend and start another. |
501 | * Return true if we've finished the given ioend. | 549 | * Return true if we've finished the given ioend. |
502 | */ | 550 | */ |
503 | STATIC void | 551 | STATIC void |
504 | xfs_add_to_ioend( | 552 | xfs_add_to_ioend( |
505 | struct inode *inode, | 553 | struct inode *inode, |
506 | struct buffer_head *bh, | 554 | struct buffer_head *bh, |
507 | xfs_off_t offset, | 555 | xfs_off_t offset, |
508 | unsigned int type, | 556 | unsigned int type, |
509 | xfs_ioend_t **result, | 557 | xfs_ioend_t **result, |
510 | int need_ioend) | 558 | int need_ioend) |
511 | { | 559 | { |
512 | xfs_ioend_t *ioend = *result; | 560 | xfs_ioend_t *ioend = *result; |
513 | 561 | ||
514 | if (!ioend || need_ioend || type != ioend->io_type) { | 562 | if (!ioend || need_ioend || type != ioend->io_type) { |
515 | xfs_ioend_t *previous = *result; | 563 | xfs_ioend_t *previous = *result; |
516 | 564 | ||
517 | ioend = xfs_alloc_ioend(inode, type); | 565 | ioend = xfs_alloc_ioend(inode, type); |
518 | ioend->io_offset = offset; | 566 | ioend->io_offset = offset; |
519 | ioend->io_buffer_head = bh; | 567 | ioend->io_buffer_head = bh; |
520 | ioend->io_buffer_tail = bh; | 568 | ioend->io_buffer_tail = bh; |
521 | if (previous) | 569 | if (previous) |
522 | previous->io_list = ioend; | 570 | previous->io_list = ioend; |
523 | *result = ioend; | 571 | *result = ioend; |
524 | } else { | 572 | } else { |
525 | ioend->io_buffer_tail->b_private = bh; | 573 | ioend->io_buffer_tail->b_private = bh; |
526 | ioend->io_buffer_tail = bh; | 574 | ioend->io_buffer_tail = bh; |
527 | } | 575 | } |
528 | 576 | ||
529 | bh->b_private = NULL; | 577 | bh->b_private = NULL; |
530 | ioend->io_size += bh->b_size; | 578 | ioend->io_size += bh->b_size; |
531 | } | 579 | } |
532 | 580 | ||
533 | STATIC void | 581 | STATIC void |
534 | xfs_map_buffer( | 582 | xfs_map_buffer( |
535 | struct inode *inode, | 583 | struct inode *inode, |
536 | struct buffer_head *bh, | 584 | struct buffer_head *bh, |
537 | struct xfs_bmbt_irec *imap, | 585 | struct xfs_bmbt_irec *imap, |
538 | xfs_off_t offset) | 586 | xfs_off_t offset) |
539 | { | 587 | { |
540 | sector_t bn; | 588 | sector_t bn; |
541 | struct xfs_mount *m = XFS_I(inode)->i_mount; | 589 | struct xfs_mount *m = XFS_I(inode)->i_mount; |
542 | xfs_off_t iomap_offset = XFS_FSB_TO_B(m, imap->br_startoff); | 590 | xfs_off_t iomap_offset = XFS_FSB_TO_B(m, imap->br_startoff); |
543 | xfs_daddr_t iomap_bn = xfs_fsb_to_db(XFS_I(inode), imap->br_startblock); | 591 | xfs_daddr_t iomap_bn = xfs_fsb_to_db(XFS_I(inode), imap->br_startblock); |
544 | 592 | ||
545 | ASSERT(imap->br_startblock != HOLESTARTBLOCK); | 593 | ASSERT(imap->br_startblock != HOLESTARTBLOCK); |
546 | ASSERT(imap->br_startblock != DELAYSTARTBLOCK); | 594 | ASSERT(imap->br_startblock != DELAYSTARTBLOCK); |
547 | 595 | ||
548 | bn = (iomap_bn >> (inode->i_blkbits - BBSHIFT)) + | 596 | bn = (iomap_bn >> (inode->i_blkbits - BBSHIFT)) + |
549 | ((offset - iomap_offset) >> inode->i_blkbits); | 597 | ((offset - iomap_offset) >> inode->i_blkbits); |
550 | 598 | ||
551 | ASSERT(bn || XFS_IS_REALTIME_INODE(XFS_I(inode))); | 599 | ASSERT(bn || XFS_IS_REALTIME_INODE(XFS_I(inode))); |
552 | 600 | ||
553 | bh->b_blocknr = bn; | 601 | bh->b_blocknr = bn; |
554 | set_buffer_mapped(bh); | 602 | set_buffer_mapped(bh); |
555 | } | 603 | } |
556 | 604 | ||
557 | STATIC void | 605 | STATIC void |
558 | xfs_map_at_offset( | 606 | xfs_map_at_offset( |
559 | struct inode *inode, | 607 | struct inode *inode, |
560 | struct buffer_head *bh, | 608 | struct buffer_head *bh, |
561 | struct xfs_bmbt_irec *imap, | 609 | struct xfs_bmbt_irec *imap, |
562 | xfs_off_t offset) | 610 | xfs_off_t offset) |
563 | { | 611 | { |
564 | ASSERT(imap->br_startblock != HOLESTARTBLOCK); | 612 | ASSERT(imap->br_startblock != HOLESTARTBLOCK); |
565 | ASSERT(imap->br_startblock != DELAYSTARTBLOCK); | 613 | ASSERT(imap->br_startblock != DELAYSTARTBLOCK); |
566 | 614 | ||
567 | xfs_map_buffer(inode, bh, imap, offset); | 615 | xfs_map_buffer(inode, bh, imap, offset); |
568 | set_buffer_mapped(bh); | 616 | set_buffer_mapped(bh); |
569 | clear_buffer_delay(bh); | 617 | clear_buffer_delay(bh); |
570 | clear_buffer_unwritten(bh); | 618 | clear_buffer_unwritten(bh); |
571 | } | 619 | } |
572 | 620 | ||
573 | /* | 621 | /* |
574 | * Test if a given page is suitable for writing as part of an unwritten | 622 | * Test if a given page is suitable for writing as part of an unwritten |
575 | * or delayed allocate extent. | 623 | * or delayed allocate extent. |
576 | */ | 624 | */ |
577 | STATIC int | 625 | STATIC int |
578 | xfs_is_delayed_page( | 626 | xfs_is_delayed_page( |
579 | struct page *page, | 627 | struct page *page, |
580 | unsigned int type) | 628 | unsigned int type) |
581 | { | 629 | { |
582 | if (PageWriteback(page)) | 630 | if (PageWriteback(page)) |
583 | return 0; | 631 | return 0; |
584 | 632 | ||
585 | if (page->mapping && page_has_buffers(page)) { | 633 | if (page->mapping && page_has_buffers(page)) { |
586 | struct buffer_head *bh, *head; | 634 | struct buffer_head *bh, *head; |
587 | int acceptable = 0; | 635 | int acceptable = 0; |
588 | 636 | ||
589 | bh = head = page_buffers(page); | 637 | bh = head = page_buffers(page); |
590 | do { | 638 | do { |
591 | if (buffer_unwritten(bh)) | 639 | if (buffer_unwritten(bh)) |
592 | acceptable = (type == IO_UNWRITTEN); | 640 | acceptable = (type == IO_UNWRITTEN); |
593 | else if (buffer_delay(bh)) | 641 | else if (buffer_delay(bh)) |
594 | acceptable = (type == IO_DELALLOC); | 642 | acceptable = (type == IO_DELALLOC); |
595 | else if (buffer_dirty(bh) && buffer_mapped(bh)) | 643 | else if (buffer_dirty(bh) && buffer_mapped(bh)) |
596 | acceptable = (type == IO_OVERWRITE); | 644 | acceptable = (type == IO_OVERWRITE); |
597 | else | 645 | else |
598 | break; | 646 | break; |
599 | } while ((bh = bh->b_this_page) != head); | 647 | } while ((bh = bh->b_this_page) != head); |
600 | 648 | ||
601 | if (acceptable) | 649 | if (acceptable) |
602 | return 1; | 650 | return 1; |
603 | } | 651 | } |
604 | 652 | ||
605 | return 0; | 653 | return 0; |
606 | } | 654 | } |
607 | 655 | ||
608 | /* | 656 | /* |
609 | * Allocate & map buffers for page given the extent map. Write it out. | 657 | * Allocate & map buffers for page given the extent map. Write it out. |
610 | * except for the original page of a writepage, this is called on | 658 | * except for the original page of a writepage, this is called on |
611 | * delalloc/unwritten pages only, for the original page it is possible | 659 | * delalloc/unwritten pages only, for the original page it is possible |
612 | * that the page has no mapping at all. | 660 | * that the page has no mapping at all. |
613 | */ | 661 | */ |
614 | STATIC int | 662 | STATIC int |
615 | xfs_convert_page( | 663 | xfs_convert_page( |
616 | struct inode *inode, | 664 | struct inode *inode, |
617 | struct page *page, | 665 | struct page *page, |
618 | loff_t tindex, | 666 | loff_t tindex, |
619 | struct xfs_bmbt_irec *imap, | 667 | struct xfs_bmbt_irec *imap, |
620 | xfs_ioend_t **ioendp, | 668 | xfs_ioend_t **ioendp, |
621 | struct writeback_control *wbc) | 669 | struct writeback_control *wbc) |
622 | { | 670 | { |
623 | struct buffer_head *bh, *head; | 671 | struct buffer_head *bh, *head; |
624 | xfs_off_t end_offset; | 672 | xfs_off_t end_offset; |
625 | unsigned long p_offset; | 673 | unsigned long p_offset; |
626 | unsigned int type; | 674 | unsigned int type; |
627 | int len, page_dirty; | 675 | int len, page_dirty; |
628 | int count = 0, done = 0, uptodate = 1; | 676 | int count = 0, done = 0, uptodate = 1; |
629 | xfs_off_t offset = page_offset(page); | 677 | xfs_off_t offset = page_offset(page); |
630 | 678 | ||
631 | if (page->index != tindex) | 679 | if (page->index != tindex) |
632 | goto fail; | 680 | goto fail; |
633 | if (!trylock_page(page)) | 681 | if (!trylock_page(page)) |
634 | goto fail; | 682 | goto fail; |
635 | if (PageWriteback(page)) | 683 | if (PageWriteback(page)) |
636 | goto fail_unlock_page; | 684 | goto fail_unlock_page; |
637 | if (page->mapping != inode->i_mapping) | 685 | if (page->mapping != inode->i_mapping) |
638 | goto fail_unlock_page; | 686 | goto fail_unlock_page; |
639 | if (!xfs_is_delayed_page(page, (*ioendp)->io_type)) | 687 | if (!xfs_is_delayed_page(page, (*ioendp)->io_type)) |
640 | goto fail_unlock_page; | 688 | goto fail_unlock_page; |
641 | 689 | ||
642 | /* | 690 | /* |
643 | * page_dirty is initially a count of buffers on the page before | 691 | * page_dirty is initially a count of buffers on the page before |
644 | * EOF and is decremented as we move each into a cleanable state. | 692 | * EOF and is decremented as we move each into a cleanable state. |
645 | * | 693 | * |
646 | * Derivation: | 694 | * Derivation: |
647 | * | 695 | * |
648 | * End offset is the highest offset that this page should represent. | 696 | * End offset is the highest offset that this page should represent. |
649 | * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1)) | 697 | * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1)) |
650 | * will evaluate non-zero and be less than PAGE_CACHE_SIZE and | 698 | * will evaluate non-zero and be less than PAGE_CACHE_SIZE and |
651 | * hence give us the correct page_dirty count. On any other page, | 699 | * hence give us the correct page_dirty count. On any other page, |
652 | * it will be zero and in that case we need page_dirty to be the | 700 | * it will be zero and in that case we need page_dirty to be the |
653 | * count of buffers on the page. | 701 | * count of buffers on the page. |
654 | */ | 702 | */ |
655 | end_offset = min_t(unsigned long long, | 703 | end_offset = min_t(unsigned long long, |
656 | (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT, | 704 | (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT, |
657 | i_size_read(inode)); | 705 | i_size_read(inode)); |
658 | 706 | ||
659 | len = 1 << inode->i_blkbits; | 707 | len = 1 << inode->i_blkbits; |
660 | p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1), | 708 | p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1), |
661 | PAGE_CACHE_SIZE); | 709 | PAGE_CACHE_SIZE); |
662 | p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE; | 710 | p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE; |
663 | page_dirty = p_offset / len; | 711 | page_dirty = p_offset / len; |
664 | 712 | ||
665 | bh = head = page_buffers(page); | 713 | bh = head = page_buffers(page); |
666 | do { | 714 | do { |
667 | if (offset >= end_offset) | 715 | if (offset >= end_offset) |
668 | break; | 716 | break; |
669 | if (!buffer_uptodate(bh)) | 717 | if (!buffer_uptodate(bh)) |
670 | uptodate = 0; | 718 | uptodate = 0; |
671 | if (!(PageUptodate(page) || buffer_uptodate(bh))) { | 719 | if (!(PageUptodate(page) || buffer_uptodate(bh))) { |
672 | done = 1; | 720 | done = 1; |
673 | continue; | 721 | continue; |
674 | } | 722 | } |
675 | 723 | ||
676 | if (buffer_unwritten(bh) || buffer_delay(bh) || | 724 | if (buffer_unwritten(bh) || buffer_delay(bh) || |
677 | buffer_mapped(bh)) { | 725 | buffer_mapped(bh)) { |
678 | if (buffer_unwritten(bh)) | 726 | if (buffer_unwritten(bh)) |
679 | type = IO_UNWRITTEN; | 727 | type = IO_UNWRITTEN; |
680 | else if (buffer_delay(bh)) | 728 | else if (buffer_delay(bh)) |
681 | type = IO_DELALLOC; | 729 | type = IO_DELALLOC; |
682 | else | 730 | else |
683 | type = IO_OVERWRITE; | 731 | type = IO_OVERWRITE; |
684 | 732 | ||
685 | if (!xfs_imap_valid(inode, imap, offset)) { | 733 | if (!xfs_imap_valid(inode, imap, offset)) { |
686 | done = 1; | 734 | done = 1; |
687 | continue; | 735 | continue; |
688 | } | 736 | } |
689 | 737 | ||
690 | lock_buffer(bh); | 738 | lock_buffer(bh); |
691 | if (type != IO_OVERWRITE) | 739 | if (type != IO_OVERWRITE) |
692 | xfs_map_at_offset(inode, bh, imap, offset); | 740 | xfs_map_at_offset(inode, bh, imap, offset); |
693 | xfs_add_to_ioend(inode, bh, offset, type, | 741 | xfs_add_to_ioend(inode, bh, offset, type, |
694 | ioendp, done); | 742 | ioendp, done); |
695 | 743 | ||
696 | page_dirty--; | 744 | page_dirty--; |
697 | count++; | 745 | count++; |
698 | } else { | 746 | } else { |
699 | done = 1; | 747 | done = 1; |
700 | } | 748 | } |
701 | } while (offset += len, (bh = bh->b_this_page) != head); | 749 | } while (offset += len, (bh = bh->b_this_page) != head); |
702 | 750 | ||
703 | if (uptodate && bh == head) | 751 | if (uptodate && bh == head) |
704 | SetPageUptodate(page); | 752 | SetPageUptodate(page); |
705 | 753 | ||
706 | if (count) { | 754 | if (count) { |
707 | if (--wbc->nr_to_write <= 0 && | 755 | if (--wbc->nr_to_write <= 0 && |
708 | wbc->sync_mode == WB_SYNC_NONE) | 756 | wbc->sync_mode == WB_SYNC_NONE) |
709 | done = 1; | 757 | done = 1; |
710 | } | 758 | } |
711 | xfs_start_page_writeback(page, !page_dirty, count); | 759 | xfs_start_page_writeback(page, !page_dirty, count); |
712 | 760 | ||
713 | return done; | 761 | return done; |
714 | fail_unlock_page: | 762 | fail_unlock_page: |
715 | unlock_page(page); | 763 | unlock_page(page); |
716 | fail: | 764 | fail: |
717 | return 1; | 765 | return 1; |
718 | } | 766 | } |
719 | 767 | ||
720 | /* | 768 | /* |
721 | * Convert & write out a cluster of pages in the same extent as defined | 769 | * Convert & write out a cluster of pages in the same extent as defined |
722 | * by mp and following the start page. | 770 | * by mp and following the start page. |
723 | */ | 771 | */ |
724 | STATIC void | 772 | STATIC void |
725 | xfs_cluster_write( | 773 | xfs_cluster_write( |
726 | struct inode *inode, | 774 | struct inode *inode, |
727 | pgoff_t tindex, | 775 | pgoff_t tindex, |
728 | struct xfs_bmbt_irec *imap, | 776 | struct xfs_bmbt_irec *imap, |
729 | xfs_ioend_t **ioendp, | 777 | xfs_ioend_t **ioendp, |
730 | struct writeback_control *wbc, | 778 | struct writeback_control *wbc, |
731 | pgoff_t tlast) | 779 | pgoff_t tlast) |
732 | { | 780 | { |
733 | struct pagevec pvec; | 781 | struct pagevec pvec; |
734 | int done = 0, i; | 782 | int done = 0, i; |
735 | 783 | ||
736 | pagevec_init(&pvec, 0); | 784 | pagevec_init(&pvec, 0); |
737 | while (!done && tindex <= tlast) { | 785 | while (!done && tindex <= tlast) { |
738 | unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1); | 786 | unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1); |
739 | 787 | ||
740 | if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len)) | 788 | if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len)) |
741 | break; | 789 | break; |
742 | 790 | ||
743 | for (i = 0; i < pagevec_count(&pvec); i++) { | 791 | for (i = 0; i < pagevec_count(&pvec); i++) { |
744 | done = xfs_convert_page(inode, pvec.pages[i], tindex++, | 792 | done = xfs_convert_page(inode, pvec.pages[i], tindex++, |
745 | imap, ioendp, wbc); | 793 | imap, ioendp, wbc); |
746 | if (done) | 794 | if (done) |
747 | break; | 795 | break; |
748 | } | 796 | } |
749 | 797 | ||
750 | pagevec_release(&pvec); | 798 | pagevec_release(&pvec); |
751 | cond_resched(); | 799 | cond_resched(); |
752 | } | 800 | } |
753 | } | 801 | } |
754 | 802 | ||
755 | STATIC void | 803 | STATIC void |
756 | xfs_vm_invalidatepage( | 804 | xfs_vm_invalidatepage( |
757 | struct page *page, | 805 | struct page *page, |
758 | unsigned long offset) | 806 | unsigned long offset) |
759 | { | 807 | { |
760 | trace_xfs_invalidatepage(page->mapping->host, page, offset); | 808 | trace_xfs_invalidatepage(page->mapping->host, page, offset); |
761 | block_invalidatepage(page, offset); | 809 | block_invalidatepage(page, offset); |
762 | } | 810 | } |
763 | 811 | ||
764 | /* | 812 | /* |
765 | * If the page has delalloc buffers on it, we need to punch them out before we | 813 | * If the page has delalloc buffers on it, we need to punch them out before we |
766 | * invalidate the page. If we don't, we leave a stale delalloc mapping on the | 814 | * invalidate the page. If we don't, we leave a stale delalloc mapping on the |
767 | * inode that can trip a BUG() in xfs_get_blocks() later on if a direct IO read | 815 | * inode that can trip a BUG() in xfs_get_blocks() later on if a direct IO read |
768 | * is done on that same region - the delalloc extent is returned when none is | 816 | * is done on that same region - the delalloc extent is returned when none is |
769 | * supposed to be there. | 817 | * supposed to be there. |
770 | * | 818 | * |
771 | * We prevent this by truncating away the delalloc regions on the page before | 819 | * We prevent this by truncating away the delalloc regions on the page before |
772 | * invalidating it. Because they are delalloc, we can do this without needing a | 820 | * invalidating it. Because they are delalloc, we can do this without needing a |
773 | * transaction. Indeed - if we get ENOSPC errors, we have to be able to do this | 821 | * transaction. Indeed - if we get ENOSPC errors, we have to be able to do this |
774 | * truncation without a transaction as there is no space left for block | 822 | * truncation without a transaction as there is no space left for block |
775 | * reservation (typically why we see a ENOSPC in writeback). | 823 | * reservation (typically why we see a ENOSPC in writeback). |
776 | * | 824 | * |
777 | * This is not a performance critical path, so for now just do the punching a | 825 | * This is not a performance critical path, so for now just do the punching a |
778 | * buffer head at a time. | 826 | * buffer head at a time. |
779 | */ | 827 | */ |
780 | STATIC void | 828 | STATIC void |
781 | xfs_aops_discard_page( | 829 | xfs_aops_discard_page( |
782 | struct page *page) | 830 | struct page *page) |
783 | { | 831 | { |
784 | struct inode *inode = page->mapping->host; | 832 | struct inode *inode = page->mapping->host; |
785 | struct xfs_inode *ip = XFS_I(inode); | 833 | struct xfs_inode *ip = XFS_I(inode); |
786 | struct buffer_head *bh, *head; | 834 | struct buffer_head *bh, *head; |
787 | loff_t offset = page_offset(page); | 835 | loff_t offset = page_offset(page); |
788 | 836 | ||
789 | if (!xfs_is_delayed_page(page, IO_DELALLOC)) | 837 | if (!xfs_is_delayed_page(page, IO_DELALLOC)) |
790 | goto out_invalidate; | 838 | goto out_invalidate; |
791 | 839 | ||
792 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) | 840 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) |
793 | goto out_invalidate; | 841 | goto out_invalidate; |
794 | 842 | ||
795 | xfs_alert(ip->i_mount, | 843 | xfs_alert(ip->i_mount, |
796 | "page discard on page %p, inode 0x%llx, offset %llu.", | 844 | "page discard on page %p, inode 0x%llx, offset %llu.", |
797 | page, ip->i_ino, offset); | 845 | page, ip->i_ino, offset); |
798 | 846 | ||
799 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 847 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
800 | bh = head = page_buffers(page); | 848 | bh = head = page_buffers(page); |
801 | do { | 849 | do { |
802 | int error; | 850 | int error; |
803 | xfs_fileoff_t start_fsb; | 851 | xfs_fileoff_t start_fsb; |
804 | 852 | ||
805 | if (!buffer_delay(bh)) | 853 | if (!buffer_delay(bh)) |
806 | goto next_buffer; | 854 | goto next_buffer; |
807 | 855 | ||
808 | start_fsb = XFS_B_TO_FSBT(ip->i_mount, offset); | 856 | start_fsb = XFS_B_TO_FSBT(ip->i_mount, offset); |
809 | error = xfs_bmap_punch_delalloc_range(ip, start_fsb, 1); | 857 | error = xfs_bmap_punch_delalloc_range(ip, start_fsb, 1); |
810 | if (error) { | 858 | if (error) { |
811 | /* something screwed, just bail */ | 859 | /* something screwed, just bail */ |
812 | if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { | 860 | if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { |
813 | xfs_alert(ip->i_mount, | 861 | xfs_alert(ip->i_mount, |
814 | "page discard unable to remove delalloc mapping."); | 862 | "page discard unable to remove delalloc mapping."); |
815 | } | 863 | } |
816 | break; | 864 | break; |
817 | } | 865 | } |
818 | next_buffer: | 866 | next_buffer: |
819 | offset += 1 << inode->i_blkbits; | 867 | offset += 1 << inode->i_blkbits; |
820 | 868 | ||
821 | } while ((bh = bh->b_this_page) != head); | 869 | } while ((bh = bh->b_this_page) != head); |
822 | 870 | ||
823 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 871 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
824 | out_invalidate: | 872 | out_invalidate: |
825 | xfs_vm_invalidatepage(page, 0); | 873 | xfs_vm_invalidatepage(page, 0); |
826 | return; | 874 | return; |
827 | } | 875 | } |
828 | 876 | ||
829 | /* | 877 | /* |
830 | * Write out a dirty page. | 878 | * Write out a dirty page. |
831 | * | 879 | * |
832 | * For delalloc space on the page we need to allocate space and flush it. | 880 | * For delalloc space on the page we need to allocate space and flush it. |
833 | * For unwritten space on the page we need to start the conversion to | 881 | * For unwritten space on the page we need to start the conversion to |
834 | * regular allocated space. | 882 | * regular allocated space. |
835 | * For any other dirty buffer heads on the page we should flush them. | 883 | * For any other dirty buffer heads on the page we should flush them. |
836 | */ | 884 | */ |
837 | STATIC int | 885 | STATIC int |
838 | xfs_vm_writepage( | 886 | xfs_vm_writepage( |
839 | struct page *page, | 887 | struct page *page, |
840 | struct writeback_control *wbc) | 888 | struct writeback_control *wbc) |
841 | { | 889 | { |
842 | struct inode *inode = page->mapping->host; | 890 | struct inode *inode = page->mapping->host; |
843 | struct buffer_head *bh, *head; | 891 | struct buffer_head *bh, *head; |
844 | struct xfs_bmbt_irec imap; | 892 | struct xfs_bmbt_irec imap; |
845 | xfs_ioend_t *ioend = NULL, *iohead = NULL; | 893 | xfs_ioend_t *ioend = NULL, *iohead = NULL; |
846 | loff_t offset; | 894 | loff_t offset; |
847 | unsigned int type; | 895 | unsigned int type; |
848 | __uint64_t end_offset; | 896 | __uint64_t end_offset; |
849 | pgoff_t end_index, last_index; | 897 | pgoff_t end_index, last_index; |
850 | ssize_t len; | 898 | ssize_t len; |
851 | int err, imap_valid = 0, uptodate = 1; | 899 | int err, imap_valid = 0, uptodate = 1; |
852 | int count = 0; | 900 | int count = 0; |
853 | int nonblocking = 0; | 901 | int nonblocking = 0; |
854 | 902 | ||
855 | trace_xfs_writepage(inode, page, 0); | 903 | trace_xfs_writepage(inode, page, 0); |
856 | 904 | ||
857 | ASSERT(page_has_buffers(page)); | 905 | ASSERT(page_has_buffers(page)); |
858 | 906 | ||
859 | /* | 907 | /* |
860 | * Refuse to write the page out if we are called from reclaim context. | 908 | * Refuse to write the page out if we are called from reclaim context. |
861 | * | 909 | * |
862 | * This avoids stack overflows when called from deeply used stacks in | 910 | * This avoids stack overflows when called from deeply used stacks in |
863 | * random callers for direct reclaim or memcg reclaim. We explicitly | 911 | * random callers for direct reclaim or memcg reclaim. We explicitly |
864 | * allow reclaim from kswapd as the stack usage there is relatively low. | 912 | * allow reclaim from kswapd as the stack usage there is relatively low. |
865 | * | 913 | * |
866 | * This should never happen except in the case of a VM regression so | 914 | * This should never happen except in the case of a VM regression so |
867 | * warn about it. | 915 | * warn about it. |
868 | */ | 916 | */ |
869 | if (WARN_ON_ONCE((current->flags & (PF_MEMALLOC|PF_KSWAPD)) == | 917 | if (WARN_ON_ONCE((current->flags & (PF_MEMALLOC|PF_KSWAPD)) == |
870 | PF_MEMALLOC)) | 918 | PF_MEMALLOC)) |
871 | goto redirty; | 919 | goto redirty; |
872 | 920 | ||
873 | /* | 921 | /* |
874 | * Given that we do not allow direct reclaim to call us, we should | 922 | * Given that we do not allow direct reclaim to call us, we should |
875 | * never be called while in a filesystem transaction. | 923 | * never be called while in a filesystem transaction. |
876 | */ | 924 | */ |
877 | if (WARN_ON(current->flags & PF_FSTRANS)) | 925 | if (WARN_ON(current->flags & PF_FSTRANS)) |
878 | goto redirty; | 926 | goto redirty; |
879 | 927 | ||
880 | /* Is this page beyond the end of the file? */ | 928 | /* Is this page beyond the end of the file? */ |
881 | offset = i_size_read(inode); | 929 | offset = i_size_read(inode); |
882 | end_index = offset >> PAGE_CACHE_SHIFT; | 930 | end_index = offset >> PAGE_CACHE_SHIFT; |
883 | last_index = (offset - 1) >> PAGE_CACHE_SHIFT; | 931 | last_index = (offset - 1) >> PAGE_CACHE_SHIFT; |
884 | if (page->index >= end_index) { | 932 | if (page->index >= end_index) { |
885 | if ((page->index >= end_index + 1) || | 933 | if ((page->index >= end_index + 1) || |
886 | !(i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) { | 934 | !(i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) { |
887 | unlock_page(page); | 935 | unlock_page(page); |
888 | return 0; | 936 | return 0; |
889 | } | 937 | } |
890 | } | 938 | } |
891 | 939 | ||
892 | end_offset = min_t(unsigned long long, | 940 | end_offset = min_t(unsigned long long, |
893 | (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT, | 941 | (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT, |
894 | offset); | 942 | offset); |
895 | len = 1 << inode->i_blkbits; | 943 | len = 1 << inode->i_blkbits; |
896 | 944 | ||
897 | bh = head = page_buffers(page); | 945 | bh = head = page_buffers(page); |
898 | offset = page_offset(page); | 946 | offset = page_offset(page); |
899 | type = IO_OVERWRITE; | 947 | type = IO_OVERWRITE; |
900 | 948 | ||
901 | if (wbc->sync_mode == WB_SYNC_NONE) | 949 | if (wbc->sync_mode == WB_SYNC_NONE) |
902 | nonblocking = 1; | 950 | nonblocking = 1; |
903 | 951 | ||
904 | do { | 952 | do { |
905 | int new_ioend = 0; | 953 | int new_ioend = 0; |
906 | 954 | ||
907 | if (offset >= end_offset) | 955 | if (offset >= end_offset) |
908 | break; | 956 | break; |
909 | if (!buffer_uptodate(bh)) | 957 | if (!buffer_uptodate(bh)) |
910 | uptodate = 0; | 958 | uptodate = 0; |
911 | 959 | ||
912 | /* | 960 | /* |
913 | * set_page_dirty dirties all buffers in a page, independent | 961 | * set_page_dirty dirties all buffers in a page, independent |
914 | * of their state. The dirty state however is entirely | 962 | * of their state. The dirty state however is entirely |
915 | * meaningless for holes (!mapped && uptodate), so skip | 963 | * meaningless for holes (!mapped && uptodate), so skip |
916 | * buffers covering holes here. | 964 | * buffers covering holes here. |
917 | */ | 965 | */ |
918 | if (!buffer_mapped(bh) && buffer_uptodate(bh)) { | 966 | if (!buffer_mapped(bh) && buffer_uptodate(bh)) { |
919 | imap_valid = 0; | 967 | imap_valid = 0; |
920 | continue; | 968 | continue; |
921 | } | 969 | } |
922 | 970 | ||
923 | if (buffer_unwritten(bh)) { | 971 | if (buffer_unwritten(bh)) { |
924 | if (type != IO_UNWRITTEN) { | 972 | if (type != IO_UNWRITTEN) { |
925 | type = IO_UNWRITTEN; | 973 | type = IO_UNWRITTEN; |
926 | imap_valid = 0; | 974 | imap_valid = 0; |
927 | } | 975 | } |
928 | } else if (buffer_delay(bh)) { | 976 | } else if (buffer_delay(bh)) { |
929 | if (type != IO_DELALLOC) { | 977 | if (type != IO_DELALLOC) { |
930 | type = IO_DELALLOC; | 978 | type = IO_DELALLOC; |
931 | imap_valid = 0; | 979 | imap_valid = 0; |
932 | } | 980 | } |
933 | } else if (buffer_uptodate(bh)) { | 981 | } else if (buffer_uptodate(bh)) { |
934 | if (type != IO_OVERWRITE) { | 982 | if (type != IO_OVERWRITE) { |
935 | type = IO_OVERWRITE; | 983 | type = IO_OVERWRITE; |
936 | imap_valid = 0; | 984 | imap_valid = 0; |
937 | } | 985 | } |
938 | } else { | 986 | } else { |
939 | if (PageUptodate(page)) { | 987 | if (PageUptodate(page)) { |
940 | ASSERT(buffer_mapped(bh)); | 988 | ASSERT(buffer_mapped(bh)); |
941 | imap_valid = 0; | 989 | imap_valid = 0; |
942 | } | 990 | } |
943 | continue; | 991 | continue; |
944 | } | 992 | } |
945 | 993 | ||
946 | if (imap_valid) | 994 | if (imap_valid) |
947 | imap_valid = xfs_imap_valid(inode, &imap, offset); | 995 | imap_valid = xfs_imap_valid(inode, &imap, offset); |
948 | if (!imap_valid) { | 996 | if (!imap_valid) { |
949 | /* | 997 | /* |
950 | * If we didn't have a valid mapping then we need to | 998 | * If we didn't have a valid mapping then we need to |
951 | * put the new mapping into a separate ioend structure. | 999 | * put the new mapping into a separate ioend structure. |
952 | * This ensures non-contiguous extents always have | 1000 | * This ensures non-contiguous extents always have |
953 | * separate ioends, which is particularly important | 1001 | * separate ioends, which is particularly important |
954 | * for unwritten extent conversion at I/O completion | 1002 | * for unwritten extent conversion at I/O completion |
955 | * time. | 1003 | * time. |
956 | */ | 1004 | */ |
957 | new_ioend = 1; | 1005 | new_ioend = 1; |
958 | err = xfs_map_blocks(inode, offset, &imap, type, | 1006 | err = xfs_map_blocks(inode, offset, &imap, type, |
959 | nonblocking); | 1007 | nonblocking); |
960 | if (err) | 1008 | if (err) |
961 | goto error; | 1009 | goto error; |
962 | imap_valid = xfs_imap_valid(inode, &imap, offset); | 1010 | imap_valid = xfs_imap_valid(inode, &imap, offset); |
963 | } | 1011 | } |
964 | if (imap_valid) { | 1012 | if (imap_valid) { |
965 | lock_buffer(bh); | 1013 | lock_buffer(bh); |
966 | if (type != IO_OVERWRITE) | 1014 | if (type != IO_OVERWRITE) |
967 | xfs_map_at_offset(inode, bh, &imap, offset); | 1015 | xfs_map_at_offset(inode, bh, &imap, offset); |
968 | xfs_add_to_ioend(inode, bh, offset, type, &ioend, | 1016 | xfs_add_to_ioend(inode, bh, offset, type, &ioend, |
969 | new_ioend); | 1017 | new_ioend); |
970 | count++; | 1018 | count++; |
971 | } | 1019 | } |
972 | 1020 | ||
973 | if (!iohead) | 1021 | if (!iohead) |
974 | iohead = ioend; | 1022 | iohead = ioend; |
975 | 1023 | ||
976 | } while (offset += len, ((bh = bh->b_this_page) != head)); | 1024 | } while (offset += len, ((bh = bh->b_this_page) != head)); |
977 | 1025 | ||
978 | if (uptodate && bh == head) | 1026 | if (uptodate && bh == head) |
979 | SetPageUptodate(page); | 1027 | SetPageUptodate(page); |
980 | 1028 | ||
981 | xfs_start_page_writeback(page, 1, count); | 1029 | xfs_start_page_writeback(page, 1, count); |
982 | 1030 | ||
983 | if (ioend && imap_valid) { | 1031 | if (ioend && imap_valid) { |
984 | xfs_off_t end_index; | 1032 | xfs_off_t end_index; |
985 | 1033 | ||
986 | end_index = imap.br_startoff + imap.br_blockcount; | 1034 | end_index = imap.br_startoff + imap.br_blockcount; |
987 | 1035 | ||
988 | /* to bytes */ | 1036 | /* to bytes */ |
989 | end_index <<= inode->i_blkbits; | 1037 | end_index <<= inode->i_blkbits; |
990 | 1038 | ||
991 | /* to pages */ | 1039 | /* to pages */ |
992 | end_index = (end_index - 1) >> PAGE_CACHE_SHIFT; | 1040 | end_index = (end_index - 1) >> PAGE_CACHE_SHIFT; |
993 | 1041 | ||
994 | /* check against file size */ | 1042 | /* check against file size */ |
995 | if (end_index > last_index) | 1043 | if (end_index > last_index) |
996 | end_index = last_index; | 1044 | end_index = last_index; |
997 | 1045 | ||
998 | xfs_cluster_write(inode, page->index + 1, &imap, &ioend, | 1046 | xfs_cluster_write(inode, page->index + 1, &imap, &ioend, |
999 | wbc, end_index); | 1047 | wbc, end_index); |
1000 | } | 1048 | } |
1001 | 1049 | ||
1002 | if (iohead) | 1050 | if (iohead) { |
1051 | /* | ||
1052 | * Reserve log space if we might write beyond the on-disk | ||
1053 | * inode size. | ||
1054 | */ | ||
1055 | if (ioend->io_type != IO_UNWRITTEN && | ||
1056 | xfs_ioend_is_append(ioend)) { | ||
1057 | err = xfs_setfilesize_trans_alloc(ioend); | ||
1058 | if (err) | ||
1059 | goto error; | ||
1060 | } | ||
1061 | |||
1003 | xfs_submit_ioend(wbc, iohead); | 1062 | xfs_submit_ioend(wbc, iohead); |
1063 | } | ||
1004 | 1064 | ||
1005 | return 0; | 1065 | return 0; |
1006 | 1066 | ||
1007 | error: | 1067 | error: |
1008 | if (iohead) | 1068 | if (iohead) |
1009 | xfs_cancel_ioend(iohead); | 1069 | xfs_cancel_ioend(iohead); |
1010 | 1070 | ||
1011 | if (err == -EAGAIN) | 1071 | if (err == -EAGAIN) |
1012 | goto redirty; | 1072 | goto redirty; |
1013 | 1073 | ||
1014 | xfs_aops_discard_page(page); | 1074 | xfs_aops_discard_page(page); |
1015 | ClearPageUptodate(page); | 1075 | ClearPageUptodate(page); |
1016 | unlock_page(page); | 1076 | unlock_page(page); |
1017 | return err; | 1077 | return err; |
1018 | 1078 | ||
1019 | redirty: | 1079 | redirty: |
1020 | redirty_page_for_writepage(wbc, page); | 1080 | redirty_page_for_writepage(wbc, page); |
1021 | unlock_page(page); | 1081 | unlock_page(page); |
1022 | return 0; | 1082 | return 0; |
1023 | } | 1083 | } |
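
The hunk added above reserves log space only when the queued ioend describes an append write, that is, when the I/O will end beyond the current on-disk inode size and the ioend is not an unwritten-extent conversion (which runs its own transaction at completion). The reservation is stashed in the ioend so that completion time only has to log the new size rather than allocate log space itself. A minimal user-space sketch of that predicate, using hypothetical demo_* names and made-up sizes in place of the real ioend fields, might look like this:

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-ins for the ioend fields and the on-disk inode size. */
struct demo_ioend {
        long long io_offset;    /* byte offset where the I/O starts */
        long long io_size;      /* length of the I/O in bytes */
        int       io_type;      /* 1 here means "unwritten conversion" */
};

#define DEMO_IO_UNWRITTEN 1

/* True when completion will have to log a new file size. */
static bool demo_needs_setfilesize(const struct demo_ioend *io,
                                   long long ondisk_size)
{
        if (io->io_type == DEMO_IO_UNWRITTEN)
                return false;   /* the conversion transaction covers it */
        return io->io_offset + io->io_size > ondisk_size;
}

int main(void)
{
        struct demo_ioend io = { .io_offset = 4096, .io_size = 8192 };

        printf("append write:      %d\n", demo_needs_setfilesize(&io, 8192));
        printf("write inside file: %d\n", demo_needs_setfilesize(&io, 1 << 20));
        return 0;
}
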
1024 | 1084 | ||
1025 | STATIC int | 1085 | STATIC int |
1026 | xfs_vm_writepages( | 1086 | xfs_vm_writepages( |
1027 | struct address_space *mapping, | 1087 | struct address_space *mapping, |
1028 | struct writeback_control *wbc) | 1088 | struct writeback_control *wbc) |
1029 | { | 1089 | { |
1030 | xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED); | 1090 | xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED); |
1031 | return generic_writepages(mapping, wbc); | 1091 | return generic_writepages(mapping, wbc); |
1032 | } | 1092 | } |
1033 | 1093 | ||
1034 | /* | 1094 | /* |
1035 | * Called to move a page into cleanable state - and from there | 1095 | * Called to move a page into cleanable state - and from there |
1036 | * to be released. The page should already be clean. We always | 1096 | * to be released. The page should already be clean. We always |
1037 | * have buffer heads in this call. | 1097 | * have buffer heads in this call. |
1038 | * | 1098 | * |
1039 | * Returns 1 if the page is ok to release, 0 otherwise. | 1099 | * Returns 1 if the page is ok to release, 0 otherwise. |
1040 | */ | 1100 | */ |
1041 | STATIC int | 1101 | STATIC int |
1042 | xfs_vm_releasepage( | 1102 | xfs_vm_releasepage( |
1043 | struct page *page, | 1103 | struct page *page, |
1044 | gfp_t gfp_mask) | 1104 | gfp_t gfp_mask) |
1045 | { | 1105 | { |
1046 | int delalloc, unwritten; | 1106 | int delalloc, unwritten; |
1047 | 1107 | ||
1048 | trace_xfs_releasepage(page->mapping->host, page, 0); | 1108 | trace_xfs_releasepage(page->mapping->host, page, 0); |
1049 | 1109 | ||
1050 | xfs_count_page_state(page, &delalloc, &unwritten); | 1110 | xfs_count_page_state(page, &delalloc, &unwritten); |
1051 | 1111 | ||
1052 | if (WARN_ON(delalloc)) | 1112 | if (WARN_ON(delalloc)) |
1053 | return 0; | 1113 | return 0; |
1054 | if (WARN_ON(unwritten)) | 1114 | if (WARN_ON(unwritten)) |
1055 | return 0; | 1115 | return 0; |
1056 | 1116 | ||
1057 | return try_to_free_buffers(page); | 1117 | return try_to_free_buffers(page); |
1058 | } | 1118 | } |
1059 | 1119 | ||
1060 | STATIC int | 1120 | STATIC int |
1061 | __xfs_get_blocks( | 1121 | __xfs_get_blocks( |
1062 | struct inode *inode, | 1122 | struct inode *inode, |
1063 | sector_t iblock, | 1123 | sector_t iblock, |
1064 | struct buffer_head *bh_result, | 1124 | struct buffer_head *bh_result, |
1065 | int create, | 1125 | int create, |
1066 | int direct) | 1126 | int direct) |
1067 | { | 1127 | { |
1068 | struct xfs_inode *ip = XFS_I(inode); | 1128 | struct xfs_inode *ip = XFS_I(inode); |
1069 | struct xfs_mount *mp = ip->i_mount; | 1129 | struct xfs_mount *mp = ip->i_mount; |
1070 | xfs_fileoff_t offset_fsb, end_fsb; | 1130 | xfs_fileoff_t offset_fsb, end_fsb; |
1071 | int error = 0; | 1131 | int error = 0; |
1072 | int lockmode = 0; | 1132 | int lockmode = 0; |
1073 | struct xfs_bmbt_irec imap; | 1133 | struct xfs_bmbt_irec imap; |
1074 | int nimaps = 1; | 1134 | int nimaps = 1; |
1075 | xfs_off_t offset; | 1135 | xfs_off_t offset; |
1076 | ssize_t size; | 1136 | ssize_t size; |
1077 | int new = 0; | 1137 | int new = 0; |
1078 | 1138 | ||
1079 | if (XFS_FORCED_SHUTDOWN(mp)) | 1139 | if (XFS_FORCED_SHUTDOWN(mp)) |
1080 | return -XFS_ERROR(EIO); | 1140 | return -XFS_ERROR(EIO); |
1081 | 1141 | ||
1082 | offset = (xfs_off_t)iblock << inode->i_blkbits; | 1142 | offset = (xfs_off_t)iblock << inode->i_blkbits; |
1083 | ASSERT(bh_result->b_size >= (1 << inode->i_blkbits)); | 1143 | ASSERT(bh_result->b_size >= (1 << inode->i_blkbits)); |
1084 | size = bh_result->b_size; | 1144 | size = bh_result->b_size; |
1085 | 1145 | ||
1086 | if (!create && direct && offset >= i_size_read(inode)) | 1146 | if (!create && direct && offset >= i_size_read(inode)) |
1087 | return 0; | 1147 | return 0; |
1088 | 1148 | ||
1089 | if (create) { | 1149 | if (create) { |
1090 | lockmode = XFS_ILOCK_EXCL; | 1150 | lockmode = XFS_ILOCK_EXCL; |
1091 | xfs_ilock(ip, lockmode); | 1151 | xfs_ilock(ip, lockmode); |
1092 | } else { | 1152 | } else { |
1093 | lockmode = xfs_ilock_map_shared(ip); | 1153 | lockmode = xfs_ilock_map_shared(ip); |
1094 | } | 1154 | } |
1095 | 1155 | ||
1096 | ASSERT(offset <= mp->m_maxioffset); | 1156 | ASSERT(offset <= mp->m_maxioffset); |
1097 | if (offset + size > mp->m_maxioffset) | 1157 | if (offset + size > mp->m_maxioffset) |
1098 | size = mp->m_maxioffset - offset; | 1158 | size = mp->m_maxioffset - offset; |
1099 | end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size); | 1159 | end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size); |
1100 | offset_fsb = XFS_B_TO_FSBT(mp, offset); | 1160 | offset_fsb = XFS_B_TO_FSBT(mp, offset); |
1101 | 1161 | ||
1102 | error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, | 1162 | error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, |
1103 | &imap, &nimaps, XFS_BMAPI_ENTIRE); | 1163 | &imap, &nimaps, XFS_BMAPI_ENTIRE); |
1104 | if (error) | 1164 | if (error) |
1105 | goto out_unlock; | 1165 | goto out_unlock; |
1106 | 1166 | ||
1107 | if (create && | 1167 | if (create && |
1108 | (!nimaps || | 1168 | (!nimaps || |
1109 | (imap.br_startblock == HOLESTARTBLOCK || | 1169 | (imap.br_startblock == HOLESTARTBLOCK || |
1110 | imap.br_startblock == DELAYSTARTBLOCK))) { | 1170 | imap.br_startblock == DELAYSTARTBLOCK))) { |
1111 | if (direct) { | 1171 | if (direct) { |
1112 | error = xfs_iomap_write_direct(ip, offset, size, | 1172 | error = xfs_iomap_write_direct(ip, offset, size, |
1113 | &imap, nimaps); | 1173 | &imap, nimaps); |
1114 | } else { | 1174 | } else { |
1115 | error = xfs_iomap_write_delay(ip, offset, size, &imap); | 1175 | error = xfs_iomap_write_delay(ip, offset, size, &imap); |
1116 | } | 1176 | } |
1117 | if (error) | 1177 | if (error) |
1118 | goto out_unlock; | 1178 | goto out_unlock; |
1119 | 1179 | ||
1120 | trace_xfs_get_blocks_alloc(ip, offset, size, 0, &imap); | 1180 | trace_xfs_get_blocks_alloc(ip, offset, size, 0, &imap); |
1121 | } else if (nimaps) { | 1181 | } else if (nimaps) { |
1122 | trace_xfs_get_blocks_found(ip, offset, size, 0, &imap); | 1182 | trace_xfs_get_blocks_found(ip, offset, size, 0, &imap); |
1123 | } else { | 1183 | } else { |
1124 | trace_xfs_get_blocks_notfound(ip, offset, size); | 1184 | trace_xfs_get_blocks_notfound(ip, offset, size); |
1125 | goto out_unlock; | 1185 | goto out_unlock; |
1126 | } | 1186 | } |
1127 | xfs_iunlock(ip, lockmode); | 1187 | xfs_iunlock(ip, lockmode); |
1128 | 1188 | ||
1129 | if (imap.br_startblock != HOLESTARTBLOCK && | 1189 | if (imap.br_startblock != HOLESTARTBLOCK && |
1130 | imap.br_startblock != DELAYSTARTBLOCK) { | 1190 | imap.br_startblock != DELAYSTARTBLOCK) { |
1131 | /* | 1191 | /* |
1132 | * For unwritten extents do not report a disk address on | 1192 | * For unwritten extents do not report a disk address on |
1133 | * the read case (treat as if we're reading into a hole). | 1193 | * the read case (treat as if we're reading into a hole). |
1134 | */ | 1194 | */ |
1135 | if (create || !ISUNWRITTEN(&imap)) | 1195 | if (create || !ISUNWRITTEN(&imap)) |
1136 | xfs_map_buffer(inode, bh_result, &imap, offset); | 1196 | xfs_map_buffer(inode, bh_result, &imap, offset); |
1137 | if (create && ISUNWRITTEN(&imap)) { | 1197 | if (create && ISUNWRITTEN(&imap)) { |
1138 | if (direct) | 1198 | if (direct) |
1139 | bh_result->b_private = inode; | 1199 | bh_result->b_private = inode; |
1140 | set_buffer_unwritten(bh_result); | 1200 | set_buffer_unwritten(bh_result); |
1141 | } | 1201 | } |
1142 | } | 1202 | } |
1143 | 1203 | ||
1144 | /* | 1204 | /* |
1145 | * If this is a realtime file, data may be on a different device | 1205 | * If this is a realtime file, data may be on a different device |
1146 | * to that pointed to from the buffer_head b_bdev currently. | 1206 | * to that pointed to from the buffer_head b_bdev currently. |
1147 | */ | 1207 | */ |
1148 | bh_result->b_bdev = xfs_find_bdev_for_inode(inode); | 1208 | bh_result->b_bdev = xfs_find_bdev_for_inode(inode); |
1149 | 1209 | ||
1150 | /* | 1210 | /* |
1151 | * If we previously allocated a block out beyond eof and we are now | 1211 | * If we previously allocated a block out beyond eof and we are now |
1152 | * coming back to use it then we will need to flag it as new even if it | 1212 | * coming back to use it then we will need to flag it as new even if it |
1153 | * has a disk address. | 1213 | * has a disk address. |
1154 | * | 1214 | * |
1155 | * With sub-block writes into unwritten extents we also need to mark | 1215 | * With sub-block writes into unwritten extents we also need to mark |
1156 | * the buffer as new so that the unwritten parts of the buffer get | 1216 | * the buffer as new so that the unwritten parts of the buffer get |
1157 | * correctly zeroed. | 1217 | * correctly zeroed. |
1158 | */ | 1218 | */ |
1159 | if (create && | 1219 | if (create && |
1160 | ((!buffer_mapped(bh_result) && !buffer_uptodate(bh_result)) || | 1220 | ((!buffer_mapped(bh_result) && !buffer_uptodate(bh_result)) || |
1161 | (offset >= i_size_read(inode)) || | 1221 | (offset >= i_size_read(inode)) || |
1162 | (new || ISUNWRITTEN(&imap)))) | 1222 | (new || ISUNWRITTEN(&imap)))) |
1163 | set_buffer_new(bh_result); | 1223 | set_buffer_new(bh_result); |
1164 | 1224 | ||
1165 | if (imap.br_startblock == DELAYSTARTBLOCK) { | 1225 | if (imap.br_startblock == DELAYSTARTBLOCK) { |
1166 | BUG_ON(direct); | 1226 | BUG_ON(direct); |
1167 | if (create) { | 1227 | if (create) { |
1168 | set_buffer_uptodate(bh_result); | 1228 | set_buffer_uptodate(bh_result); |
1169 | set_buffer_mapped(bh_result); | 1229 | set_buffer_mapped(bh_result); |
1170 | set_buffer_delay(bh_result); | 1230 | set_buffer_delay(bh_result); |
1171 | } | 1231 | } |
1172 | } | 1232 | } |
1173 | 1233 | ||
1174 | /* | 1234 | /* |
1175 | * If this is O_DIRECT or the mpage code calling tell them how large | 1235 | * If this is O_DIRECT or the mpage code calling tell them how large |
1176 | * the mapping is, so that we can avoid repeated get_blocks calls. | 1236 | * the mapping is, so that we can avoid repeated get_blocks calls. |
1177 | */ | 1237 | */ |
1178 | if (direct || size > (1 << inode->i_blkbits)) { | 1238 | if (direct || size > (1 << inode->i_blkbits)) { |
1179 | xfs_off_t mapping_size; | 1239 | xfs_off_t mapping_size; |
1180 | 1240 | ||
1181 | mapping_size = imap.br_startoff + imap.br_blockcount - iblock; | 1241 | mapping_size = imap.br_startoff + imap.br_blockcount - iblock; |
1182 | mapping_size <<= inode->i_blkbits; | 1242 | mapping_size <<= inode->i_blkbits; |
1183 | 1243 | ||
1184 | ASSERT(mapping_size > 0); | 1244 | ASSERT(mapping_size > 0); |
1185 | if (mapping_size > size) | 1245 | if (mapping_size > size) |
1186 | mapping_size = size; | 1246 | mapping_size = size; |
1187 | if (mapping_size > LONG_MAX) | 1247 | if (mapping_size > LONG_MAX) |
1188 | mapping_size = LONG_MAX; | 1248 | mapping_size = LONG_MAX; |
1189 | 1249 | ||
1190 | bh_result->b_size = mapping_size; | 1250 | bh_result->b_size = mapping_size; |
1191 | } | 1251 | } |
1192 | 1252 | ||
1193 | return 0; | 1253 | return 0; |
1194 | 1254 | ||
1195 | out_unlock: | 1255 | out_unlock: |
1196 | xfs_iunlock(ip, lockmode); | 1256 | xfs_iunlock(ip, lockmode); |
1197 | return -error; | 1257 | return -error; |
1198 | } | 1258 | } |
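
When the caller is direct I/O or the mpage code, __xfs_get_blocks reports in bh_result->b_size how far the mapping it found extends, so the caller does not have to come back once per block. The arithmetic is: blocks remaining in the mapping from iblock onward, shifted up to bytes, then clamped to the size that was asked for and to LONG_MAX. A stand-alone sketch of that calculation, with made-up values in place of a real xfs_bmbt_irec:

#include <limits.h>
#include <stdio.h>

int main(void)
{
        unsigned int blkbits    = 12;           /* assume 4k blocks */
        long long    iblock     = 10;           /* block the caller asked for */
        long long    startoff   = 8;            /* mapping start, in blocks */
        long long    blockcount = 100;          /* mapping length, in blocks */
        long long    size       = 64 * 1024;    /* bytes the caller asked for */

        long long mapping_size = (startoff + blockcount - iblock) << blkbits;

        if (mapping_size > size)
                mapping_size = size;            /* never report more than requested */
        if (mapping_size > LONG_MAX)
                mapping_size = LONG_MAX;        /* matches the kernel's defensive clamp */

        printf("b_size would be %lld bytes\n", mapping_size);
        return 0;
}
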
1199 | 1259 | ||
1200 | int | 1260 | int |
1201 | xfs_get_blocks( | 1261 | xfs_get_blocks( |
1202 | struct inode *inode, | 1262 | struct inode *inode, |
1203 | sector_t iblock, | 1263 | sector_t iblock, |
1204 | struct buffer_head *bh_result, | 1264 | struct buffer_head *bh_result, |
1205 | int create) | 1265 | int create) |
1206 | { | 1266 | { |
1207 | return __xfs_get_blocks(inode, iblock, bh_result, create, 0); | 1267 | return __xfs_get_blocks(inode, iblock, bh_result, create, 0); |
1208 | } | 1268 | } |
1209 | 1269 | ||
1210 | STATIC int | 1270 | STATIC int |
1211 | xfs_get_blocks_direct( | 1271 | xfs_get_blocks_direct( |
1212 | struct inode *inode, | 1272 | struct inode *inode, |
1213 | sector_t iblock, | 1273 | sector_t iblock, |
1214 | struct buffer_head *bh_result, | 1274 | struct buffer_head *bh_result, |
1215 | int create) | 1275 | int create) |
1216 | { | 1276 | { |
1217 | return __xfs_get_blocks(inode, iblock, bh_result, create, 1); | 1277 | return __xfs_get_blocks(inode, iblock, bh_result, create, 1); |
1218 | } | 1278 | } |
1219 | 1279 | ||
1220 | /* | 1280 | /* |
1221 | * Complete a direct I/O write request. | 1281 | * Complete a direct I/O write request. |
1222 | * | 1282 | * |
1223 | * If the private argument is non-NULL __xfs_get_blocks signals us that we | 1283 | * If the private argument is non-NULL __xfs_get_blocks signals us that we |
1224 | * need to issue a transaction to convert the range from unwritten to written | 1284 | * need to issue a transaction to convert the range from unwritten to written |
1225 | * extents. In case this is regular synchronous I/O we just call xfs_end_io | 1285 | * extents. In case this is regular synchronous I/O we just call xfs_end_io |
1226 | * to do this and we are done. But in case this was a successful AIO | 1286 | * to do this and we are done. But in case this was a successful AIO |
1227 | * request this handler is called from interrupt context, from which we | 1287 | * request this handler is called from interrupt context, from which we |
1228 | * can't start transactions. In that case offload the I/O completion to | 1288 | * can't start transactions. In that case offload the I/O completion to |
1229 | * the workqueues we also use for buffered I/O completion. | 1289 | * the workqueues we also use for buffered I/O completion. |
1230 | */ | 1290 | */ |
1231 | STATIC void | 1291 | STATIC void |
1232 | xfs_end_io_direct_write( | 1292 | xfs_end_io_direct_write( |
1233 | struct kiocb *iocb, | 1293 | struct kiocb *iocb, |
1234 | loff_t offset, | 1294 | loff_t offset, |
1235 | ssize_t size, | 1295 | ssize_t size, |
1236 | void *private, | 1296 | void *private, |
1237 | int ret, | 1297 | int ret, |
1238 | bool is_async) | 1298 | bool is_async) |
1239 | { | 1299 | { |
1240 | struct xfs_ioend *ioend = iocb->private; | 1300 | struct xfs_ioend *ioend = iocb->private; |
1241 | 1301 | ||
1242 | /* | 1302 | /* |
1243 | * While the generic direct I/O code updates the inode size, it does | 1303 | * While the generic direct I/O code updates the inode size, it does |
1244 | * so only after the end_io handler is called, which means our | 1304 | * so only after the end_io handler is called, which means our |
1245 | * end_io handler thinks the on-disk size is outside the in-core | 1305 | * end_io handler thinks the on-disk size is outside the in-core |
1246 | * size. To prevent this just update it a little bit earlier here. | 1306 | * size. To prevent this just update it a little bit earlier here. |
1247 | */ | 1307 | */ |
1248 | if (offset + size > i_size_read(ioend->io_inode)) | 1308 | if (offset + size > i_size_read(ioend->io_inode)) |
1249 | i_size_write(ioend->io_inode, offset + size); | 1309 | i_size_write(ioend->io_inode, offset + size); |
1250 | 1310 | ||
1251 | /* | 1311 | /* |
1252 | * blockdev_direct_IO can return an error even after the I/O | 1312 | * blockdev_direct_IO can return an error even after the I/O |
1253 | * completion handler was called. Thus we need to protect | 1313 | * completion handler was called. Thus we need to protect |
1254 | * against double-freeing. | 1314 | * against double-freeing. |
1255 | */ | 1315 | */ |
1256 | iocb->private = NULL; | 1316 | iocb->private = NULL; |
1257 | 1317 | ||
1258 | ioend->io_offset = offset; | 1318 | ioend->io_offset = offset; |
1259 | ioend->io_size = size; | 1319 | ioend->io_size = size; |
1260 | ioend->io_iocb = iocb; | 1320 | ioend->io_iocb = iocb; |
1261 | ioend->io_result = ret; | 1321 | ioend->io_result = ret; |
1262 | if (private && size > 0) | 1322 | if (private && size > 0) |
1263 | ioend->io_type = IO_UNWRITTEN; | 1323 | ioend->io_type = IO_UNWRITTEN; |
1264 | 1324 | ||
1265 | if (is_async) { | 1325 | if (is_async) { |
1266 | ioend->io_isasync = 1; | 1326 | ioend->io_isasync = 1; |
1267 | xfs_finish_ioend(ioend); | 1327 | xfs_finish_ioend(ioend); |
1268 | } else { | 1328 | } else { |
1269 | xfs_finish_ioend_sync(ioend); | 1329 | xfs_finish_ioend_sync(ioend); |
1270 | } | 1330 | } |
1271 | } | 1331 | } |
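
The comment above xfs_end_io_direct_write spells out the split: synchronous direct I/O can run the unwritten-extent conversion right away, but a completed AIO request arrives in interrupt context, where a transaction cannot be started, so the work is handed to the same workqueues used for buffered I/O completion. A loose user-space analogue of that run-inline-or-hand-off shape, using a pthread where the kernel uses a workqueue (build with -pthread), could be:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

/* Toy completion record; the real code passes an xfs_ioend around. */
struct demo_completion {
        long long offset;
        long long size;
};

static void demo_complete(struct demo_completion *c)
{
        /* stands in for the transactional work done at I/O completion */
        printf("completing %lld bytes at offset %lld\n", c->size, c->offset);
}

static void *demo_worker(void *arg)
{
        demo_complete(arg);
        return NULL;
}

/* Run completion inline when the context allows it, otherwise defer it. */
static void demo_finish(struct demo_completion *c, bool is_async)
{
        if (is_async) {
                pthread_t t;

                pthread_create(&t, NULL, demo_worker, c);
                pthread_join(t, NULL);  /* only so the toy program waits */
        } else {
                demo_complete(c);
        }
}

int main(void)
{
        struct demo_completion c = { .offset = 0, .size = 4096 };

        demo_finish(&c, false); /* synchronous direct I/O path */
        demo_finish(&c, true);  /* AIO path: work deferred to another context */
        return 0;
}
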
1272 | 1332 | ||
1273 | STATIC ssize_t | 1333 | STATIC ssize_t |
1274 | xfs_vm_direct_IO( | 1334 | xfs_vm_direct_IO( |
1275 | int rw, | 1335 | int rw, |
1276 | struct kiocb *iocb, | 1336 | struct kiocb *iocb, |
1277 | const struct iovec *iov, | 1337 | const struct iovec *iov, |
1278 | loff_t offset, | 1338 | loff_t offset, |
1279 | unsigned long nr_segs) | 1339 | unsigned long nr_segs) |
1280 | { | 1340 | { |
1281 | struct inode *inode = iocb->ki_filp->f_mapping->host; | 1341 | struct inode *inode = iocb->ki_filp->f_mapping->host; |
1282 | struct block_device *bdev = xfs_find_bdev_for_inode(inode); | 1342 | struct block_device *bdev = xfs_find_bdev_for_inode(inode); |
1343 | struct xfs_ioend *ioend = NULL; | ||
1283 | ssize_t ret; | 1344 | ssize_t ret; |
1284 | 1345 | ||
1285 | if (rw & WRITE) { | 1346 | if (rw & WRITE) { |
1286 | iocb->private = xfs_alloc_ioend(inode, IO_DIRECT); | 1347 | size_t size = iov_length(iov, nr_segs); |
1287 | 1348 | ||
1349 | /* | ||
1350 | * We need to preallocate a transaction for a size update | ||
1351 | * here. In the case that this write both updates the size | ||
1352 | * and converts at least one unwritten extent we will cancel | ||
1353 | * the still clean transaction after the I/O has finished. | ||
1354 | */ | ||
1355 | iocb->private = ioend = xfs_alloc_ioend(inode, IO_DIRECT); | ||
1356 | if (offset + size > XFS_I(inode)->i_d.di_size) { | ||
1357 | ret = xfs_setfilesize_trans_alloc(ioend); | ||
1358 | if (ret) | ||
1359 | goto out_destroy_ioend; | ||
1360 | ioend->io_isdirect = 1; | ||
1361 | } | ||
1362 | |||
1288 | ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, | 1363 | ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, |
1289 | offset, nr_segs, | 1364 | offset, nr_segs, |
1290 | xfs_get_blocks_direct, | 1365 | xfs_get_blocks_direct, |
1291 | xfs_end_io_direct_write, NULL, 0); | 1366 | xfs_end_io_direct_write, NULL, 0); |
1292 | if (ret != -EIOCBQUEUED && iocb->private) | 1367 | if (ret != -EIOCBQUEUED && iocb->private) |
1293 | xfs_destroy_ioend(iocb->private); | 1368 | goto out_trans_cancel; |
1294 | } else { | 1369 | } else { |
1295 | ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, | 1370 | ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, |
1296 | offset, nr_segs, | 1371 | offset, nr_segs, |
1297 | xfs_get_blocks_direct, | 1372 | xfs_get_blocks_direct, |
1298 | NULL, NULL, 0); | 1373 | NULL, NULL, 0); |
1299 | } | 1374 | } |
1300 | 1375 | ||
1376 | return ret; | ||
1377 | |||
1378 | out_trans_cancel: | ||
1379 | if (ioend->io_append_trans) { | ||
1380 | current_set_flags_nested(&ioend->io_append_trans->t_pflags, | ||
1381 | PF_FSTRANS); | ||
1382 | xfs_trans_cancel(ioend->io_append_trans, 0); | ||
1383 | } | ||
1384 | out_destroy_ioend: | ||
1385 | xfs_destroy_ioend(ioend); | ||
1301 | return ret; | 1386 | return ret; |
1302 | } | 1387 | } |
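
The write branch now sums the whole request up front with iov_length() and only preallocates the size-update transaction when offset plus that length can land beyond the on-disk inode size; otherwise the plain ioend is enough. A user-space sketch of that decision, summing <sys/uio.h> iovecs by hand and using an assumed on-disk size:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <sys/uio.h>

/* User-space twin of the kernel's iov_length(): total bytes in all segments. */
static size_t demo_iov_length(const struct iovec *iov, unsigned long nr_segs)
{
        size_t len = 0;
        unsigned long i;

        for (i = 0; i < nr_segs; i++)
                len += iov[i].iov_len;
        return len;
}

int main(void)
{
        static char a[4096], b[4096];
        struct iovec iov[2] = {
                { .iov_base = a, .iov_len = sizeof(a) },
                { .iov_base = b, .iov_len = sizeof(b) },
        };
        long long offset      = 12288; /* where the write starts (made up) */
        long long ondisk_size = 8192;  /* assumed di_size before the write */

        size_t size = demo_iov_length(iov, 2);
        bool need_size_update = offset + (long long)size > ondisk_size;

        printf("write of %zu bytes %s a size-update transaction\n", size,
               need_size_update ? "needs" : "does not need");
        return 0;
}
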
1303 | 1388 | ||
1304 | STATIC void | 1389 | STATIC void |
1305 | xfs_vm_write_failed( | 1390 | xfs_vm_write_failed( |
1306 | struct address_space *mapping, | 1391 | struct address_space *mapping, |
1307 | loff_t to) | 1392 | loff_t to) |
1308 | { | 1393 | { |
1309 | struct inode *inode = mapping->host; | 1394 | struct inode *inode = mapping->host; |
1310 | 1395 | ||
1311 | if (to > inode->i_size) { | 1396 | if (to > inode->i_size) { |
1312 | /* | 1397 | /* |
1313 | * Punch out the delalloc blocks we have already allocated. | 1398 | * Punch out the delalloc blocks we have already allocated. |
1314 | * | 1399 | * |
1315 | * Don't bother with xfs_setattr given that nothing can have | 1400 | * Don't bother with xfs_setattr given that nothing can have |
1316 | * made it to disk yet as the page is still locked at this | 1401 | * made it to disk yet as the page is still locked at this |
1317 | * point. | 1402 | * point. |
1318 | */ | 1403 | */ |
1319 | struct xfs_inode *ip = XFS_I(inode); | 1404 | struct xfs_inode *ip = XFS_I(inode); |
1320 | xfs_fileoff_t start_fsb; | 1405 | xfs_fileoff_t start_fsb; |
1321 | xfs_fileoff_t end_fsb; | 1406 | xfs_fileoff_t end_fsb; |
1322 | int error; | 1407 | int error; |
1323 | 1408 | ||
1324 | truncate_pagecache(inode, to, inode->i_size); | 1409 | truncate_pagecache(inode, to, inode->i_size); |
1325 | 1410 | ||
1326 | /* | 1411 | /* |
1327 | * Check if there are any blocks that are outside of i_size | 1412 | * Check if there are any blocks that are outside of i_size |
1328 | * that need to be trimmed back. | 1413 | * that need to be trimmed back. |
1329 | */ | 1414 | */ |
1330 | start_fsb = XFS_B_TO_FSB(ip->i_mount, inode->i_size) + 1; | 1415 | start_fsb = XFS_B_TO_FSB(ip->i_mount, inode->i_size) + 1; |
1331 | end_fsb = XFS_B_TO_FSB(ip->i_mount, to); | 1416 | end_fsb = XFS_B_TO_FSB(ip->i_mount, to); |
1332 | if (end_fsb <= start_fsb) | 1417 | if (end_fsb <= start_fsb) |
1333 | return; | 1418 | return; |
1334 | 1419 | ||
1335 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 1420 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
1336 | error = xfs_bmap_punch_delalloc_range(ip, start_fsb, | 1421 | error = xfs_bmap_punch_delalloc_range(ip, start_fsb, |
1337 | end_fsb - start_fsb); | 1422 | end_fsb - start_fsb); |
1338 | if (error) { | 1423 | if (error) { |
1339 | /* something screwed, just bail */ | 1424 | /* something screwed, just bail */ |
1340 | if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { | 1425 | if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { |
1341 | xfs_alert(ip->i_mount, | 1426 | xfs_alert(ip->i_mount, |
1342 | "xfs_vm_write_failed: unable to clean up ino %lld", | 1427 | "xfs_vm_write_failed: unable to clean up ino %lld", |
1343 | ip->i_ino); | 1428 | ip->i_ino); |
1344 | } | 1429 | } |
1345 | } | 1430 | } |
1346 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 1431 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
1347 | } | 1432 | } |
1348 | } | 1433 | } |
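
On a failed or short write that was going to extend the file, the cleanup path truncates the page cache back and then punches out any delalloc blocks sitting past i_size, converting the byte range between i_size and the failed write's end into filesystem blocks first. Assuming XFS_B_TO_FSB rounds a byte count up to whole blocks, the rounding and the early-return check can be tried out in isolation with made-up 4k-block numbers:

#include <stdio.h>

/* Assumption: round a byte count up to whole 4k filesystem blocks. */
#define DEMO_BLKLOG     12
#define DEMO_BLKSIZE    (1ULL << DEMO_BLKLOG)
#define DEMO_B_TO_FSB(b) (((b) + DEMO_BLKSIZE - 1) >> DEMO_BLKLOG)

int main(void)
{
        unsigned long long isize = 10000;       /* current file size in bytes */
        unsigned long long to    = 30000;       /* end of the failed write */

        unsigned long long start_fsb = DEMO_B_TO_FSB(isize) + 1;
        unsigned long long end_fsb   = DEMO_B_TO_FSB(to);

        if (end_fsb <= start_fsb)
                printf("nothing beyond i_size to punch\n");
        else
                printf("punch blocks %llu..%llu (%llu blocks)\n",
                       start_fsb, end_fsb - 1, end_fsb - start_fsb);
        return 0;
}
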
1349 | 1434 | ||
1350 | STATIC int | 1435 | STATIC int |
1351 | xfs_vm_write_begin( | 1436 | xfs_vm_write_begin( |
1352 | struct file *file, | 1437 | struct file *file, |
1353 | struct address_space *mapping, | 1438 | struct address_space *mapping, |
1354 | loff_t pos, | 1439 | loff_t pos, |
1355 | unsigned len, | 1440 | unsigned len, |
1356 | unsigned flags, | 1441 | unsigned flags, |
1357 | struct page **pagep, | 1442 | struct page **pagep, |
1358 | void **fsdata) | 1443 | void **fsdata) |
1359 | { | 1444 | { |
1360 | int ret; | 1445 | int ret; |
1361 | 1446 | ||
1362 | ret = block_write_begin(mapping, pos, len, flags | AOP_FLAG_NOFS, | 1447 | ret = block_write_begin(mapping, pos, len, flags | AOP_FLAG_NOFS, |
1363 | pagep, xfs_get_blocks); | 1448 | pagep, xfs_get_blocks); |
1364 | if (unlikely(ret)) | 1449 | if (unlikely(ret)) |
1365 | xfs_vm_write_failed(mapping, pos + len); | 1450 | xfs_vm_write_failed(mapping, pos + len); |
1366 | return ret; | 1451 | return ret; |
1367 | } | 1452 | } |
1368 | 1453 | ||
1369 | STATIC int | 1454 | STATIC int |
1370 | xfs_vm_write_end( | 1455 | xfs_vm_write_end( |
1371 | struct file *file, | 1456 | struct file *file, |
1372 | struct address_space *mapping, | 1457 | struct address_space *mapping, |
1373 | loff_t pos, | 1458 | loff_t pos, |
1374 | unsigned len, | 1459 | unsigned len, |
1375 | unsigned copied, | 1460 | unsigned copied, |
1376 | struct page *page, | 1461 | struct page *page, |
1377 | void *fsdata) | 1462 | void *fsdata) |
1378 | { | 1463 | { |
1379 | int ret; | 1464 | int ret; |
1380 | 1465 | ||
1381 | ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata); | 1466 | ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata); |
1382 | if (unlikely(ret < len)) | 1467 | if (unlikely(ret < len)) |
1383 | xfs_vm_write_failed(mapping, pos + len); | 1468 | xfs_vm_write_failed(mapping, pos + len); |
1384 | return ret; | 1469 | return ret; |
1385 | } | 1470 | } |
1386 | 1471 | ||
1387 | STATIC sector_t | 1472 | STATIC sector_t |
1388 | xfs_vm_bmap( | 1473 | xfs_vm_bmap( |
1389 | struct address_space *mapping, | 1474 | struct address_space *mapping, |
1390 | sector_t block) | 1475 | sector_t block) |
1391 | { | 1476 | { |
1392 | struct inode *inode = (struct inode *)mapping->host; | 1477 | struct inode *inode = (struct inode *)mapping->host; |
1393 | struct xfs_inode *ip = XFS_I(inode); | 1478 | struct xfs_inode *ip = XFS_I(inode); |
1394 | 1479 | ||
1395 | trace_xfs_vm_bmap(XFS_I(inode)); | 1480 | trace_xfs_vm_bmap(XFS_I(inode)); |
1396 | xfs_ilock(ip, XFS_IOLOCK_SHARED); | 1481 | xfs_ilock(ip, XFS_IOLOCK_SHARED); |
1397 | xfs_flush_pages(ip, (xfs_off_t)0, -1, 0, FI_REMAPF); | 1482 | xfs_flush_pages(ip, (xfs_off_t)0, -1, 0, FI_REMAPF); |
1398 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); | 1483 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); |
1399 | return generic_block_bmap(mapping, block, xfs_get_blocks); | 1484 | return generic_block_bmap(mapping, block, xfs_get_blocks); |
1400 | } | 1485 | } |
1401 | 1486 | ||
1402 | STATIC int | 1487 | STATIC int |
1403 | xfs_vm_readpage( | 1488 | xfs_vm_readpage( |
1404 | struct file *unused, | 1489 | struct file *unused, |
1405 | struct page *page) | 1490 | struct page *page) |
1406 | { | 1491 | { |
1407 | return mpage_readpage(page, xfs_get_blocks); | 1492 | return mpage_readpage(page, xfs_get_blocks); |
1408 | } | 1493 | } |
1409 | 1494 | ||
1410 | STATIC int | 1495 | STATIC int |
1411 | xfs_vm_readpages( | 1496 | xfs_vm_readpages( |
1412 | struct file *unused, | 1497 | struct file *unused, |
1413 | struct address_space *mapping, | 1498 | struct address_space *mapping, |
1414 | struct list_head *pages, | 1499 | struct list_head *pages, |
1415 | unsigned nr_pages) | 1500 | unsigned nr_pages) |
1416 | { | 1501 | { |
1417 | return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks); | 1502 | return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks); |
1418 | } | 1503 | } |
1419 | 1504 | ||
1420 | const struct address_space_operations xfs_address_space_operations = { | 1505 | const struct address_space_operations xfs_address_space_operations = { |
1421 | .readpage = xfs_vm_readpage, | 1506 | .readpage = xfs_vm_readpage, |
1422 | .readpages = xfs_vm_readpages, | 1507 | .readpages = xfs_vm_readpages, |
fs/xfs/xfs_aops.h
1 | /* | 1 | /* |
2 | * Copyright (c) 2005-2006 Silicon Graphics, Inc. | 2 | * Copyright (c) 2005-2006 Silicon Graphics, Inc. |
3 | * All Rights Reserved. | 3 | * All Rights Reserved. |
4 | * | 4 | * |
5 | * This program is free software; you can redistribute it and/or | 5 | * This program is free software; you can redistribute it and/or |
6 | * modify it under the terms of the GNU General Public License as | 6 | * modify it under the terms of the GNU General Public License as |
7 | * published by the Free Software Foundation. | 7 | * published by the Free Software Foundation. |
8 | * | 8 | * |
9 | * This program is distributed in the hope that it would be useful, | 9 | * This program is distributed in the hope that it would be useful, |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
12 | * GNU General Public License for more details. | 12 | * GNU General Public License for more details. |
13 | * | 13 | * |
14 | * You should have received a copy of the GNU General Public License | 14 | * You should have received a copy of the GNU General Public License |
15 | * along with this program; if not, write the Free Software Foundation, | 15 | * along with this program; if not, write the Free Software Foundation, |
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | 16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
17 | */ | 17 | */ |
18 | #ifndef __XFS_AOPS_H__ | 18 | #ifndef __XFS_AOPS_H__ |
19 | #define __XFS_AOPS_H__ | 19 | #define __XFS_AOPS_H__ |
20 | 20 | ||
21 | extern mempool_t *xfs_ioend_pool; | 21 | extern mempool_t *xfs_ioend_pool; |
22 | 22 | ||
23 | /* | 23 | /* |
24 | * Types of I/O for bmap clustering and I/O completion tracking. | 24 | * Types of I/O for bmap clustering and I/O completion tracking. |
25 | */ | 25 | */ |
26 | enum { | 26 | enum { |
27 | IO_DIRECT = 0, /* special case for direct I/O ioends */ | 27 | IO_DIRECT = 0, /* special case for direct I/O ioends */ |
28 | IO_DELALLOC, /* mapping covers delalloc region */ | 28 | IO_DELALLOC, /* mapping covers delalloc region */ |
29 | IO_UNWRITTEN, /* mapping covers allocated but uninitialized data */ | 29 | IO_UNWRITTEN, /* mapping covers allocated but uninitialized data */ |
30 | IO_OVERWRITE, /* mapping covers already allocated extent */ | 30 | IO_OVERWRITE, /* mapping covers already allocated extent */ |
31 | }; | 31 | }; |
32 | 32 | ||
33 | #define XFS_IO_TYPES \ | 33 | #define XFS_IO_TYPES \ |
34 | { 0, "" }, \ | 34 | { 0, "" }, \ |
35 | { IO_DELALLOC, "delalloc" }, \ | 35 | { IO_DELALLOC, "delalloc" }, \ |
36 | { IO_UNWRITTEN, "unwritten" }, \ | 36 | { IO_UNWRITTEN, "unwritten" }, \ |
37 | { IO_OVERWRITE, "overwrite" } | 37 | { IO_OVERWRITE, "overwrite" } |
38 | 38 | ||
39 | /* | 39 | /* |
40 | * xfs_ioend struct manages large extent writes for XFS. | 40 | * xfs_ioend struct manages large extent writes for XFS. |
41 | * It can manage several multi-page bio's at once. | 41 | * It can manage several multi-page bio's at once. |
42 | */ | 42 | */ |
43 | typedef struct xfs_ioend { | 43 | typedef struct xfs_ioend { |
44 | struct xfs_ioend *io_list; /* next ioend in chain */ | 44 | struct xfs_ioend *io_list; /* next ioend in chain */ |
45 | unsigned int io_type; /* delalloc / unwritten */ | 45 | unsigned int io_type; /* delalloc / unwritten */ |
46 | int io_error; /* I/O error code */ | 46 | int io_error; /* I/O error code */ |
47 | atomic_t io_remaining; /* hold count */ | 47 | atomic_t io_remaining; /* hold count */ |
48 | unsigned int io_isasync : 1; /* needs aio_complete */ | 48 | unsigned int io_isasync : 1; /* needs aio_complete */ |
49 | unsigned int io_isdirect : 1;/* direct I/O */ | ||
49 | struct inode *io_inode; /* file being written to */ | 50 | struct inode *io_inode; /* file being written to */ |
50 | struct buffer_head *io_buffer_head;/* buffer linked list head */ | 51 | struct buffer_head *io_buffer_head;/* buffer linked list head */ |
51 | struct buffer_head *io_buffer_tail;/* buffer linked list tail */ | 52 | struct buffer_head *io_buffer_tail;/* buffer linked list tail */ |
52 | size_t io_size; /* size of the extent */ | 53 | size_t io_size; /* size of the extent */ |
53 | xfs_off_t io_offset; /* offset in the file */ | 54 | xfs_off_t io_offset; /* offset in the file */ |
54 | struct work_struct io_work; /* xfsdatad work queue */ | 55 | struct work_struct io_work; /* xfsdatad work queue */ |
56 | struct xfs_trans *io_append_trans;/* xact. for size update */ | ||
55 | struct kiocb *io_iocb; | 57 | struct kiocb *io_iocb; |
56 | int io_result; | 58 | int io_result; |
57 | } xfs_ioend_t; | 59 | } xfs_ioend_t; |
58 | 60 | ||
59 | extern const struct address_space_operations xfs_address_space_operations; | 61 | extern const struct address_space_operations xfs_address_space_operations; |
60 | extern int xfs_get_blocks(struct inode *, sector_t, struct buffer_head *, int); | 62 | extern int xfs_get_blocks(struct inode *, sector_t, struct buffer_head *, int); |
61 | 63 | ||
62 | extern void xfs_count_page_state(struct page *, int *, int *); | 64 | extern void xfs_count_page_state(struct page *, int *, int *); |
63 | 65 | ||
64 | #endif /* __XFS_AOPS_H__ */ | 66 | #endif /* __XFS_AOPS_H__ */ |
65 | 67 |