Commit f2a459565b02b60408f3f2e5ca992a031319712b

Authored by Dave Chinner
Committed by Ben Myers
1 parent eb178619f9

xfs: limit speculative prealloc near ENOSPC thresholds

There is a window on small filesystems where speculative
preallocation can be larger than the ENOSPC throttling thresholds,
resulting in speculative preallocation trying to reserve more space
than there is available. This causes an immediate ENOSPC to be
triggered, prealloc to be turned off and flushing to occur. On the
next write (i.e. the next 4k page), we do exactly the same thing, and
so we effectively drive the file into synchronous 4k writes by
triggering ENOSPC flushing on every page while in the window between
the prealloc size and the ENOSPC prealloc throttle threshold.

Fix this by checking whether the prealloc size would consume all
free space, and throttling it appropriately to avoid premature
ENOSPC...

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
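
For illustration, the window this patch closes can be modelled in
user space. The following is a simplified sketch of the sizing logic
in xfs_iomap_prealloc_size(), not the kernel code: the filesystem
size, the free-space thresholds and the 2^21-block extent cap are
assumed values, chosen only to show how the pre-patch throttle can
fail to engage on a small filesystem.

#include <stdint.h>
#include <stdio.h>

#define MAX_EXT_BLOCKS  (1ULL << 21)    /* on the order of XFS's MAXEXTLEN */

/* Modelled on rounddown_pow_of_two(): largest power of two <= v. */
static uint64_t rounddown_pow2(uint64_t v)
{
        uint64_t r = 1;

        while (r * 2 <= v)
                r *= 2;
        return r;
}

/*
 * Simplified model of xfs_iomap_prealloc_size(). "thresholds" stands in
 * for mp->m_low_space[]: 1%..5% of the filesystem, smallest first. With
 * "squash" zero this models the pre-patch behaviour.
 */
static uint64_t prealloc_size(uint64_t isize_blocks, uint64_t freesp,
                              const uint64_t thresholds[5], int squash)
{
        uint64_t alloc_blocks;
        int shift = 0, i;

        alloc_blocks = rounddown_pow2(isize_blocks + 1);
        if (alloc_blocks > MAX_EXT_BLOCKS)
                alloc_blocks = MAX_EXT_BLOCKS;

        if (freesp < thresholds[4]) {           /* below the 5% threshold */
                shift = 2;
                for (i = 3; i >= 0; i--)        /* 4%, 3%, 2%, 1% */
                        if (freesp < thresholds[i])
                                shift++;
        }
        alloc_blocks >>= shift;

        if (squash)                             /* the loop this patch adds */
                while (alloc_blocks >= freesp)
                        alloc_blocks >>= 4;

        return alloc_blocks;
}

int main(void)
{
        /* Assumed: 4GB filesystem of 4k blocks, ~10% free. */
        uint64_t freesp = 104857;
        uint64_t t[5] = { 10485, 20971, 31457, 41943, 52428 }; /* 1%..5% */

        /* A sparse 2GB file: prealloc wants a 2^19-block (2GB) extent. */
        printf("pre-patch:  %llu blocks\n",
               (unsigned long long)prealloc_size(1ULL << 19, freesp, t, 0));
        printf("post-patch: %llu blocks\n",
               (unsigned long long)prealloc_size(1ULL << 19, freesp, t, 1));
        return 0;
}

Here freesp (104857 blocks) is above every low-space threshold, so the
pre-patch shift never fires, yet the 524288-block prealloc is roughly
five times the free space: the delalloc reservation fails and ENOSPC
flushing kicks in. The post-patch squash loop cuts the prealloc to
32768 blocks instead.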

Showing 1 changed file with 9 additions and 0 deletions (added lines marked with a leading +)

/*
 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_log.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_alloc.h"
#include "xfs_quota.h"
#include "xfs_mount.h"
#include "xfs_bmap_btree.h"
#include "xfs_alloc_btree.h"
#include "xfs_ialloc_btree.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_inode_item.h"
#include "xfs_btree.h"
#include "xfs_bmap.h"
#include "xfs_rtalloc.h"
#include "xfs_error.h"
#include "xfs_itable.h"
#include "xfs_attr.h"
#include "xfs_buf_item.h"
#include "xfs_trans_space.h"
#include "xfs_utils.h"
#include "xfs_iomap.h"
#include "xfs_trace.h"
#include "xfs_icache.h"


#define XFS_WRITEIO_ALIGN(mp,off)       (((off) >> mp->m_writeio_log) \
                                         << mp->m_writeio_log)
#define XFS_WRITE_IMAPS         XFS_BMAP_MAX_NMAP

STATIC int
xfs_iomap_eof_align_last_fsb(
        xfs_mount_t     *mp,
        xfs_inode_t     *ip,
        xfs_extlen_t    extsize,
        xfs_fileoff_t   *last_fsb)
{
        xfs_fileoff_t   new_last_fsb = 0;
        xfs_extlen_t    align = 0;
        int             eof, error;

        if (!XFS_IS_REALTIME_INODE(ip)) {
                /*
                 * Round up the allocation request to a stripe unit
                 * (m_dalign) boundary if the file size is >= stripe unit
                 * size, and we are allocating past the allocation eof.
                 *
                 * If mounted with the "-o swalloc" option the alignment is
                 * increased from the stripe unit size to the stripe width.
                 */
                if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC))
                        align = mp->m_swidth;
                else if (mp->m_dalign)
                        align = mp->m_dalign;

                if (align && XFS_ISIZE(ip) >= XFS_FSB_TO_B(mp, align))
                        new_last_fsb = roundup_64(*last_fsb, align);
        }

        /*
         * Always round up the allocation request to an extent boundary
         * (when file on a real-time subvolume or has di_extsize hint).
         */
        if (extsize) {
                if (new_last_fsb)
                        align = roundup_64(new_last_fsb, extsize);
                else
                        align = extsize;
                new_last_fsb = roundup_64(*last_fsb, align);
        }

        if (new_last_fsb) {
                error = xfs_bmap_eof(ip, new_last_fsb, XFS_DATA_FORK, &eof);
                if (error)
                        return error;
                if (eof)
                        *last_fsb = new_last_fsb;
        }
        return 0;
}

STATIC int
xfs_alert_fsblock_zero(
        xfs_inode_t     *ip,
        xfs_bmbt_irec_t *imap)
{
        xfs_alert_tag(ip->i_mount, XFS_PTAG_FSBLOCK_ZERO,
                        "Access to block zero in inode %llu "
                        "start_block: %llx start_off: %llx "
                        "blkcnt: %llx extent-state: %x\n",
                (unsigned long long)ip->i_ino,
                (unsigned long long)imap->br_startblock,
                (unsigned long long)imap->br_startoff,
                (unsigned long long)imap->br_blockcount,
                imap->br_state);
        return EFSCORRUPTED;
}

int
xfs_iomap_write_direct(
        xfs_inode_t     *ip,
        xfs_off_t       offset,
        size_t          count,
        xfs_bmbt_irec_t *imap,
        int             nmaps)
{
        xfs_mount_t     *mp = ip->i_mount;
        xfs_fileoff_t   offset_fsb;
        xfs_fileoff_t   last_fsb;
        xfs_filblks_t   count_fsb, resaligned;
        xfs_fsblock_t   firstfsb;
        xfs_extlen_t    extsz, temp;
        int             nimaps;
        int             bmapi_flag;
        int             quota_flag;
        int             rt;
        xfs_trans_t     *tp;
        xfs_bmap_free_t free_list;
        uint            qblocks, resblks, resrtextents;
        int             committed;
        int             error;

        error = xfs_qm_dqattach(ip, 0);
        if (error)
                return XFS_ERROR(error);

        rt = XFS_IS_REALTIME_INODE(ip);
        extsz = xfs_get_extsz_hint(ip);

        offset_fsb = XFS_B_TO_FSBT(mp, offset);
        last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
        if ((offset + count) > XFS_ISIZE(ip)) {
                error = xfs_iomap_eof_align_last_fsb(mp, ip, extsz, &last_fsb);
                if (error)
                        return XFS_ERROR(error);
        } else {
                if (nmaps && (imap->br_startblock == HOLESTARTBLOCK))
                        last_fsb = MIN(last_fsb, (xfs_fileoff_t)
                                        imap->br_blockcount +
                                        imap->br_startoff);
        }
        count_fsb = last_fsb - offset_fsb;
        ASSERT(count_fsb > 0);

        resaligned = count_fsb;
        if (unlikely(extsz)) {
                if ((temp = do_mod(offset_fsb, extsz)))
                        resaligned += temp;
                if ((temp = do_mod(resaligned, extsz)))
                        resaligned += extsz - temp;
        }

        if (unlikely(rt)) {
                resrtextents = qblocks = resaligned;
                resrtextents /= mp->m_sb.sb_rextsize;
                resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
                quota_flag = XFS_QMOPT_RES_RTBLKS;
        } else {
                resrtextents = 0;
                resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned);
                quota_flag = XFS_QMOPT_RES_REGBLKS;
        }

        /*
         * Allocate and setup the transaction
         */
        tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
        error = xfs_trans_reserve(tp, resblks,
                        XFS_WRITE_LOG_RES(mp), resrtextents,
                        XFS_TRANS_PERM_LOG_RES,
                        XFS_WRITE_LOG_COUNT);
        /*
         * Check for running out of space, note: need lock to return
         */
        if (error) {
                xfs_trans_cancel(tp, 0);
                return XFS_ERROR(error);
        }

        xfs_ilock(ip, XFS_ILOCK_EXCL);

        error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks, 0, quota_flag);
        if (error)
                goto out_trans_cancel;

        xfs_trans_ijoin(tp, ip, 0);

        bmapi_flag = 0;
        if (offset < XFS_ISIZE(ip) || extsz)
                bmapi_flag |= XFS_BMAPI_PREALLOC;

        /*
         * From this point onwards we overwrite the imap pointer that the
         * caller gave to us.
         */
        xfs_bmap_init(&free_list, &firstfsb);
        nimaps = 1;
        error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, bmapi_flag,
                                &firstfsb, 0, imap, &nimaps, &free_list);
        if (error)
                goto out_bmap_cancel;

        /*
         * Complete the transaction
         */
        error = xfs_bmap_finish(&tp, &free_list, &committed);
        if (error)
                goto out_bmap_cancel;
        error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
        if (error)
                goto out_unlock;

        /*
         * Copy any maps to caller's array and return any error.
         */
        if (nimaps == 0) {
                error = XFS_ERROR(ENOSPC);
                goto out_unlock;
        }

        if (!(imap->br_startblock || XFS_IS_REALTIME_INODE(ip)))
                error = xfs_alert_fsblock_zero(ip, imap);

out_unlock:
        xfs_iunlock(ip, XFS_ILOCK_EXCL);
        return error;

out_bmap_cancel:
        xfs_bmap_cancel(&free_list);
        xfs_trans_unreserve_quota_nblks(tp, ip, (long)qblocks, 0, quota_flag);
out_trans_cancel:
        xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
        goto out_unlock;
}

/*
 * If the caller is doing a write at the end of the file, then extend the
 * allocation out to the file system's write iosize. We clean up any extra
 * space left over when the file is closed in xfs_inactive().
 *
 * If we find we already have delalloc preallocation beyond EOF, don't do more
 * preallocation as it is not needed.
 */
STATIC int
xfs_iomap_eof_want_preallocate(
        xfs_mount_t     *mp,
        xfs_inode_t     *ip,
        xfs_off_t       offset,
        size_t          count,
        xfs_bmbt_irec_t *imap,
        int             nimaps,
        int             *prealloc)
{
        xfs_fileoff_t   start_fsb;
        xfs_filblks_t   count_fsb;
        xfs_fsblock_t   firstblock;
        int             n, error, imaps;
        int             found_delalloc = 0;

        *prealloc = 0;
        if (offset + count <= XFS_ISIZE(ip))
                return 0;

        /*
         * If there are any real blocks past eof, then don't
         * do any speculative allocation.
         */
        start_fsb = XFS_B_TO_FSBT(mp, ((xfs_ufsize_t)(offset + count - 1)));
        count_fsb = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
        while (count_fsb > 0) {
                imaps = nimaps;
                firstblock = NULLFSBLOCK;
                error = xfs_bmapi_read(ip, start_fsb, count_fsb, imap, &imaps,
                                       0);
                if (error)
                        return error;
                for (n = 0; n < imaps; n++) {
                        if ((imap[n].br_startblock != HOLESTARTBLOCK) &&
                            (imap[n].br_startblock != DELAYSTARTBLOCK))
                                return 0;
                        start_fsb += imap[n].br_blockcount;
                        count_fsb -= imap[n].br_blockcount;

                        if (imap[n].br_startblock == DELAYSTARTBLOCK)
                                found_delalloc = 1;
                }
        }
        if (!found_delalloc)
                *prealloc = 1;
        return 0;
}

/*
 * If we don't have a user specified preallocation size, dynamically increase
 * the preallocation size as the size of the file grows. Cap the maximum size
 * at a single extent or less if the filesystem is near full. The closer the
 * filesystem is to full, the smaller the maximum preallocation.
 */
STATIC xfs_fsblock_t
xfs_iomap_prealloc_size(
        struct xfs_mount        *mp,
        struct xfs_inode        *ip)
{
        xfs_fsblock_t           alloc_blocks = 0;

        if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)) {
                int shift = 0;
                int64_t freesp;

                /*
                 * rounddown_pow_of_two() returns an undefined result
                 * if we pass in alloc_blocks = 0. Hence the "+ 1" to
                 * ensure we always pass in a non-zero value.
                 */
                alloc_blocks = XFS_B_TO_FSB(mp, XFS_ISIZE(ip)) + 1;
                alloc_blocks = XFS_FILEOFF_MIN(MAXEXTLEN,
                                        rounddown_pow_of_two(alloc_blocks));

                xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT);
                freesp = mp->m_sb.sb_fdblocks;
                if (freesp < mp->m_low_space[XFS_LOWSP_5_PCNT]) {
                        shift = 2;
                        if (freesp < mp->m_low_space[XFS_LOWSP_4_PCNT])
                                shift++;
                        if (freesp < mp->m_low_space[XFS_LOWSP_3_PCNT])
                                shift++;
                        if (freesp < mp->m_low_space[XFS_LOWSP_2_PCNT])
                                shift++;
                        if (freesp < mp->m_low_space[XFS_LOWSP_1_PCNT])
                                shift++;
                }
                if (shift)
                        alloc_blocks >>= shift;
+
+               /*
+                * If we are still trying to allocate more space than is
+                * available, squash the prealloc hard. This can happen if we
+                * have a large file on a small filesystem and the above
+                * lowspace thresholds are smaller than MAXEXTLEN.
+                */
+               while (alloc_blocks >= freesp)
+                       alloc_blocks >>= 4;
        }

        if (alloc_blocks < mp->m_writeio_blocks)
                alloc_blocks = mp->m_writeio_blocks;

        return alloc_blocks;
}

int
xfs_iomap_write_delay(
        xfs_inode_t     *ip,
        xfs_off_t       offset,
        size_t          count,
        xfs_bmbt_irec_t *ret_imap)
{
        xfs_mount_t     *mp = ip->i_mount;
        xfs_fileoff_t   offset_fsb;
        xfs_fileoff_t   last_fsb;
        xfs_off_t       aligned_offset;
        xfs_fileoff_t   ioalign;
        xfs_extlen_t    extsz;
        int             nimaps;
        xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS];
        int             prealloc;
        int             error;

        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));

        /*
         * Make sure that the dquots are there. This doesn't hold
         * the ilock across a disk read.
         */
        error = xfs_qm_dqattach_locked(ip, 0);
        if (error)
                return XFS_ERROR(error);

        extsz = xfs_get_extsz_hint(ip);
        offset_fsb = XFS_B_TO_FSBT(mp, offset);


        error = xfs_iomap_eof_want_preallocate(mp, ip, offset, count,
                                imap, XFS_WRITE_IMAPS, &prealloc);
        if (error)
                return error;

retry:
        if (prealloc) {
                xfs_fsblock_t   alloc_blocks = xfs_iomap_prealloc_size(mp, ip);

                aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1));
                ioalign = XFS_B_TO_FSBT(mp, aligned_offset);
                last_fsb = ioalign + alloc_blocks;
        } else {
                last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
        }

        if (prealloc || extsz) {
                error = xfs_iomap_eof_align_last_fsb(mp, ip, extsz, &last_fsb);
                if (error)
                        return error;
        }

        /*
         * Make sure preallocation does not create extents beyond the range we
         * actually support in this filesystem.
         */
        if (last_fsb > XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes))
                last_fsb = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);

        ASSERT(last_fsb > offset_fsb);

        nimaps = XFS_WRITE_IMAPS;
        error = xfs_bmapi_delay(ip, offset_fsb, last_fsb - offset_fsb,
                                imap, &nimaps, XFS_BMAPI_ENTIRE);
        switch (error) {
        case 0:
        case ENOSPC:
        case EDQUOT:
                break;
        default:
                return XFS_ERROR(error);
        }

        /*
         * If bmapi returned us nothing, we got either ENOSPC or EDQUOT. Retry
         * without EOF preallocation.
         */
        if (nimaps == 0) {
                trace_xfs_delalloc_enospc(ip, offset, count);
                if (prealloc) {
                        prealloc = 0;
                        error = 0;
                        goto retry;
                }
                return XFS_ERROR(error ? error : ENOSPC);
        }

        if (!(imap[0].br_startblock || XFS_IS_REALTIME_INODE(ip)))
                return xfs_alert_fsblock_zero(ip, &imap[0]);

        /*
         * Tag the inode as speculatively preallocated so we can reclaim this
         * space on demand, if necessary.
         */
        if (prealloc)
                xfs_inode_set_eofblocks_tag(ip);

        *ret_imap = imap[0];
        return 0;
}

/*
 * Pass in a delayed allocate extent, convert it to real extents;
 * return to the caller the extent we create which maps on top of
 * the originating caller's request.
 *
 * Called without a lock on the inode.
 *
 * We no longer bother to look at the incoming map - all we have to
 * guarantee is that whatever we allocate fills the required range.
 */
int
xfs_iomap_write_allocate(
        xfs_inode_t     *ip,
        xfs_off_t       offset,
        size_t          count,
        xfs_bmbt_irec_t *imap)
{
        xfs_mount_t     *mp = ip->i_mount;
        xfs_fileoff_t   offset_fsb, last_block;
        xfs_fileoff_t   end_fsb, map_start_fsb;
        xfs_fsblock_t   first_block;
        xfs_bmap_free_t free_list;
        xfs_filblks_t   count_fsb;
        xfs_trans_t     *tp;
        int             nimaps, committed;
        int             error = 0;
        int             nres;

        /*
         * Make sure that the dquots are there.
         */
        error = xfs_qm_dqattach(ip, 0);
        if (error)
                return XFS_ERROR(error);

        offset_fsb = XFS_B_TO_FSBT(mp, offset);
        count_fsb = imap->br_blockcount;
        map_start_fsb = imap->br_startoff;

        XFS_STATS_ADD(xs_xstrat_bytes, XFS_FSB_TO_B(mp, count_fsb));

        while (count_fsb != 0) {
                /*
                 * Set up a transaction with which to allocate the
                 * backing store for the file.  Do allocations in a
                 * loop until we get some space in the range we are
                 * interested in.  The other space that might be allocated
                 * is in the delayed allocation extent on which we sit
                 * but before our buffer starts.
                 */

                nimaps = 0;
                while (nimaps == 0) {
                        tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE);
                        tp->t_flags |= XFS_TRANS_RESERVE;
                        nres = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK);
                        error = xfs_trans_reserve(tp, nres,
                                        XFS_WRITE_LOG_RES(mp),
                                        0, XFS_TRANS_PERM_LOG_RES,
                                        XFS_WRITE_LOG_COUNT);
                        if (error) {
                                xfs_trans_cancel(tp, 0);
                                return XFS_ERROR(error);
                        }
                        xfs_ilock(ip, XFS_ILOCK_EXCL);
                        xfs_trans_ijoin(tp, ip, 0);

                        xfs_bmap_init(&free_list, &first_block);

                        /*
                         * it is possible that the extents have changed since
                         * we did the read call as we dropped the ilock for a
                         * while. We have to be careful about truncates or hole
                         * punches here - we are not allowed to allocate
                         * non-delalloc blocks here.
                         *
                         * The only protection against truncation is the pages
                         * for the range we are being asked to convert are
                         * locked and hence a truncate will block on them
                         * first.
                         *
                         * As a result, if we go beyond the range we really
                         * need and hit a delalloc extent boundary followed by
                         * a hole while we have excess blocks in the map, we
                         * will fill the hole incorrectly and overrun the
                         * transaction reservation.
                         *
                         * Using a single map prevents this as we are forced to
                         * check each map we look for overlap with the desired
                         * range and abort as soon as we find it. Also, given
                         * that we only return a single map, having one beyond
                         * what we can return is probably a bit silly.
                         *
                         * We also need to check that we don't go beyond EOF;
                         * this is a truncate optimisation as a truncate sets
                         * the new file size before block on the pages we
                         * currently have locked under writeback. Because they
                         * are about to be tossed, we don't need to write them
                         * back....
                         */
                        nimaps = 1;
                        end_fsb = XFS_B_TO_FSB(mp, XFS_ISIZE(ip));
                        error = xfs_bmap_last_offset(NULL, ip, &last_block,
                                                        XFS_DATA_FORK);
                        if (error)
                                goto trans_cancel;

                        last_block = XFS_FILEOFF_MAX(last_block, end_fsb);
                        if ((map_start_fsb + count_fsb) > last_block) {
                                count_fsb = last_block - map_start_fsb;
                                if (count_fsb == 0) {
                                        error = EAGAIN;
                                        goto trans_cancel;
                                }
                        }

                        /*
                         * From this point onwards we overwrite the imap
                         * pointer that the caller gave to us.
                         */
                        error = xfs_bmapi_write(tp, ip, map_start_fsb,
                                                count_fsb,
                                                XFS_BMAPI_STACK_SWITCH,
                                                &first_block, 1,
                                                imap, &nimaps, &free_list);
                        if (error)
                                goto trans_cancel;

                        error = xfs_bmap_finish(&tp, &free_list, &committed);
                        if (error)
                                goto trans_cancel;

                        error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
                        if (error)
                                goto error0;

                        xfs_iunlock(ip, XFS_ILOCK_EXCL);
                }

                /*
                 * See if we were able to allocate an extent that
                 * covers at least part of the caller's request
                 */
                if (!(imap->br_startblock || XFS_IS_REALTIME_INODE(ip)))
                        return xfs_alert_fsblock_zero(ip, imap);

                if ((offset_fsb >= imap->br_startoff) &&
                    (offset_fsb < (imap->br_startoff +
                                   imap->br_blockcount))) {
                        XFS_STATS_INC(xs_xstrat_quick);
                        return 0;
                }

                /*
                 * So far we have not mapped the requested part of the
                 * file, just surrounding data, try again.
                 */
                count_fsb -= imap->br_blockcount;
                map_start_fsb = imap->br_startoff + imap->br_blockcount;
        }

trans_cancel:
        xfs_bmap_cancel(&free_list);
        xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
error0:
        xfs_iunlock(ip, XFS_ILOCK_EXCL);
        return XFS_ERROR(error);
}

int
xfs_iomap_write_unwritten(
        xfs_inode_t     *ip,
        xfs_off_t       offset,
        size_t          count)
{
        xfs_mount_t     *mp = ip->i_mount;
        xfs_fileoff_t   offset_fsb;
        xfs_filblks_t   count_fsb;
        xfs_filblks_t   numblks_fsb;
        xfs_fsblock_t   firstfsb;
        int             nimaps;
        xfs_trans_t     *tp;
        xfs_bmbt_irec_t imap;
        xfs_bmap_free_t free_list;
        xfs_fsize_t     i_size;
        uint            resblks;
        int             committed;
        int             error;

        trace_xfs_unwritten_convert(ip, offset, count);

        offset_fsb = XFS_B_TO_FSBT(mp, offset);
        count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
        count_fsb = (xfs_filblks_t)(count_fsb - offset_fsb);

        /*
         * Reserve enough blocks in this transaction for two complete extent
         * btree splits.  We may be converting the middle part of an unwritten
         * extent and in this case we will insert two new extents in the btree
         * each of which could cause a full split.
         *
         * This reservation amount will be used in the first call to
         * xfs_bmbt_split() to select an AG with enough space to satisfy the
         * rest of the operation.
         */
        resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1;

        do {
                /*
                 * set up a transaction to convert the range of extents
                 * from unwritten to real. Do allocations in a loop until
                 * we have covered the range passed in.
                 *
                 * Note that we open code the transaction allocation here
                 * to pass KM_NOFS--we can't risk recursing back into
                 * the filesystem here as we might be asked to write out
                 * the same inode that we complete here and might deadlock
                 * on the iolock.
                 */
                sb_start_intwrite(mp->m_super);
                tp = _xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE, KM_NOFS);
                tp->t_flags |= XFS_TRANS_RESERVE | XFS_TRANS_FREEZE_PROT;
                error = xfs_trans_reserve(tp, resblks,
                                XFS_WRITE_LOG_RES(mp), 0,
                                XFS_TRANS_PERM_LOG_RES,
                                XFS_WRITE_LOG_COUNT);
                if (error) {
                        xfs_trans_cancel(tp, 0);
                        return XFS_ERROR(error);
                }

                xfs_ilock(ip, XFS_ILOCK_EXCL);
                xfs_trans_ijoin(tp, ip, 0);

                /*
                 * Modify the unwritten extent state of the buffer.
                 */
                xfs_bmap_init(&free_list, &firstfsb);
                nimaps = 1;
                error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb,
                                  XFS_BMAPI_CONVERT, &firstfsb,
                                  1, &imap, &nimaps, &free_list);
                if (error)
                        goto error_on_bmapi_transaction;

                /*
                 * Log the updated inode size as we go.  We have to be careful
                 * to only log it up to the actual write offset if it is
                 * halfway into a block.
                 */
                i_size = XFS_FSB_TO_B(mp, offset_fsb + count_fsb);
                if (i_size > offset + count)
                        i_size = offset + count;

                i_size = xfs_new_eof(ip, i_size);
                if (i_size) {
                        ip->i_d.di_size = i_size;
                        xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
                }

                error = xfs_bmap_finish(&tp, &free_list, &committed);
                if (error)
                        goto error_on_bmapi_transaction;

                error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
                xfs_iunlock(ip, XFS_ILOCK_EXCL);
                if (error)
                        return XFS_ERROR(error);

                if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip)))
                        return xfs_alert_fsblock_zero(ip, &imap);

                if ((numblks_fsb = imap.br_blockcount) == 0) {
                        /*
                         * The numblks_fsb value should always get
                         * smaller, otherwise the loop is stuck.
                         */
                        ASSERT(imap.br_blockcount);
                        break;
                }
                offset_fsb += numblks_fsb;
                count_fsb -= numblks_fsb;
        } while (count_fsb > 0);

        return 0;

error_on_bmapi_transaction:
        xfs_bmap_cancel(&free_list);
        xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT));
        xfs_iunlock(ip, XFS_ILOCK_EXCL);
        return XFS_ERROR(error);
}
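
A side note on the context above: XFS_WRITEIO_ALIGN(), used when
sizing the delalloc mapping in xfs_iomap_write_delay(), simply rounds
a byte offset down to the write iosize granularity. A minimal
standalone sketch, assuming a 64k write iosize (writeio_log = 16; the
value here is only illustrative):

#include <stdint.h>
#include <stdio.h>

/* Standalone equivalent of XFS_WRITEIO_ALIGN(): drop the low bits. */
static uint64_t writeio_align(uint64_t off, unsigned int writeio_log)
{
        return (off >> writeio_log) << writeio_log;
}

int main(void)
{
        /* Assumed 64k write iosize: writeio_log = 16. */
        printf("%llu\n", (unsigned long long)writeio_align(200000, 16));
        /* Prints 196608: 200000 rounded down to a 64k boundary, so the
         * delalloc mapping starts writeio-aligned. */
        return 0;
}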